diff options
Diffstat (limited to 'fs/xfs')
101 files changed, 4652 insertions, 5042 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 7a59daed178..5c5a366aa33 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -16,7 +16,7 @@ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # -EXTRA_CFLAGS += -I$(src) -I$(src)/linux-2.6 -funsigned-char +EXTRA_CFLAGS += -I$(src) -I$(src)/linux-2.6 XFS_LINUX := linux-2.6 @@ -26,6 +26,8 @@ endif obj-$(CONFIG_XFS_FS) += xfs.o +xfs-y += linux-2.6/xfs_trace.o + xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \ xfs_dquot.o \ xfs_dquot_item.o \ @@ -90,8 +92,7 @@ xfs-y += xfs_alloc.o \ xfs_rw.o \ xfs_dmops.o -xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \ - xfs_dir2_trace.o +xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o # Objects in linux/ xfs-y += $(addprefix $(XFS_LINUX)/, \ @@ -113,6 +114,3 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \ xfs-y += $(addprefix support/, \ debug.o \ uuid.o) - -xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o - diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 2d3f90afe5f..bc7405585de 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -16,7 +16,6 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include <linux/mm.h> -#include <linux/vmalloc.h> #include <linux/highmem.h> #include <linux/swap.h> #include <linux/blkdev.h> @@ -24,8 +23,25 @@ #include "time.h" #include "kmem.h" -#define MAX_VMALLOCS 6 -#define MAX_SLAB_SIZE 0x20000 +/* + * Greedy allocation. May fail and may return vmalloced memory. + * + * Must be freed using kmem_free_large. 
+ */ +void * +kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) +{ + void *ptr; + size_t kmsize = maxsize; + + while (!(ptr = kmem_zalloc_large(kmsize))) { + if ((kmsize >>= 1) <= minsize) + kmsize = minsize; + } + if (ptr) + *size = kmsize; + return ptr; +} void * kmem_alloc(size_t size, unsigned int __nocast flags) @@ -34,19 +50,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags) gfp_t lflags = kmem_flags_convert(flags); void *ptr; -#ifdef DEBUG - if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) { - printk(KERN_WARNING "Large %s attempt, size=%ld\n", - __func__, (long)size); - dump_stack(); - } -#endif - do { - if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) - ptr = kmalloc(size, lflags); - else - ptr = __vmalloc(size, lflags, PAGE_KERNEL); + ptr = kmalloc(size, lflags); if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) return ptr; if (!(++retries % 100)) @@ -68,27 +73,6 @@ kmem_zalloc(size_t size, unsigned int __nocast flags) return ptr; } -void * -kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize, - unsigned int __nocast flags) -{ - void *ptr; - size_t kmsize = maxsize; - unsigned int kmflags = (flags & ~KM_SLEEP) | KM_NOSLEEP; - - while (!(ptr = kmem_zalloc(kmsize, kmflags))) { - if ((kmsize <= minsize) && (flags & KM_NOSLEEP)) - break; - if ((kmsize >>= 1) <= minsize) { - kmsize = minsize; - kmflags = flags; - } - } - if (ptr) - *size = kmsize; - return ptr; -} - void kmem_free(const void *ptr) { diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 179cbd630f6..f7c8f7a9ea6 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -21,6 +21,7 @@ #include <linux/slab.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/vmalloc.h> /* * General memory allocation interfaces @@ -30,7 +31,6 @@ #define KM_NOSLEEP 0x0002u #define KM_NOFS 0x0004u #define KM_MAYFAIL 0x0008u -#define KM_LARGE 0x0010u /* * We use a special process flag to avoid recursive callbacks into @@ -42,7 +42,7 @@ 
kmem_flags_convert(unsigned int __nocast flags) { gfp_t lflags; - BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_LARGE)); + BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL)); if (flags & KM_NOSLEEP) { lflags = GFP_ATOMIC | __GFP_NOWARN; @@ -56,10 +56,25 @@ kmem_flags_convert(unsigned int __nocast flags) extern void *kmem_alloc(size_t, unsigned int __nocast); extern void *kmem_zalloc(size_t, unsigned int __nocast); -extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast); extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast); extern void kmem_free(const void *); +static inline void *kmem_zalloc_large(size_t size) +{ + void *ptr; + + ptr = vmalloc(size); + if (ptr) + memset(ptr, 0, size); + return ptr; +} +static inline void kmem_free_large(void *ptr) +{ + vfree(ptr); +} + +extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); + /* * Zone interfaces */ diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index b23a5450644..bf85bbe4a9a 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -21,6 +21,7 @@ #include "xfs_bmap_btree.h" #include "xfs_inode.h" #include "xfs_vnodeops.h" +#include "xfs_trace.h" #include <linux/xattr.h> #include <linux/posix_acl_xattr.h> @@ -105,7 +106,7 @@ xfs_get_acl(struct inode *inode, int type) struct posix_acl *acl; struct xfs_acl *xfs_acl; int len = sizeof(struct xfs_acl); - char *ea_name; + unsigned char *ea_name; int error; acl = get_cached_acl(inode, type); @@ -132,7 +133,8 @@ xfs_get_acl(struct inode *inode, int type) if (!xfs_acl) return ERR_PTR(-ENOMEM); - error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT); + error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl, + &len, ATTR_ROOT); if (error) { /* * If the attribute doesn't exist make sure we have a negative @@ -161,7 +163,7 @@ STATIC int xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) { struct xfs_inode *ip = XFS_I(inode); - 
char *ea_name; + unsigned char *ea_name; int error; if (S_ISLNK(inode->i_mode)) @@ -193,7 +195,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) (sizeof(struct xfs_acl_entry) * (XFS_ACL_MAX_ENTRIES - acl->a_count)); - error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl, + error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, len, ATTR_ROOT); kfree(xfs_acl); @@ -250,8 +252,9 @@ xfs_set_mode(struct inode *inode, mode_t mode) if (mode != inode->i_mode) { struct iattr iattr; - iattr.ia_valid = ATTR_MODE; + iattr.ia_valid = ATTR_MODE | ATTR_CTIME; iattr.ia_mode = mode; + iattr.ia_ctime = current_fs_time(inode->i_sb); error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); } @@ -260,7 +263,7 @@ xfs_set_mode(struct inode *inode, mode_t mode) } static int -xfs_acl_exists(struct inode *inode, char *name) +xfs_acl_exists(struct inode *inode, unsigned char *name) { int len = sizeof(struct xfs_acl); @@ -353,37 +356,14 @@ xfs_acl_chmod(struct inode *inode) return error; } -/* - * System xattr handlers. - * - * Currently Posix ACLs are the only system namespace extended attribute - * handlers supported by XFS, so we just implement the handlers here. - * If we ever support other system extended attributes this will need - * some refactoring. 
- */ - static int -xfs_decode_acl(const char *name) -{ - if (strcmp(name, "posix_acl_access") == 0) - return ACL_TYPE_ACCESS; - else if (strcmp(name, "posix_acl_default") == 0) - return ACL_TYPE_DEFAULT; - return -EINVAL; -} - -static int -xfs_xattr_system_get(struct inode *inode, const char *name, - void *value, size_t size) +xfs_xattr_acl_get(struct dentry *dentry, const char *name, + void *value, size_t size, int type) { struct posix_acl *acl; - int type, error; - - type = xfs_decode_acl(name); - if (type < 0) - return type; + int error; - acl = xfs_get_acl(inode, type); + acl = xfs_get_acl(dentry->d_inode, type); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -396,15 +376,13 @@ xfs_xattr_system_get(struct inode *inode, const char *name, } static int -xfs_xattr_system_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +xfs_xattr_acl_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { + struct inode *inode = dentry->d_inode; struct posix_acl *acl = NULL; - int error = 0, type; + int error = 0; - type = xfs_decode_acl(name); - if (type < 0) - return type; if (flags & XATTR_CREATE) return -EINVAL; if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) @@ -461,8 +439,16 @@ xfs_xattr_system_set(struct inode *inode, const char *name, return error; } -struct xattr_handler xfs_xattr_system_handler = { - .prefix = XATTR_SYSTEM_PREFIX, - .get = xfs_xattr_system_get, - .set = xfs_xattr_system_set, +struct xattr_handler xfs_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, + .get = xfs_xattr_acl_get, + .set = xfs_xattr_acl_set, +}; + +struct xattr_handler xfs_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .get = xfs_xattr_acl_get, + .set = xfs_xattr_acl_set, }; diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 70f989895d1..66abe36c121 100644 --- 
a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -38,6 +38,7 @@ #include "xfs_rw.h" #include "xfs_iomap.h" #include "xfs_vnodeops.h" +#include "xfs_trace.h" #include <linux/mpage.h> #include <linux/pagevec.h> #include <linux/writeback.h> @@ -76,7 +77,7 @@ xfs_ioend_wake( wake_up(to_ioend_wq(ip)); } -STATIC void +void xfs_count_page_state( struct page *page, int *delalloc, @@ -98,48 +99,6 @@ xfs_count_page_state( } while ((bh = bh->b_this_page) != head); } -#if defined(XFS_RW_TRACE) -void -xfs_page_trace( - int tag, - struct inode *inode, - struct page *page, - unsigned long pgoff) -{ - xfs_inode_t *ip; - loff_t isize = i_size_read(inode); - loff_t offset = page_offset(page); - int delalloc = -1, unmapped = -1, unwritten = -1; - - if (page_has_buffers(page)) - xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); - - ip = XFS_I(inode); - if (!ip->i_rwtrace) - return; - - ktrace_enter(ip->i_rwtrace, - (void *)((unsigned long)tag), - (void *)ip, - (void *)inode, - (void *)page, - (void *)pgoff, - (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), - (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), - (void *)((unsigned long)((isize >> 32) & 0xffffffff)), - (void *)((unsigned long)(isize & 0xffffffff)), - (void *)((unsigned long)((offset >> 32) & 0xffffffff)), - (void *)((unsigned long)(offset & 0xffffffff)), - (void *)((unsigned long)delalloc), - (void *)((unsigned long)unmapped), - (void *)((unsigned long)unwritten), - (void *)((unsigned long)current_pid()), - (void *)NULL); -} -#else -#define xfs_page_trace(tag, inode, page, pgoff) -#endif - STATIC struct block_device * xfs_find_bdev_for_inode( struct xfs_inode *ip) @@ -235,71 +194,36 @@ xfs_setfilesize( } /* - * Buffered IO write completion for delayed allocate extents. 
- */ -STATIC void -xfs_end_bio_delalloc( - struct work_struct *work) -{ - xfs_ioend_t *ioend = - container_of(work, xfs_ioend_t, io_work); - - xfs_setfilesize(ioend); - xfs_destroy_ioend(ioend); -} - -/* - * Buffered IO write completion for regular, written extents. + * IO write completion. */ STATIC void -xfs_end_bio_written( - struct work_struct *work) -{ - xfs_ioend_t *ioend = - container_of(work, xfs_ioend_t, io_work); - - xfs_setfilesize(ioend); - xfs_destroy_ioend(ioend); -} - -/* - * IO write completion for unwritten extents. - * - * Issue transactions to convert a buffer range from unwritten - * to written extents. - */ -STATIC void -xfs_end_bio_unwritten( +xfs_end_io( struct work_struct *work) { xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); struct xfs_inode *ip = XFS_I(ioend->io_inode); - xfs_off_t offset = ioend->io_offset; - size_t size = ioend->io_size; - - if (likely(!ioend->io_error)) { - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - int error; - error = xfs_iomap_write_unwritten(ip, offset, size); - if (error) - ioend->io_error = error; - } - xfs_setfilesize(ioend); - } - xfs_destroy_ioend(ioend); -} -/* - * IO read completion for regular, written extents. - */ -STATIC void -xfs_end_bio_read( - struct work_struct *work) -{ - xfs_ioend_t *ioend = - container_of(work, xfs_ioend_t, io_work); + /* + * For unwritten extents we need to issue transactions to convert a + * range to normal written extens after the data I/O has finished. + */ + if (ioend->io_type == IOMAP_UNWRITTEN && + likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { + int error; + + error = xfs_iomap_write_unwritten(ip, ioend->io_offset, + ioend->io_size); + if (error) + ioend->io_error = error; + } + /* + * We might have to update the on-disk file size after extending + * writes. 
+ */ + if (ioend->io_type != IOMAP_READ) + xfs_setfilesize(ioend); xfs_destroy_ioend(ioend); } @@ -314,10 +238,10 @@ xfs_finish_ioend( int wait) { if (atomic_dec_and_test(&ioend->io_remaining)) { - struct workqueue_struct *wq = xfsdatad_workqueue; - if (ioend->io_work.func == xfs_end_bio_unwritten) - wq = xfsconvertd_workqueue; + struct workqueue_struct *wq; + wq = (ioend->io_type == IOMAP_UNWRITTEN) ? + xfsconvertd_workqueue : xfsdatad_workqueue; queue_work(wq, &ioend->io_work); if (wait) flush_workqueue(wq); @@ -355,15 +279,7 @@ xfs_alloc_ioend( ioend->io_offset = 0; ioend->io_size = 0; - if (type == IOMAP_UNWRITTEN) - INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten); - else if (type == IOMAP_DELAY) - INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc); - else if (type == IOMAP_READ) - INIT_WORK(&ioend->io_work, xfs_end_bio_read); - else - INIT_WORK(&ioend->io_work, xfs_end_bio_written); - + INIT_WORK(&ioend->io_work, xfs_end_io); return ioend; } @@ -380,7 +296,7 @@ xfs_map_blocks( return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps); } -STATIC_INLINE int +STATIC int xfs_iomap_valid( xfs_iomap_t *iomapp, loff_t offset) @@ -412,8 +328,9 @@ xfs_end_bio( STATIC void xfs_submit_ioend_bio( - xfs_ioend_t *ioend, - struct bio *bio) + struct writeback_control *wbc, + xfs_ioend_t *ioend, + struct bio *bio) { atomic_inc(&ioend->io_remaining); bio->bi_private = ioend; @@ -426,7 +343,8 @@ xfs_submit_ioend_bio( if (xfs_ioend_new_eof(ioend)) xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode)); - submit_bio(WRITE, bio); + submit_bio(wbc->sync_mode == WB_SYNC_ALL ? 
+ WRITE_SYNC_PLUG : WRITE, bio); ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP)); bio_put(bio); } @@ -505,6 +423,7 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) */ STATIC void xfs_submit_ioend( + struct writeback_control *wbc, xfs_ioend_t *ioend) { xfs_ioend_t *head = ioend; @@ -533,19 +452,19 @@ xfs_submit_ioend( retry: bio = xfs_alloc_ioend_bio(bh); } else if (bh->b_blocknr != lastblock + 1) { - xfs_submit_ioend_bio(ioend, bio); + xfs_submit_ioend_bio(wbc, ioend, bio); goto retry; } if (bio_add_buffer(bio, bh) != bh->b_size) { - xfs_submit_ioend_bio(ioend, bio); + xfs_submit_ioend_bio(wbc, ioend, bio); goto retry; } lastblock = bh->b_blocknr; } if (bio) - xfs_submit_ioend_bio(ioend, bio); + xfs_submit_ioend_bio(wbc, ioend, bio); xfs_finish_ioend(ioend, 0); } while ((ioend = next) != NULL); } @@ -1191,7 +1110,7 @@ xfs_page_state_convert( } if (iohead) - xfs_submit_ioend(iohead); + xfs_submit_ioend(wbc, iohead); return page_dirty; @@ -1242,7 +1161,7 @@ xfs_vm_writepage( int delalloc, unmapped, unwritten; struct inode *inode = page->mapping->host; - xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0); + trace_xfs_writepage(inode, page, 0); /* * We need a transaction if: @@ -1347,7 +1266,7 @@ xfs_vm_releasepage( .nr_to_write = 1, }; - xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, 0); + trace_xfs_releasepage(inode, page, 0); if (!page_has_buffers(page)) return 0; @@ -1528,7 +1447,7 @@ xfs_end_io_direct( * didn't map an unwritten extent so switch it's completion * handler. 
*/ - INIT_WORK(&ioend->io_work, xfs_end_bio_written); + ioend->io_type = IOMAP_NEW; xfs_finish_ioend(ioend, 0); } @@ -1555,19 +1474,13 @@ xfs_vm_direct_IO( bdev = xfs_find_bdev_for_inode(XFS_I(inode)); - if (rw == WRITE) { - iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); - ret = blockdev_direct_IO_own_locking(rw, iocb, inode, - bdev, iov, offset, nr_segs, - xfs_get_blocks_direct, - xfs_end_io_direct); - } else { - iocb->private = xfs_alloc_ioend(inode, IOMAP_READ); - ret = blockdev_direct_IO_no_locking(rw, iocb, inode, - bdev, iov, offset, nr_segs, - xfs_get_blocks_direct, - xfs_end_io_direct); - } + iocb->private = xfs_alloc_ioend(inode, rw == WRITE ? + IOMAP_UNWRITTEN : IOMAP_READ); + + ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, + offset, nr_segs, + xfs_get_blocks_direct, + xfs_end_io_direct); if (unlikely(ret != -EIOCBQUEUED && iocb->private)) xfs_destroy_ioend(iocb->private); @@ -1627,8 +1540,7 @@ xfs_vm_invalidatepage( struct page *page, unsigned long offset) { - xfs_page_trace(XFS_INVALIDPAGE_ENTER, - page->mapping->host, page, offset); + trace_xfs_invalidatepage(page->mapping->host, page, offset); block_invalidatepage(page, offset); } diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 221b3e66cee..4cfc6ea87df 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -45,4 +45,6 @@ extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); extern void xfs_ioend_init(void); extern void xfs_ioend_wait(struct xfs_inode *); +extern void xfs_count_page_state(struct page *, int *, int *, int *); + #endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 965df1227d6..6f76ba85f19 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -33,12 +33,14 @@ #include <linux/migrate.h> #include <linux/backing-dev.h> #include <linux/freezer.h> +#include <linux/list_sort.h> #include "xfs_sb.h" #include "xfs_inum.h" 
#include "xfs_ag.h" #include "xfs_dmapi.h" #include "xfs_mount.h" +#include "xfs_trace.h" static kmem_zone_t *xfs_buf_zone; STATIC int xfsbufd(void *); @@ -53,34 +55,6 @@ static struct workqueue_struct *xfslogd_workqueue; struct workqueue_struct *xfsdatad_workqueue; struct workqueue_struct *xfsconvertd_workqueue; -#ifdef XFS_BUF_TRACE -void -xfs_buf_trace( - xfs_buf_t *bp, - char *id, - void *data, - void *ra) -{ - ktrace_enter(xfs_buf_trace_buf, - bp, id, - (void *)(unsigned long)bp->b_flags, - (void *)(unsigned long)bp->b_hold.counter, - (void *)(unsigned long)bp->b_sema.count, - (void *)current, - data, ra, - (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff), - (void *)(unsigned long)(bp->b_file_offset & 0xffffffff), - (void *)(unsigned long)bp->b_buffer_length, - NULL, NULL, NULL, NULL, NULL); -} -ktrace_t *xfs_buf_trace_buf; -#define XFS_BUF_TRACE_SIZE 4096 -#define XB_TRACE(bp, id, data) \ - xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0)) -#else -#define XB_TRACE(bp, id, data) do { } while (0) -#endif - #ifdef XFS_BUF_LOCK_TRACKING # define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) # define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) @@ -103,6 +77,27 @@ ktrace_t *xfs_buf_trace_buf; #define xfs_buf_deallocate(bp) \ kmem_zone_free(xfs_buf_zone, (bp)); +static inline int +xfs_buf_is_vmapped( + struct xfs_buf *bp) +{ + /* + * Return true if the buffer is vmapped. + * + * The XBF_MAPPED flag is set if the buffer should be mapped, but the + * code is clever enough to know it doesn't have to map a single page, + * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1. + */ + return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1; +} + +static inline int +xfs_buf_vmap_len( + struct xfs_buf *bp) +{ + return (bp->b_page_count * PAGE_SIZE) - bp->b_offset; +} + /* * Page Region interfaces. 
* @@ -149,7 +144,7 @@ page_region_mask( return mask; } -STATIC_INLINE void +STATIC void set_page_region( struct page *page, size_t offset, @@ -161,7 +156,7 @@ set_page_region( SetPageUptodate(page); } -STATIC_INLINE int +STATIC int test_page_region( struct page *page, size_t offset, @@ -279,7 +274,8 @@ _xfs_buf_initialize( init_waitqueue_head(&bp->b_waiters); XFS_STATS_INC(xb_create); - XB_TRACE(bp, "initialize", target); + + trace_xfs_buf_init(bp, _RET_IP_); } /* @@ -318,6 +314,7 @@ _xfs_buf_free_pages( { if (bp->b_pages != bp->b_page_array) { kmem_free(bp->b_pages); + bp->b_pages = NULL; } } @@ -332,14 +329,14 @@ void xfs_buf_free( xfs_buf_t *bp) { - XB_TRACE(bp, "free", 0); + trace_xfs_buf_free(bp, _RET_IP_); ASSERT(list_empty(&bp->b_hash_list)); if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { uint i; - if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) + if (xfs_buf_is_vmapped(bp)) free_address(bp->b_addr - bp->b_offset); for (i = 0; i < bp->b_page_count; i++) { @@ -349,9 +346,8 @@ xfs_buf_free( ASSERT(!PagePrivate(page)); page_cache_release(page); } - _xfs_buf_free_pages(bp); } - + _xfs_buf_free_pages(bp); xfs_buf_deallocate(bp); } @@ -445,7 +441,6 @@ _xfs_buf_lookup_pages( if (page_count == bp->b_page_count) bp->b_flags |= XBF_DONE; - XB_TRACE(bp, "lookup_pages", (long)page_count); return error; } @@ -548,7 +543,6 @@ found: if (down_trylock(&bp->b_sema)) { if (!(flags & XBF_TRYLOCK)) { /* wait for buffer ownership */ - XB_TRACE(bp, "get_lock", 0); xfs_buf_lock(bp); XFS_STATS_INC(xb_get_locked_waited); } else { @@ -571,7 +565,8 @@ found: ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); bp->b_flags &= XBF_MAPPED; } - XB_TRACE(bp, "got_lock", 0); + + trace_xfs_buf_find(bp, flags, _RET_IP_); XFS_STATS_INC(xb_get_locked); return bp; } @@ -582,7 +577,7 @@ found: * although backing storage may not be. 
*/ xfs_buf_t * -xfs_buf_get_flags( +xfs_buf_get( xfs_buftarg_t *target,/* target for buffer */ xfs_off_t ioff, /* starting offset of range */ size_t isize, /* length of range */ @@ -627,7 +622,7 @@ xfs_buf_get_flags( bp->b_bn = ioff; bp->b_count_desired = bp->b_buffer_length; - XB_TRACE(bp, "get", (unsigned long)flags); + trace_xfs_buf_get(bp, flags, _RET_IP_); return bp; no_buffer: @@ -644,8 +639,6 @@ _xfs_buf_read( { int status; - XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags); - ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); @@ -661,7 +654,7 @@ _xfs_buf_read( } xfs_buf_t * -xfs_buf_read_flags( +xfs_buf_read( xfs_buftarg_t *target, xfs_off_t ioff, size_t isize, @@ -671,21 +664,20 @@ xfs_buf_read_flags( flags |= XBF_READ; - bp = xfs_buf_get_flags(target, ioff, isize, flags); + bp = xfs_buf_get(target, ioff, isize, flags); if (bp) { + trace_xfs_buf_read(bp, flags, _RET_IP_); + if (!XFS_BUF_ISDONE(bp)) { - XB_TRACE(bp, "read", (unsigned long)flags); XFS_STATS_INC(xb_get_read); _xfs_buf_read(bp, flags); } else if (flags & XBF_ASYNC) { - XB_TRACE(bp, "read_async", (unsigned long)flags); /* * Read ahead call which is already satisfied, * drop the buffer */ goto no_buffer; } else { - XB_TRACE(bp, "read_done", (unsigned long)flags); /* We do not want read in the flags */ bp->b_flags &= ~XBF_READ; } @@ -718,7 +710,7 @@ xfs_buf_readahead( return; flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); - xfs_buf_read_flags(target, ioff, isize, flags); + xfs_buf_read(target, ioff, isize, flags); } xfs_buf_t * @@ -823,7 +815,7 @@ xfs_buf_get_noaddr( xfs_buf_unlock(bp); - XB_TRACE(bp, "no_daddr", len); + trace_xfs_buf_get_noaddr(bp, _RET_IP_); return bp; fail_free_mem: @@ -845,8 +837,8 @@ void xfs_buf_hold( xfs_buf_t *bp) { + trace_xfs_buf_hold(bp, _RET_IP_); atomic_inc(&bp->b_hold); - XB_TRACE(bp, "hold", 0); } /* @@ -859,7 +851,7 @@ xfs_buf_rele( { xfs_bufhash_t *hash = bp->b_hash; - XB_TRACE(bp, "rele", bp->b_relse); + 
trace_xfs_buf_rele(bp, _RET_IP_); if (unlikely(!hash)) { ASSERT(!bp->b_relse); @@ -909,21 +901,19 @@ xfs_buf_cond_lock( int locked; locked = down_trylock(&bp->b_sema) == 0; - if (locked) { + if (locked) XB_SET_OWNER(bp); - } - XB_TRACE(bp, "cond_lock", (long)locked); + + trace_xfs_buf_cond_lock(bp, _RET_IP_); return locked ? 0 : -EBUSY; } -#if defined(DEBUG) || defined(XFS_BLI_TRACE) int xfs_buf_lock_value( xfs_buf_t *bp) { return bp->b_sema.count; } -#endif /* * Locks a buffer object. @@ -935,12 +925,14 @@ void xfs_buf_lock( xfs_buf_t *bp) { - XB_TRACE(bp, "lock", 0); + trace_xfs_buf_lock(bp, _RET_IP_); + if (atomic_read(&bp->b_io_remaining)) blk_run_address_space(bp->b_target->bt_mapping); down(&bp->b_sema); XB_SET_OWNER(bp); - XB_TRACE(bp, "locked", 0); + + trace_xfs_buf_lock_done(bp, _RET_IP_); } /* @@ -962,7 +954,8 @@ xfs_buf_unlock( XB_CLEAR_OWNER(bp); up(&bp->b_sema); - XB_TRACE(bp, "unlock", 0); + + trace_xfs_buf_unlock(bp, _RET_IP_); } @@ -974,17 +967,18 @@ void xfs_buf_pin( xfs_buf_t *bp) { + trace_xfs_buf_pin(bp, _RET_IP_); atomic_inc(&bp->b_pin_count); - XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter); } void xfs_buf_unpin( xfs_buf_t *bp) { + trace_xfs_buf_unpin(bp, _RET_IP_); + if (atomic_dec_and_test(&bp->b_pin_count)) wake_up_all(&bp->b_waiters); - XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter); } int @@ -1035,7 +1029,7 @@ xfs_buf_iodone_work( */ if ((bp->b_error == EOPNOTSUPP) && (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { - XB_TRACE(bp, "ordered_retry", bp->b_iodone); + trace_xfs_buf_ordered_retry(bp, _RET_IP_); bp->b_flags &= ~XBF_ORDERED; bp->b_flags |= _XFS_BARRIER_FAILED; xfs_buf_iorequest(bp); @@ -1050,12 +1044,12 @@ xfs_buf_ioend( xfs_buf_t *bp, int schedule) { + trace_xfs_buf_iodone(bp, _RET_IP_); + bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); if (bp->b_error == 0) bp->b_flags |= XBF_DONE; - XB_TRACE(bp, "iodone", bp->b_iodone); - if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) { if (schedule) 
{ INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); @@ -1075,26 +1069,34 @@ xfs_buf_ioerror( { ASSERT(error >= 0 && error <= 0xffff); bp->b_error = (unsigned short)error; - XB_TRACE(bp, "ioerror", (unsigned long)error); + trace_xfs_buf_ioerror(bp, error, _RET_IP_); } int -xfs_bawrite( - void *mp, +xfs_bwrite( + struct xfs_mount *mp, struct xfs_buf *bp) { - XB_TRACE(bp, "bawrite", 0); + int iowait = (bp->b_flags & XBF_ASYNC) == 0; + int error = 0; - ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); + bp->b_strat = xfs_bdstrat_cb; + bp->b_mount = mp; + bp->b_flags |= XBF_WRITE; + if (!iowait) + bp->b_flags |= _XBF_RUN_QUEUES; xfs_buf_delwri_dequeue(bp); + xfs_buf_iostrategy(bp); - bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD); - bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); + if (iowait) { + error = xfs_buf_iowait(bp); + if (error) + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + xfs_buf_relse(bp); + } - bp->b_mount = mp; - bp->b_strat = xfs_bdstrat_cb; - return xfs_bdstrat_cb(bp); + return error; } void @@ -1102,7 +1104,7 @@ xfs_bdwrite( void *mp, struct xfs_buf *bp) { - XB_TRACE(bp, "bdwrite", 0); + trace_xfs_buf_bdwrite(bp, _RET_IP_); bp->b_strat = xfs_bdstrat_cb; bp->b_mount = mp; @@ -1113,7 +1115,127 @@ xfs_bdwrite( xfs_buf_delwri_queue(bp, 1); } -STATIC_INLINE void +/* + * Called when we want to stop a buffer from getting written or read. + * We attach the EIO error, muck with its flags, and call biodone + * so that the proper iodone callbacks get called. + */ +STATIC int +xfs_bioerror( + xfs_buf_t *bp) +{ +#ifdef XFSERRORDEBUG + ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); +#endif + + /* + * No need to wait until the buffer is unpinned, we aren't flushing it. + */ + XFS_BUF_ERROR(bp, EIO); + + /* + * We're calling biodone, so delete XBF_DONE flag. 
+ */ + XFS_BUF_UNREAD(bp); + XFS_BUF_UNDELAYWRITE(bp); + XFS_BUF_UNDONE(bp); + XFS_BUF_STALE(bp); + + XFS_BUF_CLR_BDSTRAT_FUNC(bp); + xfs_biodone(bp); + + return EIO; +} + +/* + * Same as xfs_bioerror, except that we are releasing the buffer + * here ourselves, and avoiding the biodone call. + * This is meant for userdata errors; metadata bufs come with + * iodone functions attached, so that we can track down errors. + */ +STATIC int +xfs_bioerror_relse( + struct xfs_buf *bp) +{ + int64_t fl = XFS_BUF_BFLAGS(bp); + /* + * No need to wait until the buffer is unpinned. + * We aren't flushing it. + * + * chunkhold expects B_DONE to be set, whether + * we actually finish the I/O or not. We don't want to + * change that interface. + */ + XFS_BUF_UNREAD(bp); + XFS_BUF_UNDELAYWRITE(bp); + XFS_BUF_DONE(bp); + XFS_BUF_STALE(bp); + XFS_BUF_CLR_IODONE_FUNC(bp); + XFS_BUF_CLR_BDSTRAT_FUNC(bp); + if (!(fl & XBF_ASYNC)) { + /* + * Mark b_error and B_ERROR _both_. + * Lot's of chunkcache code assumes that. + * There's no reason to mark error for + * ASYNC buffers. + */ + XFS_BUF_ERROR(bp, EIO); + XFS_BUF_FINISH_IOWAIT(bp); + } else { + xfs_buf_relse(bp); + } + + return EIO; +} + + +/* + * All xfs metadata buffers except log state machine buffers + * get this attached as their b_bdstrat callback function. + * This is so that we can catch a buffer + * after prematurely unpinning it to forcibly shutdown the filesystem. + */ +int +xfs_bdstrat_cb( + struct xfs_buf *bp) +{ + if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { + trace_xfs_bdstrat_shut(bp, _RET_IP_); + /* + * Metadata write that didn't get logged but + * written delayed anyway. These aren't associated + * with a transaction, and can be ignored. + */ + if (!bp->b_iodone && !XFS_BUF_ISREAD(bp)) + return xfs_bioerror_relse(bp); + else + return xfs_bioerror(bp); + } + + xfs_buf_iorequest(bp); + return 0; +} + +/* + * Wrapper around bdstrat so that we can stop data from going to disk in case + * we are shutting down the filesystem. 
Typically user data goes thru this + * path; one of the exceptions is the superblock. + */ +void +xfsbdstrat( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + if (XFS_FORCED_SHUTDOWN(mp)) { + trace_xfs_bdstrat_shut(bp, _RET_IP_); + xfs_bioerror_relse(bp); + return; + } + + xfs_buf_iorequest(bp); +} + +STATIC void _xfs_buf_ioend( xfs_buf_t *bp, int schedule) @@ -1135,6 +1257,9 @@ xfs_buf_bio_end_io( xfs_buf_ioerror(bp, -error); + if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) + invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); + do { struct page *page = bvec->bv_page; @@ -1177,10 +1302,14 @@ _xfs_buf_ioapply( if (bp->b_flags & XBF_ORDERED) { ASSERT(!(bp->b_flags & XBF_READ)); rw = WRITE_BARRIER; - } else if (bp->b_flags & _XBF_RUN_QUEUES) { + } else if (bp->b_flags & XBF_LOG_BUFFER) { ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); bp->b_flags &= ~_XBF_RUN_QUEUES; rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; + } else if (bp->b_flags & _XBF_RUN_QUEUES) { + ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); + bp->b_flags &= ~_XBF_RUN_QUEUES; + rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META; } else { rw = (bp->b_flags & XBF_WRITE) ? WRITE : (bp->b_flags & XBF_READ_AHEAD) ? 
READA : READ; @@ -1240,6 +1369,10 @@ next_chunk: submit_io: if (likely(bio->bi_size)) { + if (xfs_buf_is_vmapped(bp)) { + flush_kernel_vmap_range(bp->b_addr, + xfs_buf_vmap_len(bp)); + } submit_bio(rw, bio); if (size) goto next_chunk; @@ -1253,7 +1386,7 @@ int xfs_buf_iorequest( xfs_buf_t *bp) { - XB_TRACE(bp, "iorequest", 0); + trace_xfs_buf_iorequest(bp, _RET_IP_); if (bp->b_flags & XBF_DELWRI) { xfs_buf_delwri_queue(bp, 1); @@ -1287,11 +1420,13 @@ int xfs_buf_iowait( xfs_buf_t *bp) { - XB_TRACE(bp, "iowait", 0); + trace_xfs_buf_iowait(bp, _RET_IP_); + if (atomic_read(&bp->b_io_remaining)) blk_run_address_space(bp->b_target->bt_mapping); wait_for_completion(&bp->b_iowait); - XB_TRACE(bp, "iowaited", (long)bp->b_error); + + trace_xfs_buf_iowait_done(bp, _RET_IP_); return bp->b_error; } @@ -1318,7 +1453,7 @@ xfs_buf_iomove( xfs_buf_t *bp, /* buffer to process */ size_t boff, /* starting buffer offset */ size_t bsize, /* length to copy */ - caddr_t data, /* data address */ + void *data, /* data address */ xfs_buf_rw_t mode) /* read/write/zero flag */ { size_t bend, cpoff, csize; @@ -1400,8 +1535,8 @@ xfs_alloc_bufhash( btp->bt_hashshift = external ? 
3 : 8; /* 8 or 256 buckets */ btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; - btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) * - sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE); + btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * + sizeof(xfs_bufhash_t)); for (i = 0; i < (1 << btp->bt_hashshift); i++) { spin_lock_init(&btp->bt_hash[i].bh_lock); INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); @@ -1412,7 +1547,7 @@ STATIC void xfs_free_bufhash( xfs_buftarg_t *btp) { - kmem_free(btp->bt_hash); + kmem_free_large(btp->bt_hash); btp->bt_hash = NULL; } @@ -1604,7 +1739,8 @@ xfs_buf_delwri_queue( struct list_head *dwq = &bp->b_target->bt_delwrite_queue; spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; - XB_TRACE(bp, "delwri_q", (long)unlock); + trace_xfs_buf_delwri_queue(bp, _RET_IP_); + ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); spin_lock(dwlk); @@ -1616,6 +1752,11 @@ xfs_buf_delwri_queue( list_del(&bp->b_list); } + if (list_empty(dwq)) { + /* start xfsbufd as it is about to have something to do */ + wake_up_process(bp->b_target->bt_task); + } + bp->b_flags |= _XBF_DELWRI_Q; list_add_tail(&bp->b_list, dwq); bp->b_queuetime = jiffies; @@ -1644,7 +1785,36 @@ xfs_buf_delwri_dequeue( if (dequeued) xfs_buf_rele(bp); - XB_TRACE(bp, "delwri_dq", (long)dequeued); + trace_xfs_buf_delwri_dequeue(bp, _RET_IP_); +} + +/* + * If a delwri buffer needs to be pushed before it has aged out, then promote + * it to the head of the delwri queue so that it will be flushed on the next + * xfsbufd run. We do this by resetting the queuetime of the buffer to be older + * than the age currently needed to flush the buffer. Hence the next time the + * xfsbufd sees it is guaranteed to be considered old enough to flush. 
+ */ +void +xfs_buf_delwri_promote( + struct xfs_buf *bp) +{ + struct xfs_buftarg *btp = bp->b_target; + long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1; + + ASSERT(bp->b_flags & XBF_DELWRI); + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + + /* + * Check the buffer age before locking the delayed write queue as we + * don't need to promote buffers that are already past the flush age. + */ + if (bp->b_queuetime < jiffies - age) + return; + bp->b_queuetime = jiffies - age; + spin_lock(&btp->bt_delwrite_lock); + list_move(&bp->b_list, &btp->bt_delwrite_queue); + spin_unlock(&btp->bt_delwrite_lock); } STATIC void @@ -1665,6 +1835,8 @@ xfsbufd_wakeup( list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) continue; + if (list_empty(&btp->bt_delwrite_queue)) + continue; set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); wake_up_process(btp->bt_task); } @@ -1692,7 +1864,7 @@ xfs_buf_delwri_split( INIT_LIST_HEAD(list); spin_lock(dwlk); list_for_each_entry_safe(bp, n, dwq, b_list) { - XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp)); + trace_xfs_buf_delwri_split(bp, _RET_IP_); ASSERT(bp->b_flags & XBF_DELWRI); if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) { @@ -1715,20 +1887,53 @@ xfs_buf_delwri_split( } +/* + * Compare function is more complex than it needs to be because + * the return value is only 32 bits and we are doing comparisons + * on 64 bit values + */ +static int +xfs_buf_cmp( + void *priv, + struct list_head *a, + struct list_head *b) +{ + struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list); + struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); + xfs_daddr_t diff; + + diff = ap->b_bn - bp->b_bn; + if (diff < 0) + return -1; + if (diff > 0) + return 1; + return 0; +} + +void +xfs_buf_delwri_sort( + xfs_buftarg_t *target, + struct list_head *list) +{ + list_sort(NULL, list, xfs_buf_cmp); +} + STATIC int xfsbufd( void *data) { - struct list_head tmp; - xfs_buftarg_t *target = (xfs_buftarg_t *)data; - 
int count; - xfs_buf_t *bp; + xfs_buftarg_t *target = (xfs_buftarg_t *)data; current->flags |= PF_MEMALLOC; set_freezable(); do { + long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); + long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); + int count = 0; + struct list_head tmp; + if (unlikely(freezing(current))) { set_bit(XBT_FORCE_SLEEP, &target->bt_flags); refrigerator(); @@ -1736,17 +1941,16 @@ xfsbufd( clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); } - schedule_timeout_interruptible( - xfs_buf_timer_centisecs * msecs_to_jiffies(10)); + /* sleep for a long time if there is nothing to do. */ + if (list_empty(&target->bt_delwrite_queue)) + tout = MAX_SCHEDULE_TIMEOUT; + schedule_timeout_interruptible(tout); - xfs_buf_delwri_split(target, &tmp, - xfs_buf_age_centisecs * msecs_to_jiffies(10)); - - count = 0; + xfs_buf_delwri_split(target, &tmp, age); + list_sort(NULL, &tmp, xfs_buf_cmp); while (!list_empty(&tmp)) { - bp = list_entry(tmp.next, xfs_buf_t, b_list); - ASSERT(target == bp->b_target); - + struct xfs_buf *bp; + bp = list_first_entry(&tmp, struct xfs_buf, b_list); list_del_init(&bp->b_list); xfs_buf_iostrategy(bp); count++; @@ -1772,42 +1976,45 @@ xfs_flush_buftarg( xfs_buftarg_t *target, int wait) { - struct list_head tmp; - xfs_buf_t *bp, *n; + xfs_buf_t *bp; int pincount = 0; + LIST_HEAD(tmp_list); + LIST_HEAD(wait_list); xfs_buf_runall_queues(xfsconvertd_workqueue); xfs_buf_runall_queues(xfsdatad_workqueue); xfs_buf_runall_queues(xfslogd_workqueue); set_bit(XBT_FORCE_FLUSH, &target->bt_flags); - pincount = xfs_buf_delwri_split(target, &tmp, 0); + pincount = xfs_buf_delwri_split(target, &tmp_list, 0); /* - * Dropped the delayed write list lock, now walk the temporary list + * Dropped the delayed write list lock, now walk the temporary list. + * All I/O is issued async and then if we need to wait for completion + * we do that after issuing all the IO. 
*/ - list_for_each_entry_safe(bp, n, &tmp, b_list) { + list_sort(NULL, &tmp_list, xfs_buf_cmp); + while (!list_empty(&tmp_list)) { + bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); ASSERT(target == bp->b_target); - if (wait) + list_del_init(&bp->b_list); + if (wait) { bp->b_flags &= ~XBF_ASYNC; - else - list_del_init(&bp->b_list); - + list_add(&bp->b_list, &wait_list); + } xfs_buf_iostrategy(bp); } - if (wait) + if (wait) { + /* Expedite and wait for IO to complete. */ blk_run_address_space(target->bt_mapping); + while (!list_empty(&wait_list)) { + bp = list_first_entry(&wait_list, struct xfs_buf, b_list); - /* - * Remaining list items must be flushed before returning - */ - while (!list_empty(&tmp)) { - bp = list_entry(tmp.next, xfs_buf_t, b_list); - - list_del_init(&bp->b_list); - xfs_iowait(bp); - xfs_buf_relse(bp); + list_del_init(&bp->b_list); + xfs_iowait(bp); + xfs_buf_relse(bp); + } } return pincount; @@ -1816,14 +2023,10 @@ xfs_flush_buftarg( int __init xfs_buf_init(void) { -#ifdef XFS_BUF_TRACE - xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS); -#endif - xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", KM_ZONE_HWALIGN, NULL); if (!xfs_buf_zone) - goto out_free_trace_buf; + goto out; xfslogd_workqueue = create_workqueue("xfslogd"); if (!xfslogd_workqueue) @@ -1846,10 +2049,7 @@ xfs_buf_init(void) destroy_workqueue(xfslogd_workqueue); out_free_buf_zone: kmem_zone_destroy(xfs_buf_zone); - out_free_trace_buf: -#ifdef XFS_BUF_TRACE - ktrace_free(xfs_buf_trace_buf); -#endif + out: return -ENOMEM; } @@ -1861,9 +2061,6 @@ xfs_buf_terminate(void) destroy_workqueue(xfsdatad_workqueue); destroy_workqueue(xfslogd_workqueue); kmem_zone_destroy(xfs_buf_zone); -#ifdef XFS_BUF_TRACE - ktrace_free(xfs_buf_trace_buf); -#endif } #ifdef CONFIG_KDB_MODULES diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 9b4d666ad31..386e7361e50 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -55,6 
+55,7 @@ typedef enum { XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ XBF_ORDERED = (1 << 11), /* use ordered writes */ XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ + XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */ /* flags used only as arguments to access routines */ XBF_LOCK = (1 << 14), /* lock requested */ @@ -95,6 +96,28 @@ typedef enum { _XFS_BARRIER_FAILED = (1 << 23), } xfs_buf_flags_t; +#define XFS_BUF_FLAGS \ + { XBF_READ, "READ" }, \ + { XBF_WRITE, "WRITE" }, \ + { XBF_MAPPED, "MAPPED" }, \ + { XBF_ASYNC, "ASYNC" }, \ + { XBF_DONE, "DONE" }, \ + { XBF_DELWRI, "DELWRI" }, \ + { XBF_STALE, "STALE" }, \ + { XBF_FS_MANAGED, "FS_MANAGED" }, \ + { XBF_ORDERED, "ORDERED" }, \ + { XBF_READ_AHEAD, "READ_AHEAD" }, \ + { XBF_LOCK, "LOCK" }, /* should never be set */\ + { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ + { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ + { _XBF_PAGE_CACHE, "PAGE_CACHE" }, \ + { _XBF_PAGES, "PAGES" }, \ + { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ + { _XBF_DELWRI_Q, "DELWRI_Q" }, \ + { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \ + { _XFS_BARRIER_FAILED, "BARRIER_FAILED" } + + typedef enum { XBT_FORCE_SLEEP = 0, XBT_FORCE_FLUSH = 1, @@ -186,15 +209,10 @@ extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t, #define xfs_incore(buftarg,blkno,len,lockit) \ _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) -extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t, +extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t, xfs_buf_flags_t); -#define xfs_buf_get(target, blkno, len, flags) \ - xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED) - -extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t, +extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, xfs_buf_flags_t); -#define xfs_buf_read(target, blkno, len, flags) \ - xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED) extern xfs_buf_t 
*xfs_buf_get_empty(size_t, xfs_buftarg_t *); extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); @@ -214,13 +232,17 @@ extern void xfs_buf_lock(xfs_buf_t *); extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ -extern int xfs_bawrite(void *mp, xfs_buf_t *bp); +extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); + +extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); +extern int xfs_bdstrat_cb(struct xfs_buf *); + extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int); extern int xfs_buf_iorequest(xfs_buf_t *); extern int xfs_buf_iowait(xfs_buf_t *); -extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, +extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, xfs_buf_rw_t); static inline int xfs_buf_iostrategy(xfs_buf_t *bp) @@ -243,49 +265,29 @@ extern int xfs_buf_ispin(xfs_buf_t *); /* Delayed Write Buffer Routines */ extern void xfs_buf_delwri_dequeue(xfs_buf_t *); +extern void xfs_buf_delwri_promote(xfs_buf_t *); /* Buffer Daemon Setup Routines */ extern int xfs_buf_init(void); extern void xfs_buf_terminate(void); -#ifdef XFS_BUF_TRACE -extern ktrace_t *xfs_buf_trace_buf; -extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); -#else -#define xfs_buf_trace(bp,id,ptr,ra) do { } while (0) -#endif - #define xfs_buf_target_name(target) \ ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) -#define XFS_B_ASYNC XBF_ASYNC -#define XFS_B_DELWRI XBF_DELWRI -#define XFS_B_READ XBF_READ -#define XFS_B_WRITE XBF_WRITE -#define XFS_B_STALE XBF_STALE - -#define XFS_BUF_TRYLOCK XBF_TRYLOCK -#define XFS_INCORE_TRYLOCK XBF_TRYLOCK -#define XFS_BUF_LOCK XBF_LOCK -#define XFS_BUF_MAPPED XBF_MAPPED - -#define BUF_BUSY XBF_DONT_BLOCK - #define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) -#define 
XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE) -#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE) -#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE) +#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE) +#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) +#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) #define XFS_BUF_SUPER_STALE(bp) do { \ XFS_BUF_STALE(bp); \ xfs_buf_delwri_dequeue(bp); \ XFS_BUF_DONE(bp); \ } while (0) -#define XFS_BUF_MANAGE XBF_FS_MANAGED #define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) @@ -370,39 +372,15 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) #define xfs_bpin(bp) xfs_buf_pin(bp) #define xfs_bunpin(bp) xfs_buf_unpin(bp) - -#define xfs_buftrace(id, bp) \ - xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0)) - #define xfs_biodone(bp) xfs_buf_ioend(bp, 0) #define xfs_biomove(bp, off, len, data, rw) \ xfs_buf_iomove((bp), (off), (len), (data), \ - ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ) + ((rw) == XBF_WRITE) ? 
XBRW_WRITE : XBRW_READ) #define xfs_biozero(bp, off, len) \ xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) - -static inline int XFS_bwrite(xfs_buf_t *bp) -{ - int iowait = (bp->b_flags & XBF_ASYNC) == 0; - int error = 0; - - if (!iowait) - bp->b_flags |= _XBF_RUN_QUEUES; - - xfs_buf_delwri_dequeue(bp); - xfs_buf_iostrategy(bp); - if (iowait) { - error = xfs_buf_iowait(bp); - xfs_buf_relse(bp); - } - return error; -} - -#define XFS_bdstrat(bp) xfs_buf_iorequest(bp) - #define xfs_iowait(bp) xfs_buf_iowait(bp) #define xfs_baread(target, rablkno, ralen) \ @@ -417,6 +395,7 @@ extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); extern int xfs_flush_buftarg(xfs_buftarg_t *, int); + #ifdef CONFIG_KDB_MODULES extern struct list_head *xfs_get_buftarg_list(void); #endif diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index eff61e2732a..e4caeb28ce2 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -52,7 +52,7 @@ xfs_file_aio_read( loff_t pos) { struct file *file = iocb->ki_filp; - int ioflags = IO_ISAIO; + int ioflags = 0; BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) @@ -71,7 +71,7 @@ xfs_file_aio_write( loff_t pos) { struct file *file = iocb->ki_filp; - int ioflags = IO_ISAIO; + int ioflags = 0; BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 08be36d7326..b6918d76bc7 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -19,6 +19,7 @@ #include "xfs_vnodeops.h" #include "xfs_bmap_btree.h" #include "xfs_inode.h" +#include "xfs_trace.h" int fs_noerr(void) { return 0; } int fs_nosys(void) { return ENOSYS; } @@ -51,6 +52,8 @@ xfs_flushinval_pages( struct address_space *mapping = VFS_I(ip)->i_mapping; int ret = 0; + 
trace_xfs_pagecache_inval(ip, first, last); + if (mapping->nrpages) { xfs_iflags_clear(ip, XFS_ITRUNCATED); ret = filemap_write_and_wait(mapping); @@ -76,7 +79,7 @@ xfs_flush_pages( xfs_iflags_clear(ip, XFS_ITRUNCATED); ret = -filemap_fdatawrite(mapping); } - if (flags & XFS_B_ASYNC) + if (flags & XBF_ASYNC) return ret; ret2 = xfs_wait_on_pages(ip, first, last); if (!ret) diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 5bb523d7f37..4ea1ee18ade 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -51,6 +51,7 @@ #include "xfs_quota.h" #include "xfs_inode_item.h" #include "xfs_export.h" +#include "xfs_trace.h" #include <linux/capability.h> #include <linux/dcache.h> @@ -446,12 +447,12 @@ xfs_attrlist_by_handle( int xfs_attrmulti_attr_get( struct inode *inode, - char *name, - char __user *ubuf, + unsigned char *name, + unsigned char __user *ubuf, __uint32_t *len, __uint32_t flags) { - char *kbuf; + unsigned char *kbuf; int error = EFAULT; if (*len > XATTR_SIZE_MAX) @@ -475,12 +476,12 @@ xfs_attrmulti_attr_get( int xfs_attrmulti_attr_set( struct inode *inode, - char *name, - const char __user *ubuf, + unsigned char *name, + const unsigned char __user *ubuf, __uint32_t len, __uint32_t flags) { - char *kbuf; + unsigned char *kbuf; int error = EFAULT; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -500,7 +501,7 @@ xfs_attrmulti_attr_set( int xfs_attrmulti_attr_remove( struct inode *inode, - char *name, + unsigned char *name, __uint32_t flags) { if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -518,7 +519,7 @@ xfs_attrmulti_by_handle( xfs_fsop_attrmulti_handlereq_t am_hreq; struct dentry *dentry; unsigned int i, size; - char *attr_name; + unsigned char *attr_name; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); @@ -546,7 +547,7 @@ xfs_attrmulti_by_handle( error = 0; for (i = 0; i < am_hreq.opcount; i++) { - ops[i].am_error = strncpy_from_user(attr_name, + ops[i].am_error = strncpy_from_user((char *)attr_name, 
ops[i].am_attrname, MAXNAMELEN); if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) error = -ERANGE; @@ -1430,6 +1431,9 @@ xfs_file_ioctl( if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (mp->m_flags & XFS_MOUNT_RDONLY) + return -XFS_ERROR(EROFS); + if (copy_from_user(&inout, arg, sizeof(inout))) return -XFS_ERROR(EFAULT); diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h index 7bd7c6afc1e..d56173b34a2 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -45,23 +45,23 @@ xfs_readlink_by_handle( extern int xfs_attrmulti_attr_get( struct inode *inode, - char *name, - char __user *ubuf, + unsigned char *name, + unsigned char __user *ubuf, __uint32_t *len, __uint32_t flags); extern int - xfs_attrmulti_attr_set( +xfs_attrmulti_attr_set( struct inode *inode, - char *name, - const char __user *ubuf, + unsigned char *name, + const unsigned char __user *ubuf, __uint32_t len, __uint32_t flags); extern int xfs_attrmulti_attr_remove( struct inode *inode, - char *name, + unsigned char *name, __uint32_t flags); extern struct dentry * diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index eafcc7c1870..0bf6d61f052 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -46,6 +46,7 @@ #include "xfs_attr.h" #include "xfs_ioctl.h" #include "xfs_ioctl32.h" +#include "xfs_trace.h" #define _NATIVE_IOC(cmd, type) \ _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) @@ -410,7 +411,7 @@ xfs_compat_attrmulti_by_handle( compat_xfs_fsop_attrmulti_handlereq_t am_hreq; struct dentry *dentry; unsigned int i, size; - char *attr_name; + unsigned char *attr_name; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); @@ -439,7 +440,7 @@ xfs_compat_attrmulti_by_handle( error = 0; for (i = 0; i < am_hreq.opcount; i++) { - ops[i].am_error = strncpy_from_user(attr_name, + ops[i].am_error = strncpy_from_user((char *)attr_name, compat_ptr(ops[i].am_attrname), MAXNAMELEN); if 
(ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index cd42ef78f6b..e8566bbf0f0 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -47,6 +47,7 @@ #include "xfs_buf_item.h" #include "xfs_utils.h" #include "xfs_vnodeops.h" +#include "xfs_trace.h" #include <linux/capability.h> #include <linux/xattr.h> @@ -139,10 +140,10 @@ xfs_init_security( struct xfs_inode *ip = XFS_I(inode); size_t length; void *value; - char *name; + unsigned char *name; int error; - error = security_inode_init_security(inode, dir, &name, + error = security_inode_init_security(inode, dir, (char **)&name, &value, &length); if (error) { if (error == -EOPNOTSUPP) @@ -573,8 +574,8 @@ xfs_vn_fallocate( bf.l_len = len; xfs_ilock(ip, XFS_IOLOCK_EXCL); - error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, - 0, XFS_ATTR_NOLOCK); + error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, + 0, XFS_ATTR_NOLOCK); if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) new_size = offset + len; @@ -585,7 +586,7 @@ xfs_vn_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); + error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); } xfs_iunlock(ip, XFS_IOLOCK_EXCL); @@ -793,7 +794,7 @@ xfs_setup_inode( struct inode *inode = &ip->i_vnode; inode->i_ino = ip->i_ino; - inode->i_state = I_NEW|I_LOCK; + inode->i_state = I_NEW; inode_add_to_lists(ip->i_mount->m_super, inode); inode->i_mode = ip->i_d.di_mode; diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 6127e24062d..5af0c81ca1a 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -40,7 +40,6 @@ #include <sv.h> #include <time.h> -#include <support/ktrace.h> #include <support/debug.h> #include <support/uuid.h> diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 072050f8d34..eac6f80d786 100644 --- 
a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -48,73 +48,12 @@ #include "xfs_utils.h" #include "xfs_iomap.h" #include "xfs_vnodeops.h" +#include "xfs_trace.h" #include <linux/capability.h> #include <linux/writeback.h> -#if defined(XFS_RW_TRACE) -void -xfs_rw_enter_trace( - int tag, - xfs_inode_t *ip, - void *data, - size_t segs, - loff_t offset, - int ioflags) -{ - if (ip->i_rwtrace == NULL) - return; - ktrace_enter(ip->i_rwtrace, - (void *)(unsigned long)tag, - (void *)ip, - (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), - (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), - (void *)data, - (void *)((unsigned long)segs), - (void *)((unsigned long)((offset >> 32) & 0xffffffff)), - (void *)((unsigned long)(offset & 0xffffffff)), - (void *)((unsigned long)ioflags), - (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)), - (void *)((unsigned long)(ip->i_new_size & 0xffffffff)), - (void *)((unsigned long)current_pid()), - (void *)NULL, - (void *)NULL, - (void *)NULL, - (void *)NULL); -} - -void -xfs_inval_cached_trace( - xfs_inode_t *ip, - xfs_off_t offset, - xfs_off_t len, - xfs_off_t first, - xfs_off_t last) -{ - - if (ip->i_rwtrace == NULL) - return; - ktrace_enter(ip->i_rwtrace, - (void *)(__psint_t)XFS_INVAL_CACHED, - (void *)ip, - (void *)((unsigned long)((offset >> 32) & 0xffffffff)), - (void *)((unsigned long)(offset & 0xffffffff)), - (void *)((unsigned long)((len >> 32) & 0xffffffff)), - (void *)((unsigned long)(len & 0xffffffff)), - (void *)((unsigned long)((first >> 32) & 0xffffffff)), - (void *)((unsigned long)(first & 0xffffffff)), - (void *)((unsigned long)((last >> 32) & 0xffffffff)), - (void *)((unsigned long)(last & 0xffffffff)), - (void *)((unsigned long)current_pid()), - (void *)NULL, - (void *)NULL, - (void *)NULL, - (void *)NULL, - (void *)NULL); -} -#endif - /* * xfs_iozero * @@ -250,13 +189,10 @@ xfs_read( } } - xfs_rw_enter_trace(XFS_READ_ENTER, ip, - (void *)iovp, segs, *offset, ioflags); + 
trace_xfs_file_read(ip, size, *offset, ioflags); iocb->ki_pos = *offset; ret = generic_file_aio_read(iocb, iovp, segs, *offset); - if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) - ret = wait_on_sync_kiocb(iocb); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); @@ -294,8 +230,9 @@ xfs_splice_read( return -error; } } - xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, ip, - pipe, count, *ppos, ioflags); + + trace_xfs_file_splice_read(ip, count, *ppos, ioflags); + ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); @@ -344,8 +281,8 @@ xfs_splice_write( ip->i_new_size = new_size; xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, ip, - pipe, count, *ppos, ioflags); + trace_xfs_file_splice_write(ip, count, *ppos, ioflags); + ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); if (ret > 0) XFS_STATS_ADD(xs_write_bytes, ret); @@ -693,18 +630,9 @@ start: * by root. This keeps people from modifying setuid and * setgid binaries. 
*/ - - if (((xip->i_d.di_mode & S_ISUID) || - ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == - (S_ISGID | S_IXGRP))) && - !capable(CAP_FSETID)) { - error = xfs_write_clear_setuid(xip); - if (likely(!error)) - error = -file_remove_suid(file); - if (unlikely(error)) { - goto out_unlock_internal; - } - } + error = -file_remove_suid(file); + if (unlikely(error)) + goto out_unlock_internal; /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; @@ -712,8 +640,6 @@ start: if ((ioflags & IO_ISDIRECT)) { if (mapping->nrpages) { WARN_ON(need_i_mutex == 0); - xfs_inval_cached_trace(xip, pos, -1, - (pos & PAGE_CACHE_MASK), -1); error = xfs_flushinval_pages(xip, (pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); @@ -730,8 +656,7 @@ start: need_i_mutex = 0; } - xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs, - *offset, ioflags); + trace_xfs_file_direct_write(xip, count, *offset, ioflags); ret = generic_file_direct_write(iocb, iovp, &segs, pos, offset, count, ocount); @@ -754,8 +679,7 @@ start: ssize_t ret2 = 0; write_retry: - xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, - *offset, ioflags); + trace_xfs_file_buffered_write(xip, count, *offset, ioflags); ret2 = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); /* @@ -774,9 +698,6 @@ write_retry: current->backing_dev_info = NULL; - if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) - ret = wait_on_sync_kiocb(iocb); - isize = i_size_read(inode); if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) *offset = isize; @@ -811,7 +732,7 @@ write_retry: XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ - if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { + if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { loff_t end = pos + ret - 1; int error2; @@ -854,53 +775,6 @@ write_retry: } /* - * All xfs metadata buffers except log state machine buffers - * get this attached as their b_bdstrat callback function. 
- * This is so that we can catch a buffer - * after prematurely unpinning it to forcibly shutdown the filesystem. - */ -int -xfs_bdstrat_cb(struct xfs_buf *bp) -{ - if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { - xfs_buftrace("XFS__BDSTRAT IOERROR", bp); - /* - * Metadata write that didn't get logged but - * written delayed anyway. These aren't associated - * with a transaction, and can be ignored. - */ - if (XFS_BUF_IODONE_FUNC(bp) == NULL && - (XFS_BUF_ISREAD(bp)) == 0) - return (xfs_bioerror_relse(bp)); - else - return (xfs_bioerror(bp)); - } - - xfs_buf_iorequest(bp); - return 0; -} - -/* - * Wrapper around bdstrat so that we can stop data from going to disk in case - * we are shutting down the filesystem. Typically user data goes thru this - * path; one of the exceptions is the superblock. - */ -void -xfsbdstrat( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - ASSERT(mp); - if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_buf_iorequest(bp); - return; - } - - xfs_buftrace("XFSBDSTRAT IOERROR", bp); - xfs_bioerror_relse(bp); -} - -/* * If the underlying (data/log/rt) device is readonly, there are some * operations that cannot proceed. */ diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index e6be37dbd0e..342ae8c0d01 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -20,56 +20,8 @@ struct xfs_mount; struct xfs_inode; -struct xfs_bmbt_irec; struct xfs_buf; -struct xfs_iomap; -#if defined(XFS_RW_TRACE) -/* - * Defines for the trace mechanisms in xfs_lrw.c. 
- */ -#define XFS_RW_KTRACE_SIZE 128 - -#define XFS_READ_ENTER 1 -#define XFS_WRITE_ENTER 2 -#define XFS_IOMAP_READ_ENTER 3 -#define XFS_IOMAP_WRITE_ENTER 4 -#define XFS_IOMAP_READ_MAP 5 -#define XFS_IOMAP_WRITE_MAP 6 -#define XFS_IOMAP_WRITE_NOSPACE 7 -#define XFS_ITRUNC_START 8 -#define XFS_ITRUNC_FINISH1 9 -#define XFS_ITRUNC_FINISH2 10 -#define XFS_CTRUNC1 11 -#define XFS_CTRUNC2 12 -#define XFS_CTRUNC3 13 -#define XFS_CTRUNC4 14 -#define XFS_CTRUNC5 15 -#define XFS_CTRUNC6 16 -#define XFS_BUNMAP 17 -#define XFS_INVAL_CACHED 18 -#define XFS_DIORD_ENTER 19 -#define XFS_DIOWR_ENTER 20 -#define XFS_WRITEPAGE_ENTER 22 -#define XFS_RELEASEPAGE_ENTER 23 -#define XFS_INVALIDPAGE_ENTER 24 -#define XFS_IOMAP_ALLOC_ENTER 25 -#define XFS_IOMAP_ALLOC_MAP 26 -#define XFS_IOMAP_UNWRITTEN 27 -#define XFS_SPLICE_READ_ENTER 28 -#define XFS_SPLICE_WRITE_ENTER 29 -extern void xfs_rw_enter_trace(int, struct xfs_inode *, - void *, size_t, loff_t, int); -extern void xfs_inval_cached_trace(struct xfs_inode *, - xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t); -#else -#define xfs_rw_enter_trace(tag, ip, data, size, offset, ioflags) -#define xfs_inval_cached_trace(ip, offset, len, first, last) -#endif - -/* errors from xfsbdstrat() must be extracted from the buffer */ -extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); -extern int xfs_bdstrat_cb(struct xfs_buf *); extern int xfs_dev_is_read_only(struct xfs_mount *, char *); extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 18a4b8e11df..25ea2408118 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -15,6 +15,7 @@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + #include "xfs.h" #include "xfs_bit.h" #include "xfs_log.h" @@ -52,11 +53,11 @@ #include "xfs_trans_priv.h" #include "xfs_filestream.h" #include 
"xfs_da_btree.h" -#include "xfs_dir2_trace.h" #include "xfs_extfree_item.h" #include "xfs_mru_cache.h" #include "xfs_inode_item.h" #include "xfs_sync.h" +#include "xfs_trace.h" #include <linux/namei.h> #include <linux/init.h> @@ -876,12 +877,11 @@ xfsaild( { struct xfs_ail *ailp = data; xfs_lsn_t last_pushed_lsn = 0; - long tout = 0; + long tout = 0; /* milliseconds */ while (!kthread_should_stop()) { - if (tout) - schedule_timeout_interruptible(msecs_to_jiffies(tout)); - tout = 1000; + schedule_timeout_interruptible(tout ? + msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); /* swsusp */ try_to_freeze(); @@ -930,13 +930,37 @@ xfs_fs_alloc_inode( */ STATIC void xfs_fs_destroy_inode( - struct inode *inode) + struct inode *inode) { - xfs_inode_t *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(inode); + + xfs_itrace_entry(ip); XFS_STATS_INC(vn_reclaim); - if (xfs_reclaim(ip)) - panic("%s: cannot reclaim 0x%p\n", __func__, inode); + + /* bad inode, get out here ASAP */ + if (is_bad_inode(inode)) + goto out_reclaim; + + xfs_ioend_wait(ip); + + ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); + + /* + * We should never get here with one of the reclaim flags already set. + */ + ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); + + /* + * We always use background reclaim here because even if the + * inode is clean, it still may be under IO and hence we have + * to take the flush lock. The background reclaim path handles + * this more efficiently than we can here, so simply let background + * reclaim tear down all inodes. 
+ */ +out_reclaim: + xfs_inode_set_reclaim_tag(ip); } /* @@ -973,7 +997,6 @@ xfs_fs_inode_init_once( mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, "xfsino", ip->i_ino); - mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); } /* @@ -998,12 +1021,45 @@ xfs_fs_dirty_inode( XFS_I(inode)->i_update_core = 1; } -/* - * Attempt to flush the inode, this will actually fail - * if the inode is pinned, but we dirty the inode again - * at the point when it is unpinned after a log write, - * since this is when the inode itself becomes flushable. - */ +STATIC int +xfs_log_inode( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + + xfs_iunlock(ip, XFS_ILOCK_SHARED); + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + + if (error) { + xfs_trans_cancel(tp, 0); + /* we need to return with the lock hold shared */ + xfs_ilock(ip, XFS_ILOCK_SHARED); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * Note - it's possible that we might have pushed ourselves out of the + * way during trans_reserve which would flush the inode. But there's + * no guarantee that the inode buffer has actually gone out yet (it's + * delwri). Plus the buffer could be pinned anyway if it's part of + * an inode in another recent transaction. So we play it safe and + * fire off the transaction anyway. 
+ */ + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + xfs_ilock_demote(ip, XFS_ILOCK_EXCL); + + return error; +} + STATIC int xfs_fs_write_inode( struct inode *inode, @@ -1011,7 +1067,7 @@ xfs_fs_write_inode( { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - int error = 0; + int error = EAGAIN; xfs_itrace_entry(ip); @@ -1022,35 +1078,55 @@ xfs_fs_write_inode( error = xfs_wait_on_pages(ip, 0, -1); if (error) goto out; - } - /* - * Bypass inodes which have already been cleaned by - * the inode flush clustering code inside xfs_iflush - */ - if (xfs_inode_clean(ip)) - goto out; - - /* - * We make this non-blocking if the inode is contended, return - * EAGAIN to indicate to the caller that they did not succeed. - * This prevents the flush path from blocking on inodes inside - * another operation right now, they get caught later by xfs_sync. - */ - if (sync) { + /* + * Make sure the inode has hit stable storage. By using the + * log and the fsync transactions we reduce the IOs we have + * to do here from two (log and inode) to just the log. + * + * Note: We still need to do a delwri write of the inode after + * this to flush it to the backing buffer so that bulkstat + * works properly if this is the first time the inode has been + * written. Because we hold the ilock atomically over the + * transaction commit and the inode flush we are guaranteed + * that the inode is not pinned when it returns. If the flush + * lock is already held, then the inode has already been + * flushed once and we don't need to flush it again. Hence + * the code will only flush the inode if it isn't already + * being flushed. 
+ */ xfs_ilock(ip, XFS_ILOCK_SHARED); - xfs_iflock(ip); - - error = xfs_iflush(ip, XFS_IFLUSH_SYNC); + if (ip->i_update_core) { + error = xfs_log_inode(ip); + if (error) + goto out_unlock; + } } else { - error = EAGAIN; + /* + * We make this non-blocking if the inode is contended, return + * EAGAIN to indicate to the caller that they did not succeed. + * This prevents the flush path from blocking on inodes inside + * another operation right now, they get caught later by xfs_sync. + */ if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) goto out; - if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) - goto out_unlock; + } + + if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) + goto out_unlock; - error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK); + /* + * Now we have the flush lock and the inode is not pinned, we can check + * if the inode is really clean as we know that there are no pending + * transaction completions, it is not waiting on the delayed write + * queue and there is no IO in progress. + */ + if (xfs_inode_clean(ip)) { + xfs_ifunlock(ip); + error = 0; + goto out_unlock; } + error = xfs_iflush(ip, 0); out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -1075,6 +1151,20 @@ xfs_fs_clear_inode( XFS_STATS_INC(vn_remove); XFS_STATS_DEC(vn_active); + /* + * The iolock is used by the file system to coordinate reads, + * writes, and block truncates. Up to this point the lock + * protected concurrent accesses by users of the inode. But + * from here forward we're doing some final processing of the + * inode because we're done with it, and although we reuse the + * iolock for protection it is really a distinct lock class + * (in the lockdep sense) from before. To keep lockdep happy + * (and basically indicate what we are doing), we explicitly + * re-init the iolock here. 
+ */ + ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); + mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); + xfs_inactive(ip); } @@ -1092,8 +1182,6 @@ xfs_fs_put_super( struct super_block *sb) { struct xfs_mount *mp = XFS_M(sb); - struct xfs_inode *rip = mp->m_rootip; - int unmount_event_flags = 0; xfs_syncd_stop(mp); @@ -1109,20 +1197,7 @@ xfs_fs_put_super( xfs_sync_attr(mp, 0); } -#ifdef HAVE_DMAPI - if (mp->m_flags & XFS_MOUNT_DMAPI) { - unmount_event_flags = - (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ? - 0 : DM_FLAGS_UNWANTED; - /* - * Ignore error from dmapi here, first unmount is not allowed - * to fail anyway, and second we wouldn't want to fail a - * unmount because of dmapi. - */ - XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL, - NULL, NULL, 0, 0, unmount_event_flags); - } -#endif + XFS_SEND_PREUNMOUNT(mp); /* * Blow away any referenced inode in the filestreams cache. @@ -1133,10 +1208,7 @@ xfs_fs_put_super( XFS_bflush(mp->m_ddev_targp); - if (mp->m_flags & XFS_MOUNT_DMAPI) { - XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0, - unmount_event_flags); - } + XFS_SEND_UNMOUNT(mp); xfs_unmountfs(mp); xfs_freesb(mp); @@ -1237,6 +1309,29 @@ xfs_fs_statfs( return 0; } +STATIC void +xfs_save_resvblks(struct xfs_mount *mp) +{ + __uint64_t resblks = 0; + + mp->m_resblks_save = mp->m_resblks; + xfs_reserve_blocks(mp, &resblks, NULL); +} + +STATIC void +xfs_restore_resvblks(struct xfs_mount *mp) +{ + __uint64_t resblks; + + if (mp->m_resblks_save) { + resblks = mp->m_resblks_save; + mp->m_resblks_save = 0; + } else + resblks = xfs_default_resblks(mp); + + xfs_reserve_blocks(mp, &resblks, NULL); +} + STATIC int xfs_fs_remount( struct super_block *sb, @@ -1316,11 +1411,27 @@ xfs_fs_remount( } mp->m_update_flags = 0; } + + /* + * Fill out the reserve pool if it is empty. Use the stashed + * value if it is non-zero, otherwise go with the default. 
+ */ + xfs_restore_resvblks(mp); } /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + /* + * After we have synced the data but before we sync the + * metadata, we need to free up the reserve block pool so that + * the used block count in the superblock on disk is correct at + * the end of the remount. Stash the current reserve pool size + * so that if we get remounted rw, we can return it to the same + * size. + */ + xfs_quiesce_data(mp); + xfs_save_resvblks(mp); xfs_quiesce_attr(mp); mp->m_flags |= XFS_MOUNT_RDONLY; } @@ -1339,11 +1450,22 @@ xfs_fs_freeze( { struct xfs_mount *mp = XFS_M(sb); + xfs_save_resvblks(mp); xfs_quiesce_attr(mp); return -xfs_fs_log_dummy(mp); } STATIC int +xfs_fs_unfreeze( + struct super_block *sb) +{ + struct xfs_mount *mp = XFS_M(sb); + + xfs_restore_resvblks(mp); + return 0; +} + +STATIC int xfs_fs_show_options( struct seq_file *m, struct vfsmount *mnt) @@ -1504,8 +1626,6 @@ xfs_fs_fill_super( goto fail_vnrele; kfree(mtpt); - - xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); return 0; out_filestream_unmount: @@ -1567,6 +1687,7 @@ static const struct super_operations xfs_super_operations = { .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, + .unfreeze_fs = xfs_fs_unfreeze, .statfs = xfs_fs_statfs, .remount_fs = xfs_fs_remount, .show_options = xfs_fs_show_options, @@ -1581,94 +1702,6 @@ static struct file_system_type xfs_fs_type = { }; STATIC int __init -xfs_alloc_trace_bufs(void) -{ -#ifdef XFS_ALLOC_TRACE - xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL); - if (!xfs_alloc_trace_buf) - goto out; -#endif -#ifdef XFS_BMAP_TRACE - xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL); - if (!xfs_bmap_trace_buf) - goto out_free_alloc_trace; -#endif -#ifdef XFS_BTREE_TRACE - xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE, - KM_MAYFAIL); - if (!xfs_allocbt_trace_buf) - goto out_free_bmap_trace; - - xfs_inobt_trace_buf = 
ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL); - if (!xfs_inobt_trace_buf) - goto out_free_allocbt_trace; - - xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL); - if (!xfs_bmbt_trace_buf) - goto out_free_inobt_trace; -#endif -#ifdef XFS_ATTR_TRACE - xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL); - if (!xfs_attr_trace_buf) - goto out_free_bmbt_trace; -#endif -#ifdef XFS_DIR2_TRACE - xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL); - if (!xfs_dir2_trace_buf) - goto out_free_attr_trace; -#endif - - return 0; - -#ifdef XFS_DIR2_TRACE - out_free_attr_trace: -#endif -#ifdef XFS_ATTR_TRACE - ktrace_free(xfs_attr_trace_buf); - out_free_bmbt_trace: -#endif -#ifdef XFS_BTREE_TRACE - ktrace_free(xfs_bmbt_trace_buf); - out_free_inobt_trace: - ktrace_free(xfs_inobt_trace_buf); - out_free_allocbt_trace: - ktrace_free(xfs_allocbt_trace_buf); - out_free_bmap_trace: -#endif -#ifdef XFS_BMAP_TRACE - ktrace_free(xfs_bmap_trace_buf); - out_free_alloc_trace: -#endif -#ifdef XFS_ALLOC_TRACE - ktrace_free(xfs_alloc_trace_buf); - out: -#endif - return -ENOMEM; -} - -STATIC void -xfs_free_trace_bufs(void) -{ -#ifdef XFS_DIR2_TRACE - ktrace_free(xfs_dir2_trace_buf); -#endif -#ifdef XFS_ATTR_TRACE - ktrace_free(xfs_attr_trace_buf); -#endif -#ifdef XFS_BTREE_TRACE - ktrace_free(xfs_bmbt_trace_buf); - ktrace_free(xfs_inobt_trace_buf); - ktrace_free(xfs_allocbt_trace_buf); -#endif -#ifdef XFS_BMAP_TRACE - ktrace_free(xfs_bmap_trace_buf); -#endif -#ifdef XFS_ALLOC_TRACE - ktrace_free(xfs_alloc_trace_buf); -#endif -} - -STATIC int __init xfs_init_zones(void) { @@ -1809,7 +1842,6 @@ init_xfs_fs(void) printk(KERN_INFO XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n"); - ktrace_init(64); xfs_ioend_init(); xfs_dir_startup(); @@ -1817,13 +1849,9 @@ init_xfs_fs(void) if (error) goto out; - error = xfs_alloc_trace_bufs(); - if (error) - goto out_destroy_zones; - error = xfs_mru_cache_init(); if (error) - goto out_free_trace_buffers; + 
goto out_destroy_zones; error = xfs_filestream_init(); if (error) @@ -1858,8 +1886,6 @@ init_xfs_fs(void) xfs_filestream_uninit(); out_mru_cache_uninit: xfs_mru_cache_uninit(); - out_free_trace_buffers: - xfs_free_trace_bufs(); out_destroy_zones: xfs_destroy_zones(); out: @@ -1876,9 +1902,7 @@ exit_xfs_fs(void) xfs_buf_terminate(); xfs_filestream_uninit(); xfs_mru_cache_uninit(); - xfs_free_trace_bufs(); xfs_destroy_zones(); - ktrace_uninit(); } module_init(init_xfs_fs); diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 18175ebd58e..233d4b9881b 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -56,12 +56,6 @@ extern void xfs_qm_exit(void); # define XFS_BIGFS_STRING #endif -#ifdef CONFIG_XFS_TRACE -# define XFS_TRACE_STRING "tracing, " -#else -# define XFS_TRACE_STRING -#endif - #ifdef CONFIG_XFS_DMAPI # define XFS_DMAPI_STRING "dmapi support, " #else @@ -78,7 +72,6 @@ extern void xfs_qm_exit(void); XFS_SECURITY_STRING \ XFS_REALTIME_STRING \ XFS_BIGFS_STRING \ - XFS_TRACE_STRING \ XFS_DMAPI_STRING \ XFS_DBG_STRING /* DBG must be last */ diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 961df0a22c7..a9f6d20aff4 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -44,6 +44,7 @@ #include "xfs_inode_item.h" #include "xfs_rw.h" #include "xfs_quota.h" +#include "xfs_trace.h" #include <linux/kthread.h> #include <linux/freezer.h> @@ -64,7 +65,6 @@ xfs_inode_ag_lookup( * as the tree is sparse and a gang lookup walks to find * the number of objects requested. */ - read_lock(&pag->pag_ici_lock); if (tag == XFS_ICI_NO_TAG) { nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void **)&ip, *first_index, 1); @@ -73,7 +73,7 @@ xfs_inode_ag_lookup( (void **)&ip, *first_index, 1, tag); } if (!nr_found) - goto unlock; + return NULL; /* * Update the index for the next lookup. 
Catch overflows @@ -83,25 +83,20 @@ xfs_inode_ag_lookup( */ *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) - goto unlock; - + return NULL; return ip; - -unlock: - read_unlock(&pag->pag_ici_lock); - return NULL; } STATIC int xfs_inode_ag_walk( struct xfs_mount *mp, - xfs_agnumber_t ag, + struct xfs_perag *pag, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), int flags, - int tag) + int tag, + int exclusive) { - struct xfs_perag *pag = &mp->m_perag[ag]; uint32_t first_index; int last_error = 0; int skipped; @@ -113,10 +108,20 @@ restart: int error = 0; xfs_inode_t *ip; + if (exclusive) + write_lock(&pag->pag_ici_lock); + else + read_lock(&pag->pag_ici_lock); ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); - if (!ip) + if (!ip) { + if (exclusive) + write_unlock(&pag->pag_ici_lock); + else + read_unlock(&pag->pag_ici_lock); break; + } + /* execute releases pag->pag_ici_lock */ error = execute(ip, pag, flags); if (error == EAGAIN) { skipped++; @@ -124,9 +129,8 @@ restart: } if (error) last_error = error; - /* - * bail out if the filesystem is corrupted. - */ + + /* bail out if the filesystem is corrupted. 
*/ if (error == EFSCORRUPTED) break; @@ -136,8 +140,6 @@ restart: delay(1); goto restart; } - - xfs_put_perag(mp, pag); return last_error; } @@ -147,16 +149,24 @@ xfs_inode_ag_iterator( int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), int flags, - int tag) + int tag, + int exclusive) { int error = 0; int last_error = 0; xfs_agnumber_t ag; for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { - if (!mp->m_perag[ag].pag_ici_init) + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, ag); + if (!pag->pag_ici_init) { + xfs_perag_put(pag); continue; - error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); + } + error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, + exclusive); + xfs_perag_put(pag); if (error) { last_error = error; if (error == EFSCORRUPTED) @@ -173,30 +183,31 @@ xfs_sync_inode_valid( struct xfs_perag *pag) { struct inode *inode = VFS_I(ip); + int error = EFSCORRUPTED; /* nothing to sync during shutdown */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - read_unlock(&pag->pag_ici_lock); - return EFSCORRUPTED; - } + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + goto out_unlock; - /* - * If we can't get a reference on the inode, it must be in reclaim. - * Leave it for the reclaim code to flush. Also avoid inodes that - * haven't been fully initialised. - */ - if (!igrab(inode)) { - read_unlock(&pag->pag_ici_lock); - return ENOENT; - } - read_unlock(&pag->pag_ici_lock); + /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ + error = ENOENT; + if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) + goto out_unlock; - if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { + /* If we can't grab the inode, it must be on its way to reclaim.
*/ + if (!igrab(inode)) + goto out_unlock; + + if (is_bad_inode(inode)) { IRELE(ip); - return ENOENT; + goto out_unlock; } - return 0; + /* inode is valid */ + error = 0; +out_unlock: + read_unlock(&pag->pag_ici_lock); + return error; } STATIC int @@ -223,7 +234,7 @@ xfs_sync_inode_data( } error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? - 0 : XFS_B_ASYNC, FI_NONE); + 0 : XBF_ASYNC, FI_NONE); xfs_iunlock(ip, XFS_IOLOCK_SHARED); out_wait: @@ -259,8 +270,7 @@ xfs_sync_inode_attr( goto out_unlock; } - error = xfs_iflush(ip, (flags & SYNC_WAIT) ? - XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI); + error = xfs_iflush(ip, flags); out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -281,14 +291,11 @@ xfs_sync_data( ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, - XFS_ICI_NO_TAG); + XFS_ICI_NO_TAG, 0); if (error) return XFS_ERROR(error); - xfs_log_force(mp, 0, - (flags & SYNC_WAIT) ? - XFS_LOG_FORCE | XFS_LOG_SYNC : - XFS_LOG_FORCE); + xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); return 0; } @@ -303,7 +310,7 @@ xfs_sync_attr( ASSERT((flags & ~SYNC_WAIT) == 0); return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, - XFS_ICI_NO_TAG); + XFS_ICI_NO_TAG, 0); } STATIC int @@ -314,10 +321,6 @@ xfs_commit_dummy_trans( struct xfs_inode *ip = mp->m_rootip; struct xfs_trans *tp; int error; - int log_flags = XFS_LOG_FORCE; - - if (flags & SYNC_WAIT) - log_flags |= XFS_LOG_SYNC; /* * Put a dummy transaction in the log to tell recovery @@ -339,11 +342,11 @@ xfs_commit_dummy_trans( xfs_iunlock(ip, XFS_ILOCK_EXCL); /* the log force ensures this transaction is pushed to disk */ - xfs_log_force(mp, 0, log_flags); + xfs_log_force(mp, (flags & SYNC_WAIT) ? 
XFS_LOG_SYNC : 0); return error; } -int +STATIC int xfs_sync_fsdata( struct xfs_mount *mp, int flags) @@ -359,7 +362,7 @@ xfs_sync_fsdata( if (flags & SYNC_TRYLOCK) { ASSERT(!(flags & SYNC_WAIT)); - bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); + bp = xfs_getsb(mp, XBF_TRYLOCK); if (!bp) goto out; @@ -379,7 +382,7 @@ xfs_sync_fsdata( * become pinned in between there and here. */ if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, 0, XFS_LOG_FORCE); + xfs_log_force(mp, 0); } @@ -440,9 +443,6 @@ xfs_quiesce_data( xfs_sync_data(mp, SYNC_WAIT); xfs_qm_sync(mp, SYNC_WAIT); - /* drop inode references pinned by filestreams */ - xfs_filestream_flush(mp); - /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp, SYNC_WAIT); @@ -459,16 +459,18 @@ xfs_quiesce_fs( { int count = 0, pincount; + xfs_reclaim_inodes(mp, 0); xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* * This loop must run at least twice. The first instance of the loop * will flush most meta data but that will generate more meta data * (typically directory updates). Which then must be flushed and - * logged before we can write the unmount record. + * logged before we can write the unmount record. We also do sync + * reclaim of inodes to catch any that the above delwri flush skipped.
*/ do { + xfs_reclaim_inodes(mp, SYNC_WAIT); xfs_sync_attr(mp, SYNC_WAIT); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { @@ -567,7 +569,7 @@ xfs_flush_inodes( igrab(inode); xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); wait_for_completion(&completion); - xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); + xfs_log_force(ip->i_mount, XFS_LOG_SYNC); } /* @@ -583,8 +585,8 @@ xfs_sync_worker( int error; if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_log_force(mp, 0); + xfs_reclaim_inodes(mp, 0); /* dgc: errors ignored here */ error = xfs_qm_sync(mp, SYNC_TRYLOCK); error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); @@ -663,67 +665,6 @@ xfs_syncd_stop( kthread_stop(mp->m_sync_task); } -int -xfs_reclaim_inode( - xfs_inode_t *ip, - int locked, - int sync_mode) -{ - xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); - - /* The hash lock here protects a thread in xfs_iget_core from - * racing with us on linking the inode back with a vnode. - * Once we have the XFS_IRECLAIM flag set it will not touch - * us. - */ - write_lock(&pag->pag_ici_lock); - spin_lock(&ip->i_flags_lock); - if (__xfs_iflags_test(ip, XFS_IRECLAIM) || - !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { - spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); - if (locked) { - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } - return -EAGAIN; - } - __xfs_iflags_set(ip, XFS_IRECLAIM); - spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); - xfs_put_perag(ip->i_mount, pag); - - /* - * If the inode is still dirty, then flush it out. If the inode - * is not in the AIL, then it will be OK to flush it delwri as - * long as xfs_iflush() does not keep any references to the inode. 
- * We leave that decision up to xfs_iflush() since it has the - * knowledge of whether it's OK to simply do a delwri flush of - * the inode or whether we need to wait until the inode is - * pulled from the AIL. - * We get the flush lock regardless, though, just to make sure - * we don't free it while it is being flushed. - */ - if (!locked) { - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_iflock(ip); - } - - /* - * In the case of a forced shutdown we rely on xfs_iflush() to - * wait for the inode to be unpinned before returning an error. - */ - if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { - /* synchronize with xfs_iflush_done */ - xfs_iflock(ip); - xfs_ifunlock(ip); - } - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_ireclaim(ip); - return 0; -} - void __xfs_inode_set_reclaim_tag( struct xfs_perag *pag, @@ -743,16 +684,17 @@ void xfs_inode_set_reclaim_tag( xfs_inode_t *ip) { - xfs_mount_t *mp = ip->i_mount; - xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + struct xfs_mount *mp = ip->i_mount; + struct xfs_perag *pag; + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); read_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); __xfs_inode_set_reclaim_tag(pag, ip); __xfs_iflags_set(ip, XFS_IRECLAIMABLE); spin_unlock(&ip->i_flags_lock); read_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); + xfs_perag_put(pag); } void @@ -765,20 +707,145 @@ __xfs_inode_clear_reclaim_tag( XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); } +/* + * Inodes in different states need to be treated differently, and the return + * value of xfs_iflush is not sufficient to get this right. 
The following table + * lists the inode states and the reclaim actions necessary for non-blocking + * reclaim: + * + * + * inode state iflush ret required action + * --------------- ---------- --------------- + * bad - reclaim + * shutdown EIO unpin and reclaim + * clean, unpinned 0 reclaim + * stale, unpinned 0 reclaim + * clean, pinned(*) 0 requeue + * stale, pinned EAGAIN requeue + * dirty, delwri ok 0 requeue + * dirty, delwri blocked EAGAIN requeue + * dirty, sync flush 0 reclaim + * + * (*) dgc: I don't think the clean, pinned state is possible but it gets + * handled anyway given the order of checks implemented. + * + * As can be seen from the table, the return value of xfs_iflush() is not + * sufficient to correctly decide the reclaim action here. The checks in + * xfs_iflush() might look like duplicates, but they are not. + * + * Also, because we get the flush lock first, we know that any inode that has + * been flushed delwri has had the flush completed by the time we check that + * the inode is clean. The clean inode check needs to be done before flushing + * the inode delwri otherwise we would loop forever requeuing clean inodes as + * we cannot tell apart a successful delwri flush and a clean inode from the + * return value of xfs_iflush(). + * + * Note that because the inode is flushed delayed write by background + * writeback, the flush lock may already be held here and waiting on it can + * result in very long latencies. Hence for sync reclaims, where we wait on the + * flush lock, the caller should push out delayed write inodes first before + * trying to reclaim them to minimise the amount of time spent waiting. For + * background reclaim, we just requeue the inode for the next pass.
+ * + * Hence the order of actions after gaining the locks should be: + * bad => reclaim + * shutdown => unpin and reclaim + * pinned, delwri => requeue + * pinned, sync => unpin + * stale => reclaim + * clean => reclaim + * dirty, delwri => flush and requeue + * dirty, sync => flush, wait and reclaim + */ STATIC int -xfs_reclaim_inode_now( +xfs_reclaim_inode( struct xfs_inode *ip, struct xfs_perag *pag, - int flags) + int sync_mode) { - /* ignore if already under reclaim */ - if (xfs_iflags_test(ip, XFS_IRECLAIM)) { - read_unlock(&pag->pag_ici_lock); + int error = 0; + + /* + * The radix tree lock here protects a thread in xfs_iget from racing + * with us starting reclaim on the inode. Once we have the + * XFS_IRECLAIM flag set it will not touch us. + */ + spin_lock(&ip->i_flags_lock); + ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { + /* ignore as it is already under reclaim */ + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); return 0; } - read_unlock(&pag->pag_ici_lock); + __xfs_iflags_set(ip, XFS_IRECLAIM); + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); + + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (!xfs_iflock_nowait(ip)) { + if (!(sync_mode & SYNC_WAIT)) + goto out; + xfs_iflock(ip); + } + + if (is_bad_inode(VFS_I(ip))) + goto reclaim; + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_iunpin_wait(ip); + goto reclaim; + } + if (xfs_ipincount(ip)) { + if (!(sync_mode & SYNC_WAIT)) { + xfs_ifunlock(ip); + goto out; + } + xfs_iunpin_wait(ip); + } + if (xfs_iflags_test(ip, XFS_ISTALE)) + goto reclaim; + if (xfs_inode_clean(ip)) + goto reclaim; + + /* Now we have an inode that needs flushing */ + error = xfs_iflush(ip, sync_mode); + if (sync_mode & SYNC_WAIT) { + xfs_iflock(ip); + goto reclaim; + } + + /* + * When we have to flush an inode but don't have SYNC_WAIT set, we + * flush the inode out using a delwri buffer and wait for the next + * call into reclaim to find it in a 
clean state instead of waiting for + * it now. We also don't return errors here - if the error is transient + * then the next reclaim pass will flush the inode, and if the error + * is permanent then the next sync reclaim will reclaim the inode and + * pass on the error. + */ + if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "inode 0x%llx background reclaim flush failed with %d", + (long long)ip->i_ino, error); + } +out: + xfs_iflags_clear(ip, XFS_IRECLAIM); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* + * We could return EAGAIN here to make reclaim rescan the inode tree in + * a short while. However, this just burns CPU time scanning the tree + * waiting for IO to complete and xfssyncd never goes back to the idle + * state. Instead, return 0 to let the next scheduled background reclaim + * attempt to reclaim the inode again. + */ + return 0; + +reclaim: + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_ireclaim(ip); + return error; - return xfs_reclaim_inode(ip, 0, flags); } int @@ -786,6 +853,6 @@ xfs_reclaim_inodes( xfs_mount_t *mp, int mode) { - return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, - XFS_ICI_RECLAIM_TAG); + return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, + XFS_ICI_RECLAIM_TAG, 1); } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 27920eb7a82..d480c346cab 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -37,14 +37,12 @@ void xfs_syncd_stop(struct xfs_mount *mp); int xfs_sync_attr(struct xfs_mount *mp, int flags); int xfs_sync_data(struct xfs_mount *mp, int flags); -int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inodes(struct xfs_inode *ip); -int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); void xfs_inode_set_reclaim_tag(struct xfs_inode
*ip); @@ -55,6 +53,6 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags, int tag); + int flags, int tag, int write_lock); #endif diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c new file mode 100644 index 00000000000..856eb3c8d60 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_trace.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2009, Christoph Hellwig + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_ialloc.h" +#include "xfs_itable.h" +#include "xfs_alloc.h" +#include "xfs_bmap.h" +#include "xfs_attr.h" +#include "xfs_attr_sf.h" +#include "xfs_attr_leaf.h" +#include "xfs_log_priv.h" +#include "xfs_buf_item.h" +#include "xfs_quota.h" +#include "xfs_iomap.h" +#include "xfs_aops.h" +#include "quota/xfs_dquot_item.h" +#include "quota/xfs_dquot.h" + +/* + * Format fsblock number into a static buffer & return it. + */ +STATIC char *xfs_fmtfsblock(xfs_fsblock_t bno) +{ + static char rval[50]; + + if (bno == NULLFSBLOCK) + sprintf(rval, "NULLFSBLOCK"); + else if (isnullstartblock(bno)) + sprintf(rval, "NULLSTARTBLOCK(%lld)", startblockval(bno)); + else + sprintf(rval, "%lld", (xfs_dfsbno_t)bno); + return rval; +} + +/* + * We include this last to have the helpers above available for the trace + * event implementations. + */ +#define CREATE_TRACE_POINTS +#include "xfs_trace.h" diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h new file mode 100644 index 00000000000..a4574dcf506 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -0,0 +1,1503 @@ +/* + * Copyright (c) 2009, Christoph Hellwig + * All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xfs + +#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_XFS_H + +#include <linux/tracepoint.h> + +struct xfs_agf; +struct xfs_alloc_arg; +struct xfs_attr_list_context; +struct xfs_buf_log_item; +struct xfs_da_args; +struct xfs_da_node_entry; +struct xfs_dquot; +struct xlog_ticket; +struct log; + +DECLARE_EVENT_CLASS(xfs_attr_list_class, + TP_PROTO(struct xfs_attr_list_context *ctx), + TP_ARGS(ctx), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(u32, hashval) + __field(u32, blkno) + __field(u32, offset) + __field(void *, alist) + __field(int, bufsize) + __field(int, count) + __field(int, firstu) + __field(int, dupcnt) + __field(int, flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; + __entry->ino = ctx->dp->i_ino; + __entry->hashval = ctx->cursor->hashval; + __entry->blkno = ctx->cursor->blkno; + __entry->offset = ctx->cursor->offset; + __entry->alist = ctx->alist; + __entry->bufsize = ctx->bufsize; + __entry->count = ctx->count; + __entry->firstu = ctx->firstu; + __entry->flags = ctx->flags; + ), + TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " + "alist 0x%p size %u count %u firstu %u flags %d %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + 
__entry->hashval, + __entry->blkno, + __entry->offset, + __entry->dupcnt, + __entry->alist, + __entry->bufsize, + __entry->count, + __entry->firstu, + __entry->flags, + __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS) + ) +) + +#define DEFINE_PERAG_REF_EVENT(name) \ +TRACE_EVENT(name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \ + unsigned long caller_ip), \ + TP_ARGS(mp, agno, refcount, caller_ip), \ + TP_STRUCT__entry( \ + __field(dev_t, dev) \ + __field(xfs_agnumber_t, agno) \ + __field(int, refcount) \ + __field(unsigned long, caller_ip) \ + ), \ + TP_fast_assign( \ + __entry->dev = mp->m_super->s_dev; \ + __entry->agno = agno; \ + __entry->refcount = refcount; \ + __entry->caller_ip = caller_ip; \ + ), \ + TP_printk("dev %d:%d agno %u refcount %d caller %pf", \ + MAJOR(__entry->dev), MINOR(__entry->dev), \ + __entry->agno, \ + __entry->refcount, \ + (char *)__entry->caller_ip) \ +); + +DEFINE_PERAG_REF_EVENT(xfs_perag_get) +DEFINE_PERAG_REF_EVENT(xfs_perag_put) + +#define DEFINE_ATTR_LIST_EVENT(name) \ +DEFINE_EVENT(xfs_attr_list_class, name, \ + TP_PROTO(struct xfs_attr_list_context *ctx), \ + TP_ARGS(ctx)) +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); + +TRACE_EVENT(xfs_attr_list_node_descend, + TP_PROTO(struct xfs_attr_list_context *ctx, + struct xfs_da_node_entry *btree), + TP_ARGS(ctx, btree), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(u32, hashval) + __field(u32, blkno) + __field(u32, offset) + __field(void *, alist) + __field(int, bufsize) + __field(int, count) + __field(int, firstu) + __field(int, dupcnt) + __field(int, flags) + __field(u32, bt_hashval) + __field(u32, 
bt_before) + ), + TP_fast_assign( + __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; + __entry->ino = ctx->dp->i_ino; + __entry->hashval = ctx->cursor->hashval; + __entry->blkno = ctx->cursor->blkno; + __entry->offset = ctx->cursor->offset; + __entry->alist = ctx->alist; + __entry->bufsize = ctx->bufsize; + __entry->count = ctx->count; + __entry->firstu = ctx->firstu; + __entry->flags = ctx->flags; + __entry->bt_hashval = be32_to_cpu(btree->hashval); + __entry->bt_before = be32_to_cpu(btree->before); + ), + TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " + "alist 0x%p size %u count %u firstu %u flags %d %s " + "node hashval %u, node before %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->hashval, + __entry->blkno, + __entry->offset, + __entry->dupcnt, + __entry->alist, + __entry->bufsize, + __entry->count, + __entry->firstu, + __entry->flags, + __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS), + __entry->bt_hashval, + __entry->bt_before) +); + +TRACE_EVENT(xfs_iext_insert, + TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, + struct xfs_bmbt_irec *r, int state, unsigned long caller_ip), + TP_ARGS(ip, idx, r, state, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_extnum_t, idx) + __field(xfs_fileoff_t, startoff) + __field(xfs_fsblock_t, startblock) + __field(xfs_filblks_t, blockcount) + __field(xfs_exntst_t, state) + __field(int, bmap_state) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->idx = idx; + __entry->startoff = r->br_startoff; + __entry->startblock = r->br_startblock; + __entry->blockcount = r->br_blockcount; + __entry->state = r->br_state; + __entry->bmap_state = state; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " + "offset %lld block %s count %lld flag %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + 
__print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), + (long)__entry->idx, + __entry->startoff, + xfs_fmtfsblock(__entry->startblock), + __entry->blockcount, + __entry->state, + (char *)__entry->caller_ip) +); + +DECLARE_EVENT_CLASS(xfs_bmap_class, + TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, + unsigned long caller_ip), + TP_ARGS(ip, idx, state, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_extnum_t, idx) + __field(xfs_fileoff_t, startoff) + __field(xfs_fsblock_t, startblock) + __field(xfs_filblks_t, blockcount) + __field(xfs_exntst_t, state) + __field(int, bmap_state) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ? + ip->i_afp : &ip->i_df; + struct xfs_bmbt_irec r; + + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r); + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->idx = idx; + __entry->startoff = r.br_startoff; + __entry->startblock = r.br_startblock; + __entry->blockcount = r.br_blockcount; + __entry->state = r.br_state; + __entry->bmap_state = state; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " + "offset %lld block %s count %lld flag %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), + (long)__entry->idx, + __entry->startoff, + xfs_fmtfsblock(__entry->startblock), + __entry->blockcount, + __entry->state, + (char *)__entry->caller_ip) +) + +#define DEFINE_BMAP_EVENT(name) \ +DEFINE_EVENT(xfs_bmap_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \ + unsigned long caller_ip), \ + TP_ARGS(ip, idx, state, caller_ip)) +DEFINE_BMAP_EVENT(xfs_iext_remove); +DEFINE_BMAP_EVENT(xfs_bmap_pre_update); +DEFINE_BMAP_EVENT(xfs_bmap_post_update); +DEFINE_BMAP_EVENT(xfs_extlist); + +DECLARE_EVENT_CLASS(xfs_buf_class, + TP_PROTO(struct xfs_buf *bp, 
unsigned long caller_ip), + TP_ARGS(bp, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, bno) + __field(size_t, buffer_length) + __field(int, hold) + __field(int, pincount) + __field(unsigned, lockval) + __field(unsigned, flags) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = bp->b_target->bt_dev; + __entry->bno = bp->b_bn; + __entry->buffer_length = bp->b_buffer_length; + __entry->hold = atomic_read(&bp->b_hold); + __entry->pincount = atomic_read(&bp->b_pin_count); + __entry->lockval = xfs_buf_lock_value(bp); + __entry->flags = bp->b_flags; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->bno, + __entry->buffer_length, + __entry->hold, + __entry->pincount, + __entry->lockval, + __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), + (void *)__entry->caller_ip) +) + +#define DEFINE_BUF_EVENT(name) \ +DEFINE_EVENT(xfs_buf_class, name, \ + TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \ + TP_ARGS(bp, caller_ip)) +DEFINE_BUF_EVENT(xfs_buf_init); +DEFINE_BUF_EVENT(xfs_buf_free); +DEFINE_BUF_EVENT(xfs_buf_hold); +DEFINE_BUF_EVENT(xfs_buf_rele); +DEFINE_BUF_EVENT(xfs_buf_pin); +DEFINE_BUF_EVENT(xfs_buf_unpin); +DEFINE_BUF_EVENT(xfs_buf_iodone); +DEFINE_BUF_EVENT(xfs_buf_iorequest); +DEFINE_BUF_EVENT(xfs_buf_bawrite); +DEFINE_BUF_EVENT(xfs_buf_bdwrite); +DEFINE_BUF_EVENT(xfs_buf_lock); +DEFINE_BUF_EVENT(xfs_buf_lock_done); +DEFINE_BUF_EVENT(xfs_buf_cond_lock); +DEFINE_BUF_EVENT(xfs_buf_unlock); +DEFINE_BUF_EVENT(xfs_buf_ordered_retry); +DEFINE_BUF_EVENT(xfs_buf_iowait); +DEFINE_BUF_EVENT(xfs_buf_iowait_done); +DEFINE_BUF_EVENT(xfs_buf_delwri_queue); +DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); +DEFINE_BUF_EVENT(xfs_buf_delwri_split); +DEFINE_BUF_EVENT(xfs_buf_get_noaddr); +DEFINE_BUF_EVENT(xfs_bdstrat_shut); +DEFINE_BUF_EVENT(xfs_buf_item_relse); 
+DEFINE_BUF_EVENT(xfs_buf_item_iodone); +DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); +DEFINE_BUF_EVENT(xfs_buf_error_relse); +DEFINE_BUF_EVENT(xfs_trans_read_buf_io); +DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); + +/* not really buffer traces, but the buf provides useful information */ +DEFINE_BUF_EVENT(xfs_btree_corrupt); +DEFINE_BUF_EVENT(xfs_da_btree_corrupt); +DEFINE_BUF_EVENT(xfs_reset_dqcounts); +DEFINE_BUF_EVENT(xfs_inode_item_push); + +/* pass flags explicitly */ +DECLARE_EVENT_CLASS(xfs_buf_flags_class, + TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), + TP_ARGS(bp, flags, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, bno) + __field(size_t, buffer_length) + __field(int, hold) + __field(int, pincount) + __field(unsigned, lockval) + __field(unsigned, flags) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = bp->b_target->bt_dev; + __entry->bno = bp->b_bn; + __entry->buffer_length = bp->b_buffer_length; + __entry->flags = flags; + __entry->hold = atomic_read(&bp->b_hold); + __entry->pincount = atomic_read(&bp->b_pin_count); + __entry->lockval = xfs_buf_lock_value(bp); + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->bno, + __entry->buffer_length, + __entry->hold, + __entry->pincount, + __entry->lockval, + __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), + (void *)__entry->caller_ip) +) + +#define DEFINE_BUF_FLAGS_EVENT(name) \ +DEFINE_EVENT(xfs_buf_flags_class, name, \ + TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \ + TP_ARGS(bp, flags, caller_ip)) +DEFINE_BUF_FLAGS_EVENT(xfs_buf_find); +DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); +DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); + +TRACE_EVENT(xfs_buf_ioerror, + TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip), + TP_ARGS(bp, error, caller_ip), 
+ TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, bno) + __field(size_t, buffer_length) + __field(unsigned, flags) + __field(int, hold) + __field(int, pincount) + __field(unsigned, lockval) + __field(int, error) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = bp->b_target->bt_dev; + __entry->bno = bp->b_bn; + __entry->buffer_length = bp->b_buffer_length; + __entry->hold = atomic_read(&bp->b_hold); + __entry->pincount = atomic_read(&bp->b_pin_count); + __entry->lockval = xfs_buf_lock_value(bp); + __entry->error = error; + __entry->flags = bp->b_flags; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d error %d flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->bno, + __entry->buffer_length, + __entry->hold, + __entry->pincount, + __entry->lockval, + __entry->error, + __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), + (void *)__entry->caller_ip) +); + +DECLARE_EVENT_CLASS(xfs_buf_item_class, + TP_PROTO(struct xfs_buf_log_item *bip), + TP_ARGS(bip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, buf_bno) + __field(size_t, buf_len) + __field(int, buf_hold) + __field(int, buf_pincount) + __field(int, buf_lockval) + __field(unsigned, buf_flags) + __field(unsigned, bli_recur) + __field(int, bli_refcount) + __field(unsigned, bli_flags) + __field(void *, li_desc) + __field(unsigned, li_flags) + ), + TP_fast_assign( + __entry->dev = bip->bli_buf->b_target->bt_dev; + __entry->bli_flags = bip->bli_flags; + __entry->bli_recur = bip->bli_recur; + __entry->bli_refcount = atomic_read(&bip->bli_refcount); + __entry->buf_bno = bip->bli_buf->b_bn; + __entry->buf_len = bip->bli_buf->b_buffer_length; + __entry->buf_flags = bip->bli_buf->b_flags; + __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); + __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); + __entry->buf_lockval = 
xfs_buf_lock_value(bip->bli_buf); + __entry->li_desc = bip->bli_item.li_desc; + __entry->li_flags = bip->bli_item.li_flags; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d flags %s recur %d refcount %d bliflags %s " + "lidesc 0x%p liflags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->buf_bno, + __entry->buf_len, + __entry->buf_hold, + __entry->buf_pincount, + __entry->buf_lockval, + __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS), + __entry->bli_recur, + __entry->bli_refcount, + __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS), + __entry->li_desc, + __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS)) +) + +#define DEFINE_BUF_ITEM_EVENT(name) \ +DEFINE_EVENT(xfs_buf_item_class, name, \ + TP_PROTO(struct xfs_buf_log_item *bip), \ + TP_ARGS(bip)) +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf); +DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); +DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); +DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); +DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur); +DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf); +DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur); +DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf); +DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse); +DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin); +DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); +DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); +DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); + 
+DECLARE_EVENT_CLASS(xfs_lock_class, + TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, + unsigned long caller_ip), + TP_ARGS(ip, lock_flags, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, lock_flags) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->lock_flags = lock_flags; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), + (void *)__entry->caller_ip) +) + +#define DEFINE_LOCK_EVENT(name) \ +DEFINE_EVENT(xfs_lock_class, name, \ + TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \ + unsigned long caller_ip), \ + TP_ARGS(ip, lock_flags, caller_ip)) +DEFINE_LOCK_EVENT(xfs_ilock); +DEFINE_LOCK_EVENT(xfs_ilock_nowait); +DEFINE_LOCK_EVENT(xfs_ilock_demote); +DEFINE_LOCK_EVENT(xfs_iunlock); + +DECLARE_EVENT_CLASS(xfs_iget_class, + TP_PROTO(struct xfs_inode *ip), + TP_ARGS(ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + ), + TP_printk("dev %d:%d ino 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino) +) + +#define DEFINE_IGET_EVENT(name) \ +DEFINE_EVENT(xfs_iget_class, name, \ + TP_PROTO(struct xfs_inode *ip), \ + TP_ARGS(ip)) +DEFINE_IGET_EVENT(xfs_iget_skip); +DEFINE_IGET_EVENT(xfs_iget_reclaim); +DEFINE_IGET_EVENT(xfs_iget_found); +DEFINE_IGET_EVENT(xfs_iget_alloc); + +DECLARE_EVENT_CLASS(xfs_inode_class, + TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), + TP_ARGS(ip, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, count) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->count = 
atomic_read(&VFS_I(ip)->i_count); + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx count %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->count, + (void *)__entry->caller_ip) +) + +#define DEFINE_INODE_EVENT(name) \ +DEFINE_EVENT(xfs_inode_class, name, \ + TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ + TP_ARGS(ip, caller_ip)) +DEFINE_INODE_EVENT(xfs_ihold); +DEFINE_INODE_EVENT(xfs_irele); +/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */ +DEFINE_INODE_EVENT(xfs_inode); +#define xfs_itrace_entry(ip) \ + trace_xfs_inode(ip, _THIS_IP_) + +/* + * Dquot event class: the 64-bit counters and limits in q_core are converted + * from big-endian when the event is recorded; the id is stored as the raw + * __be32 value and converted only when the event is printed. + */ +DECLARE_EVENT_CLASS(xfs_dquot_class, + TP_PROTO(struct xfs_dquot *dqp), + TP_ARGS(dqp), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(__be32, id) + __field(unsigned, flags) + __field(unsigned, nrefs) + __field(unsigned long long, res_bcount) + __field(unsigned long long, bcount) + __field(unsigned long long, icount) + __field(unsigned long long, blk_hardlimit) + __field(unsigned long long, blk_softlimit) + __field(unsigned long long, ino_hardlimit) + __field(unsigned long long, ino_softlimit) + ), + TP_fast_assign( + __entry->dev = dqp->q_mount->m_super->s_dev; + __entry->id = dqp->q_core.d_id; + __entry->flags = dqp->dq_flags; + __entry->nrefs = dqp->q_nrefs; + __entry->res_bcount = dqp->q_res_bcount; + __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount); + __entry->icount = be64_to_cpu(dqp->q_core.d_icount); + __entry->blk_hardlimit = + be64_to_cpu(dqp->q_core.d_blk_hardlimit); + __entry->blk_softlimit = + be64_to_cpu(dqp->q_core.d_blk_softlimit); + __entry->ino_hardlimit = + be64_to_cpu(dqp->q_core.d_ino_hardlimit); + __entry->ino_softlimit = + be64_to_cpu(dqp->q_core.d_ino_softlimit); + ), + TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx " + "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] " + "icnt 0x%llx [hard 0x%llx | soft 0x%llx]", + MAJOR(__entry->dev), MINOR(__entry->dev), + be32_to_cpu(__entry->id), + 
__print_flags(__entry->flags, "|", XFS_DQ_FLAGS), + __entry->nrefs, + __entry->res_bcount, + __entry->bcount, + __entry->blk_hardlimit, + __entry->blk_softlimit, + __entry->icount, + __entry->ino_hardlimit, + __entry->ino_softlimit) +) + +#define DEFINE_DQUOT_EVENT(name) \ +DEFINE_EVENT(xfs_dquot_class, name, \ + TP_PROTO(struct xfs_dquot *dqp), \ + TP_ARGS(dqp)) +DEFINE_DQUOT_EVENT(xfs_dqadjust); +DEFINE_DQUOT_EVENT(xfs_dqshake_dirty); +DEFINE_DQUOT_EVENT(xfs_dqshake_unlink); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); +DEFINE_DQUOT_EVENT(xfs_dqattach_found); +DEFINE_DQUOT_EVENT(xfs_dqattach_get); +DEFINE_DQUOT_EVENT(xfs_dqinit); +DEFINE_DQUOT_EVENT(xfs_dqreuse); +DEFINE_DQUOT_EVENT(xfs_dqalloc); +DEFINE_DQUOT_EVENT(xfs_dqtobp_read); +DEFINE_DQUOT_EVENT(xfs_dqread); +DEFINE_DQUOT_EVENT(xfs_dqread_fail); +DEFINE_DQUOT_EVENT(xfs_dqlookup_found); +DEFINE_DQUOT_EVENT(xfs_dqlookup_want); +DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist); +DEFINE_DQUOT_EVENT(xfs_dqlookup_move); +DEFINE_DQUOT_EVENT(xfs_dqlookup_done); +DEFINE_DQUOT_EVENT(xfs_dqget_hit); +DEFINE_DQUOT_EVENT(xfs_dqget_miss); +DEFINE_DQUOT_EVENT(xfs_dqput); +DEFINE_DQUOT_EVENT(xfs_dqput_wait); +DEFINE_DQUOT_EVENT(xfs_dqput_free); +DEFINE_DQUOT_EVENT(xfs_dqrele); +DEFINE_DQUOT_EVENT(xfs_dqflush); +DEFINE_DQUOT_EVENT(xfs_dqflush_force); +DEFINE_DQUOT_EVENT(xfs_dqflush_done); +/* not really iget events, but we re-use the format */ +DEFINE_IGET_EVENT(xfs_dquot_dqalloc); +DEFINE_IGET_EVENT(xfs_dquot_dqdetach); + +DECLARE_EVENT_CLASS(xfs_loggrant_class, + TP_PROTO(struct log *log, struct xlog_ticket *tic), + TP_ARGS(log, tic), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned, trans_type) + __field(char, ocnt) + __field(char, cnt) + __field(int, curr_res) + __field(int, unit_res) + __field(unsigned int, flags) + __field(void *, reserve_headq) + __field(void *, write_headq) + __field(int, grant_reserve_cycle) + __field(int, 
grant_reserve_bytes) + __field(int, grant_write_cycle) + __field(int, grant_write_bytes) + __field(int, curr_cycle) + __field(int, curr_block) + __field(xfs_lsn_t, tail_lsn) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->trans_type = tic->t_trans_type; + __entry->ocnt = tic->t_ocnt; + __entry->cnt = tic->t_cnt; + __entry->curr_res = tic->t_curr_res; + __entry->unit_res = tic->t_unit_res; + __entry->flags = tic->t_flags; + __entry->reserve_headq = log->l_reserve_headq; + __entry->write_headq = log->l_write_headq; + __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; + __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; + __entry->grant_write_cycle = log->l_grant_write_cycle; + __entry->grant_write_bytes = log->l_grant_write_bytes; + __entry->curr_cycle = log->l_curr_cycle; + __entry->curr_block = log->l_curr_block; + __entry->tail_lsn = log->l_tail_lsn; + ), + TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " + "t_unit_res %u t_flags %s reserve_headq 0x%p " + "write_headq 0x%p grant_reserve_cycle %d " + "grant_reserve_bytes %d grant_write_cycle %d " + "grant_write_bytes %d curr_cycle %d curr_block %d " + "tail_cycle %d tail_block %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES), + __entry->ocnt, + __entry->cnt, + __entry->curr_res, + __entry->unit_res, + __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), + __entry->reserve_headq, + __entry->write_headq, + __entry->grant_reserve_cycle, + __entry->grant_reserve_bytes, + __entry->grant_write_cycle, + __entry->grant_write_bytes, + __entry->curr_cycle, + __entry->curr_block, + CYCLE_LSN(__entry->tail_lsn), + BLOCK_LSN(__entry->tail_lsn) + ) +) + +#define DEFINE_LOGGRANT_EVENT(name) \ +DEFINE_EVENT(xfs_loggrant_class, name, \ + TP_PROTO(struct log *log, struct xlog_ticket *tic), \ + TP_ARGS(log, tic)) +DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); +DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); 
+DEFINE_LOGGRANT_EVENT(xfs_log_reserve); +DEFINE_LOGGRANT_EVENT(xfs_log_umount_write); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_error); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); +DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter); +DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub); + +#define DEFINE_RW_EVENT(name) \ +TRACE_EVENT(name, \ + TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \ + TP_ARGS(ip, count, offset, flags), \ + TP_STRUCT__entry( \ + __field(dev_t, dev) \ + __field(xfs_ino_t, ino) \ + __field(xfs_fsize_t, size) \ + __field(xfs_fsize_t, new_size) \ + __field(loff_t, offset) \ + __field(size_t, count) \ + __field(int, flags) \ + ), \ + TP_fast_assign( \ + __entry->dev = VFS_I(ip)->i_sb->s_dev; \ + __entry->ino = ip->i_ino; \ + __entry->size = ip->i_d.di_size; \ + __entry->new_size = ip->i_new_size; \ + __entry->offset = offset; \ + __entry->count = count; \ + __entry->flags = flags; \ + ), \ + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ + "offset 0x%llx count 0x%zx ioflags %s", \ + MAJOR(__entry->dev), MINOR(__entry->dev), \ + __entry->ino, \ + __entry->size, \ + __entry->new_size, \ + __entry->offset, \ + __entry->count, \ + 
__print_flags(__entry->flags, "|", XFS_IO_FLAGS)) \ +) +DEFINE_RW_EVENT(xfs_file_read); +DEFINE_RW_EVENT(xfs_file_buffered_write); +DEFINE_RW_EVENT(xfs_file_direct_write); +DEFINE_RW_EVENT(xfs_file_splice_read); +DEFINE_RW_EVENT(xfs_file_splice_write); + + +#define DEFINE_PAGE_EVENT(name) \ +TRACE_EVENT(name, \ + TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \ + TP_ARGS(inode, page, off), \ + TP_STRUCT__entry( \ + __field(dev_t, dev) \ + __field(xfs_ino_t, ino) \ + __field(pgoff_t, pgoff) \ + __field(loff_t, size) \ + __field(unsigned long, offset) \ + __field(int, delalloc) \ + __field(int, unmapped) \ + __field(int, unwritten) \ + ), \ + TP_fast_assign( \ + int delalloc = -1, unmapped = -1, unwritten = -1; \ + \ + if (page_has_buffers(page)) \ + xfs_count_page_state(page, &delalloc, \ + &unmapped, &unwritten); \ + __entry->dev = inode->i_sb->s_dev; \ + __entry->ino = XFS_I(inode)->i_ino; \ + __entry->pgoff = page_offset(page); \ + __entry->size = i_size_read(inode); \ + __entry->offset = off; \ + __entry->delalloc = delalloc; \ + __entry->unmapped = unmapped; \ + __entry->unwritten = unwritten; \ + ), \ + TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " \ + "delalloc %d unmapped %d unwritten %d", \ + MAJOR(__entry->dev), MINOR(__entry->dev), \ + __entry->ino, \ + __entry->pgoff, \ + __entry->size, \ + __entry->offset, \ + __entry->delalloc, \ + __entry->unmapped, \ + __entry->unwritten) \ +) +DEFINE_PAGE_EVENT(xfs_writepage); +DEFINE_PAGE_EVENT(xfs_releasepage); +DEFINE_PAGE_EVENT(xfs_invalidatepage); + +#define DEFINE_IOMAP_EVENT(name) \ +TRACE_EVENT(name, \ + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ + int flags, struct xfs_bmbt_irec *irec), \ + TP_ARGS(ip, offset, count, flags, irec), \ + TP_STRUCT__entry( \ + __field(dev_t, dev) \ + __field(xfs_ino_t, ino) \ + __field(loff_t, size) \ + __field(loff_t, new_size) \ + __field(loff_t, offset) \ + __field(size_t, count) \ + __field(int, flags) \ + 
__field(xfs_fileoff_t, startoff) \ + __field(xfs_fsblock_t, startblock) \ + __field(xfs_filblks_t, blockcount) \ + ), \ + TP_fast_assign( \ + __entry->dev = VFS_I(ip)->i_sb->s_dev; \ + __entry->ino = ip->i_ino; \ + __entry->size = ip->i_d.di_size; \ + __entry->new_size = ip->i_new_size; \ + __entry->offset = offset; \ + __entry->count = count; \ + __entry->flags = flags; \ + __entry->startoff = irec ? irec->br_startoff : 0; \ + __entry->startblock = irec ? irec->br_startblock : 0; \ + __entry->blockcount = irec ? irec->br_blockcount : 0; \ + ), \ + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ + "offset 0x%llx count %zd flags %s " \ + "startoff 0x%llx startblock %s blockcount 0x%llx", \ + MAJOR(__entry->dev), MINOR(__entry->dev), \ + __entry->ino, \ + __entry->size, \ + __entry->new_size, \ + __entry->offset, \ + __entry->count, \ + __print_flags(__entry->flags, "|", BMAPI_FLAGS), \ + __entry->startoff, \ + xfs_fmtfsblock(__entry->startblock), \ + __entry->blockcount) \ +) +DEFINE_IOMAP_EVENT(xfs_iomap_enter); +DEFINE_IOMAP_EVENT(xfs_iomap_found); +DEFINE_IOMAP_EVENT(xfs_iomap_alloc); + +#define DEFINE_SIMPLE_IO_EVENT(name) \ +TRACE_EVENT(name, \ + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \ + TP_ARGS(ip, offset, count), \ + TP_STRUCT__entry( \ + __field(dev_t, dev) \ + __field(xfs_ino_t, ino) \ + __field(loff_t, size) \ + __field(loff_t, new_size) \ + __field(loff_t, offset) \ + __field(size_t, count) \ + ), \ + TP_fast_assign( \ + __entry->dev = VFS_I(ip)->i_sb->s_dev; \ + __entry->ino = ip->i_ino; \ + __entry->size = ip->i_d.di_size; \ + __entry->new_size = ip->i_new_size; \ + __entry->offset = offset; \ + __entry->count = count; \ + ), \ + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ + "offset 0x%llx count %zd", \ + MAJOR(__entry->dev), MINOR(__entry->dev), \ + __entry->ino, \ + __entry->size, \ + __entry->new_size, \ + __entry->offset, \ + __entry->count) \ +); 
+DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); +DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); + + +TRACE_EVENT(xfs_itruncate_start, + TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size, int flag, + xfs_off_t toss_start, xfs_off_t toss_finish), + TP_ARGS(ip, new_size, flag, toss_start, toss_finish), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fsize_t, new_size) + __field(xfs_off_t, toss_start) + __field(xfs_off_t, toss_finish) + __field(int, flag) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = new_size; + __entry->toss_start = toss_start; + __entry->toss_finish = toss_finish; + __entry->flag = flag; + ), + TP_printk("dev %d:%d ino 0x%llx %s size 0x%llx new_size 0x%llx " + "toss start 0x%llx toss finish 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->flag, "|", XFS_ITRUNC_FLAGS), + __entry->size, + __entry->new_size, + __entry->toss_start, + __entry->toss_finish) +); + +DECLARE_EVENT_CLASS(xfs_itrunc_class, + TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), + TP_ARGS(ip, new_size), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fsize_t, new_size) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = new_size; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->new_size) +) + +#define DEFINE_ITRUNC_EVENT(name) \ +DEFINE_EVENT(xfs_itrunc_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ + TP_ARGS(ip, new_size)) +DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start); +DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end); + +TRACE_EVENT(xfs_pagecache_inval, + TP_PROTO(struct xfs_inode 
*ip, xfs_off_t start, xfs_off_t finish), + TP_ARGS(ip, start, finish), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_off_t, start) + __field(xfs_off_t, finish) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->start = start; + __entry->finish = finish; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->start, + __entry->finish) +); + +/* + * xfs_bunmap event: records the inode's on-disk size, the bno/len range, + * the flags (decoded via XFS_BMAPI_FLAGS when printed) and the caller. + */ +TRACE_EVENT(xfs_bunmap, + TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, + int flags, unsigned long caller_ip), + TP_ARGS(ip, bno, len, flags, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fileoff_t, bno) + __field(xfs_filblks_t, len) + __field(unsigned long, caller_ip) + __field(int, flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->bno = bno; + __entry->len = len; + __entry->caller_ip = caller_ip; + __entry->flags = flags; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx " + "flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->bno, + __entry->len, + __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS), + (void *)__entry->caller_ip) + +); + +TRACE_EVENT(xfs_alloc_busy, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, + xfs_extlen_t len, int slot), + TP_ARGS(mp, agno, agbno, len, slot), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + __field(int, slot) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->len = len; + 
__entry->slot = slot; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u slot %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len, + __entry->slot) + +); + +#define XFS_BUSY_STATES \ + { 0, "found" }, \ + { 1, "missing" } + +TRACE_EVENT(xfs_alloc_unbusy, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + int slot, int found), + TP_ARGS(mp, agno, slot, found), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(int, slot) + __field(int, found) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->slot = slot; + __entry->found = found; + ), + TP_printk("dev %d:%d agno %u slot %d %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->slot, + __print_symbolic(__entry->found, XFS_BUSY_STATES)) +); + +TRACE_EVENT(xfs_alloc_busysearch, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, + xfs_extlen_t len, xfs_lsn_t lsn), + TP_ARGS(mp, agno, agbno, len, lsn), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + __field(xfs_lsn_t, lsn) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->len = len; + __entry->lsn = lsn; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len, + __entry->lsn) +); + +TRACE_EVENT(xfs_agf, + TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, + unsigned long caller_ip), + TP_ARGS(mp, agf, flags, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(int, flags) + __field(__u32, length) + __field(__u32, bno_root) + __field(__u32, cnt_root) + __field(__u32, bno_level) + __field(__u32, cnt_level) + __field(__u32, flfirst) + __field(__u32, fllast) + __field(__u32, flcount) 
+ __field(__u32, freeblks) + __field(__u32, longest) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + /* AGF header fields are big-endian; store them in CPU order */ + __entry->dev = mp->m_super->s_dev; + __entry->agno = be32_to_cpu(agf->agf_seqno); + __entry->flags = flags; + __entry->length = be32_to_cpu(agf->agf_length); + __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]); + __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]); + __entry->bno_level = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); + __entry->cnt_level = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); + __entry->flfirst = be32_to_cpu(agf->agf_flfirst); + __entry->fllast = be32_to_cpu(agf->agf_fllast); + __entry->flcount = be32_to_cpu(agf->agf_flcount); + __entry->freeblks = be32_to_cpu(agf->agf_freeblks); + __entry->longest = be32_to_cpu(agf->agf_longest); + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " + "levels b %u c %u flfirst %u fllast %u flcount %u " + "freeblks %u longest %u caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), + __entry->length, + __entry->bno_root, + __entry->cnt_root, + __entry->bno_level, + __entry->cnt_level, + __entry->flfirst, + __entry->fllast, + __entry->flcount, + __entry->freeblks, + __entry->longest, + (void *)__entry->caller_ip) +); + +TRACE_EVENT(xfs_free_extent, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, + xfs_extlen_t len, bool isfl, int haveleft, int haveright), + TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + __field(int, isfl) + __field(int, haveleft) + __field(int, haveright) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->len = len; + __entry->isfl = isfl; + __entry->haveleft = haveleft; + 
__entry->haveright = haveright; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len, + __entry->isfl, + __entry->haveleft ? + (__entry->haveright ? "both" : "left") : + (__entry->haveright ? "right" : "none")) + +); + +DECLARE_EVENT_CLASS(xfs_alloc_class, + TP_PROTO(struct xfs_alloc_arg *args), + TP_ARGS(args), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, minlen) + __field(xfs_extlen_t, maxlen) + __field(xfs_extlen_t, mod) + __field(xfs_extlen_t, prod) + __field(xfs_extlen_t, minleft) + __field(xfs_extlen_t, total) + __field(xfs_extlen_t, alignment) + __field(xfs_extlen_t, minalignslop) + __field(xfs_extlen_t, len) + __field(short, type) + __field(short, otype) + __field(char, wasdel) + __field(char, wasfromfl) + __field(char, isfl) + __field(char, userdata) + __field(xfs_fsblock_t, firstblock) + ), + TP_fast_assign( + __entry->dev = args->mp->m_super->s_dev; + __entry->agno = args->agno; + __entry->agbno = args->agbno; + __entry->minlen = args->minlen; + __entry->maxlen = args->maxlen; + __entry->mod = args->mod; + __entry->prod = args->prod; + __entry->minleft = args->minleft; + __entry->total = args->total; + __entry->alignment = args->alignment; + __entry->minalignslop = args->minalignslop; + __entry->len = args->len; + __entry->type = args->type; + __entry->otype = args->otype; + __entry->wasdel = args->wasdel; + __entry->wasfromfl = args->wasfromfl; + __entry->isfl = args->isfl; + __entry->userdata = args->userdata; + __entry->firstblock = args->firstblock; + ), + TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " + "prod %u minleft %u total %u alignment %u minalignslop %u " + "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " + "userdata %d firstblock 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + 
__entry->minlen, + __entry->maxlen, + __entry->mod, + __entry->prod, + __entry->minleft, + __entry->total, + __entry->alignment, + __entry->minalignslop, + __entry->len, + __print_symbolic(__entry->type, XFS_ALLOC_TYPES), + __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), + __entry->wasdel, + __entry->wasfromfl, + __entry->isfl, + __entry->userdata, + __entry->firstblock) +) + +#define DEFINE_ALLOC_EVENT(name) \ +DEFINE_EVENT(xfs_alloc_class, name, \ + TP_PROTO(struct xfs_alloc_arg *args), \ + TP_ARGS(args)) +DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); +DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); +DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); +DEFINE_ALLOC_EVENT(xfs_alloc_near_first); +DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); +DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); +DEFINE_ALLOC_EVENT(xfs_alloc_near_error); +DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); +DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); +DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); +DEFINE_ALLOC_EVENT(xfs_alloc_size_done); +DEFINE_ALLOC_EVENT(xfs_alloc_size_error); +DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); +DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); +DEFINE_ALLOC_EVENT(xfs_alloc_small_done); +DEFINE_ALLOC_EVENT(xfs_alloc_small_error); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); + +DECLARE_EVENT_CLASS(xfs_dir2_class, + TP_PROTO(struct xfs_da_args *args), + TP_ARGS(args), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __dynamic_array(char, name, args->namelen) + __field(int, namelen) + __field(xfs_dahash_t, hashval) + __field(xfs_ino_t, inumber) + __field(int, op_flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(args->dp)->i_sb->s_dev; + __entry->ino = args->dp->i_ino; + if (args->namelen) + memcpy(__get_str(name), args->name, args->namelen); + __entry->namelen = 
args->namelen; + __entry->hashval = args->hashval; + __entry->inumber = args->inumber; + __entry->op_flags = args->op_flags; + ), + TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x " + "inumber 0x%llx op_flags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->namelen, + __entry->namelen ? __get_str(name) : NULL, + __entry->namelen, + __entry->hashval, + __entry->inumber, + __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) +) + +#define DEFINE_DIR2_EVENT(name) \ +DEFINE_EVENT(xfs_dir2_class, name, \ + TP_PROTO(struct xfs_da_args *args), \ + TP_ARGS(args)) +DEFINE_DIR2_EVENT(xfs_dir2_sf_addname); +DEFINE_DIR2_EVENT(xfs_dir2_sf_create); +DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_sf_replace); +DEFINE_DIR2_EVENT(xfs_dir2_sf_removename); +DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4); +DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8); +DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block); +DEFINE_DIR2_EVENT(xfs_dir2_block_addname); +DEFINE_DIR2_EVENT(xfs_dir2_block_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_block_replace); +DEFINE_DIR2_EVENT(xfs_dir2_block_removename); +DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf); +DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node); +DEFINE_DIR2_EVENT(xfs_dir2_node_addname); +DEFINE_DIR2_EVENT(xfs_dir2_node_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_node_replace); +DEFINE_DIR2_EVENT(xfs_dir2_node_removename); +DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf); + +DECLARE_EVENT_CLASS(xfs_dir2_space_class, + TP_PROTO(struct xfs_da_args *args, int idx), + TP_ARGS(args, idx), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, op_flags) + __field(int, idx) + ), + TP_fast_assign( + __entry->dev = VFS_I(args->dp)->i_sb->s_dev; + __entry->ino = 
args->dp->i_ino; + __entry->op_flags = args->op_flags; + __entry->idx = idx; + ), + TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), + __entry->idx) +) + +#define DEFINE_DIR2_SPACE_EVENT(name) \ +DEFINE_EVENT(xfs_dir2_space_class, name, \ + TP_PROTO(struct xfs_da_args *args, int idx), \ + TP_ARGS(args, idx)) +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add); +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove); +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode); +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode); + +TRACE_EVENT(xfs_dir2_leafn_moveents, + TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count), + TP_ARGS(args, src_idx, dst_idx, count), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, op_flags) + __field(int, src_idx) + __field(int, dst_idx) + __field(int, count) + ), + TP_fast_assign( + __entry->dev = VFS_I(args->dp)->i_sb->s_dev; + __entry->ino = args->dp->i_ino; + __entry->op_flags = args->op_flags; + __entry->src_idx = src_idx; + __entry->dst_idx = dst_idx; + __entry->count = count; + ), + TP_printk("dev %d:%d ino 0x%llx op_flags %s " + "src_idx %d dst_idx %d count %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), + __entry->src_idx, + __entry->dst_idx, + __entry->count) +); + +#define XFS_SWAPEXT_INODES \ + { 0, "target" }, \ + { 1, "temp" } + +#define XFS_INODE_FORMAT_STR \ + { 0, "invalid" }, \ + { 1, "local" }, \ + { 2, "extent" }, \ + { 3, "btree" } + +DECLARE_EVENT_CLASS(xfs_swap_extent_class, + TP_PROTO(struct xfs_inode *ip, int which), + TP_ARGS(ip, which), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, which) + __field(xfs_ino_t, ino) + __field(int, format) + __field(int, nex) + __field(int, max_nex) + __field(int, broot_size) + __field(int, fork_off) + ), + TP_fast_assign( + __entry->dev = 
VFS_I(ip)->i_sb->s_dev; + __entry->which = which; + __entry->ino = ip->i_ino; + __entry->format = ip->i_d.di_format; + __entry->nex = ip->i_d.di_nextents; + __entry->max_nex = ip->i_df.if_ext_max; + __entry->broot_size = ip->i_df.if_broot_bytes; + __entry->fork_off = XFS_IFORK_BOFF(ip); + ), + TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, " + "Max in-fork extents %d, broot size %d, fork offset %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_symbolic(__entry->which, XFS_SWAPEXT_INODES), + __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR), + __entry->nex, + __entry->max_nex, + __entry->broot_size, + __entry->fork_off) +) + +#define DEFINE_SWAPEXT_EVENT(name) \ +DEFINE_EVENT(xfs_swap_extent_class, name, \ + TP_PROTO(struct xfs_inode *ip, int which), \ + TP_ARGS(ip, which)) + +DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); +DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); + +#endif /* _TRACE_XFS_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE xfs_trace +#include <trace/define_trace.h> diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index ad7fbead4c9..7c220b4227b 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -36,10 +36,13 @@ struct attrlist_cursor_kern; /* * Flags for read/write calls - same values as IRIX */ -#define IO_ISAIO 0x00001 /* don't wait for completion */ #define IO_ISDIRECT 0x00004 /* bypass page cache */ #define IO_INVIS 0x00020 /* don't update inode timestamps */ +#define XFS_IO_FLAGS \ + { IO_ISDIRECT, "DIRECT" }, \ + { IO_INVIS, "INVIS"} + /* * Flush/Invalidate options for vop_toss/flush/flushinval_pages. 
*/ diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c index 497c7fb75cc..fa01b9daba6 100644 --- a/fs/xfs/linux-2.6/xfs_xattr.c +++ b/fs/xfs/linux-2.6/xfs_xattr.c @@ -30,10 +30,10 @@ static int -__xfs_xattr_get(struct inode *inode, const char *name, +xfs_xattr_get(struct dentry *dentry, const char *name, void *value, size_t size, int xflags) { - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(dentry->d_inode); int error, asize = size; if (strcmp(name, "") == 0) @@ -45,17 +45,17 @@ __xfs_xattr_get(struct inode *inode, const char *name, value = NULL; } - error = -xfs_attr_get(ip, name, value, &asize, xflags); + error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags); if (error) return error; return asize; } static int -__xfs_xattr_set(struct inode *inode, const char *name, const void *value, +xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, size_t size, int flags, int xflags) { - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(dentry->d_inode); if (strcmp(name, "") == 0) return -EINVAL; @@ -67,79 +67,39 @@ __xfs_xattr_set(struct inode *inode, const char *name, const void *value, xflags |= ATTR_REPLACE; if (!value) - return -xfs_attr_remove(ip, name, xflags); - return -xfs_attr_set(ip, name, (void *)value, size, xflags); -} - -static int -xfs_xattr_user_get(struct inode *inode, const char *name, - void *value, size_t size) -{ - return __xfs_xattr_get(inode, name, value, size, 0); -} - -static int -xfs_xattr_user_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - return __xfs_xattr_set(inode, name, value, size, flags, 0); + return -xfs_attr_remove(ip, (unsigned char *)name, xflags); + return -xfs_attr_set(ip, (unsigned char *)name, + (void *)value, size, xflags); } static struct xattr_handler xfs_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, - .get = xfs_xattr_user_get, - .set = xfs_xattr_user_set, + .flags = 0, /* no 
flags implies user namespace */ + .get = xfs_xattr_get, + .set = xfs_xattr_set, }; - -static int -xfs_xattr_trusted_get(struct inode *inode, const char *name, - void *value, size_t size) -{ - return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT); -} - -static int -xfs_xattr_trusted_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT); -} - static struct xattr_handler xfs_xattr_trusted_handler = { .prefix = XATTR_TRUSTED_PREFIX, - .get = xfs_xattr_trusted_get, - .set = xfs_xattr_trusted_set, + .flags = ATTR_ROOT, + .get = xfs_xattr_get, + .set = xfs_xattr_set, }; - -static int -xfs_xattr_secure_get(struct inode *inode, const char *name, - void *value, size_t size) -{ - return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE); -} - -static int -xfs_xattr_secure_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE); -} - static struct xattr_handler xfs_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, - .get = xfs_xattr_secure_get, - .set = xfs_xattr_secure_set, + .flags = ATTR_SECURE, + .get = xfs_xattr_get, + .set = xfs_xattr_set, }; - struct xattr_handler *xfs_xattr_handlers[] = { &xfs_xattr_user_handler, &xfs_xattr_trusted_handler, &xfs_xattr_security_handler, #ifdef CONFIG_XFS_POSIX_ACL - &xfs_xattr_system_handler, + &xfs_xattr_acl_access_handler, + &xfs_xattr_acl_default_handler, #endif NULL }; @@ -165,8 +125,13 @@ static const char *xfs_xattr_prefix(int flags) } static int -xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags, - char *name, int namelen, int valuelen, char *value) +xfs_xattr_put_listent( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + int valuelen, + unsigned char *value) { unsigned int prefix_len = xfs_xattr_prefix_len(flags); char *offset; @@ -189,7 
+154,7 @@ xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags, offset = (char *)context->alist + context->count; strncpy(offset, xfs_xattr_prefix(flags), prefix_len); offset += prefix_len; - strncpy(offset, name, namelen); /* real name */ + strncpy(offset, (char *)name, namelen); /* real name */ offset += namelen; *offset = '\0'; context->count += prefix_len + namelen + 1; @@ -197,8 +162,13 @@ xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags, } static int -xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags, - char *name, int namelen, int valuelen, char *value) +xfs_xattr_put_listent_sizes( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + int valuelen, + unsigned char *value) { context->count += xfs_xattr_prefix_len(flags) + namelen + 1; return 0; diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 2f3f2229eaa..5f79dd78626 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -47,6 +47,7 @@ #include "xfs_trans_space.h" #include "xfs_trans_priv.h" #include "xfs_qm.h" +#include "xfs_trace.h" /* @@ -112,10 +113,7 @@ xfs_qm_dqinit( init_completion(&dqp->q_flush); complete(&dqp->q_flush); -#ifdef XFS_DQUOT_TRACE - dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_NOFS); - xfs_dqtrace_entry(dqp, "DQINIT"); -#endif + trace_xfs_dqinit(dqp); } else { /* * Only the q_core portion was zeroed in dqreclaim_one(). 
@@ -136,10 +134,7 @@ xfs_qm_dqinit( dqp->q_hash = NULL; ASSERT(dqp->dq_flnext == dqp->dq_flprev); -#ifdef XFS_DQUOT_TRACE - ASSERT(dqp->q_trace); - xfs_dqtrace_entry(dqp, "DQRECLAIMED_INIT"); -#endif + trace_xfs_dqreuse(dqp); } /* @@ -167,13 +162,8 @@ xfs_qm_dqdestroy( mutex_destroy(&dqp->q_qlock); sv_destroy(&dqp->q_pinwait); - -#ifdef XFS_DQUOT_TRACE - if (dqp->q_trace) - ktrace_free(dqp->q_trace); - dqp->q_trace = NULL; -#endif kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); + atomic_dec(&xfs_Gqm->qm_totaldquots); } @@ -195,49 +185,6 @@ xfs_qm_dqinit_core( d->dd_diskdq.d_flags = type; } - -#ifdef XFS_DQUOT_TRACE -/* - * Dquot tracing for debugging. - */ -/* ARGSUSED */ -void -__xfs_dqtrace_entry( - xfs_dquot_t *dqp, - char *func, - void *retaddr, - xfs_inode_t *ip) -{ - xfs_dquot_t *udqp = NULL; - xfs_ino_t ino = 0; - - ASSERT(dqp->q_trace); - if (ip) { - ino = ip->i_ino; - udqp = ip->i_udquot; - } - ktrace_enter(dqp->q_trace, - (void *)(__psint_t)DQUOT_KTRACE_ENTRY, - (void *)func, - (void *)(__psint_t)dqp->q_nrefs, - (void *)(__psint_t)dqp->dq_flags, - (void *)(__psint_t)dqp->q_res_bcount, - (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_bcount), - (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_icount), - (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_hardlimit), - (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_softlimit), - (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_hardlimit), - (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_softlimit), - (void *)(__psint_t)be32_to_cpu(dqp->q_core.d_id), - (void *)(__psint_t)current_pid(), - (void *)(__psint_t)ino, - (void *)(__psint_t)retaddr, - (void *)(__psint_t)udqp); - return; -} -#endif - - /* * If default limits are in force, push them into the dquot now. 
* We overwrite the dquot limits only if they are zero and this @@ -425,7 +372,8 @@ xfs_qm_dqalloc( xfs_trans_t *tp = *tpp; ASSERT(tp != NULL); - xfs_dqtrace_entry(dqp, "DQALLOC"); + + trace_xfs_dqalloc(dqp); /* * Initialize the bmap freelist prior to calling bmapi code. @@ -612,7 +560,8 @@ xfs_qm_dqtobp( * (in which case we already have the buf). */ if (! newdquot) { - xfs_dqtrace_entry(dqp, "DQTOBP READBUF"); + trace_xfs_dqtobp_read(dqp); + if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, XFS_QI_DQCHUNKLEN(mp), @@ -670,11 +619,12 @@ xfs_qm_dqread( ASSERT(tpp); + trace_xfs_dqread(dqp); + /* * get a pointer to the on-disk dquot and the buffer containing it * dqp already knows its own type (GROUP/USER). */ - xfs_dqtrace_entry(dqp, "DQREAD"); if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { return (error); } @@ -763,7 +713,7 @@ xfs_qm_idtodq( * or if the dquot didn't exist on disk and we ask to * allocate (ENOENT). */ - xfs_dqtrace_entry(dqp, "DQREAD FAIL"); + trace_xfs_dqread_fail(dqp); cancelflags |= XFS_TRANS_ABORT; goto error0; } @@ -817,7 +767,8 @@ xfs_qm_dqlookup( * id can't be modified without the hashlock anyway. */ if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) { - xfs_dqtrace_entry(dqp, "DQFOUND BY LOOKUP"); + trace_xfs_dqlookup_found(dqp); + /* * All in core dquots must be on the dqlist of mp */ @@ -827,7 +778,7 @@ xfs_qm_dqlookup( if (dqp->q_nrefs == 0) { ASSERT (XFS_DQ_IS_ON_FREELIST(dqp)); if (! 
xfs_qm_freelist_lock_nowait(xfs_Gqm)) { - xfs_dqtrace_entry(dqp, "DQLOOKUP: WANT"); + trace_xfs_dqlookup_want(dqp); /* * We may have raced with dqreclaim_one() @@ -857,8 +808,7 @@ xfs_qm_dqlookup( /* * take it off the freelist */ - xfs_dqtrace_entry(dqp, - "DQLOOKUP: TAKEOFF FL"); + trace_xfs_dqlookup_freelist(dqp); XQM_FREELIST_REMOVE(dqp); /* xfs_qm_freelist_print(&(xfs_Gqm-> qm_dqfreelist), @@ -878,8 +828,7 @@ xfs_qm_dqlookup( */ ASSERT(mutex_is_locked(&qh->qh_lock)); if (dqp->HL_PREVP != &qh->qh_next) { - xfs_dqtrace_entry(dqp, - "DQLOOKUP: HASH MOVETOFRONT"); + trace_xfs_dqlookup_move(dqp); if ((d = dqp->HL_NEXT)) d->HL_PREVP = dqp->HL_PREVP; *(dqp->HL_PREVP) = d; @@ -889,7 +838,7 @@ xfs_qm_dqlookup( dqp->HL_PREVP = &qh->qh_next; qh->qh_next = dqp; } - xfs_dqtrace_entry(dqp, "LOOKUP END"); + trace_xfs_dqlookup_done(dqp); *O_dqpp = dqp; ASSERT(mutex_is_locked(&qh->qh_lock)); return (0); @@ -971,7 +920,7 @@ xfs_qm_dqget( ASSERT(*O_dqpp); ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); mutex_unlock(&h->qh_lock); - xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)"); + trace_xfs_dqget_hit(*O_dqpp); return (0); /* success */ } XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses); @@ -1104,7 +1053,7 @@ xfs_qm_dqget( mutex_unlock(&h->qh_lock); dqret: ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); - xfs_dqtrace_entry(dqp, "DQGET DONE"); + trace_xfs_dqget_miss(dqp); *O_dqpp = dqp; return (0); } @@ -1124,7 +1073,8 @@ xfs_qm_dqput( ASSERT(dqp->q_nrefs > 0); ASSERT(XFS_DQ_IS_LOCKED(dqp)); - xfs_dqtrace_entry(dqp, "DQPUT"); + + trace_xfs_dqput(dqp); if (dqp->q_nrefs != 1) { dqp->q_nrefs--; @@ -1137,7 +1087,7 @@ xfs_qm_dqput( * in the right order; but try to get it out-of-order first */ if (! 
xfs_qm_freelist_lock_nowait(xfs_Gqm)) { - xfs_dqtrace_entry(dqp, "DQPUT: FLLOCK-WAIT"); + trace_xfs_dqput_wait(dqp); xfs_dqunlock(dqp); xfs_qm_freelist_lock(xfs_Gqm); xfs_dqlock(dqp); @@ -1148,7 +1098,8 @@ xfs_qm_dqput( /* We can't depend on nrefs being == 1 here */ if (--dqp->q_nrefs == 0) { - xfs_dqtrace_entry(dqp, "DQPUT: ON FREELIST"); + trace_xfs_dqput_free(dqp); + /* * insert at end of the freelist. */ @@ -1196,7 +1147,7 @@ xfs_qm_dqrele( if (!dqp) return; - xfs_dqtrace_entry(dqp, "DQRELE"); + trace_xfs_dqrele(dqp); xfs_dqlock(dqp); /* @@ -1229,14 +1180,14 @@ xfs_qm_dqflush( ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(!completion_done(&dqp->q_flush)); - xfs_dqtrace_entry(dqp, "DQFLUSH"); + trace_xfs_dqflush(dqp); /* * If not dirty, or it's pinned and we are not supposed to * block, nada. */ if (!XFS_DQ_IS_DIRTY(dqp) || - (!(flags & XFS_QMOPT_SYNC) && atomic_read(&dqp->q_pincount) > 0)) { + (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { xfs_dqfunlock(dqp); return 0; } @@ -1259,7 +1210,6 @@ xfs_qm_dqflush( * the ondisk-dquot has already been allocated for. */ if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) { - xfs_dqtrace_entry(dqp, "DQTOBP FAIL"); ASSERT(error != ENOENT); /* * Quotas could have gotten turned off (ESRCH) @@ -1297,22 +1247,21 @@ xfs_qm_dqflush( * get stuck waiting in the write for too long. */ if (XFS_BUF_ISPINNED(bp)) { - xfs_dqtrace_entry(dqp, "DQFLUSH LOG FORCE"); - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + trace_xfs_dqflush_force(dqp); + xfs_log_force(mp, 0); } - if (flags & XFS_QMOPT_DELWRI) { - xfs_bdwrite(mp, bp); - } else if (flags & XFS_QMOPT_ASYNC) { - error = xfs_bawrite(mp, bp); - } else { + if (flags & SYNC_WAIT) error = xfs_bwrite(mp, bp); - } - xfs_dqtrace_entry(dqp, "DQFLUSH END"); + else + xfs_bdwrite(mp, bp); + + trace_xfs_dqflush_done(dqp); + /* * dqp is still locked, but caller is free to unlock it now. 
*/ - return (error); + return error; } @@ -1483,7 +1432,7 @@ xfs_qm_dqpurge( */ if (XFS_DQ_IS_DIRTY(dqp)) { int error; - xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY"); + /* dqflush unlocks dqflock */ /* * Given that dqpurge is a very rare occurrence, it is OK @@ -1493,7 +1442,7 @@ xfs_qm_dqpurge( * We don't care about getting disk errors here. We need * to purge this dquot anyway, so we go ahead regardless. */ - error = xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC); + error = xfs_qm_dqflush(dqp, SYNC_WAIT); if (error) xfs_fs_cmn_err(CE_WARN, mp, "xfs_qm_dqpurge: dquot %p flush failed", dqp); @@ -1577,25 +1526,17 @@ xfs_qm_dqflock_pushbuf_wait( * the flush lock when the I/O completes. */ bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno, - XFS_QI_DQCHUNKLEN(dqp->q_mount), - XFS_INCORE_TRYLOCK); - if (bp != NULL) { - if (XFS_BUF_ISDELAYWRITE(bp)) { - int error; - if (XFS_BUF_ISPINNED(bp)) { - xfs_log_force(dqp->q_mount, - (xfs_lsn_t)0, - XFS_LOG_FORCE); - } - error = xfs_bawrite(dqp->q_mount, bp); - if (error) - xfs_fs_cmn_err(CE_WARN, dqp->q_mount, - "xfs_qm_dqflock_pushbuf_wait: " - "pushbuf error %d on dqp %p, bp %p", - error, dqp, bp); - } else { - xfs_buf_relse(bp); - } + XFS_QI_DQCHUNKLEN(dqp->q_mount), XBF_TRYLOCK); + if (!bp) + goto out_lock; + + if (XFS_BUF_ISDELAYWRITE(bp)) { + if (XFS_BUF_ISPINNED(bp)) + xfs_log_force(dqp->q_mount, 0); + xfs_buf_delwri_promote(bp); + wake_up_process(bp->b_target->bt_task); } + xfs_buf_relse(bp); +out_lock: xfs_dqflock(dqp); } diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 6533ead9b88..a0f7da586d1 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -85,9 +85,6 @@ typedef struct xfs_dquot { struct completion q_flush; /* flush completion queue */ atomic_t q_pincount; /* dquot pin count */ wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ -#ifdef XFS_DQUOT_TRACE - struct ktrace *q_trace; /* trace header structure */ -#endif } xfs_dquot_t; @@ -98,7 +95,7 @@ typedef 
struct xfs_dquot { #define dq_flags q_lists.dqm_flags /* - * Lock hierachy for q_qlock: + * Lock hierarchy for q_qlock: * XFS_QLOCK_NORMAL is the implicit default, * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 */ @@ -144,24 +141,6 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp) (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ (XFS_IS_OQUOTA_ON((d)->q_mount)))) -#ifdef XFS_DQUOT_TRACE -/* - * Dquot Tracing stuff. - */ -#define DQUOT_TRACE_SIZE 64 -#define DQUOT_KTRACE_ENTRY 1 - -extern void __xfs_dqtrace_entry(xfs_dquot_t *dqp, char *func, - void *, xfs_inode_t *); -#define xfs_dqtrace_entry_ino(a,b,ip) \ - __xfs_dqtrace_entry((a), (b), (void*)__return_address, (ip)) -#define xfs_dqtrace_entry(a,b) \ - __xfs_dqtrace_entry((a), (b), (void*)__return_address, NULL) -#else -#define xfs_dqtrace_entry(a,b) -#define xfs_dqtrace_entry_ino(a,b,ip) -#endif - #ifdef QUOTADEBUG extern void xfs_qm_dqprint(xfs_dquot_t *); #else diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index d0d4a9a0bbd..4e4ee9a5719 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -74,11 +74,11 @@ xfs_qm_dquot_logitem_format( logvec->i_addr = (xfs_caddr_t)&logitem->qli_format; logvec->i_len = sizeof(xfs_dq_logformat_t); - XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_QFORMAT); + logvec->i_type = XLOG_REG_TYPE_QFORMAT; logvec++; logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core; logvec->i_len = sizeof(xfs_disk_dquot_t); - XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_DQUOT); + logvec->i_type = XLOG_REG_TYPE_DQUOT; ASSERT(2 == logitem->qli_item.li_desc->lid_size); logitem->qli_format.qlf_size = 2; @@ -153,7 +153,7 @@ xfs_qm_dquot_logitem_push( * lock without sleeping, then there must not have been * anyone in the process of flushing the dquot. 
*/ - error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); + error = xfs_qm_dqflush(dqp, 0); if (error) xfs_fs_cmn_err(CE_WARN, dqp->q_mount, "xfs_qm_dquot_logitem_push: push error %d on dqp %p", @@ -190,7 +190,7 @@ xfs_qm_dqunpin_wait( /* * Give the log a push so we don't wait here too long. */ - xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE); + xfs_log_force(dqp->q_mount, 0); wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); } @@ -212,68 +212,31 @@ xfs_qm_dquot_logitem_pushbuf( xfs_dquot_t *dqp; xfs_mount_t *mp; xfs_buf_t *bp; - uint dopush; dqp = qip->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); /* - * The qli_pushbuf_flag keeps others from - * trying to duplicate our effort. - */ - ASSERT(qip->qli_pushbuf_flag != 0); - ASSERT(qip->qli_push_owner == current_pid()); - - /* * If flushlock isn't locked anymore, chances are that the * inode flush completed and the inode was taken off the AIL. * So, just get out. */ if (completion_done(&dqp->q_flush) || ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { - qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); return; } mp = dqp->q_mount; bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, - XFS_QI_DQCHUNKLEN(mp), - XFS_INCORE_TRYLOCK); - if (bp != NULL) { - if (XFS_BUF_ISDELAYWRITE(bp)) { - dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && - !completion_done(&dqp->q_flush)); - qip->qli_pushbuf_flag = 0; - xfs_dqunlock(dqp); - - if (XFS_BUF_ISPINNED(bp)) { - xfs_log_force(mp, (xfs_lsn_t)0, - XFS_LOG_FORCE); - } - if (dopush) { - int error; -#ifdef XFSRACEDEBUG - delay_for_intr(); - delay(300); -#endif - error = xfs_bawrite(mp, bp); - if (error) - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_qm_dquot_logitem_pushbuf: pushbuf error %d on qip %p, bp %p", - error, qip, bp); - } else { - xfs_buf_relse(bp); - } - } else { - qip->qli_pushbuf_flag = 0; - xfs_dqunlock(dqp); - xfs_buf_relse(bp); - } + XFS_QI_DQCHUNKLEN(mp), XBF_TRYLOCK); + xfs_dqunlock(dqp); + if (!bp) return; - } + if (XFS_BUF_ISDELAYWRITE(bp)) + 
xfs_buf_delwri_promote(bp); + xfs_buf_relse(bp); + return; - qip->qli_pushbuf_flag = 0; - xfs_dqunlock(dqp); } /* @@ -291,50 +254,24 @@ xfs_qm_dquot_logitem_trylock( xfs_dq_logitem_t *qip) { xfs_dquot_t *dqp; - uint retval; dqp = qip->qli_dquot; if (atomic_read(&dqp->q_pincount) > 0) - return (XFS_ITEM_PINNED); + return XFS_ITEM_PINNED; if (! xfs_qm_dqlock_nowait(dqp)) - return (XFS_ITEM_LOCKED); + return XFS_ITEM_LOCKED; - retval = XFS_ITEM_SUCCESS; if (!xfs_dqflock_nowait(dqp)) { /* - * The dquot is already being flushed. It may have been - * flushed delayed write, however, and we don't want to - * get stuck waiting for that to complete. So, we want to check - * to see if we can lock the dquot's buffer without sleeping. - * If we can and it is marked for delayed write, then we - * hold it and send it out from the push routine. We don't - * want to do that now since we might sleep in the device - * strategy routine. We also don't want to grab the buffer lock - * here because we'd like not to call into the buffer cache - * while holding the AIL lock. - * Make sure to only return PUSHBUF if we set pushbuf_flag - * ourselves. If someone else is doing it then we don't - * want to go to the push routine and duplicate their efforts. + * dquot has already been flushed to the backing buffer, + * leave it locked, pushbuf routine will unlock it. */ - if (qip->qli_pushbuf_flag == 0) { - qip->qli_pushbuf_flag = 1; - ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno); -#ifdef DEBUG - qip->qli_push_owner = current_pid(); -#endif - /* - * The dquot is left locked. 
- */ - retval = XFS_ITEM_PUSHBUF; - } else { - retval = XFS_ITEM_FLUSHING; - xfs_dqunlock_nonotify(dqp); - } + return XFS_ITEM_PUSHBUF; } ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL); - return (retval); + return XFS_ITEM_SUCCESS; } @@ -467,7 +404,7 @@ xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf, log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); log_vector->i_len = sizeof(xfs_qoff_logitem_t); - XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF); + log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; qf->qql_format.qf_size = 1; } diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h index 5a632531f84..5acae2ada70 100644 --- a/fs/xfs/quota/xfs_dquot_item.h +++ b/fs/xfs/quota/xfs_dquot_item.h @@ -27,10 +27,6 @@ typedef struct xfs_dq_logitem { xfs_log_item_t qli_item; /* common portion */ struct xfs_dquot *qli_dquot; /* dquot ptr */ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ - unsigned short qli_pushbuf_flag; /* 1 bit used in push_ail */ -#ifdef DEBUG - uint64_t qli_push_owner; -#endif xfs_dq_logformat_t qli_format; /* logged structure */ } xfs_dq_logitem_t; diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 45b1bfef738..417e61e3d9d 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -47,6 +47,7 @@ #include "xfs_trans_space.h" #include "xfs_utils.h" #include "xfs_qm.h" +#include "xfs_trace.h" /* * The global quota manager. 
There is only one of these for the entire @@ -117,9 +118,14 @@ xfs_Gqm_init(void) */ udqhash = kmem_zalloc_greedy(&hsize, XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t), - XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t), - KM_SLEEP | KM_MAYFAIL | KM_LARGE); - gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE); + XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t)); + if (!udqhash) + goto out; + + gdqhash = kmem_zalloc_large(hsize); + if (!gdqhash) + goto out_free_udqhash; + hsize /= sizeof(xfs_dqhash_t); ndquot = hsize << 8; @@ -169,6 +175,11 @@ xfs_Gqm_init(void) mutex_init(&qcheck_lock); #endif return xqm; + + out_free_udqhash: + kmem_free_large(udqhash); + out: + return NULL; } /* @@ -188,8 +199,8 @@ xfs_qm_destroy( xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); } - kmem_free(xqm->qm_usr_dqhtable); - kmem_free(xqm->qm_grp_dqhtable); + kmem_free_large(xqm->qm_usr_dqhtable); + kmem_free_large(xqm->qm_grp_dqhtable); xqm->qm_usr_dqhtable = NULL; xqm->qm_grp_dqhtable = NULL; xqm->qm_dqhashmask = 0; @@ -218,8 +229,12 @@ xfs_qm_hold_quotafs_ref( */ mutex_lock(&xfs_Gqm_lock); - if (xfs_Gqm == NULL) + if (!xfs_Gqm) { xfs_Gqm = xfs_Gqm_init(); + if (!xfs_Gqm) + return ENOMEM; + } + /* * We can keep a list of all filesystems with quotas mounted for * debugging and statistical purposes, but ... @@ -435,7 +450,7 @@ xfs_qm_unmount_quotas( STATIC int xfs_qm_dqflush_all( xfs_mount_t *mp, - int flags) + int sync_mode) { int recl; xfs_dquot_t *dqp; @@ -453,7 +468,7 @@ again: xfs_dqunlock(dqp); continue; } - xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY"); + /* XXX a sentinel would be better */ recl = XFS_QI_MPLRECLAIMS(mp); if (!xfs_dqflock_nowait(dqp)) { @@ -471,7 +486,7 @@ again: * across a disk write. 
*/ xfs_qm_mplist_unlock(mp); - error = xfs_qm_dqflush(dqp, flags); + error = xfs_qm_dqflush(dqp, sync_mode); xfs_dqunlock(dqp); if (error) return error; @@ -651,7 +666,7 @@ xfs_qm_dqattach_one( */ dqp = *IO_idqpp; if (dqp) { - xfs_dqtrace_entry(dqp, "DQATTACH: found in ip"); + trace_xfs_dqattach_found(dqp); return 0; } @@ -704,7 +719,7 @@ xfs_qm_dqattach_one( if (error) return error; - xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget"); + trace_xfs_dqattach_get(dqp); /* * dqget may have dropped and re-acquired the ilock, but it guarantees @@ -890,15 +905,15 @@ xfs_qm_dqdetach( if (!(ip->i_udquot || ip->i_gdquot)) return; + trace_xfs_dquot_dqdetach(ip); + ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); if (ip->i_udquot) { - xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip); xfs_qm_dqrele(ip->i_udquot); ip->i_udquot = NULL; } if (ip->i_gdquot) { - xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip); xfs_qm_dqrele(ip->i_gdquot); ip->i_gdquot = NULL; } @@ -911,13 +926,11 @@ xfs_qm_sync( { int recl, restarts; xfs_dquot_t *dqp; - uint flush_flags; int error; if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; - flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI; restarts = 0; again: @@ -977,8 +990,7 @@ xfs_qm_sync( * across a disk write */ xfs_qm_mplist_unlock(mp); - xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH"); - error = xfs_qm_dqflush(dqp, flush_flags); + error = xfs_qm_dqflush(dqp, flags); xfs_dqunlock(dqp); if (error && XFS_FORCED_SHUTDOWN(mp)) return 0; /* Need to prevent umount failure */ @@ -1350,7 +1362,8 @@ xfs_qm_reset_dqcounts( xfs_disk_dquot_t *ddq; int j; - xfs_buftrace("RESET DQUOTS", bp); + trace_xfs_reset_dqcounts(bp, _RET_IP_); + /* * Reset all counters and timers. They'll be * started afresh by xfs_qm_quotacheck. 
@@ -1543,7 +1556,9 @@ xfs_qm_quotacheck_dqadjust( xfs_qcnt_t rtblks) { ASSERT(XFS_DQ_IS_LOCKED(dqp)); - xfs_dqtrace_entry(dqp, "QCHECK DQADJUST"); + + trace_xfs_dqadjust(dqp); + /* * Adjust the inode count and the block count to reflect this inode's * resource usage. @@ -1779,7 +1794,7 @@ xfs_qm_quotacheck( * successfully. */ if (!error) - error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI); + error = xfs_qm_dqflush_all(mp, 0); /* * We can get this error if we couldn't do a dquot allocation inside @@ -1994,12 +2009,14 @@ xfs_qm_shake_freelist( */ if (XFS_DQ_IS_DIRTY(dqp)) { int error; - xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY"); + + trace_xfs_dqshake_dirty(dqp); + /* * We flush it delayed write, so don't bother * releasing the mplock. */ - error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); + error = xfs_qm_dqflush(dqp, 0); if (error) { xfs_fs_cmn_err(CE_WARN, dqp->q_mount, "xfs_qm_dqflush_all: dquot %p flush failed", dqp); @@ -2038,7 +2055,9 @@ xfs_qm_shake_freelist( return nreclaimed; goto tryagain; } - xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING"); + + trace_xfs_dqshake_unlink(dqp); + #ifdef QUOTADEBUG cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n", dqp, be32_to_cpu(dqp->q_core.d_id)); @@ -2125,7 +2144,9 @@ xfs_qm_dqreclaim_one(void) */ if (dqp->dq_flags & XFS_DQ_WANT) { ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); - xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT"); + + trace_xfs_dqreclaim_want(dqp); + xfs_dqunlock(dqp); xfs_qm_freelist_unlock(xfs_Gqm); if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) @@ -2171,12 +2192,14 @@ xfs_qm_dqreclaim_one(void) */ if (XFS_DQ_IS_DIRTY(dqp)) { int error; - xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY"); + + trace_xfs_dqreclaim_dirty(dqp); + /* * We flush it delayed write, so don't bother * releasing the freelist lock. 
*/ - error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); + error = xfs_qm_dqflush(dqp, 0); if (error) { xfs_fs_cmn_err(CE_WARN, dqp->q_mount, "xfs_qm_dqreclaim: dquot %p flush failed", dqp); @@ -2194,8 +2217,9 @@ xfs_qm_dqreclaim_one(void) if (!mutex_trylock(&dqp->q_hash->qh_lock)) goto mplistunlock; + trace_xfs_dqreclaim_unlink(dqp); + ASSERT(dqp->q_nrefs == 0); - xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING"); XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp); XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); XQM_FREELIST_REMOVE(dqp); @@ -2430,7 +2454,7 @@ xfs_qm_vop_dqalloc( } } if (uq) - xfs_dqtrace_entry_ino(uq, "DQALLOC", ip); + trace_xfs_dquot_dqalloc(ip); xfs_iunlock(ip, lockflags); if (O_udqpp) diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index a5346630dfa..97b410c1279 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -59,7 +59,7 @@ xfs_fill_statvfs_from_dquot( be64_to_cpu(dp->d_blk_hardlimit); if (limit && statp->f_blocks > limit) { statp->f_blocks = limit; - statp->f_bfree = + statp->f_bfree = statp->f_bavail = (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; } diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 5d1a3b98a6e..5d0ee8d492d 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -49,6 +49,7 @@ #include "xfs_buf_item.h" #include "xfs_utils.h" #include "xfs_qm.h" +#include "xfs_trace.h" #ifdef DEBUG # define qdprintk(s, args...) 
cmn_err(CE_DEBUG, s, ## args) @@ -496,7 +497,6 @@ xfs_qm_scall_setqlim( ASSERT(error != ENOENT); return (error); } - xfs_dqtrace_entry(dqp, "Q_SETQLIM: AFT DQGET"); xfs_trans_dqjoin(tp, dqp); ddq = &dqp->q_core; @@ -602,7 +602,6 @@ xfs_qm_scall_setqlim( dqp->dq_flags |= XFS_DQ_DIRTY; xfs_trans_log_dquot(tp, dqp); - xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT"); error = xfs_trans_commit(tp, 0); xfs_qm_dqprint(dqp); xfs_qm_dqrele(dqp); @@ -630,7 +629,6 @@ xfs_qm_scall_getquota( return (error); } - xfs_dqtrace_entry(dqp, "Q_GETQUOTA SUCCESS"); /* * If everything's NULL, this dquot doesn't quite exist as far as * our utility programs are concerned. @@ -893,7 +891,7 @@ xfs_qm_dqrele_all_inodes( uint flags) { ASSERT(mp->m_quotainfo); - xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG); + xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0); } /*------------------------------------------------------------------------*/ @@ -1194,9 +1192,9 @@ xfs_qm_internalqcheck( if (! XFS_IS_QUOTA_ON(mp)) return XFS_ERROR(ESRCH); - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); + xfs_log_force(mp, XFS_LOG_SYNC); XFS_bflush(mp->m_ddev_targp); - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); + xfs_log_force(mp, XFS_LOG_SYNC); XFS_bflush(mp->m_ddev_targp); mutex_lock(&qcheck_lock); diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 97ac9640be9..c3ab75cb1d9 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -589,12 +589,18 @@ xfs_trans_unreserve_and_mod_dquots( } } -STATIC int -xfs_quota_error(uint flags) +STATIC void +xfs_quota_warn( + struct xfs_mount *mp, + struct xfs_dquot *dqp, + int type) { - if (flags & XFS_QMOPT_ENOSPC) - return ENOSPC; - return EDQUOT; + /* no warnings for project quotas - we just return ENOSPC later */ + if (dqp->dq_flags & XFS_DQ_PROJ) + return; + quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? 
USRQUOTA : GRPQUOTA, + be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev, + type); } /* @@ -612,7 +618,6 @@ xfs_trans_dqresv( long ninos, uint flags) { - int error; xfs_qcnt_t hardlimit; xfs_qcnt_t softlimit; time_t timer; @@ -649,7 +654,6 @@ xfs_trans_dqresv( warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount); resbcountp = &dqp->q_res_rtbcount; } - error = 0; if ((flags & XFS_QMOPT_FORCE_RES) == 0 && dqp->q_core.d_id && @@ -667,18 +671,20 @@ xfs_trans_dqresv( * nblks. */ if (hardlimit > 0ULL && - (hardlimit <= nblks + *resbcountp)) { - error = xfs_quota_error(flags); + hardlimit <= nblks + *resbcountp) { + xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); goto error_return; } - if (softlimit > 0ULL && - (softlimit <= nblks + *resbcountp)) { + softlimit <= nblks + *resbcountp) { if ((timer != 0 && get_seconds() > timer) || (warns != 0 && warns >= warnlimit)) { - error = xfs_quota_error(flags); + xfs_quota_warn(mp, dqp, + QUOTA_NL_BSOFTLONGWARN); goto error_return; } + + xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN); } } if (ninos > 0) { @@ -692,15 +698,19 @@ xfs_trans_dqresv( softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); if (!softlimit) softlimit = q->qi_isoftlimit; + if (hardlimit > 0ULL && count >= hardlimit) { - error = xfs_quota_error(flags); + xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); goto error_return; - } else if (softlimit > 0ULL && count >= softlimit) { - if ((timer != 0 && get_seconds() > timer) || + } + if (softlimit > 0ULL && count >= softlimit) { + if ((timer != 0 && get_seconds() > timer) || (warns != 0 && warns >= warnlimit)) { - error = xfs_quota_error(flags); + xfs_quota_warn(mp, dqp, + QUOTA_NL_ISOFTLONGWARN); goto error_return; } + xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN); } } } @@ -736,9 +746,14 @@ xfs_trans_dqresv( ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount)); ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount)); + xfs_dqunlock(dqp); + return 0; + error_return: xfs_dqunlock(dqp); - return error; + if (flags & 
XFS_QMOPT_ENOSPC) + return ENOSPC; + return EDQUOT; } diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h index 6f4fd37c67a..d2d20462fd4 100644 --- a/fs/xfs/support/debug.h +++ b/fs/xfs/support/debug.h @@ -41,10 +41,6 @@ extern void assfail(char *expr, char *f, int l); # define STATIC static noinline #endif -#ifndef STATIC_INLINE -# define STATIC_INLINE static inline -#endif - #else /* DEBUG */ #define ASSERT(expr) \ @@ -54,19 +50,5 @@ extern void assfail(char *expr, char *f, int l); # define STATIC noinline #endif -/* - * We stop inlining of inline functions in debug mode. - * Unfortunately, this means static inline in header files - * get multiple definitions, so they need to remain static. - * This then gives tonnes of warnings about unused but defined - * functions, so we need to add the unused attribute to prevent - * these spurious warnings. - */ -#ifndef STATIC_INLINE -# define STATIC_INLINE static __attribute__ ((unused)) noinline -#endif - #endif /* DEBUG */ - - #endif /* __XFS_SUPPORT_DEBUG_H__ */ diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c deleted file mode 100644 index 2d494c26717..00000000000 --- a/fs/xfs/support/ktrace.c +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include <xfs.h> - -static kmem_zone_t *ktrace_hdr_zone; -static kmem_zone_t *ktrace_ent_zone; -static int ktrace_zentries; - -void __init -ktrace_init(int zentries) -{ - ktrace_zentries = roundup_pow_of_two(zentries); - - ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t), - "ktrace_hdr"); - ASSERT(ktrace_hdr_zone); - - ktrace_ent_zone = kmem_zone_init(ktrace_zentries - * sizeof(ktrace_entry_t), - "ktrace_ent"); - ASSERT(ktrace_ent_zone); -} - -void __exit -ktrace_uninit(void) -{ - kmem_zone_destroy(ktrace_hdr_zone); - kmem_zone_destroy(ktrace_ent_zone); -} - -/* - * ktrace_alloc() - * - * Allocate a ktrace header and enough buffering for the given - * number of entries. Round the number of entries up to a - * power of 2 so we can do fast masking to get the index from - * the atomic index counter. - */ -ktrace_t * -ktrace_alloc(int nentries, unsigned int __nocast sleep) -{ - ktrace_t *ktp; - ktrace_entry_t *ktep; - int entries; - - ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep); - - if (ktp == (ktrace_t*)NULL) { - /* - * KM_SLEEP callers don't expect failure. - */ - if (sleep & KM_SLEEP) - panic("ktrace_alloc: NULL memory on KM_SLEEP request!"); - - return NULL; - } - - /* - * Special treatment for buffers with the ktrace_zentries entries - */ - entries = roundup_pow_of_two(nentries); - if (entries == ktrace_zentries) { - ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone, - sleep); - } else { - ktep = (ktrace_entry_t*)kmem_zalloc((entries * sizeof(*ktep)), - sleep | KM_LARGE); - } - - if (ktep == NULL) { - /* - * KM_SLEEP callers don't expect failure. 
- */ - if (sleep & KM_SLEEP) - panic("ktrace_alloc: NULL memory on KM_SLEEP request!"); - - kmem_free(ktp); - - return NULL; - } - - ktp->kt_entries = ktep; - ktp->kt_nentries = entries; - ASSERT(is_power_of_2(entries)); - ktp->kt_index_mask = entries - 1; - atomic_set(&ktp->kt_index, 0); - ktp->kt_rollover = 0; - return ktp; -} - - -/* - * ktrace_free() - * - * Free up the ktrace header and buffer. It is up to the caller - * to ensure that no-one is referencing it. - */ -void -ktrace_free(ktrace_t *ktp) -{ - if (ktp == (ktrace_t *)NULL) - return; - - /* - * Special treatment for the Vnode trace buffer. - */ - if (ktp->kt_nentries == ktrace_zentries) - kmem_zone_free(ktrace_ent_zone, ktp->kt_entries); - else - kmem_free(ktp->kt_entries); - - kmem_zone_free(ktrace_hdr_zone, ktp); -} - - -/* - * Enter the given values into the "next" entry in the trace buffer. - * kt_index is always the index of the next entry to be filled. - */ -void -ktrace_enter( - ktrace_t *ktp, - void *val0, - void *val1, - void *val2, - void *val3, - void *val4, - void *val5, - void *val6, - void *val7, - void *val8, - void *val9, - void *val10, - void *val11, - void *val12, - void *val13, - void *val14, - void *val15) -{ - int index; - ktrace_entry_t *ktep; - - ASSERT(ktp != NULL); - - /* - * Grab an entry by pushing the index up to the next one. 
- */ - index = atomic_add_return(1, &ktp->kt_index); - index = (index - 1) & ktp->kt_index_mask; - if (!ktp->kt_rollover && index == ktp->kt_nentries - 1) - ktp->kt_rollover = 1; - - ASSERT((index >= 0) && (index < ktp->kt_nentries)); - - ktep = &(ktp->kt_entries[index]); - - ktep->val[0] = val0; - ktep->val[1] = val1; - ktep->val[2] = val2; - ktep->val[3] = val3; - ktep->val[4] = val4; - ktep->val[5] = val5; - ktep->val[6] = val6; - ktep->val[7] = val7; - ktep->val[8] = val8; - ktep->val[9] = val9; - ktep->val[10] = val10; - ktep->val[11] = val11; - ktep->val[12] = val12; - ktep->val[13] = val13; - ktep->val[14] = val14; - ktep->val[15] = val15; -} - -/* - * Return the number of entries in the trace buffer. - */ -int -ktrace_nentries( - ktrace_t *ktp) -{ - int index; - if (ktp == NULL) - return 0; - - index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask; - return (ktp->kt_rollover ? ktp->kt_nentries : index); -} - -/* - * ktrace_first() - * - * This is used to find the start of the trace buffer. - * In conjunction with ktrace_next() it can be used to - * iterate through the entire trace buffer. This code does - * not do any locking because it is assumed that it is called - * from the debugger. - * - * The caller must pass in a pointer to a ktrace_snap - * structure in which we will keep some state used to - * iterate through the buffer. This state must not touched - * by any code outside of this module. 
- */ -ktrace_entry_t * -ktrace_first(ktrace_t *ktp, ktrace_snap_t *ktsp) -{ - ktrace_entry_t *ktep; - int index; - int nentries; - - if (ktp->kt_rollover) - index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask; - else - index = 0; - - ktsp->ks_start = index; - ktep = &(ktp->kt_entries[index]); - - nentries = ktrace_nentries(ktp); - index++; - if (index < nentries) { - ktsp->ks_index = index; - } else { - ktsp->ks_index = 0; - if (index > nentries) - ktep = NULL; - } - return ktep; -} - -/* - * ktrace_next() - * - * This is used to iterate through the entries of the given - * trace buffer. The caller must pass in the ktrace_snap_t - * structure initialized by ktrace_first(). The return value - * will be either a pointer to the next ktrace_entry or NULL - * if all of the entries have been traversed. - */ -ktrace_entry_t * -ktrace_next( - ktrace_t *ktp, - ktrace_snap_t *ktsp) -{ - int index; - ktrace_entry_t *ktep; - - index = ktsp->ks_index; - if (index == ktsp->ks_start) { - ktep = NULL; - } else { - ktep = &ktp->kt_entries[index]; - } - - index++; - if (index == ktrace_nentries(ktp)) { - ktsp->ks_index = 0; - } else { - ktsp->ks_index = index; - } - - return ktep; -} - -/* - * ktrace_skip() - * - * Skip the next "count" entries and return the entry after that. - * Return NULL if this causes us to iterate past the beginning again. - */ -ktrace_entry_t * -ktrace_skip( - ktrace_t *ktp, - int count, - ktrace_snap_t *ktsp) -{ - int index; - int new_index; - ktrace_entry_t *ktep; - int nentries = ktrace_nentries(ktp); - - index = ktsp->ks_index; - new_index = index + count; - while (new_index >= nentries) { - new_index -= nentries; - } - if (index == ktsp->ks_start) { - /* - * We've iterated around to the start, so we're done. - */ - ktep = NULL; - } else if ((new_index < index) && (index < ktsp->ks_index)) { - /* - * We've skipped past the start again, so we're done. 
- */ - ktep = NULL; - ktsp->ks_index = ktsp->ks_start; - } else { - ktep = &(ktp->kt_entries[new_index]); - new_index++; - if (new_index == nentries) { - ktsp->ks_index = 0; - } else { - ktsp->ks_index = new_index; - } - } - return ktep; -} diff --git a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h deleted file mode 100644 index 741d6947ca6..00000000000 --- a/fs/xfs/support/ktrace.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_KTRACE_H__ -#define __XFS_SUPPORT_KTRACE_H__ - -/* - * Trace buffer entry structure. - */ -typedef struct ktrace_entry { - void *val[16]; -} ktrace_entry_t; - -/* - * Trace buffer header structure. - */ -typedef struct ktrace { - int kt_nentries; /* number of entries in trace buf */ - atomic_t kt_index; /* current index in entries */ - unsigned int kt_index_mask; - int kt_rollover; - ktrace_entry_t *kt_entries; /* buffer of entries */ -} ktrace_t; - -/* - * Trace buffer snapshot structure. 
- */ -typedef struct ktrace_snap { - int ks_start; /* kt_index at time of snap */ - int ks_index; /* current index */ -} ktrace_snap_t; - - -#ifdef CONFIG_XFS_TRACE - -extern void ktrace_init(int zentries); -extern void ktrace_uninit(void); - -extern ktrace_t *ktrace_alloc(int, unsigned int __nocast); -extern void ktrace_free(ktrace_t *); - -extern void ktrace_enter( - ktrace_t *, - void *, - void *, - void *, - void *, - void *, - void *, - void *, - void *, - void *, - void *, - void *, - void *, - void *, - void *, - void *, - void *); - -extern ktrace_entry_t *ktrace_first(ktrace_t *, ktrace_snap_t *); -extern int ktrace_nentries(ktrace_t *); -extern ktrace_entry_t *ktrace_next(ktrace_t *, ktrace_snap_t *); -extern ktrace_entry_t *ktrace_skip(ktrace_t *, int, ktrace_snap_t *); - -#else -#define ktrace_init(x) do { } while (0) -#define ktrace_uninit() do { } while (0) -#endif /* CONFIG_XFS_TRACE */ - -#endif /* __XFS_SUPPORT_KTRACE_H__ */ diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index 17254b529c5..5ad8ad3a1dc 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -25,21 +25,5 @@ /* #define QUOTADEBUG 1 */ #endif -#ifdef CONFIG_XFS_TRACE -#define XFS_ALLOC_TRACE 1 -#define XFS_ATTR_TRACE 1 -#define XFS_BLI_TRACE 1 -#define XFS_BMAP_TRACE 1 -#define XFS_BTREE_TRACE 1 -#define XFS_DIR2_TRACE 1 -#define XFS_DQUOT_TRACE 1 -#define XFS_ILOCK_TRACE 1 -#define XFS_LOG_TRACE 1 -#define XFS_RW_TRACE 1 -#define XFS_BUF_TRACE 1 -#define XFS_INODE_TRACE 1 -#define XFS_FILESTREAMS_TRACE 1 -#endif - #include <linux-2.6/xfs_linux.h> #endif /* __XFS_H__ */ diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 947b150df8e..d13eeba2c8f 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -36,8 +36,8 @@ struct xfs_acl { }; /* On-disk XFS extended attribute names */ -#define SGI_ACL_FILE "SGI_ACL_FILE" -#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" +#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE" +#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT" #define SGI_ACL_FILE_SIZE 
(sizeof(SGI_ACL_FILE)-1) #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) @@ -49,7 +49,8 @@ extern int xfs_acl_chmod(struct inode *inode); extern int posix_acl_access_exists(struct inode *inode); extern int posix_acl_default_exists(struct inode *inode); -extern struct xattr_handler xfs_xattr_system_handler; +extern struct xattr_handler xfs_xattr_acl_access_handler; +extern struct xattr_handler xfs_xattr_acl_default_handler; #else # define xfs_check_acl NULL # define xfs_get_acl(inode, type) NULL diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index a5d54bf4931..b1a5a1ff88e 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -86,6 +86,20 @@ typedef struct xfs_agf { #define XFS_AGF_NUM_BITS 12 #define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) +#define XFS_AGF_FLAGS \ + { XFS_AGF_MAGICNUM, "MAGICNUM" }, \ + { XFS_AGF_VERSIONNUM, "VERSIONNUM" }, \ + { XFS_AGF_SEQNO, "SEQNO" }, \ + { XFS_AGF_LENGTH, "LENGTH" }, \ + { XFS_AGF_ROOTS, "ROOTS" }, \ + { XFS_AGF_LEVELS, "LEVELS" }, \ + { XFS_AGF_FLFIRST, "FLFIRST" }, \ + { XFS_AGF_FLLAST, "FLLAST" }, \ + { XFS_AGF_FLCOUNT, "FLCOUNT" }, \ + { XFS_AGF_FREEBLKS, "FREEBLKS" }, \ + { XFS_AGF_LONGEST, "LONGEST" }, \ + { XFS_AGF_BTREEBLKS, "BTREEBLKS" } + /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) #define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) @@ -173,17 +187,13 @@ typedef struct xfs_perag_busy { /* * Per-ag incore structure, copies of information in agf and agi, * to improve the performance of allocation group selection. 
- * - * pick sizes which fit in allocation buckets well */ -#if (BITS_PER_LONG == 32) -#define XFS_PAGB_NUM_SLOTS 84 -#elif (BITS_PER_LONG == 64) #define XFS_PAGB_NUM_SLOTS 128 -#endif -typedef struct xfs_perag -{ +typedef struct xfs_perag { + struct xfs_mount *pag_mount; /* owner filesystem */ + xfs_agnumber_t pag_agno; /* AG this structure belongs to */ + atomic_t pag_ref; /* perag reference count */ char pagf_init; /* this agf's entry is initialized */ char pagi_init; /* this agi's entry is initialized */ char pagf_metadata; /* the agf is preferred to be metadata */ @@ -196,8 +206,6 @@ typedef struct xfs_perag __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */ xfs_agino_t pagi_freecount; /* number of free inodes */ xfs_agino_t pagi_count; /* number of allocated inodes */ - int pagb_count; /* pagb slots in use */ - xfs_perag_busy_t *pagb_list; /* unstable blocks */ /* * Inode allocation search lookup optimisation. @@ -216,6 +224,8 @@ typedef struct xfs_perag rwlock_t pag_ici_lock; /* incore inode lock */ struct radix_tree_root pag_ici_root; /* incore inode cache root */ #endif + int pagb_count; /* pagb slots in use */ + xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */ } xfs_perag_t; /* diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 2cf944eb796..94cddbfb256 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -38,6 +38,7 @@ #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" +#include "xfs_trace.h" #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? 
((b) - (a)) : ((a) - (b))) @@ -51,30 +52,6 @@ xfs_alloc_search_busy(xfs_trans_t *tp, xfs_agblock_t bno, xfs_extlen_t len); -#if defined(XFS_ALLOC_TRACE) -ktrace_t *xfs_alloc_trace_buf; - -#define TRACE_ALLOC(s,a) \ - xfs_alloc_trace_alloc(__func__, s, a, __LINE__) -#define TRACE_FREE(s,a,b,x,f) \ - xfs_alloc_trace_free(__func__, s, mp, a, b, x, f, __LINE__) -#define TRACE_MODAGF(s,a,f) \ - xfs_alloc_trace_modagf(__func__, s, mp, a, f, __LINE__) -#define TRACE_BUSY(__func__,s,ag,agb,l,sl,tp) \ - xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) -#define TRACE_UNBUSY(__func__,s,ag,sl,tp) \ - xfs_alloc_trace_busy(__func__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) -#define TRACE_BUSYSEARCH(__func__,s,ag,agb,l,tp) \ - xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, 0, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) -#else -#define TRACE_ALLOC(s,a) -#define TRACE_FREE(s,a,b,x,f) -#define TRACE_MODAGF(s,a,f) -#define TRACE_BUSY(s,a,ag,agb,l,sl,tp) -#define TRACE_UNBUSY(fname,s,ag,sl,tp) -#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,tp) -#endif /* XFS_ALLOC_TRACE */ - /* * Prototypes for per-ag allocation routines */ @@ -498,124 +475,6 @@ xfs_alloc_read_agfl( return 0; } -#if defined(XFS_ALLOC_TRACE) -/* - * Add an allocation trace entry for an alloc call. 
- */ -STATIC void -xfs_alloc_trace_alloc( - const char *name, /* function tag string */ - char *str, /* additional string */ - xfs_alloc_arg_t *args, /* allocation argument structure */ - int line) /* source line number */ -{ - ktrace_enter(xfs_alloc_trace_buf, - (void *)(__psint_t)(XFS_ALLOC_KTRACE_ALLOC | (line << 16)), - (void *)name, - (void *)str, - (void *)args->mp, - (void *)(__psunsigned_t)args->agno, - (void *)(__psunsigned_t)args->agbno, - (void *)(__psunsigned_t)args->minlen, - (void *)(__psunsigned_t)args->maxlen, - (void *)(__psunsigned_t)args->mod, - (void *)(__psunsigned_t)args->prod, - (void *)(__psunsigned_t)args->minleft, - (void *)(__psunsigned_t)args->total, - (void *)(__psunsigned_t)args->alignment, - (void *)(__psunsigned_t)args->len, - (void *)((((__psint_t)args->type) << 16) | - (__psint_t)args->otype), - (void *)(__psint_t)((args->wasdel << 3) | - (args->wasfromfl << 2) | - (args->isfl << 1) | - (args->userdata << 0))); -} - -/* - * Add an allocation trace entry for a free call. - */ -STATIC void -xfs_alloc_trace_free( - const char *name, /* function tag string */ - char *str, /* additional string */ - xfs_mount_t *mp, /* file system mount point */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* a.g. relative block number */ - xfs_extlen_t len, /* length of extent */ - int isfl, /* set if is freelist allocation/free */ - int line) /* source line number */ -{ - ktrace_enter(xfs_alloc_trace_buf, - (void *)(__psint_t)(XFS_ALLOC_KTRACE_FREE | (line << 16)), - (void *)name, - (void *)str, - (void *)mp, - (void *)(__psunsigned_t)agno, - (void *)(__psunsigned_t)agbno, - (void *)(__psunsigned_t)len, - (void *)(__psint_t)isfl, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); -} - -/* - * Add an allocation trace entry for modifying an agf. 
- */ -STATIC void -xfs_alloc_trace_modagf( - const char *name, /* function tag string */ - char *str, /* additional string */ - xfs_mount_t *mp, /* file system mount point */ - xfs_agf_t *agf, /* new agf value */ - int flags, /* logging flags for agf */ - int line) /* source line number */ -{ - ktrace_enter(xfs_alloc_trace_buf, - (void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)), - (void *)name, - (void *)str, - (void *)mp, - (void *)(__psint_t)flags, - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_seqno), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_length), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flfirst), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_fllast), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flcount), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_freeblks), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_longest)); -} - -STATIC void -xfs_alloc_trace_busy( - const char *name, /* function tag string */ - char *str, /* additional string */ - xfs_mount_t *mp, /* file system mount point */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* a.g. relative block number */ - xfs_extlen_t len, /* length of extent */ - int slot, /* perag Busy slot */ - xfs_trans_t *tp, - int trtype, /* type: add, delete, search */ - int line) /* source line number */ -{ - ktrace_enter(xfs_alloc_trace_buf, - (void *)(__psint_t)(trtype | (line << 16)), - (void *)name, - (void *)str, - (void *)mp, - (void *)(__psunsigned_t)agno, - (void *)(__psunsigned_t)agbno, - (void *)(__psunsigned_t)len, - (void *)(__psint_t)slot, - (void *)tp, - NULL, NULL, NULL, NULL, NULL, NULL, NULL); -} -#endif /* XFS_ALLOC_TRACE */ - /* * Allocation group level functions. 
*/ @@ -665,9 +524,6 @@ xfs_alloc_ag_vextent( */ if (args->agbno != NULLAGBLOCK) { xfs_agf_t *agf; /* allocation group freelist header */ -#ifdef XFS_ALLOC_TRACE - xfs_mount_t *mp = args->mp; -#endif long slen = (long)args->len; ASSERT(args->len >= args->minlen && args->len <= args->maxlen); @@ -682,7 +538,6 @@ xfs_alloc_ag_vextent( args->pag->pagf_freeblks -= args->len; ASSERT(be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length)); - TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS); xfs_alloc_log_agf(args->tp, args->agbp, XFS_AGF_FREEBLKS); /* search the busylist for these blocks */ @@ -792,13 +647,14 @@ xfs_alloc_ag_vextent_exact( } xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - TRACE_ALLOC("normal", args); + + trace_xfs_alloc_exact_done(args); args->wasfromfl = 0; return 0; error0: xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); - TRACE_ALLOC("error", args); + trace_xfs_alloc_exact_error(args); return error; } @@ -958,7 +814,7 @@ xfs_alloc_ag_vextent_near( args->len = blen; if (!xfs_alloc_fix_minleft(args)) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - TRACE_ALLOC("nominleft", args); + trace_xfs_alloc_near_nominleft(args); return 0; } blen = args->len; @@ -981,7 +837,8 @@ xfs_alloc_ag_vextent_near( goto error0; xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); - TRACE_ALLOC("first", args); + + trace_xfs_alloc_near_first(args); return 0; } /* @@ -1272,7 +1129,7 @@ xfs_alloc_ag_vextent_near( * If we couldn't get anything, give up. 
*/ if (bno_cur_lt == NULL && bno_cur_gt == NULL) { - TRACE_ALLOC("neither", args); + trace_xfs_alloc_size_neither(args); args->agbno = NULLAGBLOCK; return 0; } @@ -1299,7 +1156,7 @@ xfs_alloc_ag_vextent_near( args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); if (!xfs_alloc_fix_minleft(args)) { - TRACE_ALLOC("nominleft", args); + trace_xfs_alloc_near_nominleft(args); xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); return 0; @@ -1314,13 +1171,18 @@ xfs_alloc_ag_vextent_near( if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, ltnew, rlen, XFSA_FIXUP_BNO_OK))) goto error0; - TRACE_ALLOC(j ? "gt" : "lt", args); + + if (j) + trace_xfs_alloc_near_greater(args); + else + trace_xfs_alloc_near_lesser(args); + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); return 0; error0: - TRACE_ALLOC("error", args); + trace_xfs_alloc_near_error(args); if (cnt_cur != NULL) xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); if (bno_cur_lt != NULL) @@ -1371,7 +1233,7 @@ xfs_alloc_ag_vextent_size( goto error0; if (i == 0 || flen == 0) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - TRACE_ALLOC("noentry", args); + trace_xfs_alloc_size_noentry(args); return 0; } ASSERT(i == 1); @@ -1448,7 +1310,7 @@ xfs_alloc_ag_vextent_size( xfs_alloc_fix_len(args); if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - TRACE_ALLOC("nominleft", args); + trace_xfs_alloc_size_nominleft(args); args->agbno = NULLAGBLOCK; return 0; } @@ -1471,11 +1333,11 @@ xfs_alloc_ag_vextent_size( args->agbno + args->len <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), error0); - TRACE_ALLOC("normal", args); + trace_xfs_alloc_size_done(args); return 0; error0: - TRACE_ALLOC("error", args); + trace_xfs_alloc_size_error(args); if (cnt_cur) xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); if (bno_cur) @@ -1534,7 
+1396,7 @@ xfs_alloc_ag_vextent_small( be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), error0); args->wasfromfl = 1; - TRACE_ALLOC("freelist", args); + trace_xfs_alloc_small_freelist(args); *stat = 0; return 0; } @@ -1556,17 +1418,17 @@ xfs_alloc_ag_vextent_small( */ if (flen < args->minlen) { args->agbno = NULLAGBLOCK; - TRACE_ALLOC("notenough", args); + trace_xfs_alloc_small_notenough(args); flen = 0; } *fbnop = fbno; *flenp = flen; *stat = 1; - TRACE_ALLOC("normal", args); + trace_xfs_alloc_small_done(args); return 0; error0: - TRACE_ALLOC("error", args); + trace_xfs_alloc_small_error(args); return error; } @@ -1800,26 +1662,25 @@ xfs_free_ag_extent( xfs_agf_t *agf; xfs_perag_t *pag; /* per allocation group data */ + pag = xfs_perag_get(mp, agno); + pag->pagf_freeblks += len; + xfs_perag_put(pag); + agf = XFS_BUF_TO_AGF(agbp); - pag = &mp->m_perag[agno]; be32_add_cpu(&agf->agf_freeblks, len); xfs_trans_agblocks_delta(tp, len); - pag->pagf_freeblks += len; XFS_WANT_CORRUPTED_GOTO( be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length), error0); - TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS); xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); if (!isfl) xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); XFS_STATS_INC(xs_freex); XFS_STATS_ADD(xs_freeb, len); } - TRACE_FREE(haveleft ? - (haveright ? "both" : "left") : - (haveright ? 
"right" : "none"), - agno, bno, len, isfl); + + trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); /* * Since blocks move to the free list without the coordination @@ -1836,7 +1697,7 @@ xfs_free_ag_extent( return 0; error0: - TRACE_FREE("error", agno, bno, len, isfl); + trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1); if (bno_cur) xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); if (cnt_cur) @@ -2110,10 +1971,12 @@ xfs_alloc_get_freelist( xfs_trans_brelse(tp, agflbp); if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) agf->agf_flfirst = 0; - pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)]; + + pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); be32_add_cpu(&agf->agf_flcount, -1); xfs_trans_agflist_delta(tp, -1); pag->pagf_flcount--; + xfs_perag_put(pag); logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; if (btreeblk) { @@ -2122,7 +1985,6 @@ xfs_alloc_get_freelist( logflags |= XFS_AGF_BTREEBLKS; } - TRACE_MODAGF(NULL, agf, logflags); xfs_alloc_log_agf(tp, agbp, logflags); *bnop = bno; @@ -2165,6 +2027,8 @@ xfs_alloc_log_agf( sizeof(xfs_agf_t) }; + trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_); + xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); } @@ -2218,7 +2082,8 @@ xfs_alloc_put_freelist( be32_add_cpu(&agf->agf_fllast, 1); if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) agf->agf_fllast = 0; - pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)]; + + pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); be32_add_cpu(&agf->agf_flcount, 1); xfs_trans_agflist_delta(tp, 1); pag->pagf_flcount++; @@ -2229,14 +2094,13 @@ xfs_alloc_put_freelist( pag->pagf_btreeblks--; logflags |= XFS_AGF_BTREEBLKS; } + xfs_perag_put(pag); - TRACE_MODAGF(NULL, agf, logflags); xfs_alloc_log_agf(tp, agbp, logflags); ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)]; *blockp = cpu_to_be32(bno); - 
TRACE_MODAGF(NULL, agf, logflags); xfs_alloc_log_agf(tp, agbp, logflags); xfs_trans_log_buf(tp, agflbp, (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl), @@ -2294,7 +2158,6 @@ xfs_read_agf( xfs_trans_brelse(tp, *bpp); return XFS_ERROR(EFSCORRUPTED); } - XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF); return 0; } @@ -2317,7 +2180,7 @@ xfs_alloc_read_agf( ASSERT(agno != NULLAGNUMBER); error = xfs_read_agf(mp, tp, agno, - (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0, + (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, bpp); if (error) return error; @@ -2326,7 +2189,7 @@ xfs_alloc_read_agf( ASSERT(!XFS_BUF_GETERROR(*bpp)); agf = XFS_BUF_TO_AGF(*bpp); - pag = &mp->m_perag[agno]; + pag = xfs_perag_get(mp, agno); if (!pag->pagf_init) { pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); @@ -2337,8 +2200,8 @@ xfs_alloc_read_agf( pag->pagf_levels[XFS_BTNUM_CNTi] = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); spin_lock_init(&pag->pagb_lock); - pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS * - sizeof(xfs_perag_busy_t), KM_SLEEP); + pag->pagb_count = 0; + memset(pag->pagb_list, 0, sizeof(pag->pagb_list)); pag->pagf_init = 1; } #ifdef DEBUG @@ -2353,6 +2216,7 @@ xfs_alloc_read_agf( be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); } #endif + xfs_perag_put(pag); return 0; } @@ -2399,7 +2263,7 @@ xfs_alloc_vextent( args->minlen > args->maxlen || args->minlen > agsize || args->mod >= args->prod) { args->fsbno = NULLFSBLOCK; - TRACE_ALLOC("badargs", args); + trace_xfs_alloc_vextent_badargs(args); return 0; } minleft = args->minleft; @@ -2412,24 +2276,21 @@ xfs_alloc_vextent( * These three force us into a single a.g. 
*/ args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); - down_read(&mp->m_peraglock); - args->pag = &mp->m_perag[args->agno]; + args->pag = xfs_perag_get(mp, args->agno); args->minleft = 0; error = xfs_alloc_fix_freelist(args, 0); args->minleft = minleft; if (error) { - TRACE_ALLOC("nofix", args); + trace_xfs_alloc_vextent_nofix(args); goto error0; } if (!args->agbp) { - up_read(&mp->m_peraglock); - TRACE_ALLOC("noagbp", args); + trace_xfs_alloc_vextent_noagbp(args); break; } args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); if ((error = xfs_alloc_ag_vextent(args))) goto error0; - up_read(&mp->m_peraglock); break; case XFS_ALLOCTYPE_START_BNO: /* @@ -2481,14 +2342,13 @@ xfs_alloc_vextent( * Loop over allocation groups twice; first time with * trylock set, second time without. */ - down_read(&mp->m_peraglock); for (;;) { - args->pag = &mp->m_perag[args->agno]; + args->pag = xfs_perag_get(mp, args->agno); if (no_min) args->minleft = 0; error = xfs_alloc_fix_freelist(args, flags); args->minleft = minleft; if (error) { - TRACE_ALLOC("nofix", args); + trace_xfs_alloc_vextent_nofix(args); goto error0; } /* @@ -2499,7 +2359,9 @@ xfs_alloc_vextent( goto error0; break; } - TRACE_ALLOC("loopfailed", args); + + trace_xfs_alloc_vextent_loopfailed(args); + /* * Didn't work, figure out the next iteration. 
*/ @@ -2526,7 +2388,7 @@ xfs_alloc_vextent( if (args->agno == sagno) { if (no_min == 1) { args->agbno = NULLAGBLOCK; - TRACE_ALLOC("allfailed", args); + trace_xfs_alloc_vextent_allfailed(args); break; } if (flags == 0) { @@ -2540,8 +2402,8 @@ xfs_alloc_vextent( } } } + xfs_perag_put(args->pag); } - up_read(&mp->m_peraglock); if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { if (args->agno == sagno) mp->m_agfrotor = (mp->m_agfrotor + 1) % @@ -2567,9 +2429,10 @@ xfs_alloc_vextent( args->len); #endif } + xfs_perag_put(args->pag); return 0; error0: - up_read(&mp->m_peraglock); + xfs_perag_put(args->pag); return error; } @@ -2594,8 +2457,7 @@ xfs_free_extent( args.agno = XFS_FSB_TO_AGNO(args.mp, bno); ASSERT(args.agno < args.mp->m_sb.sb_agcount); args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); - down_read(&args.mp->m_peraglock); - args.pag = &args.mp->m_perag[args.agno]; + args.pag = xfs_perag_get(args.mp, args.agno); if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) goto error0; #ifdef DEBUG @@ -2605,7 +2467,7 @@ xfs_free_extent( #endif error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); error0: - up_read(&args.mp->m_peraglock); + xfs_perag_put(args.pag); return error; } @@ -2626,15 +2488,15 @@ xfs_alloc_mark_busy(xfs_trans_t *tp, xfs_agblock_t bno, xfs_extlen_t len) { - xfs_mount_t *mp; xfs_perag_busy_t *bsy; + struct xfs_perag *pag; int n; - mp = tp->t_mountp; - spin_lock(&mp->m_perag[agno].pagb_lock); + pag = xfs_perag_get(tp->t_mountp, agno); + spin_lock(&pag->pagb_lock); /* search pagb_list for an open slot */ - for (bsy = mp->m_perag[agno].pagb_list, n = 0; + for (bsy = pag->pagb_list, n = 0; n < XFS_PAGB_NUM_SLOTS; bsy++, n++) { if (bsy->busy_tp == NULL) { @@ -2642,16 +2504,16 @@ xfs_alloc_mark_busy(xfs_trans_t *tp, } } + trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len, n); + if (n < XFS_PAGB_NUM_SLOTS) { - bsy = &mp->m_perag[agno].pagb_list[n]; - mp->m_perag[agno].pagb_count++; - TRACE_BUSY("xfs_alloc_mark_busy", "got", 
agno, bno, len, n, tp); + bsy = &pag->pagb_list[n]; + pag->pagb_count++; bsy->busy_start = bno; bsy->busy_length = len; bsy->busy_tp = tp; xfs_trans_add_busy(tp, agno, n); } else { - TRACE_BUSY("xfs_alloc_mark_busy", "FULL", agno, bno, len, -1, tp); /* * The busy list is full! Since it is now not possible to * track the free block, make this a synchronous transaction @@ -2661,7 +2523,8 @@ xfs_alloc_mark_busy(xfs_trans_t *tp, xfs_trans_set_sync(tp); } - spin_unlock(&mp->m_perag[agno].pagb_lock); + spin_unlock(&pag->pagb_lock); + xfs_perag_put(pag); } void @@ -2669,24 +2532,23 @@ xfs_alloc_clear_busy(xfs_trans_t *tp, xfs_agnumber_t agno, int idx) { - xfs_mount_t *mp; + struct xfs_perag *pag; xfs_perag_busy_t *list; - mp = tp->t_mountp; + ASSERT(idx < XFS_PAGB_NUM_SLOTS); + pag = xfs_perag_get(tp->t_mountp, agno); + spin_lock(&pag->pagb_lock); + list = pag->pagb_list; - spin_lock(&mp->m_perag[agno].pagb_lock); - list = mp->m_perag[agno].pagb_list; + trace_xfs_alloc_unbusy(tp->t_mountp, agno, idx, list[idx].busy_tp == tp); - ASSERT(idx < XFS_PAGB_NUM_SLOTS); if (list[idx].busy_tp == tp) { - TRACE_UNBUSY("xfs_alloc_clear_busy", "found", agno, idx, tp); list[idx].busy_tp = NULL; - mp->m_perag[agno].pagb_count--; - } else { - TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp); + pag->pagb_count--; } - spin_unlock(&mp->m_perag[agno].pagb_lock); + spin_unlock(&pag->pagb_lock); + xfs_perag_put(pag); } @@ -2700,48 +2562,44 @@ xfs_alloc_search_busy(xfs_trans_t *tp, xfs_agblock_t bno, xfs_extlen_t len) { - xfs_mount_t *mp; + struct xfs_perag *pag; xfs_perag_busy_t *bsy; xfs_agblock_t uend, bend; - xfs_lsn_t lsn; + xfs_lsn_t lsn = 0; int cnt; - mp = tp->t_mountp; - - spin_lock(&mp->m_perag[agno].pagb_lock); - cnt = mp->m_perag[agno].pagb_count; + pag = xfs_perag_get(tp->t_mountp, agno); + spin_lock(&pag->pagb_lock); + cnt = pag->pagb_count; + /* + * search pagb_list for this slot, skipping open slots. 
We have to + * search the entire array as there may be multiple overlaps and + * we have to get the most recent LSN for the log force to push out + * all the transactions that span the range. + */ uend = bno + len - 1; - - /* search pagb_list for this slot, skipping open slots */ - for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) { - - /* - * (start1,length1) within (start2, length2) - */ - if (bsy->busy_tp != NULL) { - bend = bsy->busy_start + bsy->busy_length - 1; - if ((bno > bend) || (uend < bsy->busy_start)) { - cnt--; - } else { - TRACE_BUSYSEARCH("xfs_alloc_search_busy", - "found1", agno, bno, len, tp); - break; - } - } + for (cnt = 0; cnt < pag->pagb_count; cnt++) { + bsy = &pag->pagb_list[cnt]; + if (!bsy->busy_tp) + continue; + + bend = bsy->busy_start + bsy->busy_length - 1; + if (bno > bend || uend < bsy->busy_start) + continue; + + /* (start1,length1) within (start2, length2) */ + if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0) + lsn = bsy->busy_tp->t_commit_lsn; } + spin_unlock(&pag->pagb_lock); + xfs_perag_put(pag); + trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn); /* * If a block was found, force the log through the LSN of the * transaction that freed the block */ - if (cnt) { - TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, tp); - lsn = bsy->busy_tp->t_commit_lsn; - spin_unlock(&mp->m_perag[agno].pagb_lock); - xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); - } else { - TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, tp); - spin_unlock(&mp->m_perag[agno].pagb_lock); - } + if (lsn) + xfs_log_force_lsn(tp->t_mountp, lsn, XFS_LOG_SYNC); } diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index e704caee10d..599bffa3978 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -37,6 +37,15 @@ typedef enum xfs_alloctype XFS_ALLOCTYPE_THIS_BNO /* at exactly this block */ } xfs_alloctype_t; +#define XFS_ALLOC_TYPES \ + { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \ + { 
XFS_ALLOCTYPE_FIRST_AG, "FIRST_AG" }, \ + { XFS_ALLOCTYPE_START_AG, "START_AG" }, \ + { XFS_ALLOCTYPE_THIS_AG, "THIS_AG" }, \ + { XFS_ALLOCTYPE_START_BNO, "START_BNO" }, \ + { XFS_ALLOCTYPE_NEAR_BNO, "NEAR_BNO" }, \ + { XFS_ALLOCTYPE_THIS_BNO, "THIS_BNO" } + /* * Flags for xfs_alloc_fix_freelist. */ @@ -109,24 +118,6 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp, #ifdef __KERNEL__ -#if defined(XFS_ALLOC_TRACE) -/* - * Allocation tracing buffer size. - */ -#define XFS_ALLOC_TRACE_SIZE 4096 -extern ktrace_t *xfs_alloc_trace_buf; - -/* - * Types for alloc tracing. - */ -#define XFS_ALLOC_KTRACE_ALLOC 1 -#define XFS_ALLOC_KTRACE_FREE 2 -#define XFS_ALLOC_KTRACE_MODAGF 3 -#define XFS_ALLOC_KTRACE_BUSY 4 -#define XFS_ALLOC_KTRACE_UNBUSY 5 -#define XFS_ALLOC_KTRACE_BUSYSEARCH 6 -#endif - void xfs_alloc_mark_busy(xfs_trans_t *tp, xfs_agnumber_t agno, diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index c10c3a292d3..b726e10d2c1 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -39,6 +39,7 @@ #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" +#include "xfs_trace.h" STATIC struct xfs_btree_cur * @@ -60,12 +61,14 @@ xfs_allocbt_set_root( struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); int btnum = cur->bc_btnum; + struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno); ASSERT(ptr->s != 0); agf->agf_roots[btnum] = ptr->s; be32_add_cpu(&agf->agf_levels[btnum], inc); - cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc; + pag->pagf_levels[btnum] += inc; + xfs_perag_put(pag); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); } @@ -149,6 +152,7 @@ xfs_allocbt_update_lastrec( { struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); + struct xfs_perag *pag; __be32 len; int numrecs; @@ -192,7 +196,9 @@ xfs_allocbt_update_lastrec( } agf->agf_longest = len; - 
cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len); + pag = xfs_perag_get(cur->bc_mp, seqno); + pag->pagf_longest = be32_to_cpu(len); + xfs_perag_put(pag); xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST); } diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 4ece1906bd4..b9c196a53c4 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -47,6 +47,7 @@ #include "xfs_trans_space.h" #include "xfs_rw.h" #include "xfs_vnodeops.h" +#include "xfs_trace.h" /* * xfs_attr.c @@ -89,19 +90,15 @@ STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args); #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ -#if defined(XFS_ATTR_TRACE) -ktrace_t *xfs_attr_trace_buf; -#endif - STATIC int xfs_attr_name_to_xname( struct xfs_name *xname, - const char *aname) + const unsigned char *aname) { if (!aname) return EINVAL; xname->name = aname; - xname->len = strlen(aname); + xname->len = strlen((char *)aname); if (xname->len >= MAXNAMELEN) return EFAULT; /* match IRIX behaviour */ @@ -123,9 +120,13 @@ xfs_inode_hasattr( * Overall external interface routines. 
*========================================================================*/ -int -xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name, - char *value, int *valuelenp, int flags) +STATIC int +xfs_attr_get_int( + struct xfs_inode *ip, + struct xfs_name *name, + unsigned char *value, + int *valuelenp, + int flags) { xfs_da_args_t args; int error; @@ -170,8 +171,8 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name, int xfs_attr_get( xfs_inode_t *ip, - const char *name, - char *value, + const unsigned char *name, + unsigned char *value, int *valuelenp, int flags) { @@ -188,7 +189,7 @@ xfs_attr_get( return error; xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_attr_fetch(ip, &xname, value, valuelenp, flags); + error = xfs_attr_get_int(ip, &xname, value, valuelenp, flags); xfs_iunlock(ip, XFS_ILOCK_SHARED); return(error); } @@ -196,7 +197,7 @@ xfs_attr_get( /* * Calculate how many blocks we need for the new attribute, */ -int +STATIC int xfs_attr_calc_size( struct xfs_inode *ip, int namelen, @@ -234,8 +235,12 @@ xfs_attr_calc_size( } STATIC int -xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, - char *value, int valuelen, int flags) +xfs_attr_set_int( + struct xfs_inode *dp, + struct xfs_name *name, + unsigned char *value, + int valuelen, + int flags) { xfs_da_args_t args; xfs_fsblock_t firstblock; @@ -451,8 +456,8 @@ out: int xfs_attr_set( xfs_inode_t *dp, - const char *name, - char *value, + const unsigned char *name, + unsigned char *value, int valuelen, int flags) { @@ -599,7 +604,7 @@ out: int xfs_attr_remove( xfs_inode_t *dp, - const char *name, + const unsigned char *name, int flags) { int error; @@ -636,7 +641,6 @@ xfs_attr_list_int(xfs_attr_list_context_t *context) return EIO; xfs_ilock(dp, XFS_ILOCK_SHARED); - xfs_attr_trace_l_c("syscall start", context); /* * Decide on what work routines to call based on the inode size. 
@@ -652,7 +656,6 @@ xfs_attr_list_int(xfs_attr_list_context_t *context) } xfs_iunlock(dp, XFS_ILOCK_SHARED); - xfs_attr_trace_l_c("syscall end", context); return error; } @@ -670,9 +673,13 @@ xfs_attr_list_int(xfs_attr_list_context_t *context) */ /*ARGSUSED*/ STATIC int -xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags, - char *name, int namelen, - int valuelen, char *value) +xfs_attr_put_listent( + xfs_attr_list_context_t *context, + int flags, + unsigned char *name, + int namelen, + int valuelen, + unsigned char *value) { struct attrlist *alist = (struct attrlist *)context->alist; attrlist_ent_t *aep; @@ -698,7 +705,7 @@ xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags, context->count * sizeof(alist->al_offset[0]); context->firstu -= ATTR_ENTSIZE(namelen); if (context->firstu < arraytop) { - xfs_attr_trace_l_c("buffer full", context); + trace_xfs_attr_list_full(context); alist->al_more = 1; context->seen_enough = 1; return 1; @@ -710,7 +717,7 @@ xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags, aep->a_name[namelen] = 0; alist->al_offset[context->count++] = context->firstu; alist->al_count = context->count; - xfs_attr_trace_l_c("add", context); + trace_xfs_attr_list_add(context); return 0; } @@ -1849,7 +1856,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) node = bp->data; switch (be16_to_cpu(node->hdr.info.magic)) { case XFS_DA_NODE_MAGIC: - xfs_attr_trace_l_cn("wrong blk", context, node); + trace_xfs_attr_list_wrong_blk(context); xfs_da_brelse(NULL, bp); bp = NULL; break; @@ -1857,20 +1864,18 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) leaf = bp->data; if (cursor->hashval > be32_to_cpu(leaf->entries[ be16_to_cpu(leaf->hdr.count)-1].hashval)) { - xfs_attr_trace_l_cl("wrong blk", - context, leaf); + trace_xfs_attr_list_wrong_blk(context); xfs_da_brelse(NULL, bp); bp = NULL; } else if (cursor->hashval <= be32_to_cpu(leaf->entries[0].hashval)) { - xfs_attr_trace_l_cl("maybe wrong blk", - context, 
leaf); + trace_xfs_attr_list_wrong_blk(context); xfs_da_brelse(NULL, bp); bp = NULL; } break; default: - xfs_attr_trace_l_c("wrong blk - ??", context); + trace_xfs_attr_list_wrong_blk(context); xfs_da_brelse(NULL, bp); bp = NULL; } @@ -1915,8 +1920,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) if (cursor->hashval <= be32_to_cpu(btree->hashval)) { cursor->blkno = be32_to_cpu(btree->before); - xfs_attr_trace_l_cb("descending", - context, btree); + trace_xfs_attr_list_node_descend(context, + btree); break; } } @@ -1983,7 +1988,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE]; xfs_mount_t *mp; xfs_daddr_t dblkno; - xfs_caddr_t dst; + void *dst; xfs_buf_t *bp; int nmap, error, tmp, valuelen, blkcnt, i; xfs_dablk_t lblkno; @@ -2010,15 +2015,14 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, - blkcnt, - XFS_BUF_LOCK | XBF_DONT_BLOCK, + blkcnt, XBF_LOCK | XBF_DONT_BLOCK, &bp); if (error) return(error); tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : XFS_BUF_SIZE(bp); - xfs_biomove(bp, 0, tmp, dst, XFS_B_READ); + xfs_biomove(bp, 0, tmp, dst, XBF_READ); xfs_buf_relse(bp); dst += tmp; valuelen -= tmp; @@ -2042,7 +2046,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) xfs_inode_t *dp; xfs_bmbt_irec_t map; xfs_daddr_t dblkno; - xfs_caddr_t src; + void *src; xfs_buf_t *bp; xfs_dablk_t lblkno; int blkcnt, valuelen, nmap, error, tmp, committed; @@ -2143,14 +2147,14 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); - bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt, - XFS_BUF_LOCK | XBF_DONT_BLOCK); + bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, + XBF_LOCK | XBF_DONT_BLOCK); ASSERT(bp); ASSERT(!XFS_BUF_GETERROR(bp)); tmp = (valuelen < XFS_BUF_SIZE(bp)) ? 
valuelen : XFS_BUF_SIZE(bp); - xfs_biomove(bp, 0, tmp, src, XFS_B_WRITE); + xfs_biomove(bp, 0, tmp, src, XBF_WRITE); if (tmp < XFS_BUF_SIZE(bp)) xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ @@ -2211,8 +2215,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) /* * If the "remote" value is in the cache, remove it. */ - bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, - XFS_INCORE_TRYLOCK); + bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK); if (bp) { XFS_BUF_STALE(bp); XFS_BUF_UNDELAYWRITE(bp); @@ -2266,85 +2269,3 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) } return(0); } - -#if defined(XFS_ATTR_TRACE) -/* - * Add a trace buffer entry for an attr_list context structure. - */ -void -xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context) -{ - xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, context, - (__psunsigned_t)NULL, - (__psunsigned_t)NULL, - (__psunsigned_t)NULL); -} - -/* - * Add a trace buffer entry for a context structure and a Btree node. - */ -void -xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context, - struct xfs_da_intnode *node) -{ - xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, context, - (__psunsigned_t)be16_to_cpu(node->hdr.count), - (__psunsigned_t)be32_to_cpu(node->btree[0].hashval), - (__psunsigned_t)be32_to_cpu(node->btree[ - be16_to_cpu(node->hdr.count)-1].hashval)); -} - -/* - * Add a trace buffer entry for a context structure and a Btree element. - */ -void -xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context, - struct xfs_da_node_entry *btree) -{ - xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, context, - (__psunsigned_t)be32_to_cpu(btree->hashval), - (__psunsigned_t)be32_to_cpu(btree->before), - (__psunsigned_t)NULL); -} - -/* - * Add a trace buffer entry for a context structure and a leaf block. 
- */ -void -xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context, - struct xfs_attr_leafblock *leaf) -{ - xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, context, - (__psunsigned_t)be16_to_cpu(leaf->hdr.count), - (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval), - (__psunsigned_t)be32_to_cpu(leaf->entries[ - be16_to_cpu(leaf->hdr.count)-1].hashval)); -} - -/* - * Add a trace buffer entry for the arguments given to the routine, - * generic form. - */ -void -xfs_attr_trace_enter(int type, char *where, - struct xfs_attr_list_context *context, - __psunsigned_t a13, __psunsigned_t a14, - __psunsigned_t a15) -{ - ASSERT(xfs_attr_trace_buf); - ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type), - (void *)((__psunsigned_t)where), - (void *)((__psunsigned_t)context->dp), - (void *)((__psunsigned_t)context->cursor->hashval), - (void *)((__psunsigned_t)context->cursor->blkno), - (void *)((__psunsigned_t)context->cursor->offset), - (void *)((__psunsigned_t)context->alist), - (void *)((__psunsigned_t)context->bufsize), - (void *)((__psunsigned_t)context->count), - (void *)((__psunsigned_t)context->firstu), - NULL, - (void *)((__psunsigned_t)context->dupcnt), - (void *)((__psunsigned_t)context->flags), - (void *)a13, (void *)a14, (void *)a15); -} -#endif /* XFS_ATTR_TRACE */ diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index fb3b2a68b9b..e920d68ef50 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h @@ -48,6 +48,16 @@ struct xfs_attr_list_context; #define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */ #define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ +#define XFS_ATTR_FLAGS \ + { ATTR_DONTFOLLOW, "DONTFOLLOW" }, \ + { ATTR_ROOT, "ROOT" }, \ + { ATTR_TRUST, "TRUST" }, \ + { ATTR_SECURE, "SECURE" }, \ + { ATTR_CREATE, "CREATE" }, \ + { ATTR_REPLACE, "REPLACE" }, \ + { ATTR_KERNOTIME, "KERNOTIME" }, \ + { ATTR_KERNOVAL, "KERNOVAL" } + /* * The maximum size (into the kernel or returned from 
the kernel) of an * attribute value or the buffer used for an attr_list() call. Larger @@ -103,7 +113,7 @@ typedef struct attrlist_cursor_kern { typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int, - char *, int, int, char *); + unsigned char *, int, int, unsigned char *); typedef struct xfs_attr_list_context { struct xfs_inode *dp; /* inode */ @@ -129,9 +139,7 @@ typedef struct xfs_attr_list_context { /* * Overall external interface routines. */ -int xfs_attr_calc_size(struct xfs_inode *, int, int, int *); int xfs_attr_inactive(struct xfs_inode *dp); -int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int); int xfs_attr_rmtval_get(struct xfs_da_args *args); int xfs_attr_list_int(struct xfs_attr_list_context *); diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index afdc8911637..a90ce74fc25 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -42,6 +42,7 @@ #include "xfs_attr.h" #include "xfs_attr_leaf.h" #include "xfs_error.h" +#include "xfs_trace.h" /* * xfs_attr_leaf.c @@ -98,7 +99,7 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); * If namespace bits don't match return 0. * If all match then return 1. 
*/ -STATIC_INLINE int +STATIC int xfs_attr_namesp_match(int arg_flags, int ondisk_flags) { return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); @@ -520,11 +521,11 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) sfe = &sf->list[0]; for (i = 0; i < sf->hdr.count; i++) { - nargs.name = (char *)sfe->nameval; + nargs.name = sfe->nameval; nargs.namelen = sfe->namelen; - nargs.value = (char *)&sfe->nameval[nargs.namelen]; + nargs.value = &sfe->nameval[nargs.namelen]; nargs.valuelen = sfe->valuelen; - nargs.hashval = xfs_da_hashname((char *)sfe->nameval, + nargs.hashval = xfs_da_hashname(sfe->nameval, sfe->namelen); nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); error = xfs_attr_leaf_lookup_int(bp, &nargs); /* set a->index */ @@ -594,7 +595,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) cursor = context->cursor; ASSERT(cursor != NULL); - xfs_attr_trace_l_c("sf start", context); + trace_xfs_attr_list_sf(context); /* * If the buffer is large enough and the cursor is at the start, @@ -611,10 +612,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { error = context->put_listent(context, sfe->flags, - (char *)sfe->nameval, + sfe->nameval, (int)sfe->namelen, (int)sfe->valuelen, - (char*)&sfe->nameval[sfe->namelen]); + &sfe->nameval[sfe->namelen]); /* * Either search callback finished early or @@ -627,7 +628,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) return error; sfe = XFS_ATTR_SF_NEXTENTRY(sfe); } - xfs_attr_trace_l_c("sf big-gulp", context); + trace_xfs_attr_list_sf_all(context); return(0); } @@ -653,14 +654,13 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) XFS_CORRUPTION_ERROR("xfs_attr_shortform_list", XFS_ERRLEVEL_LOW, context->dp->i_mount, sfe); - xfs_attr_trace_l_c("sf corrupted", context); kmem_free(sbuf); return XFS_ERROR(EFSCORRUPTED); } sbp->entno = i; - sbp->hash = xfs_da_hashname((char *)sfe->nameval, 
sfe->namelen); - sbp->name = (char *)sfe->nameval; + sbp->hash = xfs_da_hashname(sfe->nameval, sfe->namelen); + sbp->name = sfe->nameval; sbp->namelen = sfe->namelen; /* These are bytes, and both on-disk, don't endian-flip */ sbp->valuelen = sfe->valuelen; @@ -693,7 +693,6 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) } if (i == nsbuf) { kmem_free(sbuf); - xfs_attr_trace_l_c("blk end", context); return(0); } @@ -719,7 +718,6 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) } kmem_free(sbuf); - xfs_attr_trace_l_c("sf E-O-F", context); return(0); } @@ -820,9 +818,9 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) continue; ASSERT(entry->flags & XFS_ATTR_LOCAL); name_loc = xfs_attr_leaf_name_local(leaf, i); - nargs.name = (char *)name_loc->nameval; + nargs.name = name_loc->nameval; nargs.namelen = name_loc->namelen; - nargs.value = (char *)&name_loc->nameval[nargs.namelen]; + nargs.value = &name_loc->nameval[nargs.namelen]; nargs.valuelen = be16_to_cpu(name_loc->valuelen); nargs.hashval = be32_to_cpu(entry->hashval); nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags); @@ -2323,7 +2321,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) cursor = context->cursor; cursor->initted = 1; - xfs_attr_trace_l_cl("blk start", context, leaf); + trace_xfs_attr_list_leaf(context); /* * Re-find our place in the leaf block if this is a new syscall. 
@@ -2344,7 +2342,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) } } if (i == be16_to_cpu(leaf->hdr.count)) { - xfs_attr_trace_l_c("not found", context); + trace_xfs_attr_list_notfound(context); return(0); } } else { @@ -2372,10 +2370,10 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) retval = context->put_listent(context, entry->flags, - (char *)name_loc->nameval, + name_loc->nameval, (int)name_loc->namelen, be16_to_cpu(name_loc->valuelen), - (char *)&name_loc->nameval[name_loc->namelen]); + &name_loc->nameval[name_loc->namelen]); if (retval) return retval; } else { @@ -2399,15 +2397,15 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) return retval; retval = context->put_listent(context, entry->flags, - (char *)name_rmt->name, + name_rmt->name, (int)name_rmt->namelen, valuelen, - (char*)args.value); + args.value); kmem_free(args.value); } else { retval = context->put_listent(context, entry->flags, - (char *)name_rmt->name, + name_rmt->name, (int)name_rmt->namelen, valuelen, NULL); @@ -2419,7 +2417,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) break; cursor->offset++; } - xfs_attr_trace_l_cl("blk end", context, leaf); + trace_xfs_attr_list_leaf_end(context); return(retval); } @@ -2952,7 +2950,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, map.br_blockcount); bp = xfs_trans_get_buf(*trans, dp->i_mount->m_ddev_targp, - dblkno, dblkcnt, XFS_BUF_LOCK); + dblkno, dblkcnt, XBF_LOCK); xfs_trans_binval(*trans, bp); /* * Roll to next transaction. diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h index ea22839caed..919756e3ba5 100644 --- a/fs/xfs/xfs_attr_sf.h +++ b/fs/xfs/xfs_attr_sf.h @@ -25,8 +25,6 @@ * to fit into the literal area of the inode. */ -struct xfs_inode; - /* * Entries are packed toward the top as tight as possible. 
*/ @@ -54,7 +52,7 @@ typedef struct xfs_attr_sf_sort { __uint8_t valuelen; /* length of value */ __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ xfs_dahash_t hash; /* this entry's hash value */ - char *name; /* name value, pointer into buffer */ + unsigned char *name; /* name value, pointer into buffer */ } xfs_attr_sf_sort_t; #define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) /* space name/value uses */ \ @@ -69,42 +67,4 @@ typedef struct xfs_attr_sf_sort { (be16_to_cpu(((xfs_attr_shortform_t *) \ ((dp)->i_afp->if_u1.if_data))->hdr.totsize)) -#if defined(XFS_ATTR_TRACE) -/* - * Kernel tracing support for attribute lists - */ -struct xfs_attr_list_context; -struct xfs_da_intnode; -struct xfs_da_node_entry; -struct xfs_attr_leafblock; - -#define XFS_ATTR_TRACE_SIZE 4096 /* size of global trace buffer */ -extern ktrace_t *xfs_attr_trace_buf; - -/* - * Trace record types. - */ -#define XFS_ATTR_KTRACE_L_C 1 /* context */ -#define XFS_ATTR_KTRACE_L_CN 2 /* context, node */ -#define XFS_ATTR_KTRACE_L_CB 3 /* context, btree */ -#define XFS_ATTR_KTRACE_L_CL 4 /* context, leaf */ - -void xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context); -void xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context, - struct xfs_da_intnode *node); -void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context, - struct xfs_da_node_entry *btree); -void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context, - struct xfs_attr_leafblock *leaf); -void xfs_attr_trace_enter(int type, char *where, - struct xfs_attr_list_context *context, - __psunsigned_t a13, __psunsigned_t a14, - __psunsigned_t a15); -#else -#define xfs_attr_trace_l_c(w,c) -#define xfs_attr_trace_l_cn(w,c,n) -#define xfs_attr_trace_l_cb(w,c,b) -#define xfs_attr_trace_l_cl(w,c,l) -#endif /* XFS_ATTR_TRACE */ - #endif /* __XFS_ATTR_SF_H__ */ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 8971fb09d38..1869fb97381 100644 --- a/fs/xfs/xfs_bmap.c +++ 
b/fs/xfs/xfs_bmap.c @@ -54,6 +54,7 @@ #include "xfs_buf_item.h" #include "xfs_filestream.h" #include "xfs_vnodeops.h" +#include "xfs_trace.h" #ifdef DEBUG @@ -272,71 +273,6 @@ xfs_bmap_isaeof( int whichfork, /* data or attribute fork */ char *aeof); /* return value */ -#ifdef XFS_BMAP_TRACE -/* - * Add bmap trace entry prior to a call to xfs_iext_remove. - */ -STATIC void -xfs_bmap_trace_delete( - const char *fname, /* function name */ - char *desc, /* operation description */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* index of entry(entries) deleted */ - xfs_extnum_t cnt, /* count of entries deleted, 1 or 2 */ - int whichfork); /* data or attr fork */ - -/* - * Add bmap trace entry prior to a call to xfs_iext_insert, or - * reading in the extents list from the disk (in the btree). - */ -STATIC void -xfs_bmap_trace_insert( - const char *fname, /* function name */ - char *desc, /* operation description */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* index of entry(entries) inserted */ - xfs_extnum_t cnt, /* count of entries inserted, 1 or 2 */ - xfs_bmbt_irec_t *r1, /* inserted record 1 */ - xfs_bmbt_irec_t *r2, /* inserted record 2 or null */ - int whichfork); /* data or attr fork */ - -/* - * Add bmap trace entry after updating an extent record in place. - */ -STATIC void -xfs_bmap_trace_post_update( - const char *fname, /* function name */ - char *desc, /* operation description */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* index of entry updated */ - int whichfork); /* data or attr fork */ - -/* - * Add bmap trace entry prior to updating an extent record in place. 
- */ -STATIC void -xfs_bmap_trace_pre_update( - const char *fname, /* function name */ - char *desc, /* operation description */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* index of entry to be updated */ - int whichfork); /* data or attr fork */ - -#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) \ - xfs_bmap_trace_delete(__func__,d,ip,i,c,w) -#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \ - xfs_bmap_trace_insert(__func__,d,ip,i,c,r1,r2,w) -#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) \ - xfs_bmap_trace_post_update(__func__,d,ip,i,w) -#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) \ - xfs_bmap_trace_pre_update(__func__,d,ip,i,w) -#else -#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) -#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) -#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) -#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) -#endif /* XFS_BMAP_TRACE */ - /* * Compute the worst-case number of indirect blocks that will be used * for ip's delayed extent of length "len". @@ -363,18 +299,6 @@ xfs_bmap_validate_ret( #define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) #endif /* DEBUG */ -#if defined(XFS_RW_TRACE) -STATIC void -xfs_bunmap_trace( - xfs_inode_t *ip, - xfs_fileoff_t bno, - xfs_filblks_t len, - int flags, - inst_t *ra); -#else -#define xfs_bunmap_trace(ip, bno, len, flags, ra) -#endif /* XFS_RW_TRACE */ - STATIC int xfs_bmap_count_tree( xfs_mount_t *mp, @@ -590,9 +514,9 @@ xfs_bmap_add_extent( * already extents in the list. */ if (nextents == 0) { - XFS_BMAP_TRACE_INSERT("insert empty", ip, 0, 1, new, NULL, - whichfork); - xfs_iext_insert(ifp, 0, 1, new); + xfs_iext_insert(ip, 0, 1, new, + whichfork == XFS_ATTR_FORK ? 
BMAP_ATTRFORK : 0); + ASSERT(cur == NULL); ifp->if_lastex = 0; if (!isnullstartblock(new->br_startblock)) { @@ -759,26 +683,10 @@ xfs_bmap_add_extent_delay_real( xfs_filblks_t temp=0; /* value for dnew calculations */ xfs_filblks_t temp2=0;/* value for dnew calculations */ int tmp_rval; /* partial logging flags */ - enum { /* bit number definitions for state */ - LEFT_CONTIG, RIGHT_CONTIG, - LEFT_FILLING, RIGHT_FILLING, - LEFT_DELAY, RIGHT_DELAY, - LEFT_VALID, RIGHT_VALID - }; #define LEFT r[0] #define RIGHT r[1] #define PREV r[2] -#define MASK(b) (1 << (b)) -#define MASK2(a,b) (MASK(a) | MASK(b)) -#define MASK3(a,b,c) (MASK2(a,b) | MASK(c)) -#define MASK4(a,b,c,d) (MASK3(a,b,c) | MASK(d)) -#define STATE_SET(b,v) ((v) ? (state |= MASK(b)) : (state &= ~MASK(b))) -#define STATE_TEST(b) (state & MASK(b)) -#define STATE_SET_TEST(b,v) ((v) ? ((state |= MASK(b)), 1) : \ - ((state &= ~MASK(b)), 0)) -#define SWITCH_STATE \ - (state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG)) /* * Set up a bunch of variables to make the tests simpler. @@ -790,69 +698,80 @@ xfs_bmap_add_extent_delay_real( new_endoff = new->br_startoff + new->br_blockcount; ASSERT(PREV.br_startoff <= new->br_startoff); ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); + /* * Set flags determining what part of the previous delayed allocation * extent is being replaced by a real allocation. */ - STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff); - STATE_SET(RIGHT_FILLING, - PREV.br_startoff + PREV.br_blockcount == new_endoff); + if (PREV.br_startoff == new->br_startoff) + state |= BMAP_LEFT_FILLING; + if (PREV.br_startoff + PREV.br_blockcount == new_endoff) + state |= BMAP_RIGHT_FILLING; + /* * Check and set flags if this segment has a left neighbor. * Don't set contiguous if the combined extent would be too large. 
*/ - if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { + if (idx > 0) { + state |= BMAP_LEFT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); - STATE_SET(LEFT_DELAY, isnullstartblock(LEFT.br_startblock)); + + if (isnullstartblock(LEFT.br_startblock)) + state |= BMAP_LEFT_DELAY; } - STATE_SET(LEFT_CONTIG, - STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) && - LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && - LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && - LEFT.br_state == new->br_state && - LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN); + + if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && + LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && + LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && + LEFT.br_state == new->br_state && + LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) + state |= BMAP_LEFT_CONTIG; + /* * Check and set flags if this segment has a right neighbor. * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. 
*/ - if (STATE_SET_TEST(RIGHT_VALID, - idx < - ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) { + if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); - STATE_SET(RIGHT_DELAY, isnullstartblock(RIGHT.br_startblock)); + + if (isnullstartblock(RIGHT.br_startblock)) + state |= BMAP_RIGHT_DELAY; } - STATE_SET(RIGHT_CONTIG, - STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) && - new_endoff == RIGHT.br_startoff && - new->br_startblock + new->br_blockcount == - RIGHT.br_startblock && - new->br_state == RIGHT.br_state && - new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && - ((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) != - MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) || - LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount - <= MAXEXTLEN)); + + if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && + new_endoff == RIGHT.br_startoff && + new->br_startblock + new->br_blockcount == RIGHT.br_startblock && + new->br_state == RIGHT.br_state && + new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && + ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | + BMAP_RIGHT_FILLING)) != + (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | + BMAP_RIGHT_FILLING) || + LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount + <= MAXEXTLEN)) + state |= BMAP_RIGHT_CONTIG; + error = 0; /* * Switch out based on the FILLING and CONTIG state bits. */ - switch (SWITCH_STATE) { - - case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): + switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | + BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | + BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: /* * Filling in all of a previously delayed allocation extent. * The left and right neighbors are both contiguous with new. 
*/ - XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1, - XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + PREV.br_blockcount + RIGHT.br_blockcount); - XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1, - XFS_DATA_FORK); - XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK); - xfs_iext_remove(ifp, idx, 2); + trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + + xfs_iext_remove(ip, idx, 2, state); ip->i_df.if_lastex = idx - 1; ip->i_d.di_nextents--; if (cur == NULL) @@ -885,20 +804,18 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_blockcount; break; - case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG): + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: /* * Filling in all of a previously delayed allocation extent. * The left neighbor is contiguous, the right is not. */ - XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1, - XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + PREV.br_blockcount); - XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1, - XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + ip->i_df.if_lastex = idx - 1; - XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK); - xfs_iext_remove(ifp, idx, 1); + xfs_iext_remove(ip, idx, 1, state); if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -921,19 +838,19 @@ xfs_bmap_add_extent_delay_real( PREV.br_blockcount; break; - case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG): + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: /* * Filling in all of a previously delayed allocation extent. * The right neighbor is contiguous, the left is not. 
*/ - XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount + RIGHT.br_blockcount); - XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + ip->i_df.if_lastex = idx; - XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK); - xfs_iext_remove(ifp, idx + 1, 1); + xfs_iext_remove(ip, idx + 1, 1, state); if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -956,15 +873,16 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_blockcount; break; - case MASK2(LEFT_FILLING, RIGHT_FILLING): + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: /* * Filling in all of a previously delayed allocation extent. * Neither the left nor right neighbors are contiguous with * the new one. */ - XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock); - XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + ip->i_df.if_lastex = idx; ip->i_d.di_nextents++; if (cur == NULL) @@ -987,19 +905,20 @@ xfs_bmap_add_extent_delay_real( temp2 = new->br_blockcount; break; - case MASK2(LEFT_FILLING, LEFT_CONTIG): + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: /* * Filling in the first part of a previous delayed allocation. * The left neighbor is contiguous. 
*/ - XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + new->br_blockcount); xfs_bmbt_set_startoff(ep, PREV.br_startoff + new->br_blockcount); - XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + temp = PREV.br_blockcount - new->br_blockcount; - XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); ip->i_df.if_lastex = idx - 1; if (cur == NULL) @@ -1021,7 +940,7 @@ xfs_bmap_add_extent_delay_real( temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), startblockval(PREV.br_startblock)); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); *dnew = temp; /* DELTA: The boundary between two in-core extents moved. */ temp = LEFT.br_startoff; @@ -1029,18 +948,16 @@ xfs_bmap_add_extent_delay_real( PREV.br_blockcount; break; - case MASK(LEFT_FILLING): + case BMAP_LEFT_FILLING: /* * Filling in the first part of a previous delayed allocation. * The left neighbor is not contiguous. */ - XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_startoff(ep, new_endoff); temp = PREV.br_blockcount - new->br_blockcount; xfs_bmbt_set_blockcount(ep, temp); - XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL, - XFS_DATA_FORK); - xfs_iext_insert(ifp, idx, 1, new); + xfs_iext_insert(ip, idx, 1, new, state); ip->i_df.if_lastex = idx; ip->i_d.di_nextents++; if (cur == NULL) @@ -1071,27 +988,27 @@ xfs_bmap_add_extent_delay_real( (cur ? 
cur->bc_private.b.allocated : 0)); ep = xfs_iext_get_ext(ifp, idx + 1); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx + 1, XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); *dnew = temp; /* DELTA: One in-core extent is split in two. */ temp = PREV.br_startoff; temp2 = PREV.br_blockcount; break; - case MASK2(RIGHT_FILLING, RIGHT_CONTIG): + case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: /* * Filling in the last part of a previous delayed allocation. * The right neighbor is contiguous with the new allocation. */ temp = PREV.br_blockcount - new->br_blockcount; - XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK); - XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, RIGHT.br_state); - XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); ip->i_df.if_lastex = idx + 1; if (cur == NULL) rval = XFS_ILOG_DEXT; @@ -1112,7 +1029,7 @@ xfs_bmap_add_extent_delay_real( temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), startblockval(PREV.br_startblock)); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); *dnew = temp; /* DELTA: The boundary between two in-core extents moved. */ temp = PREV.br_startoff; @@ -1120,17 +1037,15 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_blockcount; break; - case MASK(RIGHT_FILLING): + case BMAP_RIGHT_FILLING: /* * Filling in the last part of a previous delayed allocation. * The right neighbor is not contiguous. 
*/ temp = PREV.br_blockcount - new->br_blockcount; - XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); - XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL, - XFS_DATA_FORK); - xfs_iext_insert(ifp, idx + 1, 1, new); + xfs_iext_insert(ip, idx + 1, 1, new, state); ip->i_df.if_lastex = idx + 1; ip->i_d.di_nextents++; if (cur == NULL) @@ -1161,7 +1076,7 @@ xfs_bmap_add_extent_delay_real( (cur ? cur->bc_private.b.allocated : 0)); ep = xfs_iext_get_ext(ifp, idx); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); *dnew = temp; /* DELTA: One in-core extent is split in two. */ temp = PREV.br_startoff; @@ -1175,7 +1090,7 @@ xfs_bmap_add_extent_delay_real( * This case is avoided almost all the time. */ temp = new->br_startoff - PREV.br_startoff; - XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); r[0] = *new; r[1].br_state = PREV.br_state; @@ -1183,9 +1098,7 @@ xfs_bmap_add_extent_delay_real( r[1].br_startoff = new_endoff; temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; r[1].br_blockcount = temp2; - XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1], - XFS_DATA_FORK); - xfs_iext_insert(ifp, idx + 1, 2, &r[0]); + xfs_iext_insert(ip, idx + 1, 2, &r[0], state); ip->i_df.if_lastex = idx + 1; ip->i_d.di_nextents++; if (cur == NULL) @@ -1242,24 +1155,24 @@ xfs_bmap_add_extent_delay_real( } ep = xfs_iext_get_ext(ifp, idx); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK); - XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx + 2, XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, idx + 2, state, _THIS_IP_); 
xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2), nullstartblock((int)temp2)); - XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx + 2, XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_); *dnew = temp + temp2; /* DELTA: One in-core extent is split in three. */ temp = PREV.br_startoff; temp2 = PREV.br_blockcount; break; - case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): - case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): - case MASK2(LEFT_FILLING, RIGHT_CONTIG): - case MASK2(RIGHT_FILLING, LEFT_CONTIG): - case MASK2(LEFT_CONTIG, RIGHT_CONTIG): - case MASK(LEFT_CONTIG): - case MASK(RIGHT_CONTIG): + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: + case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_LEFT_CONTIG: + case BMAP_RIGHT_CONTIG: /* * These cases are all impossible. */ @@ -1279,14 +1192,6 @@ done: #undef LEFT #undef RIGHT #undef PREV -#undef MASK -#undef MASK2 -#undef MASK3 -#undef MASK4 -#undef STATE_SET -#undef STATE_TEST -#undef STATE_SET_TEST -#undef SWITCH_STATE } /* @@ -1316,27 +1221,10 @@ xfs_bmap_add_extent_unwritten_real( int state = 0;/* state bits, accessed thru macros */ xfs_filblks_t temp=0; xfs_filblks_t temp2=0; - enum { /* bit number definitions for state */ - LEFT_CONTIG, RIGHT_CONTIG, - LEFT_FILLING, RIGHT_FILLING, - LEFT_DELAY, RIGHT_DELAY, - LEFT_VALID, RIGHT_VALID - }; #define LEFT r[0] #define RIGHT r[1] #define PREV r[2] -#define MASK(b) (1 << (b)) -#define MASK2(a,b) (MASK(a) | MASK(b)) -#define MASK3(a,b,c) (MASK2(a,b) | MASK(c)) -#define MASK4(a,b,c,d) (MASK3(a,b,c) | MASK(d)) -#define STATE_SET(b,v) ((v) ? (state |= MASK(b)) : (state &= ~MASK(b))) -#define STATE_TEST(b) (state & MASK(b)) -#define STATE_SET_TEST(b,v) ((v) ? 
((state |= MASK(b)), 1) : \ - ((state &= ~MASK(b)), 0)) -#define SWITCH_STATE \ - (state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG)) - /* * Set up a bunch of variables to make the tests simpler. */ @@ -1352,68 +1240,78 @@ xfs_bmap_add_extent_unwritten_real( new_endoff = new->br_startoff + new->br_blockcount; ASSERT(PREV.br_startoff <= new->br_startoff); ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); + /* * Set flags determining what part of the previous oldext allocation * extent is being replaced by a newext allocation. */ - STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff); - STATE_SET(RIGHT_FILLING, - PREV.br_startoff + PREV.br_blockcount == new_endoff); + if (PREV.br_startoff == new->br_startoff) + state |= BMAP_LEFT_FILLING; + if (PREV.br_startoff + PREV.br_blockcount == new_endoff) + state |= BMAP_RIGHT_FILLING; + /* * Check and set flags if this segment has a left neighbor. * Don't set contiguous if the combined extent would be too large. */ - if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { + if (idx > 0) { + state |= BMAP_LEFT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); - STATE_SET(LEFT_DELAY, isnullstartblock(LEFT.br_startblock)); + + if (isnullstartblock(LEFT.br_startblock)) + state |= BMAP_LEFT_DELAY; } - STATE_SET(LEFT_CONTIG, - STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) && - LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && - LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && - LEFT.br_state == newext && - LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN); + + if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && + LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && + LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && + LEFT.br_state == newext && + LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) + state |= BMAP_LEFT_CONTIG; + /* * Check and set flags if this segment has a right neighbor. 
* Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (STATE_SET_TEST(RIGHT_VALID, - idx < - ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) { + if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); - STATE_SET(RIGHT_DELAY, isnullstartblock(RIGHT.br_startblock)); + if (isnullstartblock(RIGHT.br_startblock)) + state |= BMAP_RIGHT_DELAY; } - STATE_SET(RIGHT_CONTIG, - STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) && - new_endoff == RIGHT.br_startoff && - new->br_startblock + new->br_blockcount == - RIGHT.br_startblock && - newext == RIGHT.br_state && - new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && - ((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) != - MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) || - LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount - <= MAXEXTLEN)); + + if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && + new_endoff == RIGHT.br_startoff && + new->br_startblock + new->br_blockcount == RIGHT.br_startblock && + newext == RIGHT.br_state && + new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && + ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | + BMAP_RIGHT_FILLING)) != + (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | + BMAP_RIGHT_FILLING) || + LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount + <= MAXEXTLEN)) + state |= BMAP_RIGHT_CONTIG; + /* * Switch out based on the FILLING and CONTIG state bits. */ - switch (SWITCH_STATE) { - - case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): + switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | + BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | + BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: /* * Setting all of a previous oldext extent to newext. * The left and right neighbors are both contiguous with new. 
*/ - XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1, - XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + PREV.br_blockcount + RIGHT.br_blockcount); - XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1, - XFS_DATA_FORK); - XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK); - xfs_iext_remove(ifp, idx, 2); + trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + + xfs_iext_remove(ip, idx, 2, state); ip->i_df.if_lastex = idx - 1; ip->i_d.di_nextents -= 2; if (cur == NULL) @@ -1450,20 +1348,18 @@ xfs_bmap_add_extent_unwritten_real( RIGHT.br_blockcount; break; - case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG): + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: /* * Setting all of a previous oldext extent to newext. * The left neighbor is contiguous, the right is not. */ - XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1, - XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + PREV.br_blockcount); - XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1, - XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + ip->i_df.if_lastex = idx - 1; - XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK); - xfs_iext_remove(ifp, idx, 1); + xfs_iext_remove(ip, idx, 1, state); ip->i_d.di_nextents--; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1492,21 +1388,18 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_blockcount; break; - case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG): + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: /* * Setting all of a previous oldext extent to newext. * The right neighbor is contiguous, the left is not. 
*/ - XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx, - XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount + RIGHT.br_blockcount); xfs_bmbt_set_state(ep, newext); - XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx, - XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); ip->i_df.if_lastex = idx; - XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK); - xfs_iext_remove(ifp, idx + 1, 1); + xfs_iext_remove(ip, idx + 1, 1, state); ip->i_d.di_nextents--; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1535,17 +1428,16 @@ xfs_bmap_add_extent_unwritten_real( RIGHT.br_blockcount; break; - case MASK2(LEFT_FILLING, RIGHT_FILLING): + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: /* * Setting all of a previous oldext extent to newext. * Neither the left nor right neighbors are contiguous with * the new one. */ - XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx, - XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_state(ep, newext); - XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx, - XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + ip->i_df.if_lastex = idx; if (cur == NULL) rval = XFS_ILOG_DEXT; @@ -1566,27 +1458,25 @@ xfs_bmap_add_extent_unwritten_real( temp2 = new->br_blockcount; break; - case MASK2(LEFT_FILLING, LEFT_CONTIG): + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: /* * Setting the first part of a previous oldext extent to newext. * The left neighbor is contiguous. 
*/ - XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1, - XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + new->br_blockcount); xfs_bmbt_set_startoff(ep, PREV.br_startoff + new->br_blockcount); - XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1, - XFS_DATA_FORK); - XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx, - XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock + new->br_blockcount); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx, - XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + ip->i_df.if_lastex = idx - 1; if (cur == NULL) rval = XFS_ILOG_DEXT; @@ -1617,22 +1507,21 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_blockcount; break; - case MASK(LEFT_FILLING): + case BMAP_LEFT_FILLING: /* * Setting the first part of a previous oldext extent to newext. * The left neighbor is not contiguous. 
*/ - XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); xfs_bmbt_set_startoff(ep, new_endoff); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); xfs_bmbt_set_startblock(ep, new->br_startblock + new->br_blockcount); - XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx, XFS_DATA_FORK); - XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL, - XFS_DATA_FORK); - xfs_iext_insert(ifp, idx, 1, new); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + + xfs_iext_insert(ip, idx, 1, new, state); ip->i_df.if_lastex = idx; ip->i_d.di_nextents++; if (cur == NULL) @@ -1660,24 +1549,21 @@ xfs_bmap_add_extent_unwritten_real( temp2 = PREV.br_blockcount; break; - case MASK2(RIGHT_FILLING, RIGHT_CONTIG): + case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: /* * Setting the last part of a previous oldext extent to newext. * The right neighbor is contiguous with the new allocation. */ - XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx, - XFS_DATA_FORK); - XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1, - XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx, - XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, newext); - XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1, - XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); + ip->i_df.if_lastex = idx + 1; if (cur == NULL) rval = XFS_ILOG_DEXT; @@ -1707,18 +1593,17 @@ xfs_bmap_add_extent_unwritten_real( RIGHT.br_blockcount; break; - case MASK(RIGHT_FILLING): + case BMAP_RIGHT_FILLING: /* * Setting the last part of a previous oldext extent to newext. 
* The right neighbor is not contiguous. */ - XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK); - XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL, - XFS_DATA_FORK); - xfs_iext_insert(ifp, idx + 1, 1, new); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + + xfs_iext_insert(ip, idx + 1, 1, new, state); ip->i_df.if_lastex = idx + 1; ip->i_d.di_nextents++; if (cur == NULL) @@ -1756,19 +1641,18 @@ xfs_bmap_add_extent_unwritten_real( * newext. Contiguity is impossible here. * One extent becomes three extents. */ - XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, new->br_startoff - PREV.br_startoff); - XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + r[0] = *new; r[1].br_startoff = new_endoff; r[1].br_blockcount = PREV.br_startoff + PREV.br_blockcount - new_endoff; r[1].br_startblock = new->br_startblock + new->br_blockcount; r[1].br_state = oldext; - XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1], - XFS_DATA_FORK); - xfs_iext_insert(ifp, idx + 1, 2, &r[0]); + xfs_iext_insert(ip, idx + 1, 2, &r[0], state); ip->i_df.if_lastex = idx + 1; ip->i_d.di_nextents += 2; if (cur == NULL) @@ -1813,13 +1697,13 @@ xfs_bmap_add_extent_unwritten_real( temp2 = PREV.br_blockcount; break; - case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): - case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): - case MASK2(LEFT_FILLING, RIGHT_CONTIG): - case MASK2(RIGHT_FILLING, LEFT_CONTIG): - case MASK2(LEFT_CONTIG, RIGHT_CONTIG): - case MASK(LEFT_CONTIG): - case MASK(RIGHT_CONTIG): + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case 
BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: + case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_LEFT_CONTIG: + case BMAP_RIGHT_CONTIG: /* * These cases are all impossible. */ @@ -1839,14 +1723,6 @@ done: #undef LEFT #undef RIGHT #undef PREV -#undef MASK -#undef MASK2 -#undef MASK3 -#undef MASK4 -#undef STATE_SET -#undef STATE_TEST -#undef STATE_SET_TEST -#undef SWITCH_STATE } /* @@ -1872,62 +1748,57 @@ xfs_bmap_add_extent_hole_delay( int state; /* state bits, accessed thru macros */ xfs_filblks_t temp=0; /* temp for indirect calculations */ xfs_filblks_t temp2=0; - enum { /* bit number definitions for state */ - LEFT_CONTIG, RIGHT_CONTIG, - LEFT_DELAY, RIGHT_DELAY, - LEFT_VALID, RIGHT_VALID - }; - -#define MASK(b) (1 << (b)) -#define MASK2(a,b) (MASK(a) | MASK(b)) -#define STATE_SET(b,v) ((v) ? (state |= MASK(b)) : (state &= ~MASK(b))) -#define STATE_TEST(b) (state & MASK(b)) -#define STATE_SET_TEST(b,v) ((v) ? ((state |= MASK(b)), 1) : \ - ((state &= ~MASK(b)), 0)) -#define SWITCH_STATE (state & MASK2(LEFT_CONTIG, RIGHT_CONTIG)) ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); ep = xfs_iext_get_ext(ifp, idx); state = 0; ASSERT(isnullstartblock(new->br_startblock)); + /* * Check and set flags if this segment has a left neighbor */ - if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { + if (idx > 0) { + state |= BMAP_LEFT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); - STATE_SET(LEFT_DELAY, isnullstartblock(left.br_startblock)); + + if (isnullstartblock(left.br_startblock)) + state |= BMAP_LEFT_DELAY; } + /* * Check and set flags if the current (right) segment exists. * If it doesn't exist, we're converting the hole at end-of-file. 
*/ - if (STATE_SET_TEST(RIGHT_VALID, - idx < - ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) { + if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(ep, &right); - STATE_SET(RIGHT_DELAY, isnullstartblock(right.br_startblock)); + + if (isnullstartblock(right.br_startblock)) + state |= BMAP_RIGHT_DELAY; } + /* * Set contiguity flags on the left and right neighbors. * Don't let extents get too large, even if the pieces are contiguous. */ - STATE_SET(LEFT_CONTIG, - STATE_TEST(LEFT_VALID) && STATE_TEST(LEFT_DELAY) && - left.br_startoff + left.br_blockcount == new->br_startoff && - left.br_blockcount + new->br_blockcount <= MAXEXTLEN); - STATE_SET(RIGHT_CONTIG, - STATE_TEST(RIGHT_VALID) && STATE_TEST(RIGHT_DELAY) && - new->br_startoff + new->br_blockcount == right.br_startoff && - new->br_blockcount + right.br_blockcount <= MAXEXTLEN && - (!STATE_TEST(LEFT_CONTIG) || - (left.br_blockcount + new->br_blockcount + - right.br_blockcount <= MAXEXTLEN))); + if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) && + left.br_startoff + left.br_blockcount == new->br_startoff && + left.br_blockcount + new->br_blockcount <= MAXEXTLEN) + state |= BMAP_LEFT_CONTIG; + + if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) && + new->br_startoff + new->br_blockcount == right.br_startoff && + new->br_blockcount + right.br_blockcount <= MAXEXTLEN && + (!(state & BMAP_LEFT_CONTIG) || + (left.br_blockcount + new->br_blockcount + + right.br_blockcount <= MAXEXTLEN))) + state |= BMAP_RIGHT_CONTIG; + /* * Switch out based on the contiguity flags. */ - switch (SWITCH_STATE) { - - case MASK2(LEFT_CONTIG, RIGHT_CONTIG): + switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: /* * New allocation is contiguous with delayed allocations * on the left and on the right. 
@@ -1935,8 +1806,8 @@ xfs_bmap_add_extent_hole_delay( */ temp = left.br_blockcount + new->br_blockcount + right.br_blockcount; - XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1, - XFS_DATA_FORK); + + trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); oldlen = startblockval(left.br_startblock) + startblockval(new->br_startblock) + @@ -1944,53 +1815,52 @@ xfs_bmap_add_extent_hole_delay( newlen = xfs_bmap_worst_indlen(ip, temp); xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), nullstartblock((int)newlen)); - XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1, - XFS_DATA_FORK); - XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, XFS_DATA_FORK); - xfs_iext_remove(ifp, idx, 1); + trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + + xfs_iext_remove(ip, idx, 1, state); ip->i_df.if_lastex = idx - 1; /* DELTA: Two in-core extents were replaced by one. */ temp2 = temp; temp = left.br_startoff; break; - case MASK(LEFT_CONTIG): + case BMAP_LEFT_CONTIG: /* * New allocation is contiguous with a delayed allocation * on the left. * Merge the new allocation with the left neighbor. */ temp = left.br_blockcount + new->br_blockcount; - XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1, - XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); oldlen = startblockval(left.br_startblock) + startblockval(new->br_startblock); newlen = xfs_bmap_worst_indlen(ip, temp); xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), nullstartblock((int)newlen)); - XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1, - XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + ip->i_df.if_lastex = idx - 1; /* DELTA: One in-core extent grew into a hole. */ temp2 = temp; temp = left.br_startoff; break; - case MASK(RIGHT_CONTIG): + case BMAP_RIGHT_CONTIG: /* * New allocation is contiguous with a delayed allocation * on the right. 
* Merge the new allocation with the right neighbor. */ - XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); temp = new->br_blockcount + right.br_blockcount; oldlen = startblockval(new->br_startblock) + startblockval(right.br_startblock); newlen = xfs_bmap_worst_indlen(ip, temp); xfs_bmbt_set_allf(ep, new->br_startoff, nullstartblock((int)newlen), temp, right.br_state); - XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, XFS_DATA_FORK); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + ip->i_df.if_lastex = idx; /* DELTA: One in-core extent grew into a hole. */ temp2 = temp; @@ -2004,9 +1874,7 @@ xfs_bmap_add_extent_hole_delay( * Insert a new entry. */ oldlen = newlen = 0; - XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL, - XFS_DATA_FORK); - xfs_iext_insert(ifp, idx, 1, new); + xfs_iext_insert(ip, idx, 1, new, state); ip->i_df.if_lastex = idx; /* DELTA: A new in-core extent was added in a hole. */ temp2 = new->br_blockcount; @@ -2030,12 +1898,6 @@ xfs_bmap_add_extent_hole_delay( } *logflagsp = 0; return 0; -#undef MASK -#undef MASK2 -#undef STATE_SET -#undef STATE_TEST -#undef STATE_SET_TEST -#undef SWITCH_STATE } /* @@ -2062,83 +1924,75 @@ xfs_bmap_add_extent_hole_real( int state; /* state bits, accessed thru macros */ xfs_filblks_t temp=0; xfs_filblks_t temp2=0; - enum { /* bit number definitions for state */ - LEFT_CONTIG, RIGHT_CONTIG, - LEFT_DELAY, RIGHT_DELAY, - LEFT_VALID, RIGHT_VALID - }; - -#define MASK(b) (1 << (b)) -#define MASK2(a,b) (MASK(a) | MASK(b)) -#define STATE_SET(b,v) ((v) ? (state |= MASK(b)) : (state &= ~MASK(b))) -#define STATE_TEST(b) (state & MASK(b)) -#define STATE_SET_TEST(b,v) ((v) ? 
((state |= MASK(b)), 1) : \ - ((state &= ~MASK(b)), 0)) -#define SWITCH_STATE (state & MASK2(LEFT_CONTIG, RIGHT_CONTIG)) ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); ep = xfs_iext_get_ext(ifp, idx); state = 0; + + if (whichfork == XFS_ATTR_FORK) + state |= BMAP_ATTRFORK; + /* * Check and set flags if this segment has a left neighbor. */ - if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { + if (idx > 0) { + state |= BMAP_LEFT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); - STATE_SET(LEFT_DELAY, isnullstartblock(left.br_startblock)); + if (isnullstartblock(left.br_startblock)) + state |= BMAP_LEFT_DELAY; } + /* * Check and set flags if this segment has a current value. * Not true if we're inserting into the "hole" at eof. */ - if (STATE_SET_TEST(RIGHT_VALID, - idx < - ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) { + if (idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(ep, &right); - STATE_SET(RIGHT_DELAY, isnullstartblock(right.br_startblock)); + if (isnullstartblock(right.br_startblock)) + state |= BMAP_RIGHT_DELAY; } + /* * We're inserting a real allocation between "left" and "right". * Set the contiguity flags. Don't let extents get too large. 
*/ - STATE_SET(LEFT_CONTIG, - STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) && - left.br_startoff + left.br_blockcount == new->br_startoff && - left.br_startblock + left.br_blockcount == new->br_startblock && - left.br_state == new->br_state && - left.br_blockcount + new->br_blockcount <= MAXEXTLEN); - STATE_SET(RIGHT_CONTIG, - STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) && - new->br_startoff + new->br_blockcount == right.br_startoff && - new->br_startblock + new->br_blockcount == - right.br_startblock && - new->br_state == right.br_state && - new->br_blockcount + right.br_blockcount <= MAXEXTLEN && - (!STATE_TEST(LEFT_CONTIG) || - left.br_blockcount + new->br_blockcount + - right.br_blockcount <= MAXEXTLEN)); + if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && + left.br_startoff + left.br_blockcount == new->br_startoff && + left.br_startblock + left.br_blockcount == new->br_startblock && + left.br_state == new->br_state && + left.br_blockcount + new->br_blockcount <= MAXEXTLEN) + state |= BMAP_LEFT_CONTIG; + + if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && + new->br_startoff + new->br_blockcount == right.br_startoff && + new->br_startblock + new->br_blockcount == right.br_startblock && + new->br_state == right.br_state && + new->br_blockcount + right.br_blockcount <= MAXEXTLEN && + (!(state & BMAP_LEFT_CONTIG) || + left.br_blockcount + new->br_blockcount + + right.br_blockcount <= MAXEXTLEN)) + state |= BMAP_RIGHT_CONTIG; error = 0; /* * Select which case we're in here, and implement it. */ - switch (SWITCH_STATE) { - - case MASK2(LEFT_CONTIG, RIGHT_CONTIG): + switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: /* * New allocation is contiguous with real allocations on the * left and on the right. * Merge all three into a single extent record. 
*/ - XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1, - whichfork); + trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), left.br_blockcount + new->br_blockcount + right.br_blockcount); - XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1, - whichfork); - XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, whichfork); - xfs_iext_remove(ifp, idx, 1); + trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + + xfs_iext_remove(ip, idx, 1, state); ifp->if_lastex = idx - 1; XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 1); @@ -2173,16 +2027,17 @@ xfs_bmap_add_extent_hole_real( right.br_blockcount; break; - case MASK(LEFT_CONTIG): + case BMAP_LEFT_CONTIG: /* * New allocation is contiguous with a real allocation * on the left. * Merge the new allocation with the left neighbor. */ - XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1, whichfork); + trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), left.br_blockcount + new->br_blockcount); - XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1, whichfork); + trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + ifp->if_lastex = idx - 1; if (cur == NULL) { rval = xfs_ilog_fext(whichfork); @@ -2207,17 +2062,18 @@ xfs_bmap_add_extent_hole_real( new->br_blockcount; break; - case MASK(RIGHT_CONTIG): + case BMAP_RIGHT_CONTIG: /* * New allocation is contiguous with a real allocation * on the right. * Merge the new allocation with the right neighbor. 
*/ - XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, whichfork); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock, new->br_blockcount + right.br_blockcount, right.br_state); - XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, whichfork); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + ifp->if_lastex = idx; if (cur == NULL) { rval = xfs_ilog_fext(whichfork); @@ -2248,8 +2104,7 @@ xfs_bmap_add_extent_hole_real( * real allocation. * Insert a new entry. */ - XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL, whichfork); - xfs_iext_insert(ifp, idx, 1, new); + xfs_iext_insert(ip, idx, 1, new, state); ifp->if_lastex = idx; XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) + 1); @@ -2283,12 +2138,6 @@ xfs_bmap_add_extent_hole_real( done: *logflagsp = rval; return error; -#undef MASK -#undef MASK2 -#undef STATE_SET -#undef STATE_TEST -#undef STATE_SET_TEST -#undef SWITCH_STATE } /* @@ -2780,13 +2629,12 @@ xfs_bmap_btalloc( if (startag == NULLAGNUMBER) startag = ag = 0; notinit = 0; - down_read(&mp->m_peraglock); + pag = xfs_perag_get(mp, ag); while (blen < ap->alen) { - pag = &mp->m_perag[ag]; if (!pag->pagf_init && (error = xfs_alloc_pagf_init(mp, args.tp, ag, XFS_ALLOC_FLAG_TRYLOCK))) { - up_read(&mp->m_peraglock); + xfs_perag_put(pag); return error; } /* @@ -2818,13 +2666,13 @@ xfs_bmap_btalloc( break; error = xfs_filestream_new_ag(ap, &ag); - if (error) { - up_read(&mp->m_peraglock); + xfs_perag_put(pag); + if (error) return error; - } /* loop again to set 'blen'*/ startag = NULLAGNUMBER; + pag = xfs_perag_get(mp, ag); continue; } } @@ -2832,8 +2680,10 @@ xfs_bmap_btalloc( ag = 0; if (ag == startag) break; + xfs_perag_put(pag); + pag = xfs_perag_get(mp, ag); } - up_read(&mp->m_peraglock); + xfs_perag_put(pag); /* * Since the above loop did a BUF_TRYLOCK, it is * possible that there is space for this request. 
@@ -3115,8 +2965,13 @@ xfs_bmap_del_extent( uint qfield; /* quota field to update */ xfs_filblks_t temp; /* for indirect length calculations */ xfs_filblks_t temp2; /* for indirect length calculations */ + int state = 0; XFS_STATS_INC(xs_del_exlist); + + if (whichfork == XFS_ATTR_FORK) + state |= BMAP_ATTRFORK; + mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT((idx >= 0) && (idx < ifp->if_bytes / @@ -3196,8 +3051,8 @@ xfs_bmap_del_extent( /* * Matches the whole extent. Delete the entry. */ - XFS_BMAP_TRACE_DELETE("3", ip, idx, 1, whichfork); - xfs_iext_remove(ifp, idx, 1); + xfs_iext_remove(ip, idx, 1, + whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); ifp->if_lastex = idx; if (delay) break; @@ -3217,7 +3072,7 @@ xfs_bmap_del_extent( /* * Deleting the first part of the extent. */ - XFS_BMAP_TRACE_PRE_UPDATE("2", ip, idx, whichfork); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_startoff(ep, del_endoff); temp = got.br_blockcount - del->br_blockcount; xfs_bmbt_set_blockcount(ep, temp); @@ -3226,13 +3081,12 @@ xfs_bmap_del_extent( temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), da_old); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, - whichfork); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); da_new = temp; break; } xfs_bmbt_set_startblock(ep, del_endblock); - XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, whichfork); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); if (!cur) { flags |= xfs_ilog_fext(whichfork); break; @@ -3248,19 +3102,18 @@ xfs_bmap_del_extent( * Deleting the last part of the extent. 
*/ temp = got.br_blockcount - del->br_blockcount; - XFS_BMAP_TRACE_PRE_UPDATE("1", ip, idx, whichfork); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); ifp->if_lastex = idx; if (delay) { temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), da_old); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, - whichfork); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); da_new = temp; break; } - XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, whichfork); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); if (!cur) { flags |= xfs_ilog_fext(whichfork); break; @@ -3277,7 +3130,7 @@ xfs_bmap_del_extent( * Deleting the middle of the extent. */ temp = del->br_startoff - got.br_startoff; - XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, whichfork); + trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); new.br_startoff = del_endoff; temp2 = got_endoff - del_endoff; @@ -3364,10 +3217,8 @@ xfs_bmap_del_extent( } } } - XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, whichfork); - XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 1, &new, NULL, - whichfork); - xfs_iext_insert(ifp, idx + 1, 1, &new); + trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + xfs_iext_insert(ip, idx + 1, 1, &new, state); ifp->if_lastex = idx + 1; break; } @@ -3687,7 +3538,9 @@ xfs_bmap_local_to_extents( xfs_iext_add(ifp, 0, 1); ep = xfs_iext_get_ext(ifp, 0); xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); - XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork); + trace_xfs_bmap_post_update(ip, 0, + whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0, + _THIS_IP_); XFS_IFORK_NEXT_SET(ip, whichfork, 1); ip->i_d.di_nblocks = 1; xfs_trans_mod_dquot_byino(tp, ip, @@ -3800,158 +3653,6 @@ xfs_bmap_search_extents( return ep; } - -#ifdef XFS_BMAP_TRACE -ktrace_t *xfs_bmap_trace_buf; - -/* - * Add a bmap trace buffer entry. Base routine for the others. 
- */ -STATIC void -xfs_bmap_trace_addentry( - int opcode, /* operation */ - const char *fname, /* function name */ - char *desc, /* operation description */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* index of entry(ies) */ - xfs_extnum_t cnt, /* count of entries, 1 or 2 */ - xfs_bmbt_rec_host_t *r1, /* first record */ - xfs_bmbt_rec_host_t *r2, /* second record or null */ - int whichfork) /* data or attr fork */ -{ - xfs_bmbt_rec_host_t tr2; - - ASSERT(cnt == 1 || cnt == 2); - ASSERT(r1 != NULL); - if (cnt == 1) { - ASSERT(r2 == NULL); - r2 = &tr2; - memset(&tr2, 0, sizeof(tr2)); - } else - ASSERT(r2 != NULL); - ktrace_enter(xfs_bmap_trace_buf, - (void *)(__psint_t)(opcode | (whichfork << 16)), - (void *)fname, (void *)desc, (void *)ip, - (void *)(__psint_t)idx, - (void *)(__psint_t)cnt, - (void *)(__psunsigned_t)(ip->i_ino >> 32), - (void *)(__psunsigned_t)(unsigned)ip->i_ino, - (void *)(__psunsigned_t)(r1->l0 >> 32), - (void *)(__psunsigned_t)(unsigned)(r1->l0), - (void *)(__psunsigned_t)(r1->l1 >> 32), - (void *)(__psunsigned_t)(unsigned)(r1->l1), - (void *)(__psunsigned_t)(r2->l0 >> 32), - (void *)(__psunsigned_t)(unsigned)(r2->l0), - (void *)(__psunsigned_t)(r2->l1 >> 32), - (void *)(__psunsigned_t)(unsigned)(r2->l1) - ); - ASSERT(ip->i_xtrace); - ktrace_enter(ip->i_xtrace, - (void *)(__psint_t)(opcode | (whichfork << 16)), - (void *)fname, (void *)desc, (void *)ip, - (void *)(__psint_t)idx, - (void *)(__psint_t)cnt, - (void *)(__psunsigned_t)(ip->i_ino >> 32), - (void *)(__psunsigned_t)(unsigned)ip->i_ino, - (void *)(__psunsigned_t)(r1->l0 >> 32), - (void *)(__psunsigned_t)(unsigned)(r1->l0), - (void *)(__psunsigned_t)(r1->l1 >> 32), - (void *)(__psunsigned_t)(unsigned)(r1->l1), - (void *)(__psunsigned_t)(r2->l0 >> 32), - (void *)(__psunsigned_t)(unsigned)(r2->l0), - (void *)(__psunsigned_t)(r2->l1 >> 32), - (void *)(__psunsigned_t)(unsigned)(r2->l1) - ); -} - -/* - * Add bmap trace entry prior to a call to xfs_iext_remove. 
- */ -STATIC void -xfs_bmap_trace_delete( - const char *fname, /* function name */ - char *desc, /* operation description */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* index of entry(entries) deleted */ - xfs_extnum_t cnt, /* count of entries deleted, 1 or 2 */ - int whichfork) /* data or attr fork */ -{ - xfs_ifork_t *ifp; /* inode fork pointer */ - - ifp = XFS_IFORK_PTR(ip, whichfork); - xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_DELETE, fname, desc, ip, idx, - cnt, xfs_iext_get_ext(ifp, idx), - cnt == 2 ? xfs_iext_get_ext(ifp, idx + 1) : NULL, - whichfork); -} - -/* - * Add bmap trace entry prior to a call to xfs_iext_insert, or - * reading in the extents list from the disk (in the btree). - */ -STATIC void -xfs_bmap_trace_insert( - const char *fname, /* function name */ - char *desc, /* operation description */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* index of entry(entries) inserted */ - xfs_extnum_t cnt, /* count of entries inserted, 1 or 2 */ - xfs_bmbt_irec_t *r1, /* inserted record 1 */ - xfs_bmbt_irec_t *r2, /* inserted record 2 or null */ - int whichfork) /* data or attr fork */ -{ - xfs_bmbt_rec_host_t tr1; /* compressed record 1 */ - xfs_bmbt_rec_host_t tr2; /* compressed record 2 if needed */ - - xfs_bmbt_set_all(&tr1, r1); - if (cnt == 2) { - ASSERT(r2 != NULL); - xfs_bmbt_set_all(&tr2, r2); - } else { - ASSERT(cnt == 1); - ASSERT(r2 == NULL); - } - xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_INSERT, fname, desc, ip, idx, - cnt, &tr1, cnt == 2 ? &tr2 : NULL, whichfork); -} - -/* - * Add bmap trace entry after updating an extent record in place. 
- */ -STATIC void -xfs_bmap_trace_post_update( - const char *fname, /* function name */ - char *desc, /* operation description */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* index of entry updated */ - int whichfork) /* data or attr fork */ -{ - xfs_ifork_t *ifp; /* inode fork pointer */ - - ifp = XFS_IFORK_PTR(ip, whichfork); - xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_POST_UP, fname, desc, ip, idx, - 1, xfs_iext_get_ext(ifp, idx), NULL, whichfork); -} - -/* - * Add bmap trace entry prior to updating an extent record in place. - */ -STATIC void -xfs_bmap_trace_pre_update( - const char *fname, /* function name */ - char *desc, /* operation description */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* index of entry to be updated */ - int whichfork) /* data or attr fork */ -{ - xfs_ifork_t *ifp; /* inode fork pointer */ - - ifp = XFS_IFORK_PTR(ip, whichfork); - xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_PRE_UP, fname, desc, ip, idx, 1, - xfs_iext_get_ext(ifp, idx), NULL, whichfork); -} -#endif /* XFS_BMAP_TRACE */ - /* * Compute the worst-case number of indirect blocks that will be used * for ip's delayed extent of length "len". 
@@ -3983,37 +3684,6 @@ xfs_bmap_worst_indlen( return rval; } -#if defined(XFS_RW_TRACE) -STATIC void -xfs_bunmap_trace( - xfs_inode_t *ip, - xfs_fileoff_t bno, - xfs_filblks_t len, - int flags, - inst_t *ra) -{ - if (ip->i_rwtrace == NULL) - return; - ktrace_enter(ip->i_rwtrace, - (void *)(__psint_t)XFS_BUNMAP, - (void *)ip, - (void *)(__psint_t)((ip->i_d.di_size >> 32) & 0xffffffff), - (void *)(__psint_t)(ip->i_d.di_size & 0xffffffff), - (void *)(__psint_t)(((xfs_dfiloff_t)bno >> 32) & 0xffffffff), - (void *)(__psint_t)((xfs_dfiloff_t)bno & 0xffffffff), - (void *)(__psint_t)len, - (void *)(__psint_t)flags, - (void *)(unsigned long)current_cpu(), - (void *)ra, - (void *)0, - (void *)0, - (void *)0, - (void *)0, - (void *)0, - (void *)0); -} -#endif - /* * Convert inode from non-attributed to attributed. * Must not be in a transaction, ip must not be locked. @@ -4702,34 +4372,30 @@ error0: return XFS_ERROR(EFSCORRUPTED); } -#ifdef XFS_BMAP_TRACE +#ifdef DEBUG /* * Add bmap trace insert entries for all the contents of the extent records. 
*/ void xfs_bmap_trace_exlist( - const char *fname, /* function name */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t cnt, /* count of entries in the list */ - int whichfork) /* data or attr fork */ + int whichfork, /* data or attr fork */ + unsigned long caller_ip) { - xfs_bmbt_rec_host_t *ep; /* current extent record */ xfs_extnum_t idx; /* extent record index */ xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_irec_t s; /* file extent record */ + int state = 0; + + if (whichfork == XFS_ATTR_FORK) + state |= BMAP_ATTRFORK; ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); - for (idx = 0; idx < cnt; idx++) { - ep = xfs_iext_get_ext(ifp, idx); - xfs_bmbt_get_all(ep, &s); - XFS_BMAP_TRACE_INSERT("exlist", ip, idx, 1, &s, NULL, - whichfork); - } + for (idx = 0; idx < cnt; idx++) + trace_xfs_extlist(ip, idx, whichfork, caller_ip); } -#endif -#ifdef DEBUG /* * Validate that the bmbt_irecs being returned from bmapi are valid * given the callers original parameters. Specifically check the @@ -4805,7 +4471,7 @@ xfs_bmapi( xfs_fsblock_t abno; /* allocated block number */ xfs_extlen_t alen; /* allocated extent length */ xfs_fileoff_t aoff; /* allocated file offset */ - xfs_bmalloca_t bma; /* args for xfs_bmap_alloc */ + xfs_bmalloca_t bma = { 0 }; /* args for xfs_bmap_alloc */ xfs_btree_cur_t *cur; /* bmap btree cursor */ xfs_fileoff_t end; /* end of mapped file region */ int eof; /* we've hit the end of extents */ @@ -5478,7 +5144,8 @@ xfs_bunmapi( int rsvd; /* OK to allocate reserved blocks */ xfs_fsblock_t sum; - xfs_bunmap_trace(ip, bno, len, flags, (inst_t *)__return_address); + trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); + whichfork = (flags & XFS_BMAPI_ATTRFORK) ? 
XFS_ATTR_FORK : XFS_DATA_FORK; ifp = XFS_IFORK_PTR(ip, whichfork); diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 56f62d2edc3..419dafb9d87 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -95,6 +95,21 @@ typedef struct xfs_bmap_free /* need write cache flushing and no */ /* additional allocation alignments */ +#define XFS_BMAPI_FLAGS \ + { XFS_BMAPI_WRITE, "WRITE" }, \ + { XFS_BMAPI_DELAY, "DELAY" }, \ + { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ + { XFS_BMAPI_METADATA, "METADATA" }, \ + { XFS_BMAPI_EXACT, "EXACT" }, \ + { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ + { XFS_BMAPI_ASYNC, "ASYNC" }, \ + { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \ + { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ + { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ + { XFS_BMAPI_CONTIG, "CONTIG" }, \ + { XFS_BMAPI_CONVERT, "CONVERT" } + + static inline int xfs_bmapi_aflag(int w) { return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0); @@ -135,36 +150,43 @@ typedef struct xfs_bmalloca { char conv; /* overwriting unwritten extents */ } xfs_bmalloca_t; -#if defined(__KERNEL__) && defined(XFS_BMAP_TRACE) /* - * Trace operations for bmap extent tracing + * Flags for xfs_bmap_add_extent*. 
*/ -#define XFS_BMAP_KTRACE_DELETE 1 -#define XFS_BMAP_KTRACE_INSERT 2 -#define XFS_BMAP_KTRACE_PRE_UP 3 -#define XFS_BMAP_KTRACE_POST_UP 4 - -#define XFS_BMAP_TRACE_SIZE 4096 /* size of global trace buffer */ -#define XFS_BMAP_KTRACE_SIZE 32 /* size of per-inode trace buffer */ -extern ktrace_t *xfs_bmap_trace_buf; +#define BMAP_LEFT_CONTIG (1 << 0) +#define BMAP_RIGHT_CONTIG (1 << 1) +#define BMAP_LEFT_FILLING (1 << 2) +#define BMAP_RIGHT_FILLING (1 << 3) +#define BMAP_LEFT_DELAY (1 << 4) +#define BMAP_RIGHT_DELAY (1 << 5) +#define BMAP_LEFT_VALID (1 << 6) +#define BMAP_RIGHT_VALID (1 << 7) +#define BMAP_ATTRFORK (1 << 8) + +#define XFS_BMAP_EXT_FLAGS \ + { BMAP_LEFT_CONTIG, "LC" }, \ + { BMAP_RIGHT_CONTIG, "RC" }, \ + { BMAP_LEFT_FILLING, "LF" }, \ + { BMAP_RIGHT_FILLING, "RF" }, \ + { BMAP_ATTRFORK, "ATTR" } /* * Add bmap trace insert entries for all the contents of the extent list. + * + * Quite excessive tracing. Only do this for debug builds. */ +#if defined(__KERNEL) && defined(DEBUG) void xfs_bmap_trace_exlist( - const char *fname, /* function name */ struct xfs_inode *ip, /* incore inode pointer */ xfs_extnum_t cnt, /* count of entries in list */ - int whichfork); /* data or attr fork */ + int whichfork, + unsigned long caller_ip); /* data or attr fork */ #define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ - xfs_bmap_trace_exlist(__func__,ip,c,w) - -#else /* __KERNEL__ && XFS_BMAP_TRACE */ - + xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_) +#else #define XFS_BMAP_TRACE_EXLIST(ip,c,w) - -#endif /* __KERNEL__ && XFS_BMAP_TRACE */ +#endif /* * Convert inode from non-attributed to attributed. diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index eb7b702d069..416e47e54b8 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -98,8 +98,7 @@ xfs_bmdr_to_bmbt( * This code must be in sync with the routines xfs_bmbt_get_startoff, * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state. 
*/ - -STATIC_INLINE void +STATIC void __xfs_bmbt_get_all( __uint64_t l0, __uint64_t l1, @@ -335,7 +334,7 @@ xfs_bmbt_disk_set_allf( /* * Set all the fields in a bmap extent record from the uncompressed form. */ -void +STATIC void xfs_bmbt_disk_set_all( xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s) @@ -769,12 +768,6 @@ xfs_bmbt_trace_enter( (void *)a0, (void *)a1, (void *)a2, (void *)a3, (void *)a4, (void *)a5, (void *)a6, (void *)a7, (void *)a8, (void *)a9, (void *)a10); - ktrace_enter(ip->i_btrace, - (void *)((__psint_t)type | (whichfork << 8) | (line << 16)), - (void *)func, (void *)s, (void *)ip, (void *)cur, - (void *)a0, (void *)a1, (void *)a2, (void *)a3, - (void *)a4, (void *)a5, (void *)a6, (void *)a7, - (void *)a8, (void *)a9, (void *)a10); } STATIC void diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 5549d495947..0e66c4ea0f8 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -46,20 +46,12 @@ typedef struct xfs_bmdr_block { #define BMBT_STARTBLOCK_BITLEN 52 #define BMBT_BLOCKCOUNT_BITLEN 21 - -#define BMBT_USE_64 1 - -typedef struct xfs_bmbt_rec_32 -{ - __uint32_t l0, l1, l2, l3; -} xfs_bmbt_rec_32_t; -typedef struct xfs_bmbt_rec_64 -{ +typedef struct xfs_bmbt_rec { __be64 l0, l1; -} xfs_bmbt_rec_64_t; +} xfs_bmbt_rec_t; typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ -typedef xfs_bmbt_rec_64_t xfs_bmbt_rec_t, xfs_bmdr_rec_t; +typedef xfs_bmbt_rec_t xfs_bmdr_rec_t; typedef struct xfs_bmbt_rec_host { __uint64_t l0, l1; @@ -231,7 +223,6 @@ extern void xfs_bmbt_set_startblock(xfs_bmbt_rec_host_t *r, xfs_fsblock_t v); extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v); extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v); -extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c 
index 52b5f14d0c3..96be4b0f249 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -39,6 +39,7 @@ #include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_error.h" +#include "xfs_trace.h" /* * Cursor allocation zone. @@ -81,7 +82,7 @@ xfs_btree_check_lblock( XFS_ERRTAG_BTREE_CHECK_LBLOCK, XFS_RANDOM_BTREE_CHECK_LBLOCK))) { if (bp) - xfs_buftrace("LBTREE ERROR", bp); + trace_xfs_btree_corrupt(bp, _RET_IP_); XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW, mp); return XFS_ERROR(EFSCORRUPTED); @@ -119,7 +120,7 @@ xfs_btree_check_sblock( XFS_ERRTAG_BTREE_CHECK_SBLOCK, XFS_RANDOM_BTREE_CHECK_SBLOCK))) { if (bp) - xfs_buftrace("SBTREE ERROR", bp); + trace_xfs_btree_corrupt(bp, _RET_IP_); XFS_CORRUPTION_ERROR("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW, cur->bc_mp, block); return XFS_ERROR(EFSCORRUPTED); @@ -976,7 +977,7 @@ xfs_btree_get_buf_block( xfs_daddr_t d; /* need to sort out how callers deal with failures first */ - ASSERT(!(flags & XFS_BUF_TRYLOCK)); + ASSERT(!(flags & XBF_TRYLOCK)); d = xfs_btree_ptr_to_daddr(cur, ptr); *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, @@ -1007,7 +1008,7 @@ xfs_btree_read_buf_block( int error; /* need to sort out how callers deal with failures first */ - ASSERT(!(flags & XFS_BUF_TRYLOCK)); + ASSERT(!(flags & XBF_TRYLOCK)); d = xfs_btree_ptr_to_daddr(cur, ptr); error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h index b3f5eb3c3c6..2d8a309873e 100644 --- a/fs/xfs/xfs_btree_trace.h +++ b/fs/xfs/xfs_btree_trace.h @@ -58,8 +58,6 @@ void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *, struct xfs_buf *, int, int); void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *, struct xfs_buf *, int, int, int); -void xfs_btree_trace_argfffi(const char *, struct xfs_btree_cur *, - xfs_dfiloff_t, xfs_dfsbno_t, xfs_dfilblks_t, int, int); void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int); 
void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int, union xfs_btree_ptr, union xfs_btree_key *, int); @@ -71,24 +69,10 @@ void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *, union xfs_btree_rec *, int); void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int); - -#define XFS_ALLOCBT_TRACE_SIZE 4096 /* size of global trace buffer */ -extern ktrace_t *xfs_allocbt_trace_buf; - -#define XFS_INOBT_TRACE_SIZE 4096 /* size of global trace buffer */ -extern ktrace_t *xfs_inobt_trace_buf; - -#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */ -#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */ -extern ktrace_t *xfs_bmbt_trace_buf; - - #define XFS_BTREE_TRACE_ARGBI(c, b, i) \ xfs_btree_trace_argbi(__func__, c, b, i, __LINE__) #define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \ xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__) -#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j) \ - xfs_btree_trace_argfffi(__func__, c, o, b, i, j, __LINE__) #define XFS_BTREE_TRACE_ARGI(c, i) \ xfs_btree_trace_argi(__func__, c, i, __LINE__) #define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \ @@ -104,7 +88,6 @@ extern ktrace_t *xfs_bmbt_trace_buf; #else #define XFS_BTREE_TRACE_ARGBI(c, b, i) #define XFS_BTREE_TRACE_ARGBII(c, b, i, j) -#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j) #define XFS_BTREE_TRACE_ARGI(c, i) #define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) #define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 92af4098c7e..f3c49e69eab 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -29,6 +29,7 @@ #include "xfs_buf_item.h" #include "xfs_trans_priv.h" #include "xfs_error.h" +#include "xfs_trace.h" kmem_zone_t *xfs_buf_item_zone; @@ -164,7 +165,7 @@ xfs_buf_item_size( * is the buf log format structure with the * cancel flag in it. 
*/ - xfs_buf_item_trace("SIZE STALE", bip); + trace_xfs_buf_item_size_stale(bip); ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); return 1; } @@ -206,7 +207,7 @@ xfs_buf_item_size( } } - xfs_buf_item_trace("SIZE NORM", bip); + trace_xfs_buf_item_size(bip); return nvecs; } @@ -249,7 +250,7 @@ xfs_buf_item_format( ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); vecp->i_addr = (xfs_caddr_t)&bip->bli_format; vecp->i_len = base_size; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT); + vecp->i_type = XLOG_REG_TYPE_BFORMAT; vecp++; nvecs = 1; @@ -259,7 +260,7 @@ xfs_buf_item_format( * is the buf log format structure with the * cancel flag in it. */ - xfs_buf_item_trace("FORMAT STALE", bip); + trace_xfs_buf_item_format_stale(bip); ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); bip->bli_format.blf_size = nvecs; return; @@ -296,14 +297,14 @@ xfs_buf_item_format( buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); + vecp->i_type = XLOG_REG_TYPE_BCHUNK; nvecs++; break; } else if (next_bit != last_bit + 1) { buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); + vecp->i_type = XLOG_REG_TYPE_BCHUNK; nvecs++; vecp++; first_bit = next_bit; @@ -315,7 +316,7 @@ xfs_buf_item_format( buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); + vecp->i_type = XLOG_REG_TYPE_BCHUNK; /* You would think we need to bump the nvecs here too, but we do not * this number is used by recovery, and it gets confused by the boundary * split here @@ -335,7 +336,7 @@ xfs_buf_item_format( /* * Check to make sure everything is consistent. 
*/ - xfs_buf_item_trace("FORMAT NORM", bip); + trace_xfs_buf_item_format(bip); xfs_buf_item_log_check(bip); } @@ -355,8 +356,7 @@ xfs_buf_item_pin( ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || (bip->bli_flags & XFS_BLI_STALE)); - xfs_buf_item_trace("PIN", bip); - xfs_buftrace("XFS_PIN", bp); + trace_xfs_buf_item_pin(bip); xfs_bpin(bp); } @@ -383,8 +383,7 @@ xfs_buf_item_unpin( ASSERT(bp != NULL); ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); ASSERT(atomic_read(&bip->bli_refcount) > 0); - xfs_buf_item_trace("UNPIN", bip); - xfs_buftrace("XFS_UNPIN", bp); + trace_xfs_buf_item_unpin(bip); freed = atomic_dec_and_test(&bip->bli_refcount); ailp = bip->bli_item.li_ailp; @@ -395,8 +394,8 @@ xfs_buf_item_unpin( ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); ASSERT(XFS_BUF_ISSTALE(bp)); ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); - xfs_buf_item_trace("UNPIN STALE", bip); - xfs_buftrace("XFS_UNPIN STALE", bp); + trace_xfs_buf_item_unpin_stale(bip); + /* * If we get called here because of an IO error, we may * or may not have the item on the AIL. xfs_trans_ail_delete() @@ -440,8 +439,8 @@ xfs_buf_item_unpin_remove( if ((atomic_read(&bip->bli_refcount) == 1) && (bip->bli_flags & XFS_BLI_STALE)) { ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0); - xfs_buf_item_trace("UNPIN REMOVE", bip); - xfs_buftrace("XFS_UNPIN_REMOVE", bp); + trace_xfs_buf_item_unpin_stale(bip); + /* * yes -- clear the xaction descriptor in-use flag * and free the chunk if required. We can safely @@ -468,8 +467,10 @@ xfs_buf_item_unpin_remove( /* * This is called to attempt to lock the buffer associated with this * buf log item. Don't sleep on the buffer lock. If we can't get - * the lock right away, return 0. If we can get the lock, pull the - * buffer from the free list, mark it busy, and return 1. + * the lock right away, return 0. If we can get the lock, take a + * reference to the buffer. 
If this is a delayed write buffer that + * needs AIL help to be written back, invoke the pushbuf routine + * rather than the normal success path. */ STATIC uint xfs_buf_item_trylock( @@ -478,24 +479,18 @@ xfs_buf_item_trylock( xfs_buf_t *bp; bp = bip->bli_buf; - - if (XFS_BUF_ISPINNED(bp)) { + if (XFS_BUF_ISPINNED(bp)) return XFS_ITEM_PINNED; - } - - if (!XFS_BUF_CPSEMA(bp)) { + if (!XFS_BUF_CPSEMA(bp)) return XFS_ITEM_LOCKED; - } - /* - * Remove the buffer from the free list. Only do this - * if it's on the free list. Private buffers like the - * superblock buffer are not. - */ + /* take a reference to the buffer. */ XFS_BUF_HOLD(bp); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); - xfs_buf_item_trace("TRYLOCK SUCCESS", bip); + trace_xfs_buf_item_trylock(bip); + if (XFS_BUF_ISDELAYWRITE(bp)) + return XFS_ITEM_PUSHBUF; return XFS_ITEM_SUCCESS; } @@ -524,7 +519,6 @@ xfs_buf_item_unlock( uint hold; bp = bip->bli_buf; - xfs_buftrace("XFS_UNLOCK", bp); /* * Clear the buffer's association with this transaction. @@ -547,7 +541,7 @@ xfs_buf_item_unlock( */ if (bip->bli_flags & XFS_BLI_STALE) { bip->bli_flags &= ~XFS_BLI_LOGGED; - xfs_buf_item_trace("UNLOCK STALE", bip); + trace_xfs_buf_item_unlock_stale(bip); ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); if (!aborted) return; @@ -574,7 +568,7 @@ xfs_buf_item_unlock( * release the buffer at the end of this routine. */ hold = bip->bli_flags & XFS_BLI_HOLD; - xfs_buf_item_trace("UNLOCK", bip); + trace_xfs_buf_item_unlock(bip); /* * If the buf item isn't tracking any data, free it. @@ -618,7 +612,8 @@ xfs_buf_item_committed( xfs_buf_log_item_t *bip, xfs_lsn_t lsn) { - xfs_buf_item_trace("COMMITTED", bip); + trace_xfs_buf_item_committed(bip); + if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && (bip->bli_item.li_lsn != 0)) { return bip->bli_item.li_lsn; @@ -627,11 +622,9 @@ xfs_buf_item_committed( } /* - * This is called to asynchronously write the buffer associated with this - * buf log item out to disk. 
The buffer will already have been locked by - * a successful call to xfs_buf_item_trylock(). If the buffer still has - * B_DELWRI set, then get it going out to disk with a call to bawrite(). - * If not, then just release the buffer. + * The buffer is locked, but is not a delayed write buffer. This happens + * if we race with IO completion and hence we don't want to try to write it + * again. Just release the buffer. */ STATIC void xfs_buf_item_push( @@ -640,20 +633,32 @@ xfs_buf_item_push( xfs_buf_t *bp; ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); - xfs_buf_item_trace("PUSH", bip); + trace_xfs_buf_item_push(bip); bp = bip->bli_buf; + ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); + xfs_buf_relse(bp); +} - if (XFS_BUF_ISDELAYWRITE(bp)) { - int error; - error = xfs_bawrite(bip->bli_item.li_mountp, bp); - if (error) - xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp, - "xfs_buf_item_push: pushbuf error %d on bip %p, bp %p", - error, bip, bp); - } else { - xfs_buf_relse(bp); - } +/* + * The buffer is locked and is a delayed write buffer. Promote the buffer + * in the delayed write queue as the caller knows that they must invoke + * the xfsbufd to get this buffer written. We have to unlock the buffer + * to allow the xfsbufd to write it, too. 
+ */ +STATIC void +xfs_buf_item_pushbuf( + xfs_buf_log_item_t *bip) +{ + xfs_buf_t *bp; + + ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); + trace_xfs_buf_item_pushbuf(bip); + + bp = bip->bli_buf; + ASSERT(XFS_BUF_ISDELAYWRITE(bp)); + xfs_buf_delwri_promote(bp); + xfs_buf_relse(bp); } /* ARGSUSED */ @@ -678,7 +683,7 @@ static struct xfs_item_ops xfs_buf_item_ops = { .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_buf_item_committed, .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push, - .iop_pushbuf = NULL, + .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_buf_item_pushbuf, .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_buf_item_committing }; @@ -738,9 +743,6 @@ xfs_buf_item_init( bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); bip->bli_format.blf_map_size = map_size; -#ifdef XFS_BLI_TRACE - bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_NOFS); -#endif #ifdef XFS_TRANS_DEBUG /* @@ -878,9 +880,6 @@ xfs_buf_item_free( kmem_free(bip->bli_logged); #endif /* XFS_TRANS_DEBUG */ -#ifdef XFS_BLI_TRACE - ktrace_free(bip->bli_trace); -#endif kmem_zone_free(xfs_buf_item_zone, bip); } @@ -897,7 +896,8 @@ xfs_buf_item_relse( { xfs_buf_log_item_t *bip; - xfs_buftrace("XFS_RELSE", bp); + trace_xfs_buf_item_relse(bp, _RET_IP_); + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list); if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) && @@ -994,7 +994,7 @@ xfs_buf_iodone_callbacks( if (XFS_FORCED_SHUTDOWN(mp)) { ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); XFS_BUF_SUPER_STALE(bp); - xfs_buftrace("BUF_IODONE_CB", bp); + trace_xfs_buf_item_iodone(bp, _RET_IP_); xfs_buf_do_callbacks(bp, lip); XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); @@ -1030,7 +1030,7 @@ xfs_buf_iodone_callbacks( XFS_BUF_SET_START(bp); } ASSERT(XFS_BUF_IODONE_FUNC(bp)); - xfs_buftrace("BUF_IODONE ASYNC", bp); + trace_xfs_buf_item_iodone_async(bp, 
_RET_IP_); xfs_buf_relse(bp); } else { /* @@ -1053,9 +1053,7 @@ xfs_buf_iodone_callbacks( } return; } -#ifdef XFSERRORDEBUG - xfs_buftrace("XFS BUFCB NOERR", bp); -#endif + xfs_buf_do_callbacks(bp, lip); XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); @@ -1081,7 +1079,9 @@ xfs_buf_error_relse( XFS_BUF_DONE(bp); XFS_BUF_UNDELAYWRITE(bp); XFS_BUF_ERROR(bp,0); - xfs_buftrace("BUF_ERROR_RELSE", bp); + + trace_xfs_buf_error_relse(bp, _RET_IP_); + if (! XFS_FORCED_SHUTDOWN(mp)) xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); /* @@ -1128,34 +1128,3 @@ xfs_buf_iodone( xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); xfs_buf_item_free(bip); } - -#if defined(XFS_BLI_TRACE) -void -xfs_buf_item_trace( - char *id, - xfs_buf_log_item_t *bip) -{ - xfs_buf_t *bp; - ASSERT(bip->bli_trace != NULL); - - bp = bip->bli_buf; - ktrace_enter(bip->bli_trace, - (void *)id, - (void *)bip->bli_buf, - (void *)((unsigned long)bip->bli_flags), - (void *)((unsigned long)bip->bli_recur), - (void *)((unsigned long)atomic_read(&bip->bli_refcount)), - (void *)((unsigned long) - (0xFFFFFFFF & XFS_BUF_ADDR(bp) >> 32)), - (void *)((unsigned long)(0xFFFFFFFF & XFS_BUF_ADDR(bp))), - (void *)((unsigned long)XFS_BUF_COUNT(bp)), - (void *)((unsigned long)XFS_BUF_BFLAGS(bp)), - XFS_BUF_FSPRIVATE(bp, void *), - XFS_BUF_FSPRIVATE2(bp, void *), - (void *)(unsigned long)XFS_BUF_ISPINNED(bp), - (void *)XFS_BUF_IODONE_FUNC(bp), - (void *)((unsigned long)(XFS_BUF_VALUSEMA(bp))), - (void *)bip->bli_item.li_desc, - (void *)((unsigned long)bip->bli_item.li_flags)); -} -#endif /* XFS_BLI_TRACE */ diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 5a41c348bb1..217f34af00c 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -70,22 +70,21 @@ typedef struct xfs_buf_log_format_t { #define XFS_BLI_INODE_ALLOC_BUF 0x10 #define XFS_BLI_STALE_INODE 0x20 +#define XFS_BLI_FLAGS \ + { XFS_BLI_HOLD, "HOLD" }, \ + { XFS_BLI_DIRTY, "DIRTY" }, \ + { XFS_BLI_STALE, "STALE" }, \ + { 
XFS_BLI_LOGGED, "LOGGED" }, \ + { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ + { XFS_BLI_STALE_INODE, "STALE_INODE" } + #ifdef __KERNEL__ struct xfs_buf; -struct ktrace; struct xfs_mount; struct xfs_buf_log_item; -#if defined(XFS_BLI_TRACE) -#define XFS_BLI_TRACE_SIZE 32 - -void xfs_buf_item_trace(char *, struct xfs_buf_log_item *); -#else -#define xfs_buf_item_trace(id, bip) -#endif - /* * This is the in core log item structure used to track information * needed to log buffers. It tracks how many times the lock has been @@ -97,9 +96,6 @@ typedef struct xfs_buf_log_item { unsigned int bli_flags; /* misc flags */ unsigned int bli_recur; /* lock recursion count */ atomic_t bli_refcount; /* cnt of tp refs */ -#ifdef XFS_BLI_TRACE - struct ktrace *bli_trace; /* event trace buf */ -#endif #ifdef XFS_TRANS_DEBUG char *bli_orig; /* original buffer copy */ char *bli_logged; /* bytes logged (bitmap) */ diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 2847bbc1c53..0ca556b4bf3 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -46,6 +46,7 @@ #include "xfs_dir2_block.h" #include "xfs_dir2_node.h" #include "xfs_error.h" +#include "xfs_trace.h" /* * xfs_da_btree.c @@ -1533,8 +1534,8 @@ xfs_da_hashname(const __uint8_t *name, int namelen) enum xfs_dacmp xfs_da_compname( struct xfs_da_args *args, - const char *name, - int len) + const unsigned char *name, + int len) { return (args->namelen == len && memcmp(args->name, name, len) == 0) ? 
XFS_CMP_EXACT : XFS_CMP_DIFFERENT; @@ -2107,7 +2108,7 @@ xfs_da_do_buf( (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC), mp, XFS_ERRTAG_DA_READ_BUF, XFS_RANDOM_DA_READ_BUF))) { - xfs_buftrace("DA READ ERROR", rbp->bps[0]); + trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_); XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)", XFS_ERRLEVEL_LOW, mp, info); error = XFS_ERROR(EFSCORRUPTED); diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 8c536167bf7..fe9f5a8c1d2 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -125,6 +125,13 @@ typedef struct xfs_da_args { #define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */ #define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */ +#define XFS_DA_OP_FLAGS \ + { XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \ + { XFS_DA_OP_RENAME, "RENAME" }, \ + { XFS_DA_OP_ADDNAME, "ADDNAME" }, \ + { XFS_DA_OP_OKNOENT, "OKNOENT" }, \ + { XFS_DA_OP_CILOOKUP, "CILOOKUP" } + /* * Structure to describe buffer(s) for a block. 
* This is needed in the directory version 2 format case, when @@ -202,7 +209,8 @@ typedef struct xfs_da_state { */ struct xfs_nameops { xfs_dahash_t (*hashname)(struct xfs_name *); - enum xfs_dacmp (*compname)(struct xfs_da_args *, const char *, int); + enum xfs_dacmp (*compname)(struct xfs_da_args *, + const unsigned char *, int); }; @@ -253,7 +261,7 @@ int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, uint xfs_da_hashname(const __uint8_t *name_string, int name_length); enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, - const char *name, int len); + const unsigned char *name, int len); xfs_da_state_t *xfs_da_state_alloc(void); diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index ab89a7e94a0..cd27c9d6c71 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -43,16 +43,23 @@ #include "xfs_error.h" #include "xfs_rw.h" #include "xfs_vnodeops.h" +#include "xfs_trace.h" + + +static int xfs_swap_extents( + xfs_inode_t *ip, /* target inode */ + xfs_inode_t *tip, /* tmp inode */ + xfs_swapext_t *sxp); /* - * Syssgi interface for swapext + * ioctl interface for swapext */ int xfs_swapext( xfs_swapext_t *sxp) { xfs_inode_t *ip, *tip; - struct file *file, *target_file; + struct file *file, *tmp_file; int error = 0; /* Pull information for the target fd */ @@ -67,56 +74,128 @@ xfs_swapext( goto out_put_file; } - target_file = fget((int)sxp->sx_fdtmp); - if (!target_file) { + tmp_file = fget((int)sxp->sx_fdtmp); + if (!tmp_file) { error = XFS_ERROR(EINVAL); goto out_put_file; } - if (!(target_file->f_mode & FMODE_WRITE) || - (target_file->f_flags & O_APPEND)) { + if (!(tmp_file->f_mode & FMODE_WRITE) || + (tmp_file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); - goto out_put_target_file; + goto out_put_tmp_file; } if (IS_SWAPFILE(file->f_path.dentry->d_inode) || - IS_SWAPFILE(target_file->f_path.dentry->d_inode)) { + IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) { error = XFS_ERROR(EINVAL); - goto out_put_target_file; + goto 
out_put_tmp_file; } ip = XFS_I(file->f_path.dentry->d_inode); - tip = XFS_I(target_file->f_path.dentry->d_inode); + tip = XFS_I(tmp_file->f_path.dentry->d_inode); if (ip->i_mount != tip->i_mount) { error = XFS_ERROR(EINVAL); - goto out_put_target_file; + goto out_put_tmp_file; } if (ip->i_ino == tip->i_ino) { error = XFS_ERROR(EINVAL); - goto out_put_target_file; + goto out_put_tmp_file; } if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { error = XFS_ERROR(EIO); - goto out_put_target_file; + goto out_put_tmp_file; } error = xfs_swap_extents(ip, tip, sxp); - out_put_target_file: - fput(target_file); + out_put_tmp_file: + fput(tmp_file); out_put_file: fput(file); out: return error; } -int +/* + * We need to check that the format of the data fork in the temporary inode is + * valid for the target inode before doing the swap. This is not a problem with + * attr1 because of the fixed fork offset, but attr2 has a dynamically sized + * data fork depending on the space the attribute fork is taking so we can get + * invalid formats on the target inode. + * + * E.g. target has space for 7 extents in extent format, temp inode only has + * space for 6. If we defragment down to 7 extents, then the tmp format is a + * btree, but when swapped it needs to be in extent format. Hence we can't just + * blindly swap data forks on attr2 filesystems. + * + * Note that we check the swap in both directions so that we don't end up with + * a corrupt temporary inode, either. + * + * Note that fixing the way xfs_fsr sets up the attribute fork in the source + * inode will prevent this situation from occurring, so all we do here is + * reject and log the attempt. basically we are putting the responsibility on + * userspace to get this right. 
+ */ +static int +xfs_swap_extents_check_format( + xfs_inode_t *ip, /* target inode */ + xfs_inode_t *tip) /* tmp inode */ +{ + + /* Should never get a local format */ + if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || + tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) + return EINVAL; + + /* + * if the target inode has fewer extents than the temporary inode then + * why did userspace call us? + */ + if (ip->i_d.di_nextents < tip->i_d.di_nextents) + return EINVAL; + + /* + * if the target inode is in extent form and the temp inode is in btree + * form then we will end up with the target inode in the wrong format + * as we already know there are fewer extents in the temp inode. + */ + if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && + tip->i_d.di_format == XFS_DINODE_FMT_BTREE) + return EINVAL; + + /* Check temp in extent form to max in target */ + if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max) + return EINVAL; + + /* Check target in extent form to max in temp */ + if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max) + return EINVAL; + + /* Check root block of temp in btree form to max in target */ + if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE && + XFS_IFORK_BOFF(ip) && + tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) + return EINVAL; + + /* Check root block of target in btree form to max in temp */ + if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && + XFS_IFORK_BOFF(tip) && + ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) + return EINVAL; + + return 0; +} + +static int xfs_swap_extents( - xfs_inode_t *ip, - xfs_inode_t *tip, + xfs_inode_t *ip, /* target inode */ + xfs_inode_t *tip, /* tmp inode */ xfs_swapext_t *sxp) { xfs_mount_t *mp; @@ -160,15 +239,7 @@ xfs_swap_extents( goto out_unlock; } - /* Should never get a local format */ - if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || - tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { - error =
XFS_ERROR(EINVAL); - goto out_unlock; - } - if (VN_CACHED(VFS_I(tip)) != 0) { - xfs_inval_cached_trace(tip, 0, -1, 0, -1); error = xfs_flushinval_pages(tip, 0, -1, FI_REMAPF_LOCKED); if (error) @@ -189,13 +260,15 @@ xfs_swap_extents( goto out_unlock; } - /* - * If the target has extended attributes, the tmp file - * must also in order to ensure the correct data fork - * format. - */ - if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { - error = XFS_ERROR(EINVAL); + trace_xfs_swap_extent_before(ip, 0); + trace_xfs_swap_extent_before(tip, 1); + + /* check inode formats now that data is flushed */ + error = xfs_swap_extents_check_format(ip, tip); + if (error) { + xfs_fs_cmn_err(CE_NOTE, mp, + "%s: inode 0x%llx format is incompatible for exchanging.", + __FILE__, ip->i_ino); goto out_unlock; } @@ -276,6 +349,16 @@ xfs_swap_extents( *tifp = *tempifp; /* struct copy */ /* + * Fix the in-memory data fork values that are dependent on the fork + * offset in the inode. We can't assume they remain the same as attr2 + * has dynamic fork offsets. 
+ */ + ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) / + (uint)sizeof(xfs_bmbt_rec_t); + tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) / + (uint)sizeof(xfs_bmbt_rec_t); + + /* * Fix the on-disk inode values */ tmp = (__uint64_t)ip->i_d.di_nblocks; @@ -347,6 +430,8 @@ xfs_swap_extents( error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); + trace_xfs_swap_extent_after(ip, 0); + trace_xfs_swap_extent_after(tip, 1); out: kmem_free(tempifp); return error; diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h index 4f55a630655..20bdd935c12 100644 --- a/fs/xfs/xfs_dfrag.h +++ b/fs/xfs/xfs_dfrag.h @@ -48,9 +48,6 @@ typedef struct xfs_swapext */ int xfs_swapext(struct xfs_swapext *sx); -int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, - struct xfs_swapext *sxp); - #endif /* __KERNEL__ */ #endif /* __XFS_DFRAG_H__ */ diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index bb1d58eb398..42520f04126 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -40,11 +40,11 @@ #include "xfs_dir2_leaf.h" #include "xfs_dir2_block.h" #include "xfs_dir2_node.h" -#include "xfs_dir2_trace.h" #include "xfs_error.h" #include "xfs_vnodeops.h" +#include "xfs_trace.h" -struct xfs_name xfs_name_dotdot = {"..", 2}; +struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2}; /* * ASCII case-insensitive (ie. 
A-Z) support for directories that was @@ -66,8 +66,8 @@ xfs_ascii_ci_hashname( STATIC enum xfs_dacmp xfs_ascii_ci_compname( struct xfs_da_args *args, - const char *name, - int len) + const unsigned char *name, + int len) { enum xfs_dacmp result; int i; @@ -247,7 +247,7 @@ xfs_dir_createname( int xfs_dir_cilookup_result( struct xfs_da_args *args, - const char *name, + const unsigned char *name, int len) { if (args->cmpresult == XFS_CMP_DIFFERENT) @@ -525,7 +525,8 @@ xfs_dir2_grow_inode( xfs_trans_t *tp; xfs_drfsbno_t nblks; - xfs_dir2_trace_args_s("grow_inode", args, space); + trace_xfs_dir2_grow_inode(args, space); + dp = args->dp; tp = args->trans; mp = dp->i_mount; @@ -703,7 +704,8 @@ xfs_dir2_shrink_inode( xfs_mount_t *mp; xfs_trans_t *tp; - xfs_dir2_trace_args_db("shrink_inode", args, db, bp); + trace_xfs_dir2_shrink_inode(args, db); + dp = args->dp; mp = dp->i_mount; tp = args->trans; diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h index 1d9ef96f33a..74a3b105768 100644 --- a/fs/xfs/xfs_dir2.h +++ b/fs/xfs/xfs_dir2.h @@ -100,7 +100,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, struct xfs_dabuf *bp); -extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const char *name, - int len); +extern int xfs_dir_cilookup_result(struct xfs_da_args *args, + const unsigned char *name, int len); #endif /* __XFS_DIR2_H__ */ diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index ab52e9e1c1e..779a267b0a8 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -36,8 +36,8 @@ #include "xfs_dir2_data.h" #include "xfs_dir2_leaf.h" #include "xfs_dir2_block.h" -#include "xfs_dir2_trace.h" #include "xfs_error.h" +#include "xfs_trace.h" /* * Local function prototypes. 
@@ -57,8 +57,8 @@ static xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot; void xfs_dir_startup(void) { - xfs_dir_hash_dot = xfs_da_hashname(".", 1); - xfs_dir_hash_dotdot = xfs_da_hashname("..", 2); + xfs_dir_hash_dot = xfs_da_hashname((unsigned char *)".", 1); + xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); } /* @@ -94,7 +94,8 @@ xfs_dir2_block_addname( __be16 *tagp; /* pointer to tag value */ xfs_trans_t *tp; /* transaction structure */ - xfs_dir2_trace_args("block_addname", args); + trace_xfs_dir2_block_addname(args); + dp = args->dp; tp = args->trans; mp = dp->i_mount; @@ -512,8 +513,9 @@ xfs_dir2_block_getdents( /* * If it didn't fit, set the final offset to here & return. */ - if (filldir(dirent, dep->name, dep->namelen, cook & 0x7fffffff, - be64_to_cpu(dep->inumber), DT_UNKNOWN)) { + if (filldir(dirent, (char *)dep->name, dep->namelen, + cook & 0x7fffffff, be64_to_cpu(dep->inumber), + DT_UNKNOWN)) { *offset = cook & 0x7fffffff; xfs_da_brelse(NULL, bp); return 0; @@ -590,7 +592,8 @@ xfs_dir2_block_lookup( int error; /* error return value */ xfs_mount_t *mp; /* filesystem mount point */ - xfs_dir2_trace_args("block_lookup", args); + trace_xfs_dir2_block_lookup(args); + /* * Get the buffer, look up the entry. * If not found (ENOENT) then return, have no buffer. @@ -747,7 +750,8 @@ xfs_dir2_block_removename( int size; /* shortform size */ xfs_trans_t *tp; /* transaction pointer */ - xfs_dir2_trace_args("block_removename", args); + trace_xfs_dir2_block_removename(args); + /* * Look up the entry in the block. Gets the buffer and entry index. * It will always be there, the vnodeops level does a lookup first. @@ -823,7 +827,8 @@ xfs_dir2_block_replace( int error; /* error return value */ xfs_mount_t *mp; /* filesystem mount point */ - xfs_dir2_trace_args("block_replace", args); + trace_xfs_dir2_block_replace(args); + /* * Lookup the entry in the directory. Get buffer and entry index. 
* This will always succeed since the caller has already done a lookup. @@ -897,7 +902,8 @@ xfs_dir2_leaf_to_block( int to; /* block/leaf to index */ xfs_trans_t *tp; /* transaction pointer */ - xfs_dir2_trace_args_bb("leaf_to_block", args, lbp, dbp); + trace_xfs_dir2_leaf_to_block(args); + dp = args->dp; tp = args->trans; mp = dp->i_mount; @@ -1044,7 +1050,8 @@ xfs_dir2_sf_to_block( xfs_trans_t *tp; /* transaction pointer */ struct xfs_name name; - xfs_dir2_trace_args("sf_to_block", args); + trace_xfs_dir2_sf_to_block(args); + dp = args->dp; tp = args->trans; mp = dp->i_mount; diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 41ad537c49e..e2d89854ec9 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -38,8 +38,8 @@ #include "xfs_dir2_leaf.h" #include "xfs_dir2_block.h" #include "xfs_dir2_node.h" -#include "xfs_dir2_trace.h" #include "xfs_error.h" +#include "xfs_trace.h" /* * Local function declarations. @@ -80,7 +80,8 @@ xfs_dir2_block_to_leaf( int needscan; /* need to rescan bestfree */ xfs_trans_t *tp; /* transaction pointer */ - xfs_dir2_trace_args_b("block_to_leaf", args, dbp); + trace_xfs_dir2_block_to_leaf(args); + dp = args->dp; mp = dp->i_mount; tp = args->trans; @@ -188,7 +189,8 @@ xfs_dir2_leaf_addname( xfs_trans_t *tp; /* transaction pointer */ xfs_dir2_db_t use_block; /* data block number */ - xfs_dir2_trace_args("leaf_addname", args); + trace_xfs_dir2_leaf_addname(args); + dp = args->dp; tp = args->trans; mp = dp->i_mount; @@ -1079,7 +1081,7 @@ xfs_dir2_leaf_getdents( dep = (xfs_dir2_data_entry_t *)ptr; length = xfs_dir2_data_entsize(dep->namelen); - if (filldir(dirent, dep->name, dep->namelen, + if (filldir(dirent, (char *)dep->name, dep->namelen, xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff, be64_to_cpu(dep->inumber), DT_UNKNOWN)) break; @@ -1266,7 +1268,8 @@ xfs_dir2_leaf_lookup( xfs_dir2_leaf_entry_t *lep; /* leaf entry */ xfs_trans_t *tp; /* transaction pointer */ - xfs_dir2_trace_args("leaf_lookup", args); + 
trace_xfs_dir2_leaf_lookup(args); + /* * Look up name in the leaf block, returning both buffers and index. */ @@ -1454,7 +1457,8 @@ xfs_dir2_leaf_removename( xfs_dir2_data_off_t oldbest; /* old value of best free */ xfs_trans_t *tp; /* transaction pointer */ - xfs_dir2_trace_args("leaf_removename", args); + trace_xfs_dir2_leaf_removename(args); + /* * Lookup the leaf entry, get the leaf and data blocks read in. */ @@ -1586,7 +1590,8 @@ xfs_dir2_leaf_replace( xfs_dir2_leaf_entry_t *lep; /* leaf entry */ xfs_trans_t *tp; /* transaction pointer */ - xfs_dir2_trace_args("leaf_replace", args); + trace_xfs_dir2_leaf_replace(args); + /* * Look up the entry. */ @@ -1766,7 +1771,9 @@ xfs_dir2_node_to_leaf( if (state->path.active > 1) return 0; args = state->args; - xfs_dir2_trace_args("node_to_leaf", args); + + trace_xfs_dir2_node_to_leaf(args); + mp = state->mp; dp = args->dp; tp = args->trans; diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 5a81ccd1045..78fc4d9ae75 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -37,8 +37,8 @@ #include "xfs_dir2_leaf.h" #include "xfs_dir2_block.h" #include "xfs_dir2_node.h" -#include "xfs_dir2_trace.h" #include "xfs_error.h" +#include "xfs_trace.h" /* * Function declarations. @@ -65,7 +65,7 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args, /* * Log entries from a freespace block. 
*/ -void +STATIC void xfs_dir2_free_log_bests( xfs_trans_t *tp, /* transaction pointer */ xfs_dabuf_t *bp, /* freespace buffer */ @@ -123,7 +123,8 @@ xfs_dir2_leaf_to_node( __be16 *to; /* pointer to freespace entry */ xfs_trans_t *tp; /* transaction pointer */ - xfs_dir2_trace_args_b("leaf_to_node", args, lbp); + trace_xfs_dir2_leaf_to_node(args); + dp = args->dp; mp = dp->i_mount; tp = args->trans; @@ -196,7 +197,8 @@ xfs_dir2_leafn_add( xfs_mount_t *mp; /* filesystem mount point */ xfs_trans_t *tp; /* transaction pointer */ - xfs_dir2_trace_args_sb("leafn_add", args, index, bp); + trace_xfs_dir2_leafn_add(args, index); + dp = args->dp; mp = dp->i_mount; tp = args->trans; @@ -711,8 +713,8 @@ xfs_dir2_leafn_moveents( int stale; /* count stale leaves copied */ xfs_trans_t *tp; /* transaction pointer */ - xfs_dir2_trace_args_bibii("leafn_moveents", args, bp_s, start_s, bp_d, - start_d, count); + trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count); + /* * Silently return if nothing to do. */ @@ -933,7 +935,8 @@ xfs_dir2_leafn_remove( int needscan; /* need to rescan data frees */ xfs_trans_t *tp; /* transaction pointer */ - xfs_dir2_trace_args_sb("leafn_remove", args, index, bp); + trace_xfs_dir2_leafn_remove(args, index); + dp = args->dp; tp = args->trans; mp = dp->i_mount; @@ -1363,7 +1366,8 @@ xfs_dir2_node_addname( int rval; /* sub-return value */ xfs_da_state_t *state; /* btree cursor */ - xfs_dir2_trace_args("node_addname", args); + trace_xfs_dir2_node_addname(args); + /* * Allocate and initialize the state (btree cursor). */ @@ -1822,7 +1826,8 @@ xfs_dir2_node_lookup( int rval; /* operation return value */ xfs_da_state_t *state; /* btree cursor */ - xfs_dir2_trace_args("node_lookup", args); + trace_xfs_dir2_node_lookup(args); + /* * Allocate and initialize the btree cursor. 
*/ @@ -1875,7 +1880,8 @@ xfs_dir2_node_removename( int rval; /* operation return value */ xfs_da_state_t *state; /* btree cursor */ - xfs_dir2_trace_args("node_removename", args); + trace_xfs_dir2_node_removename(args); + /* * Allocate and initialize the btree cursor. */ @@ -1944,7 +1950,8 @@ xfs_dir2_node_replace( int rval; /* internal return value */ xfs_da_state_t *state; /* btree cursor */ - xfs_dir2_trace_args("node_replace", args); + trace_xfs_dir2_node_replace(args); + /* * Allocate and initialize the btree cursor. */ diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h index dde72db3d69..82dfe714719 100644 --- a/fs/xfs/xfs_dir2_node.h +++ b/fs/xfs/xfs_dir2_node.h @@ -75,8 +75,6 @@ xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp)); } -extern void xfs_dir2_free_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp, - int first, int last); extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, struct xfs_dabuf *lbp); extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count); diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index e89734e8464..c1a5945d463 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -37,7 +37,7 @@ #include "xfs_dir2_data.h" #include "xfs_dir2_leaf.h" #include "xfs_dir2_block.h" -#include "xfs_dir2_trace.h" +#include "xfs_trace.h" /* * Prototypes for internal functions. 
@@ -169,7 +169,8 @@ xfs_dir2_block_to_sf( xfs_dir2_sf_t *sfp; /* shortform structure */ xfs_ino_t temp; - xfs_dir2_trace_args_sb("block_to_sf", args, size, bp); + trace_xfs_dir2_block_to_sf(args); + dp = args->dp; mp = dp->i_mount; @@ -281,7 +282,8 @@ xfs_dir2_sf_addname( xfs_dir2_sf_t *sfp; /* shortform structure */ xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ - xfs_dir2_trace_args("sf_addname", args); + trace_xfs_dir2_sf_addname(args); + ASSERT(xfs_dir2_sf_lookup(args) == ENOENT); dp = args->dp; ASSERT(dp->i_df.if_flags & XFS_IFINLINE); @@ -654,7 +656,8 @@ xfs_dir2_sf_create( xfs_dir2_sf_t *sfp; /* shortform structure */ int size; /* directory size */ - xfs_dir2_trace_args_i("sf_create", args, pino); + trace_xfs_dir2_sf_create(args); + dp = args->dp; ASSERT(dp != NULL); @@ -779,7 +782,7 @@ xfs_dir2_sf_getdents( } ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); - if (filldir(dirent, sfep->name, sfep->namelen, + if (filldir(dirent, (char *)sfep->name, sfep->namelen, off & 0x7fffffff, ino, DT_UNKNOWN)) { *offset = off & 0x7fffffff; return 0; @@ -808,7 +811,8 @@ xfs_dir2_sf_lookup( enum xfs_dacmp cmp; /* comparison result */ xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. 
entry */ - xfs_dir2_trace_args("sf_lookup", args); + trace_xfs_dir2_sf_lookup(args); + xfs_dir2_sf_check(args); dp = args->dp; @@ -891,7 +895,8 @@ xfs_dir2_sf_removename( xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ xfs_dir2_sf_t *sfp; /* shortform structure */ - xfs_dir2_trace_args("sf_removename", args); + trace_xfs_dir2_sf_removename(args); + dp = args->dp; ASSERT(dp->i_df.if_flags & XFS_IFINLINE); @@ -982,7 +987,8 @@ xfs_dir2_sf_replace( xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ xfs_dir2_sf_t *sfp; /* shortform structure */ - xfs_dir2_trace_args("sf_replace", args); + trace_xfs_dir2_sf_replace(args); + dp = args->dp; ASSERT(dp->i_df.if_flags & XFS_IFINLINE); @@ -1125,7 +1131,8 @@ xfs_dir2_sf_toino4( xfs_dir2_sf_entry_t *sfep; /* new sf entry */ xfs_dir2_sf_t *sfp; /* new sf directory */ - xfs_dir2_trace_args("sf_toino4", args); + trace_xfs_dir2_sf_toino4(args); + dp = args->dp; /* @@ -1202,7 +1209,8 @@ xfs_dir2_sf_toino8( xfs_dir2_sf_entry_t *sfep; /* new sf entry */ xfs_dir2_sf_t *sfp; /* new sf directory */ - xfs_dir2_trace_args("sf_toino8", args); + trace_xfs_dir2_sf_toino8(args); + dp = args->dp; /* diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c deleted file mode 100644 index 6cc7c0c681a..00000000000 --- a/fs/xfs/xfs_dir2_trace.c +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_inum.h" -#include "xfs_dir2.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_dir2_trace.h" - -#ifdef XFS_DIR2_TRACE -ktrace_t *xfs_dir2_trace_buf; - -/* - * Enter something in the trace buffers. - */ -static void -xfs_dir2_trace_enter( - xfs_inode_t *dp, - int type, - char *where, - char *name, - int namelen, - void *a0, - void *a1, - void *a2, - void *a3, - void *a4, - void *a5, - void *a6, - void *a7) -{ - void *n[5]; - - ASSERT(xfs_dir2_trace_buf); - ASSERT(dp->i_dir_trace); - if (name) - memcpy(n, name, min((int)sizeof(n), namelen)); - else - memset((char *)n, 0, sizeof(n)); - ktrace_enter(xfs_dir2_trace_buf, - (void *)(long)type, (void *)where, - (void *)a0, (void *)a1, (void *)a2, (void *)a3, - (void *)a4, (void *)a5, (void *)a6, (void *)a7, - (void *)(long)namelen, - (void *)n[0], (void *)n[1], (void *)n[2], - (void *)n[3], (void *)n[4]); - ktrace_enter(dp->i_dir_trace, - (void *)(long)type, (void *)where, - (void *)a0, (void *)a1, (void *)a2, (void *)a3, - (void *)a4, (void *)a5, (void *)a6, (void *)a7, - (void *)(long)namelen, - (void *)n[0], (void *)n[1], (void *)n[2], - (void *)n[3], (void *)n[4]); -} - -void -xfs_dir2_trace_args( - char *where, - xfs_da_args_t *args) -{ - xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS, where, - (char *)args->name, (int)args->namelen, - (void *)(unsigned long)args->hashval, - (void *)((unsigned long)(args->inumber >> 32)), - (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), - (void *)args->dp, (void *)args->trans, - (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK), - NULL, NULL); -} - 
-void -xfs_dir2_trace_args_b( - char *where, - xfs_da_args_t *args, - xfs_dabuf_t *bp) -{ - xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_B, where, - (char *)args->name, (int)args->namelen, - (void *)(unsigned long)args->hashval, - (void *)((unsigned long)(args->inumber >> 32)), - (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), - (void *)args->dp, (void *)args->trans, - (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK), - (void *)(bp ? bp->bps[0] : NULL), NULL); -} - -void -xfs_dir2_trace_args_bb( - char *where, - xfs_da_args_t *args, - xfs_dabuf_t *lbp, - xfs_dabuf_t *dbp) -{ - xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_BB, where, - (char *)args->name, (int)args->namelen, - (void *)(unsigned long)args->hashval, - (void *)((unsigned long)(args->inumber >> 32)), - (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), - (void *)args->dp, (void *)args->trans, - (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK), - (void *)(lbp ? lbp->bps[0] : NULL), - (void *)(dbp ? dbp->bps[0] : NULL)); -} - -void -xfs_dir2_trace_args_bibii( - char *where, - xfs_da_args_t *args, - xfs_dabuf_t *bs, - int ss, - xfs_dabuf_t *bd, - int sd, - int c) -{ - xfs_buf_t *bpbs = bs ? bs->bps[0] : NULL; - xfs_buf_t *bpbd = bd ? bd->bps[0] : NULL; - - xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_BIBII, where, - (char *)args->name, (int)args->namelen, - (void *)args->dp, (void *)args->trans, - (void *)bpbs, (void *)(long)ss, (void *)bpbd, (void *)(long)sd, - (void *)(long)c, NULL); -} - -void -xfs_dir2_trace_args_db( - char *where, - xfs_da_args_t *args, - xfs_dir2_db_t db, - xfs_dabuf_t *bp) -{ - xfs_buf_t *dbp = bp ? 
bp->bps[0] : NULL; - - xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_DB, where, - (char *)args->name, (int)args->namelen, - (void *)(unsigned long)args->hashval, - (void *)((unsigned long)(args->inumber >> 32)), - (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), - (void *)args->dp, (void *)args->trans, - (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK), - (void *)(long)db, (void *)dbp); -} - -void -xfs_dir2_trace_args_i( - char *where, - xfs_da_args_t *args, - xfs_ino_t i) -{ - xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_I, where, - (char *)args->name, (int)args->namelen, - (void *)(unsigned long)args->hashval, - (void *)((unsigned long)(args->inumber >> 32)), - (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), - (void *)args->dp, (void *)args->trans, - (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK), - (void *)((unsigned long)(i >> 32)), - (void *)((unsigned long)(i & 0xFFFFFFFF))); -} - -void -xfs_dir2_trace_args_s( - char *where, - xfs_da_args_t *args, - int s) -{ - xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_S, where, - (char *)args->name, (int)args->namelen, - (void *)(unsigned long)args->hashval, - (void *)((unsigned long)(args->inumber >> 32)), - (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), - (void *)args->dp, (void *)args->trans, - (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK), - (void *)(long)s, NULL); -} - -void -xfs_dir2_trace_args_sb( - char *where, - xfs_da_args_t *args, - int s, - xfs_dabuf_t *bp) -{ - xfs_buf_t *dbp = bp ? 
bp->bps[0] : NULL; - - xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_SB, where, - (char *)args->name, (int)args->namelen, - (void *)(unsigned long)args->hashval, - (void *)((unsigned long)(args->inumber >> 32)), - (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), - (void *)args->dp, (void *)args->trans, - (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK), - (void *)(long)s, (void *)dbp); -} -#endif /* XFS_DIR2_TRACE */ diff --git a/fs/xfs/xfs_dir2_trace.h b/fs/xfs/xfs_dir2_trace.h deleted file mode 100644 index ca3c754f482..00000000000 --- a/fs/xfs/xfs_dir2_trace.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2000,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_TRACE_H__ -#define __XFS_DIR2_TRACE_H__ - -/* - * Tracing for xfs v2 directories. 
- */ - -#if defined(XFS_DIR2_TRACE) - -struct ktrace; -struct xfs_dabuf; -struct xfs_da_args; - -#define XFS_DIR2_GTRACE_SIZE 4096 /* global buffer */ -#define XFS_DIR2_KTRACE_SIZE 32 /* per-inode buffer */ -extern struct ktrace *xfs_dir2_trace_buf; - -#define XFS_DIR2_KTRACE_ARGS 1 /* args only */ -#define XFS_DIR2_KTRACE_ARGS_B 2 /* args + buffer */ -#define XFS_DIR2_KTRACE_ARGS_BB 3 /* args + 2 buffers */ -#define XFS_DIR2_KTRACE_ARGS_DB 4 /* args, db, buffer */ -#define XFS_DIR2_KTRACE_ARGS_I 5 /* args, inum */ -#define XFS_DIR2_KTRACE_ARGS_S 6 /* args, int */ -#define XFS_DIR2_KTRACE_ARGS_SB 7 /* args, int, buffer */ -#define XFS_DIR2_KTRACE_ARGS_BIBII 8 /* args, buf/int/buf/int/int */ - -void xfs_dir2_trace_args(char *where, struct xfs_da_args *args); -void xfs_dir2_trace_args_b(char *where, struct xfs_da_args *args, - struct xfs_dabuf *bp); -void xfs_dir2_trace_args_bb(char *where, struct xfs_da_args *args, - struct xfs_dabuf *lbp, struct xfs_dabuf *dbp); -void xfs_dir2_trace_args_bibii(char *where, struct xfs_da_args *args, - struct xfs_dabuf *bs, int ss, - struct xfs_dabuf *bd, int sd, int c); -void xfs_dir2_trace_args_db(char *where, struct xfs_da_args *args, - xfs_dir2_db_t db, struct xfs_dabuf *bp); -void xfs_dir2_trace_args_i(char *where, struct xfs_da_args *args, xfs_ino_t i); -void xfs_dir2_trace_args_s(char *where, struct xfs_da_args *args, int s); -void xfs_dir2_trace_args_sb(char *where, struct xfs_da_args *args, int s, - struct xfs_dabuf *bp); - -#else /* XFS_DIR2_TRACE */ - -#define xfs_dir2_trace_args(where, args) -#define xfs_dir2_trace_args_b(where, args, bp) -#define xfs_dir2_trace_args_bb(where, args, lbp, dbp) -#define xfs_dir2_trace_args_bibii(where, args, bs, ss, bd, sd, c) -#define xfs_dir2_trace_args_db(where, args, db, bp) -#define xfs_dir2_trace_args_i(where, args, i) -#define xfs_dir2_trace_args_s(where, args, s) -#define xfs_dir2_trace_args_sb(where, args, s, bp) - -#endif /* XFS_DIR2_TRACE */ - -#endif /* __XFS_DIR2_TRACE_H__ */ 
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 05a4bdd4be3..6f35ed1b39b 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -82,7 +82,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip, log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); log_vector->i_len = size; - XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFI_FORMAT); + log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT; ASSERT(size >= sizeof(xfs_efi_log_format_t)); } @@ -406,7 +406,7 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp, log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); log_vector->i_len = size; - XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFD_FORMAT); + log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT; ASSERT(size >= sizeof(xfs_efd_log_format_t)); } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index edf8bdf4141..390850ee660 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -34,6 +34,7 @@ #include "xfs_utils.h" #include "xfs_mru_cache.h" #include "xfs_filestream.h" +#include "xfs_trace.h" #ifdef XFS_FILESTREAMS_TRACE @@ -139,6 +140,7 @@ _xfs_filestream_pick_ag( int flags, xfs_extlen_t minlen) { + int streams, max_streams; int err, trylock, nscan; xfs_extlen_t longest, free, minfree, maxfree = 0; xfs_agnumber_t ag, max_ag = NULLAGNUMBER; @@ -154,15 +156,15 @@ _xfs_filestream_pick_ag( trylock = XFS_ALLOC_FLAG_TRYLOCK; for (nscan = 0; 1; nscan++) { - - TRACE_AG_SCAN(mp, ag, xfs_filestream_peek_ag(mp, ag)); - - pag = mp->m_perag + ag; + pag = xfs_perag_get(mp, ag); + TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms)); if (!pag->pagf_init) { err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); - if (err && !trylock) + if (err && !trylock) { + xfs_perag_put(pag); return err; + } } /* Might fail sometimes during the 1st pass with trylock set. */ @@ -172,6 +174,7 @@ _xfs_filestream_pick_ag( /* Keep track of the AG with the most free blocks. 
*/ if (pag->pagf_freeblks > maxfree) { maxfree = pag->pagf_freeblks; + max_streams = atomic_read(&pag->pagf_fstrms); max_ag = ag; } @@ -194,6 +197,8 @@ _xfs_filestream_pick_ag( /* Break out, retaining the reference on the AG. */ free = pag->pagf_freeblks; + streams = atomic_read(&pag->pagf_fstrms); + xfs_perag_put(pag); *agp = ag; break; } @@ -201,6 +206,7 @@ _xfs_filestream_pick_ag( /* Drop the reference on this AG, it's not usable. */ xfs_filestream_put_ag(mp, ag); next_ag: + xfs_perag_put(pag); /* Move to the next AG, wrapping to AG 0 if necessary. */ if (++ag >= mp->m_sb.sb_agcount) ag = 0; @@ -228,6 +234,7 @@ next_ag: if (max_ag != NULLAGNUMBER) { xfs_filestream_get_ag(mp, max_ag); TRACE_AG_PICK1(mp, max_ag, maxfree); + streams = max_streams; free = maxfree; *agp = max_ag; break; @@ -239,16 +246,14 @@ next_ag: return 0; } - TRACE_AG_PICK2(mp, startag, *agp, xfs_filestream_peek_ag(mp, *agp), - free, nscan, flags); + TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags); return 0; } /* * Set the allocation group number for a file or a directory, updating inode - * references and per-AG references as appropriate. Must be called with the - * m_peraglock held in read mode. + * references and per-AG references as appropriate. */ static int _xfs_filestream_update_ag( @@ -394,9 +399,7 @@ xfs_filestream_init(void) item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); if (!item_zone) return -ENOMEM; -#ifdef XFS_FILESTREAMS_TRACE - xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_NOFS); -#endif + return 0; } @@ -407,9 +410,6 @@ xfs_filestream_init(void) void xfs_filestream_uninit(void) { -#ifdef XFS_FILESTREAMS_TRACE - ktrace_free(xfs_filestreams_trace_buf); -#endif kmem_zone_destroy(item_zone); } @@ -455,20 +455,6 @@ xfs_filestream_unmount( } /* - * If the mount point's m_perag array is going to be reallocated, all - * outstanding cache entries must be flushed to avoid accessing reference count - * addresses that have been freed. 
The call to xfs_filestream_flush() must be - * made inside the block that holds the m_peraglock in write mode to do the - * reallocation. - */ -void -xfs_filestream_flush( - xfs_mount_t *mp) -{ - xfs_mru_cache_flush(mp->m_filestream); -} - -/* * Return the AG of the filestream the file or directory belongs to, or * NULLAGNUMBER otherwise. */ @@ -530,7 +516,6 @@ xfs_filestream_associate( mp = pip->i_mount; cache = mp->m_filestream; - down_read(&mp->m_peraglock); /* * We have a problem, Houston. @@ -547,10 +532,8 @@ xfs_filestream_associate( * * So, if we can't get the iolock without sleeping then just give up */ - if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) { - up_read(&mp->m_peraglock); + if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) return 1; - } /* If the parent directory is already in the cache, use its AG. */ item = xfs_mru_cache_lookup(cache, pip->i_ino); @@ -605,7 +588,6 @@ exit_did_pick: exit: xfs_iunlock(pip, XFS_IOLOCK_EXCL); - up_read(&mp->m_peraglock); return -err; } diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h index f655f7dc334..260f757bbc5 100644 --- a/fs/xfs/xfs_filestream.h +++ b/fs/xfs/xfs_filestream.h @@ -79,28 +79,49 @@ extern ktrace_t *xfs_filestreams_trace_buf; * the cache that reference per-ag array elements that have since been * reallocated. 
*/ -STATIC_INLINE int +/* + * xfs_filestream_peek_ag is only used in tracing code + */ +static inline int xfs_filestream_peek_ag( xfs_mount_t *mp, xfs_agnumber_t agno) { - return atomic_read(&mp->m_perag[agno].pagf_fstrms); + struct xfs_perag *pag; + int ret; + + pag = xfs_perag_get(mp, agno); + ret = atomic_read(&pag->pagf_fstrms); + xfs_perag_put(pag); + return ret; } -STATIC_INLINE int +static inline int xfs_filestream_get_ag( xfs_mount_t *mp, xfs_agnumber_t agno) { - return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms); + struct xfs_perag *pag; + int ret; + + pag = xfs_perag_get(mp, agno); + ret = atomic_inc_return(&pag->pagf_fstrms); + xfs_perag_put(pag); + return ret; } -STATIC_INLINE int +static inline int xfs_filestream_put_ag( xfs_mount_t *mp, xfs_agnumber_t agno) { - return atomic_dec_return(&mp->m_perag[agno].pagf_fstrms); + struct xfs_perag *pag; + int ret; + + pag = xfs_perag_get(mp, agno); + ret = atomic_dec_return(&pag->pagf_fstrms); + xfs_perag_put(pag); + return ret; } /* allocation selection flags */ @@ -114,7 +135,6 @@ int xfs_filestream_init(void); void xfs_filestream_uninit(void); int xfs_filestream_mount(struct xfs_mount *mp); void xfs_filestream_unmount(struct xfs_mount *mp); -void xfs_filestream_flush(struct xfs_mount *mp); xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip); void xfs_filestream_deassociate(struct xfs_inode *ip); @@ -122,7 +142,7 @@ int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp); /* filestreams for the inode? 
*/ -STATIC_INLINE int +static inline int xfs_inode_is_filestream( struct xfs_inode *ip) { diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 2d0b3e1da9e..37a6f62c57b 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -45,6 +45,7 @@ #include "xfs_rtalloc.h" #include "xfs_rw.h" #include "xfs_filestream.h" +#include "xfs_trace.h" /* * File system operations @@ -166,27 +167,14 @@ xfs_growfs_data_private( } new = nb - mp->m_sb.sb_dblocks; oagcount = mp->m_sb.sb_agcount; - if (nagcount > oagcount) { - void *new_perag, *old_perag; - - xfs_filestream_flush(mp); - - new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount, - KM_MAYFAIL); - if (!new_perag) - return XFS_ERROR(ENOMEM); - - down_write(&mp->m_peraglock); - memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount); - old_perag = mp->m_perag; - mp->m_perag = new_perag; - - mp->m_flags |= XFS_MOUNT_32BITINODES; - nagimax = xfs_initialize_perag(mp, nagcount); - up_write(&mp->m_peraglock); - kmem_free(old_perag); + /* allocate the new per-ag structures */ + if (nagcount > oagcount) { + error = xfs_initialize_perag(mp, nagcount, &nagimax); + if (error) + return error; } + tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); tp->t_flags |= XFS_TRANS_RESERVE; if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp), @@ -195,14 +183,19 @@ xfs_growfs_data_private( return error; } + /* + * Write new AG headers to disk. Non-transactional, but written + * synchronously so they are completed prior to the growfs transaction + * being logged. 
+ */ nfree = 0; for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { /* * AG freelist header block */ bp = xfs_buf_get(mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0); + XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED); agf = XFS_BUF_TO_AGF(bp); memset(agf, 0, mp->m_sb.sb_sectsize); agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); @@ -233,8 +226,8 @@ xfs_growfs_data_private( * AG inode header block */ bp = xfs_buf_get(mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0); + XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED); agi = XFS_BUF_TO_AGI(bp); memset(agi, 0, mp->m_sb.sb_sectsize); agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); @@ -257,8 +250,9 @@ xfs_growfs_data_private( * BNO btree root block */ bp = xfs_buf_get(mp->m_ddev_targp, - XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), - BTOBB(mp->m_sb.sb_blocksize), 0); + XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), + BTOBB(mp->m_sb.sb_blocksize), + XBF_LOCK | XBF_MAPPED); block = XFS_BUF_TO_BLOCK(bp); memset(block, 0, mp->m_sb.sb_blocksize); block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC); @@ -278,8 +272,9 @@ xfs_growfs_data_private( * CNT btree root block */ bp = xfs_buf_get(mp->m_ddev_targp, - XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), - BTOBB(mp->m_sb.sb_blocksize), 0); + XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), + BTOBB(mp->m_sb.sb_blocksize), + XBF_LOCK | XBF_MAPPED); block = XFS_BUF_TO_BLOCK(bp); memset(block, 0, mp->m_sb.sb_blocksize); block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC); @@ -300,8 +295,9 @@ xfs_growfs_data_private( * INO btree root block */ bp = xfs_buf_get(mp->m_ddev_targp, - XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), - BTOBB(mp->m_sb.sb_blocksize), 0); + XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), + BTOBB(mp->m_sb.sb_blocksize), + XBF_LOCK | XBF_MAPPED); block = XFS_BUF_TO_BLOCK(bp); memset(block, 0, 
mp->m_sb.sb_blocksize); block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC); @@ -344,6 +340,7 @@ xfs_growfs_data_private( be32_add_cpu(&agf->agf_length, new); ASSERT(be32_to_cpu(agf->agf_length) == be32_to_cpu(agi->agi_length)); + xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); /* * Free the new space. @@ -354,6 +351,12 @@ xfs_growfs_data_private( goto error0; } } + + /* + * Update changed superblock fields transactionally. These are not + * seen by the rest of the world until the transaction commit applies + * them atomically to the superblock. + */ if (nagcount > oagcount) xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount); if (nb > mp->m_sb.sb_dblocks) @@ -364,9 +367,9 @@ xfs_growfs_data_private( if (dpct) xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); error = xfs_trans_commit(tp, 0); - if (error) { + if (error) return error; - } + /* New allocation groups fully initialized, so update mount struct */ if (nagimax) mp->m_maxagi = nagimax; @@ -376,6 +379,8 @@ xfs_growfs_data_private( mp->m_maxicount = icount << mp->m_sb.sb_inopblog; } else mp->m_maxicount = 0; + + /* update secondary superblocks. 
*/ for (agno = 1; agno < nagcount; agno++) { error = xfs_read_buf(mp, mp->m_ddev_targp, XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), @@ -611,7 +616,7 @@ xfs_fs_log_dummy( xfs_inode_t *ip; int error; - tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); + tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); if (error) { xfs_trans_cancel(tp, 0); diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 0785797db82..9d884c127bb 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -205,7 +205,7 @@ xfs_ialloc_inode_init( d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize * blks_per_cluster, - XFS_BUF_LOCK); + XBF_LOCK); ASSERT(fbuf); ASSERT(!XFS_BUF_GETERROR(fbuf)); @@ -253,6 +253,7 @@ xfs_ialloc_ag_alloc( xfs_agino_t thisino; /* current inode number, for loop */ int isaligned = 0; /* inode allocation at stripe unit */ /* boundary */ + struct xfs_perag *pag; args.tp = tp; args.mp = tp->t_mountp; @@ -382,9 +383,9 @@ xfs_ialloc_ag_alloc( newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); be32_add_cpu(&agi->agi_count, newlen); be32_add_cpu(&agi->agi_freecount, newlen); - down_read(&args.mp->m_peraglock); - args.mp->m_perag[agno].pagi_freecount += newlen; - up_read(&args.mp->m_peraglock); + pag = xfs_perag_get(args.mp, agno); + pag->pagi_freecount += newlen; + xfs_perag_put(pag); agi->agi_newino = cpu_to_be32(newino); /* @@ -425,7 +426,7 @@ xfs_ialloc_ag_alloc( return 0; } -STATIC_INLINE xfs_agnumber_t +STATIC xfs_agnumber_t xfs_ialloc_next_ag( xfs_mount_t *mp) { @@ -486,9 +487,8 @@ xfs_ialloc_ag_select( */ agno = pagno; flags = XFS_ALLOC_FLAG_TRYLOCK; - down_read(&mp->m_peraglock); for (;;) { - pag = &mp->m_perag[agno]; + pag = xfs_perag_get(mp, agno); if (!pag->pagi_init) { if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { agbp = NULL; @@ -527,7 +527,7 @@ xfs_ialloc_ag_select( agbp = NULL; goto nextag; } - 
up_read(&mp->m_peraglock); + xfs_perag_put(pag); return agbp; } } @@ -535,22 +535,19 @@ unlock_nextag: if (agbp) xfs_trans_brelse(tp, agbp); nextag: + xfs_perag_put(pag); /* * No point in iterating over the rest, if we're shutting * down. */ - if (XFS_FORCED_SHUTDOWN(mp)) { - up_read(&mp->m_peraglock); + if (XFS_FORCED_SHUTDOWN(mp)) return NULL; - } agno++; if (agno >= agcount) agno = 0; if (agno == pagno) { - if (flags == 0) { - up_read(&mp->m_peraglock); + if (flags == 0) return NULL; - } flags = 0; } } @@ -672,6 +669,7 @@ xfs_dialloc( xfs_agnumber_t tagno; /* testing allocation group number */ xfs_btree_cur_t *tcur; /* temp cursor */ xfs_inobt_rec_incore_t trec; /* temp inode allocation record */ + struct xfs_perag *pag; if (*IO_agbp == NULL) { @@ -771,13 +769,13 @@ nextag: *inop = NULLFSINO; return noroom ? ENOSPC : 0; } - down_read(&mp->m_peraglock); - if (mp->m_perag[tagno].pagi_inodeok == 0) { - up_read(&mp->m_peraglock); + pag = xfs_perag_get(mp, tagno); + if (pag->pagi_inodeok == 0) { + xfs_perag_put(pag); goto nextag; } error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); - up_read(&mp->m_peraglock); + xfs_perag_put(pag); if (error) goto nextag; agi = XFS_BUF_TO_AGI(agbp); @@ -790,6 +788,7 @@ nextag: */ agno = tagno; *IO_agbp = NULL; + pag = xfs_perag_get(mp, agno); restart_pagno: cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); @@ -808,7 +807,6 @@ nextag: * If in the same AG as the parent, try to get near the parent. 
*/ if (pagno == agno) { - xfs_perag_t *pag = &mp->m_perag[agno]; int doneleft; /* done, to the left */ int doneright; /* done, to the right */ int searchdistance = 10; @@ -1006,9 +1004,7 @@ alloc_inode: goto error0; be32_add_cpu(&agi->agi_freecount, -1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); - mp->m_perag[tagno].pagi_freecount--; - up_read(&mp->m_peraglock); + pag->pagi_freecount--; error = xfs_check_agi_freecount(cur, agi); if (error) @@ -1016,12 +1012,14 @@ alloc_inode: xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); + xfs_perag_put(pag); *inop = ino; return 0; error1: xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); error0: xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + xfs_perag_put(pag); return error; } @@ -1052,6 +1050,7 @@ xfs_difree( xfs_mount_t *mp; /* mount structure for filesystem */ int off; /* offset of inode in inode chunk */ xfs_inobt_rec_incore_t rec; /* btree record */ + struct xfs_perag *pag; mp = tp->t_mountp; @@ -1088,9 +1087,7 @@ xfs_difree( /* * Get the allocation group header. */ - down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - up_read(&mp->m_peraglock); if (error) { cmn_err(CE_WARN, "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. 
Returning error.", @@ -1157,9 +1154,9 @@ xfs_difree( be32_add_cpu(&agi->agi_count, -ilen); be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); - mp->m_perag[agno].pagi_freecount -= ilen - 1; - up_read(&mp->m_peraglock); + pag = xfs_perag_get(mp, agno); + pag->pagi_freecount -= ilen - 1; + xfs_perag_put(pag); xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); @@ -1188,9 +1185,9 @@ xfs_difree( */ be32_add_cpu(&agi->agi_freecount, 1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); - mp->m_perag[agno].pagi_freecount++; - up_read(&mp->m_peraglock); + pag = xfs_perag_get(mp, agno); + pag->pagi_freecount++; + xfs_perag_put(pag); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); } @@ -1312,9 +1309,7 @@ xfs_imap( xfs_buf_t *agbp; /* agi buffer */ int i; /* temp state */ - down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - up_read(&mp->m_peraglock); if (error) { xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_ialloc_read_agi() returned " @@ -1379,7 +1374,6 @@ xfs_imap( XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); return XFS_ERROR(EINVAL); } - return 0; } @@ -1523,8 +1517,7 @@ xfs_ialloc_read_agi( return error; agi = XFS_BUF_TO_AGI(*bpp); - pag = &mp->m_perag[agno]; - + pag = xfs_perag_get(mp, agno); if (!pag->pagi_init) { pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); pag->pagi_count = be32_to_cpu(agi->agi_count); @@ -1537,6 +1530,7 @@ xfs_ialloc_read_agi( */ ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || XFS_FORCED_SHUTDOWN(mp)); + xfs_perag_put(pag); return 0; } diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 80e526489be..e281eb4a1c4 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -43,7 +43,7 @@ #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_btree_trace.h" -#include "xfs_dir2_trace.h" +#include "xfs_trace.h" /* @@ 
-74,6 +74,8 @@ xfs_inode_alloc( ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); + mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); + /* initialise the xfs inode */ ip->i_ino = ino; ip->i_mount = mp; @@ -87,30 +89,8 @@ xfs_inode_alloc( ip->i_size = 0; ip->i_new_size = 0; - /* - * Initialize inode's trace buffers. - */ -#ifdef XFS_INODE_TRACE - ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_BMAP_TRACE - ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_BTREE_TRACE - ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_RW_TRACE - ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_ILOCK_TRACE - ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_DIR2_TRACE - ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); -#endif - /* prevent anyone from using this yet */ - VFS_I(ip)->i_state = I_NEW|I_LOCK; + VFS_I(ip)->i_state = I_NEW; return ip; } @@ -130,25 +110,6 @@ xfs_inode_free( if (ip->i_afp) xfs_idestroy_fork(ip, XFS_ATTR_FORK); -#ifdef XFS_INODE_TRACE - ktrace_free(ip->i_trace); -#endif -#ifdef XFS_BMAP_TRACE - ktrace_free(ip->i_xtrace); -#endif -#ifdef XFS_BTREE_TRACE - ktrace_free(ip->i_btrace); -#endif -#ifdef XFS_RW_TRACE - ktrace_free(ip->i_rwtrace); -#endif -#ifdef XFS_ILOCK_TRACE - ktrace_free(ip->i_lock_trace); -#endif -#ifdef XFS_DIR2_TRACE - ktrace_free(ip->i_dir_trace); -#endif - if (ip->i_itemp) { /* * Only if we are shutting down the fs will we see an @@ -207,6 +168,7 @@ xfs_iget_cache_hit( * instead of polling for it. */ if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { + trace_xfs_iget_skip(ip); XFS_STATS_INC(xs_ig_frecycle); error = EAGAIN; goto out_error; @@ -225,7 +187,7 @@ xfs_iget_cache_hit( * Need to carefully get it back into useable state. 
*/ if (ip->i_flags & XFS_IRECLAIMABLE) { - xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); + trace_xfs_iget_reclaim(ip); /* * We need to set XFS_INEW atomically with clearing the @@ -251,9 +213,10 @@ xfs_iget_cache_hit( ip->i_flags &= ~XFS_INEW; ip->i_flags |= XFS_IRECLAIMABLE; __xfs_inode_set_reclaim_tag(pag, ip); + trace_xfs_iget_reclaim(ip); goto out_error; } - inode->i_state = I_LOCK|I_NEW; + inode->i_state = I_NEW; } else { /* If the VFS inode is being torn down, pause and try again. */ if (!igrab(inode)) { @@ -270,8 +233,9 @@ xfs_iget_cache_hit( xfs_ilock(ip, lock_flags); xfs_iflags_clear(ip, XFS_ISTALE); - xfs_itrace_exit_tag(ip, "xfs_iget.found"); XFS_STATS_INC(xs_ig_found); + + trace_xfs_iget_found(ip); return 0; out_error: @@ -290,7 +254,7 @@ xfs_iget_cache_miss( struct xfs_inode **ipp, xfs_daddr_t bno, int flags, - int lock_flags) __releases(pag->pag_ici_lock) + int lock_flags) { struct xfs_inode *ip; int error; @@ -305,7 +269,7 @@ xfs_iget_cache_miss( if (error) goto out_destroy; - xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); + xfs_itrace_entry(ip); if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { error = ENOENT; @@ -350,6 +314,8 @@ xfs_iget_cache_miss( write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); + + trace_xfs_iget_alloc(ip); *ipp = ip; return 0; @@ -408,7 +374,7 @@ xfs_iget( return EINVAL; /* get the perag structure and ensure that it's inode capable */ - pag = xfs_get_perag(mp, ino); + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); if (!pag->pagi_inodeok) return EINVAL; ASSERT(pag->pag_ici_init); @@ -432,7 +398,7 @@ again: if (error) goto out_error_or_again; } - xfs_put_perag(mp, pag); + xfs_perag_put(pag); *ipp = ip; @@ -451,7 +417,7 @@ out_error_or_again: delay(1); goto again; } - xfs_put_perag(mp, pag); + xfs_perag_put(pag); return error; } @@ -511,19 +477,23 @@ xfs_ireclaim( { struct xfs_mount *mp = ip->i_mount; struct xfs_perag *pag; + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); XFS_STATS_INC(xs_ig_reclaims); /* - 
* Remove the inode from the per-AG radix tree. It doesn't matter - * if it was never added to it because radix_tree_delete can deal - * with that case just fine. + * Remove the inode from the per-AG radix tree. + * + * Because radix_tree_delete won't complain even if the item was never + * added to the tree assert that it's been there before to catch + * problems with the inode life time early on. */ - pag = xfs_get_perag(mp, ip->i_ino); + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); write_lock(&pag->pag_ici_lock); - radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); + if (!radix_tree_delete(&pag->pag_ici_root, agino)) + ASSERT(0); write_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); + xfs_perag_put(pag); /* * Here we do an (almost) spurious inode lock in order to coordinate @@ -636,7 +606,7 @@ xfs_ilock( else if (lock_flags & XFS_ILOCK_SHARED) mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); - xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); + trace_xfs_ilock(ip, lock_flags, _RET_IP_); } /* @@ -681,7 +651,7 @@ xfs_ilock_nowait( if (!mrtryaccess(&ip->i_lock)) goto out_undo_iolock; } - xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address); + trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_); return 1; out_undo_iolock: @@ -743,7 +713,7 @@ xfs_iunlock( xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp, (xfs_log_item_t*)(ip->i_itemp)); } - xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); + trace_xfs_iunlock(ip, lock_flags, _RET_IP_); } /* @@ -762,6 +732,8 @@ xfs_ilock_demote( mrdemote(&ip->i_lock); if (lock_flags & XFS_IOLOCK_EXCL) mrdemote(&ip->i_iolock); + + trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_); } #ifdef DEBUG @@ -792,52 +764,3 @@ xfs_isilocked( return 1; } #endif - -#ifdef XFS_INODE_TRACE - -#define KTRACE_ENTER(ip, vk, s, line, ra) \ - ktrace_enter((ip)->i_trace, \ -/* 0 */ (void *)(__psint_t)(vk), \ -/* 1 */ (void *)(s), \ -/* 2 */ (void *)(__psint_t) line, \ -/* 
3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \ -/* 4 */ (void *)(ra), \ -/* 5 */ NULL, \ -/* 6 */ (void *)(__psint_t)current_cpu(), \ -/* 7 */ (void *)(__psint_t)current_pid(), \ -/* 8 */ (void *)__return_address, \ -/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL) - -/* - * Vnode tracing code. - */ -void -_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra); -} - -void -_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra); -} - -void -xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra); -} - -void -_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra); -} - -void -xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra); -} -#endif /* XFS_INODE_TRACE */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b92a4fa2a0a..fa31360046d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -47,10 +47,10 @@ #include "xfs_rw.h" #include "xfs_error.h" #include "xfs_utils.h" -#include "xfs_dir2_trace.h" #include "xfs_quota.h" #include "xfs_filestream.h" #include "xfs_vnodeops.h" +#include "xfs_trace.h" kmem_zone_t *xfs_ifork_zone; kmem_zone_t *xfs_inode_zone; @@ -151,7 +151,7 @@ xfs_imap_to_bp( "an error %d on %s. 
Returning error.", error, mp->m_fsname); } else { - ASSERT(buf_flags & XFS_BUF_TRYLOCK); + ASSERT(buf_flags & XBF_TRYLOCK); } return error; } @@ -239,7 +239,7 @@ xfs_inotobp( if (error) return error; - error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); + error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags); if (error) return error; @@ -285,7 +285,7 @@ xfs_itobp( return error; if (!bp) { - ASSERT(buf_flags & XFS_BUF_TRYLOCK); + ASSERT(buf_flags & XBF_TRYLOCK); ASSERT(tp == NULL); *bpp = NULL; return EAGAIN; @@ -807,7 +807,7 @@ xfs_iread( * Get pointers to the on-disk inode and the buffer containing it. */ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, - XFS_BUF_LOCK, iget_flags); + XBF_LOCK, iget_flags); if (error) return error; dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); @@ -1291,42 +1291,6 @@ xfs_file_last_byte( return last_byte; } -#if defined(XFS_RW_TRACE) -STATIC void -xfs_itrunc_trace( - int tag, - xfs_inode_t *ip, - int flag, - xfs_fsize_t new_size, - xfs_off_t toss_start, - xfs_off_t toss_finish) -{ - if (ip->i_rwtrace == NULL) { - return; - } - - ktrace_enter(ip->i_rwtrace, - (void*)((long)tag), - (void*)ip, - (void*)(unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff), - (void*)(unsigned long)(ip->i_d.di_size & 0xffffffff), - (void*)((long)flag), - (void*)(unsigned long)((new_size >> 32) & 0xffffffff), - (void*)(unsigned long)(new_size & 0xffffffff), - (void*)(unsigned long)((toss_start >> 32) & 0xffffffff), - (void*)(unsigned long)(toss_start & 0xffffffff), - (void*)(unsigned long)((toss_finish >> 32) & 0xffffffff), - (void*)(unsigned long)(toss_finish & 0xffffffff), - (void*)(unsigned long)current_cpu(), - (void*)(unsigned long)current_pid(), - (void*)NULL, - (void*)NULL, - (void*)NULL); -} -#else -#define xfs_itrunc_trace(tag, ip, flag, new_size, toss_start, toss_finish) -#endif - /* * Start the truncation of the file to new_size. The new size * must be smaller than the current size. 
This routine will @@ -1409,8 +1373,7 @@ xfs_itruncate_start( return 0; } last_byte = xfs_file_last_byte(ip); - xfs_itrunc_trace(XFS_ITRUNC_START, ip, flags, new_size, toss_start, - last_byte); + trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte); if (last_byte > toss_start) { if (flags & XFS_ITRUNC_DEFINITE) { xfs_tosspages(ip, toss_start, @@ -1514,7 +1477,8 @@ xfs_itruncate_finish( new_size = 0LL; } first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); - xfs_itrunc_trace(XFS_ITRUNC_FINISH1, ip, 0, new_size, 0, 0); + trace_xfs_itruncate_finish_start(ip, new_size); + /* * The first thing we do is set the size to new_size permanently * on disk. This way we don't have to worry about anyone ever @@ -1731,7 +1695,7 @@ xfs_itruncate_finish( ASSERT((new_size != 0) || (fork == XFS_ATTR_FORK) || (ip->i_d.di_nextents == 0)); - xfs_itrunc_trace(XFS_ITRUNC_FINISH2, ip, 0, new_size, 0, 0); + trace_xfs_itruncate_finish_end(ip, new_size); return 0; } @@ -1787,7 +1751,7 @@ xfs_iunlink( * Here we put the head pointer into our next pointer, * and then we fall through to point the head at us. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); if (error) return error; @@ -1869,7 +1833,7 @@ xfs_iunlink_remove( * of dealing with the buffer when there is no need to * change it. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -1931,7 +1895,7 @@ xfs_iunlink_remove( * Now last_ibp points to the buffer previous to us on * the unlinked list. Pull us from the list. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. 
Returning error.", @@ -1982,8 +1946,9 @@ xfs_ifree_cluster( xfs_inode_t *ip, **ip_found; xfs_inode_log_item_t *iip; xfs_log_item_t *lip; - xfs_perag_t *pag = xfs_get_perag(mp, inum); + struct xfs_perag *pag; + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { blks_per_cluster = 1; ninodes = mp->m_sb.sb_inopblock; @@ -2075,7 +2040,7 @@ xfs_ifree_cluster( bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, mp->m_bsize * blks_per_cluster, - XFS_BUF_LOCK); + XBF_LOCK); pre_flushed = 0; lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); @@ -2124,7 +2089,7 @@ xfs_ifree_cluster( } kmem_free(ip_found); - xfs_put_perag(mp, pag); + xfs_perag_put(pag); } /* @@ -2186,7 +2151,7 @@ xfs_ifree( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK); if (error) return error; @@ -2519,13 +2484,16 @@ __xfs_iunpin_wait( return; /* Give the log a push to start the unpinning I/O */ - xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ? 
- iip->ili_last_lsn : 0, XFS_LOG_FORCE); + if (iip && iip->ili_last_lsn) + xfs_log_force_lsn(ip->i_mount, iip->ili_last_lsn, 0); + else + xfs_log_force(ip->i_mount, 0); + if (wait) wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); } -static inline void +void xfs_iunpin_wait( xfs_inode_t *ip) { @@ -2711,7 +2679,7 @@ xfs_iflush_cluster( xfs_buf_t *bp) { xfs_mount_t *mp = ip->i_mount; - xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + struct xfs_perag *pag; unsigned long first_index, mask; unsigned long inodes_per_cluster; int ilist_size; @@ -2722,6 +2690,7 @@ xfs_iflush_cluster( int bufwasdelwri; int i; + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); ASSERT(pag->pagi_inodeok); ASSERT(pag->pag_ici_init); @@ -2729,7 +2698,7 @@ xfs_iflush_cluster( ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); if (!ilist) - return 0; + goto out_put; mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; @@ -2798,6 +2767,8 @@ xfs_iflush_cluster( out_free: read_unlock(&pag->pag_ici_lock); kmem_free(ilist); +out_put: + xfs_perag_put(pag); return 0; @@ -2841,6 +2812,7 @@ cluster_corrupt_out: */ xfs_iflush_abort(iq); kmem_free(ilist); + xfs_perag_put(pag); return XFS_ERROR(EFSCORRUPTED); } @@ -2863,8 +2835,6 @@ xfs_iflush( xfs_dinode_t *dip; xfs_mount_t *mp; int error; - int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); - enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; XFS_STATS_INC(xs_iflush_count); @@ -2877,15 +2847,6 @@ xfs_iflush( mp = ip->i_mount; /* - * If the inode isn't dirty, then just release the inode - * flush lock and do nothing. - */ - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - return 0; - } - - /* * We can't flush the inode until it is unpinned, so wait for it if we * are allowed to block. 
We know noone new can pin it, because we are * holding the inode lock shared and you need to hold it exclusively to @@ -2896,7 +2857,7 @@ xfs_iflush( * in the same cluster are dirty, they will probably write the inode * out for us if they occur after the log force completes. */ - if (noblock && xfs_ipincount(ip)) { + if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) { xfs_iunpin_nowait(ip); xfs_ifunlock(ip); return EAGAIN; @@ -2904,6 +2865,19 @@ xfs_iflush( xfs_iunpin_wait(ip); /* + * For stale inodes we cannot rely on the backing buffer remaining + * stale in cache for the remaining life of the stale inode and so + * xfs_itobp() below may give us a buffer that no longer contains + * inodes below. We have to check this after ensuring the inode is + * unpinned so that it is safe to reclaim the stale inode after the + * flush call. + */ + if (xfs_iflags_test(ip, XFS_ISTALE)) { + xfs_ifunlock(ip); + return 0; + } + + /* * This may have been unpinned because the filesystem is shutting * down forcibly. If that's the case we must not write this inode * to disk, because the log record didn't make it to disk! @@ -2917,60 +2891,10 @@ xfs_iflush( } /* - * Decide how buffer will be flushed out. This is done before - * the call to xfs_iflush_int because this field is zeroed by it. - */ - if (iip != NULL && iip->ili_format.ilf_fields != 0) { - /* - * Flush out the inode buffer according to the directions - * of the caller. In the cases where the caller has given - * us a choice choose the non-delwri case. This is because - * the inode is in the AIL and we need to get it out soon. 
- */ - switch (flags) { - case XFS_IFLUSH_SYNC: - case XFS_IFLUSH_DELWRI_ELSE_SYNC: - flags = 0; - break; - case XFS_IFLUSH_ASYNC_NOBLOCK: - case XFS_IFLUSH_ASYNC: - case XFS_IFLUSH_DELWRI_ELSE_ASYNC: - flags = INT_ASYNC; - break; - case XFS_IFLUSH_DELWRI: - flags = INT_DELWRI; - break; - default: - ASSERT(0); - flags = 0; - break; - } - } else { - switch (flags) { - case XFS_IFLUSH_DELWRI_ELSE_SYNC: - case XFS_IFLUSH_DELWRI_ELSE_ASYNC: - case XFS_IFLUSH_DELWRI: - flags = INT_DELWRI; - break; - case XFS_IFLUSH_ASYNC_NOBLOCK: - case XFS_IFLUSH_ASYNC: - flags = INT_ASYNC; - break; - case XFS_IFLUSH_SYNC: - flags = 0; - break; - default: - ASSERT(0); - flags = 0; - break; - } - } - - /* * Get the buffer containing the on-disk inode. */ error = xfs_itobp(mp, NULL, ip, &dip, &bp, - noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); + (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK); if (error || !bp) { xfs_ifunlock(ip); return error; @@ -2988,7 +2912,7 @@ xfs_iflush( * get stuck waiting in the write for too long. */ if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + xfs_log_force(mp, 0); /* * inode clustering: @@ -2998,13 +2922,10 @@ xfs_iflush( if (error) goto cluster_corrupt_out; - if (flags & INT_DELWRI) { - xfs_bdwrite(mp, bp); - } else if (flags & INT_ASYNC) { - error = xfs_bawrite(mp, bp); - } else { + if (flags & SYNC_WAIT) error = xfs_bwrite(mp, bp); - } + else + xfs_bdwrite(mp, bp); return error; corrupt_out: @@ -3039,16 +2960,6 @@ xfs_iflush_int( iip = ip->i_itemp; mp = ip->i_mount; - - /* - * If the inode isn't dirty, then just release the inode - * flush lock and do nothing. 
- */ - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - return 0; - } - /* set *dip = inode's place in the buffer */ dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); @@ -3252,23 +3163,6 @@ corrupt_out: return XFS_ERROR(EFSCORRUPTED); } - - -#ifdef XFS_ILOCK_TRACE -void -xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra) -{ - ktrace_enter(ip->i_lock_trace, - (void *)ip, - (void *)(unsigned long)lock, /* 1 = LOCK, 3=UNLOCK, etc */ - (void *)(unsigned long)lockflags, /* XFS_ILOCK_EXCL etc */ - (void *)ra, /* caller of ilock */ - (void *)(unsigned long)current_cpu(), - (void *)(unsigned long)current_pid(), - NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL); -} -#endif - /* * Return a pointer to the extent record at file index idx. */ @@ -3300,13 +3194,17 @@ xfs_iext_get_ext( */ void xfs_iext_insert( - xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* starting index of new items */ xfs_extnum_t count, /* number of inserted items */ - xfs_bmbt_irec_t *new) /* items to insert */ + xfs_bmbt_irec_t *new, /* items to insert */ + int state) /* type of extent conversion */ { + xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; xfs_extnum_t i; /* extent record index */ + trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_); + ASSERT(ifp->if_flags & XFS_IFEXTENTS); xfs_iext_add(ifp, idx, count); for (i = idx; i < idx + count; i++, new++) @@ -3549,13 +3447,17 @@ xfs_iext_add_indirect_multi( */ void xfs_iext_remove( - xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* index to begin removing exts */ - int ext_diff) /* number of extents to remove */ + int ext_diff, /* number of extents to remove */ + int state) /* type of extent conversion */ { + xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? 
ip->i_afp : &ip->i_df; xfs_extnum_t nextents; /* number of extents in file */ int new_size; /* size of extents after removal */ + trace_xfs_iext_remove(ip, idx, state, _RET_IP_); + ASSERT(ext_diff > 0); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 41555de1d1d..6c912b02759 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -213,7 +213,6 @@ typedef struct xfs_icdinode { struct bhv_desc; struct cred; -struct ktrace; struct xfs_buf; struct xfs_bmap_free; struct xfs_bmbt_irec; @@ -222,13 +221,6 @@ struct xfs_mount; struct xfs_trans; struct xfs_dquot; -#if defined(XFS_ILOCK_TRACE) -#define XFS_ILOCK_KTRACE_SIZE 32 -extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *); -#else -#define xfs_ilock_trace(i,n,f,ra) -#endif - typedef struct dm_attrs_s { __uint32_t da_dmevmask; /* DMIG event mask */ __uint16_t da_dmstate; /* DMIG state info */ @@ -271,26 +263,6 @@ typedef struct xfs_inode { /* VFS inode */ struct inode i_vnode; /* embedded VFS inode */ - - /* Trace buffers per inode. */ -#ifdef XFS_INODE_TRACE - struct ktrace *i_trace; /* general inode trace */ -#endif -#ifdef XFS_BMAP_TRACE - struct ktrace *i_xtrace; /* inode extent list trace */ -#endif -#ifdef XFS_BTREE_TRACE - struct ktrace *i_btrace; /* inode bmap btree trace */ -#endif -#ifdef XFS_RW_TRACE - struct ktrace *i_rwtrace; /* inode read/write trace */ -#endif -#ifdef XFS_ILOCK_TRACE - struct ktrace *i_lock_trace; /* inode lock/unlock trace */ -#endif -#ifdef XFS_DIR2_TRACE - struct ktrace *i_dir_trace; /* inode directory trace */ -#endif } xfs_inode_t; #define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? 
\ @@ -406,6 +378,14 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) +#define XFS_LOCK_FLAGS \ + { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ + { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ + { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ + { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \ + { XFS_IUNLOCK_NONOTIFY, "IUNLOCK_NONOTIFY" } + + /* * Flags for lockdep annotations. * @@ -440,21 +420,15 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) /* - * Flags for xfs_iflush() - */ -#define XFS_IFLUSH_DELWRI_ELSE_SYNC 1 -#define XFS_IFLUSH_DELWRI_ELSE_ASYNC 2 -#define XFS_IFLUSH_SYNC 3 -#define XFS_IFLUSH_ASYNC 4 -#define XFS_IFLUSH_DELWRI 5 -#define XFS_IFLUSH_ASYNC_NOBLOCK 6 - -/* * Flags for xfs_itruncate_start(). */ #define XFS_ITRUNC_DEFINITE 0x1 #define XFS_ITRUNC_MAYBE 0x2 +#define XFS_ITRUNC_FLAGS \ + { XFS_ITRUNC_DEFINITE, "DEFINITE" }, \ + { XFS_ITRUNC_MAYBE, "MAYBE" } + /* * For multiple groups support: if S_ISGID bit is set in the parent * directory, group of new file is set to that of the parent, and @@ -499,6 +473,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); void xfs_ipin(xfs_inode_t *); void xfs_iunpin(xfs_inode_t *); +void xfs_iunpin_wait(xfs_inode_t *); int xfs_iflush(xfs_inode_t *, uint); void xfs_ichgtime(xfs_inode_t *, int); void xfs_lock_inodes(xfs_inode_t **, int, uint); @@ -507,48 +482,16 @@ void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); void xfs_synchronize_times(xfs_inode_t *); void xfs_mark_inode_dirty_sync(xfs_inode_t *); -#if defined(XFS_INODE_TRACE) - -#define INODE_TRACE_SIZE 16 /* number of trace entries */ -#define INODE_KTRACE_ENTRY 1 -#define INODE_KTRACE_EXIT 2 -#define INODE_KTRACE_HOLD 3 -#define INODE_KTRACE_REF 4 -#define INODE_KTRACE_RELE 5 - -extern void _xfs_itrace_entry(struct xfs_inode *, const char *, 
inst_t *); -extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *); -extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *); -extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *); -extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *); -#define xfs_itrace_entry(ip) \ - _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address) -#define xfs_itrace_exit(ip) \ - _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address) -#define xfs_itrace_exit_tag(ip, tag) \ - _xfs_itrace_exit(ip, tag, (inst_t *)__return_address) -#define xfs_itrace_ref(ip) \ - _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address) - -#else -#define xfs_itrace_entry(a) -#define xfs_itrace_exit(a) -#define xfs_itrace_exit_tag(a, b) -#define xfs_itrace_hold(a, b, c, d) -#define xfs_itrace_ref(a) -#define xfs_itrace_rele(a, b, c, d) -#endif - #define IHOLD(ip) \ do { \ ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ atomic_inc(&(VFS_I(ip)->i_count)); \ - xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ + trace_xfs_ihold(ip, _THIS_IP_); \ } while (0) #define IRELE(ip) \ do { \ - xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ + trace_xfs_irele(ip, _THIS_IP_); \ iput(VFS_I(ip)); \ } while (0) @@ -577,11 +520,11 @@ int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int); int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int); xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); -void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, - xfs_bmbt_irec_t *); +void xfs_iext_insert(xfs_inode_t *, xfs_extnum_t, xfs_extnum_t, + xfs_bmbt_irec_t *, int); void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int); void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int); -void xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int); +void xfs_iext_remove(xfs_inode_t *, xfs_extnum_t, int, int); void xfs_iext_remove_inline(xfs_ifork_t *, 
xfs_extnum_t, int); void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 9794b876d6f..d4dc063111f 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -41,6 +41,7 @@ #include "xfs_ialloc.h" #include "xfs_rw.h" #include "xfs_error.h" +#include "xfs_trace.h" kmem_zone_t *xfs_ili_zone; /* inode log item zone */ @@ -227,7 +228,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)&iip->ili_format; vecp->i_len = sizeof(xfs_inode_log_format_t); - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT); + vecp->i_type = XLOG_REG_TYPE_IFORMAT; vecp++; nvecs = 1; @@ -278,7 +279,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)&ip->i_d; vecp->i_len = sizeof(struct xfs_icdinode); - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); + vecp->i_type = XLOG_REG_TYPE_ICORE; vecp++; nvecs++; iip->ili_format.ilf_fields |= XFS_ILOG_CORE; @@ -335,7 +336,7 @@ xfs_inode_item_format( vecp->i_addr = (char *)(ip->i_df.if_u1.if_extents); vecp->i_len = ip->i_df.if_bytes; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); + vecp->i_type = XLOG_REG_TYPE_IEXT; } else #endif { @@ -354,7 +355,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)ext_buffer; vecp->i_len = xfs_iextents_copy(ip, ext_buffer, XFS_DATA_FORK); - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); + vecp->i_type = XLOG_REG_TYPE_IEXT; } ASSERT(vecp->i_len <= ip->i_df.if_bytes); iip->ili_format.ilf_dsize = vecp->i_len; @@ -372,7 +373,7 @@ xfs_inode_item_format( ASSERT(ip->i_df.if_broot != NULL); vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; vecp->i_len = ip->i_df.if_broot_bytes; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT); + vecp->i_type = XLOG_REG_TYPE_IBROOT; vecp++; nvecs++; iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; @@ -398,7 +399,7 @@ xfs_inode_item_format( ASSERT((ip->i_df.if_real_bytes == 0) || (ip->i_df.if_real_bytes == data_bytes)); vecp->i_len = 
(int)data_bytes; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL); + vecp->i_type = XLOG_REG_TYPE_ILOCAL; vecp++; nvecs++; iip->ili_format.ilf_dsize = (unsigned)data_bytes; @@ -476,7 +477,7 @@ xfs_inode_item_format( vecp->i_len = xfs_iextents_copy(ip, ext_buffer, XFS_ATTR_FORK); #endif - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT); + vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; iip->ili_format.ilf_asize = vecp->i_len; vecp++; nvecs++; @@ -491,7 +492,7 @@ xfs_inode_item_format( ASSERT(ip->i_afp->if_broot != NULL); vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; vecp->i_len = ip->i_afp->if_broot_bytes; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT); + vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; vecp++; nvecs++; iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; @@ -515,7 +516,7 @@ xfs_inode_item_format( ASSERT((ip->i_afp->if_real_bytes == 0) || (ip->i_afp->if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL); + vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL; vecp++; nvecs++; iip->ili_format.ilf_asize = (unsigned)data_bytes; @@ -601,33 +602,20 @@ xfs_inode_item_trylock( if (!xfs_iflock_nowait(ip)) { /* - * If someone else isn't already trying to push the inode - * buffer, we get to do it. + * inode has already been flushed to the backing buffer, + * leave it locked in shared mode, pushbuf routine will + * unlock it. */ - if (iip->ili_pushbuf_flag == 0) { - iip->ili_pushbuf_flag = 1; -#ifdef DEBUG - iip->ili_push_owner = current_pid(); -#endif - /* - * Inode is left locked in shared mode. - * Pushbuf routine gets to unlock it. - */ - return XFS_ITEM_PUSHBUF; - } else { - /* - * We hold the AIL lock, so we must specify the - * NONOTIFY flag so that we won't double trip. 
- */ - xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); - return XFS_ITEM_FLUSHING; - } - /* NOTREACHED */ + return XFS_ITEM_PUSHBUF; } /* Stale items should force out the iclog */ if (ip->i_flags & XFS_ISTALE) { xfs_ifunlock(ip); + /* + * we hold the AIL lock - notify the unlock routine of this + * so it doesn't try to get the lock again. + */ xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); return XFS_ITEM_PINNED; } @@ -745,11 +733,8 @@ xfs_inode_item_committed( * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK * failed to get the inode flush lock but did get the inode locked SHARED. * Here we're trying to see if the inode buffer is incore, and if so whether it's - * marked delayed write. If that's the case, we'll initiate a bawrite on that - * buffer to expedite the process. - * - * We aren't holding the AIL lock (or the flush lock) when this gets called, - * so it is inherently race-y. + * marked delayed write. If that's the case, we'll promote it and that will + * allow the caller to write the buffer by triggering the xfsbufd to run. */ STATIC void xfs_inode_item_pushbuf( @@ -758,80 +743,30 @@ xfs_inode_item_pushbuf( xfs_inode_t *ip; xfs_mount_t *mp; xfs_buf_t *bp; - uint dopush; ip = iip->ili_inode; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); /* - * The ili_pushbuf_flag keeps others from - * trying to duplicate our effort. - */ - ASSERT(iip->ili_pushbuf_flag != 0); - ASSERT(iip->ili_push_owner == current_pid()); - - /* * If a flush is not in progress anymore, chances are that the * inode was taken off the AIL. So, just get out. 
*/ if (completion_done(&ip->i_flush) || ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { - iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); return; } mp = ip->i_mount; bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, - iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK); + iip->ili_format.ilf_len, XBF_TRYLOCK); - if (bp != NULL) { - if (XFS_BUF_ISDELAYWRITE(bp)) { - /* - * We were racing with iflush because we don't hold - * the AIL lock or the flush lock. However, at this point, - * we have the buffer, and we know that it's dirty. - * So, it's possible that iflush raced with us, and - * this item is already taken off the AIL. - * If not, we can flush it async. - */ - dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && - !completion_done(&ip->i_flush)); - iip->ili_pushbuf_flag = 0; - xfs_iunlock(ip, XFS_ILOCK_SHARED); - xfs_buftrace("INODE ITEM PUSH", bp); - if (XFS_BUF_ISPINNED(bp)) { - xfs_log_force(mp, (xfs_lsn_t)0, - XFS_LOG_FORCE); - } - if (dopush) { - int error; - error = xfs_bawrite(mp, bp); - if (error) - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p", - error, iip, bp); - } else { - xfs_buf_relse(bp); - } - } else { - iip->ili_pushbuf_flag = 0; - xfs_iunlock(ip, XFS_ILOCK_SHARED); - xfs_buf_relse(bp); - } - return; - } - /* - * We have to be careful about resetting pushbuf flag too early (above). - * Even though in theory we can do it as soon as we have the buflock, - * we don't want others to be doing work needlessly. They'll come to - * this function thinking that pushing the buffer is their - * responsibility only to find that the buffer is still locked by - * another doing the same thing - */ - iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); + if (!bp) + return; + if (XFS_BUF_ISDELAYWRITE(bp)) + xfs_buf_delwri_promote(bp); + xfs_buf_relse(bp); return; } @@ -864,10 +799,14 @@ xfs_inode_item_push( iip->ili_format.ilf_fields != 0); /* - * Write out the inode. 
The completion routine ('iflush_done') will - * pull it from the AIL, mark it clean, unlock the flush lock. + * Push the inode to it's backing buffer. This will not remove the + * inode from the AIL - a further push will be required to trigger a + * buffer push. However, this allows all the dirty inodes to be pushed + * to the buffer before it is pushed to disk. THe buffer IO completion + * will pull th einode from the AIL, mark it clean and unlock the flush + * lock. */ - (void) xfs_iflush(ip, XFS_IFLUSH_ASYNC); + (void) xfs_iflush(ip, 0); xfs_iunlock(ip, XFS_ILOCK_SHARED); return; @@ -931,7 +870,6 @@ xfs_inode_item_init( /* We have zeroed memory. No need ... iip->ili_extents_buf = NULL; - iip->ili_pushbuf_flag = 0; */ iip->ili_format.ilf_type = XFS_LI_INODE; diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 65bae4c9b8b..9a467958ecd 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -127,7 +127,7 @@ static inline int xfs_ilog_fdata(int w) #ifdef __KERNEL__ struct xfs_buf; -struct xfs_bmbt_rec_64; +struct xfs_bmbt_rec; struct xfs_inode; struct xfs_mount; @@ -140,16 +140,10 @@ typedef struct xfs_inode_log_item { unsigned short ili_flags; /* misc flags */ unsigned short ili_logged; /* flushed logged data */ unsigned int ili_last_fields; /* fields when flushed */ - struct xfs_bmbt_rec_64 *ili_extents_buf; /* array of logged + struct xfs_bmbt_rec *ili_extents_buf; /* array of logged data exts */ - struct xfs_bmbt_rec_64 *ili_aextents_buf; /* array of logged + struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged attr exts */ - unsigned int ili_pushbuf_flag; /* one bit used in push_ail */ - -#ifdef DEBUG - uint64_t ili_push_owner; /* one who sets pushbuf_flag - above gets to push the buf */ -#endif #ifdef XFS_TRANS_DEBUG int ili_root_size; char *ili_orig_root; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 67ae5555a30..0b65039951a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -47,72 +47,8 @@ #include 
"xfs_trans_space.h" #include "xfs_utils.h" #include "xfs_iomap.h" +#include "xfs_trace.h" -#if defined(XFS_RW_TRACE) -void -xfs_iomap_enter_trace( - int tag, - xfs_inode_t *ip, - xfs_off_t offset, - ssize_t count) -{ - if (!ip->i_rwtrace) - return; - - ktrace_enter(ip->i_rwtrace, - (void *)((unsigned long)tag), - (void *)ip, - (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), - (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), - (void *)((unsigned long)((offset >> 32) & 0xffffffff)), - (void *)((unsigned long)(offset & 0xffffffff)), - (void *)((unsigned long)count), - (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)), - (void *)((unsigned long)(ip->i_new_size & 0xffffffff)), - (void *)((unsigned long)current_pid()), - (void *)NULL, - (void *)NULL, - (void *)NULL, - (void *)NULL, - (void *)NULL, - (void *)NULL); -} - -void -xfs_iomap_map_trace( - int tag, - xfs_inode_t *ip, - xfs_off_t offset, - ssize_t count, - xfs_iomap_t *iomapp, - xfs_bmbt_irec_t *imapp, - int flags) -{ - if (!ip->i_rwtrace) - return; - - ktrace_enter(ip->i_rwtrace, - (void *)((unsigned long)tag), - (void *)ip, - (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), - (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), - (void *)((unsigned long)((offset >> 32) & 0xffffffff)), - (void *)((unsigned long)(offset & 0xffffffff)), - (void *)((unsigned long)count), - (void *)((unsigned long)flags), - (void *)((unsigned long)((iomapp->iomap_offset >> 32) & 0xffffffff)), - (void *)((unsigned long)(iomapp->iomap_offset & 0xffffffff)), - (void *)((unsigned long)(iomapp->iomap_delta)), - (void *)((unsigned long)(iomapp->iomap_bsize)), - (void *)((unsigned long)(iomapp->iomap_bn)), - (void *)(__psint_t)(imapp->br_startoff), - (void *)((unsigned long)(imapp->br_blockcount)), - (void *)(__psint_t)(imapp->br_startblock)); -} -#else -#define xfs_iomap_enter_trace(tag, io, offset, count) -#define xfs_iomap_map_trace(tag, io, offset, count, iomapp, imapp, flags) 
-#endif #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ << mp->m_writeio_log) @@ -187,21 +123,20 @@ xfs_iomap( if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); + trace_xfs_iomap_enter(ip, offset, count, flags, NULL); + switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) { case BMAPI_READ: - xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, ip, offset, count); lockmode = xfs_ilock_map_shared(ip); bmapi_flags = XFS_BMAPI_ENTIRE; break; case BMAPI_WRITE: - xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count); lockmode = XFS_ILOCK_EXCL; if (flags & BMAPI_IGNSTATE) bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; xfs_ilock(ip, lockmode); break; case BMAPI_ALLOCATE: - xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count); lockmode = XFS_ILOCK_SHARED; bmapi_flags = XFS_BMAPI_ENTIRE; @@ -237,8 +172,7 @@ xfs_iomap( if (nimaps && (imap.br_startblock != HOLESTARTBLOCK) && (imap.br_startblock != DELAYSTARTBLOCK)) { - xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip, - offset, count, iomapp, &imap, flags); + trace_xfs_iomap_found(ip, offset, count, flags, &imap); break; } @@ -250,8 +184,7 @@ xfs_iomap( &imap, &nimaps); } if (!error) { - xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, ip, - offset, count, iomapp, &imap, flags); + trace_xfs_iomap_alloc(ip, offset, count, flags, &imap); } iomap_flags = IOMAP_NEW; break; @@ -261,8 +194,7 @@ xfs_iomap( lockmode = 0; if (nimaps && !isnullstartblock(imap.br_startblock)) { - xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip, - offset, count, iomapp, &imap, flags); + trace_xfs_iomap_found(ip, offset, count, flags, &imap); break; } @@ -623,8 +555,7 @@ retry: * delalloc blocks and retry without EOF preallocation. 
*/ if (nimaps == 0) { - xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, - ip, offset, count); + trace_xfs_delalloc_enospc(ip, offset, count); if (flushed) return XFS_ERROR(ENOSPC); @@ -837,7 +768,7 @@ xfs_iomap_write_unwritten( int committed; int error; - xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, ip, offset, count); + trace_xfs_unwritten_convert(ip, offset, count); offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); @@ -860,8 +791,15 @@ xfs_iomap_write_unwritten( * set up a transaction to convert the range of extents * from unwritten to real. Do allocations in a loop until * we have covered the range passed in. + * + * Note that we open code the transaction allocation here + * to pass KM_NOFS--we can't risk to recursing back into + * the filesystem here as we might be asked to write out + * the same inode that we complete here and might deadlock + * on the iolock. */ - tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); + xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); + tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), 0, diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index fdcf7b82747..174f2999099 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -43,6 +43,14 @@ typedef enum { BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */ } bmapi_flags_t; +#define BMAPI_FLAGS \ + { BMAPI_READ, "READ" }, \ + { BMAPI_WRITE, "WRITE" }, \ + { BMAPI_ALLOCATE, "ALLOCATE" }, \ + { BMAPI_IGNSTATE, "IGNSTATE" }, \ + { BMAPI_DIRECT, "DIRECT" }, \ + { BMAPI_MMAP, "MMAP" }, \ + { BMAPI_TRYLOCK, "TRYLOCK" } /* * xfs_iomap_t: File system I/O map diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 62efab2f383..3af02314c60 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -408,8 +408,10 @@ xfs_bulkstat( (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); nimask = ~(nicluster - 1); nbcluster = nicluster >> 
mp->m_sb.sb_inopblog; - irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4, - KM_SLEEP | KM_MAYFAIL | KM_LARGE); + irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); + if (!irbuf) + return ENOMEM; + nirbuf = irbsize / sizeof(*irbuf); /* @@ -420,9 +422,7 @@ xfs_bulkstat( while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { cond_resched(); bp = NULL; - down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - up_read(&mp->m_peraglock); if (error) { /* * Skip this allocation group and go to the next one. @@ -729,7 +729,7 @@ xfs_bulkstat( /* * Done, we're either out of filesystem or space to put the data. */ - kmem_free(irbuf); + kmem_free_large(irbuf); *ubcountp = ubelem; /* * Found some inodes, return them now and return the error next time. @@ -849,9 +849,7 @@ xfs_inumbers( agbp = NULL; while (left > 0 && agno < mp->m_sb.sb_agcount) { if (agbp == NULL) { - down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - up_read(&mp->m_peraglock); if (error) { /* * If we can't read the AGI of this ag, diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 9dbdff3ea48..4f16be4b6ee 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -40,6 +40,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_rw.h" +#include "xfs_trace.h" kmem_zone_t *xfs_log_ticket_zone; @@ -49,7 +50,6 @@ kmem_zone_t *xfs_log_ticket_zone; (off) += (bytes);} /* Local miscellaneous function prototypes */ -STATIC int xlog_bdstrat_cb(struct xfs_buf *); STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket, xlog_in_core_t **, xfs_lsn_t *); STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, @@ -79,11 +79,6 @@ STATIC int xlog_state_release_iclog(xlog_t *log, STATIC void xlog_state_switch_iclogs(xlog_t *log, xlog_in_core_t *iclog, int eventual_size); -STATIC int xlog_state_sync(xlog_t *log, - xfs_lsn_t lsn, - uint flags, - int *log_flushed); -STATIC int xlog_state_sync_all(xlog_t *log, uint flags, 
int *log_flushed); STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); /* local functions to manipulate grant head */ @@ -122,85 +117,6 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, STATIC int xlog_iclogs_empty(xlog_t *log); -#if defined(XFS_LOG_TRACE) - -#define XLOG_TRACE_LOGGRANT_SIZE 2048 -#define XLOG_TRACE_ICLOG_SIZE 256 - -void -xlog_trace_loggrant_alloc(xlog_t *log) -{ - log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS); -} - -void -xlog_trace_loggrant_dealloc(xlog_t *log) -{ - ktrace_free(log->l_grant_trace); -} - -void -xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) -{ - unsigned long cnts; - - /* ticket counts are 1 byte each */ - cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; - - ktrace_enter(log->l_grant_trace, - (void *)tic, - (void *)log->l_reserve_headq, - (void *)log->l_write_headq, - (void *)((unsigned long)log->l_grant_reserve_cycle), - (void *)((unsigned long)log->l_grant_reserve_bytes), - (void *)((unsigned long)log->l_grant_write_cycle), - (void *)((unsigned long)log->l_grant_write_bytes), - (void *)((unsigned long)log->l_curr_cycle), - (void *)((unsigned long)log->l_curr_block), - (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)), - (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)), - (void *)string, - (void *)((unsigned long)tic->t_trans_type), - (void *)cnts, - (void *)((unsigned long)tic->t_curr_res), - (void *)((unsigned long)tic->t_unit_res)); -} - -void -xlog_trace_iclog_alloc(xlog_in_core_t *iclog) -{ - iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS); -} - -void -xlog_trace_iclog_dealloc(xlog_in_core_t *iclog) -{ - ktrace_free(iclog->ic_trace); -} - -void -xlog_trace_iclog(xlog_in_core_t *iclog, uint state) -{ - ktrace_enter(iclog->ic_trace, - (void *)((unsigned long)state), - (void *)((unsigned long)current_pid()), - (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL, - (void *)NULL, (void *)NULL, 
(void *)NULL, (void *)NULL, - (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL, - (void *)NULL, (void *)NULL); -} -#else - -#define xlog_trace_loggrant_alloc(log) -#define xlog_trace_loggrant_dealloc(log) -#define xlog_trace_loggrant(log,tic,string) - -#define xlog_trace_iclog_alloc(iclog) -#define xlog_trace_iclog_dealloc(iclog) -#define xlog_trace_iclog(iclog,state) - -#endif /* XFS_LOG_TRACE */ - static void xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) @@ -353,15 +269,17 @@ xfs_log_done(xfs_mount_t *mp, if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 || (flags & XFS_LOG_REL_PERM_RESERV)) { + trace_xfs_log_done_nonperm(log, ticket); + /* * Release ticket if not permanent reservation or a specific * request has been made to release a permanent reservation. */ - xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); xlog_ungrant_log_space(log, ticket); xfs_log_ticket_put(ticket); } else { - xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); + trace_xfs_log_done_perm(log, ticket); + xlog_regrant_reserve_log_space(log, ticket); /* If this ticket was a permanent reservation and we aren't * trying to release it, reset the inited flags; so next time @@ -373,65 +291,6 @@ xfs_log_done(xfs_mount_t *mp, return lsn; } /* xfs_log_done */ - -/* - * Force the in-core log to disk. If flags == XFS_LOG_SYNC, - * the force is done synchronously. - * - * Asynchronous forces are implemented by setting the WANT_SYNC - * bit in the appropriate in-core log and then returning. - * - * Synchronous forces are implemented with a signal variable. All callers - * to force a given lsn to disk will wait on a the sv attached to the - * specific in-core log. When given in-core log finally completes its - * write to disk, that thread will wake up all threads waiting on the - * sv. 
- */ -int -_xfs_log_force( - xfs_mount_t *mp, - xfs_lsn_t lsn, - uint flags, - int *log_flushed) -{ - xlog_t *log = mp->m_log; - int dummy; - - if (!log_flushed) - log_flushed = &dummy; - - ASSERT(flags & XFS_LOG_FORCE); - - XFS_STATS_INC(xs_log_force); - - if (log->l_flags & XLOG_IO_ERROR) - return XFS_ERROR(EIO); - if (lsn == 0) - return xlog_state_sync_all(log, flags, log_flushed); - else - return xlog_state_sync(log, lsn, flags, log_flushed); -} /* _xfs_log_force */ - -/* - * Wrapper for _xfs_log_force(), to be used when caller doesn't care - * about errors or whether the log was flushed or not. This is the normal - * interface to use when trying to unpin items or move the log forward. - */ -void -xfs_log_force( - xfs_mount_t *mp, - xfs_lsn_t lsn, - uint flags) -{ - int error; - error = _xfs_log_force(mp, lsn, flags, NULL); - if (error) { - xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " - "error %d returned.", error); - } -} - - /* * Attaches a new iclog I/O completion callback routine during * transaction commit. If the log is in error state, a non-zero @@ -505,10 +364,13 @@ xfs_log_reserve(xfs_mount_t *mp, XFS_STATS_INC(xs_try_logspace); + if (*ticket != NULL) { ASSERT(flags & XFS_LOG_PERM_RESERV); internal_ticket = (xlog_ticket_t *)*ticket; - xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)"); + + trace_xfs_log_reserve(log, internal_ticket); + xlog_grant_push_ail(mp, internal_ticket->t_unit_res); retval = xlog_regrant_write_log_space(log, internal_ticket); } else { @@ -519,10 +381,9 @@ xfs_log_reserve(xfs_mount_t *mp, return XFS_ERROR(ENOMEM); internal_ticket->t_trans_type = t_type; *ticket = internal_ticket; - xlog_trace_loggrant(log, internal_ticket, - (internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ? 
- "xfs_log_reserve: create new ticket (permanent trans)" : - "xfs_log_reserve: create new ticket"); + + trace_xfs_log_reserve(log, internal_ticket); + xlog_grant_push_ail(mp, (internal_ticket->t_unit_res * internal_ticket->t_cnt)); @@ -676,7 +537,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (mp->m_flags & XFS_MOUNT_RDONLY) return 0; - error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL); + error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL); ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); #ifdef DEBUG @@ -692,7 +553,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (! (XLOG_FORCED_SHUTDOWN(log))) { reg[0].i_addr = (void*)&magic; reg[0].i_len = sizeof(magic); - XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT); + reg[0].i_type = XLOG_REG_TYPE_UNMOUNT; error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); @@ -734,7 +595,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) spin_unlock(&log->l_icloglock); } if (tic) { - xlog_trace_loggrant(log, tic, "unmount rec"); + trace_xfs_log_umount_write(log, tic); xlog_ungrant_log_space(log, tic); xfs_log_ticket_put(tic); } @@ -1030,7 +891,6 @@ xlog_iodone(xfs_buf_t *bp) xfs_fs_cmn_err(CE_WARN, l->l_mp, "xlog_iodone: Barriers are no longer supported" " by device. Disabling barriers\n"); - xfs_buftrace("XLOG_IODONE BARRIERS OFF", bp); } /* @@ -1063,38 +923,6 @@ xlog_iodone(xfs_buf_t *bp) } /* xlog_iodone */ /* - * The bdstrat callback function for log bufs. This gives us a central - * place to trap bufs in case we get hit by a log I/O error and need to - * shutdown. Actually, in practice, even when we didn't get a log error, - * we transition the iclogs to IOERROR state *after* flushing all existing - * iclogs to disk. This is because we don't want anymore new transactions to be - * started or completed afterwards.
- */ -STATIC int -xlog_bdstrat_cb(struct xfs_buf *bp) -{ - xlog_in_core_t *iclog; - - iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); - - if ((iclog->ic_state & XLOG_STATE_IOERROR) == 0) { - /* note for irix bstrat will need struct bdevsw passed - * Fix the following macro if the code ever is merged - */ - XFS_bdstrat(bp); - return 0; - } - - xfs_buftrace("XLOG__BDSTRAT IOERROR", bp); - XFS_BUF_ERROR(bp, EIO); - XFS_BUF_STALE(bp); - xfs_biodone(bp); - return XFS_ERROR(EIO); - - -} - -/* * Return size of each in-core log record buffer. * * All machines get 8 x 32kB buffers by default, unless tuned otherwise. @@ -1236,7 +1064,6 @@ xlog_alloc_log(xfs_mount_t *mp, if (!bp) goto out_free_log; XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); - XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); @@ -1246,7 +1073,6 @@ xlog_alloc_log(xfs_mount_t *mp, spin_lock_init(&log->l_grant_lock); sv_init(&log->l_flush_wait, 0, "flush_wait"); - xlog_trace_loggrant_alloc(log); /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); @@ -1275,7 +1101,6 @@ xlog_alloc_log(xfs_mount_t *mp, if (!XFS_BUF_CPSEMA(bp)) ASSERT(0); XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); - XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); iclog->ic_bp = bp; iclog->ic_data = bp->b_addr; @@ -1305,8 +1130,6 @@ xlog_alloc_log(xfs_mount_t *mp, sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); - xlog_trace_iclog_alloc(iclog); - iclogp = &iclog->ic_next; } *iclogp = log->l_iclog; /* complete ring */ @@ -1321,13 +1144,11 @@ out_free_iclog: sv_destroy(&iclog->ic_force_wait); sv_destroy(&iclog->ic_write_wait); xfs_buf_free(iclog->ic_bp); - xlog_trace_iclog_dealloc(iclog); } kmem_free(iclog); } spinlock_destroy(&log->l_icloglock); 
spinlock_destroy(&log->l_grant_lock); - xlog_trace_loggrant_dealloc(log); xfs_buf_free(log->l_xbuf); out_free_log: kmem_free(log); @@ -1351,7 +1172,7 @@ xlog_commit_record(xfs_mount_t *mp, reg[0].i_addr = NULL; reg[0].i_len = 0; - XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT); + reg[0].i_type = XLOG_REG_TYPE_COMMIT; ASSERT_ALWAYS(iclog); if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, @@ -1426,6 +1247,37 @@ xlog_grant_push_ail(xfs_mount_t *mp, xfs_trans_ail_push(log->l_ailp, threshold_lsn); } /* xlog_grant_push_ail */ +/* + * The bdstrat callback function for log bufs. This gives us a central + * place to trap bufs in case we get hit by a log I/O error and need to + * shutdown. Actually, in practice, even when we didn't get a log error, + * we transition the iclogs to IOERROR state *after* flushing all existing + * iclogs to disk. This is because we don't want anymore new transactions to be + * started or completed afterwards. + */ +STATIC int +xlog_bdstrat( + struct xfs_buf *bp) +{ + struct xlog_in_core *iclog; + + iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); + if (iclog->ic_state & XLOG_STATE_IOERROR) { + XFS_BUF_ERROR(bp, EIO); + XFS_BUF_STALE(bp); + xfs_biodone(bp); + /* + * It would seem logical to return EIO here, but we rely on + * the log state machine to propagate I/O errors instead of + * doing it here. + */ + return 0; + } + + bp->b_flags |= _XBF_RUN_QUEUES; + xfs_buf_iorequest(bp); + return 0; +} /* * Flush out the in-core log (iclog) to the on-disk log in an asynchronous @@ -1524,6 +1376,7 @@ xlog_sync(xlog_t *log, XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); + bp->b_flags |= XBF_LOG_BUFFER; /* * Do an ordered write for the log block. * Its unnecessary to flush the first split block in the log wrap case.
@@ -1544,7 +1397,7 @@ xlog_sync(xlog_t *log, */ XFS_BUF_WRITE(bp); - if ((error = XFS_bwrite(bp))) { + if ((error = xlog_bdstrat(bp))) { xfs_ioerror_alert("xlog_sync", log->l_mp, bp, XFS_BUF_ADDR(bp)); return error; @@ -1561,6 +1414,7 @@ xlog_sync(xlog_t *log, XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); + bp->b_flags |= XBF_LOG_BUFFER; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) XFS_BUF_ORDERED(bp); dptr = XFS_BUF_PTR(bp); @@ -1583,7 +1437,7 @@ xlog_sync(xlog_t *log, /* account for internal log which doesn't start at block #0 */ XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); XFS_BUF_WRITE(bp); - if ((error = XFS_bwrite(bp))) { + if ((error = xlog_bdstrat(bp))) { xfs_ioerror_alert("xlog_sync (split)", log->l_mp, bp, XFS_BUF_ADDR(bp)); return error; @@ -1607,7 +1461,6 @@ xlog_dealloc_log(xlog_t *log) sv_destroy(&iclog->ic_force_wait); sv_destroy(&iclog->ic_write_wait); xfs_buf_free(iclog->ic_bp); - xlog_trace_iclog_dealloc(iclog); next_iclog = iclog->ic_next; kmem_free(iclog); iclog = next_iclog; @@ -1616,7 +1469,6 @@ xlog_dealloc_log(xlog_t *log) spinlock_destroy(&log->l_grant_lock); xfs_buf_free(log->l_xbuf); - xlog_trace_loggrant_dealloc(log); log->l_mp->m_log = NULL; kmem_free(log); } /* xlog_dealloc_log */ @@ -2414,7 +2266,6 @@ restart: iclog = log->l_iclog; if (iclog->ic_state != XLOG_STATE_ACTIVE) { - xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH); XFS_STATS_INC(xs_log_noiclogs); /* Wait for log writes to have flushed */ @@ -2520,13 +2371,15 @@ xlog_grant_log_space(xlog_t *log, /* Is there space or do we need to sleep? 
*/ spin_lock(&log->l_grant_lock); - xlog_trace_loggrant(log, tic, "xlog_grant_log_space: enter"); + + trace_xfs_log_grant_enter(log, tic); /* something is already sleeping; insert new transaction at end */ if (log->l_reserve_headq) { xlog_ins_ticketq(&log->l_reserve_headq, tic); - xlog_trace_loggrant(log, tic, - "xlog_grant_log_space: sleep 1"); + + trace_xfs_log_grant_sleep1(log, tic); + /* * Gotta check this before going to sleep, while we're * holding the grant lock. @@ -2540,8 +2393,7 @@ xlog_grant_log_space(xlog_t *log, * If we got an error, and the filesystem is shutting down, * we'll catch it down below. So just continue... */ - xlog_trace_loggrant(log, tic, - "xlog_grant_log_space: wake 1"); + trace_xfs_log_grant_wake1(log, tic); spin_lock(&log->l_grant_lock); } if (tic->t_flags & XFS_LOG_PERM_RESERV) @@ -2558,8 +2410,9 @@ redo: if (free_bytes < need_bytes) { if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) xlog_ins_ticketq(&log->l_reserve_headq, tic); - xlog_trace_loggrant(log, tic, - "xlog_grant_log_space: sleep 2"); + + trace_xfs_log_grant_sleep2(log, tic); + spin_unlock(&log->l_grant_lock); xlog_grant_push_ail(log->l_mp, need_bytes); spin_lock(&log->l_grant_lock); @@ -2571,8 +2424,8 @@ redo: if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; - xlog_trace_loggrant(log, tic, - "xlog_grant_log_space: wake 2"); + trace_xfs_log_grant_wake2(log, tic); + goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_reserve_headq, tic); @@ -2592,7 +2445,7 @@ redo: ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); } #endif - xlog_trace_loggrant(log, tic, "xlog_grant_log_space: exit"); + trace_xfs_log_grant_exit(log, tic); xlog_verify_grant_head(log, 1); spin_unlock(&log->l_grant_lock); return 0; @@ -2600,7 +2453,9 @@ redo: error_return: if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_reserve_headq, tic); - xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret"); + + trace_xfs_log_grant_error(log, tic); + /* * If we are 
failing, make sure the ticket doesn't have any * current reservations. We don't want to add this back when @@ -2640,7 +2495,8 @@ xlog_regrant_write_log_space(xlog_t *log, #endif spin_lock(&log->l_grant_lock); - xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: enter"); + + trace_xfs_log_regrant_write_enter(log, tic); if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; @@ -2669,8 +2525,8 @@ xlog_regrant_write_log_space(xlog_t *log, if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) xlog_ins_ticketq(&log->l_write_headq, tic); - xlog_trace_loggrant(log, tic, - "xlog_regrant_write_log_space: sleep 1"); + trace_xfs_log_regrant_write_sleep1(log, tic); + spin_unlock(&log->l_grant_lock); xlog_grant_push_ail(log->l_mp, need_bytes); spin_lock(&log->l_grant_lock); @@ -2685,8 +2541,7 @@ xlog_regrant_write_log_space(xlog_t *log, if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; - xlog_trace_loggrant(log, tic, - "xlog_regrant_write_log_space: wake 1"); + trace_xfs_log_regrant_write_wake1(log, tic); } } @@ -2704,6 +2559,8 @@ redo: spin_lock(&log->l_grant_lock); XFS_STATS_INC(xs_sleep_logspace); + trace_xfs_log_regrant_write_sleep2(log, tic); + sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* If we're shutting down, this tic is already off the queue */ @@ -2711,8 +2568,7 @@ redo: if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; - xlog_trace_loggrant(log, tic, - "xlog_regrant_write_log_space: wake 2"); + trace_xfs_log_regrant_write_wake2(log, tic); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_write_headq, tic); @@ -2727,7 +2583,8 @@ redo: } #endif - xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: exit"); + trace_xfs_log_regrant_write_exit(log, tic); + xlog_verify_grant_head(log, 1); spin_unlock(&log->l_grant_lock); return 0; @@ -2736,7 +2593,9 @@ redo: error_return: if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_reserve_headq, tic); - xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret"); + + 
trace_xfs_log_regrant_write_error(log, tic); + /* * If we are failing, make sure the ticket doesn't have any * current reservations. We don't want to add this back when @@ -2760,8 +2619,8 @@ STATIC void xlog_regrant_reserve_log_space(xlog_t *log, xlog_ticket_t *ticket) { - xlog_trace_loggrant(log, ticket, - "xlog_regrant_reserve_log_space: enter"); + trace_xfs_log_regrant_reserve_enter(log, ticket); + if (ticket->t_cnt > 0) ticket->t_cnt--; @@ -2769,8 +2628,9 @@ xlog_regrant_reserve_log_space(xlog_t *log, xlog_grant_sub_space(log, ticket->t_curr_res); ticket->t_curr_res = ticket->t_unit_res; xlog_tic_reset_res(ticket); - xlog_trace_loggrant(log, ticket, - "xlog_regrant_reserve_log_space: sub current res"); + + trace_xfs_log_regrant_reserve_sub(log, ticket); + xlog_verify_grant_head(log, 1); /* just return if we still have some of the pre-reserved space */ @@ -2780,8 +2640,9 @@ xlog_regrant_reserve_log_space(xlog_t *log, } xlog_grant_add_space_reserve(log, ticket->t_unit_res); - xlog_trace_loggrant(log, ticket, - "xlog_regrant_reserve_log_space: exit"); + + trace_xfs_log_regrant_reserve_exit(log, ticket); + xlog_verify_grant_head(log, 0); spin_unlock(&log->l_grant_lock); ticket->t_curr_res = ticket->t_unit_res; @@ -2811,11 +2672,11 @@ xlog_ungrant_log_space(xlog_t *log, ticket->t_cnt--; spin_lock(&log->l_grant_lock); - xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter"); + trace_xfs_log_ungrant_enter(log, ticket); xlog_grant_sub_space(log, ticket->t_curr_res); - xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current"); + trace_xfs_log_ungrant_sub(log, ticket); /* If this is a permanent reservation ticket, we may be able to free * up more space based on the remaining count. 
@@ -2825,7 +2686,8 @@ xlog_ungrant_log_space(xlog_t *log, xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt); } - xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit"); + trace_xfs_log_ungrant_exit(log, ticket); + xlog_verify_grant_head(log, 1); spin_unlock(&log->l_grant_lock); xfs_log_move_tail(log->l_mp, 1); @@ -2927,7 +2789,6 @@ xlog_state_switch_iclogs(xlog_t *log, log->l_iclog = iclog->ic_next; } /* xlog_state_switch_iclogs */ - /* * Write out all data in the in-core log as of this exact moment in time. * @@ -2955,11 +2816,17 @@ xlog_state_switch_iclogs(xlog_t *log, * b) when we return from flushing out this iclog, it is still * not in the active nor dirty state. */ -STATIC int -xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) +int +_xfs_log_force( + struct xfs_mount *mp, + uint flags, + int *log_flushed) { - xlog_in_core_t *iclog; - xfs_lsn_t lsn; + struct log *log = mp->m_log; + struct xlog_in_core *iclog; + xfs_lsn_t lsn; + + XFS_STATS_INC(xs_log_force); spin_lock(&log->l_icloglock); @@ -3005,7 +2872,9 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) if (xlog_state_release_iclog(log, iclog)) return XFS_ERROR(EIO); - *log_flushed = 1; + + if (log_flushed) + *log_flushed = 1; spin_lock(&log->l_icloglock); if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && iclog->ic_state != XLOG_STATE_DIRTY) @@ -3049,19 +2918,37 @@ maybe_sleep: */ if (iclog->ic_state & XLOG_STATE_IOERROR) return XFS_ERROR(EIO); - *log_flushed = 1; - + if (log_flushed) + *log_flushed = 1; } else { no_sleep: spin_unlock(&log->l_icloglock); } return 0; -} /* xlog_state_sync_all */ +} + +/* + * Wrapper for _xfs_log_force(), to be used when caller doesn't care + * about errors or whether the log was flushed or not. This is the normal + * interface to use when trying to unpin items or move the log forward. 
+ */ +void +xfs_log_force( + xfs_mount_t *mp, + uint flags) +{ + int error; + error = _xfs_log_force(mp, flags, NULL); + if (error) { + xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " + "error %d returned.", error); + } +} /* - * Used by code which implements synchronous log forces. + * Force the in-core log to disk for a specific LSN. * * Find in-core log with lsn. * If it is in the DIRTY state, just return. @@ -3069,109 +2956,142 @@ no_sleep: * state and go to sleep or return. * If it is in any other state, go to sleep or return. * - * If filesystem activity goes to zero, the iclog will get flushed only by - * bdflush(). + * Synchronous forces are implemented with a signal variable. All callers + * to force a given lsn to disk will wait on a the sv attached to the + * specific in-core log. When given in-core log finally completes its + * write to disk, that thread will wake up all threads waiting on the + * sv. */ -STATIC int -xlog_state_sync(xlog_t *log, - xfs_lsn_t lsn, - uint flags, - int *log_flushed) +int +_xfs_log_force_lsn( + struct xfs_mount *mp, + xfs_lsn_t lsn, + uint flags, + int *log_flushed) { - xlog_in_core_t *iclog; - int already_slept = 0; + struct log *log = mp->m_log; + struct xlog_in_core *iclog; + int already_slept = 0; -try_again: - spin_lock(&log->l_icloglock); - iclog = log->l_iclog; + ASSERT(lsn != 0); - if (iclog->ic_state & XLOG_STATE_IOERROR) { - spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); - } - - do { - if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { - iclog = iclog->ic_next; - continue; - } + XFS_STATS_INC(xs_log_force); - if (iclog->ic_state == XLOG_STATE_DIRTY) { +try_again: + spin_lock(&log->l_icloglock); + iclog = log->l_iclog; + if (iclog->ic_state & XLOG_STATE_IOERROR) { spin_unlock(&log->l_icloglock); - return 0; + return XFS_ERROR(EIO); } - if (iclog->ic_state == XLOG_STATE_ACTIVE) { - /* - * We sleep here if we haven't already slept (e.g. 
- * this is the first time we've looked at the correct - * iclog buf) and the buffer before us is going to - * be sync'ed. The reason for this is that if we - * are doing sync transactions here, by waiting for - * the previous I/O to complete, we can allow a few - * more transactions into this iclog before we close - * it down. - * - * Otherwise, we mark the buffer WANT_SYNC, and bump - * up the refcnt so we can release the log (which drops - * the ref count). The state switch keeps new transaction - * commits from using this buffer. When the current commits - * finish writing into the buffer, the refcount will drop to - * zero and the buffer will go out then. - */ - if (!already_slept && - (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC | - XLOG_STATE_SYNCING))) { - ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); - XFS_STATS_INC(xs_log_force_sleep); - sv_wait(&iclog->ic_prev->ic_write_wait, PSWP, - &log->l_icloglock, s); - *log_flushed = 1; - already_slept = 1; - goto try_again; - } else { + do { + if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { + iclog = iclog->ic_next; + continue; + } + + if (iclog->ic_state == XLOG_STATE_DIRTY) { + spin_unlock(&log->l_icloglock); + return 0; + } + + if (iclog->ic_state == XLOG_STATE_ACTIVE) { + /* + * We sleep here if we haven't already slept (e.g. + * this is the first time we've looked at the correct + * iclog buf) and the buffer before us is going to + * be sync'ed. The reason for this is that if we + * are doing sync transactions here, by waiting for + * the previous I/O to complete, we can allow a few + * more transactions into this iclog before we close + * it down. + * + * Otherwise, we mark the buffer WANT_SYNC, and bump + * up the refcnt so we can release the log (which + * drops the ref count). The state switch keeps new + * transaction commits from using this buffer. When + * the current commits finish writing into the buffer, + * the refcount will drop to zero and the buffer will + * go out then. 
+ */ + if (!already_slept && + (iclog->ic_prev->ic_state & + (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { + ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); + + XFS_STATS_INC(xs_log_force_sleep); + + sv_wait(&iclog->ic_prev->ic_write_wait, + PSWP, &log->l_icloglock, s); + if (log_flushed) + *log_flushed = 1; + already_slept = 1; + goto try_again; + } atomic_inc(&iclog->ic_refcnt); xlog_state_switch_iclogs(log, iclog, 0); spin_unlock(&log->l_icloglock); if (xlog_state_release_iclog(log, iclog)) return XFS_ERROR(EIO); - *log_flushed = 1; + if (log_flushed) + *log_flushed = 1; spin_lock(&log->l_icloglock); } - } - if ((flags & XFS_LOG_SYNC) && /* sleep */ - !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { + if ((flags & XFS_LOG_SYNC) && /* sleep */ + !(iclog->ic_state & + (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { + /* + * Don't wait on completion if we know that we've + * gotten a log write error. + */ + if (iclog->ic_state & XLOG_STATE_IOERROR) { + spin_unlock(&log->l_icloglock); + return XFS_ERROR(EIO); + } + XFS_STATS_INC(xs_log_force_sleep); + sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); + /* + * No need to grab the log lock here since we're + * only deciding whether or not to return EIO + * and the memory read should be atomic. + */ + if (iclog->ic_state & XLOG_STATE_IOERROR) + return XFS_ERROR(EIO); - /* - * Don't wait on completion if we know that we've - * gotten a log write error. - */ - if (iclog->ic_state & XLOG_STATE_IOERROR) { + if (log_flushed) + *log_flushed = 1; + } else { /* just return */ spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); } - XFS_STATS_INC(xs_log_force_sleep); - sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); - /* - * No need to grab the log lock here since we're - * only deciding whether or not to return EIO - * and the memory read should be atomic. 
- */ - if (iclog->ic_state & XLOG_STATE_IOERROR) - return XFS_ERROR(EIO); - *log_flushed = 1; - } else { /* just return */ - spin_unlock(&log->l_icloglock); - } - return 0; - } while (iclog != log->l_iclog); + return 0; + } while (iclog != log->l_iclog); + + spin_unlock(&log->l_icloglock); + return 0; +} - spin_unlock(&log->l_icloglock); - return 0; -} /* xlog_state_sync */ +/* + * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care + * about errors or whether the log was flushed or not. This is the normal + * interface to use when trying to unpin items or move the log forward. + */ +void +xfs_log_force_lsn( + xfs_mount_t *mp, + xfs_lsn_t lsn, + uint flags) +{ + int error; + error = _xfs_log_force_lsn(mp, lsn, flags, NULL); + if (error) { + xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " + "error %d returned.", error); + } +} /* * Called when we want to mark the current iclog as being ready to sync to @@ -3536,7 +3456,6 @@ xfs_log_force_umount( xlog_ticket_t *tic; xlog_t *log; int retval; - int dummy; log = mp->m_log; @@ -3610,13 +3529,14 @@ xfs_log_force_umount( } spin_unlock(&log->l_grant_lock); - if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { + if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { ASSERT(!logerror); /* * Force the incore logs to disk before shutting the * log down completely. */ - xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy); + _xfs_log_force(mp, XFS_LOG_SYNC, NULL); + spin_lock(&log->l_icloglock); retval = xlog_state_ioerror(log); spin_unlock(&log->l_icloglock); diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index d0c9baa50b1..7074be9d13e 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -70,14 +70,8 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) * Flags to xfs_log_force() * * XFS_LOG_SYNC: Synchronous force in-core log to disk - * XFS_LOG_FORCE: Start in-core log write now. - * XFS_LOG_URGE: Start write within some window of time. 
- * - * Note: Either XFS_LOG_FORCE or XFS_LOG_URGE must be set. */ #define XFS_LOG_SYNC 0x1 -#define XFS_LOG_FORCE 0x2 -#define XFS_LOG_URGE 0x4 #endif /* __KERNEL__ */ @@ -110,10 +104,8 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) #define XLOG_REG_TYPE_TRANSHDR 19 #define XLOG_REG_TYPE_MAX 19 -#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t)) - typedef struct xfs_log_iovec { - xfs_caddr_t i_addr; /* beginning address of region */ + xfs_caddr_t i_addr; /* beginning address of region */ int i_len; /* length in bytes of region */ uint i_type; /* type of region */ } xfs_log_iovec_t; @@ -140,12 +132,17 @@ xfs_lsn_t xfs_log_done(struct xfs_mount *mp, void **iclog, uint flags); int _xfs_log_force(struct xfs_mount *mp, - xfs_lsn_t lsn, uint flags, int *log_forced); void xfs_log_force(struct xfs_mount *mp, - xfs_lsn_t lsn, uint flags); +int _xfs_log_force_lsn(struct xfs_mount *mp, + xfs_lsn_t lsn, + uint flags, + int *log_forced); +void xfs_log_force_lsn(struct xfs_mount *mp, + xfs_lsn_t lsn, + uint flags); int xfs_log_mount(struct xfs_mount *mp, struct xfs_buftarg *log_target, xfs_daddr_t start_block, diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 679c7c4926a..fd02a18facd 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -19,7 +19,6 @@ #define __XFS_LOG_PRIV_H__ struct xfs_buf; -struct ktrace; struct log; struct xlog_ticket; struct xfs_buf_cancel; @@ -135,6 +134,12 @@ static inline uint xlog_get_client_id(__be32 i) #define XLOG_TIC_INITED 0x1 /* has been initialized */ #define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ #define XLOG_TIC_IN_Q 0x4 + +#define XLOG_TIC_FLAGS \ + { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \ + { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \ + { XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" } + #endif /* __KERNEL__ */ #define XLOG_UNMOUNT_TYPE 0x556e /* Un for Unmount */ @@ -361,9 +366,6 @@ typedef struct xlog_in_core { int ic_bwritecnt; unsigned short ic_state; char *ic_datap; /* 
pointer to iclog data */ -#ifdef XFS_LOG_TRACE - struct ktrace *ic_trace; -#endif /* Callback structures need their own cacheline */ spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; @@ -429,10 +431,6 @@ typedef struct log { int l_grant_write_cycle; int l_grant_write_bytes; -#ifdef XFS_LOG_TRACE - struct ktrace *l_grant_trace; -#endif - /* The following field are used for debugging; need to hold icloglock */ #ifdef DEBUG char *l_iclog_bak[XLOG_MAX_ICLOGS]; @@ -445,23 +443,12 @@ typedef struct log { /* common routines */ extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); -extern int xlog_find_tail(xlog_t *log, - xfs_daddr_t *head_blk, - xfs_daddr_t *tail_blk); extern int xlog_recover(xlog_t *log); extern int xlog_recover_finish(xlog_t *log); extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); -extern struct xfs_buf *xlog_get_bp(xlog_t *, int); -extern void xlog_put_bp(struct xfs_buf *); extern kmem_zone_t *xfs_log_ticket_zone; -/* iclog tracing */ -#define XLOG_TRACE_GRAB_FLUSH 1 -#define XLOG_TRACE_REL_FLUSH 2 -#define XLOG_TRACE_SLEEP_FLUSH 3 -#define XLOG_TRACE_WAKE_FLUSH 4 - /* * Unmount record type is used as a pseudo transaction type for the ticket. * It's value must be outside the range of XFS_TRANS_* values. 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index fb17f8226b0..22e6efdc17e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -46,11 +46,10 @@ #include "xfs_quota.h" #include "xfs_rw.h" #include "xfs_utils.h" +#include "xfs_trace.h" STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); -STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q, - xlog_recover_item_t *item); #if defined(DEBUG) STATIC void xlog_recover_check_summary(xlog_t *); #else @@ -67,7 +66,7 @@ STATIC void xlog_recover_check_summary(xlog_t *); ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) #define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) -xfs_buf_t * +STATIC xfs_buf_t * xlog_get_bp( xlog_t *log, int nbblks) @@ -87,7 +86,7 @@ xlog_get_bp( return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); } -void +STATIC void xlog_put_bp( xfs_buf_t *bp) { @@ -225,16 +224,10 @@ xlog_header_check_dump( xfs_mount_t *mp, xlog_rec_header_t *head) { - int b; - - cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); - for (b = 0; b < 16; b++) - cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&mp->m_sb.sb_uuid)[b]); - cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); - cmn_err(CE_DEBUG, " log : uuid = "); - for (b = 0; b < 16; b++) - cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&head->h_fs_uuid)[b]); - cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt)); + cmn_err(CE_DEBUG, "%s: SB : uuid = %pU, fmt = %d\n", + __func__, &mp->m_sb.sb_uuid, XLOG_FMT); + cmn_err(CE_DEBUG, " log : uuid = %pU, fmt = %d\n", + &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); } #else #define xlog_header_check_dump(mp, head) @@ -810,7 +803,7 @@ xlog_find_head( * We could speed up search by using current head_blk buffer, but it is not * available. 
*/ -int +STATIC int xlog_find_tail( xlog_t *log, xfs_daddr_t *head_blk, @@ -1372,36 +1365,45 @@ xlog_clear_stale_blocks( STATIC xlog_recover_t * xlog_recover_find_tid( - xlog_recover_t *q, + struct hlist_head *head, xlog_tid_t tid) { - xlog_recover_t *p = q; + xlog_recover_t *trans; + struct hlist_node *n; - while (p != NULL) { - if (p->r_log_tid == tid) - break; - p = p->r_next; + hlist_for_each_entry(trans, n, head, r_list) { + if (trans->r_log_tid == tid) + return trans; } - return p; + return NULL; } STATIC void -xlog_recover_put_hashq( - xlog_recover_t **q, - xlog_recover_t *trans) +xlog_recover_new_tid( + struct hlist_head *head, + xlog_tid_t tid, + xfs_lsn_t lsn) { - trans->r_next = *q; - *q = trans; + xlog_recover_t *trans; + + trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); + trans->r_log_tid = tid; + trans->r_lsn = lsn; + INIT_LIST_HEAD(&trans->r_itemq); + + INIT_HLIST_NODE(&trans->r_list); + hlist_add_head(&trans->r_list, head); } STATIC void xlog_recover_add_item( - xlog_recover_item_t **itemq) + struct list_head *head) { xlog_recover_item_t *item; item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); - xlog_recover_insert_item_backq(itemq, item); + INIT_LIST_HEAD(&item->ri_list); + list_add_tail(&item->ri_list, head); } STATIC int @@ -1414,8 +1416,7 @@ xlog_recover_add_to_cont_trans( xfs_caddr_t ptr, old_ptr; int old_len; - item = trans->r_itemq; - if (item == NULL) { + if (list_empty(&trans->r_itemq)) { /* finish copying rest of trans header */ xlog_recover_add_item(&trans->r_itemq); ptr = (xfs_caddr_t) &trans->r_theader + @@ -1423,7 +1424,8 @@ xlog_recover_add_to_cont_trans( memcpy(ptr, dp, len); /* d, s, l */ return 0; } - item = item->ri_prev; + /* take the tail entry */ + item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; @@ -1460,8 +1462,7 @@ xlog_recover_add_to_trans( if (!len) return 0; - item = trans->r_itemq; - if (item 
== NULL) { + if (list_empty(&trans->r_itemq)) { /* we need to catch log corruptions here */ if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { xlog_warn("XFS: xlog_recover_add_to_trans: " @@ -1479,12 +1480,15 @@ xlog_recover_add_to_trans( memcpy(ptr, dp, len); in_f = (xfs_inode_log_format_t *)ptr; - if (item->ri_prev->ri_total != 0 && - item->ri_prev->ri_total == item->ri_prev->ri_cnt) { + /* take the tail entry */ + item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); + if (item->ri_total != 0 && + item->ri_total == item->ri_cnt) { + /* tail item is in use, get a new one */ xlog_recover_add_item(&trans->r_itemq); + item = list_entry(trans->r_itemq.prev, + xlog_recover_item_t, ri_list); } - item = trans->r_itemq; - item = item->ri_prev; if (item->ri_total == 0) { /* first region to be added */ if (in_f->ilf_size == 0 || @@ -1509,96 +1513,29 @@ xlog_recover_add_to_trans( return 0; } -STATIC void -xlog_recover_new_tid( - xlog_recover_t **q, - xlog_tid_t tid, - xfs_lsn_t lsn) -{ - xlog_recover_t *trans; - - trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); - trans->r_log_tid = tid; - trans->r_lsn = lsn; - xlog_recover_put_hashq(q, trans); -} - -STATIC int -xlog_recover_unlink_tid( - xlog_recover_t **q, - xlog_recover_t *trans) -{ - xlog_recover_t *tp; - int found = 0; - - ASSERT(trans != NULL); - if (trans == *q) { - *q = (*q)->r_next; - } else { - tp = *q; - while (tp) { - if (tp->r_next == trans) { - found = 1; - break; - } - tp = tp->r_next; - } - if (!found) { - xlog_warn( - "XFS: xlog_recover_unlink_tid: trans not found"); - ASSERT(0); - return XFS_ERROR(EIO); - } - tp->r_next = tp->r_next->r_next; - } - return 0; -} - -STATIC void -xlog_recover_insert_item_backq( - xlog_recover_item_t **q, - xlog_recover_item_t *item) -{ - if (*q == NULL) { - item->ri_prev = item->ri_next = item; - *q = item; - } else { - item->ri_next = *q; - item->ri_prev = (*q)->ri_prev; - (*q)->ri_prev = item; - item->ri_prev->ri_next = item; - } -} - -STATIC void 
-xlog_recover_insert_item_frontq( - xlog_recover_item_t **q, - xlog_recover_item_t *item) -{ - xlog_recover_insert_item_backq(q, item); - *q = item; -} - +/* + * Sort the log items in the transaction. Cancelled buffers need + * to be put first so they are processed before any items that might + * modify the buffers. If they are cancelled, then the modifications + * don't need to be replayed. + */ STATIC int xlog_recover_reorder_trans( xlog_recover_t *trans) { - xlog_recover_item_t *first_item, *itemq, *itemq_next; - xfs_buf_log_format_t *buf_f; - ushort flags = 0; + xlog_recover_item_t *item, *n; + LIST_HEAD(sort_list); - first_item = itemq = trans->r_itemq; - trans->r_itemq = NULL; - do { - itemq_next = itemq->ri_next; - buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr; + list_splice_init(&trans->r_itemq, &sort_list); + list_for_each_entry_safe(item, n, &sort_list, ri_list) { + xfs_buf_log_format_t *buf_f; - switch (ITEM_TYPE(itemq)) { + buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; + + switch (ITEM_TYPE(item)) { case XFS_LI_BUF: - flags = buf_f->blf_flags; - if (!(flags & XFS_BLI_CANCEL)) { - xlog_recover_insert_item_frontq(&trans->r_itemq, - itemq); + if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) { + list_move(&item->ri_list, &trans->r_itemq); break; } case XFS_LI_INODE: @@ -1606,7 +1543,7 @@ xlog_recover_reorder_trans( case XFS_LI_QUOTAOFF: case XFS_LI_EFD: case XFS_LI_EFI: - xlog_recover_insert_item_backq(&trans->r_itemq, itemq); + list_move_tail(&item->ri_list, &trans->r_itemq); break; default: xlog_warn( @@ -1614,8 +1551,8 @@ xlog_recover_reorder_trans( ASSERT(0); return XFS_ERROR(EIO); } - itemq = itemq_next; - } while (first_item != itemq); + } + ASSERT(list_empty(&sort_list)); return 0; } @@ -2206,6 +2143,7 @@ xlog_recover_do_buffer_trans( xfs_daddr_t blkno; int len; ushort flags; + uint buf_flags; buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; @@ -2246,12 +2184,11 @@ xlog_recover_do_buffer_trans( } mp = log->l_mp; - if (flags & 
XFS_BLI_INODE_BUF) { - bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len, - XFS_BUF_LOCK); - } else { - bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0); - } + buf_flags = XBF_LOCK; + if (!(flags & XFS_BLI_INODE_BUF)) + buf_flags |= XBF_MAPPED; + + bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); if (XFS_BUF_ISERROR(bp)) { xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, bp, blkno); @@ -2350,8 +2287,8 @@ xlog_recover_do_inode_trans( goto error; } - bp = xfs_buf_read_flags(mp->m_ddev_targp, in_f->ilf_blkno, - in_f->ilf_len, XFS_BUF_LOCK); + bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, + XBF_LOCK); if (XFS_BUF_ISERROR(bp)) { xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, bp, in_f->ilf_blkno); @@ -2819,14 +2756,13 @@ xlog_recover_do_trans( int pass) { int error = 0; - xlog_recover_item_t *item, *first_item; + xlog_recover_item_t *item; error = xlog_recover_reorder_trans(trans); if (error) return error; - first_item = item = trans->r_itemq; - do { + list_for_each_entry(item, &trans->r_itemq, ri_list) { switch (ITEM_TYPE(item)) { case XFS_LI_BUF: error = xlog_recover_do_buffer_trans(log, item, pass); @@ -2859,8 +2795,7 @@ xlog_recover_do_trans( if (error) return error; - item = item->ri_next; - } while (first_item != item); + } return 0; } @@ -2874,21 +2809,18 @@ STATIC void xlog_recover_free_trans( xlog_recover_t *trans) { - xlog_recover_item_t *first_item, *item, *free_item; + xlog_recover_item_t *item, *n; int i; - item = first_item = trans->r_itemq; - do { - free_item = item; - item = item->ri_next; - /* Free the regions in the item. */ - for (i = 0; i < free_item->ri_cnt; i++) { - kmem_free(free_item->ri_buf[i].i_addr); - } + list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { + /* Free the regions in the item. 
*/ + list_del(&item->ri_list); + for (i = 0; i < item->ri_cnt; i++) + kmem_free(item->ri_buf[i].i_addr); /* Free the item itself */ - kmem_free(free_item->ri_buf); - kmem_free(free_item); - } while (first_item != item); + kmem_free(item->ri_buf); + kmem_free(item); + } /* Free the transaction recover structure */ kmem_free(trans); } @@ -2896,14 +2828,12 @@ xlog_recover_free_trans( STATIC int xlog_recover_commit_trans( xlog_t *log, - xlog_recover_t **q, xlog_recover_t *trans, int pass) { int error; - if ((error = xlog_recover_unlink_tid(q, trans))) - return error; + hlist_del(&trans->r_list); if ((error = xlog_recover_do_trans(log, trans, pass))) return error; xlog_recover_free_trans(trans); /* no error */ @@ -2931,7 +2861,7 @@ xlog_recover_unmount_trans( STATIC int xlog_recover_process_data( xlog_t *log, - xlog_recover_t *rhash[], + struct hlist_head rhash[], xlog_rec_header_t *rhead, xfs_caddr_t dp, int pass) @@ -2965,7 +2895,7 @@ xlog_recover_process_data( } tid = be32_to_cpu(ohead->oh_tid); hash = XLOG_RHASH(tid); - trans = xlog_recover_find_tid(rhash[hash], tid); + trans = xlog_recover_find_tid(&rhash[hash], tid); if (trans == NULL) { /* not found; add new tid */ if (ohead->oh_flags & XLOG_START_TRANS) xlog_recover_new_tid(&rhash[hash], tid, @@ -2983,7 +2913,7 @@ xlog_recover_process_data( switch (flags) { case XLOG_COMMIT_TRANS: error = xlog_recover_commit_trans(log, - &rhash[hash], trans, pass); + trans, pass); break; case XLOG_UNMOUNT_TRANS: error = xlog_recover_unmount_trans(trans); @@ -3216,7 +3146,7 @@ xlog_recover_process_one_iunlink( /* * Get the on disk inode to find the next inode in the bucket. 
*/ - error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XBF_LOCK); if (error) goto fail_iput; @@ -3517,12 +3447,12 @@ xlog_do_recovery_pass( { xlog_rec_header_t *rhead; xfs_daddr_t blk_no; - xfs_caddr_t bufaddr, offset; + xfs_caddr_t offset; xfs_buf_t *hbp, *dbp; int error = 0, h_size; int bblks, split_bblks; int hblks, split_hblks, wrapped_hblks; - xlog_recover_t *rhash[XLOG_RHASH_SIZE]; + struct hlist_head rhash[XLOG_RHASH_SIZE]; ASSERT(head_blk != tail_blk); @@ -3610,7 +3540,7 @@ xlog_do_recovery_pass( /* * Check for header wrapping around physical end-of-log */ - offset = NULL; + offset = XFS_BUF_PTR(hbp); split_hblks = 0; wrapped_hblks = 0; if (blk_no + hblks <= log->l_logBBsize) { @@ -3646,9 +3576,8 @@ xlog_do_recovery_pass( * - order is important. */ wrapped_hblks = hblks - split_hblks; - bufaddr = XFS_BUF_PTR(hbp); error = XFS_BUF_SET_PTR(hbp, - bufaddr + BBTOB(split_hblks), + offset + BBTOB(split_hblks), BBTOB(hblks - split_hblks)); if (error) goto bread_err2; @@ -3658,14 +3587,10 @@ xlog_do_recovery_pass( if (error) goto bread_err2; - error = XFS_BUF_SET_PTR(hbp, bufaddr, + error = XFS_BUF_SET_PTR(hbp, offset, BBTOB(hblks)); if (error) goto bread_err2; - - if (!offset) - offset = xlog_align(log, 0, - wrapped_hblks, hbp); } rhead = (xlog_rec_header_t *)offset; error = xlog_valid_rec_header(log, rhead, @@ -3685,7 +3610,7 @@ xlog_do_recovery_pass( } else { /* This log record is split across the * physical end of log */ - offset = NULL; + offset = XFS_BUF_PTR(dbp); split_bblks = 0; if (blk_no != log->l_logBBsize) { /* some data is before the physical @@ -3714,9 +3639,8 @@ xlog_do_recovery_pass( * _first_, then the log start (LR header end) * - order is important. 
*/ - bufaddr = XFS_BUF_PTR(dbp); error = XFS_BUF_SET_PTR(dbp, - bufaddr + BBTOB(split_bblks), + offset + BBTOB(split_bblks), BBTOB(bblks - split_bblks)); if (error) goto bread_err2; @@ -3727,13 +3651,9 @@ xlog_do_recovery_pass( if (error) goto bread_err2; - error = XFS_BUF_SET_PTR(dbp, bufaddr, h_size); + error = XFS_BUF_SET_PTR(dbp, offset, h_size); if (error) goto bread_err2; - - if (!offset) - offset = xlog_align(log, wrapped_hblks, - bblks - split_bblks, dbp); } xlog_unpack_data(rhead, offset, log); if ((error = xlog_recover_process_data(log, rhash, @@ -3993,8 +3913,7 @@ xlog_recover_finish( * case the unlink transactions would have problems * pushing the EFIs out of the way. */ - xfs_log_force(log->l_mp, (xfs_lsn_t)0, - (XFS_LOG_FORCE | XFS_LOG_SYNC)); + xfs_log_force(log->l_mp, XFS_LOG_SYNC); xlog_recover_process_iunlinks(log); diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/xfs_log_recover.h index b2254555530..75d74920725 100644 --- a/fs/xfs/xfs_log_recover.h +++ b/fs/xfs/xfs_log_recover.h @@ -35,22 +35,21 @@ * item headers are in ri_buf[0]. Additional buffers follow. 
*/ typedef struct xlog_recover_item { - struct xlog_recover_item *ri_next; - struct xlog_recover_item *ri_prev; - int ri_type; - int ri_cnt; /* count of regions found */ - int ri_total; /* total regions */ - xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */ + struct list_head ri_list; + int ri_type; + int ri_cnt; /* count of regions found */ + int ri_total; /* total regions */ + xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */ } xlog_recover_item_t; struct xlog_tid; typedef struct xlog_recover { - struct xlog_recover *r_next; - xlog_tid_t r_log_tid; /* log's transaction id */ - xfs_trans_header_t r_theader; /* trans header for partial */ - int r_state; /* not needed */ - xfs_lsn_t r_lsn; /* xact lsn */ - xlog_recover_item_t *r_itemq; /* q for items */ + struct hlist_node r_list; + xlog_tid_t r_log_tid; /* log's transaction id */ + xfs_trans_header_t r_theader; /* trans header for partial */ + int r_state; /* not needed */ + xfs_lsn_t r_lsn; /* xact lsn */ + struct list_head r_itemq; /* q for items */ } xlog_recover_t; #define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 8b6c9e807ef..6afaaeb2950 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -44,6 +44,8 @@ #include "xfs_quota.h" #include "xfs_fsops.h" #include "xfs_utils.h" +#include "xfs_trace.h" + STATIC void xfs_unmountfs_wait(xfs_mount_t *); @@ -199,6 +201,38 @@ xfs_uuid_unmount( /* + * Reference counting access wrappers to the perag structures. 
+ */ +struct xfs_perag * +xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) +{ + struct xfs_perag *pag; + int ref = 0; + + spin_lock(&mp->m_perag_lock); + pag = radix_tree_lookup(&mp->m_perag_tree, agno); + if (pag) { + ASSERT(atomic_read(&pag->pag_ref) >= 0); + /* catch leaks in the positive direction during testing */ + ASSERT(atomic_read(&pag->pag_ref) < 1000); + ref = atomic_inc_return(&pag->pag_ref); + } + spin_unlock(&mp->m_perag_lock); + trace_xfs_perag_get(mp, agno, ref, _RET_IP_); + return pag; +} + +void +xfs_perag_put(struct xfs_perag *pag) +{ + int ref; + + ASSERT(atomic_read(&pag->pag_ref) > 0); + ref = atomic_dec_return(&pag->pag_ref); + trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); +} + +/* * Free up the resources associated with a mount structure. Assume that * the structure was initially zeroed, so we can tell which fields got * initialized. @@ -207,13 +241,16 @@ STATIC void xfs_free_perag( xfs_mount_t *mp) { - if (mp->m_perag) { - int agno; + xfs_agnumber_t agno; + struct xfs_perag *pag; - for (agno = 0; agno < mp->m_maxagi; agno++) - if (mp->m_perag[agno].pagb_list) - kmem_free(mp->m_perag[agno].pagb_list); - kmem_free(mp->m_perag); + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + spin_lock(&mp->m_perag_lock); + pag = radix_tree_delete(&mp->m_perag_tree, agno); + ASSERT(pag); + ASSERT(atomic_read(&pag->pag_ref) == 0); + spin_unlock(&mp->m_perag_lock); + kmem_free(pag); } } @@ -387,22 +424,57 @@ xfs_initialize_perag_icache( } } -xfs_agnumber_t +int xfs_initialize_perag( xfs_mount_t *mp, - xfs_agnumber_t agcount) + xfs_agnumber_t agcount, + xfs_agnumber_t *maxagi) { xfs_agnumber_t index, max_metadata; + xfs_agnumber_t first_initialised = 0; xfs_perag_t *pag; xfs_agino_t agino; xfs_ino_t ino; xfs_sb_t *sbp = &mp->m_sb; xfs_ino_t max_inum = XFS_MAXINUMBER_32; + int error = -ENOMEM; /* Check to see if the filesystem can overflow 32 bit inodes */ agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); ino = 
XFS_AGINO_TO_INO(mp, agcount - 1, agino); + /* + * Walk the current per-ag tree so we don't try to initialise AGs + * that already exist (growfs case). Allocate and insert all the + * AGs we don't find ready for initialisation. + */ + for (index = 0; index < agcount; index++) { + pag = xfs_perag_get(mp, index); + if (pag) { + xfs_perag_put(pag); + continue; + } + if (!first_initialised) + first_initialised = index; + pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); + if (!pag) + goto out_unwind; + if (radix_tree_preload(GFP_NOFS)) + goto out_unwind; + spin_lock(&mp->m_perag_lock); + if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { + BUG(); + spin_unlock(&mp->m_perag_lock); + radix_tree_preload_end(); + error = -EEXIST; + goto out_unwind; + } + pag->pag_agno = index; + pag->pag_mount = mp; + spin_unlock(&mp->m_perag_lock); + radix_tree_preload_end(); + } + /* Clear the mount flag if no inode can overflow 32 bits * on this filesystem, or if specifically requested.. */ @@ -436,21 +508,33 @@ xfs_initialize_perag( } /* This ag is preferred for inodes */ - pag = &mp->m_perag[index]; + pag = xfs_perag_get(mp, index); pag->pagi_inodeok = 1; if (index < max_metadata) pag->pagf_metadata = 1; xfs_initialize_perag_icache(pag); + xfs_perag_put(pag); } } else { /* Setup default behavior for smaller filesystems */ for (index = 0; index < agcount; index++) { - pag = &mp->m_perag[index]; + pag = xfs_perag_get(mp, index); pag->pagi_inodeok = 1; xfs_initialize_perag_icache(pag); + xfs_perag_put(pag); } } - return index; + if (maxagi) + *maxagi = index; + return 0; + +out_unwind: + kmem_free(pag); + for (; index > first_initialised; index--) { + pag = radix_tree_delete(&mp->m_perag_tree, index); + kmem_free(pag); + } + return error; } void @@ -581,10 +665,10 @@ xfs_readsb(xfs_mount_t *mp, int flags) * access to the superblock. 
*/ sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); - extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; + extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED; - bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, - BTOBB(sector_size), extra_flags); + bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), + extra_flags); if (!bp || XFS_BUF_ISERROR(bp)) { xfs_fs_mount_cmn_err(flags, "SB read failed"); error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; @@ -624,8 +708,8 @@ xfs_readsb(xfs_mount_t *mp, int flags) XFS_BUF_UNMANAGE(bp); xfs_buf_relse(bp); sector_size = mp->m_sb.sb_sectsize; - bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, - BTOBB(sector_size), extra_flags); + bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, + BTOBB(sector_size), extra_flags); if (!bp || XFS_BUF_ISERROR(bp)) { xfs_fs_mount_cmn_err(flags, "SB re-read failed"); error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; @@ -729,12 +813,13 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) error = xfs_ialloc_pagi_init(mp, NULL, index); if (error) return error; - pag = &mp->m_perag[index]; + pag = xfs_perag_get(mp, index); ifree += pag->pagi_freecount; ialloc += pag->pagi_count; bfree += pag->pagf_freeblks; bfreelst += pag->pagf_flcount; btree += pag->pagf_btreeblks; + xfs_perag_put(pag); } /* * Overwrite incore superblock counters with just-read data @@ -1006,6 +1091,22 @@ xfs_mount_reset_sbqflags( return xfs_trans_commit(tp, 0); } +__uint64_t +xfs_default_resblks(xfs_mount_t *mp) +{ + __uint64_t resblks; + + /* + * We default to 5% or 1024 fsbs of space reserved, whichever is smaller. + * This may drive us straight to ENOSPC on mount, but that implies + * we were already there on the last unmount. Warn if this occurs. 
+ */ + resblks = mp->m_sb.sb_dblocks; + do_div(resblks, 20); + resblks = min_t(__uint64_t, resblks, 1024); + return resblks; +} + /* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct @@ -1150,13 +1251,13 @@ xfs_mountfs( /* * Allocate and initialize the per-ag data. */ - init_rwsem(&mp->m_peraglock); - mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), - KM_MAYFAIL); - if (!mp->m_perag) + spin_lock_init(&mp->m_perag_lock); + INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS); + error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); + if (error) { + cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error); goto out_remove_uuid; - - mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount); + } if (!sbp->sb_logblocks) { cmn_err(CE_WARN, "XFS: no log defined"); @@ -1316,18 +1417,14 @@ xfs_mountfs( * when at ENOSPC. This is needed for operations like create with * attr, unwritten extent conversion at ENOSPC, etc. Data allocations * are not allowed to use this reserved space. - * - * We default to 5% or 1024 fsbs of space reserved, whichever is smaller. - * This may drive us straight to ENOSPC on mount, but that implies - * we were already there on the last unmount. Warn if this occurs. */ - resblks = mp->m_sb.sb_dblocks; - do_div(resblks, 20); - resblks = min_t(__uint64_t, resblks, 1024); - error = xfs_reserve_blocks(mp, &resblks, NULL); - if (error) - cmn_err(CE_WARN, "XFS: Unable to allocate reserve blocks. " - "Continuing without a reserve pool."); + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + resblks = xfs_default_resblks(mp); + error = xfs_reserve_blocks(mp, &resblks, NULL); + if (error) + cmn_err(CE_WARN, "XFS: Unable to allocate reserve " + "blocks. Continuing without a reserve pool."); + } return 0; @@ -1370,8 +1467,19 @@ xfs_unmountfs( * push out the iclog we will never get that unlocked. hence we * need to force the log first. 
*/ - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); - xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC); + xfs_log_force(mp, XFS_LOG_SYNC); + + /* + * Do a delwri reclaim pass first so that as many dirty inodes are + * queued up for IO as possible. Then flush the buffers before making + * a synchronous path to catch all the remaining inodes are reclaimed. + * This makes the reclaim process as quick as possible by avoiding + * synchronous writeout and blocking on inodes already in the delwri + * state as much as possible. + */ + xfs_reclaim_inodes(mp, 0); + XFS_bflush(mp->m_ddev_targp); + xfs_reclaim_inodes(mp, SYNC_WAIT); xfs_qm_unmount(mp); @@ -1380,7 +1488,7 @@ xfs_unmountfs( * that nothing is pinned. This is important because bflush() * will skip pinned buffers. */ - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); + xfs_log_force(mp, XFS_LOG_SYNC); xfs_binval(mp->m_ddev_targp); if (mp->m_rtdev_targp) { @@ -1471,7 +1579,7 @@ xfs_log_sbcount( if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) return 0; - tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT); + tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP); error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, XFS_DEFAULT_LOG_COUNT); if (error) { @@ -1546,15 +1654,14 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields); /* find modified range */ + f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields); + ASSERT((1LL << f) & XFS_SB_MOD_BITS); + last = xfs_sb_info[f + 1].offset - 1; f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); ASSERT((1LL << f) & XFS_SB_MOD_BITS); first = xfs_sb_info[f].offset; - f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields); - ASSERT((1LL << f) & XFS_SB_MOD_BITS); - last = xfs_sb_info[f + 1].offset - 1; - xfs_trans_log_buf(tp, bp, first, last); } @@ -1885,7 +1992,7 @@ xfs_getsb( ASSERT(mp->m_sb_bp != NULL); bp = mp->m_sb_bp; - if (flags & XFS_BUF_TRYLOCK) { + if (flags & XBF_TRYLOCK) { if 
(!XFS_BUF_CPSEMA(bp)) { return NULL; } @@ -2123,7 +2230,7 @@ xfs_icsb_destroy_counters( mutex_destroy(&mp->m_icsb_mutex); } -STATIC_INLINE void +STATIC void xfs_icsb_lock_cntr( xfs_icsb_cnts_t *icsbp) { @@ -2132,7 +2239,7 @@ xfs_icsb_lock_cntr( } } -STATIC_INLINE void +STATIC void xfs_icsb_unlock_cntr( xfs_icsb_cnts_t *icsbp) { @@ -2140,7 +2247,7 @@ xfs_icsb_unlock_cntr( } -STATIC_INLINE void +STATIC void xfs_icsb_lock_all_counters( xfs_mount_t *mp) { @@ -2153,7 +2260,7 @@ xfs_icsb_lock_all_counters( } } -STATIC_INLINE void +STATIC void xfs_icsb_unlock_all_counters( xfs_mount_t *mp) { @@ -2389,12 +2496,12 @@ xfs_icsb_modify_counters( { xfs_icsb_cnts_t *icsbp; long long lcounter; /* long counter for 64 bit fields */ - int cpu, ret = 0; + int ret = 0; might_sleep(); again: - cpu = get_cpu(); - icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu); + preempt_disable(); + icsbp = this_cpu_ptr(mp->m_sb_cnts); /* * if the counter is disabled, go to slow path @@ -2438,11 +2545,11 @@ again: break; } xfs_icsb_unlock_cntr(icsbp); - put_cpu(); + preempt_enable(); return 0; slow_path: - put_cpu(); + preempt_enable(); /* * serialise with a mutex so we don't burn lots of cpu on @@ -2490,7 +2597,7 @@ slow_path: balance_counter: xfs_icsb_unlock_cntr(icsbp); - put_cpu(); + preempt_enable(); /* * We may have multiple threads here if multiple per-cpu diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a6c023bc0fb..70504fcf14c 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -78,7 +78,8 @@ typedef int (*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t); typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *, struct xfs_inode *, dm_right_t, struct xfs_inode *, dm_right_t, - const char *, const char *, mode_t, int, int); + const unsigned char *, const unsigned char *, + mode_t, int, int); typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t, char *, char *); typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *, @@ 
-93,6 +94,9 @@ typedef struct xfs_dmops { xfs_send_unmount_t xfs_send_unmount; } xfs_dmops_t; +#define XFS_DMAPI_UNMOUNT_FLAGS(mp) \ + (((mp)->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ? 0 : DM_FLAGS_UNWANTED) + #define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \ (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock) #define XFS_SEND_MMAP(mp, vma,fl) \ @@ -101,12 +105,24 @@ typedef struct xfs_dmops { (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right) #define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl) -#define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ - (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) #define XFS_SEND_MOUNT(mp,right,path,name) \ (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name) -#define XFS_SEND_UNMOUNT(mp, ip,right,mode,rval,fl) \ - (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl) +#define XFS_SEND_PREUNMOUNT(mp) \ +do { \ + if (mp->m_flags & XFS_MOUNT_DMAPI) { \ + (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT, mp, \ + (mp)->m_rootip, DM_RIGHT_NULL, \ + (mp)->m_rootip, DM_RIGHT_NULL, \ + NULL, NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \ + } \ +} while (0) +#define XFS_SEND_UNMOUNT(mp) \ +do { \ + if (mp->m_flags & XFS_MOUNT_DMAPI) { \ + (*(mp)->m_dm_ops->xfs_send_unmount)(mp, (mp)->m_rootip, \ + DM_RIGHT_NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \ + } \ +} while (0) #ifdef HAVE_PERCPU_SB @@ -192,8 +208,8 @@ typedef struct xfs_mount { uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ uint m_in_maxlevels; /* max inobt btree levels. 
*/ - struct xfs_perag *m_perag; /* per-ag accounting info */ - struct rw_semaphore m_peraglock; /* lock for m_perag (pointer) */ + struct radix_tree_root m_perag_tree; /* per-ag accounting info */ + spinlock_t m_perag_lock; /* lock for m_perag_tree */ struct mutex m_growlock; /* growfs mutex */ int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_dmevmask; /* DMI events for this FS */ @@ -209,6 +225,7 @@ typedef struct xfs_mount { __uint64_t m_maxioffset; /* maximum inode offset */ __uint64_t m_resblks; /* total reserved blocks */ __uint64_t m_resblks_avail;/* available reserved blocks */ + __uint64_t m_resblks_save; /* reserved blks @ remount,ro */ int m_dalign; /* stripe unit */ int m_swidth; /* stripe width */ int m_sinoalign; /* stripe unit inode alignment */ @@ -369,31 +386,22 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) } /* - * perag get/put wrappers for eventual ref counting + * perag get/put wrappers for ref counting */ -static inline xfs_perag_t * -xfs_get_perag(struct xfs_mount *mp, xfs_ino_t ino) -{ - return &mp->m_perag[XFS_INO_TO_AGNO(mp, ino)]; -} - -static inline void -xfs_put_perag(struct xfs_mount *mp, xfs_perag_t *pag) -{ - /* nothing to see here, move along */ -} +struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); +void xfs_perag_put(struct xfs_perag *pag); /* * Per-cpu superblock locking functions */ #ifdef HAVE_PERCPU_SB -STATIC_INLINE void +static inline void xfs_icsb_lock(xfs_mount_t *mp) { mutex_lock(&mp->m_icsb_mutex); } -STATIC_INLINE void +static inline void xfs_icsb_unlock(xfs_mount_t *mp) { mutex_unlock(&mp->m_icsb_mutex); @@ -413,6 +421,7 @@ typedef struct xfs_mod_sb { } xfs_mod_sb_t; extern int xfs_log_sbcount(xfs_mount_t *, uint); +extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); extern int xfs_mountfs(xfs_mount_t *mp); extern void xfs_unmountfs(xfs_mount_t *); @@ -435,7 +444,8 @@ extern struct xfs_dmops xfs_dmcore_xfs; #endif /* __KERNEL__ */ extern void xfs_mod_sb(struct xfs_trans 
*, __int64_t); -extern xfs_agnumber_t xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t); +extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t, + xfs_agnumber_t *); extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 4b0613d99fa..45ce15dc5b2 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -398,7 +398,7 @@ exit: * guaranteed that all the free functions for all the elements have finished * executing and the reaper is not running. */ -void +static void xfs_mru_cache_flush( xfs_mru_cache_t *mru) { diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h index 5d439f34b0c..36dd3ec8b4e 100644 --- a/fs/xfs/xfs_mru_cache.h +++ b/fs/xfs/xfs_mru_cache.h @@ -42,7 +42,6 @@ void xfs_mru_cache_uninit(void); int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, unsigned int grp_count, xfs_mru_cache_free_func_t free_func); -void xfs_mru_cache_flush(xfs_mru_cache_t *mru); void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, void *value); diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 3ec91ac74c2..fdcab3f81dd 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -92,6 +92,14 @@ typedef struct xfs_dqblk { #define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) +#define XFS_DQ_FLAGS \ + { XFS_DQ_USER, "USER" }, \ + { XFS_DQ_PROJ, "PROJ" }, \ + { XFS_DQ_GROUP, "GROUP" }, \ + { XFS_DQ_DIRTY, "DIRTY" }, \ + { XFS_DQ_WANT, "WANT" }, \ + { XFS_DQ_INACTIVE, "INACTIVE" } + /* * In the worst case, when both user and group quotas are on, * we can have a max of three dquots changing in a single transaction. @@ -215,16 +223,9 @@ typedef struct xfs_qoff_logformat { #define XFS_QMOPT_RES_INOS 0x0800000 /* - * flags for dqflush and dqflush_all. 
- */ -#define XFS_QMOPT_SYNC 0x1000000 -#define XFS_QMOPT_ASYNC 0x2000000 -#define XFS_QMOPT_DELWRI 0x4000000 - -/* * flags for dqalloc. */ -#define XFS_QMOPT_INHERIT 0x8000000 +#define XFS_QMOPT_INHERIT 0x1000000 /* * flags to xfs_trans_mod_dquot. diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index b81deea0ce1..fc1cda23b81 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -39,6 +39,7 @@ #include "xfs_utils.h" #include "xfs_trans_space.h" #include "xfs_vnodeops.h" +#include "xfs_trace.h" /* diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 385f6dceba5..6be05f756d5 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -45,6 +45,7 @@ #include "xfs_inode_item.h" #include "xfs_trans_space.h" #include "xfs_utils.h" +#include "xfs_trace.h" /* @@ -1516,6 +1517,8 @@ xfs_rtfree_range( */ error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, &postblock); + if (error) + return error; /* * If there are blocks not being freed at the front of the * old extent, add summary data for them to be allocated. diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index 3f816ad7ff1..e336742a58a 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -44,48 +44,7 @@ #include "xfs_error.h" #include "xfs_buf_item.h" #include "xfs_rw.h" - -/* - * This is a subroutine for xfs_write() and other writers (xfs_ioctl) - * which clears the setuid and setgid bits when a file is written. 
- */ -int -xfs_write_clear_setuid( - xfs_inode_t *ip) -{ - xfs_mount_t *mp; - xfs_trans_t *tp; - int error; - - mp = ip->i_mount; - tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); - if ((error = xfs_trans_reserve(tp, 0, - XFS_WRITEID_LOG_RES(mp), - 0, 0, 0))) { - xfs_trans_cancel(tp, 0); - return error; - } - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - ip->i_d.di_mode &= ~S_ISUID; - - /* - * Note that we don't have to worry about mandatory - * file locking being disabled here because we only - * clear the S_ISGID bit if the Group execute bit is - * on, but if it was on then mandatory locking wouldn't - * have been enabled. - */ - if (ip->i_d.di_mode & S_IXGRP) { - ip->i_d.di_mode &= ~S_ISGID; - } - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return 0; -} +#include "xfs_trace.h" /* * Force a shutdown of the filesystem instantly while keeping @@ -152,90 +111,6 @@ xfs_do_force_shutdown( } } - -/* - * Called when we want to stop a buffer from getting written or read. - * We attach the EIO error, muck with its flags, and call biodone - * so that the proper iodone callbacks get called. - */ -int -xfs_bioerror( - xfs_buf_t *bp) -{ - -#ifdef XFSERRORDEBUG - ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); -#endif - - /* - * No need to wait until the buffer is unpinned. - * We aren't flushing it. - */ - xfs_buftrace("XFS IOERROR", bp); - XFS_BUF_ERROR(bp, EIO); - /* - * We're calling biodone, so delete B_DONE flag. Either way - * we have to call the iodone callback, and calling biodone - * probably is the best way since it takes care of - * GRIO as well. 
- */ - XFS_BUF_UNREAD(bp); - XFS_BUF_UNDELAYWRITE(bp); - XFS_BUF_UNDONE(bp); - XFS_BUF_STALE(bp); - - XFS_BUF_CLR_BDSTRAT_FUNC(bp); - xfs_biodone(bp); - - return (EIO); -} - -/* - * Same as xfs_bioerror, except that we are releasing the buffer - * here ourselves, and avoiding the biodone call. - * This is meant for userdata errors; metadata bufs come with - * iodone functions attached, so that we can track down errors. - */ -int -xfs_bioerror_relse( - xfs_buf_t *bp) -{ - int64_t fl; - - ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks); - ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone); - - xfs_buftrace("XFS IOERRELSE", bp); - fl = XFS_BUF_BFLAGS(bp); - /* - * No need to wait until the buffer is unpinned. - * We aren't flushing it. - * - * chunkhold expects B_DONE to be set, whether - * we actually finish the I/O or not. We don't want to - * change that interface. - */ - XFS_BUF_UNREAD(bp); - XFS_BUF_UNDELAYWRITE(bp); - XFS_BUF_DONE(bp); - XFS_BUF_STALE(bp); - XFS_BUF_CLR_IODONE_FUNC(bp); - XFS_BUF_CLR_BDSTRAT_FUNC(bp); - if (!(fl & XFS_B_ASYNC)) { - /* - * Mark b_error and B_ERROR _both_. - * Lot's of chunkcache code assumes that. - * There's no reason to mark error for - * ASYNC buffers. - */ - XFS_BUF_ERROR(bp, EIO); - XFS_BUF_FINISH_IOWAIT(bp); - } else { - xfs_buf_relse(bp); - } - return (EIO); -} - /* * Prints out an ALERT message about I/O error. */ @@ -277,10 +152,10 @@ xfs_read_buf( xfs_buf_t *bp; int error; - if (flags) - bp = xfs_buf_read_flags(target, blkno, len, flags); - else - bp = xfs_buf_read(target, blkno, len, flags); + if (!flags) + flags = XBF_LOCK | XBF_MAPPED; + + bp = xfs_buf_read(target, blkno, len, flags); if (!bp) return XFS_ERROR(EIO); error = XFS_BUF_GETERROR(bp); @@ -307,32 +182,23 @@ xfs_read_buf( } /* - * Wrapper around bwrite() so that we can trap - * write errors, and act accordingly. 
+ * helper function to extract extent size hint from inode */ -int -xfs_bwrite( - struct xfs_mount *mp, - struct xfs_buf *bp) +xfs_extlen_t +xfs_get_extsz_hint( + struct xfs_inode *ip) { - int error; + xfs_extlen_t extsz; - /* - * XXXsup how does this work for quotas. - */ - XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); - bp->b_mount = mp; - XFS_BUF_WRITE(bp); - - if ((error = XFS_bwrite(bp))) { - ASSERT(mp); - /* - * Cannot put a buftrace here since if the buffer is not - * B_HOLD then we will brelse() the buffer before returning - * from bwrite and we could be tracing a buffer that has - * been reused. - */ - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + if (unlikely(XFS_IS_REALTIME_INODE(ip))) { + extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) + ? ip->i_d.di_extsize + : ip->i_mount->m_sb.sb_rextsize; + ASSERT(extsz); + } else { + extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) + ? ip->i_d.di_extsize : 0; } - return (error); + + return extsz; } diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index f5e4874c37d..11c41ec6ed7 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h @@ -37,44 +37,13 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) } /* - * Flags for xfs_free_eofblocks - */ -#define XFS_FREE_EOF_LOCK (1<<0) -#define XFS_FREE_EOF_NOLOCK (1<<1) - - -/* - * helper function to extract extent size hint from inode - */ -STATIC_INLINE xfs_extlen_t -xfs_get_extsz_hint( - xfs_inode_t *ip) -{ - xfs_extlen_t extsz; - - if (unlikely(XFS_IS_REALTIME_INODE(ip))) { - extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) - ? ip->i_d.di_extsize - : ip->i_mount->m_sb.sb_rextsize; - ASSERT(extsz); - } else { - extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) - ? ip->i_d.di_extsize : 0; - } - return extsz; -} - -/* * Prototypes for functions in xfs_rw.c. 
*/ -extern int xfs_write_clear_setuid(struct xfs_inode *ip); -extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); -extern int xfs_bioerror(struct xfs_buf *bp); -extern int xfs_bioerror_relse(struct xfs_buf *bp); extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, xfs_daddr_t blkno, int len, uint flags, struct xfs_buf **bpp); extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, xfs_buf_t *bp, xfs_daddr_t blkno); +extern xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); #endif /* __XFS_RW_H__ */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 66b849358e6..be942d4e332 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -236,19 +236,20 @@ xfs_trans_alloc( uint type) { xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); - return _xfs_trans_alloc(mp, type); + return _xfs_trans_alloc(mp, type, KM_SLEEP); } xfs_trans_t * _xfs_trans_alloc( xfs_mount_t *mp, - uint type) + uint type, + uint memflags) { xfs_trans_t *tp; atomic_inc(&mp->m_active_trans); - tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP); + tp = kmem_zone_zalloc(xfs_trans_zone, memflags); tp->t_magic = XFS_TRANS_MAGIC; tp->t_type = type; tp->t_mountp = mp; @@ -980,9 +981,8 @@ shut_us_down: */ if (sync) { if (!error) { - error = _xfs_log_force(mp, commit_lsn, - XFS_LOG_FORCE | XFS_LOG_SYNC, - log_flushed); + error = _xfs_log_force_lsn(mp, commit_lsn, + XFS_LOG_SYNC, log_flushed); } XFS_STATS_INC(xs_trans_sync); } else { @@ -1120,7 +1120,7 @@ xfs_trans_fill_vecs( tp->t_header.th_num_items = nitems; log_vector->i_addr = (xfs_caddr_t)&tp->t_header; log_vector->i_len = sizeof(xfs_trans_header_t); - XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_TRANSHDR); + log_vector->i_type = XLOG_REG_TYPE_TRANSHDR; } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index ed47fc77759..c93e3a10285 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -100,6 +100,49 @@ typedef struct xfs_trans_header { #define XFS_TRANS_TYPE_MAX 41 /* new transaction types need to be reflected 
in xfs_logprint(8) */ +#define XFS_TRANS_TYPES \ + { XFS_TRANS_SETATTR_NOT_SIZE, "SETATTR_NOT_SIZE" }, \ + { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ + { XFS_TRANS_INACTIVE, "INACTIVE" }, \ + { XFS_TRANS_CREATE, "CREATE" }, \ + { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ + { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ + { XFS_TRANS_REMOVE, "REMOVE" }, \ + { XFS_TRANS_LINK, "LINK" }, \ + { XFS_TRANS_RENAME, "RENAME" }, \ + { XFS_TRANS_MKDIR, "MKDIR" }, \ + { XFS_TRANS_RMDIR, "RMDIR" }, \ + { XFS_TRANS_SYMLINK, "SYMLINK" }, \ + { XFS_TRANS_SET_DMATTRS, "SET_DMATTRS" }, \ + { XFS_TRANS_GROWFS, "GROWFS" }, \ + { XFS_TRANS_STRAT_WRITE, "STRAT_WRITE" }, \ + { XFS_TRANS_DIOSTRAT, "DIOSTRAT" }, \ + { XFS_TRANS_WRITEID, "WRITEID" }, \ + { XFS_TRANS_ADDAFORK, "ADDAFORK" }, \ + { XFS_TRANS_ATTRINVAL, "ATTRINVAL" }, \ + { XFS_TRANS_ATRUNCATE, "ATRUNCATE" }, \ + { XFS_TRANS_ATTR_SET, "ATTR_SET" }, \ + { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \ + { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \ + { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \ + { XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \ + { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \ + { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \ + { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \ + { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \ + { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \ + { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \ + { XFS_TRANS_SB_UNIT, "SB_UNIT" }, \ + { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \ + { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \ + { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \ + { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ + { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ + { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ + { XFS_TRANS_DUMMY1, "DUMMY1" }, \ + { XFS_TRANS_DUMMY2, "DUMMY2" }, \ + { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } + /* * This structure is used to track log items associated with * a transaction. 
It points to the log item and keeps some @@ -782,6 +825,10 @@ typedef struct xfs_log_item { #define XFS_LI_IN_AIL 0x1 #define XFS_LI_ABORTED 0x2 +#define XFS_LI_FLAGS \ + { XFS_LI_IN_AIL, "IN_AIL" }, \ + { XFS_LI_ABORTED, "ABORTED" } + typedef struct xfs_item_ops { uint (*iop_size)(xfs_log_item_t *); void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); @@ -814,8 +861,7 @@ typedef struct xfs_item_ops { #define XFS_ITEM_SUCCESS 0 #define XFS_ITEM_PINNED 1 #define XFS_ITEM_LOCKED 2 -#define XFS_ITEM_FLUSHING 3 -#define XFS_ITEM_PUSHBUF 4 +#define XFS_ITEM_PUSHBUF 3 /* * This structure is used to maintain a list of block ranges that have been @@ -924,7 +970,7 @@ typedef struct xfs_trans { * XFS transaction mechanism exported interfaces. */ xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); -xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint); +xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint); xfs_trans_t *xfs_trans_dup(xfs_trans_t *); int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, uint, uint); diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 2ffc570679b..e799824f724 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -237,14 +237,15 @@ out: } /* - * Function that does the work of pushing on the AIL + * xfsaild_push does the work of pushing on the AIL. Returning a timeout of + * zero indicates that the caller should sleep until woken. 
*/ long xfsaild_push( struct xfs_ail *ailp, xfs_lsn_t *last_lsn) { - long tout = 1000; /* milliseconds */ + long tout = 0; xfs_lsn_t last_pushed_lsn = *last_lsn; xfs_lsn_t target = ailp->xa_target; xfs_lsn_t lsn; @@ -252,6 +253,7 @@ xfsaild_push( int flush_log, count, stuck; xfs_mount_t *mp = ailp->xa_mount; struct xfs_ail_cursor *cur = &ailp->xa_cursors; + int push_xfsbufd = 0; spin_lock(&ailp->xa_lock); xfs_trans_ail_cursor_init(ailp, cur); @@ -262,7 +264,7 @@ xfsaild_push( */ xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&ailp->xa_lock); - last_pushed_lsn = 0; + *last_lsn = 0; return tout; } @@ -279,7 +281,6 @@ xfsaild_push( * prevents use from spinning when we can't do anything or there is * lots of contention on the AIL lists. */ - tout = 10; lsn = lip->li_lsn; flush_log = stuck = count = 0; while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { @@ -308,6 +309,7 @@ xfsaild_push( XFS_STATS_INC(xs_push_ail_pushbuf); IOP_PUSHBUF(lip); last_pushed_lsn = lsn; + push_xfsbufd = 1; break; case XFS_ITEM_PINNED: @@ -322,12 +324,6 @@ xfsaild_push( stuck++; break; - case XFS_ITEM_FLUSHING: - XFS_STATS_INC(xs_push_ail_flushing); - last_pushed_lsn = lsn; - stuck++; - break; - default: ASSERT(0); break; @@ -371,19 +367,24 @@ xfsaild_push( * move forward in the AIL. */ XFS_STATS_INC(xs_push_ail_flush); - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + xfs_log_force(mp, 0); + } + + if (push_xfsbufd) { + /* we've got delayed write buffers to flush */ + wake_up_process(mp->m_ddev_targp->bt_task); } if (!count) { /* We're past our target or empty, so idle */ - tout = 1000; + last_pushed_lsn = 0; } else if (XFS_LSN_CMP(lsn, target) >= 0) { /* * We reached the target so wait a bit longer for I/O to * complete and remove pushed items from the AIL before we * start the next scan from the start of the AIL. 
*/ - tout += 20; + tout = 50; last_pushed_lsn = 0; } else if ((stuck * 100) / count > 90) { /* @@ -395,11 +396,14 @@ xfsaild_push( * Backoff a bit more to allow some I/O to complete before * continuing from where we were. */ - tout += 10; + tout = 20; + } else { + /* more to do, but wait a short while before continuing */ + tout = 10; } *last_lsn = last_pushed_lsn; return tout; -} /* xfsaild_push */ +} /* diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 218829e6a15..5ffd544434e 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -38,6 +38,7 @@ #include "xfs_trans_priv.h" #include "xfs_error.h" #include "xfs_rw.h" +#include "xfs_trace.h" STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *, @@ -74,16 +75,14 @@ xfs_trans_get_buf(xfs_trans_t *tp, xfs_buf_log_item_t *bip; if (flags == 0) - flags = XFS_BUF_LOCK | XFS_BUF_MAPPED; + flags = XBF_LOCK | XBF_MAPPED; /* * Default to a normal get_buf() call if the tp is NULL. */ - if (tp == NULL) { - bp = xfs_buf_get_flags(target_dev, blkno, len, - flags | BUF_BUSY); - return(bp); - } + if (tp == NULL) + return xfs_buf_get(target_dev, blkno, len, + flags | XBF_DONT_BLOCK); /* * If we find the buffer in the cache with this transaction @@ -98,38 +97,35 @@ xfs_trans_get_buf(xfs_trans_t *tp, } if (bp != NULL) { ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); - if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) { - xfs_buftrace("TRANS GET RECUR SHUT", bp); + if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) XFS_BUF_SUPER_STALE(bp); - } + /* * If the buffer is stale then it was binval'ed * since last read. This doesn't matter since the * caller isn't allowed to use the data anyway. 
*/ - else if (XFS_BUF_ISSTALE(bp)) { - xfs_buftrace("TRANS GET RECUR STALE", bp); + else if (XFS_BUF_ISSTALE(bp)) ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); - } + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; - xfs_buftrace("TRANS GET RECUR", bp); - xfs_buf_item_trace("GET RECUR", bip); + trace_xfs_trans_get_buf_recur(bip); return (bp); } /* - * We always specify the BUF_BUSY flag within a transaction so - * that get_buf does not try to push out a delayed write buffer + * We always specify the XBF_DONT_BLOCK flag within a transaction + * so that get_buf does not try to push out a delayed write buffer * which might cause another transaction to take place (if the * buffer was delayed alloc). Such recursive transactions can * easily deadlock with our current transaction as well as cause * us to run out of stack space. */ - bp = xfs_buf_get_flags(target_dev, blkno, len, flags | BUF_BUSY); + bp = xfs_buf_get(target_dev, blkno, len, flags | XBF_DONT_BLOCK); if (bp == NULL) { return NULL; } @@ -169,8 +165,7 @@ xfs_trans_get_buf(xfs_trans_t *tp, */ XFS_BUF_SET_FSPRIVATE2(bp, tp); - xfs_buftrace("TRANS GET", bp); - xfs_buf_item_trace("GET", bip); + trace_xfs_trans_get_buf(bip); return (bp); } @@ -210,7 +205,7 @@ xfs_trans_getsb(xfs_trans_t *tp, ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; - xfs_buf_item_trace("GETSB RECUR", bip); + trace_xfs_trans_getsb_recur(bip); return (bp); } @@ -252,7 +247,7 @@ xfs_trans_getsb(xfs_trans_t *tp, */ XFS_BUF_SET_FSPRIVATE2(bp, tp); - xfs_buf_item_trace("GETSB", bip); + trace_xfs_trans_getsb(bip); return (bp); } @@ -296,15 +291,15 @@ xfs_trans_read_buf( int error; if (flags == 0) - flags = XFS_BUF_LOCK | XFS_BUF_MAPPED; + flags = XBF_LOCK | XBF_MAPPED; /* * Default to a normal get_buf() call if the tp is NULL. 
*/ if (tp == NULL) { - bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); + bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); if (!bp) - return (flags & XFS_BUF_TRYLOCK) ? + return (flags & XBF_TRYLOCK) ? EAGAIN : XFS_ERROR(ENOMEM); if (XFS_BUF_GETERROR(bp) != 0) { @@ -350,7 +345,7 @@ xfs_trans_read_buf( ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); ASSERT((XFS_BUF_ISERROR(bp)) == 0); if (!(XFS_BUF_ISDONE(bp))) { - xfs_buftrace("READ_BUF_INCORE !DONE", bp); + trace_xfs_trans_read_buf_io(bp, _RET_IP_); ASSERT(!XFS_BUF_ISASYNC(bp)); XFS_BUF_READ(bp); xfsbdstrat(tp->t_mountp, bp); @@ -375,7 +370,7 @@ xfs_trans_read_buf( * brelse it either. Just get out. */ if (XFS_FORCED_SHUTDOWN(mp)) { - xfs_buftrace("READ_BUF_INCORE XFSSHUTDN", bp); + trace_xfs_trans_read_buf_shut(bp, _RET_IP_); *bpp = NULL; return XFS_ERROR(EIO); } @@ -385,27 +380,26 @@ xfs_trans_read_buf( bip->bli_recur++; ASSERT(atomic_read(&bip->bli_refcount) > 0); - xfs_buf_item_trace("READ RECUR", bip); + trace_xfs_trans_read_buf_recur(bip); *bpp = bp; return 0; } /* - * We always specify the BUF_BUSY flag within a transaction so - * that get_buf does not try to push out a delayed write buffer + * We always specify the XBF_DONT_BLOCK flag within a transaction + * so that get_buf does not try to push out a delayed write buffer * which might cause another transaction to take place (if the * buffer was delayed alloc). Such recursive transactions can * easily deadlock with our current transaction as well as cause * us to run out of stack space. 
*/ - bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); + bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); if (bp == NULL) { *bpp = NULL; return 0; } if (XFS_BUF_GETERROR(bp) != 0) { XFS_BUF_SUPER_STALE(bp); - xfs_buftrace("READ ERROR", bp); error = XFS_BUF_GETERROR(bp); xfs_ioerror_alert("xfs_trans_read_buf", mp, @@ -464,8 +458,7 @@ xfs_trans_read_buf( */ XFS_BUF_SET_FSPRIVATE2(bp, tp); - xfs_buftrace("TRANS READ", bp); - xfs_buf_item_trace("READ", bip); + trace_xfs_trans_read_buf(bip); *bpp = bp; return 0; @@ -480,10 +473,10 @@ shutdown_abort: if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp); #endif - ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) != - (XFS_B_STALE|XFS_B_DELWRI)); + ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) != + (XBF_STALE|XBF_DELWRI)); - xfs_buftrace("READ_BUF XFSSHUTDN", bp); + trace_xfs_trans_read_buf_shut(bp, _RET_IP_); xfs_buf_relse(bp); *bpp = NULL; return XFS_ERROR(EIO); @@ -549,13 +542,14 @@ xfs_trans_brelse(xfs_trans_t *tp, lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); ASSERT(lidp != NULL); + trace_xfs_trans_brelse(bip); + /* * If the release is just for a recursive lock, * then decrement the count and return. */ if (bip->bli_recur > 0) { bip->bli_recur--; - xfs_buf_item_trace("RELSE RECUR", bip); return; } @@ -563,10 +557,8 @@ xfs_trans_brelse(xfs_trans_t *tp, * If the buffer is dirty within this transaction, we can't * release it until we commit. */ - if (lidp->lid_flags & XFS_LID_DIRTY) { - xfs_buf_item_trace("RELSE DIRTY", bip); + if (lidp->lid_flags & XFS_LID_DIRTY) return; - } /* * If the buffer has been invalidated, then we can't release @@ -574,13 +566,10 @@ xfs_trans_brelse(xfs_trans_t *tp, * as part of this transaction. This prevents us from pulling * the item from the AIL before we should. 
*/ - if (bip->bli_flags & XFS_BLI_STALE) { - xfs_buf_item_trace("RELSE STALE", bip); + if (bip->bli_flags & XFS_BLI_STALE) return; - } ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); - xfs_buf_item_trace("RELSE", bip); /* * Free up the log item descriptor tracking the released item. @@ -677,7 +666,7 @@ xfs_trans_bjoin(xfs_trans_t *tp, */ XFS_BUF_SET_FSPRIVATE2(bp, tp); - xfs_buf_item_trace("BJOIN", bip); + trace_xfs_trans_bjoin(bip); } /* @@ -701,7 +690,7 @@ xfs_trans_bhold(xfs_trans_t *tp, ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_HOLD; - xfs_buf_item_trace("BHOLD", bip); + trace_xfs_trans_bhold(bip); } /* @@ -724,7 +713,8 @@ xfs_trans_bhold_release(xfs_trans_t *tp, ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT(bip->bli_flags & XFS_BLI_HOLD); bip->bli_flags &= ~XFS_BLI_HOLD; - xfs_buf_item_trace("BHOLD RELEASE", bip); + + trace_xfs_trans_bhold_release(bip); } /* @@ -770,6 +760,8 @@ xfs_trans_log_buf(xfs_trans_t *tp, XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone; + trace_xfs_trans_log_buf(bip); + /* * If we invalidated the buffer within this transaction, then * cancel the invalidation now that we're dirtying the buffer @@ -777,7 +769,6 @@ xfs_trans_log_buf(xfs_trans_t *tp, * because we have a reference to the buffer this entire time. 
*/ if (bip->bli_flags & XFS_BLI_STALE) { - xfs_buf_item_trace("BLOG UNSTALE", bip); bip->bli_flags &= ~XFS_BLI_STALE; ASSERT(XFS_BUF_ISSTALE(bp)); XFS_BUF_UNSTALE(bp); @@ -792,7 +783,6 @@ xfs_trans_log_buf(xfs_trans_t *tp, lidp->lid_flags &= ~XFS_LID_BUF_STALE; bip->bli_flags |= XFS_BLI_LOGGED; xfs_buf_item_log(bip, first, last); - xfs_buf_item_trace("BLOG", bip); } @@ -831,6 +821,8 @@ xfs_trans_binval( ASSERT(lidp != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); + trace_xfs_trans_binval(bip); + if (bip->bli_flags & XFS_BLI_STALE) { /* * If the buffer is already invalidated, then @@ -843,8 +835,6 @@ xfs_trans_binval( ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); ASSERT(lidp->lid_flags & XFS_LID_DIRTY); ASSERT(tp->t_flags & XFS_TRANS_DIRTY); - xfs_buftrace("XFS_BINVAL RECUR", bp); - xfs_buf_item_trace("BINVAL RECUR", bip); return; } @@ -878,8 +868,6 @@ xfs_trans_binval( (bip->bli_format.blf_map_size * sizeof(uint))); lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE; tp->t_flags |= XFS_TRANS_DIRTY; - xfs_buftrace("XFS_BINVAL", bp); - xfs_buf_item_trace("BINVAL", bip); } /* diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index d725428c9df..b09904555d0 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h @@ -151,8 +151,8 @@ typedef enum { } xfs_btnum_t; struct xfs_name { - const char *name; - int len; + const unsigned char *name; + int len; }; #endif /* __XFS_TYPES_H__ */ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index b572f7e840e..ddd2c5d1b85 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -53,6 +53,7 @@ #include "xfs_log_priv.h" #include "xfs_filestream.h" #include "xfs_vnodeops.h" +#include "xfs_trace.h" int xfs_setattr( @@ -69,7 +70,6 @@ xfs_setattr( uint commit_flags=0; uid_t uid=0, iuid=0; gid_t gid=0, igid=0; - int timeflags = 0; struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; int need_iolock = 1; @@ -134,16 +134,13 @@ xfs_setattr( if (flags & XFS_ATTR_NOLOCK) need_iolock = 0; if (!(mask & 
ATTR_SIZE)) { - if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) || - (mp->m_flags & XFS_MOUNT_WSYNC)) { - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); - commit_flags = 0; - if ((code = xfs_trans_reserve(tp, 0, - XFS_ICHANGE_LOG_RES(mp), 0, - 0, 0))) { - lock_flags = 0; - goto error_return; - } + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); + commit_flags = 0; + code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), + 0, 0, 0); + if (code) { + lock_flags = 0; + goto error_return; } } else { if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && @@ -259,7 +256,7 @@ xfs_setattr( iattr->ia_size > ip->i_d.di_size) { code = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, - XFS_B_ASYNC, FI_NONE); + XBF_ASYNC, FI_NONE); } /* wait for all I/O to complete */ @@ -294,15 +291,23 @@ xfs_setattr( * or we are explicitly asked to change it. This handles * the semantic difference between truncate() and ftruncate() * as implemented in the VFS. + * + * The regular truncate() case without ATTR_CTIME and ATTR_MTIME + * is a special case where we need to update the times despite + * not having these flags set. For all other operations the + * VFS set these flags explicitly if it wants a timestamp + * update. 
*/ - if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME)) - timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; + if (iattr->ia_size != ip->i_size && + (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { + iattr->ia_ctime = iattr->ia_mtime = + current_fs_time(inode->i_sb); + mask |= ATTR_CTIME | ATTR_MTIME; + } if (iattr->ia_size > ip->i_size) { ip->i_d.di_size = iattr->ia_size; ip->i_size = iattr->ia_size; - if (!(flags & XFS_ATTR_DMI)) - xfs_ichgtime(ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } else if (iattr->ia_size <= ip->i_size || (iattr->ia_size == 0 && ip->i_d.di_nextents)) { @@ -373,9 +378,6 @@ xfs_setattr( ip->i_d.di_gid = gid; inode->i_gid = gid; } - - xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); - timeflags |= XFS_ICHGTIME_CHG; } /* @@ -392,51 +394,37 @@ xfs_setattr( inode->i_mode &= S_IFMT; inode->i_mode |= mode & ~S_IFMT; - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - timeflags |= XFS_ICHGTIME_CHG; } /* * Change file access or modified times. */ - if (mask & (ATTR_ATIME|ATTR_MTIME)) { - if (mask & ATTR_ATIME) { - inode->i_atime = iattr->ia_atime; - ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; - ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - timeflags &= ~XFS_ICHGTIME_MOD; - timeflags |= XFS_ICHGTIME_CHG; - } - if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET))) - xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); + if (mask & ATTR_ATIME) { + inode->i_atime = iattr->ia_atime; + ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; + ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; + ip->i_update_core = 1; } - - /* - * Change file inode change time only if ATTR_CTIME set - * AND we have been called by a DMI function. 
- */ - - if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) { + if (mask & ATTR_CTIME) { inode->i_ctime = iattr->ia_ctime; ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; ip->i_update_core = 1; - timeflags &= ~XFS_ICHGTIME_CHG; + } + if (mask & ATTR_MTIME) { + inode->i_mtime = iattr->ia_mtime; + ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; + ip->i_update_core = 1; } /* - * Send out timestamp changes that need to be set to the - * current time. Not done when called by a DMI function. + * And finally, log the inode core if any attribute in it + * has been changed. */ - if (timeflags && !(flags & XFS_ATTR_DMI)) - xfs_ichgtime(ip, timeflags); + if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE| + ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(xs_ig_attrchg); @@ -451,12 +439,10 @@ xfs_setattr( * mix so this probably isn't worth the trouble to optimize. */ code = 0; - if (tp) { - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); - code = xfs_trans_commit(tp, commit_flags); - } + code = xfs_trans_commit(tp, commit_flags); xfs_iunlock(ip, lock_flags); @@ -538,9 +524,8 @@ xfs_readlink_bmap( d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); - bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt), - XBF_LOCK | XBF_MAPPED | - XBF_DONT_BLOCK); + bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), + XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK); error = XFS_BUF_GETERROR(bp); if (error) { xfs_ioerror_alert("xfs_readlink", @@ -612,7 +597,7 @@ xfs_fsync( { xfs_trans_t *tp; int error = 0; - int log_flushed = 0, changed = 1; + int log_flushed = 0; xfs_itrace_entry(ip); @@ -642,19 +627,16 @@ xfs_fsync( * disk yet, the inode will be still be pinned. If it is, * force the log. 
*/ - xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (xfs_ipincount(ip)) { - error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, - XFS_LOG_FORCE | XFS_LOG_SYNC, - &log_flushed); - } else { - /* - * If the inode is not pinned and nothing has changed - * we don't need to flush the cache. - */ - changed = 0; + if (ip->i_itemp->ili_last_lsn) { + error = _xfs_log_force_lsn(ip->i_mount, + ip->i_itemp->ili_last_lsn, + XFS_LOG_SYNC, &log_flushed); + } else { + error = _xfs_log_force(ip->i_mount, + XFS_LOG_SYNC, &log_flushed); + } } } else { /* @@ -689,7 +671,7 @@ xfs_fsync( xfs_iunlock(ip, XFS_ILOCK_EXCL); } - if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) { + if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) { /* * If the log write didn't issue an ordered tag we need * to flush the disk cache for the data device now. @@ -709,6 +691,11 @@ xfs_fsync( } /* + * Flags for xfs_free_eofblocks + */ +#define XFS_FREE_EOF_TRYLOCK (1<<0) + +/* * This is called by xfs_inactive to free any blocks beyond eof * when the link count isn't zero and by xfs_dm_punch_hole() when * punching a hole to EOF. @@ -726,7 +713,6 @@ xfs_free_eofblocks( xfs_filblks_t map_len; int nimaps; xfs_bmbt_irec_t imap; - int use_iolock = (flags & XFS_FREE_EOF_LOCK); /* * Figure out if there are any blocks beyond the end @@ -768,14 +754,19 @@ xfs_free_eofblocks( * cache and we can't * do that within a transaction. */ - if (use_iolock) + if (flags & XFS_FREE_EOF_TRYLOCK) { + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { + xfs_trans_cancel(tp, 0); + return 0; + } + } else { xfs_ilock(ip, XFS_IOLOCK_EXCL); + } error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, ip->i_size); if (error) { xfs_trans_cancel(tp, 0); - if (use_iolock) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } @@ -812,8 +803,7 @@ xfs_free_eofblocks( error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); } - xfs_iunlock(ip, (use_iolock ? 
(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL) - : XFS_ILOCK_EXCL)); + xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL); } return error; } @@ -1103,7 +1093,7 @@ xfs_release( */ truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) - xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); + xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); } if (ip->i_d.di_nlink != 0) { @@ -1113,7 +1103,17 @@ xfs_release( (ip->i_df.if_flags & XFS_IFEXTENTS)) && (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { - error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); + + /* + * If we can't get the iolock just skip truncating + * the blocks past EOF because we could deadlock + * with the mmap_sem otherwise. We'll get another + * chance to drop them once the last reference to + * the inode is dropped, so we'll never leak blocks + * permanently. + */ + error = xfs_free_eofblocks(mp, ip, + XFS_FREE_EOF_TRYLOCK); if (error) return error; } @@ -1184,7 +1184,7 @@ xfs_inactive( (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || (ip->i_delayed_blks != 0)))) { - error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); + error = xfs_free_eofblocks(mp, ip, 0); if (error) return VN_INACTIVE_CACHE; } @@ -1380,7 +1380,6 @@ xfs_lookup( if (error) goto out_free_name; - xfs_itrace_ref(*ipp); return 0; out_free_name: @@ -1526,7 +1525,6 @@ xfs_create( * At this point, we've gotten a newly allocated inode. * It is locked (and joined to the transaction). 
*/ - xfs_itrace_ref(ip); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); /* @@ -1986,9 +1984,6 @@ xfs_remove( if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) xfs_filestream_deassociate(ip); - xfs_itrace_exit(ip); - xfs_itrace_exit(dp); - std_return: if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL, @@ -2201,7 +2196,8 @@ xfs_symlink( if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, - link_name->name, target_path, 0, 0, 0); + link_name->name, + (unsigned char *)target_path, 0, 0, 0); if (error) return error; } @@ -2285,7 +2281,6 @@ xfs_symlink( goto error_return; goto error1; } - xfs_itrace_ref(ip); /* * An error after we've joined dp to the transaction will result in the @@ -2398,7 +2393,8 @@ std_return: dp, DM_RIGHT_NULL, error ? NULL : ip, DM_RIGHT_NULL, link_name->name, - target_path, 0, error, 0); + (unsigned char *)target_path, + 0, error, 0); } if (!error) @@ -2456,46 +2452,6 @@ xfs_set_dmattrs( return error; } -int -xfs_reclaim( - xfs_inode_t *ip) -{ - - xfs_itrace_entry(ip); - - ASSERT(!VN_MAPPED(VFS_I(ip))); - - /* bad inode, get out here ASAP */ - if (is_bad_inode(VFS_I(ip))) { - xfs_ireclaim(ip); - return 0; - } - - xfs_ioend_wait(ip); - - ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); - - /* - * If we have nothing to flush with this inode then complete the - * teardown now, otherwise break the link between the xfs inode and the - * linux inode and clean up the xfs inode later. This avoids flushing - * the inode to disk during the delete operation itself. - * - * When breaking the link, we need to set the XFS_IRECLAIMABLE flag - * first to ensure that xfs_iunpin() will never see an xfs inode - * that has a linux inode being reclaimed. Synchronisation is provided - * by the i_flags_lock. 
- */ - if (!ip->i_update_core && (ip->i_itemp == NULL)) { - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_iflock(ip); - xfs_iflags_set(ip, XFS_IRECLAIMABLE); - return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); - } - xfs_inode_set_reclaim_tag(ip); - return 0; -} - /* * xfs_alloc_file_space() * This routine allocates disk space for the given file. @@ -2868,7 +2824,6 @@ xfs_free_file_space( ioffset = offset & ~(rounding - 1); if (VN_CACHED(VFS_I(ip)) != 0) { - xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1); error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); if (error) goto out_unlock_iolock; diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index a9e102de71a..774f40729ca 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -38,17 +38,16 @@ int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, mode_t mode, struct xfs_inode **ipp, cred_t *credp); int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); -int xfs_reclaim(struct xfs_inode *ip); int xfs_change_file_space(struct xfs_inode *ip, int cmd, xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, struct xfs_name *target_name, struct xfs_inode *target_ip); -int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, - int *valuelenp, int flags); -int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, - int valuelen, int flags); -int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags); +int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name, + unsigned char *value, int *valuelenp, int flags); +int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, + unsigned char *value, int valuelen, int flags); +int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); int xfs_attr_list(struct xfs_inode *dp, char *buffer, int 
bufsize, int flags, struct attrlist_cursor_kern *cursor); ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb, |