From dd8a306ac0c918268bd2ae89da2dea627f6e352d Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Thu, 10 Nov 2005 07:50:03 -0600
Subject: JFS: Add back directory i_size calculations for legacy partitions

Linux-formatted jfs partitions have a different idea about what i_size
represents than partitions formatted on OS/2.  The i_size calculation is
now based on the size of the directory index.  For legacy partitions, which
have no directory index, the i_size is never being updated.

This patch adds back the original i_size calculations for legacy partitions.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_dtree.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 404f33eae50..6c3f0831984 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -1005,6 +1005,9 @@ static int dtSplitUp(tid_t tid,
 
 		DT_PUTPAGE(smp);
 
+		if (!DO_INDEX(ip))
+			ip->i_size = xlen << sbi->l2bsize;
+
 		goto freeKeyName;
 	}
 
@@ -1055,7 +1058,9 @@ static int dtSplitUp(tid_t tid,
 				xaddr = addressPXD(pxd) + xlen;
 				dbFree(ip, xaddr, (s64) n);
 			}
-		}
+		} else if (!DO_INDEX(ip))
+			ip->i_size = lengthPXD(pxd) << sbi->l2bsize;
+
 
 	      extendOut:
 		DT_PUTPAGE(smp);
@@ -1098,6 +1103,9 @@ static int dtSplitUp(tid_t tid,
 		goto splitOut;
 	}
 
+	if (!DO_INDEX(ip))
+		ip->i_size += PSIZE;
+
 	/*
 	 * propagate up the router entry for the leaf page just split
 	 *
@@ -2424,6 +2432,9 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
 		break;
 	}
 
+	if (!DO_INDEX(ip))
+		ip->i_size -= PSIZE;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 4d5dbd0945d9e0833dd7964a3d6ee33157f7cc7a Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Tue, 29 Nov 2005 08:28:58 -0600
Subject: JFS: make buddy table static

Idea is to reduce false cacheline sharing and stuff

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_dmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 68000a50ceb..adb9f05093b 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -125,7 +125,7 @@ static int dbGetL2AGSize(s64 nblocks);
  * into the table, with the table elements yielding the maximum
  * binary buddy of free bits within the character.
  */
-static s8 budtab[256] = {
+static const s8 budtab[256] = {
 	3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-- 
cgit v1.2.3


From 1de87444f8f91009b726108c9a56600645ee8751 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 24 Jan 2006 15:22:50 -0600
Subject: JFS: semaphore to mutex conversion.

the conversion was generated via scripts, and the result was validated
automatically via a script as well.

build and boot tested.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/acl.c        |  4 +--
 fs/jfs/inode.c      | 14 ++++----
 fs/jfs/jfs_dmap.c   |  6 ++--
 fs/jfs/jfs_dmap.h   |  2 +-
 fs/jfs/jfs_extent.c | 20 +++++------
 fs/jfs/jfs_imap.c   | 22 ++++++------
 fs/jfs/jfs_imap.h   |  4 +--
 fs/jfs/jfs_incore.h |  5 +--
 fs/jfs/jfs_lock.h   |  1 +
 fs/jfs/jfs_logmgr.c |  6 ++--
 fs/jfs/jfs_logmgr.h |  2 +-
 fs/jfs/jfs_txnmgr.c | 10 +++---
 fs/jfs/namei.c      | 98 ++++++++++++++++++++++++++---------------------------
 fs/jfs/super.c      |  2 +-
 fs/jfs/xattr.c      |  8 ++---
 15 files changed, 103 insertions(+), 101 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 461e4934ca7..e0b6fdab200 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -207,12 +207,12 @@ static int jfs_acl_chmod(struct inode *inode)
 	rc = posix_acl_chmod_masq(clone, inode->i_mode);
 	if (!rc) {
 		tid_t tid = txBegin(inode->i_sb, 0);
-		down(&JFS_IP(inode)->commit_sem);
+		mutex_lock(&JFS_IP(inode)->commit_mutex);
 		rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, clone);
 		if (!rc)
 			rc = txCommit(tid, 1, &inode, 0);
 		txEnd(tid);
-		up(&JFS_IP(inode)->commit_sem);
+		mutex_unlock(&JFS_IP(inode)->commit_mutex);
 	}
 
 	posix_acl_release(clone);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 9f942ca8e4e..d7834a9117c 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -89,16 +89,16 @@ int jfs_commit_inode(struct inode *inode, int wait)
 	}
 
 	tid = txBegin(inode->i_sb, COMMIT_INODE);
-	down(&JFS_IP(inode)->commit_sem);
+	mutex_lock(&JFS_IP(inode)->commit_mutex);
 
 	/*
-	 * Retest inode state after taking commit_sem
+	 * Retest inode state after taking commit_mutex
 	 */
 	if (inode->i_nlink && test_cflag(COMMIT_Dirty, inode))
 		rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0);
 
 	txEnd(tid);
-	up(&JFS_IP(inode)->commit_sem);
+	mutex_unlock(&JFS_IP(inode)->commit_mutex);
 	return rc;
 }
 
@@ -335,18 +335,18 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length)
 		tid = txBegin(ip->i_sb, 0);
 
 		/*
-		 * The commit_sem cannot be taken before txBegin.
+		 * The commit_mutex cannot be taken before txBegin.
 		 * txBegin may block and there is a chance the inode
 		 * could be marked dirty and need to be committed
 		 * before txBegin unblocks
 		 */
-		down(&JFS_IP(ip)->commit_sem);
+		mutex_lock(&JFS_IP(ip)->commit_mutex);
 
 		newsize = xtTruncate(tid, ip, length,
 				     COMMIT_TRUNCATE | COMMIT_PWMAP);
 		if (newsize < 0) {
 			txEnd(tid);
-			up(&JFS_IP(ip)->commit_sem);
+			mutex_unlock(&JFS_IP(ip)->commit_mutex);
 			break;
 		}
 
@@ -355,7 +355,7 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length)
 
 		txCommit(tid, 1, &ip, 0);
 		txEnd(tid);
-		up(&JFS_IP(ip)->commit_sem);
+		mutex_unlock(&JFS_IP(ip)->commit_mutex);
 	} while (newsize > length);	/* Truncate isn't always atomic */
 }
 
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 038d8b76d11..4fb3ed18492 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -64,9 +64,9 @@
  *	to the persistent bitmaps in dmaps) is guarded by (busy) buffers.
  */
 
-#define BMAP_LOCK_INIT(bmp)	init_MUTEX(&bmp->db_bmaplock)
-#define BMAP_LOCK(bmp)		down(&bmp->db_bmaplock)
-#define BMAP_UNLOCK(bmp)	up(&bmp->db_bmaplock)
+#define BMAP_LOCK_INIT(bmp)	mutex_init(&bmp->db_bmaplock)
+#define BMAP_LOCK(bmp)		mutex_lock(&bmp->db_bmaplock)
+#define BMAP_UNLOCK(bmp)	mutex_unlock(&bmp->db_bmaplock)
 
 /*
  * forward references
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 32e25884e7e..8b14cc8e022 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -243,7 +243,7 @@ struct dbmap {
 struct bmap {
 	struct dbmap db_bmap;		/* on-disk aggregate map descriptor */
 	struct inode *db_ipbmap;	/* ptr to aggregate map incore inode */
-	struct semaphore db_bmaplock;	/* aggregate map lock */
+	struct mutex db_bmaplock;	/* aggregate map lock */
 	atomic_t db_active[MAXAG];	/* count of active, open files in AG */
 	u32 *db_DBmap;
 };
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 4879603daa1..5549378358b 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -94,7 +94,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
 	txBeginAnon(ip->i_sb);
 
 	/* Avoid race with jfs_commit_inode() */
-	down(&JFS_IP(ip)->commit_sem);
+	mutex_lock(&JFS_IP(ip)->commit_mutex);
 
 	/* validate extent length */
 	if (xlen > MAXXLEN)
@@ -136,14 +136,14 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
 	 */
 	nxlen = xlen;
 	if ((rc = extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) {
-		up(&JFS_IP(ip)->commit_sem);
+		mutex_unlock(&JFS_IP(ip)->commit_mutex);
 		return (rc);
 	}
 
 	/* Allocate blocks to quota. */
 	if (DQUOT_ALLOC_BLOCK(ip, nxlen)) {
 		dbFree(ip, nxaddr, (s64) nxlen);
-		up(&JFS_IP(ip)->commit_sem);
+		mutex_unlock(&JFS_IP(ip)->commit_mutex);
 		return -EDQUOT;
 	}
 
@@ -165,7 +165,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
 	if (rc) {
 		dbFree(ip, nxaddr, nxlen);
 		DQUOT_FREE_BLOCK(ip, nxlen);
-		up(&JFS_IP(ip)->commit_sem);
+		mutex_unlock(&JFS_IP(ip)->commit_mutex);
 		return (rc);
 	}
 
@@ -177,7 +177,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
 
 	mark_inode_dirty(ip);
 
-	up(&JFS_IP(ip)->commit_sem);
+	mutex_unlock(&JFS_IP(ip)->commit_mutex);
 	/*
 	 * COMMIT_SyncList flags an anonymous tlock on page that is on
 	 * sync list.
@@ -222,7 +222,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 	/* This blocks if we are low on resources */
 	txBeginAnon(ip->i_sb);
 
-	down(&JFS_IP(ip)->commit_sem);
+	mutex_lock(&JFS_IP(ip)->commit_mutex);
 	/* validate extent length */
 	if (nxlen > MAXXLEN)
 		nxlen = MAXXLEN;
@@ -258,7 +258,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 	/* Allocat blocks to quota. */
 	if (DQUOT_ALLOC_BLOCK(ip, nxlen)) {
 		dbFree(ip, nxaddr, (s64) nxlen);
-		up(&JFS_IP(ip)->commit_sem);
+		mutex_unlock(&JFS_IP(ip)->commit_mutex);
 		return -EDQUOT;
 	}
 
@@ -338,7 +338,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 
 	mark_inode_dirty(ip);
 exit:
-	up(&JFS_IP(ip)->commit_sem);
+	mutex_unlock(&JFS_IP(ip)->commit_mutex);
 	return (rc);
 }
 #endif			/* _NOTYET */
@@ -439,12 +439,12 @@ int extRecord(struct inode *ip, xad_t * xp)
 
 	txBeginAnon(ip->i_sb);
 
-	down(&JFS_IP(ip)->commit_sem);
+	mutex_lock(&JFS_IP(ip)->commit_mutex);
 
 	/* update the extent */
 	rc = xtUpdate(0, ip, xp);
 
-	up(&JFS_IP(ip)->commit_sem);
+	mutex_unlock(&JFS_IP(ip)->commit_mutex);
 	return rc;
 }
 
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 31b4aa13dd4..87dd86c34c2 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -66,14 +66,14 @@ static HLIST_HEAD(aggregate_hash);
  * imap locks
  */
 /* iag free list lock */
-#define IAGFREE_LOCK_INIT(imap)		init_MUTEX(&imap->im_freelock)
-#define IAGFREE_LOCK(imap)		down(&imap->im_freelock)
-#define IAGFREE_UNLOCK(imap)		up(&imap->im_freelock)
+#define IAGFREE_LOCK_INIT(imap)		mutex_init(&imap->im_freelock)
+#define IAGFREE_LOCK(imap)		mutex_lock(&imap->im_freelock)
+#define IAGFREE_UNLOCK(imap)		mutex_unlock(&imap->im_freelock)
 
 /* per ag iag list locks */
-#define AG_LOCK_INIT(imap,index)	init_MUTEX(&(imap->im_aglock[index]))
-#define AG_LOCK(imap,agno)		down(&imap->im_aglock[agno])
-#define AG_UNLOCK(imap,agno)		up(&imap->im_aglock[agno])
+#define AG_LOCK_INIT(imap,index)	mutex_init(&(imap->im_aglock[index]))
+#define AG_LOCK(imap,agno)		mutex_lock(&imap->im_aglock[agno])
+#define AG_UNLOCK(imap,agno)		mutex_unlock(&imap->im_aglock[agno])
 
 /*
  * forward references
@@ -1261,7 +1261,7 @@ int diFree(struct inode *ip)
 	 * to be freed by the transaction;  
 	 */
 	tid = txBegin(ipimap->i_sb, COMMIT_FORCE);
-	down(&JFS_IP(ipimap)->commit_sem);
+	mutex_lock(&JFS_IP(ipimap)->commit_mutex);
 
 	/* acquire tlock of the iag page of the freed ixad 
 	 * to force the page NOHOMEOK (even though no data is
@@ -1294,7 +1294,7 @@ int diFree(struct inode *ip)
 	rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
 
 	txEnd(tid);
-	up(&JFS_IP(ipimap)->commit_sem);
+	mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
 
 	/* unlock the AG inode map information */
 	AG_UNLOCK(imap, agno);
@@ -2554,13 +2554,13 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 		 * addressing structure pointing to the new iag page;
 		 */
 		tid = txBegin(sb, COMMIT_FORCE);
-		down(&JFS_IP(ipimap)->commit_sem);
+		mutex_lock(&JFS_IP(ipimap)->commit_mutex);
 
 		/* update the inode map addressing structure to point to it */
 		if ((rc =
 		     xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
 			txEnd(tid);
-			up(&JFS_IP(ipimap)->commit_sem);
+			mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
 			/* Free the blocks allocated for the iag since it was
 			 * not successfully added to the inode map
 			 */
@@ -2626,7 +2626,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 		rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
 
 		txEnd(tid);
-		up(&JFS_IP(ipimap)->commit_sem);
+		mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
 
 		duplicateIXtree(sb, blkno, xlen, &xaddr);
 
diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h
index 6b59adec036..6e24465f0f9 100644
--- a/fs/jfs/jfs_imap.h
+++ b/fs/jfs/jfs_imap.h
@@ -140,8 +140,8 @@ struct dinomap {
 struct inomap {
 	struct dinomap im_imap;		/* 4096: inode allocation control */
 	struct inode *im_ipimap;	/* 4: ptr to inode for imap   */
-	struct semaphore im_freelock;	/* 4: iag free list lock      */
-	struct semaphore im_aglock[MAXAG];	/* 512: per AG locks          */
+	struct mutex im_freelock;	/* 4: iag free list lock      */
+	struct mutex im_aglock[MAXAG];	/* 512: per AG locks          */
 	u32 *im_DBGdimap;
 	atomic_t im_numinos;	/* num of backed inodes */
 	atomic_t im_numfree;	/* num of free backed inodes */
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index dc21a5bd54d..a97ead889a6 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -19,6 +19,7 @@
 #ifndef _H_JFS_INCORE
 #define _H_JFS_INCORE
 
+#include <linux/mutex.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <linux/bitops.h>
@@ -62,12 +63,12 @@ struct jfs_inode_info {
 	 */
 	struct rw_semaphore rdwrlock;
 	/*
-	 * commit_sem serializes transaction processing on an inode.
+	 * commit_mutex serializes transaction processing on an inode.
 	 * It must be taken after beginning a transaction (txBegin), since
 	 * dirty inodes may be committed while a new transaction on the
 	 * inode is blocked in txBegin or TxBeginAnon
 	 */
-	struct semaphore commit_sem;
+	struct mutex commit_mutex;
 	/* xattr_sem allows us to access the xattrs without taking i_mutex */
 	struct rw_semaphore xattr_sem;
 	lid_t	xtlid;		/* lid of xtree lock on directory */
diff --git a/fs/jfs/jfs_lock.h b/fs/jfs/jfs_lock.h
index 10ad1d08668..70ac9f7d1e0 100644
--- a/fs/jfs/jfs_lock.h
+++ b/fs/jfs/jfs_lock.h
@@ -20,6 +20,7 @@
 #define _H_JFS_LOCK
 
 #include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/sched.h>
 
 /*
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index d27bac6acaa..06bded6c12b 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -87,9 +87,9 @@ DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait);
 /*
  *	log read/write serialization (per log)
  */
-#define LOG_LOCK_INIT(log)	init_MUTEX(&(log)->loglock)
-#define LOG_LOCK(log)		down(&((log)->loglock))
-#define LOG_UNLOCK(log)		up(&((log)->loglock))
+#define LOG_LOCK_INIT(log)	mutex_init(&(log)->loglock)
+#define LOG_LOCK(log)		mutex_lock(&((log)->loglock))
+#define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))
 
 
 /*
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index e4978b5b65e..8c6909b8001 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -389,7 +389,7 @@ struct jfs_log {
 	int eor;		/* 4: eor of last record in eol page */
 	struct lbuf *bp;	/* 4: current log page buffer */
 
-	struct semaphore loglock;	/* 4: log write serialization lock */
+	struct mutex loglock;	/* 4: log write serialization lock */
 
 	/* syncpt */
 	int nextsync;		/* 4: bytes to write before next syncpt */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 2ddb6b892bc..d38f605d948 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2876,10 +2876,10 @@ restart:
 		 */
 		TXN_UNLOCK();
 		tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
-		down(&jfs_ip->commit_sem);
+		mutex_lock(&jfs_ip->commit_mutex);
 		txCommit(tid, 1, &ip, 0);
 		txEnd(tid);
-		up(&jfs_ip->commit_sem);
+		mutex_unlock(&jfs_ip->commit_mutex);
 		/*
 		 * Just to be safe.  I don't know how
 		 * long we can run without blocking
@@ -2952,7 +2952,7 @@ int jfs_sync(void *arg)
 				 * Inode is being freed
 				 */
 				list_del_init(&jfs_ip->anon_inode_list);
-			} else if (! down_trylock(&jfs_ip->commit_sem)) {
+			} else if (! !mutex_trylock(&jfs_ip->commit_mutex)) {
 				/*
 				 * inode will be removed from anonymous list
 				 * when it is committed
@@ -2961,7 +2961,7 @@ int jfs_sync(void *arg)
 				tid = txBegin(ip->i_sb, COMMIT_INODE);
 				rc = txCommit(tid, 1, &ip, 0);
 				txEnd(tid);
-				up(&jfs_ip->commit_sem);
+				mutex_unlock(&jfs_ip->commit_mutex);
 
 				iput(ip);
 				/*
@@ -2971,7 +2971,7 @@ int jfs_sync(void *arg)
 				cond_resched();
 				TXN_LOCK();
 			} else {
-				/* We can't get the commit semaphore.  It may
+				/* We can't get the commit mutex.  It may
 				 * be held by a thread waiting for tlock's
 				 * so let's not block here.  Save it to
 				 * put back on the anon_list.
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 4abbe860430..ed4d170c212 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -104,8 +104,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
 
 	tid = txBegin(dip->i_sb, 0);
 
-	down(&JFS_IP(dip)->commit_sem);
-	down(&JFS_IP(ip)->commit_sem);
+	mutex_lock(&JFS_IP(dip)->commit_mutex);
+	mutex_lock(&JFS_IP(ip)->commit_mutex);
 
 	rc = jfs_init_acl(tid, ip, dip);
 	if (rc)
@@ -165,8 +165,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
 
       out3:
 	txEnd(tid);
-	up(&JFS_IP(dip)->commit_sem);
-	up(&JFS_IP(ip)->commit_sem);
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
+	mutex_unlock(&JFS_IP(ip)->commit_mutex);
 	if (rc) {
 		free_ea_wmap(ip);
 		ip->i_nlink = 0;
@@ -238,8 +238,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 
 	tid = txBegin(dip->i_sb, 0);
 
-	down(&JFS_IP(dip)->commit_sem);
-	down(&JFS_IP(ip)->commit_sem);
+	mutex_lock(&JFS_IP(dip)->commit_mutex);
+	mutex_lock(&JFS_IP(ip)->commit_mutex);
 
 	rc = jfs_init_acl(tid, ip, dip);
 	if (rc)
@@ -300,8 +300,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 
       out3:
 	txEnd(tid);
-	up(&JFS_IP(dip)->commit_sem);
-	up(&JFS_IP(ip)->commit_sem);
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
+	mutex_unlock(&JFS_IP(ip)->commit_mutex);
 	if (rc) {
 		free_ea_wmap(ip);
 		ip->i_nlink = 0;
@@ -365,8 +365,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
 
 	tid = txBegin(dip->i_sb, 0);
 
-	down(&JFS_IP(dip)->commit_sem);
-	down(&JFS_IP(ip)->commit_sem);
+	mutex_lock(&JFS_IP(dip)->commit_mutex);
+	mutex_lock(&JFS_IP(ip)->commit_mutex);
 
 	iplist[0] = dip;
 	iplist[1] = ip;
@@ -384,8 +384,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
 		if (rc == -EIO)
 			txAbort(tid, 1);
 		txEnd(tid);
-		up(&JFS_IP(dip)->commit_sem);
-		up(&JFS_IP(ip)->commit_sem);
+		mutex_unlock(&JFS_IP(dip)->commit_mutex);
+		mutex_unlock(&JFS_IP(ip)->commit_mutex);
 
 		goto out2;
 	}
@@ -422,8 +422,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
 
 	txEnd(tid);
 
-	up(&JFS_IP(dip)->commit_sem);
-	up(&JFS_IP(ip)->commit_sem);
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
+	mutex_unlock(&JFS_IP(ip)->commit_mutex);
 
 	/*
 	 * Truncating the directory index table is not guaranteed.  It
@@ -488,8 +488,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 
 	tid = txBegin(dip->i_sb, 0);
 
-	down(&JFS_IP(dip)->commit_sem);
-	down(&JFS_IP(ip)->commit_sem);
+	mutex_lock(&JFS_IP(dip)->commit_mutex);
+	mutex_lock(&JFS_IP(ip)->commit_mutex);
 
 	iplist[0] = dip;
 	iplist[1] = ip;
@@ -503,8 +503,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 		if (rc == -EIO)
 			txAbort(tid, 1);	/* Marks FS Dirty */
 		txEnd(tid);
-		up(&JFS_IP(dip)->commit_sem);
-		up(&JFS_IP(ip)->commit_sem);
+		mutex_unlock(&JFS_IP(dip)->commit_mutex);
+		mutex_unlock(&JFS_IP(ip)->commit_mutex);
 		IWRITE_UNLOCK(ip);
 		goto out1;
 	}
@@ -527,8 +527,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 		if ((new_size = commitZeroLink(tid, ip)) < 0) {
 			txAbort(tid, 1);	/* Marks FS Dirty */
 			txEnd(tid);
-			up(&JFS_IP(dip)->commit_sem);
-			up(&JFS_IP(ip)->commit_sem);
+			mutex_unlock(&JFS_IP(dip)->commit_mutex);
+			mutex_unlock(&JFS_IP(ip)->commit_mutex);
 			IWRITE_UNLOCK(ip);
 			rc = new_size;
 			goto out1;
@@ -556,13 +556,13 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 
 	txEnd(tid);
 
-	up(&JFS_IP(dip)->commit_sem);
-	up(&JFS_IP(ip)->commit_sem);
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
+	mutex_unlock(&JFS_IP(ip)->commit_mutex);
 
 
 	while (new_size && (rc == 0)) {
 		tid = txBegin(dip->i_sb, 0);
-		down(&JFS_IP(ip)->commit_sem);
+		mutex_lock(&JFS_IP(ip)->commit_mutex);
 		new_size = xtTruncate_pmap(tid, ip, new_size);
 		if (new_size < 0) {
 			txAbort(tid, 1);	/* Marks FS Dirty */
@@ -570,7 +570,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 		} else
 			rc = txCommit(tid, 2, &iplist[0], COMMIT_SYNC);
 		txEnd(tid);
-		up(&JFS_IP(ip)->commit_sem);
+		mutex_unlock(&JFS_IP(ip)->commit_mutex);
 	}
 
 	if (ip->i_nlink == 0)
@@ -805,8 +805,8 @@ static int jfs_link(struct dentry *old_dentry,
 
 	tid = txBegin(ip->i_sb, 0);
 
-	down(&JFS_IP(dir)->commit_sem);
-	down(&JFS_IP(ip)->commit_sem);
+	mutex_lock(&JFS_IP(dir)->commit_mutex);
+	mutex_lock(&JFS_IP(ip)->commit_mutex);
 
 	/*
 	 * scan parent directory for entry/freespace
@@ -847,8 +847,8 @@ static int jfs_link(struct dentry *old_dentry,
       out:
 	txEnd(tid);
 
-	up(&JFS_IP(dir)->commit_sem);
-	up(&JFS_IP(ip)->commit_sem);
+	mutex_unlock(&JFS_IP(dir)->commit_mutex);
+	mutex_unlock(&JFS_IP(ip)->commit_mutex);
 
 	jfs_info("jfs_link: rc:%d", rc);
 	return rc;
@@ -916,8 +916,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 
 	tid = txBegin(dip->i_sb, 0);
 
-	down(&JFS_IP(dip)->commit_sem);
-	down(&JFS_IP(ip)->commit_sem);
+	mutex_lock(&JFS_IP(dip)->commit_mutex);
+	mutex_lock(&JFS_IP(ip)->commit_mutex);
 
 	rc = jfs_init_security(tid, ip, dip);
 	if (rc)
@@ -1037,8 +1037,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 
       out3:
 	txEnd(tid);
-	up(&JFS_IP(dip)->commit_sem);
-	up(&JFS_IP(ip)->commit_sem);
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
+	mutex_unlock(&JFS_IP(ip)->commit_mutex);
 	if (rc) {
 		free_ea_wmap(ip);
 		ip->i_nlink = 0;
@@ -1141,13 +1141,13 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 */
 	tid = txBegin(new_dir->i_sb, 0);
 
-	down(&JFS_IP(new_dir)->commit_sem);
-	down(&JFS_IP(old_ip)->commit_sem);
+	mutex_lock(&JFS_IP(new_dir)->commit_mutex);
+	mutex_lock(&JFS_IP(old_ip)->commit_mutex);
 	if (old_dir != new_dir)
-		down(&JFS_IP(old_dir)->commit_sem);
+		mutex_lock(&JFS_IP(old_dir)->commit_mutex);
 
 	if (new_ip) {
-		down(&JFS_IP(new_ip)->commit_sem);
+		mutex_lock(&JFS_IP(new_ip)->commit_mutex);
 		/*
 		 * Change existing directory entry to new inode number
 		 */
@@ -1160,10 +1160,10 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		if (S_ISDIR(new_ip->i_mode)) {
 			new_ip->i_nlink--;
 			if (new_ip->i_nlink) {
-				up(&JFS_IP(new_dir)->commit_sem);
-				up(&JFS_IP(old_ip)->commit_sem);
+				mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
+				mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
 				if (old_dir != new_dir)
-					up(&JFS_IP(old_dir)->commit_sem);
+					mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
 				if (!S_ISDIR(old_ip->i_mode) && new_ip)
 					IWRITE_UNLOCK(new_ip);
 				jfs_error(new_ip->i_sb,
@@ -1282,16 +1282,16 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
       out4:
 	txEnd(tid);
 
-	up(&JFS_IP(new_dir)->commit_sem);
-	up(&JFS_IP(old_ip)->commit_sem);
+	mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
+	mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
 	if (old_dir != new_dir)
-		up(&JFS_IP(old_dir)->commit_sem);
+		mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
 	if (new_ip)
-		up(&JFS_IP(new_ip)->commit_sem);
+		mutex_unlock(&JFS_IP(new_ip)->commit_mutex);
 
 	while (new_size && (rc == 0)) {
 		tid = txBegin(new_ip->i_sb, 0);
-		down(&JFS_IP(new_ip)->commit_sem);
+		mutex_lock(&JFS_IP(new_ip)->commit_mutex);
 		new_size = xtTruncate_pmap(tid, new_ip, new_size);
 		if (new_size < 0) {
 			txAbort(tid, 1);
@@ -1299,7 +1299,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		} else
 			rc = txCommit(tid, 1, &new_ip, COMMIT_SYNC);
 		txEnd(tid);
-		up(&JFS_IP(new_ip)->commit_sem);
+		mutex_unlock(&JFS_IP(new_ip)->commit_mutex);
 	}
 	if (new_ip && (new_ip->i_nlink == 0))
 		set_cflag(COMMIT_Nolink, new_ip);
@@ -1361,8 +1361,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 
 	tid = txBegin(dir->i_sb, 0);
 
-	down(&JFS_IP(dir)->commit_sem);
-	down(&JFS_IP(ip)->commit_sem);
+	mutex_lock(&JFS_IP(dir)->commit_mutex);
+	mutex_lock(&JFS_IP(ip)->commit_mutex);
 
 	rc = jfs_init_acl(tid, ip, dir);
 	if (rc)
@@ -1407,8 +1407,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 
       out3:
 	txEnd(tid);
-	up(&JFS_IP(ip)->commit_sem);
-	up(&JFS_IP(dir)->commit_sem);
+	mutex_unlock(&JFS_IP(ip)->commit_mutex);
+	mutex_unlock(&JFS_IP(dir)->commit_mutex);
 	if (rc) {
 		free_ea_wmap(ip);
 		ip->i_nlink = 0;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 8d31f133643..1639d2cd371 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -617,7 +617,7 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
 		memset(jfs_ip, 0, sizeof(struct jfs_inode_info));
 		INIT_LIST_HEAD(&jfs_ip->anon_inode_list);
 		init_rwsem(&jfs_ip->rdwrlock);
-		init_MUTEX(&jfs_ip->commit_sem);
+		mutex_init(&jfs_ip->commit_mutex);
 		init_rwsem(&jfs_ip->xattr_sem);
 		spin_lock_init(&jfs_ip->ag_lock);
 		jfs_ip->active_ag = -1;
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index f23048f9471..9bc5b7c055c 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -934,13 +934,13 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 	}
 
 	tid = txBegin(inode->i_sb, 0);
-	down(&ji->commit_sem);
+	mutex_lock(&ji->commit_mutex);
 	rc = __jfs_setxattr(tid, dentry->d_inode, name, value, value_len,
 			    flags);
 	if (!rc)
 		rc = txCommit(tid, 1, &inode, 0);
 	txEnd(tid);
-	up(&ji->commit_sem);
+	mutex_unlock(&ji->commit_mutex);
 
 	return rc;
 }
@@ -1093,12 +1093,12 @@ int jfs_removexattr(struct dentry *dentry, const char *name)
 		return rc;
 
 	tid = txBegin(inode->i_sb, 0);
-	down(&ji->commit_sem);
+	mutex_lock(&ji->commit_mutex);
 	rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
 	if (!rc)
 		rc = txCommit(tid, 1, &inode, 0);
 	txEnd(tid);
-	up(&ji->commit_sem);
+	mutex_unlock(&ji->commit_mutex);
 
 	return rc;
 }
-- 
cgit v1.2.3


From fa3241d24cf1182b0ffb6e4d412c3bc2a2ab7bf6 Mon Sep 17 00:00:00 2001
From: Herbert Poetzl <herbert@13thfloor.at>
Date: Thu, 9 Feb 2006 09:09:16 -0600
Subject: JFS: ext2 inode attributes for jfs

ext2 inode attributes with relevance for jfs:

'a' 	EXT2_APPEND_FL       -> append only
'i' 	EXT2_IMMUTABLE_FL    -> immutable file
's' 	EXT2_SECRM_FL	     -> zero file
'u' 	EXT2_UNRM_FL	     -> allow for unrm
'A' 	EXT2_NOATIME_FL      -> no access time
'D' 	EXT2_DIRSYNC_FL      -> dirsync
'S' 	EXT2_SYNC_FL	     -> sync

overview of jfs flags (partially for OS/2)

value	   (OS/2)	Linux	ext2 attrs
------------------------------------------------
0x00010000 IFJOURNAL	-
0x00020000 ISPARSE  	used
0x00040000 INLINEEA 	used
0x00080000 -	    	-	JFS_NOATIME_FL

0x00100000 -	    	-	JFS_DIRSYNC_FL
0x00200000 -	    	-	JFS_SYNC_FL
0x00400000 -	    	-	JFS_SECRM_FL
0x00800000 ISWAPFILE	-	JFS_UNRM_FL

0x01000000 -	    	-	JFS_APPEND_FL
0x02000000 IREADONLY	-	JFS_IMMUTABLE_FL
0x04000000 IHIDDEN  	-	-
0x08000000 ISYSTEM  	-	-

0x10000000 -	    	-
0x20000000 IDIRECTORY	used
0x40000000 IARCHIVE 	-
0x80000000 INEWNAME 	-

the implementation is straight forward, except
for the fact that the attributes have to be mapped
to match with the ext2 ones to avoid a separate
tool for manipulating them (this could be avoided
when using a separate flag field in the on-disk
representation, but the overhead is minimal)

a special jfs_ioctl is added to allow for the new
JFS_IOC_GETFLAGS and JFS_IOC_SETFLAGS calls.

a helper function jfs_set_inode_flags() to transfer
the flags from the on-disk version to the inode

minor changes to allow flag inheritance on inode
creation, as well as a cleanup of the on-disk
flags (including the new ones)

beforementioned helper to map between ext2 and jfs
versions of the new flags ...

the JFS_SECRM_FL and JFS_UNRM_FL are not done yet
and I'm not 100% sure they are worth the effort,
the rest seems to work out of the box ...

Signed-off-by: Herbert Poetzl <herbert@13thfloor.at>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/Makefile     |  3 ++-
 fs/jfs/file.c       |  1 +
 fs/jfs/inode.c      |  1 +
 fs/jfs/jfs_dinode.h | 31 +++++++++++++++++++++++++++----
 fs/jfs/jfs_inode.c  | 37 ++++++++++++++++++++++++++++++++++---
 fs/jfs/jfs_inode.h  |  3 +++
 fs/jfs/namei.c      |  1 +
 7 files changed, 69 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/Makefile b/fs/jfs/Makefile
index 6f1e0e95587..3adb6395e42 100644
--- a/fs/jfs/Makefile
+++ b/fs/jfs/Makefile
@@ -8,7 +8,8 @@ jfs-y    := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \
 	    jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \
 	    jfs_unicode.o jfs_dtree.o jfs_inode.o \
 	    jfs_extent.o symlink.o jfs_metapage.o \
-	    jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o resize.o xattr.o
+	    jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \
+	    resize.o xattr.o ioctl.o
 
 jfs-$(CONFIG_JFS_POSIX_ACL) += acl.o
 
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index c2c19c9ed9a..e1ac6e497e2 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -113,4 +113,5 @@ struct file_operations jfs_file_operations = {
  	.sendfile	= generic_file_sendfile,
 	.fsync		= jfs_fsync,
 	.release	= jfs_release,
+	.ioctl		= jfs_ioctl,
 };
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index d7834a9117c..51a5fed90cc 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -55,6 +55,7 @@ void jfs_read_inode(struct inode *inode)
 		inode->i_op = &jfs_file_inode_operations;
 		init_special_inode(inode, inode->i_mode, inode->i_rdev);
 	}
+	jfs_set_inode_flags(inode);
 }
 
 /*
diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h
index 580a3258449..9f2572aea56 100644
--- a/fs/jfs/jfs_dinode.h
+++ b/fs/jfs/jfs_dinode.h
@@ -139,13 +139,36 @@ struct dinode {
 
 /* more extended mode bits: attributes for OS/2 */
 #define IREADONLY	0x02000000	/* no write access to file */
-#define IARCHIVE	0x40000000	/* file archive bit */
-#define ISYSTEM		0x08000000	/* system file */
 #define IHIDDEN		0x04000000	/* hidden file */
-#define IRASH		0x4E000000	/* mask for changeable attributes */
-#define INEWNAME	0x80000000	/* non-8.3 filename format */
+#define ISYSTEM		0x08000000	/* system file */
+
 #define IDIRECTORY	0x20000000	/* directory (shadow of real bit) */
+#define IARCHIVE	0x40000000	/* file archive bit */
+#define INEWNAME	0x80000000	/* non-8.3 filename format */
+
+#define IRASH		0x4E000000	/* mask for changeable attributes */
 #define ATTRSHIFT	25	/* bits to shift to move attribute
 				   specification to mode position */
 
+/* extended attributes for Linux */
+
+#define JFS_NOATIME_FL		0x00080000 /* do not update atime */
+
+#define JFS_DIRSYNC_FL		0x00100000 /* dirsync behaviour */
+#define JFS_SYNC_FL		0x00200000 /* Synchronous updates */
+#define JFS_SECRM_FL		0x00400000 /* Secure deletion */
+#define JFS_UNRM_FL		0x00800000 /* allow for undelete */
+
+#define JFS_APPEND_FL		0x01000000 /* writes to file may only append */
+#define JFS_IMMUTABLE_FL	0x02000000 /* Immutable file */
+
+#define JFS_FL_USER_VISIBLE	0x03F80000
+#define JFS_FL_USER_MODIFIABLE	0x03F80000
+#define JFS_FL_INHERIT		0x03C80000
+
+/* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */
+#define JFS_IOC_GETFLAGS	_IOR('f', 1, long)
+#define JFS_IOC_SETFLAGS	_IOW('f', 2, long)
+
+
 #endif /*_H_JFS_DINODE */
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 2af5efbfd06..ae2772cba2d 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -25,6 +25,26 @@
 #include "jfs_dinode.h"
 #include "jfs_debug.h"
 
+
+void jfs_set_inode_flags(struct inode *inode)
+{
+	unsigned int flags = JFS_IP(inode)->mode2;
+
+	inode->i_flags &= ~(S_IMMUTABLE | S_APPEND |
+		S_NOATIME | S_DIRSYNC | S_SYNC);
+
+	if (flags & JFS_IMMUTABLE_FL)
+		inode->i_flags |= S_IMMUTABLE;
+	if (flags & JFS_APPEND_FL)
+		inode->i_flags |= S_APPEND;
+	if (flags & JFS_NOATIME_FL)
+		inode->i_flags |= S_NOATIME;
+	if (flags & JFS_DIRSYNC_FL)
+		inode->i_flags |= S_DIRSYNC;
+	if (flags & JFS_SYNC_FL)
+		inode->i_flags |= S_SYNC;
+}
+
 /*
  * NAME:	ialloc()
  *
@@ -74,10 +94,20 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 	}
 
 	inode->i_mode = mode;
-	if (S_ISDIR(mode))
-		jfs_inode->mode2 = IDIRECTORY | mode;
+	/* inherit flags from parent */
+	jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT;
+
+	if (S_ISDIR(mode)) {
+		jfs_inode->mode2 |= IDIRECTORY;
+		jfs_inode->mode2 &= ~JFS_DIRSYNC_FL;
+	}
+	else if (S_ISLNK(mode))
+		jfs_inode->mode2 &=
+			~(JFS_IMMUTABLE_FL|JFS_APPEND_FL);
 	else
-		jfs_inode->mode2 = INLINEEA | ISPARSE | mode;
+		jfs_inode->mode2 |= INLINEEA | ISPARSE;
+	jfs_inode->mode2 |= mode;
+
 	inode->i_blksize = sb->s_blocksize;
 	inode->i_blocks = 0;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
@@ -98,6 +128,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 	jfs_inode->atlhead = 0;
 	jfs_inode->atltail = 0;
 	jfs_inode->xtlid = 0;
+	jfs_set_inode_flags(inode);
 
 	jfs_info("ialloc returns inode = 0x%p\n", inode);
 
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index b54bac576cb..095d471b9f9 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -20,6 +20,8 @@
 
 extern struct inode *ialloc(struct inode *, umode_t);
 extern int jfs_fsync(struct file *, struct dentry *, int);
+extern int jfs_ioctl(struct inode *, struct file *,
+			unsigned int, unsigned long);
 extern void jfs_read_inode(struct inode *);
 extern int jfs_commit_inode(struct inode *, int);
 extern int jfs_write_inode(struct inode*, int);
@@ -29,6 +31,7 @@ extern void jfs_truncate(struct inode *);
 extern void jfs_truncate_nolock(struct inode *, loff_t);
 extern void jfs_free_zero_link(struct inode *);
 extern struct dentry *jfs_get_parent(struct dentry *dentry);
+extern void jfs_set_inode_flags(struct inode *);
 
 extern struct address_space_operations jfs_aops;
 extern struct inode_operations jfs_dir_inode_operations;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index ed4d170c212..309cee575f7 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1523,6 +1523,7 @@ struct file_operations jfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= jfs_readdir,
 	.fsync		= jfs_fsync,
+	.ioctl		= jfs_ioctl,
 };
 
 static int jfs_ci_hash(struct dentry *dir, struct qstr *this)
-- 
cgit v1.2.3


From 4837c672fd4d43c519d6b53308ee68d45b91b872 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Fri, 10 Feb 2006 08:11:53 -0600
Subject: JFS: Fix regression.  fsck complains if symlinks do not have INLINEEA
 attribute

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_inode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index ae2772cba2d..ffd2a8a0078 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -101,11 +101,11 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 		jfs_inode->mode2 |= IDIRECTORY;
 		jfs_inode->mode2 &= ~JFS_DIRSYNC_FL;
 	}
-	else if (S_ISLNK(mode))
-		jfs_inode->mode2 &=
-			~(JFS_IMMUTABLE_FL|JFS_APPEND_FL);
-	else
+	else {
 		jfs_inode->mode2 |= INLINEEA | ISPARSE;
+		if (S_ISLNK(mode))
+			jfs_inode->mode2 &= ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL);
+	}
 	jfs_inode->mode2 |= mode;
 
 	inode->i_blksize = sb->s_blocksize;
-- 
cgit v1.2.3


From 91dbb4deb30e817efc8d6bed89b1190a489ca776 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 15 Feb 2006 12:49:04 -0600
Subject: JFS: Use the kthread_ API

Use the kthread_ API instead of opencoding lots of hairy code for kernel
thread creation and teardown.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_logmgr.c     | 20 ++++++------------
 fs/jfs/jfs_superblock.h |  9 +++-----
 fs/jfs/jfs_txnmgr.c     | 26 +++++++----------------
 fs/jfs/super.c          | 55 ++++++++++++++++++++-----------------------------
 4 files changed, 38 insertions(+), 72 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 06bded6c12b..3113ff53bd5 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -64,6 +64,7 @@
 #include <linux/interrupt.h>
 #include <linux/smp_lock.h>
 #include <linux/completion.h>
+#include <linux/kthread.h>
 #include <linux/buffer_head.h>		/* for sync_blockdev() */
 #include <linux/bio.h>
 #include <linux/suspend.h>
@@ -81,7 +82,6 @@
  */
 static struct lbuf *log_redrive_list;
 static DEFINE_SPINLOCK(log_redrive_lock);
-DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait);
 
 
 /*
@@ -1980,7 +1980,7 @@ static inline void lbmRedrive(struct lbuf *bp)
 	log_redrive_list = bp;
 	spin_unlock_irqrestore(&log_redrive_lock, flags);
 
-	wake_up(&jfs_IO_thread_wait);
+	wake_up_process(jfsIOthread);
 }
 
 
@@ -2347,13 +2347,7 @@ int jfsIOWait(void *arg)
 {
 	struct lbuf *bp;
 
-	daemonize("jfsIO");
-
-	complete(&jfsIOwait);
-
 	do {
-		DECLARE_WAITQUEUE(wq, current);
-
 		spin_lock_irq(&log_redrive_lock);
 		while ((bp = log_redrive_list) != 0) {
 			log_redrive_list = bp->l_redrive_next;
@@ -2362,21 +2356,19 @@ int jfsIOWait(void *arg)
 			lbmStartIO(bp);
 			spin_lock_irq(&log_redrive_lock);
 		}
+		spin_unlock_irq(&log_redrive_lock);
+
 		if (freezing(current)) {
-			spin_unlock_irq(&log_redrive_lock);
 			refrigerator();
 		} else {
-			add_wait_queue(&jfs_IO_thread_wait, &wq);
 			set_current_state(TASK_INTERRUPTIBLE);
-			spin_unlock_irq(&log_redrive_lock);
 			schedule();
 			current->state = TASK_RUNNING;
-			remove_wait_queue(&jfs_IO_thread_wait, &wq);
 		}
-	} while (!jfs_stop_threads);
+	} while (!kthread_should_stop());
 
 	jfs_info("jfsIOWait being killed!");
-	complete_and_exit(&jfsIOwait, 0);
+	return 0;
 }
 
 /*
diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h
index fcf781bf31c..682cf1a68a1 100644
--- a/fs/jfs/jfs_superblock.h
+++ b/fs/jfs/jfs_superblock.h
@@ -113,12 +113,9 @@ extern int jfs_mount(struct super_block *);
 extern int jfs_mount_rw(struct super_block *, int);
 extern int jfs_umount(struct super_block *);
 extern int jfs_umount_rw(struct super_block *);
-
-extern int jfs_stop_threads;
-extern struct completion jfsIOwait;
-extern wait_queue_head_t jfs_IO_thread_wait;
-extern wait_queue_head_t jfs_commit_thread_wait;
-extern wait_queue_head_t jfs_sync_thread_wait;
 extern int jfs_extendfs(struct super_block *, s64, int);
 
+extern struct task_struct *jfsIOthread;
+extern struct task_struct *jfsSyncThread;
+
 #endif /*_H_JFS_SUPERBLOCK */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index d38f605d948..ac3d66948e8 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -49,6 +49,7 @@
 #include <linux/suspend.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/kthread.h>
 #include "jfs_incore.h"
 #include "jfs_inode.h"
 #include "jfs_filsys.h"
@@ -121,8 +122,7 @@ static DEFINE_SPINLOCK(jfsTxnLock);
 #define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
 #define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
 
-DECLARE_WAIT_QUEUE_HEAD(jfs_sync_thread_wait);
-DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
+static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
 static int jfs_commit_thread_waking;
 
 /*
@@ -207,7 +207,7 @@ static lid_t txLockAlloc(void)
 	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) {
 		jfs_info("txLockAlloc tlocks low");
 		jfs_tlocks_low = 1;
-		wake_up(&jfs_sync_thread_wait);
+		wake_up_process(jfsSyncThread);
 	}
 
 	return lid;
@@ -2743,10 +2743,6 @@ int jfs_lazycommit(void *arg)
 	unsigned long flags;
 	struct jfs_sb_info *sbi;
 
-	daemonize("jfsCommit");
-
-	complete(&jfsIOwait);
-
 	do {
 		LAZY_LOCK(flags);
 		jfs_commit_thread_waking = 0;	/* OK to wake another thread */
@@ -2806,13 +2802,13 @@ int jfs_lazycommit(void *arg)
 			current->state = TASK_RUNNING;
 			remove_wait_queue(&jfs_commit_thread_wait, &wq);
 		}
-	} while (!jfs_stop_threads);
+	} while (!kthread_should_stop());
 
 	if (!list_empty(&TxAnchor.unlock_queue))
 		jfs_err("jfs_lazycommit being killed w/pending transactions!");
 	else
 		jfs_info("jfs_lazycommit being killed\n");
-	complete_and_exit(&jfsIOwait, 0);
+	return 0;
 }
 
 void txLazyUnlock(struct tblock * tblk)
@@ -2932,10 +2928,6 @@ int jfs_sync(void *arg)
 	int rc;
 	tid_t tid;
 
-	daemonize("jfsSync");
-
-	complete(&jfsIOwait);
-
 	do {
 		/*
 		 * write each inode on the anonymous inode list
@@ -2996,19 +2988,15 @@ int jfs_sync(void *arg)
 			TXN_UNLOCK();
 			refrigerator();
 		} else {
-			DECLARE_WAITQUEUE(wq, current);
-
-			add_wait_queue(&jfs_sync_thread_wait, &wq);
 			set_current_state(TASK_INTERRUPTIBLE);
 			TXN_UNLOCK();
 			schedule();
 			current->state = TASK_RUNNING;
-			remove_wait_queue(&jfs_sync_thread_wait, &wq);
 		}
-	} while (!jfs_stop_threads);
+	} while (!kthread_should_stop());
 
 	jfs_info("jfs_sync being killed");
-	complete_and_exit(&jfsIOwait, 0);
+	return 0;
 }
 
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 1639d2cd371..bd6720d807a 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -25,6 +25,7 @@
 #include <linux/vfs.h>
 #include <linux/mount.h>
 #include <linux/moduleparam.h>
+#include <linux/kthread.h>
 #include <linux/posix_acl.h>
 #include <asm/uaccess.h>
 #include <linux/seq_file.h>
@@ -54,11 +55,9 @@ static int commit_threads = 0;
 module_param(commit_threads, int, 0);
 MODULE_PARM_DESC(commit_threads, "Number of commit threads");
 
-int jfs_stop_threads;
-static pid_t jfsIOthread;
-static pid_t jfsCommitThread[MAX_COMMIT_THREADS];
-static pid_t jfsSyncThread;
-DECLARE_COMPLETION(jfsIOwait);
+static struct task_struct *jfsCommitThread[MAX_COMMIT_THREADS];
+struct task_struct *jfsIOthread;
+struct task_struct *jfsSyncThread;
 
 #ifdef CONFIG_JFS_DEBUG
 int jfsloglevel = JFS_LOGLEVEL_WARN;
@@ -661,12 +660,12 @@ static int __init init_jfs_fs(void)
 	/*
 	 * I/O completion thread (endio)
 	 */
-	jfsIOthread = kernel_thread(jfsIOWait, NULL, CLONE_KERNEL);
-	if (jfsIOthread < 0) {
-		jfs_err("init_jfs_fs: fork failed w/rc = %d", jfsIOthread);
+	jfsIOthread = kthread_run(jfsIOWait, NULL, "jfsIO");
+	if (IS_ERR(jfsIOthread)) {
+		rc = PTR_ERR(jfsIOthread);
+		jfs_err("init_jfs_fs: fork failed w/rc = %d", rc);
 		goto end_txmngr;
 	}
-	wait_for_completion(&jfsIOwait);	/* Wait until thread starts */
 
 	if (commit_threads < 1)
 		commit_threads = num_online_cpus();
@@ -674,24 +673,21 @@ static int __init init_jfs_fs(void)
 		commit_threads = MAX_COMMIT_THREADS;
 
 	for (i = 0; i < commit_threads; i++) {
-		jfsCommitThread[i] = kernel_thread(jfs_lazycommit, NULL,
-						   CLONE_KERNEL);
-		if (jfsCommitThread[i] < 0) {
-			jfs_err("init_jfs_fs: fork failed w/rc = %d",
-				jfsCommitThread[i]);
+		jfsCommitThread[i] = kthread_run(jfs_lazycommit, NULL, "jfsCommit");
+		if (IS_ERR(jfsCommitThread[i])) {
+			rc = PTR_ERR(jfsCommitThread[i]);
+			jfs_err("init_jfs_fs: fork failed w/rc = %d", rc);
 			commit_threads = i;
 			goto kill_committask;
 		}
-		/* Wait until thread starts */
-		wait_for_completion(&jfsIOwait);
 	}
 
-	jfsSyncThread = kernel_thread(jfs_sync, NULL, CLONE_KERNEL);
-	if (jfsSyncThread < 0) {
-		jfs_err("init_jfs_fs: fork failed w/rc = %d", jfsSyncThread);
+	jfsSyncThread = kthread_run(jfs_sync, NULL, "jfsSync");
+	if (IS_ERR(jfsSyncThread)) {
+		rc = PTR_ERR(jfsSyncThread);
+		jfs_err("init_jfs_fs: fork failed w/rc = %d", rc);
 		goto kill_committask;
 	}
-	wait_for_completion(&jfsIOwait);	/* Wait until thread starts */
 
 #ifdef PROC_FS_JFS
 	jfs_proc_init();
@@ -700,13 +696,9 @@ static int __init init_jfs_fs(void)
 	return register_filesystem(&jfs_fs_type);
 
 kill_committask:
-	jfs_stop_threads = 1;
-	wake_up_all(&jfs_commit_thread_wait);
 	for (i = 0; i < commit_threads; i++)
-		wait_for_completion(&jfsIOwait);
-
-	wake_up(&jfs_IO_thread_wait);
-	wait_for_completion(&jfsIOwait);	/* Wait for thread exit */
+		kthread_stop(jfsCommitThread[i]);
+	kthread_stop(jfsIOthread);
 end_txmngr:
 	txExit();
 free_metapage:
@@ -722,16 +714,13 @@ static void __exit exit_jfs_fs(void)
 
 	jfs_info("exit_jfs_fs called");
 
-	jfs_stop_threads = 1;
 	txExit();
 	metapage_exit();
-	wake_up(&jfs_IO_thread_wait);
-	wait_for_completion(&jfsIOwait);	/* Wait until IO thread exits */
-	wake_up_all(&jfs_commit_thread_wait);
+
+	kthread_stop(jfsIOthread);
 	for (i = 0; i < commit_threads; i++)
-		wait_for_completion(&jfsIOwait);
-	wake_up(&jfs_sync_thread_wait);
-	wait_for_completion(&jfsIOwait);	/* Wait until Sync thread exits */
+		kthread_stop(jfsCommitThread[i]);
+	kthread_stop(jfsSyncThread);
 #ifdef PROC_FS_JFS
 	jfs_proc_clean();
 #endif
-- 
cgit v1.2.3


From d9e902668e815f9f33ba5056089684b0704eeac6 Mon Sep 17 00:00:00 2001
From: Herbert Poetzl <herbert@13thfloor.at>
Date: Wed, 22 Feb 2006 14:14:58 -0600
Subject: JFS: Add missing file from fa3241d24cf1182b0ffb6e4d412c3bc2a2ab7bf6

My mistake here.  I failed to checkin fs/jfs/ioctl.c

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/ioctl.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 fs/jfs/ioctl.c

(limited to 'fs')

diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
new file mode 100644
index 00000000000..67b3774820e
--- /dev/null
+++ b/fs/jfs/ioctl.c
@@ -0,0 +1,107 @@
+/*
+ * linux/fs/jfs/ioctl.c
+ *
+ * Copyright (C) 2006 Herbert Poetzl
+ * adapted from Remy Card's ext2/ioctl.c
+ */
+
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/ctype.h>
+#include <linux/capability.h>
+#include <linux/time.h>
+#include <asm/current.h>
+#include <asm/uaccess.h>
+
+#include "jfs_incore.h"
+#include "jfs_dinode.h"
+#include "jfs_inode.h"
+
+
+static struct {
+	long jfs_flag;
+	long ext2_flag;
+} jfs_map[] = {
+	{JFS_NOATIME_FL, EXT2_NOATIME_FL},
+	{JFS_DIRSYNC_FL, EXT2_DIRSYNC_FL},
+	{JFS_SYNC_FL, EXT2_SYNC_FL},
+	{JFS_SECRM_FL, EXT2_SECRM_FL},
+	{JFS_UNRM_FL, EXT2_UNRM_FL},
+	{JFS_APPEND_FL, EXT2_APPEND_FL},
+	{JFS_IMMUTABLE_FL, EXT2_IMMUTABLE_FL},
+	{0, 0},
+};
+
+static long jfs_map_ext2(unsigned long flags, int from)
+{
+	int index=0;
+	long mapped=0;
+
+	while (jfs_map[index].jfs_flag) {
+		if (from) {
+			if (jfs_map[index].ext2_flag & flags)
+				mapped |= jfs_map[index].jfs_flag;
+		} else {
+			if (jfs_map[index].jfs_flag & flags)
+				mapped |= jfs_map[index].ext2_flag;
+		}
+		index++;
+	}
+	return mapped;
+}
+
+
+int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd,
+		unsigned long arg)
+{
+	struct jfs_inode_info *jfs_inode = JFS_IP(inode);
+	unsigned int flags;
+
+	switch (cmd) {
+	case JFS_IOC_GETFLAGS:
+		flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE;
+		flags = jfs_map_ext2(flags, 0);
+		return put_user(flags, (int __user *) arg);
+	case JFS_IOC_SETFLAGS: {
+		unsigned int oldflags;
+
+		if (IS_RDONLY(inode))
+			return -EROFS;
+
+		if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+			return -EACCES;
+
+		if (get_user(flags, (int __user *) arg))
+			return -EFAULT;
+
+		flags = jfs_map_ext2(flags, 1);
+		if (!S_ISDIR(inode->i_mode))
+			flags &= ~JFS_DIRSYNC_FL;
+
+		oldflags = jfs_inode->mode2;
+
+		/*
+		 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+		 * the relevant capability.
+		 */
+		if ((oldflags & JFS_IMMUTABLE_FL) ||
+			((flags ^ oldflags) &
+			(JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
+			if (!capable(CAP_LINUX_IMMUTABLE))
+				return -EPERM;
+		}
+
+		flags = flags & JFS_FL_USER_MODIFIABLE;
+		flags |= oldflags & ~JFS_FL_USER_MODIFIABLE;
+		jfs_inode->mode2 = flags;
+
+		jfs_set_inode_flags(inode);
+		inode->i_ctime = CURRENT_TIME_SEC;
+		mark_inode_dirty(inode);
+		return 0;
+	}
+	default:
+		return -ENOTTY;
+	}
+}
+
-- 
cgit v1.2.3


From 5b3030e39049212c975665cdb3eeabcfaf7c94ca Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Thu, 23 Feb 2006 09:47:13 -0600
Subject: JFS: kzalloc conversion

this converts fs/jfs to kzalloc() usage.
compile tested with make allyesconfig

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_logmgr.c   | 9 +++------
 fs/jfs/jfs_metapage.c | 3 +--
 fs/jfs/super.c        | 3 +--
 3 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 3113ff53bd5..0b348b13b55 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1105,11 +1105,10 @@ int lmLogOpen(struct super_block *sb)
 		}
 	}
 
-	if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
+	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
 		up(&jfs_log_sem);
 		return -ENOMEM;
 	}
-	memset(log, 0, sizeof(struct jfs_log));
 	INIT_LIST_HEAD(&log->sb_list);
 	init_waitqueue_head(&log->syncwait);
 
@@ -1181,9 +1180,8 @@ static int open_inline_log(struct super_block *sb)
 	struct jfs_log *log;
 	int rc;
 
-	if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL)))
+	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
 		return -ENOMEM;
-	memset(log, 0, sizeof(struct jfs_log));
 	INIT_LIST_HEAD(&log->sb_list);
 	init_waitqueue_head(&log->syncwait);
 
@@ -1216,12 +1214,11 @@ static int open_dummy_log(struct super_block *sb)
 
 	down(&jfs_log_sem);
 	if (!dummy_log) {
-		dummy_log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL);
+		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
 		if (!dummy_log) {
 			up(&jfs_log_sem);
 			return -ENOMEM;
 		}
-		memset(dummy_log, 0, sizeof(struct jfs_log));
 		INIT_LIST_HEAD(&dummy_log->sb_list);
 		init_waitqueue_head(&dummy_log->syncwait);
 		dummy_log->no_integrity = 1;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 8a53981f9f2..5fbaeaadccd 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -104,10 +104,9 @@ static inline int insert_metapage(struct page *page, struct metapage *mp)
 	if (PagePrivate(page))
 		a = mp_anchor(page);
 	else {
-		a = kmalloc(sizeof(struct meta_anchor), GFP_NOFS);
+		a = kzalloc(sizeof(struct meta_anchor), GFP_NOFS);
 		if (!a)
 			return -ENOMEM;
-		memset(a, 0, sizeof(struct meta_anchor));
 		set_page_private(page, (unsigned long)a);
 		SetPagePrivate(page);
 		kmap(page);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index bd6720d807a..ab633334700 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -395,10 +395,9 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 	if (!new_valid_dev(sb->s_bdev->bd_dev))
 		return -EOVERFLOW;
 
-	sbi = kmalloc(sizeof (struct jfs_sb_info), GFP_KERNEL);
+	sbi = kzalloc(sizeof (struct jfs_sb_info), GFP_KERNEL);
 	if (!sbi)
 		return -ENOSPC;
-	memset(sbi, 0, sizeof (struct jfs_sb_info));
 	sb->s_fs_info = sbi;
 	sbi->sb = sb;
 
-- 
cgit v1.2.3


From bb8047d3540affd6b8c2adac3fe792e07143be0f Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 7 Mar 2006 11:53:46 +0000
Subject: NTFS: Fix two compiler warnings on Alpha.  Thanks to Andrew Morton
 for       reporting them.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/dir.c  | 2 +-
 fs/ntfs/file.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index b0690d4c890..9d9ed3fe371 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1136,7 +1136,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	if (fpos == 1) {
 		ntfs_debug("Calling filldir for .. with len 2, fpos 0x1, "
 				"inode 0x%lx, DT_DIR.",
-				parent_ino(filp->f_dentry));
+				(unsigned long)parent_ino(filp->f_dentry));
 		rc = filldir(dirent, "..", 2, fpos,
 				parent_ino(filp->f_dentry), DT_DIR);
 		if (rc)
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5027d3d1b3f..2e5ba0c535d 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -943,7 +943,8 @@ rl_not_mapped_enoent:
 		}
 		ni->runlist.rl = rl;
 		status.runlist_merged = 1;
-		ntfs_debug("Allocated cluster, lcn 0x%llx.", lcn);
+		ntfs_debug("Allocated cluster, lcn 0x%llx.",
+				(unsigned long long)lcn);
 		/* Map and lock the mft record and get the attribute record. */
 		if (!NInoAttr(ni))
 			base_ni = ni;
-- 
cgit v1.2.3


From be0bf7da19135a7a0f8c275f20c819940be218d9 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 8 Mar 2006 10:59:15 -0600
Subject: JFS: Take logsync lock before testing mp->lsn

This fixes a race where lsn could be cleared before taking the lock

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_dmap.c | 7 ++-----
 fs/jfs/jfs_imap.c | 6 ++----
 2 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 4fb3ed18492..c161c98954e 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -532,10 +532,10 @@ dbUpdatePMap(struct inode *ipbmap,
 
 		lastlblkno = lblkno;
 
+		LOGSYNC_LOCK(log, flags);
 		if (mp->lsn != 0) {
 			/* inherit older/smaller lsn */
 			logdiff(diffp, mp->lsn, log);
-			LOGSYNC_LOCK(log, flags);
 			if (difft < diffp) {
 				mp->lsn = lsn;
 
@@ -548,20 +548,17 @@ dbUpdatePMap(struct inode *ipbmap,
 			logdiff(diffp, mp->clsn, log);
 			if (difft > diffp)
 				mp->clsn = tblk->clsn;
-			LOGSYNC_UNLOCK(log, flags);
 		} else {
 			mp->log = log;
 			mp->lsn = lsn;
 
 			/* insert bp after tblock in logsync list */
-			LOGSYNC_LOCK(log, flags);
-
 			log->count++;
 			list_add(&mp->synclist, &tblk->synclist);
 
 			mp->clsn = tblk->clsn;
-			LOGSYNC_UNLOCK(log, flags);
 		}
+		LOGSYNC_UNLOCK(log, flags);
 	}
 
 	/* write the last buffer. */
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 87dd86c34c2..b62a048b688 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -2844,11 +2844,11 @@ diUpdatePMap(struct inode *ipimap,
 	 */
 	lsn = tblk->lsn;
 	log = JFS_SBI(tblk->sb)->log;
+	LOGSYNC_LOCK(log, flags);
 	if (mp->lsn != 0) {
 		/* inherit older/smaller lsn */
 		logdiff(difft, lsn, log);
 		logdiff(diffp, mp->lsn, log);
-		LOGSYNC_LOCK(log, flags);
 		if (difft < diffp) {
 			mp->lsn = lsn;
 			/* move mp after tblock in logsync list */
@@ -2860,17 +2860,15 @@ diUpdatePMap(struct inode *ipimap,
 		logdiff(diffp, mp->clsn, log);
 		if (difft > diffp)
 			mp->clsn = tblk->clsn;
-		LOGSYNC_UNLOCK(log, flags);
 	} else {
 		mp->log = log;
 		mp->lsn = lsn;
 		/* insert mp after tblock in logsync list */
-		LOGSYNC_LOCK(log, flags);
 		log->count++;
 		list_add(&mp->synclist, &tblk->synclist);
 		mp->clsn = tblk->clsn;
-		LOGSYNC_UNLOCK(log, flags);
 	}
+	LOGSYNC_UNLOCK(log, flags);
 	write_metapage(mp);
 	return (0);
 }
-- 
cgit v1.2.3


From 69eb66d7da7dba2696281981347698e1693c2340 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Thu, 9 Mar 2006 13:59:30 -0600
Subject: JFS: add uid, gid, and umask mount options

OS/2 doesn't initialize the uid, gid, or unix-style permission bits.  The
uid, gid, & umask mount options perform pretty much like those for the fat
file system, overriding what is stored on disk.  This is useful for users
sharing the file system with OS/2.

I implemented a little feature so that if you mask the execute bit, it
will be re-enabled on directories when the appropriate read bit is unmasked.
I didn't want to implement an fmask & dmask option.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/acl.c        |  3 +++
 fs/jfs/jfs_imap.c   | 50 ++++++++++++++++++++++++++++++++++++++++++++------
 fs/jfs/jfs_incore.h |  5 +++++
 fs/jfs/jfs_inode.c  |  7 +++++++
 fs/jfs/super.c      | 38 ++++++++++++++++++++++++++++++++++----
 5 files changed, 93 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index e0b6fdab200..e2281300979 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -183,6 +183,9 @@ cleanup:
 		posix_acl_release(acl);
 	} else
 		inode->i_mode &= ~current->fs->umask;
+	
+	JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) |
+			       inode->i_mode;
 
 	return rc;
 }
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index b62a048b688..ccbe60aff83 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -3074,14 +3074,40 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno,
 static int copy_from_dinode(struct dinode * dip, struct inode *ip)
 {
 	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
+	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
 
 	jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
 	jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
 
 	ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff;
+	if (sbi->umask != -1) {
+		ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask);
+		/* For directories, add x permission if r is allowed by umask */
+		if (S_ISDIR(ip->i_mode)) {
+			if (ip->i_mode & 0400)
+				ip->i_mode |= 0100;
+			if (ip->i_mode & 0040)
+				ip->i_mode |= 0010;
+			if (ip->i_mode & 0004)
+				ip->i_mode |= 0001;
+		}
+	}
 	ip->i_nlink = le32_to_cpu(dip->di_nlink);
-	ip->i_uid = le32_to_cpu(dip->di_uid);
-	ip->i_gid = le32_to_cpu(dip->di_gid);
+
+	jfs_ip->saved_uid = le32_to_cpu(dip->di_uid);
+	if (sbi->uid == -1)
+		ip->i_uid = jfs_ip->saved_uid;
+	else {
+		ip->i_uid = sbi->uid;
+	}
+
+	jfs_ip->saved_gid = le32_to_cpu(dip->di_gid);
+	if (sbi->gid == -1)
+		ip->i_gid = jfs_ip->saved_gid;
+	else {
+		ip->i_gid = sbi->gid;
+	}
+
 	ip->i_size = le64_to_cpu(dip->di_size);
 	ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec);
 	ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
@@ -3132,21 +3158,33 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
 static void copy_to_dinode(struct dinode * dip, struct inode *ip)
 {
 	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
+	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
 
 	dip->di_fileset = cpu_to_le32(jfs_ip->fileset);
-	dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp);
+	dip->di_inostamp = cpu_to_le32(sbi->inostamp);
 	dip->di_number = cpu_to_le32(ip->i_ino);
 	dip->di_gen = cpu_to_le32(ip->i_generation);
 	dip->di_size = cpu_to_le64(ip->i_size);
 	dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
 	dip->di_nlink = cpu_to_le32(ip->i_nlink);
-	dip->di_uid = cpu_to_le32(ip->i_uid);
-	dip->di_gid = cpu_to_le32(ip->i_gid);
+	if (sbi->uid == -1)
+		dip->di_uid = cpu_to_le32(ip->i_uid);
+	else
+		dip->di_uid = cpu_to_le32(jfs_ip->saved_uid);
+	if (sbi->gid == -1)
+		dip->di_gid = cpu_to_le32(ip->i_gid);
+	else
+		dip->di_gid = cpu_to_le32(jfs_ip->saved_gid);
 	/*
 	 * mode2 is only needed for storing the higher order bits.
 	 * Trust i_mode for the lower order ones
 	 */
-	dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | ip->i_mode);
+	if (sbi->umask == -1)
+		dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) |
+					   ip->i_mode);
+	else /* Leave the original permissions alone */
+		dip->di_mode = cpu_to_le32(jfs_ip->mode2);
+
 	dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec);
 	dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec);
 	dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec);
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index a97ead889a6..54d73716ca8 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -38,6 +38,8 @@
 struct jfs_inode_info {
 	int	fileset;	/* fileset number (always 16)*/
 	uint	mode2;		/* jfs-specific mode		*/
+	uint	saved_uid;	/* saved for uid mount option */
+	uint	saved_gid;	/* saved for gid mount option */
 	pxd_t   ixpxd;		/* inode extent descriptor	*/
 	dxd_t	acl;		/* dxd describing acl	*/
 	dxd_t	ea;		/* dxd describing ea	*/
@@ -170,6 +172,9 @@ struct jfs_sb_info {
 	uint		state;		/* mount/recovery state	*/
 	unsigned long	flag;		/* mount time flags */
 	uint		p_state;	/* state prior to going no integrity */
+	uint		uid;		/* uid to override on-disk uid */
+	uint		gid;		/* gid to override on-disk gid */
+	uint		umask;		/* umask to override on-disk umask */
 };
 
 /* jfs_sb_info commit_state */
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index ffd2a8a0078..495df402916 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -82,6 +82,13 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 	} else
 		inode->i_gid = current->fsgid;
 
+	/*
+	 * New inodes need to save sane values on disk when
+	 * uid & gid mount options are used
+	 */
+	jfs_inode->saved_uid = inode->i_uid;
+	jfs_inode->saved_gid = inode->i_gid;
+
 	/*
 	 * Allocate inode to quota.
 	 */
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index ab633334700..18f69e6aa71 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -194,7 +194,7 @@ static void jfs_put_super(struct super_block *sb)
 enum {
 	Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
 	Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
-	Opt_usrquota, Opt_grpquota
+	Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask
 };
 
 static match_table_t tokens = {
@@ -208,6 +208,9 @@ static match_table_t tokens = {
 	{Opt_ignore, "quota"},
 	{Opt_usrquota, "usrquota"},
 	{Opt_grpquota, "grpquota"},
+	{Opt_uid, "uid=%u"},
+	{Opt_gid, "gid=%u"},
+	{Opt_umask, "umask=%u"},
 	{Opt_err, NULL}
 };
 
@@ -312,7 +315,29 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 			       "JFS: quota operations not supported\n");
 			break;
 #endif
-
+		case Opt_uid:
+		{
+			char *uid = args[0].from;
+			sbi->uid = simple_strtoul(uid, &uid, 0);
+			break;
+		}
+		case Opt_gid:
+		{
+			char *gid = args[0].from;
+			sbi->gid = simple_strtoul(gid, &gid, 0);
+			break;
+		}
+		case Opt_umask:
+		{
+			char *umask = args[0].from;
+			sbi->umask = simple_strtoul(umask, &umask, 8);
+			if (sbi->umask & ~0777) {
+				printk(KERN_ERR
+				       "JFS: Invalid value of umask\n");
+				goto cleanup;
+			}
+			break;
+		}
 		default:
 			printk("jfs: Unrecognized mount option \"%s\" "
 					" or missing value\n", p);
@@ -400,6 +425,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 		return -ENOSPC;
 	sb->s_fs_info = sbi;
 	sbi->sb = sb;
+	sbi->uid = sbi->gid = sbi->umask = -1;
 
 	/* initialize the mount flag and determine the default error handler */
 	flag = JFS_ERR_REMOUNT_RO;
@@ -562,10 +588,14 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb);
 
+	if (sbi->uid != -1)
+		seq_printf(seq, ",uid=%d", sbi->uid);
+	if (sbi->gid != -1)
+		seq_printf(seq, ",gid=%d", sbi->gid);
+	if (sbi->umask != -1)
+		seq_printf(seq, ",umask=%03o", sbi->umask);
 	if (sbi->flag & JFS_NOINTEGRITY)
 		seq_puts(seq, ",nointegrity");
-	else
-		seq_puts(seq, ",integrity");
 
 #if defined(CONFIG_QUOTA)
 	if (sbi->flag & JFS_USRQUOTA)
-- 
cgit v1.2.3


From 0c9512d74635198d90f349acec19381e446ba2b4 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Tue, 14 Mar 2006 13:02:13 +1100
Subject: [XFS] find_exported_dentry().  XFS does not need to use this symbol
 as it is provided by a vector through the superblock export operations when
 the filesystem is exported by NFS. The fix is to call that vector instead of
 using the exported symbol directly.

SGI-PV: 948858
SGI-Modid: xfs-linux-melb:xfs-kern:25062a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_export.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 80eb249f2fa..821bd12dd85 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -78,7 +78,7 @@ linvfs_decode_fh(
 	}
 
 	fh = (__u32 *)&ifid;
-	return find_exported_dentry(sb, fh, parent, acceptable, context);
+	return sb->s_export_op->find_exported_dentry(sb, fh, parent, acceptable, context);
 }
 
 
-- 
cgit v1.2.3


From 9f4cbecd7e5ee6390fecd6032dc04ca8c9805dc9 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:05:30 +1100
Subject: [XFS] XFS propagates MS_NOATIME through two levels internally but
 doesn't actually use it.  Kill this dead code.	Signed-off-by: Christoph
 Hellwig <hch@lst.de>

SGI-PV: 904196
SGI-Modid: xfs-linux-melb:xfs-kern:25086a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c | 2 --
 fs/xfs/xfs_clnt.h            | 2 --
 fs/xfs/xfs_mount.h           | 2 --
 fs/xfs/xfs_vfsops.c          | 7 -------
 4 files changed, 13 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index f22e426d9e4..59989f6f83e 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -76,8 +76,6 @@ xfs_args_allocate(
 	strncpy(args->fsname, sb->s_id, MAXNAMELEN);
 
 	/* Copy the already-parsed mount(2) flags we're interested in */
-	if (sb->s_flags & MS_NOATIME)
-		args->flags |= XFSMNT_NOATIME;
 	if (sb->s_flags & MS_DIRSYNC)
 		args->flags |= XFSMNT_DIRSYNC;
 	if (sb->s_flags & MS_SYNCHRONOUS)
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index f57cc9ac875..022fff62085 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -68,8 +68,6 @@ struct xfs_mount_args {
 						 * enforcement */
 #define XFSMNT_PQUOTAENF	0x00000040	/* IRIX project quota limit
 						 * enforcement */
-#define XFSMNT_NOATIME		0x00000100	/* don't modify access
-						 * times on reads */
 #define XFSMNT_NOALIGN		0x00000200	/* don't allocate at
 						 * stripe boundaries*/
 #define XFSMNT_RETERR		0x00000400	/* return error to user */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index cd3cf9613a0..4c9817a8043 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -386,8 +386,6 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_FS_SHUTDOWN	(1ULL << 4)	/* atomic stop of all filesystem
 						   operations, typically for
 						   disk errors in metadata */
-#define XFS_MOUNT_NOATIME	(1ULL << 5)	/* don't modify inode access
-						   times on reads */
 #define XFS_MOUNT_RETERR	(1ULL << 6)     /* return alignment errors to
 						   user */
 #define XFS_MOUNT_NOALIGN	(1ULL << 7)	/* turn off stripe alignment
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index b6ad370fab3..2a0a9efb8cc 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -258,8 +258,6 @@ xfs_start_flags(
 		mp->m_inoadd = XFS_INO64_OFFSET;
 	}
 #endif
-	if (ap->flags & XFSMNT_NOATIME)
-		mp->m_flags |= XFS_MOUNT_NOATIME;
 	if (ap->flags & XFSMNT_RETERR)
 		mp->m_flags |= XFS_MOUNT_RETERR;
 	if (ap->flags & XFSMNT_NOALIGN)
@@ -654,11 +652,6 @@ xfs_mntupdate(
 	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
 	int		error;
 
-	if (args->flags & XFSMNT_NOATIME)
-		mp->m_flags |= XFS_MOUNT_NOATIME;
-	else
-		mp->m_flags &= ~XFS_MOUNT_NOATIME;
-
 	if (args->flags & XFSMNT_BARRIER)
 		mp->m_flags |= XFS_MOUNT_BARRIER;
 	else
-- 
cgit v1.2.3


From 8d280b98cfe3c0b69c37d355218975c1c0279bb0 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Tue, 14 Mar 2006 13:13:09 +1100
Subject: [XFS] On machines with more than 8 cpus, when running parallel I/O
 threads, the incore superblock lock becomes the limiting factor for buffered
 write throughput. Make the contended fields in the incore superblock use
 per-cpu counters so that there is no global lock to limit scalability.

SGI-PV: 946630
SGI-Modid: xfs-linux-melb:xfs-kern:25106a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_linux.h |   5 +
 fs/xfs/xfs_fsops.c           |   1 +
 fs/xfs/xfs_mount.c           | 560 +++++++++++++++++++++++++++++++++++++++++--
 fs/xfs/xfs_mount.h           |  34 +++
 fs/xfs/xfs_vfsops.c          |   3 +-
 5 files changed, 586 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 67389b74552..377a9f54a04 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -100,6 +100,11 @@
  */
 #undef  HAVE_REFCACHE	/* reference cache not needed for NFS in 2.6 */
 #define HAVE_SENDFILE	/* sendfile(2) exists in 2.6, but not in 2.4 */
+#if CONFIG_SMP
+#define HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */
+#else
+#undef  HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */
+#endif
 
 /*
  * State flag for unwritten extent buffers.
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index b4d971b0158..56caa88713a 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -462,6 +462,7 @@ xfs_fs_counts(
 {
 	unsigned long	s;
 
+	xfs_icsb_sync_counters_lazy(mp);
 	s = XFS_SB_LOCK(mp);
 	cnt->freedata = mp->m_sb.sb_fdblocks;
 	cnt->freertx = mp->m_sb.sb_frextents;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 62188ea392c..9b43b7b3d76 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -51,11 +51,31 @@ STATIC int	xfs_uuid_mount(xfs_mount_t *);
 STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
 STATIC void	xfs_unmountfs_wait(xfs_mount_t *);
 
+
+#ifdef HAVE_PERCPU_SB
+STATIC void	xfs_icsb_destroy_counters(xfs_mount_t *);
+STATIC void	xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int);
+STATIC void	xfs_icsb_sync_counters(xfs_mount_t *);
+STATIC int	xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
+						int, int);
+STATIC int	xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t,
+						int, int);
+
+#else
+
+#define xfs_icsb_destroy_counters(mp)			do { } while (0)
+#define xfs_icsb_balance_counter(mp, a, b)		do { } while (0)
+#define xfs_icsb_sync_counters(mp)			do { } while (0)
+#define xfs_icsb_modify_counters(mp, a, b, c)		do { } while (0)
+#define xfs_icsb_modify_counters_locked(mp, a, b, c)	do { } while (0)
+
+#endif
+
 static const struct {
-    short offset;
-    short type;     /* 0 = integer
-		* 1 = binary / string (no translation)
-		*/
+	short offset;
+	short type;	/* 0 = integer
+			 * 1 = binary / string (no translation)
+			 */
 } xfs_sb_info[] = {
     { offsetof(xfs_sb_t, sb_magicnum),   0 },
     { offsetof(xfs_sb_t, sb_blocksize),  0 },
@@ -113,7 +133,11 @@ xfs_mount_init(void)
 {
 	xfs_mount_t *mp;
 
-	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
+	mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
+
+	if (xfs_icsb_init_counters(mp)) {
+		mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
+	}
 
 	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
 	spinlock_init(&mp->m_sb_lock, "xfs_sb");
@@ -136,8 +160,8 @@ xfs_mount_init(void)
  */
 void
 xfs_mount_free(
-	xfs_mount_t *mp,
-	int	    remove_bhv)
+	xfs_mount_t	*mp,
+	int		remove_bhv)
 {
 	if (mp->m_ihash)
 		xfs_ihash_free(mp);
@@ -177,6 +201,7 @@ xfs_mount_free(
 		VFS_REMOVEBHV(vfsp, &mp->m_bhv);
 	}
 
+	xfs_icsb_destroy_counters(mp);
 	kmem_free(mp, sizeof(xfs_mount_t));
 }
 
@@ -527,6 +552,10 @@ xfs_readsb(xfs_mount_t *mp)
 		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
 	}
 
+	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
+	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
+	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
+
 	mp->m_sb_bp = bp;
 	xfs_buf_relse(bp);
 	ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
@@ -1154,6 +1183,9 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
 	sbp = xfs_getsb(mp, 0);
 	if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
 		XFS_FORCED_SHUTDOWN(mp))) {
+
+		xfs_icsb_sync_counters(mp);
+
 		/*
 		 * mark shared-readonly if desired
 		 */
@@ -1227,7 +1259,6 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
 
 	xfs_trans_log_buf(tp, bp, first, last);
 }
-
 /*
  * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
  * a delta to a specified field in the in-core superblock.  Simply
@@ -1237,7 +1268,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
  *
  * The SB_LOCK must be held when this routine is called.
  */
-STATIC int
+int
 xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
 			int delta, int rsvd)
 {
@@ -1406,9 +1437,26 @@ xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
 	unsigned long	s;
 	int	status;
 
-	s = XFS_SB_LOCK(mp);
-	status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-	XFS_SB_UNLOCK(mp, s);
+	/* check for per-cpu counters */
+	switch (field) {
+#ifdef HAVE_PERCPU_SB
+	case XFS_SBS_ICOUNT:
+	case XFS_SBS_IFREE:
+	case XFS_SBS_FDBLOCKS:
+		if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
+			status = xfs_icsb_modify_counters(mp, field,
+							delta, rsvd);
+			break;
+		}
+		/* FALLTHROUGH */
+#endif
+	default:
+		s = XFS_SB_LOCK(mp);
+		status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
+		XFS_SB_UNLOCK(mp, s);
+		break;
+	}
+
 	return status;
 }
 
@@ -1445,8 +1493,26 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
 		 * from the loop so we'll fall into the undo loop
 		 * below.
 		 */
-		status = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-						    msbp->msb_delta, rsvd);
+		switch (msbp->msb_field) {
+#ifdef HAVE_PERCPU_SB
+		case XFS_SBS_ICOUNT:
+		case XFS_SBS_IFREE:
+		case XFS_SBS_FDBLOCKS:
+			if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
+				status = xfs_icsb_modify_counters_locked(mp,
+							msbp->msb_field,
+							msbp->msb_delta, rsvd);
+				break;
+			}
+			/* FALLTHROUGH */
+#endif
+		default:
+			status = xfs_mod_incore_sb_unlocked(mp,
+						msbp->msb_field,
+						msbp->msb_delta, rsvd);
+			break;
+		}
+
 		if (status != 0) {
 			break;
 		}
@@ -1463,8 +1529,28 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
 	if (status != 0) {
 		msbp--;
 		while (msbp >= msb) {
-			status = xfs_mod_incore_sb_unlocked(mp,
-				    msbp->msb_field, -(msbp->msb_delta), rsvd);
+			switch (msbp->msb_field) {
+#ifdef HAVE_PERCPU_SB
+			case XFS_SBS_ICOUNT:
+			case XFS_SBS_IFREE:
+			case XFS_SBS_FDBLOCKS:
+				if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
+					status =
+					    xfs_icsb_modify_counters_locked(mp,
+							msbp->msb_field,
+							-(msbp->msb_delta),
+							rsvd);
+					break;
+				}
+				/* FALLTHROUGH */
+#endif
+			default:
+				status = xfs_mod_incore_sb_unlocked(mp,
+							msbp->msb_field,
+							-(msbp->msb_delta),
+							rsvd);
+				break;
+			}
 			ASSERT(status == 0);
 			msbp--;
 		}
@@ -1577,3 +1663,445 @@ xfs_mount_log_sbunit(
 	xfs_mod_sb(tp, fields);
 	xfs_trans_commit(tp, 0, NULL);
 }
+
+
+#ifdef HAVE_PERCPU_SB
+/*
+ * Per-cpu incore superblock counters
+ *
+ * Simple concept, difficult implementation
+ *
+ * Basically, replace the incore superblock counters with a distributed per cpu
+ * counter for contended fields (e.g.  free block count).
+ *
+ * Difficulties arise in that the incore sb is used for ENOSPC checking, and
+ * hence needs to be accurately read when we are running low on space. Hence
+ * there is a method to enable and disable the per-cpu counters based on how
+ * much "stuff" is available in them.
+ *
+ * Basically, a counter is enabled if there is enough free resource to justify
+ * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
+ * ENOSPC), then we disable the counters to synchronise all callers and
+ * re-distribute the available resources.
+ *
+ * If, once we redistributed the available resources, we still get a failure,
+ * we disable the per-cpu counter and go through the slow path.
+ *
+ * The slow path is the current xfs_mod_incore_sb() function.  This means that
+ * when we disable a per-cpu counter, we need to drain it's resources back to
+ * the global superblock. We do this after disabling the counter to prevent
+ * more threads from queueing up on the counter.
+ *
+ * Essentially, this means that we still need a lock in the fast path to enable
+ * synchronisation between the global counters and the per-cpu counters. This
+ * is not a problem because the lock will be local to a CPU almost all the time
+ * and have little contention except when we get to ENOSPC conditions.
+ *
+ * Basically, this lock becomes a barrier that enables us to lock out the fast
+ * path while we do things like enabling and disabling counters and
+ * synchronising the counters.
+ *
+ * Locking rules:
+ *
+ * 	1. XFS_SB_LOCK() before picking up per-cpu locks
+ * 	2. per-cpu locks always picked up via for_each_online_cpu() order
+ * 	3. accurate counter sync requires XFS_SB_LOCK + per cpu locks
+ * 	4. modifying per-cpu counters requires holding per-cpu lock
+ * 	5. modifying global counters requires holding XFS_SB_LOCK
+ *	6. enabling or disabling a counter requires holding the XFS_SB_LOCK
+ *	   and _none_ of the per-cpu locks.
+ *
+ * Disabled counters are only ever re-enabled by a balance operation
+ * that results in more free resources per CPU than a given threshold.
+ * To ensure counters don't remain disabled, they are rebalanced when
+ * the global resource goes above a higher threshold (i.e. some hysteresis
+ * is present to prevent thrashing).
+ *
+ * Note: hotplug CPUs not yet supported
+ */
+int
+xfs_icsb_init_counters(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_cnts_t *cntp;
+	int		i;
+
+	mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
+	if (mp->m_sb_cnts == NULL)
+		return -ENOMEM;
+
+	for_each_online_cpu(i) {
+		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
+		spin_lock_init(&cntp->icsb_lock);
+	}
+	/*
+	 * start with all counters disabled so that the
+	 * initial balance kicks us off correctly
+	 */
+	mp->m_icsb_counters = -1;
+	return 0;
+}
+
+STATIC void
+xfs_icsb_destroy_counters(
+	xfs_mount_t	*mp)
+{
+	if (mp->m_sb_cnts)
+		free_percpu(mp->m_sb_cnts);
+}
+
+
+STATIC inline void
+xfs_icsb_lock_all_counters(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_cnts_t *cntp;
+	int		i;
+
+	for_each_online_cpu(i) {
+		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
+		spin_lock(&cntp->icsb_lock);
+	}
+}
+
+STATIC inline void
+xfs_icsb_unlock_all_counters(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_cnts_t *cntp;
+	int		i;
+
+	for_each_online_cpu(i) {
+		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
+		spin_unlock(&cntp->icsb_lock);
+	}
+}
+
+STATIC void
+xfs_icsb_count(
+	xfs_mount_t	*mp,
+	xfs_icsb_cnts_t	*cnt,
+	int		flags)
+{
+	xfs_icsb_cnts_t *cntp;
+	int		i;
+
+	memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
+
+	if (!(flags & XFS_ICSB_LAZY_COUNT))
+		xfs_icsb_lock_all_counters(mp);
+
+	for_each_online_cpu(i) {
+		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
+		cnt->icsb_icount += cntp->icsb_icount;
+		cnt->icsb_ifree += cntp->icsb_ifree;
+		cnt->icsb_fdblocks += cntp->icsb_fdblocks;
+	}
+
+	if (!(flags & XFS_ICSB_LAZY_COUNT))
+		xfs_icsb_unlock_all_counters(mp);
+}
+
+STATIC int
+xfs_icsb_counter_disabled(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field)
+{
+	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
+	return test_bit(field, &mp->m_icsb_counters);
+}
+
+STATIC int
+xfs_icsb_disable_counter(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field)
+{
+	xfs_icsb_cnts_t	cnt;
+
+	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
+
+	xfs_icsb_lock_all_counters(mp);
+	if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
+		/* drain back to superblock */
+
+		xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT);
+		switch(field) {
+		case XFS_SBS_ICOUNT:
+			mp->m_sb.sb_icount = cnt.icsb_icount;
+			break;
+		case XFS_SBS_IFREE:
+			mp->m_sb.sb_ifree = cnt.icsb_ifree;
+			break;
+		case XFS_SBS_FDBLOCKS:
+			mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
+			break;
+		default:
+			BUG();
+		}
+	}
+
+	xfs_icsb_unlock_all_counters(mp);
+
+	return 0;
+}
+
+STATIC void
+xfs_icsb_enable_counter(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field,
+	uint64_t	count,
+	uint64_t	resid)
+{
+	xfs_icsb_cnts_t	*cntp;
+	int		i;
+
+	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
+
+	xfs_icsb_lock_all_counters(mp);
+	for_each_online_cpu(i) {
+		cntp = per_cpu_ptr(mp->m_sb_cnts, i);
+		switch (field) {
+		case XFS_SBS_ICOUNT:
+			cntp->icsb_icount = count + resid;
+			break;
+		case XFS_SBS_IFREE:
+			cntp->icsb_ifree = count + resid;
+			break;
+		case XFS_SBS_FDBLOCKS:
+			cntp->icsb_fdblocks = count + resid;
+			break;
+		default:
+			BUG();
+			break;
+		}
+		resid = 0;
+	}
+	clear_bit(field, &mp->m_icsb_counters);
+	xfs_icsb_unlock_all_counters(mp);
+}
+
+STATIC void
+xfs_icsb_sync_counters_int(
+	xfs_mount_t	*mp,
+	int		flags)
+{
+	xfs_icsb_cnts_t	cnt;
+	int		s;
+
+	/* Pass 1: lock all counters */
+	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
+		s = XFS_SB_LOCK(mp);
+
+	xfs_icsb_count(mp, &cnt, flags);
+
+	/* Step 3: update mp->m_sb fields */
+	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
+		mp->m_sb.sb_icount = cnt.icsb_icount;
+	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
+		mp->m_sb.sb_ifree = cnt.icsb_ifree;
+	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
+		mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
+
+	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
+		XFS_SB_UNLOCK(mp, s);
+}
+
+/*
+ * Accurate update of per-cpu counters to incore superblock
+ */
+STATIC void
+xfs_icsb_sync_counters(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_sync_counters_int(mp, 0);
+}
+
+/*
+ * lazy addition used for things like df, background sb syncs, etc
+ */
+void
+xfs_icsb_sync_counters_lazy(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_sync_counters_int(mp, XFS_ICSB_LAZY_COUNT);
+}
+
+/*
+ * Balance and enable/disable counters as necessary.
+ *
+ * Thresholds for re-enabling counters are somewhat magic.
+ * inode counts are chosen to be the same number as single
+ * on disk allocation chunk per CPU, and free blocks is
+ * something far enough zero that we aren't going thrash
+ * when we get near ENOSPC.
+ */
+#define XFS_ICSB_INO_CNTR_REENABLE	64
+#define XFS_ICSB_FDBLK_CNTR_REENABLE	512
+STATIC void
+xfs_icsb_balance_counter(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t  field,
+	int		flags)
+{
+	uint64_t	count, resid = 0;
+	int		weight = num_online_cpus();
+	int		s;
+
+	if (!(flags & XFS_ICSB_SB_LOCKED))
+		s = XFS_SB_LOCK(mp);
+
+	/* disable counter and sync counter */
+	xfs_icsb_disable_counter(mp, field);
+
+	/* update counters  - first CPU gets residual*/
+	switch (field) {
+	case XFS_SBS_ICOUNT:
+		count = mp->m_sb.sb_icount;
+		resid = do_div(count, weight);
+		if (count < XFS_ICSB_INO_CNTR_REENABLE)
+			goto out;
+		break;
+	case XFS_SBS_IFREE:
+		count = mp->m_sb.sb_ifree;
+		resid = do_div(count, weight);
+		if (count < XFS_ICSB_INO_CNTR_REENABLE)
+			goto out;
+		break;
+	case XFS_SBS_FDBLOCKS:
+		count = mp->m_sb.sb_fdblocks;
+		resid = do_div(count, weight);
+		if (count < XFS_ICSB_FDBLK_CNTR_REENABLE)
+			goto out;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	xfs_icsb_enable_counter(mp, field, count, resid);
+out:
+	if (!(flags & XFS_ICSB_SB_LOCKED))
+		XFS_SB_UNLOCK(mp, s);
+}
+
+STATIC int
+xfs_icsb_modify_counters_int(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field,
+	int		delta,
+	int		rsvd,
+	int		flags)
+{
+	xfs_icsb_cnts_t	*icsbp;
+	long long	lcounter;	/* long counter for 64 bit fields */
+	int		cpu, s, locked = 0;
+	int		ret = 0, balance_done = 0;
+
+again:
+	cpu = get_cpu();
+	icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu),
+	spin_lock(&icsbp->icsb_lock);
+	if (unlikely(xfs_icsb_counter_disabled(mp, field)))
+		goto slow_path;
+
+	switch (field) {
+	case XFS_SBS_ICOUNT:
+		lcounter = icsbp->icsb_icount;
+		lcounter += delta;
+		if (unlikely(lcounter < 0))
+			goto slow_path;
+		icsbp->icsb_icount = lcounter;
+		break;
+
+	case XFS_SBS_IFREE:
+		lcounter = icsbp->icsb_ifree;
+		lcounter += delta;
+		if (unlikely(lcounter < 0))
+			goto slow_path;
+		icsbp->icsb_ifree = lcounter;
+		break;
+
+	case XFS_SBS_FDBLOCKS:
+		BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
+
+		lcounter = icsbp->icsb_fdblocks;
+		lcounter += delta;
+		if (unlikely(lcounter < 0))
+			goto slow_path;
+		icsbp->icsb_fdblocks = lcounter;
+		break;
+	default:
+		BUG();
+		break;
+	}
+	spin_unlock(&icsbp->icsb_lock);
+	put_cpu();
+	if (locked)
+		XFS_SB_UNLOCK(mp, s);
+	return 0;
+
+	/*
+	 * The slow path needs to be run with the SBLOCK
+	 * held so that we prevent other threads from
+	 * attempting to run this path at the same time.
+	 * this provides exclusion for the balancing code,
+	 * and exclusive fallback if the balance does not
+	 * provide enough resources to continue in an unlocked
+	 * manner.
+	 */
+slow_path:
+	spin_unlock(&icsbp->icsb_lock);
+	put_cpu();
+
+	/* need to hold superblock incase we need
+	 * to disable a counter */
+	if (!(flags & XFS_ICSB_SB_LOCKED)) {
+		s = XFS_SB_LOCK(mp);
+		locked = 1;
+		flags |= XFS_ICSB_SB_LOCKED;
+	}
+	if (!balance_done) {
+		xfs_icsb_balance_counter(mp, field, flags);
+		balance_done = 1;
+		goto again;
+	} else {
+		/*
+		 * we might not have enough on this local
+		 * cpu to allocate for a bulk request.
+		 * We need to drain this field from all CPUs
+		 * and disable the counter fastpath
+		 */
+		xfs_icsb_disable_counter(mp, field);
+	}
+
+	ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
+
+	if (locked)
+		XFS_SB_UNLOCK(mp, s);
+	return ret;
+}
+
+STATIC int
+xfs_icsb_modify_counters(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field,
+	int		delta,
+	int		rsvd)
+{
+	return xfs_icsb_modify_counters_int(mp, field, delta, rsvd, 0);
+}
+
+/*
+ * Called when superblock is already locked
+ */
+STATIC int
+xfs_icsb_modify_counters_locked(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field,
+	int		delta,
+	int		rsvd)
+{
+	return xfs_icsb_modify_counters_int(mp, field, delta,
+						rsvd, XFS_ICSB_SB_LOCKED);
+}
+#endif
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 4c9817a8043..7cca5110ca4 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -267,6 +267,32 @@ typedef struct xfs_ioops {
 #define XFS_IODONE(vfsp) \
 	(*(mp)->m_io_ops.xfs_iodone)(vfsp)
 
+#ifdef HAVE_PERCPU_SB
+
+/*
+ * Valid per-cpu incore superblock counters. Note that if you add new counters,
+ * you may need to define new counter disabled bit field descriptors as there
+ * are more possible fields in the superblock that can fit in a bitfield on a
+ * 32 bit platform. The XFS_SBS_* values for the current current counters just
+ * fit.
+ */
+typedef struct xfs_icsb_cnts {
+	uint64_t	icsb_fdblocks;
+	uint64_t	icsb_ifree;
+	uint64_t	icsb_icount;
+	spinlock_t	icsb_lock;
+} xfs_icsb_cnts_t;
+
+#define XFS_ICSB_SB_LOCKED	(1 << 0)	/* sb already locked */
+#define XFS_ICSB_LAZY_COUNT	(1 << 1)	/* accuracy not needed */
+
+extern int	xfs_icsb_init_counters(struct xfs_mount *);
+extern void	xfs_icsb_sync_counters_lazy(struct xfs_mount *);
+
+#else
+#define xfs_icsb_init_counters(mp)	(0)
+#define xfs_icsb_sync_counters_lazy(mp)	do { } while (0)
+#endif
 
 typedef struct xfs_mount {
 	bhv_desc_t		m_bhv;		/* vfs xfs behavior */
@@ -372,6 +398,10 @@ typedef struct xfs_mount {
 	struct xfs_qmops	m_qm_ops;	/* vector of XQM ops */
 	struct xfs_ioops	m_io_ops;	/* vector of I/O ops */
 	atomic_t		m_active_trans;	/* number trans frozen */
+#ifdef HAVE_PERCPU_SB
+	xfs_icsb_cnts_t		*m_sb_cnts;	/* per-cpu superblock counters */
+	unsigned long		m_icsb_counters; /* disabled per-cpu counters */
+#endif
 } xfs_mount_t;
 
 /*
@@ -409,6 +439,8 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_DIRSYNC	(1ULL << 21)	/* synchronous directory ops */
 #define XFS_MOUNT_COMPAT_IOSIZE	(1ULL << 22)	/* don't report large preferred
 						 * I/O size in stat() */
+#define XFS_MOUNT_NO_PERCPU_SB	(1ULL << 23)	/* don't use per-cpu superblock
+						   counters */
 
 
 /*
@@ -546,6 +578,8 @@ extern void	xfs_unmountfs_close(xfs_mount_t *, struct cred *);
 extern int	xfs_unmountfs_writesb(xfs_mount_t *);
 extern int	xfs_unmount_flush(xfs_mount_t *, int);
 extern int	xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int, int);
+extern int	xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t,
+			int, int);
 extern int	xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
 			uint, int);
 extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 2a0a9efb8cc..2e104583788 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -55,7 +55,7 @@
 #include "xfs_clnt.h"
 #include "xfs_fsops.h"
 
-STATIC int xfs_sync(bhv_desc_t *, int, cred_t *);
+STATIC int	xfs_sync(bhv_desc_t *, int, cred_t *);
 
 int
 xfs_init(void)
@@ -807,6 +807,7 @@ xfs_statvfs(
 
 	statp->f_type = XFS_SB_MAGIC;
 
+	xfs_icsb_sync_counters_lazy(mp);
 	s = XFS_SB_LOCK(mp);
 	statp->f_bsize = sbp->sb_blocksize;
 	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
-- 
cgit v1.2.3


From 8758280fcc6129be89503efe93bb59eaf2f85d28 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:18:19 +1100
Subject: [XFS] Cleanup the use of zones/slabs, more consistent and allows
 flags to be passed.

SGI-PV: 949073
SGI-Modid: xfs-linux-melb:xfs-kern:25122a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/kmem.h      | 91 ++++++++++++++++++++++++++------------------
 fs/xfs/linux-2.6/xfs_buf.c   |  7 ++--
 fs/xfs/linux-2.6/xfs_super.c | 38 +++++++++---------
 fs/xfs/xfs_trans.h           |  2 +-
 fs/xfs/xfs_vfsops.c          | 34 ++++++++++++-----
 5 files changed, 100 insertions(+), 72 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index c64a29cdfff..f0268a84e6f 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -23,17 +23,8 @@
 #include <linux/mm.h>
 
 /*
- * memory management routines
+ * Process flags handling
  */
-#define KM_SLEEP	0x0001u
-#define KM_NOSLEEP	0x0002u
-#define KM_NOFS		0x0004u
-#define KM_MAYFAIL	0x0008u
-
-#define	kmem_zone	kmem_cache
-#define kmem_zone_t	struct kmem_cache
-
-typedef unsigned long xfs_pflags_t;
 
 #define PFLAGS_TEST_NOIO()              (current->flags & PF_NOIO)
 #define PFLAGS_TEST_FSTRANS()           (current->flags & PF_FSTRANS)
@@ -67,74 +58,102 @@ typedef unsigned long xfs_pflags_t;
 	*(NSTATEP) = *(OSTATEP);	\
 } while (0)
 
-static __inline gfp_t kmem_flags_convert(unsigned int __nocast flags)
+/*
+ * General memory allocation interfaces
+ */
+
+#define KM_SLEEP	0x0001u
+#define KM_NOSLEEP	0x0002u
+#define KM_NOFS		0x0004u
+#define KM_MAYFAIL	0x0008u
+
+/*
+ * We use a special process flag to avoid recursive callbacks into
+ * the filesystem during transactions.  We will also issue our own
+ * warnings, so we explicitly skip any generic ones (silly of us).
+ */
+static inline gfp_t
+kmem_flags_convert(unsigned int __nocast flags)
 {
-	gfp_t	lflags = __GFP_NOWARN;	/* we'll report problems, if need be */
+	gfp_t	lflags;
 
-#ifdef DEBUG
-	if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) {
-		printk(KERN_WARNING
-		    "XFS: memory allocation with wrong flags (%x)\n", flags);
-		BUG();
-	}
-#endif
+	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
 
 	if (flags & KM_NOSLEEP) {
-		lflags |= GFP_ATOMIC;
+		lflags = GFP_ATOMIC | __GFP_NOWARN;
 	} else {
-		lflags |= GFP_KERNEL;
-
-		/* avoid recusive callbacks to filesystem during transactions */
+		lflags = GFP_KERNEL | __GFP_NOWARN;
 		if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS))
 			lflags &= ~__GFP_FS;
 	}
-
-        return lflags;
+	return lflags;
 }
 
-static __inline kmem_zone_t *
+extern void *kmem_alloc(size_t, unsigned int __nocast);
+extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
+extern void *kmem_zalloc(size_t, unsigned int __nocast);
+extern void  kmem_free(void *, size_t);
+
+/*
+ * Zone interfaces
+ */
+
+#define KM_ZONE_HWALIGN	SLAB_HWCACHE_ALIGN
+#define KM_ZONE_RECLAIM	SLAB_RECLAIM_ACCOUNT
+#define KM_ZONE_SPREAD	0
+
+#define kmem_zone	kmem_cache
+#define kmem_zone_t	struct kmem_cache
+
+static inline kmem_zone_t *
 kmem_zone_init(int size, char *zone_name)
 {
 	return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL);
 }
 
-static __inline void
+static inline kmem_zone_t *
+kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
+		     void (*construct)(void *, kmem_zone_t *, unsigned long))
+{
+	return kmem_cache_create(zone_name, size, 0, flags, construct, NULL);
+}
+
+static inline void
 kmem_zone_free(kmem_zone_t *zone, void *ptr)
 {
 	kmem_cache_free(zone, ptr);
 }
 
-static __inline void
+static inline void
 kmem_zone_destroy(kmem_zone_t *zone)
 {
 	if (zone && kmem_cache_destroy(zone))
 		BUG();
 }
 
-extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
 extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
+extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
 
-extern void *kmem_alloc(size_t, unsigned int __nocast);
-extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
-extern void *kmem_zalloc(size_t, unsigned int __nocast);
-extern void  kmem_free(void *, size_t);
+/*
+ * Low memory cache shrinkers
+ */
 
 typedef struct shrinker *kmem_shaker_t;
 typedef int (*kmem_shake_func_t)(int, gfp_t);
 
-static __inline kmem_shaker_t
+static inline kmem_shaker_t
 kmem_shake_register(kmem_shake_func_t sfunc)
 {
 	return set_shrinker(DEFAULT_SEEKS, sfunc);
 }
 
-static __inline void
+static inline void
 kmem_shake_deregister(kmem_shaker_t shrinker)
 {
 	remove_shrinker(shrinker);
 }
 
-static __inline int
+static inline int
 kmem_shake_allow(gfp_t gfp_mask)
 {
 	return (gfp_mask & __GFP_WAIT);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index bfb4f2917bb..cdb905ab4db 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1805,13 +1805,12 @@ xfs_flush_buftarg(
 int __init
 xfs_buf_init(void)
 {
-	int		error = -ENOMEM;
-
 #ifdef XFS_BUF_TRACE
 	xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);
 #endif
 
-	xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
+	xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
+						KM_ZONE_HWALIGN, NULL);
 	if (!xfs_buf_zone)
 		goto out_free_trace_buf;
 
@@ -1839,7 +1838,7 @@ xfs_buf_init(void)
 #ifdef XFS_BUF_TRACE
 	ktrace_free(xfs_buf_trace_buf);
 #endif
-	return error;
+	return -ENOMEM;
 }
 
 void
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 59989f6f83e..0c7ed4b29c5 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -337,8 +337,8 @@ linvfs_alloc_inode(
 {
 	vnode_t			*vp;
 
-	vp = kmem_cache_alloc(xfs_vnode_zone, kmem_flags_convert(KM_SLEEP));
-	if (!vp)
+	vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
+	if (unlikely(!vp))
 		return NULL;
 	return LINVFS_GET_IP(vp);
 }
@@ -352,23 +352,21 @@ linvfs_destroy_inode(
 
 STATIC void
 linvfs_inode_init_once(
-	void			*data,
-	kmem_cache_t		*cachep,
+	void			*vnode,
+	kmem_zone_t		*zonep,
 	unsigned long		flags)
 {
-	vnode_t			*vp = (vnode_t *)data;
-
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-	    SLAB_CTOR_CONSTRUCTOR)
-		inode_init_once(LINVFS_GET_IP(vp));
+		      SLAB_CTOR_CONSTRUCTOR)
+		inode_init_once(LINVFS_GET_IP((vnode_t *)vnode));
 }
 
 STATIC int
-linvfs_init_zones(void)
+xfs_init_zones(void)
 {
-	xfs_vnode_zone = kmem_cache_create("xfs_vnode",
-				sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT,
-				linvfs_inode_init_once, NULL);
+	xfs_vnode_zone = kmem_zone_init_flags(sizeof(vnode_t), "xfs_vnode_t",
+					KM_ZONE_HWALIGN | KM_ZONE_RECLAIM,
+					linvfs_inode_init_once);
 	if (!xfs_vnode_zone)
 		goto out;
 
@@ -377,14 +375,12 @@ linvfs_init_zones(void)
 		goto out_destroy_vnode_zone;
 
 	xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE,
-			mempool_alloc_slab, mempool_free_slab,
-			xfs_ioend_zone);
+					mempool_alloc_slab, mempool_free_slab,
+					xfs_ioend_zone);
 	if (!xfs_ioend_pool)
 		goto out_free_ioend_zone;
-
 	return 0;
 
-
  out_free_ioend_zone:
 	kmem_zone_destroy(xfs_ioend_zone);
  out_destroy_vnode_zone:
@@ -394,7 +390,7 @@ linvfs_init_zones(void)
 }
 
 STATIC void
-linvfs_destroy_zones(void)
+xfs_destroy_zones(void)
 {
 	mempool_destroy(xfs_ioend_pool);
 	kmem_zone_destroy(xfs_vnode_zone);
@@ -405,7 +401,7 @@ linvfs_destroy_zones(void)
  * Attempt to flush the inode, this will actually fail
  * if the inode is pinned, but we dirty the inode again
  * at the point when it is unpinned after a log write,
- * since this is when the inode itself becomes flushable. 
+ * since this is when the inode itself becomes flushable.
  */
 STATIC int
 linvfs_write_inode(
@@ -963,7 +959,7 @@ init_xfs_fs( void )
 
 	ktrace_init(64);
 
-	error = linvfs_init_zones();
+	error = xfs_init_zones();
 	if (error < 0)
 		goto undo_zones;
 
@@ -986,7 +982,7 @@ undo_register:
 	xfs_buf_terminate();
 
 undo_buffers:
-	linvfs_destroy_zones();
+	xfs_destroy_zones();
 
 undo_zones:
 	return error;
@@ -1000,7 +996,7 @@ exit_xfs_fs( void )
 	unregister_filesystem(&xfs_fs_type);
 	xfs_cleanup();
 	xfs_buf_terminate();
-	linvfs_destroy_zones();
+	xfs_destroy_zones();
 	ktrace_uninit();
 }
 
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index d77901c07f6..e48befa4e33 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -380,7 +380,7 @@ typedef struct xfs_trans {
 	xfs_trans_header_t	t_header;	/* header for in-log trans */
 	unsigned int		t_busy_free;	/* busy descs free */
 	xfs_log_busy_chunk_t	t_busy;		/* busy/async free blocks */
-        xfs_pflags_t            t_pflags;       /* saved pflags state */
+	unsigned long		t_pflags;	/* saved process flags state */
 } xfs_trans_t;
 
 #endif	/* __KERNEL__ */
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 2e104583788..5dd84fe609c 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -77,11 +77,12 @@ xfs_init(void)
 						 "xfs_bmap_free_item");
 	xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
 					    "xfs_btree_cur");
-	xfs_inode_zone = kmem_zone_init(sizeof(xfs_inode_t), "xfs_inode");
 	xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
 	xfs_da_state_zone =
 		kmem_zone_init(sizeof(xfs_da_state_t), "xfs_da_state");
 	xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
+	xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+	xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
 
 	/*
 	 * The size of the zone allocated buf log item is the maximum
@@ -93,17 +94,30 @@ xfs_init(void)
 				(((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
 				  NBWORD) * sizeof(int))),
 			       "xfs_buf_item");
-	xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
-				       ((XFS_EFD_MAX_FAST_EXTENTS - 1) * sizeof(xfs_extent_t))),
+	xfs_efd_zone =
+		kmem_zone_init((sizeof(xfs_efd_log_item_t) +
+			       ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
+				 sizeof(xfs_extent_t))),
 				      "xfs_efd_item");
-	xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
-				       ((XFS_EFI_MAX_FAST_EXTENTS - 1) * sizeof(xfs_extent_t))),
+	xfs_efi_zone =
+		kmem_zone_init((sizeof(xfs_efi_log_item_t) +
+			       ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
+				 sizeof(xfs_extent_t))),
 				      "xfs_efi_item");
-	xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
-	xfs_ili_zone = kmem_zone_init(sizeof(xfs_inode_log_item_t), "xfs_ili");
-	xfs_chashlist_zone = kmem_zone_init(sizeof(xfs_chashlist_t),
-					    "xfs_chashlist");
-	xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
+
+	/*
+	 * These zones warrant special memory allocator hints
+	 */
+	xfs_inode_zone =
+		kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
+					KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
+					KM_ZONE_SPREAD, NULL);
+	xfs_ili_zone =
+		kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
+					KM_ZONE_SPREAD, NULL);
+	xfs_chashlist_zone =
+		kmem_zone_init_flags(sizeof(xfs_chashlist_t), "xfs_chashlist",
+					KM_ZONE_SPREAD, NULL);
 
 	/*
 	 * Allocate global trace buffers.
-- 
cgit v1.2.3


From 20722a91921bd9e9d4ba5c8f801d143e72e58418 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:19:08 +1100
Subject: [XFS] Fix a mutex_destroy diagnostic about a locked-mutex-on-destroy
 from quota code.

SGI-PV: 949149
SGI-Modid: xfs-linux-melb:xfs-kern:25123a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/quota/xfs_qm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 7c0e39dc618..fd4abb8a32c 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -2788,9 +2788,7 @@ xfs_qm_freelist_destroy(xfs_frlist_t *ql)
 		xfs_qm_dqdestroy(dqp);
 		dqp = nextdqp;
 	}
-	/*
-	 * Don't bother about unlocking.
-	 */
+	mutex_unlock(&ql->qh_lock);
 	mutex_destroy(&ql->qh_lock);
 
 	ASSERT(ql->qh_nelems == 0);
-- 
cgit v1.2.3


From e0cc2325d151c3f4f3276b2deda734faf742146f Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:19:55 +1100
Subject: [XFS] Flag the XFS inode cache as in need of spreading also.

SGI-PV: 949073
SGI-Modid: xfs-linux-melb:xfs-kern:25170a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 0c7ed4b29c5..4d8613f65c2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -365,7 +365,8 @@ STATIC int
 xfs_init_zones(void)
 {
 	xfs_vnode_zone = kmem_zone_init_flags(sizeof(vnode_t), "xfs_vnode_t",
-					KM_ZONE_HWALIGN | KM_ZONE_RECLAIM,
+					KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
+					KM_ZONE_SPREAD,
 					linvfs_inode_init_once);
 	if (!xfs_vnode_zone)
 		goto out;
-- 
cgit v1.2.3


From d2c32edf64a7e9bc8dfb5cb3a8f7bf7be94c93ae Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:20:13 +1100
Subject: [XFS] When compiling with gcc 4.0 and CONFIG_SMP unset, there are
 many warnings along the lines: xfs_linux.h:103:5: warning: "CONFIG_SMP" is
 not defined.

SGI-PV: 946630
SGI-Modid: xfs-linux-melb:xfs-kern:25171a

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_linux.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 377a9f54a04..e2be64982bc 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -100,7 +100,7 @@
  */
 #undef  HAVE_REFCACHE	/* reference cache not needed for NFS in 2.6 */
 #define HAVE_SENDFILE	/* sendfile(2) exists in 2.6, but not in 2.4 */
-#if CONFIG_SMP
+#ifdef CONFIG_SMP
 #define HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */
 #else
 #undef  HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */
-- 
cgit v1.2.3


From 2d0f864be3266eb0a8b4b48f36e2f777eace00b3 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:20:33 +1100
Subject: [XFS] Make headers compile for more compiler variants; minor cleanup.

SGI-PV: 949432
SGI-Modid: xfs-linux-melb:xfs-kern:25184a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_dir2.h   | 27 +++++++++++----------------
 fs/xfs/xfs_dir_sf.h | 24 +++++++++++++-----------
 2 files changed, 24 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 3158f5dc431..7dd364b1e03 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -55,16 +55,16 @@ typedef	__uint32_t	xfs_dir2_db_t;
 /*
  * Byte offset in a directory.
  */
-typedef	xfs_off_t		xfs_dir2_off_t;
+typedef	xfs_off_t	xfs_dir2_off_t;
 
 /*
  * For getdents, argument struct for put routines.
  */
 typedef int (*xfs_dir2_put_t)(struct xfs_dir2_put_args *pa);
 typedef struct xfs_dir2_put_args {
-	xfs_off_t		cook;		/* cookie of (next) entry */
+	xfs_off_t	cook;		/* cookie of (next) entry */
 	xfs_intino_t	ino;		/* inode number */
-	struct xfs_dirent	*dbp;		/* buffer pointer */
+	xfs_dirent_t	*dbp;		/* buffer pointer */
 	char		*name;		/* directory entry name */
 	int		namelen;	/* length of name */
 	int		done;		/* output: set if value was stored */
@@ -75,18 +75,13 @@ typedef struct xfs_dir2_put_args {
 /*
  * Other interfaces used by the rest of the dir v2 code.
  */
-extern int
-	xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
-			    xfs_dir2_db_t *dbp);
-
-extern int
-	xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *vp);
-
-extern int
-	xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *vp);
-
-extern int
-	xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
-			      struct xfs_dabuf *bp);
+extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
+				xfs_dir2_db_t *dbp);
+extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp,
+				int *vp);
+extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
+				int *vp);
+extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
+				struct xfs_dabuf *bp);
 
 #endif	/* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir_sf.h b/fs/xfs/xfs_dir_sf.h
index fe44c6f4d56..5b20b4d3f57 100644
--- a/fs/xfs/xfs_dir_sf.h
+++ b/fs/xfs/xfs_dir_sf.h
@@ -35,19 +35,21 @@ typedef struct { __uint8_t i[sizeof(xfs_ino_t)]; } xfs_dir_ino_t;
  * and the elements much be memcpy'd out into a work area to get correct
  * alignment for the inode number fields.
  */
+typedef struct xfs_dir_sf_hdr {		/* constant-structure header block */
+	xfs_dir_ino_t	parent;		/* parent dir inode number */
+	__uint8_t	count;		/* count of active entries */
+} xfs_dir_sf_hdr_t;
+
+typedef struct xfs_dir_sf_entry {
+	xfs_dir_ino_t	inumber;	/* referenced inode number */
+	__uint8_t	namelen;	/* actual length of name (no NULL) */
+	__uint8_t	name[1];	/* name */
+} xfs_dir_sf_entry_t;
+
 typedef struct xfs_dir_shortform {
-	struct xfs_dir_sf_hdr {		/* constant-structure header block */
-		xfs_dir_ino_t parent;	/* parent dir inode number */
-		__uint8_t count;	/* count of active entries */
-	} hdr;
-	struct xfs_dir_sf_entry {
-		xfs_dir_ino_t inumber;	/* referenced inode number */
-		__uint8_t namelen;	/* actual length of name (no NULL) */
-		__uint8_t name[1];	/* name */
-	} list[1];			/* variable sized array */
+	xfs_dir_sf_hdr_t	hdr;
+	xfs_dir_sf_entry_t	list[1];	/* variable sized array */
 } xfs_dir_shortform_t;
-typedef struct xfs_dir_sf_hdr xfs_dir_sf_hdr_t;
-typedef struct xfs_dir_sf_entry xfs_dir_sf_entry_t;
 
 /*
  * We generate this then sort it, so that readdirs are returned in
-- 
cgit v1.2.3


From e8234a6871aa0de1ed0aeeecb5230ecf3ab414e2 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Tue, 14 Mar 2006 13:23:52 +1100
Subject: [XFS] Add support for hotplug CPUs to the per-CPU superblock counters
 by registering a notifier callback that listens to CPU up/down events to
 modify the counters appropriately.

SGI-PV: 949726
SGI-Modid: xfs-linux-melb:xfs-kern:25214a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_linux.h |  2 ++
 fs/xfs/xfs_mount.c           | 74 ++++++++++++++++++++++++++++++++++++++++++--
 fs/xfs/xfs_mount.h           |  1 +
 3 files changed, 75 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index e2be64982bc..9fdc14cffb7 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -73,6 +73,8 @@
 #include <linux/list.h>
 #include <linux/proc_fs.h>
 #include <linux/sort.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
 
 #include <asm/page.h>
 #include <asm/div64.h>
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 9b43b7b3d76..a64110b9023 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -60,6 +60,7 @@ STATIC int	xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
 						int, int);
 STATIC int	xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t,
 						int, int);
+STATIC int	xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
 
 #else
 
@@ -1716,9 +1717,72 @@ xfs_mount_log_sbunit(
  * To ensure counters don't remain disabled, they are rebalanced when
  * the global resource goes above a higher threshold (i.e. some hysteresis
  * is present to prevent thrashing).
+ */
+
+/*
+ * hot-plug CPU notifier support.
  *
- * Note: hotplug CPUs not yet supported
+ * We cannot use the hotcpu_register() function because it does
+ * not allow notifier instances. We need a notifier per filesystem
+ * as we need to be able to identify the filesystem to balance
+ * the counters out. This is acheived by having a notifier block
+ * embedded in the xfs_mount_t and doing pointer magic to get the
+ * mount pointer from the notifier block address.
  */
+STATIC int
+xfs_icsb_cpu_notify(
+	struct notifier_block *nfb,
+	unsigned long action,
+	void *hcpu)
+{
+	xfs_icsb_cnts_t *cntp;
+	xfs_mount_t	*mp;
+	int		s;
+
+	mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
+	cntp = (xfs_icsb_cnts_t *)
+			per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
+	switch (action) {
+	case CPU_UP_PREPARE:
+		/* Easy Case - initialize the area and locks, and
+		 * then rebalance when online does everything else for us. */
+		spin_lock_init(&cntp->icsb_lock);
+		cntp->icsb_icount = 0;
+		cntp->icsb_ifree = 0;
+		cntp->icsb_fdblocks = 0;
+		break;
+	case CPU_ONLINE:
+		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
+		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
+		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
+		break;
+	case CPU_DEAD:
+		/* Disable all the counters, then fold the dead cpu's
+		 * count into the total on the global superblock and
+		 * re-enable the counters. */
+		s = XFS_SB_LOCK(mp);
+		xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
+		xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
+		xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
+
+		mp->m_sb.sb_icount += cntp->icsb_icount;
+		mp->m_sb.sb_ifree += cntp->icsb_ifree;
+		mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
+
+		cntp->icsb_icount = 0;
+		cntp->icsb_ifree = 0;
+		cntp->icsb_fdblocks = 0;
+
+		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED);
+		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED);
+		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, XFS_ICSB_SB_LOCKED);
+		XFS_SB_UNLOCK(mp, s);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
 int
 xfs_icsb_init_counters(
 	xfs_mount_t	*mp)
@@ -1730,6 +1794,10 @@ xfs_icsb_init_counters(
 	if (mp->m_sb_cnts == NULL)
 		return -ENOMEM;
 
+	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
+	mp->m_icsb_notifier.priority = 0;
+	register_cpu_notifier(&mp->m_icsb_notifier);
+
 	for_each_online_cpu(i) {
 		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
 		spin_lock_init(&cntp->icsb_lock);
@@ -1746,8 +1814,10 @@ STATIC void
 xfs_icsb_destroy_counters(
 	xfs_mount_t	*mp)
 {
-	if (mp->m_sb_cnts)
+	if (mp->m_sb_cnts) {
+		unregister_cpu_notifier(&mp->m_icsb_notifier);
 		free_percpu(mp->m_sb_cnts);
+	}
 }
 
 
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7cca5110ca4..9d2ffbdc37a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -401,6 +401,7 @@ typedef struct xfs_mount {
 #ifdef HAVE_PERCPU_SB
 	xfs_icsb_cnts_t		*m_sb_cnts;	/* per-cpu superblock counters */
 	unsigned long		m_icsb_counters; /* disabled per-cpu counters */
+	struct notifier_block	m_icsb_notifier; /* hotplug cpu notifier */
 #endif
 } xfs_mount_t;
 
-- 
cgit v1.2.3


From a780143ea53d26362b7cfb6666c8d04fb989bb7a Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:24:46 +1100
Subject: [XFS] UUID endianess fix.  uu_timelow is a 32bit field and needs to
 be swapped with be32_to_cpu.

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25232a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/support/uuid.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index a3d565a6773..e157015c70f 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -21,13 +21,6 @@ static mutex_t	uuid_monitor;
 static int	uuid_table_size;
 static uuid_t	*uuid_table;
 
-void
-uuid_init(void)
-{
-	mutex_init(&uuid_monitor);
-}
-
-
 /* IRIX interpretation of an uuid_t */
 typedef struct {
 	__be32	uu_timelow;
@@ -50,7 +43,7 @@ uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
 
 	fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
 		   be16_to_cpu(uup->uu_timemid);
-	fsid[1] = be16_to_cpu(uup->uu_timelow);
+	fsid[1] = be32_to_cpu(uup->uu_timelow);
 }
 
 void
@@ -139,3 +132,9 @@ uuid_table_remove(uuid_t *uuid)
 	ASSERT(i < uuid_table_size);
 	mutex_unlock(&uuid_monitor);
 }
+
+void
+uuid_init(void)
+{
+	mutex_init(&uuid_monitor);
+}
-- 
cgit v1.2.3


From fcce0f1f9ae8d49fd27d418428034a505816d395 Mon Sep 17 00:00:00 2001
From: Tim Shimmin <tes@sgi.com>
Date: Tue, 14 Mar 2006 13:25:02 +1100
Subject: [XFS] forgot a couple of calls to XLOG_VEC_SET_TYPE when porting from
 irix to linux.

SGI-PV: 931456
SGI-Modid: xfs-linux-melb:xfs-kern:25238a

Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/quota/xfs_dquot_item.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 2ec6b441849..e4e5f05b841 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -79,9 +79,11 @@ xfs_qm_dquot_logitem_format(
 
 	logvec->i_addr = (xfs_caddr_t)&logitem->qli_format;
 	logvec->i_len  = sizeof(xfs_dq_logformat_t);
+	XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_QFORMAT);
 	logvec++;
 	logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core;
 	logvec->i_len  = sizeof(xfs_disk_dquot_t);
+	XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_DQUOT);
 
 	ASSERT(2 == logitem->qli_item.li_desc->lid_size);
 	logitem->qli_format.qlf_size = 2;
-- 
cgit v1.2.3


From 02d7c92334c84897d7d2840fc25e5896535766f9 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:26:09 +1100
Subject: [XFS] Use XFS_VFSTOM in more places instead of open coding it.

SGI-PV: 947206
SGI-Modid: xfs-linux-melb:xfs-kern:25310a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_vfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c
index c855d62e534..a6b4084bda8 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.c
+++ b/fs/xfs/linux-2.6/xfs_vfs.c
@@ -295,7 +295,7 @@ bhv_remove_all_vfsops(
 	bhv_remove_vfsops(vfsp, VFS_POSITION_DM);
 	if (!freebase)
 		return;
-	mp = XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfsp), &xfs_vfsops));
+	mp = XFS_VFSTOM(vfsp);
 	VFS_REMOVEBHV(vfsp, &mp->m_bhv);
 	xfs_mount_free(mp, 0);
 }
-- 
cgit v1.2.3


From f51623b21fe3068d12f0c5d39e02fd2549635a99 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:26:27 +1100
Subject: [XFS] Move some code around to avoid prototypes and prep for future
 writepages code.

SGI-PV: 950211
SGI-Modid: xfs-linux-melb:xfs-kern:25311a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 367 ++++++++++++++++++++++----------------------
 1 file changed, 183 insertions(+), 184 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 74d8be87f98..58fc7ade90b 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -43,7 +43,32 @@
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
-STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
+
+STATIC void
+xfs_count_page_state(
+	struct page		*page,
+	int			*delalloc,
+	int			*unmapped,
+	int			*unwritten)
+{
+	struct buffer_head	*bh, *head;
+
+	*delalloc = *unmapped = *unwritten = 0;
+
+	bh = head = page_buffers(page);
+	do {
+		if (buffer_uptodate(bh) && !buffer_mapped(bh))
+			(*unmapped) = 1;
+		else if (buffer_unwritten(bh) && !buffer_delay(bh))
+			clear_buffer_unwritten(bh);
+		else if (buffer_unwritten(bh))
+			(*unwritten) = 1;
+		else if (buffer_delay(bh))
+			(*delalloc) = 1;
+	} while ((bh = bh->b_this_page) != head);
+}
+
+
 
 #if defined(XFS_RW_TRACE)
 void
@@ -1040,6 +1065,154 @@ error:
 	return err;
 }
 
+/*
+ * writepage: Called from one of two places:
+ *
+ * 1. we are flushing a delalloc buffer head.
+ *
+ * 2. we are writing out a dirty page. Typically the page dirty
+ *    state is cleared before we get here. In this case is it
+ *    conceivable we have no buffer heads.
+ *
+ * For delalloc space on the page we need to allocate space and
+ * flush it. For unmapped buffer heads on the page we should
+ * allocate space if the page is uptodate. For any other dirty
+ * buffer heads on the page we should flush them.
+ *
+ * If we detect that a transaction would be required to flush
+ * the page, we have to check the process flags first, if we
+ * are already in a transaction or disk I/O during allocations
+ * is off, we need to fail the writepage and redirty the page.
+ */
+
+STATIC int
+linvfs_writepage(
+	struct page		*page,
+	struct writeback_control *wbc)
+{
+	int			error;
+	int			need_trans;
+	int			delalloc, unmapped, unwritten;
+	struct inode		*inode = page->mapping->host;
+
+	xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0);
+
+	/*
+	 * We need a transaction if:
+	 *  1. There are delalloc buffers on the page
+	 *  2. The page is uptodate and we have unmapped buffers
+	 *  3. The page is uptodate and we have no buffers
+	 *  4. There are unwritten buffers on the page
+	 */
+
+	if (!page_has_buffers(page)) {
+		unmapped = 1;
+		need_trans = 1;
+	} else {
+		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
+		if (!PageUptodate(page))
+			unmapped = 0;
+		need_trans = delalloc + unmapped + unwritten;
+	}
+
+	/*
+	 * If we need a transaction and the process flags say
+	 * we are already in a transaction, or no IO is allowed
+	 * then mark the page dirty again and leave the page
+	 * as is.
+	 */
+	if (PFLAGS_TEST_FSTRANS() && need_trans)
+		goto out_fail;
+
+	/*
+	 * Delay hooking up buffer heads until we have
+	 * made our go/no-go decision.
+	 */
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+
+	/*
+	 * Convert delayed allocate, unwritten or unmapped space
+	 * to real space and flush out to disk.
+	 */
+	error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
+	if (error == -EAGAIN)
+		goto out_fail;
+	if (unlikely(error < 0))
+		goto out_unlock;
+
+	return 0;
+
+out_fail:
+	redirty_page_for_writepage(wbc, page);
+	unlock_page(page);
+	return 0;
+out_unlock:
+	unlock_page(page);
+	return error;
+}
+
+/*
+ * Called to move a page into cleanable state - and from there
+ * to be released. Possibly the page is already clean. We always
+ * have buffer heads in this call.
+ *
+ * Returns 0 if the page is ok to release, 1 otherwise.
+ *
+ * Possible scenarios are:
+ *
+ * 1. We are being called to release a page which has been written
+ *    to via regular I/O. buffer heads will be dirty and possibly
+ *    delalloc. If no delalloc buffer heads in this case then we
+ *    can just return zero.
+ *
+ * 2. We are called to release a page which has been written via
+ *    mmap, all we need to do is ensure there is no delalloc
+ *    state in the buffer heads, if not we can let the caller
+ *    free them and we should come back later via writepage.
+ */
+STATIC int
+linvfs_release_page(
+	struct page		*page,
+	gfp_t			gfp_mask)
+{
+	struct inode		*inode = page->mapping->host;
+	int			dirty, delalloc, unmapped, unwritten;
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = 1,
+	};
+
+	xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask);
+
+	xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
+	if (!delalloc && !unwritten)
+		goto free_buffers;
+
+	if (!(gfp_mask & __GFP_FS))
+		return 0;
+
+	/* If we are already inside a transaction or the thread cannot
+	 * do I/O, we cannot release this page.
+	 */
+	if (PFLAGS_TEST_FSTRANS())
+		return 0;
+
+	/*
+	 * Convert delalloc space to real space, do not flush the
+	 * data out to disk, that will be done by the caller.
+	 * Never need to allocate space here - we will always
+	 * come back to writepage in that case.
+	 */
+	dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
+	if (dirty == 0 && !unwritten)
+		goto free_buffers;
+	return 0;
+
+free_buffers:
+	return try_to_free_buffers(page);
+}
+
 STATIC int
 __linvfs_get_block(
 	struct inode		*inode,
@@ -1223,6 +1396,15 @@ linvfs_direct_IO(
 	return ret;
 }
 
+STATIC int
+linvfs_prepare_write(
+	struct file		*file,
+	struct page		*page,
+	unsigned int		from,
+	unsigned int		to)
+{
+	return block_prepare_write(page, from, to, linvfs_get_block);
+}
 
 STATIC sector_t
 linvfs_bmap(
@@ -1259,118 +1441,6 @@ linvfs_readpages(
 	return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block);
 }
 
-STATIC void
-xfs_count_page_state(
-	struct page		*page,
-	int			*delalloc,
-	int			*unmapped,
-	int			*unwritten)
-{
-	struct buffer_head	*bh, *head;
-
-	*delalloc = *unmapped = *unwritten = 0;
-
-	bh = head = page_buffers(page);
-	do {
-		if (buffer_uptodate(bh) && !buffer_mapped(bh))
-			(*unmapped) = 1;
-		else if (buffer_unwritten(bh) && !buffer_delay(bh))
-			clear_buffer_unwritten(bh);
-		else if (buffer_unwritten(bh))
-			(*unwritten) = 1;
-		else if (buffer_delay(bh))
-			(*delalloc) = 1;
-	} while ((bh = bh->b_this_page) != head);
-}
-
-
-/*
- * writepage: Called from one of two places:
- *
- * 1. we are flushing a delalloc buffer head.
- *
- * 2. we are writing out a dirty page. Typically the page dirty
- *    state is cleared before we get here. In this case is it
- *    conceivable we have no buffer heads.
- *
- * For delalloc space on the page we need to allocate space and
- * flush it. For unmapped buffer heads on the page we should
- * allocate space if the page is uptodate. For any other dirty
- * buffer heads on the page we should flush them.
- *
- * If we detect that a transaction would be required to flush
- * the page, we have to check the process flags first, if we
- * are already in a transaction or disk I/O during allocations
- * is off, we need to fail the writepage and redirty the page.
- */
-
-STATIC int
-linvfs_writepage(
-	struct page		*page,
-	struct writeback_control *wbc)
-{
-	int			error;
-	int			need_trans;
-	int			delalloc, unmapped, unwritten;
-	struct inode		*inode = page->mapping->host;
-
-	xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0);
-
-	/*
-	 * We need a transaction if:
-	 *  1. There are delalloc buffers on the page
-	 *  2. The page is uptodate and we have unmapped buffers
-	 *  3. The page is uptodate and we have no buffers
-	 *  4. There are unwritten buffers on the page
-	 */
-
-	if (!page_has_buffers(page)) {
-		unmapped = 1;
-		need_trans = 1;
-	} else {
-		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
-		if (!PageUptodate(page))
-			unmapped = 0;
-		need_trans = delalloc + unmapped + unwritten;
-	}
-
-	/*
-	 * If we need a transaction and the process flags say
-	 * we are already in a transaction, or no IO is allowed
-	 * then mark the page dirty again and leave the page
-	 * as is.
-	 */
-	if (PFLAGS_TEST_FSTRANS() && need_trans)
-		goto out_fail;
-
-	/*
-	 * Delay hooking up buffer heads until we have
-	 * made our go/no-go decision.
-	 */
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
-
-	/*
-	 * Convert delayed allocate, unwritten or unmapped space
-	 * to real space and flush out to disk.
-	 */
-	error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
-	if (error == -EAGAIN)
-		goto out_fail;
-	if (unlikely(error < 0))
-		goto out_unlock;
-
-	return 0;
-
-out_fail:
-	redirty_page_for_writepage(wbc, page);
-	unlock_page(page);
-	return 0;
-out_unlock:
-	unlock_page(page);
-	return error;
-}
-
 STATIC int
 linvfs_invalidate_page(
 	struct page		*page,
@@ -1381,77 +1451,6 @@ linvfs_invalidate_page(
 	return block_invalidatepage(page, offset);
 }
 
-/*
- * Called to move a page into cleanable state - and from there
- * to be released. Possibly the page is already clean. We always
- * have buffer heads in this call.
- *
- * Returns 0 if the page is ok to release, 1 otherwise.
- *
- * Possible scenarios are:
- *
- * 1. We are being called to release a page which has been written
- *    to via regular I/O. buffer heads will be dirty and possibly
- *    delalloc. If no delalloc buffer heads in this case then we
- *    can just return zero.
- *
- * 2. We are called to release a page which has been written via
- *    mmap, all we need to do is ensure there is no delalloc
- *    state in the buffer heads, if not we can let the caller
- *    free them and we should come back later via writepage.
- */
-STATIC int
-linvfs_release_page(
-	struct page		*page,
-	gfp_t			gfp_mask)
-{
-	struct inode		*inode = page->mapping->host;
-	int			dirty, delalloc, unmapped, unwritten;
-	struct writeback_control wbc = {
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = 1,
-	};
-
-	xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask);
-
-	xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
-	if (!delalloc && !unwritten)
-		goto free_buffers;
-
-	if (!(gfp_mask & __GFP_FS))
-		return 0;
-
-	/* If we are already inside a transaction or the thread cannot
-	 * do I/O, we cannot release this page.
-	 */
-	if (PFLAGS_TEST_FSTRANS())
-		return 0;
-
-	/*
-	 * Convert delalloc space to real space, do not flush the
-	 * data out to disk, that will be done by the caller.
-	 * Never need to allocate space here - we will always
-	 * come back to writepage in that case.
-	 */
-	dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
-	if (dirty == 0 && !unwritten)
-		goto free_buffers;
-	return 0;
-
-free_buffers:
-	return try_to_free_buffers(page);
-}
-
-STATIC int
-linvfs_prepare_write(
-	struct file		*file,
-	struct page		*page,
-	unsigned int		from,
-	unsigned int		to)
-{
-	return block_prepare_write(page, from, to, linvfs_get_block);
-}
-
 struct address_space_operations linvfs_aops = {
 	.readpage		= linvfs_readpage,
 	.readpages		= linvfs_readpages,
-- 
cgit v1.2.3


From 87cbc49cd4b773a972bce56c5dd09c4717f3285b Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:26:43 +1100
Subject: [XFS] Add xfs_map_buffer helper, use it in a couple of places.

SGI-PV: 950211
SGI-Modid: xfs-linux-melb:xfs-kern:25312a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 49 +++++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 58fc7ade90b..4b6bfdb8251 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -486,6 +486,26 @@ xfs_add_to_ioend(
 	ioend->io_size += bh->b_size;
 }
 
+STATIC void
+xfs_map_buffer(
+	struct buffer_head	*bh,
+	xfs_iomap_t		*mp,
+	xfs_off_t		offset,
+	uint			block_bits)
+{
+	sector_t		bn;
+
+	ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL);
+
+	bn = (mp->iomap_bn >> (block_bits - BBSHIFT)) +
+	      ((offset - mp->iomap_offset) >> block_bits);
+
+	ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME));
+
+	bh->b_blocknr = bn;
+	set_buffer_mapped(bh);
+}
+
 STATIC void
 xfs_map_at_offset(
 	struct buffer_head	*bh,
@@ -493,22 +513,11 @@ xfs_map_at_offset(
 	int			block_bits,
 	xfs_iomap_t		*iomapp)
 {
-	xfs_daddr_t		bn;
-	int			sector_shift;
-
 	ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
 	ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
-	ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
-
-	sector_shift = block_bits - BBSHIFT;
-	bn = (iomapp->iomap_bn >> sector_shift) +
-	      ((offset - iomapp->iomap_offset) >> block_bits);
-
-	ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME));
-	ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
 
 	lock_buffer(bh);
-	bh->b_blocknr = bn;
+	xfs_map_buffer(bh, iomapp, offset, block_bits);
 	bh->b_bdev = iomapp->iomap_target->bt_bdev;
 	set_buffer_mapped(bh);
 	clear_buffer_delay(bh);
@@ -1246,21 +1255,13 @@ __linvfs_get_block(
 		return 0;
 
 	if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
-		xfs_daddr_t	bn;
-		xfs_off_t	delta;
-
-		/* For unwritten extents do not report a disk address on
+		/*
+		 * For unwritten extents do not report a disk address on
 		 * the read case (treat as if we're reading into a hole).
 		 */
 		if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) {
-			delta = offset - iomap.iomap_offset;
-			delta >>= inode->i_blkbits;
-
-			bn = iomap.iomap_bn >> (inode->i_blkbits - BBSHIFT);
-			bn += delta;
-			BUG_ON(!bn && !(iomap.iomap_flags & IOMAP_REALTIME));
-			bh_result->b_blocknr = bn;
-			set_buffer_mapped(bh_result);
+			xfs_map_buffer(bh_result, &iomap, offset,
+				       inode->i_blkbits);
 		}
 		if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
 			if (direct)
-- 
cgit v1.2.3


From 01e1b69cfcdcfdd5b405165eaba29428f8b18a7c Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Tue, 14 Mar 2006 13:29:16 +1100
Subject: =?UTF-8?q?[XFS]=20using=20a=20spinlock=20per=20cpu=20for=20superb?=
 =?UTF-8?q?lock=20counter=20exclusion=20results=20in=20a=20pre=C4=93mpt=20?=
 =?UTF-8?q?counter=20overflow=20at=20256p=20and=20above.=20Change=20the=20?=
 =?UTF-8?q?exclusion=20mechanism=20to=20use=20atomic=20bit=20operations=20?=
 =?UTF-8?q?and=20busy=20wait=20loops=20to=20emulate=20the=20spin=20lock=20?=
 =?UTF-8?q?exclusion=20mechanism=20but=20without=20the=20preempt=20count?=
 =?UTF-8?q?=20issues.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SGI-PV: 950027
SGI-Modid: xfs-linux-melb:xfs-kern:25338a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_linux.h |  1 +
 fs/xfs/xfs_mount.c           | 37 ++++++++++++++++++++++++-------------
 fs/xfs/xfs_mount.h           |  4 +++-
 3 files changed, 28 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 9fdc14cffb7..bd88ccb0cad 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -75,6 +75,7 @@
 #include <linux/sort.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
+#include <linux/delay.h>
 
 #include <asm/page.h>
 #include <asm/div64.h>
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index a64110b9023..d62aee02736 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1746,10 +1746,7 @@ xfs_icsb_cpu_notify(
 	case CPU_UP_PREPARE:
 		/* Easy Case - initialize the area and locks, and
 		 * then rebalance when online does everything else for us. */
-		spin_lock_init(&cntp->icsb_lock);
-		cntp->icsb_icount = 0;
-		cntp->icsb_ifree = 0;
-		cntp->icsb_fdblocks = 0;
+		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
 		break;
 	case CPU_ONLINE:
 		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
@@ -1769,9 +1766,7 @@ xfs_icsb_cpu_notify(
 		mp->m_sb.sb_ifree += cntp->icsb_ifree;
 		mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
 
-		cntp->icsb_icount = 0;
-		cntp->icsb_ifree = 0;
-		cntp->icsb_fdblocks = 0;
+		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
 
 		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED);
 		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED);
@@ -1800,7 +1795,7 @@ xfs_icsb_init_counters(
 
 	for_each_online_cpu(i) {
 		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-		spin_lock_init(&cntp->icsb_lock);
+		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
 	}
 	/*
 	 * start with all counters disabled so that the
@@ -1820,6 +1815,22 @@ xfs_icsb_destroy_counters(
 	}
 }
 
+STATIC inline void
+xfs_icsb_lock_cntr(
+	xfs_icsb_cnts_t	*icsbp)
+{
+	while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
+		ndelay(1000);
+	}
+}
+
+STATIC inline void
+xfs_icsb_unlock_cntr(
+	xfs_icsb_cnts_t	*icsbp)
+{
+	clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
+}
+
 
 STATIC inline void
 xfs_icsb_lock_all_counters(
@@ -1830,7 +1841,7 @@ xfs_icsb_lock_all_counters(
 
 	for_each_online_cpu(i) {
 		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-		spin_lock(&cntp->icsb_lock);
+		xfs_icsb_lock_cntr(cntp);
 	}
 }
 
@@ -1843,7 +1854,7 @@ xfs_icsb_unlock_all_counters(
 
 	for_each_online_cpu(i) {
 		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-		spin_unlock(&cntp->icsb_lock);
+		xfs_icsb_unlock_cntr(cntp);
 	}
 }
 
@@ -2070,7 +2081,7 @@ xfs_icsb_modify_counters_int(
 again:
 	cpu = get_cpu();
 	icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu),
-	spin_lock(&icsbp->icsb_lock);
+	xfs_icsb_lock_cntr(icsbp);
 	if (unlikely(xfs_icsb_counter_disabled(mp, field)))
 		goto slow_path;
 
@@ -2104,7 +2115,7 @@ again:
 		BUG();
 		break;
 	}
-	spin_unlock(&icsbp->icsb_lock);
+	xfs_icsb_unlock_cntr(icsbp);
 	put_cpu();
 	if (locked)
 		XFS_SB_UNLOCK(mp, s);
@@ -2120,7 +2131,7 @@ again:
 	 * manner.
 	 */
 slow_path:
-	spin_unlock(&icsbp->icsb_lock);
+	xfs_icsb_unlock_cntr(icsbp);
 	put_cpu();
 
 	/* need to hold superblock incase we need
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9d2ffbdc37a..29cfcf0c11b 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -280,9 +280,11 @@ typedef struct xfs_icsb_cnts {
 	uint64_t	icsb_fdblocks;
 	uint64_t	icsb_ifree;
 	uint64_t	icsb_icount;
-	spinlock_t	icsb_lock;
+	unsigned long	icsb_flags;
 } xfs_icsb_cnts_t;
 
+#define XFS_ICSB_FLAG_LOCK	(1 << 0)	/* counter lock bit */
+
 #define XFS_ICSB_SB_LOCKED	(1 << 0)	/* sb already locked */
 #define XFS_ICSB_LAZY_COUNT	(1 << 1)	/* accuracy not needed */
 
-- 
cgit v1.2.3


From 9f989c9455aac417c34af9c505e6b169055251da Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:29:32 +1100
Subject: [XFS] Additional mount time superblock validation checks.

SGI-PV: 950491
SGI-Modid: xfs-linux-melb:xfs-kern:25354a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_mount.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d62aee02736..20e8abc16d1 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -268,9 +268,12 @@ xfs_mount_validate_sb(
 	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG			||
 	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE			||
 	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE			||
+	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||
+	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||
+	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||
 	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
 	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)	||
-	    sbp->sb_imax_pct > 100)) {
+	    (sbp->sb_imax_pct > 100 || sbp->sb_imax_pct < 1))) {
 		cmn_err(CE_WARN, "XFS: SB sanity check 1 failed");
 		XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(3)",
 				     XFS_ERRLEVEL_LOW, mp, sbp);
-- 
cgit v1.2.3


From 4eea22f01bb4fdba1aab4430c33adbe88d9d4985 Mon Sep 17 00:00:00 2001
From: Mandy Kirkconnell <alkirkco@sgi.com>
Date: Tue, 14 Mar 2006 13:29:52 +1100
Subject: [XFS] 929045 567344 This mod re-organizes some of the in-core file
 extent code to prepare for an upcoming mod which will introduce multi-level
 in-core extent allocations. Although the in-core extent management is using a
 new code path in this mod, the functionality remains the same. Major changes
 include:	- Introduce 10 new subroutines which re-orgainze the existing
 code but	do NOT change functionality: xfs_iext_get_ext()	  
 xfs_iext_insert()	     xfs_iext_add()  xfs_iext_remove()	  
 xfs_iext_remove_inline() xfs_iext_remove_direct()	
 xfs_iext_realloc_direct() xfs_iext_direct_to_inline()	   
 xfs_iext_inline_to_direct() xfs_iext_destroy() - Remove 2 subroutines
 (functionality moved to new subroutines above):	    xfs_iext_realloc()
 -replaced by xfs_iext_add() and xfs_iext_remove()	     
 xfs_bmap_insert_exlist() - replaced by xfs_iext_insert()	 
 xfs_bmap_delete_exlist() - replaced by xfs_iext_remove() - Replace all
 hard-coded (indexed) extent assignments with a call to	 xfs_iext_get_ext() -
 Replace all extent record pointer arithmetic (ep++, ep--, base + lastx,..)  
 with calls to xfs_iext_get_ext() - Update comments to remove the idea of a
 single "extent list" and   introduce "extent record" terminology instead

SGI-PV: 928864
SGI-Modid: xfs-linux-melb:xfs-kern:207390a

Signed-off-by: Mandy Kirkconnell <alkirkco@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/quota/xfs_qm.c   |   9 +-
 fs/xfs/xfs_bmap.c       | 496 +++++++++++++++++++++---------------------------
 fs/xfs/xfs_bmap.h       |   4 +-
 fs/xfs/xfs_bmap_btree.c |  10 +-
 fs/xfs/xfs_inode.c      | 470 +++++++++++++++++++++++++++++++++------------
 fs/xfs/xfs_inode.h      |  18 +-
 6 files changed, 597 insertions(+), 410 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index fd4abb8a32c..1fb757ef3f4 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1704,9 +1704,9 @@ xfs_qm_get_rtblks(
 	xfs_qcnt_t	*O_rtblks)
 {
 	xfs_filblks_t	rtblks;			/* total rt blks */
+	xfs_extnum_t	idx;			/* extent record index */
 	xfs_ifork_t	*ifp;			/* inode fork pointer */
 	xfs_extnum_t	nextents;		/* number of extent entries */
-	xfs_bmbt_rec_t	*base;			/* base of extent array */
 	xfs_bmbt_rec_t	*ep;			/* pointer to an extent entry */
 	int		error;
 
@@ -1717,10 +1717,11 @@ xfs_qm_get_rtblks(
 			return error;
 	}
 	rtblks = 0;
-	nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
-	base = &ifp->if_u1.if_extents[0];
-	for (ep = base; ep < &base[nextents]; ep++)
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	for (idx = 0; idx < nextents; idx++) {
+		ep = xfs_iext_get_ext(ifp, idx);
 		rtblks += xfs_bmbt_get_blockcount(ep);
+	}
 	*O_rtblks = (xfs_qcnt_t)rtblks;
 	return 0;
 }
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 70625e577c7..53c47a181f8 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -89,7 +89,7 @@ xfs_bmap_add_attrfork_local(
 	int			*flags);	/* inode logging flags */
 
 /*
- * Called by xfs_bmapi to update extent list structure and the btree
+ * Called by xfs_bmapi to update file extent records and the btree
  * after allocating space (or doing a delayed allocation).
  */
 STATIC int				/* error */
@@ -97,7 +97,7 @@ xfs_bmap_add_extent(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_extnum_t		idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
-	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
 	int			*logflagsp, /* inode logging flags */
@@ -113,7 +113,7 @@ xfs_bmap_add_extent_delay_real(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_extnum_t		idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
-	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	xfs_filblks_t		*dnew,	/* new delayed-alloc indirect blocks */
 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
@@ -129,7 +129,7 @@ xfs_bmap_add_extent_hole_delay(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_extnum_t		idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
-	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	int			*logflagsp,/* inode logging flags */
 	int			rsvd);	/* OK to allocate reserved blocks */
 
@@ -142,7 +142,7 @@ xfs_bmap_add_extent_hole_real(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_extnum_t		idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
-	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	int			*logflagsp, /* inode logging flags */
 	int			whichfork); /* data or attr fork */
 
@@ -155,7 +155,7 @@ xfs_bmap_add_extent_unwritten_real(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_extnum_t		idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
-	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	int			*logflagsp); /* inode logging flags */
 
 /*
@@ -169,7 +169,7 @@ xfs_bmap_alloc(
 /*
  * Transform a btree format file with only one leaf node, where the
  * extents list will fit in the inode, into an extents format file.
- * Since the extent list is already in-core, all we have to do is
+ * Since the file extents are already in-core, all we have to do is
  * give up the space for the btree root and pitch the leaf block.
  */
 STATIC int				/* error */
@@ -191,7 +191,7 @@ xfs_bmap_check_extents(
 #endif
 
 /*
- * Called by xfs_bmapi to update extent list structure and the btree
+ * Called by xfs_bmapi to update file extent records and the btree
  * after removing space (or undoing a delayed allocation).
  */
 STATIC int				/* error */
@@ -201,7 +201,7 @@ xfs_bmap_del_extent(
 	xfs_extnum_t		idx,	/* extent number to update/insert */
 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
-	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	int			*logflagsp,/* inode logging flags */
 	int			whichfork, /* data or attr fork */
 	int			rsvd);	 /* OK to allocate reserved blocks */
@@ -216,18 +216,6 @@ xfs_bmap_del_free(
 	xfs_bmap_free_item_t	*prev,	/* previous item on list, if any */
 	xfs_bmap_free_item_t	*free);	/* list item to be freed */
 
-/*
- * Remove count entries from the extents array for inode "ip", starting
- * at index "idx".  Copies the remaining items down over the deleted ones,
- * and gives back the excess memory.
- */
-STATIC void
-xfs_bmap_delete_exlist(
-	xfs_inode_t	*ip,		/* incode inode pointer */
-	xfs_extnum_t	idx,		/* starting delete index */
-	xfs_extnum_t	count,		/* count of items to delete */
-	int		whichfork);	/* data or attr fork */
-
 /*
  * Convert an extents-format file into a btree-format file.
  * The new file will have a root block (in the inode) and a single child block.
@@ -243,18 +231,6 @@ xfs_bmap_extents_to_btree(
 	int			*logflagsp,	/* inode logging flags */
 	int			whichfork);	/* data or attr fork */
 
-/*
- * Insert new item(s) in the extent list for inode "ip".
- * Count new items are inserted at offset idx.
- */
-STATIC void
-xfs_bmap_insert_exlist(
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	idx,		/* starting index of new items */
-	xfs_extnum_t	count,		/* number of inserted items */
-	xfs_bmbt_irec_t	*new,		/* items to insert */
-	int		whichfork);	/* data or attr fork */
-
 /*
  * Convert a local file to an extents file.
  * This code is sort of bogus, since the file data needs to get
@@ -316,7 +292,7 @@ xfs_bmap_trace_addentry(
 	int		whichfork);	/* data or attr fork */
 
 /*
- * Add bmap trace entry prior to a call to xfs_bmap_delete_exlist.
+ * Add bmap trace entry prior to a call to xfs_iext_remove.
  */
 STATIC void
 xfs_bmap_trace_delete(
@@ -328,7 +304,7 @@ xfs_bmap_trace_delete(
 	int		whichfork);	/* data or attr fork */
 
 /*
- * Add bmap trace entry prior to a call to xfs_bmap_insert_exlist, or
+ * Add bmap trace entry prior to a call to xfs_iext_insert, or
  * reading in the extents list from the disk (in the btree).
  */
 STATIC void
@@ -343,7 +319,7 @@ xfs_bmap_trace_insert(
 	int		whichfork);	/* data or attr fork */
 
 /*
- * Add bmap trace entry after updating an extent list entry in place.
+ * Add bmap trace entry after updating an extent record in place.
  */
 STATIC void
 xfs_bmap_trace_post_update(
@@ -354,7 +330,7 @@ xfs_bmap_trace_post_update(
 	int		whichfork);	/* data or attr fork */
 
 /*
- * Add bmap trace entry prior to updating an extent list entry in place.
+ * Add bmap trace entry prior to updating an extent record in place.
  */
 STATIC void
 xfs_bmap_trace_pre_update(
@@ -413,19 +389,24 @@ STATIC int
 xfs_bmap_count_tree(
 	xfs_mount_t     *mp,
 	xfs_trans_t     *tp,
+	xfs_ifork_t	*ifp,
 	xfs_fsblock_t   blockno,
 	int             levelin,
 	int		*count);
 
 STATIC int
 xfs_bmap_count_leaves(
-	xfs_bmbt_rec_t		*frp,
+	xfs_ifork_t		*ifp,
+	xfs_extnum_t		idx,
 	int			numrecs,
 	int			*count);
 
 STATIC int
 xfs_bmap_disk_count_leaves(
-	xfs_bmbt_rec_t		*frp,
+	xfs_ifork_t		*ifp,
+	xfs_mount_t		*mp,
+	xfs_extnum_t		idx,
+	xfs_bmbt_block_t	*block,
 	int			numrecs,
 	int			*count);
 
@@ -537,7 +518,7 @@ xfs_bmap_add_attrfork_local(
 }
 
 /*
- * Called by xfs_bmapi to update extent list structure and the btree
+ * Called by xfs_bmapi to update file extent records and the btree
  * after allocating space (or doing a delayed allocation).
  */
 STATIC int				/* error */
@@ -545,7 +526,7 @@ xfs_bmap_add_extent(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_extnum_t		idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
-	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
 	int			*logflagsp, /* inode logging flags */
@@ -578,7 +559,7 @@ xfs_bmap_add_extent(
 	if (nextents == 0) {
 		xfs_bmap_trace_insert(fname, "insert empty", ip, 0, 1, new,
 			NULL, whichfork);
-		xfs_bmap_insert_exlist(ip, 0, 1, new, whichfork);
+		xfs_iext_insert(ifp, 0, 1, new);
 		ASSERT(cur == NULL);
 		ifp->if_lastex = 0;
 		if (!ISNULLSTARTBLOCK(new->br_startblock)) {
@@ -614,7 +595,7 @@ xfs_bmap_add_extent(
 		/*
 		 * Get the record referred to by idx.
 		 */
-		xfs_bmbt_get_all(&ifp->if_u1.if_extents[idx], &prev);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &prev);
 		/*
 		 * If it's a real allocation record, and the new allocation ends
 		 * after the start of the referred to record, then we're filling
@@ -714,14 +695,13 @@ xfs_bmap_add_extent_delay_real(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_extnum_t		idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
-	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	xfs_filblks_t		*dnew,	/* new delayed-alloc indirect blocks */
 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
 	int			*logflagsp, /* inode logging flags */
 	int			rsvd)	/* OK to use reserved data block allocation */
 {
-	xfs_bmbt_rec_t		*base;	/* base of extent entry list */
 	xfs_btree_cur_t		*cur;	/* btree cursor */
 	int			diff;	/* temp value */
 	xfs_bmbt_rec_t		*ep;	/* extent entry for idx */
@@ -730,6 +710,7 @@ xfs_bmap_add_extent_delay_real(
 	static char		fname[] = "xfs_bmap_add_extent_delay_real";
 #endif
 	int			i;	/* temp state */
+	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
 	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
 					/* left is 0, right is 1, prev is 2 */
@@ -763,8 +744,8 @@ xfs_bmap_add_extent_delay_real(
 	 * Set up a bunch of variables to make the tests simpler.
 	 */
 	cur = *curp;
-	base = ip->i_df.if_u1.if_extents;
-	ep = &base[idx];
+	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	ep = xfs_iext_get_ext(ifp, idx);
 	xfs_bmbt_get_all(ep, &PREV);
 	new_endoff = new->br_startoff + new->br_blockcount;
 	ASSERT(PREV.br_startoff <= new->br_startoff);
@@ -781,7 +762,7 @@ xfs_bmap_add_extent_delay_real(
 	 * Don't set contiguous if the combined extent would be too large.
 	 */
 	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
-		xfs_bmbt_get_all(ep - 1, &LEFT);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT);
 		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(LEFT.br_startblock));
 	}
 	STATE_SET(LEFT_CONTIG,
@@ -798,7 +779,7 @@ xfs_bmap_add_extent_delay_real(
 	if (STATE_SET_TEST(RIGHT_VALID,
 			idx <
 			ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
-		xfs_bmbt_get_all(ep + 1, &RIGHT);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT);
 		STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(RIGHT.br_startblock));
 	}
 	STATE_SET(RIGHT_CONTIG,
@@ -825,14 +806,14 @@ xfs_bmap_add_extent_delay_real(
 		 */
 		xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
-		xfs_bmbt_set_blockcount(ep - 1,
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount +
 			RIGHT.br_blockcount);
 		xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
 			XFS_DATA_FORK);
-		xfs_bmap_delete_exlist(ip, idx, 2, XFS_DATA_FORK);
+		xfs_iext_remove(ifp, idx, 2);
 		ip->i_df.if_lastex = idx - 1;
 		ip->i_d.di_nextents--;
 		if (cur == NULL)
@@ -867,14 +848,14 @@ xfs_bmap_add_extent_delay_real(
 		 */
 		xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
-		xfs_bmbt_set_blockcount(ep - 1,
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount);
 		xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx - 1;
 		xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
 			XFS_DATA_FORK);
-		xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK);
+		xfs_iext_remove(ifp, idx, 1);
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
@@ -908,7 +889,7 @@ xfs_bmap_add_extent_delay_real(
 		ip->i_df.if_lastex = idx;
 		xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
 			XFS_DATA_FORK);
-		xfs_bmap_delete_exlist(ip, idx + 1, 1, XFS_DATA_FORK);
+		xfs_iext_remove(ifp, idx + 1, 1);
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
@@ -964,7 +945,7 @@ xfs_bmap_add_extent_delay_real(
 		 */
 		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
-		xfs_bmbt_set_blockcount(ep - 1,
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + new->br_blockcount);
 		xfs_bmbt_set_startoff(ep,
 			PREV.br_startoff + new->br_blockcount);
@@ -1010,7 +991,7 @@ xfs_bmap_add_extent_delay_real(
 		xfs_bmbt_set_blockcount(ep, temp);
 		xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
 			XFS_DATA_FORK);
-		xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK);
+		xfs_iext_insert(ifp, idx, 1, new);
 		ip->i_df.if_lastex = idx;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
@@ -1039,8 +1020,7 @@ xfs_bmap_add_extent_delay_real(
 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 			STARTBLOCKVAL(PREV.br_startblock) -
 			(cur ? cur->bc_private.b.allocated : 0));
-		base = ip->i_df.if_u1.if_extents;
-		ep = &base[idx + 1];
+		ep = xfs_iext_get_ext(ifp, idx + 1);
 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
 		xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1,
 			XFS_DATA_FORK);
@@ -1058,7 +1038,8 @@ xfs_bmap_add_extent_delay_real(
 		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep, temp);
-		xfs_bmbt_set_allf(ep + 1, new->br_startoff, new->br_startblock,
+		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
+			new->br_startoff, new->br_startblock,
 			new->br_blockcount + RIGHT.br_blockcount,
 			RIGHT.br_state);
 		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
@@ -1098,7 +1079,7 @@ xfs_bmap_add_extent_delay_real(
 		xfs_bmbt_set_blockcount(ep, temp);
 		xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
 			new, NULL, XFS_DATA_FORK);
-		xfs_bmap_insert_exlist(ip, idx + 1, 1, new, XFS_DATA_FORK);
+		xfs_iext_insert(ifp, idx + 1, 1, new);
 		ip->i_df.if_lastex = idx + 1;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
@@ -1127,8 +1108,7 @@ xfs_bmap_add_extent_delay_real(
 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 			STARTBLOCKVAL(PREV.br_startblock) -
 			(cur ? cur->bc_private.b.allocated : 0));
-		base = ip->i_df.if_u1.if_extents;
-		ep = &base[idx];
+		ep = xfs_iext_get_ext(ifp, idx);
 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
 		xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
 		*dnew = temp;
@@ -1149,7 +1129,7 @@ xfs_bmap_add_extent_delay_real(
 		r[1].br_blockcount = temp2;
 		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
 			XFS_DATA_FORK);
-		xfs_bmap_insert_exlist(ip, idx + 1, 2, &r[0], XFS_DATA_FORK);
+		xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
 		ip->i_df.if_lastex = idx + 1;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
@@ -1204,13 +1184,13 @@ xfs_bmap_add_extent_delay_real(
 				}
 			}
 		}
-		base = ip->i_df.if_u1.if_extents;
-		ep = &base[idx];
+		ep = xfs_iext_get_ext(ifp, idx);
 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
 		xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK);
 		xfs_bmap_trace_pre_update(fname, "0", ip, idx + 2,
 			XFS_DATA_FORK);
-		xfs_bmbt_set_startblock(ep + 2, NULLSTARTBLOCK((int)temp2));
+		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2),
+			NULLSTARTBLOCK((int)temp2));
 		xfs_bmap_trace_post_update(fname, "0", ip, idx + 2,
 			XFS_DATA_FORK);
 		*dnew = temp + temp2;
@@ -1254,10 +1234,9 @@ xfs_bmap_add_extent_unwritten_real(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_extnum_t		idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
-	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	int			*logflagsp) /* inode logging flags */
 {
-	xfs_bmbt_rec_t		*base;	/* base of extent entry list */
 	xfs_btree_cur_t		*cur;	/* btree cursor */
 	xfs_bmbt_rec_t		*ep;	/* extent entry for idx */
 	int			error;	/* error return value */
@@ -1265,6 +1244,7 @@ xfs_bmap_add_extent_unwritten_real(
 	static char		fname[] = "xfs_bmap_add_extent_unwritten_real";
 #endif
 	int			i;	/* temp state */
+	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
 	xfs_exntst_t		newext;	/* new extent state */
 	xfs_exntst_t		oldext;	/* old extent state */
@@ -1298,8 +1278,8 @@ xfs_bmap_add_extent_unwritten_real(
 	 */
 	error = 0;
 	cur = *curp;
-	base = ip->i_df.if_u1.if_extents;
-	ep = &base[idx];
+	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	ep = xfs_iext_get_ext(ifp, idx);
 	xfs_bmbt_get_all(ep, &PREV);
 	newext = new->br_state;
 	oldext = (newext == XFS_EXT_UNWRITTEN) ?
@@ -1320,7 +1300,7 @@ xfs_bmap_add_extent_unwritten_real(
 	 * Don't set contiguous if the combined extent would be too large.
 	 */
 	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
-		xfs_bmbt_get_all(ep - 1, &LEFT);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT);
 		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(LEFT.br_startblock));
 	}
 	STATE_SET(LEFT_CONTIG,
@@ -1337,7 +1317,7 @@ xfs_bmap_add_extent_unwritten_real(
 	if (STATE_SET_TEST(RIGHT_VALID,
 			idx <
 			ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
-		xfs_bmbt_get_all(ep + 1, &RIGHT);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT);
 		STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(RIGHT.br_startblock));
 	}
 	STATE_SET(RIGHT_CONTIG,
@@ -1363,14 +1343,14 @@ xfs_bmap_add_extent_unwritten_real(
 		 */
 		xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
-		xfs_bmbt_set_blockcount(ep - 1,
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount +
 			RIGHT.br_blockcount);
 		xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
 			XFS_DATA_FORK);
-		xfs_bmap_delete_exlist(ip, idx, 2, XFS_DATA_FORK);
+		xfs_iext_remove(ifp, idx, 2);
 		ip->i_df.if_lastex = idx - 1;
 		ip->i_d.di_nextents -= 2;
 		if (cur == NULL)
@@ -1409,14 +1389,14 @@ xfs_bmap_add_extent_unwritten_real(
 		 */
 		xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
-		xfs_bmbt_set_blockcount(ep - 1,
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount);
 		xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx - 1;
 		xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
 			XFS_DATA_FORK);
-		xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK);
+		xfs_iext_remove(ifp, idx, 1);
 		ip->i_d.di_nextents--;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1456,7 +1436,7 @@ xfs_bmap_add_extent_unwritten_real(
 		ip->i_df.if_lastex = idx;
 		xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
 			XFS_DATA_FORK);
-		xfs_bmap_delete_exlist(ip, idx + 1, 1, XFS_DATA_FORK);
+		xfs_iext_remove(ifp, idx + 1, 1);
 		ip->i_d.di_nextents--;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1516,7 +1496,7 @@ xfs_bmap_add_extent_unwritten_real(
 		 */
 		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
-		xfs_bmbt_set_blockcount(ep - 1,
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + new->br_blockcount);
 		xfs_bmbt_set_startoff(ep,
 			PREV.br_startoff + new->br_blockcount);
@@ -1571,7 +1551,7 @@ xfs_bmap_add_extent_unwritten_real(
 		xfs_bmap_trace_post_update(fname, "LF", ip, idx, XFS_DATA_FORK);
 		xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
 			XFS_DATA_FORK);
-		xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK);
+		xfs_iext_insert(ifp, idx, 1, new);
 		ip->i_df.if_lastex = idx;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
@@ -1609,7 +1589,8 @@ xfs_bmap_add_extent_unwritten_real(
 			PREV.br_blockcount - new->br_blockcount);
 		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
 			XFS_DATA_FORK);
-		xfs_bmbt_set_allf(ep + 1, new->br_startoff, new->br_startblock,
+		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
+			new->br_startoff, new->br_startblock,
 			new->br_blockcount + RIGHT.br_blockcount, newext);
 		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
 			XFS_DATA_FORK);
@@ -1649,7 +1630,7 @@ xfs_bmap_add_extent_unwritten_real(
 		xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
 		xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
 			new, NULL, XFS_DATA_FORK);
-		xfs_bmap_insert_exlist(ip, idx + 1, 1, new, XFS_DATA_FORK);
+		xfs_iext_insert(ifp, idx + 1, 1, new);
 		ip->i_df.if_lastex = idx + 1;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
@@ -1696,7 +1677,7 @@ xfs_bmap_add_extent_unwritten_real(
 		r[1].br_state = oldext;
 		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
 			XFS_DATA_FORK);
-		xfs_bmap_insert_exlist(ip, idx + 1, 2, &r[0], XFS_DATA_FORK);
+		xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
 		ip->i_df.if_lastex = idx + 1;
 		ip->i_d.di_nextents += 2;
 		if (cur == NULL)
@@ -1770,15 +1751,15 @@ xfs_bmap_add_extent_hole_delay(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_extnum_t		idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
-	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	int			*logflagsp, /* inode logging flags */
 	int			rsvd)		/* OK to allocate reserved blocks */
 {
-	xfs_bmbt_rec_t		*base;	/* base of extent entry list */
-	xfs_bmbt_rec_t		*ep;	/* extent list entry for idx */
+	xfs_bmbt_rec_t		*ep;	/* extent record for idx */
 #ifdef XFS_BMAP_TRACE
 	static char		fname[] = "xfs_bmap_add_extent_hole_delay";
 #endif
+	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
 	xfs_filblks_t		newlen=0;	/* new indirect size */
 	xfs_filblks_t		oldlen=0;	/* old indirect size */
@@ -1799,15 +1780,15 @@ xfs_bmap_add_extent_hole_delay(
 				       ((state &= ~MASK(b)), 0))
 #define	SWITCH_STATE		(state & MASK2(LEFT_CONTIG, RIGHT_CONTIG))
 
-	base = ip->i_df.if_u1.if_extents;
-	ep = &base[idx];
+	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	ep = xfs_iext_get_ext(ifp, idx);
 	state = 0;
 	ASSERT(ISNULLSTARTBLOCK(new->br_startblock));
 	/*
 	 * Check and set flags if this segment has a left neighbor
 	 */
 	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
-		xfs_bmbt_get_all(ep - 1, &left);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left);
 		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(left.br_startblock));
 	}
 	/*
@@ -1844,23 +1825,24 @@ xfs_bmap_add_extent_hole_delay(
 		/*
 		 * New allocation is contiguous with delayed allocations
 		 * on the left and on the right.
-		 * Merge all three into a single extent list entry.
+		 * Merge all three into a single extent record.
 		 */
 		temp = left.br_blockcount + new->br_blockcount +
 			right.br_blockcount;
 		xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
-		xfs_bmbt_set_blockcount(ep - 1, temp);
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
 		oldlen = STARTBLOCKVAL(left.br_startblock) +
 			STARTBLOCKVAL(new->br_startblock) +
 			STARTBLOCKVAL(right.br_startblock);
 		newlen = xfs_bmap_worst_indlen(ip, temp);
-		xfs_bmbt_set_startblock(ep - 1, NULLSTARTBLOCK((int)newlen));
+		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
+			NULLSTARTBLOCK((int)newlen));
 		xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmap_trace_delete(fname, "LC|RC", ip, idx, 1,
 			XFS_DATA_FORK);
-		xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK);
+		xfs_iext_remove(ifp, idx, 1);
 		ip->i_df.if_lastex = idx - 1;
 		break;
 
@@ -1873,11 +1855,12 @@ xfs_bmap_add_extent_hole_delay(
 		temp = left.br_blockcount + new->br_blockcount;
 		xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1,
 			XFS_DATA_FORK);
-		xfs_bmbt_set_blockcount(ep - 1, temp);
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
 		oldlen = STARTBLOCKVAL(left.br_startblock) +
 			STARTBLOCKVAL(new->br_startblock);
 		newlen = xfs_bmap_worst_indlen(ip, temp);
-		xfs_bmbt_set_startblock(ep - 1, NULLSTARTBLOCK((int)newlen));
+		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
+			NULLSTARTBLOCK((int)newlen));
 		xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx - 1;
@@ -1909,7 +1892,7 @@ xfs_bmap_add_extent_hole_delay(
 		oldlen = newlen = 0;
 		xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
 			XFS_DATA_FORK);
-		xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK);
+		xfs_iext_insert(ifp, idx, 1, new);
 		ip->i_df.if_lastex = idx;
 		break;
 	}
@@ -1940,7 +1923,7 @@ xfs_bmap_add_extent_hole_real(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_extnum_t		idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
-	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	int			*logflagsp, /* inode logging flags */
 	int			whichfork) /* data or attr fork */
 {
@@ -1970,13 +1953,13 @@ xfs_bmap_add_extent_hole_real(
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
-	ep = &ifp->if_u1.if_extents[idx];
+	ep = xfs_iext_get_ext(ifp, idx);
 	state = 0;
 	/*
 	 * Check and set flags if this segment has a left neighbor.
 	 */
 	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
-		xfs_bmbt_get_all(ep - 1, &left);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left);
 		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(left.br_startblock));
 	}
 	/*
@@ -2019,18 +2002,18 @@ xfs_bmap_add_extent_hole_real(
 		/*
 		 * New allocation is contiguous with real allocations on the
 		 * left and on the right.
-		 * Merge all three into a single extent list entry.
+		 * Merge all three into a single extent record.
 		 */
 		xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
 			whichfork);
-		xfs_bmbt_set_blockcount(ep - 1,
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			left.br_blockcount + new->br_blockcount +
 			right.br_blockcount);
 		xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
 			whichfork);
 		xfs_bmap_trace_delete(fname, "LC|RC", ip,
 			idx, 1, whichfork);
-		xfs_bmap_delete_exlist(ip, idx, 1, whichfork);
+		xfs_iext_remove(ifp, idx, 1);
 		ifp->if_lastex = idx - 1;
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
@@ -2062,7 +2045,7 @@ xfs_bmap_add_extent_hole_real(
 		 * Merge the new allocation with the left neighbor.
 		 */
 		xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, whichfork);
-		xfs_bmbt_set_blockcount(ep - 1,
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			left.br_blockcount + new->br_blockcount);
 		xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork);
 		ifp->if_lastex = idx - 1;
@@ -2116,7 +2099,7 @@ xfs_bmap_add_extent_hole_real(
 		 */
 		xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
 			whichfork);
-		xfs_bmap_insert_exlist(ip, idx, 1, new, whichfork);
+		xfs_iext_insert(ifp, idx, 1, new);
 		ifp->if_lastex = idx;
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 			XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
@@ -2811,7 +2794,7 @@ xfs_bmap_alloc(
 /*
  * Transform a btree format file with only one leaf node, where the
  * extents list will fit in the inode, into an extents format file.
- * Since the extent list is already in-core, all we have to do is
+ * Since the file extents are already in-core, all we have to do is
  * give up the space for the btree root and pitch the leaf block.
  */
 STATIC int				/* error */
@@ -2868,7 +2851,7 @@ xfs_bmap_btree_to_extents(
 }
 
 /*
- * Called by xfs_bmapi to update extent list structure and the btree
+ * Called by xfs_bmapi to update file extent records and the btree
  * after removing space (or undoing a delayed allocation).
  */
 STATIC int				/* error */
@@ -2878,7 +2861,7 @@ xfs_bmap_del_extent(
 	xfs_extnum_t		idx,	/* extent number to update/delete */
 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
-	xfs_bmbt_irec_t		*del,	/* data to remove from extent list */
+	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
 	int			*logflagsp, /* inode logging flags */
 	int			whichfork, /* data or attr fork */
 	int			rsvd)	/* OK to allocate reserved blocks */
@@ -2903,7 +2886,6 @@ xfs_bmap_del_extent(
 	xfs_filblks_t		nblks;	/* quota/sb block count */
 	xfs_bmbt_irec_t		new;	/* new record to be inserted */
 	/* REFERENCED */
-	xfs_extnum_t		nextents;	/* number of extents in list */
 	uint			qfield;	/* quota field to update */
 	xfs_filblks_t		temp;	/* for indirect length calculations */
 	xfs_filblks_t		temp2;	/* for indirect length calculations */
@@ -2911,10 +2893,10 @@ xfs_bmap_del_extent(
 	XFS_STATS_INC(xs_del_exlist);
 	mp = ip->i_mount;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	ASSERT(idx >= 0 && idx < nextents);
+	ASSERT((idx >= 0) && (idx < ifp->if_bytes /
+		(uint)sizeof(xfs_bmbt_rec_t)));
 	ASSERT(del->br_blockcount > 0);
-	ep = &ifp->if_u1.if_extents[idx];
+	ep = xfs_iext_get_ext(ifp, idx);
 	xfs_bmbt_get_all(ep, &got);
 	ASSERT(got.br_startoff <= del->br_startoff);
 	del_endoff = del->br_startoff + del->br_blockcount;
@@ -2990,7 +2972,7 @@ xfs_bmap_del_extent(
 		 * Matches the whole extent.  Delete the entry.
 		 */
 		xfs_bmap_trace_delete(fname, "3", ip, idx, 1, whichfork);
-		xfs_bmap_delete_exlist(ip, idx, 1, whichfork);
+		xfs_iext_remove(ifp, idx, 1);
 		ifp->if_lastex = idx;
 		if (delay)
 			break;
@@ -3160,7 +3142,7 @@ xfs_bmap_del_extent(
 		xfs_bmap_trace_post_update(fname, "0", ip, idx, whichfork);
 		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 1, &new, NULL,
 			whichfork);
-		xfs_bmap_insert_exlist(ip, idx + 1, 1, &new, whichfork);
+		xfs_iext_insert(ifp, idx + 1, 1, &new);
 		ifp->if_lastex = idx + 1;
 		break;
 	}
@@ -3212,31 +3194,6 @@ xfs_bmap_del_free(
 	kmem_zone_free(xfs_bmap_free_item_zone, free);
 }
 
-/*
- * Remove count entries from the extents array for inode "ip", starting
- * at index "idx".  Copies the remaining items down over the deleted ones,
- * and gives back the excess memory.
- */
-STATIC void
-xfs_bmap_delete_exlist(
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	idx,		/* starting delete index */
-	xfs_extnum_t	count,		/* count of items to delete */
-	int		whichfork)	/* data or attr fork */
-{
-	xfs_bmbt_rec_t	*base;		/* base of extent list */
-	xfs_ifork_t	*ifp;		/* inode fork pointer */
-	xfs_extnum_t	nextents;	/* number of extents in list after */
-
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-	base = ifp->if_u1.if_extents;
-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - count;
-	memmove(&base[idx], &base[idx + count],
-		(nextents - idx) * sizeof(*base));
-	xfs_iext_realloc(ip, -count, whichfork);
-}
-
 /*
  * Convert an extents-format file into a btree-format file.
  * The new file will have a root block (in the inode) and a single child block.
@@ -3258,13 +3215,13 @@ xfs_bmap_extents_to_btree(
 	xfs_bmbt_rec_t		*arp;		/* child record pointer */
 	xfs_bmbt_block_t	*block;		/* btree root block */
 	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
-	xfs_bmbt_rec_t		*ep;		/* extent list pointer */
+	xfs_bmbt_rec_t		*ep;		/* extent record pointer */
 	int			error;		/* error return value */
-	xfs_extnum_t		i, cnt;		/* extent list index */
+	xfs_extnum_t		i, cnt;		/* extent record index */
 	xfs_ifork_t		*ifp;		/* inode fork pointer */
 	xfs_bmbt_key_t		*kp;		/* root block key pointer */
 	xfs_mount_t		*mp;		/* mount structure */
-	xfs_extnum_t		nextents;	/* extent list size */
+	xfs_extnum_t		nextents;	/* number of file extents */
 	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -3343,7 +3300,8 @@ xfs_bmap_extents_to_btree(
 	ablock->bb_rightsib = cpu_to_be64(NULLDFSBNO);
 	arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	for (ep = ifp->if_u1.if_extents, cnt = i = 0; i < nextents; i++, ep++) {
+	for (cnt = i = 0; i < nextents; i++) {
+		ep = xfs_iext_get_ext(ifp, i);
 		if (!ISNULLSTARTBLOCK(xfs_bmbt_get_startblock(ep))) {
 			arp->l0 = INT_GET(ep->l0, ARCH_CONVERT);
 			arp->l1 = INT_GET(ep->l1, ARCH_CONVERT);
@@ -3372,34 +3330,6 @@ xfs_bmap_extents_to_btree(
 	return 0;
 }
 
-/*
- * Insert new item(s) in the extent list for inode "ip".
- * Count new items are inserted at offset idx.
- */
-STATIC void
-xfs_bmap_insert_exlist(
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	idx,		/* starting index of new items */
-	xfs_extnum_t	count,		/* number of inserted items */
-	xfs_bmbt_irec_t	*new,		/* items to insert */
-	int		whichfork)	/* data or attr fork */
-{
-	xfs_bmbt_rec_t	*base;		/* extent list base */
-	xfs_ifork_t	*ifp;		/* inode fork pointer */
-	xfs_extnum_t	nextents;	/* extent list size */
-	xfs_extnum_t	to;		/* extent list index */
-
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-	xfs_iext_realloc(ip, count, whichfork);
-	base = ifp->if_u1.if_extents;
-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	memmove(&base[idx + count], &base[idx],
-		(nextents - (idx + count)) * sizeof(*base));
-	for (to = idx; to < idx + count; to++, new++)
-		xfs_bmbt_set_all(&base[to], new);
-}
-
 /*
  * Helper routine to reset inode di_forkoff field when switching
  * attribute fork from local to extent format - we reset it where
@@ -3457,8 +3387,8 @@ xfs_bmap_local_to_extents(
 	error = 0;
 	if (ifp->if_bytes) {
 		xfs_alloc_arg_t	args;	/* allocation arguments */
-		xfs_buf_t	*bp;	/* buffer for extent list block */
-		xfs_bmbt_rec_t	*ep;	/* extent list pointer */
+		xfs_buf_t	*bp;	/* buffer for extent block */
+		xfs_bmbt_rec_t	*ep;	/* extent record pointer */
 
 		args.tp = tp;
 		args.mp = ip->i_mount;
@@ -3492,8 +3422,8 @@ xfs_bmap_local_to_extents(
 		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
 		xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
 		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
-		xfs_iext_realloc(ip, 1, whichfork);
-		ep = ifp->if_u1.if_extents;
+		xfs_iext_add(ifp, 0, 1);
+		ep = xfs_iext_get_ext(ifp, 0);
 		xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
 		xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork);
 		XFS_IFORK_NEXT_SET(ip, whichfork, 1);
@@ -3518,7 +3448,7 @@ xfs_bmbt_rec_t *			/* pointer to found extent entry */
 xfs_bmap_do_search_extents(
 	xfs_bmbt_rec_t	*base,		/* base of extent list */
 	xfs_extnum_t	lastx,		/* last extent index used */
-	xfs_extnum_t	nextents,	/* extent list size */
+	xfs_extnum_t	nextents,	/* number of file extents */
 	xfs_fileoff_t	bno,		/* block number searched for */
 	int		*eofp,		/* out: end of file found */
 	xfs_extnum_t	*lastxp,	/* out: last extent index */
@@ -3569,9 +3499,9 @@ xfs_bmap_do_search_extents(
 		got.br_blockcount = xfs_bmbt_get_blockcount(ep);
 		*eofp = 0;
 	} else {
-		/* binary search the extents array */
 		low = 0;
 		high = nextents - 1;
+		/* binary search the extents array */
 		while (low <= high) {
 			XFS_STATS_INC(xs_cmp_exlist);
 			lastx = (low + high) >> 1;
@@ -3641,8 +3571,8 @@ xfs_bmap_search_extents(
 	xfs_ifork_t	*ifp;		/* inode fork pointer */
 	xfs_bmbt_rec_t  *base;          /* base of extent list */
 	xfs_extnum_t    lastx;          /* last extent index used */
-	xfs_extnum_t    nextents;       /* extent list size */
-	xfs_bmbt_rec_t  *ep;            /* extent list entry pointer */
+	xfs_extnum_t    nextents;       /* number of file extents */
+	xfs_bmbt_rec_t  *ep;            /* extent record pointer */
 	int		rt;		/* realtime flag    */
 
 	XFS_STATS_INC(xs_look_exlist);
@@ -3732,7 +3662,7 @@ xfs_bmap_trace_addentry(
 }
 
 /*
- * Add bmap trace entry prior to a call to xfs_bmap_delete_exlist.
+ * Add bmap trace entry prior to a call to xfs_iext_remove.
  */
 STATIC void
 xfs_bmap_trace_delete(
@@ -3747,13 +3677,13 @@ xfs_bmap_trace_delete(
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_DELETE, fname, desc, ip, idx,
-		cnt, &ifp->if_u1.if_extents[idx],
-		cnt == 2 ? &ifp->if_u1.if_extents[idx + 1] : NULL,
+		cnt, xfs_iext_get_ext(ifp, idx),
+		cnt == 2 ? xfs_iext_get_ext(ifp, idx + 1) : NULL,
 		whichfork);
 }
 
 /*
- * Add bmap trace entry prior to a call to xfs_bmap_insert_exlist, or
+ * Add bmap trace entry prior to a call to xfs_iext_insert, or
  * reading in the extents list from the disk (in the btree).
  */
 STATIC void
@@ -3783,7 +3713,7 @@ xfs_bmap_trace_insert(
 }
 
 /*
- * Add bmap trace entry after updating an extent list entry in place.
+ * Add bmap trace entry after updating an extent record in place.
  */
 STATIC void
 xfs_bmap_trace_post_update(
@@ -3797,11 +3727,11 @@ xfs_bmap_trace_post_update(
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_POST_UP, fname, desc, ip, idx,
-		1, &ifp->if_u1.if_extents[idx], NULL, whichfork);
+		1, xfs_iext_get_ext(ifp, idx), NULL, whichfork);
 }
 
 /*
- * Add bmap trace entry prior to updating an extent list entry in place.
+ * Add bmap trace entry prior to updating an extent record in place.
  */
 STATIC void
 xfs_bmap_trace_pre_update(
@@ -3815,7 +3745,7 @@ xfs_bmap_trace_pre_update(
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_PRE_UP, fname, desc, ip, idx, 1,
-		&ifp->if_u1.if_extents[idx], NULL, whichfork);
+		xfs_iext_get_ext(ifp, idx), NULL, whichfork);
 }
 #endif	/* XFS_BMAP_TRACE */
 
@@ -3892,7 +3822,7 @@ xfs_bmap_add_attrfork(
 	int			rsvd)		/* xact may use reserved blks */
 {
 	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
-	xfs_bmap_free_t		flist;		/* freed extent list */
+	xfs_bmap_free_t		flist;		/* freed extent records */
 	xfs_mount_t		*mp;		/* mount structure */
 	xfs_trans_t		*tp;		/* transaction pointer */
 	unsigned long		s;		/* spinlock spl value */
@@ -4146,7 +4076,7 @@ xfs_bmap_finish(
 	xfs_efd_log_item_t	*efd;		/* extent free data */
 	xfs_efi_log_item_t	*efi;		/* extent free intention */
 	int			error;		/* error return value */
-	xfs_bmap_free_item_t	*free;		/* free extent list item */
+	xfs_bmap_free_item_t	*free;		/* free extent item */
 	unsigned int		logres;		/* new log reservation */
 	unsigned int		logcount;	/* new log count */
 	xfs_mount_t		*mp;		/* filesystem mount structure */
@@ -4242,9 +4172,9 @@ xfs_bmap_first_unused(
 	xfs_fileoff_t	*first_unused,		/* unused block */
 	int		whichfork)		/* data or attr fork */
 {
-	xfs_bmbt_rec_t	*base;			/* base of extent array */
 	xfs_bmbt_rec_t	*ep;			/* pointer to an extent entry */
 	int		error;			/* error return value */
+	int		idx;			/* extent record index */
 	xfs_ifork_t	*ifp;			/* inode fork pointer */
 	xfs_fileoff_t	lastaddr;		/* last block number seen */
 	xfs_fileoff_t	lowest;			/* lowest useful block */
@@ -4265,10 +4195,8 @@ xfs_bmap_first_unused(
 		return error;
 	lowest = *first_unused;
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	base = &ifp->if_u1.if_extents[0];
-	for (lastaddr = 0, max = lowest, ep = base;
-	     ep < &base[nextents];
-	     ep++) {
+	for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
+		ep = xfs_iext_get_ext(ifp, idx);
 		off = xfs_bmbt_get_startoff(ep);
 		/*
 		 * See if the hole before this extent will work.
@@ -4287,8 +4215,8 @@ xfs_bmap_first_unused(
 /*
  * Returns the file-relative block number of the last block + 1 before
  * last_block (input value) in the file.
- * This is not based on i_size, it is based on the extent list.
- * Returns 0 for local files, as they do not have an extent list.
+ * This is not based on i_size, it is based on the extent records.
+ * Returns 0 for local files, as they do not have extent records.
  */
 int						/* error */
 xfs_bmap_last_before(
@@ -4335,8 +4263,8 @@ xfs_bmap_last_before(
 
 /*
  * Returns the file-relative block number of the first block past eof in
- * the file.  This is not based on i_size, it is based on the extent list.
- * Returns 0 for local files, as they do not have an extent list.
+ * the file.  This is not based on i_size, it is based on the extent records.
+ * Returns 0 for local files, as they do not have extent records.
  */
 int						/* error */
 xfs_bmap_last_offset(
@@ -4345,7 +4273,6 @@ xfs_bmap_last_offset(
 	xfs_fileoff_t	*last_block,		/* last block */
 	int		whichfork)		/* data or attr fork */
 {
-	xfs_bmbt_rec_t	*base;			/* base of extent array */
 	xfs_bmbt_rec_t	*ep;			/* pointer to last extent */
 	int		error;			/* error return value */
 	xfs_ifork_t	*ifp;			/* inode fork pointer */
@@ -4368,9 +4295,7 @@ xfs_bmap_last_offset(
 		*last_block = 0;
 		return 0;
 	}
-	base = &ifp->if_u1.if_extents[0];
-	ASSERT(base != NULL);
-	ep = &base[nextents - 1];
+	ep = xfs_iext_get_ext(ifp, nextents - 1);
 	*last_block = xfs_bmbt_get_startoff(ep) + xfs_bmbt_get_blockcount(ep);
 	return 0;
 }
@@ -4400,7 +4325,7 @@ xfs_bmap_one_block(
 		return 0;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-	ep = ifp->if_u1.if_extents;
+	ep = xfs_iext_get_ext(ifp, 0);
 	xfs_bmbt_get_all(ep, &s);
 	rval = s.br_startoff == 0 && s.br_blockcount == 1;
 	if (rval && whichfork == XFS_DATA_FORK)
@@ -4435,7 +4360,6 @@ xfs_bmap_read_extents(
 	xfs_bmbt_ptr_t		*pp;	/* pointer to block address */
 	/* REFERENCED */
 	xfs_extnum_t		room;	/* number of entries there's room for */
-	xfs_bmbt_rec_t		*trp;	/* target record pointer */
 
 	bno = NULLFSBLOCK;
 	mp = ip->i_mount;
@@ -4478,16 +4402,16 @@ xfs_bmap_read_extents(
 	/*
 	 * Here with bp and block set to the leftmost leaf node in the tree.
 	 */
-	room = ifp->if_bytes / (uint)sizeof(*trp);
-	trp = ifp->if_u1.if_extents;
+	room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 	i = 0;
 	/*
-	 * Loop over all leaf nodes.  Copy information to the extent list.
+	 * Loop over all leaf nodes.  Copy information to the extent records.
 	 */
 	for (;;) {
-		xfs_bmbt_rec_t	*frp, *temp;
+		xfs_bmbt_rec_t	*frp, *trp;
 		xfs_fsblock_t	nextbno;
 		xfs_extnum_t	num_recs;
+		xfs_extnum_t	start;
 
 
 		num_recs = be16_to_cpu(block->bb_numrecs);
@@ -4511,12 +4435,13 @@ xfs_bmap_read_extents(
 		if (nextbno != NULLFSBLOCK)
 			xfs_btree_reada_bufl(mp, nextbno, 1);
 		/*
-		 * Copy records into the extent list.
+		 * Copy records into the extent records.
 		 */
 		frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
 			block, 1, mp->m_bmap_dmxr[0]);
-		temp = trp;
-		for (j = 0; j < num_recs; j++, frp++, trp++) {
+		start = i;
+		for (j = 0; j < num_recs; j++, i++, frp++) {
+			trp = xfs_iext_get_ext(ifp, i);
 			trp->l0 = INT_GET(frp->l0, ARCH_CONVERT);
 			trp->l1 = INT_GET(frp->l1, ARCH_CONVERT);
 		}
@@ -4526,14 +4451,14 @@ xfs_bmap_read_extents(
 			 * any "older" data bmap btree records for a
 			 * set bit in the "extent flag" position.
 			 */
-			if (unlikely(xfs_check_nostate_extents(temp, num_recs))) {
+			if (unlikely(xfs_check_nostate_extents(ifp,
+					start, num_recs))) {
 				XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
 						 XFS_ERRLEVEL_LOW,
 						 ip->i_mount);
 				goto error0;
 			}
 		}
-		i += num_recs;
 		xfs_trans_brelse(tp, bp);
 		bno = nextbno;
 		/*
@@ -4546,7 +4471,7 @@ xfs_bmap_read_extents(
 			return error;
 		block = XFS_BUF_TO_BMBT_BLOCK(bp);
 	}
-	ASSERT(i == ifp->if_bytes / (uint)sizeof(*trp));
+	ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
 	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
 	xfs_bmap_trace_exlist(fname, ip, i, whichfork);
 	return 0;
@@ -4557,7 +4482,7 @@ error0:
 
 #ifdef XFS_BMAP_TRACE
 /*
- * Add bmap trace insert entries for all the contents of the extent list.
+ * Add bmap trace insert entries for all the contents of the extent records.
  */
 void
 xfs_bmap_trace_exlist(
@@ -4566,16 +4491,15 @@ xfs_bmap_trace_exlist(
 	xfs_extnum_t	cnt,		/* count of entries in the list */
 	int		whichfork)	/* data or attr fork */
 {
-	xfs_bmbt_rec_t	*base;		/* base of extent list */
-	xfs_bmbt_rec_t	*ep;		/* current entry in extent list */
-	xfs_extnum_t	idx;		/* extent list entry number */
+	xfs_bmbt_rec_t	*ep;		/* current extent record */
+	xfs_extnum_t	idx;		/* extent record index */
 	xfs_ifork_t	*ifp;		/* inode fork pointer */
-	xfs_bmbt_irec_t	s;		/* extent list record */
+	xfs_bmbt_irec_t	s;		/* file extent record */
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
-	ASSERT(cnt == ifp->if_bytes / (uint)sizeof(*base));
-	base = ifp->if_u1.if_extents;
-	for (idx = 0, ep = base; idx < cnt; idx++, ep++) {
+	ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
+	for (idx = 0; idx < cnt; idx++) {
+		ep = xfs_iext_get_ext(ifp, idx);
 		xfs_bmbt_get_all(ep, &s);
 		xfs_bmap_trace_insert(fname, "exlist", ip, idx, 1, &s, NULL,
 			whichfork);
@@ -4661,14 +4585,14 @@ xfs_bmapi(
 	xfs_bmalloca_t	bma;		/* args for xfs_bmap_alloc */
 	xfs_btree_cur_t	*cur;		/* bmap btree cursor */
 	xfs_fileoff_t	end;		/* end of mapped file region */
-	int		eof;		/* we've hit the end of extent list */
+	int		eof;		/* we've hit the end of extents */
 	char		contig;		/* allocation must be one extent */
 	char		delay;		/* this request is for delayed alloc */
 	char		exact;		/* don't do all of wasdelayed extent */
 	char		convert;	/* unwritten extent I/O completion */
-	xfs_bmbt_rec_t	*ep;		/* extent list entry pointer */
+	xfs_bmbt_rec_t	*ep;		/* extent record pointer */
 	int		error;		/* error return */
-	xfs_bmbt_irec_t	got;		/* current extent list record */
+	xfs_bmbt_irec_t	got;		/* current file extent record */
 	xfs_ifork_t	*ifp;		/* inode fork pointer */
 	xfs_extlen_t	indlen;		/* indirect blocks length */
 	xfs_extnum_t	lastx;		/* last useful extent number */
@@ -4680,7 +4604,7 @@ xfs_bmapi(
 	int		nallocs;	/* number of extents alloc\'d */
 	xfs_extnum_t	nextents;	/* number of extents in file */
 	xfs_fileoff_t	obno;		/* old block number (offset) */
-	xfs_bmbt_irec_t	prev;		/* previous extent list record */
+	xfs_bmbt_irec_t	prev;		/* previous file extent record */
 	int		tmp_logflags;	/* temp flags holder */
 	int		whichfork;	/* data or attr fork */
 	char		inhole;		/* current location is hole in file */
@@ -4805,7 +4729,7 @@ xfs_bmapi(
 				alen = (xfs_extlen_t)got.br_blockcount;
 				aoff = got.br_startoff;
 				if (lastx != NULLEXTNUM && lastx) {
-					ep = &ifp->if_u1.if_extents[lastx - 1];
+					ep = xfs_iext_get_ext(ifp, lastx - 1);
 					xfs_bmbt_get_all(ep, &prev);
 				}
 			} else if (wasdelay) {
@@ -5016,7 +4940,7 @@ xfs_bmapi(
 			if (error)
 				goto error0;
 			lastx = ifp->if_lastex;
-			ep = &ifp->if_u1.if_extents[lastx];
+			ep = xfs_iext_get_ext(ifp, lastx);
 			nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 			xfs_bmbt_get_all(ep, &got);
 			ASSERT(got.br_startoff <= aoff);
@@ -5112,7 +5036,7 @@ xfs_bmapi(
 			if (error)
 				goto error0;
 			lastx = ifp->if_lastex;
-			ep = &ifp->if_u1.if_extents[lastx];
+			ep = xfs_iext_get_ext(ifp, lastx);
 			nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 			xfs_bmbt_get_all(ep, &got);
 			/*
@@ -5168,8 +5092,7 @@ xfs_bmapi(
 		/*
 		 * Else go on to the next record.
 		 */
-		ep++;
-		lastx++;
+		ep = xfs_iext_get_ext(ifp, ++lastx);
 		if (lastx >= nextents) {
 			eof = 1;
 			prev = got;
@@ -5199,7 +5122,7 @@ xfs_bmapi(
 error0:
 	/*
 	 * Log everything.  Do this after conversion, there's no point in
-	 * logging the extent list if we've converted to btree format.
+	 * logging the extent records if we've converted to btree format.
 	 */
 	if ((logflags & XFS_ILOG_FEXT(whichfork)) &&
 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
@@ -5252,12 +5175,12 @@ xfs_bmapi_single(
 	xfs_fsblock_t	*fsb,		/* output: mapped block */
 	xfs_fileoff_t	bno)		/* starting file offs. mapped */
 {
-	int		eof;		/* we've hit the end of extent list */
+	int		eof;		/* we've hit the end of extents */
 	int		error;		/* error return */
-	xfs_bmbt_irec_t	got;		/* current extent list record */
+	xfs_bmbt_irec_t	got;		/* current file extent record */
 	xfs_ifork_t	*ifp;		/* inode fork pointer */
 	xfs_extnum_t	lastx;		/* last useful extent number */
-	xfs_bmbt_irec_t	prev;		/* previous extent list record */
+	xfs_bmbt_irec_t	prev;		/* previous file extent record */
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (unlikely(
@@ -5312,18 +5235,18 @@ xfs_bunmapi(
 	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
 	xfs_bmbt_irec_t		del;		/* extent being deleted */
 	int			eof;		/* is deleting at eof */
-	xfs_bmbt_rec_t		*ep;		/* extent list entry pointer */
+	xfs_bmbt_rec_t		*ep;		/* extent record pointer */
 	int			error;		/* error return value */
 	xfs_extnum_t		extno;		/* extent number in list */
-	xfs_bmbt_irec_t		got;		/* current extent list entry */
+	xfs_bmbt_irec_t		got;		/* current extent record */
 	xfs_ifork_t		*ifp;		/* inode fork pointer */
 	int			isrt;		/* freeing in rt area */
 	xfs_extnum_t		lastx;		/* last extent index used */
 	int			logflags;	/* transaction logging flags */
 	xfs_extlen_t		mod;		/* rt extent offset */
 	xfs_mount_t		*mp;		/* mount structure */
-	xfs_extnum_t		nextents;	/* size of extent list */
-	xfs_bmbt_irec_t		prev;		/* previous extent list entry */
+	xfs_extnum_t		nextents;	/* number of file extents */
+	xfs_bmbt_irec_t		prev;		/* previous extent record */
 	xfs_fileoff_t		start;		/* first file offset deleted */
 	int			tmp_logflags;	/* partial logging flags */
 	int			wasdel;		/* was a delayed alloc extent */
@@ -5369,7 +5292,7 @@ xfs_bunmapi(
 	 * file, back up to the last block if so...
 	 */
 	if (eof) {
-		ep = &ifp->if_u1.if_extents[--lastx];
+		ep = xfs_iext_get_ext(ifp, --lastx);
 		xfs_bmbt_get_all(ep, &got);
 		bno = got.br_startoff + got.br_blockcount - 1;
 	}
@@ -5393,7 +5316,7 @@ xfs_bunmapi(
 		if (got.br_startoff > bno) {
 			if (--lastx < 0)
 				break;
-			ep--;
+			ep = xfs_iext_get_ext(ifp, lastx);
 			xfs_bmbt_get_all(ep, &got);
 		}
 		/*
@@ -5440,7 +5363,8 @@ xfs_bunmapi(
 					del.br_blockcount : mod;
 				if (bno < got.br_startoff) {
 					if (--lastx >= 0)
-						xfs_bmbt_get_all(--ep, &got);
+						xfs_bmbt_get_all(xfs_iext_get_ext(
+							ifp, lastx), &got);
 				}
 				continue;
 			}
@@ -5500,7 +5424,8 @@ xfs_bunmapi(
 				 * try again.
 				 */
 				ASSERT(lastx > 0);
-				xfs_bmbt_get_all(ep - 1, &prev);
+				xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
+						lastx - 1), &prev);
 				ASSERT(prev.br_state == XFS_EXT_NORM);
 				ASSERT(!ISNULLSTARTBLOCK(prev.br_startblock));
 				ASSERT(del.br_startblock ==
@@ -5587,12 +5512,12 @@ nodelete:
 		 * If not done go on to the next (previous) record.
 		 * Reset ep in case the extents array was re-alloced.
 		 */
-		ep = &ifp->if_u1.if_extents[lastx];
+		ep = xfs_iext_get_ext(ifp, lastx);
 		if (bno != (xfs_fileoff_t)-1 && bno >= start) {
 			if (lastx >= XFS_IFORK_NEXTENTS(ip, whichfork) ||
 			    xfs_bmbt_get_startoff(ep) > bno) {
-				lastx--;
-				ep--;
+				if (--lastx >= 0)
+					ep = xfs_iext_get_ext(ifp, lastx);
 			}
 			if (lastx >= 0)
 				xfs_bmbt_get_all(ep, &got);
@@ -5636,7 +5561,7 @@ nodelete:
 error0:
 	/*
 	 * Log everything.  Do this after conversion, there's no point in
-	 * logging the extent list if we've converted to btree format.
+	 * logging the extent records if we've converted to btree format.
 	 */
 	if ((logflags & XFS_ILOG_FEXT(whichfork)) &&
 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
@@ -5892,9 +5817,9 @@ xfs_bmap_isaeof(
 {
 	int		error;		/* error return value */
 	xfs_ifork_t	*ifp;		/* inode fork pointer */
-	xfs_bmbt_rec_t	*lastrec;	/* extent list entry pointer */
-	xfs_extnum_t	nextents;	/* size of extent list */
-	xfs_bmbt_irec_t	s;		/* expanded extent list entry */
+	xfs_bmbt_rec_t	*lastrec;	/* extent record pointer */
+	xfs_extnum_t	nextents;	/* number of file extents */
+	xfs_bmbt_irec_t	s;		/* expanded extent record */
 
 	ASSERT(whichfork == XFS_DATA_FORK);
 	ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -5909,7 +5834,7 @@ xfs_bmap_isaeof(
 	/*
 	 * Go to the last extent
 	 */
-	lastrec = &ifp->if_u1.if_extents[nextents - 1];
+	lastrec = xfs_iext_get_ext(ifp, nextents - 1);
 	xfs_bmbt_get_all(lastrec, &s);
 	/*
 	 * Check we are allocating in the last extent (for delayed allocations)
@@ -5936,8 +5861,8 @@ xfs_bmap_eof(
 	xfs_fsblock_t	blockcount;	/* extent block count */
 	int		error;		/* error return value */
 	xfs_ifork_t	*ifp;		/* inode fork pointer */
-	xfs_bmbt_rec_t	*lastrec;	/* extent list entry pointer */
-	xfs_extnum_t	nextents;	/* size of extent list */
+	xfs_bmbt_rec_t	*lastrec;	/* extent record pointer */
+	xfs_extnum_t	nextents;	/* number of file extents */
 	xfs_fileoff_t	startoff;	/* extent starting file offset */
 
 	ASSERT(whichfork == XFS_DATA_FORK);
@@ -5953,7 +5878,7 @@ xfs_bmap_eof(
 	/*
 	 * Go to the last extent
 	 */
-	lastrec = &ifp->if_u1.if_extents[nextents - 1];
+	lastrec = xfs_iext_get_ext(ifp, nextents - 1);
 	startoff = xfs_bmbt_get_startoff(lastrec);
 	blockcount = xfs_bmbt_get_blockcount(lastrec);
 	*eof = endoff >= startoff + blockcount;
@@ -5969,18 +5894,21 @@ xfs_bmap_check_extents(
 	xfs_inode_t		*ip,		/* incore inode pointer */
 	int			whichfork)	/* data or attr fork */
 {
-	xfs_bmbt_rec_t		*base;		/* base of extents list */
 	xfs_bmbt_rec_t		*ep;		/* current extent entry */
+	xfs_extnum_t		idx;		/* extent record index */
 	xfs_ifork_t		*ifp;		/* inode fork pointer */
 	xfs_extnum_t		nextents;	/* number of extents in list */
+	xfs_bmbt_rec_t		*nextp;		/* next extent entry */
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-	base = ifp->if_u1.if_extents;
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	for (ep = base; ep < &base[nextents - 1]; ep++) {
+	ep = xfs_iext_get_ext(ifp, 0);
+	for (idx = 0; idx < nextents - 1; idx++) {
+		nextp = xfs_iext_get_ext(ifp, idx + 1);
 		xfs_btree_check_rec(XFS_BTNUM_BMAP, (void *)ep,
-			(void *)(ep + 1));
+			(void *)(nextp));
+		ep = nextp;
 	}
 }
 
@@ -6119,12 +6047,14 @@ xfs_bmap_check_leaf_extents(
 	xfs_fsblock_t		bno;	/* block # of "block" */
 	xfs_buf_t		*bp;	/* buffer for "block" */
 	int			error;	/* error return value */
-	xfs_extnum_t		i=0;	/* index into the extents list */
+	xfs_extnum_t		i=0, j;	/* index into the extents list */
 	xfs_ifork_t		*ifp;	/* fork structure */
 	int			level;	/* btree level, for checking */
 	xfs_mount_t		*mp;	/* file system mount structure */
 	xfs_bmbt_ptr_t		*pp;	/* pointer to block address */
-	xfs_bmbt_rec_t		*ep, *lastp;	/* extent pointers in block entry */
+	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
+	xfs_bmbt_rec_t		*lastp; /* pointer to previous extent */
+	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
 	int			bp_release = 0;
 
 	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
@@ -6194,7 +6124,6 @@ xfs_bmap_check_leaf_extents(
 	 */
 	lastp = NULL;
 	for (;;) {
-		xfs_bmbt_rec_t	*frp;
 		xfs_fsblock_t	nextbno;
 		xfs_extnum_t	num_recs;
 
@@ -6213,18 +6142,20 @@ xfs_bmap_check_leaf_extents(
 		 * conform with the first entry in this one.
 		 */
 
-		frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+		ep = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
 			block, 1, mp->m_bmap_dmxr[0]);
-
-		for (ep = frp;ep < frp + (num_recs - 1); ep++) {
+		for (j = 1; j < num_recs; j++) {
+			nextp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+				block, j + 1, mp->m_bmap_dmxr[0]);
 			if (lastp) {
 				xfs_btree_check_rec(XFS_BTNUM_BMAP,
 					(void *)lastp, (void *)ep);
 			}
 			xfs_btree_check_rec(XFS_BTNUM_BMAP, (void *)ep,
-				(void *)(ep + 1));
+				(void *)(nextp));
+			lastp = ep;
+			ep = nextp;
 		}
-		lastp = frp + num_recs - 1; /* For the next iteration */
 
 		i += num_recs;
 		if (bp_release) {
@@ -6288,7 +6219,7 @@ xfs_bmap_count_blocks(
 	mp = ip->i_mount;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
-		if (unlikely(xfs_bmap_count_leaves(ifp->if_u1.if_extents,
+		if (unlikely(xfs_bmap_count_leaves(ifp, 0,
 			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
 			count) < 0)) {
 			XFS_ERROR_REPORT("xfs_bmap_count_blocks(1)",
@@ -6310,7 +6241,7 @@ xfs_bmap_count_blocks(
 	ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks);
 	bno = INT_GET(*pp, ARCH_CONVERT);
 
-	if (unlikely(xfs_bmap_count_tree(mp, tp, bno, level, count) < 0)) {
+	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
 		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
 				 mp);
 		return XFS_ERROR(EFSCORRUPTED);
@@ -6327,6 +6258,7 @@ int                                     /* error */
 xfs_bmap_count_tree(
 	xfs_mount_t     *mp,            /* file system mount point */
 	xfs_trans_t     *tp,            /* transaction pointer */
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
 	xfs_fsblock_t   blockno,	/* file system block number */
 	int             levelin,	/* level in btree */
 	int		*count)		/* Count of blocks */
@@ -6339,7 +6271,6 @@ xfs_bmap_count_tree(
 	xfs_fsblock_t		nextbno;
 	xfs_bmbt_block_t        *block, *nextblock;
 	int			numrecs;
-	xfs_bmbt_rec_t		*frp;
 
 	if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF)))
 		return error;
@@ -6364,7 +6295,7 @@ xfs_bmap_count_tree(
 			xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
 		bno = INT_GET(*pp, ARCH_CONVERT);
 		if (unlikely((error =
-		     xfs_bmap_count_tree(mp, tp, bno, level, count)) < 0)) {
+		     xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
 			xfs_trans_brelse(tp, bp);
 			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
 					 XFS_ERRLEVEL_LOW, mp);
@@ -6376,9 +6307,8 @@ xfs_bmap_count_tree(
 		for (;;) {
 			nextbno = be64_to_cpu(block->bb_rightsib);
 			numrecs = be16_to_cpu(block->bb_numrecs);
-			frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize,
-				xfs_bmbt, block, 1, mp->m_bmap_dmxr[0]);
-			if (unlikely(xfs_bmap_disk_count_leaves(frp, numrecs, count) < 0)) {
+			if (unlikely(xfs_bmap_disk_count_leaves(ifp, mp,
+					0, block, numrecs, count) < 0)) {
 				xfs_trans_brelse(tp, bp);
 				XFS_ERROR_REPORT("xfs_bmap_count_tree(2)",
 						 XFS_ERRLEVEL_LOW, mp);
@@ -6399,33 +6329,45 @@ xfs_bmap_count_tree(
 }
 
 /*
- * Count leaf blocks given a pointer to an extent list.
+ * Count leaf blocks given a range of extent records.
  */
 int
 xfs_bmap_count_leaves(
-	xfs_bmbt_rec_t		*frp,
+	xfs_ifork_t		*ifp,
+	xfs_extnum_t		idx,
 	int			numrecs,
 	int			*count)
 {
 	int		b;
+	xfs_bmbt_rec_t	*frp;
 
-	for ( b = 1; b <= numrecs; b++, frp++)
+	for (b = 0; b < numrecs; b++) {
+		frp = xfs_iext_get_ext(ifp, idx + b);
 		*count += xfs_bmbt_get_blockcount(frp);
+	}
 	return 0;
 }
 
 /*
- * Count leaf blocks given a pointer to an extent list originally in btree format.
+ * Count leaf blocks given a range of extent records originally
+ * in btree format.
  */
 int
 xfs_bmap_disk_count_leaves(
-	xfs_bmbt_rec_t		*frp,
+	xfs_ifork_t		*ifp,
+	xfs_mount_t		*mp,
+	xfs_extnum_t		idx,
+	xfs_bmbt_block_t	*block,
 	int			numrecs,
 	int			*count)
 {
 	int		b;
+	xfs_bmbt_rec_t	*frp;
 
-	for ( b = 1; b <= numrecs; b++, frp++)
+	for (b = 1; b <= numrecs; b++) {
+		frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize,
+			xfs_bmbt, block, idx + b, mp->m_bmap_dmxr[0]);
 		*count += xfs_bmbt_disk_get_blockcount(frp);
+	}
 	return 0;
 }
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 12cc63dfc2c..4c05f95452c 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -20,6 +20,7 @@
 
 struct getbmap;
 struct xfs_bmbt_irec;
+struct xfs_ifork;
 struct xfs_inode;
 struct xfs_mount;
 struct xfs_trans;
@@ -347,7 +348,8 @@ xfs_bmap_count_blocks(
  */
 int
 xfs_check_nostate_extents(
-	xfs_bmbt_rec_t		*ep,
+	struct xfs_ifork	*ifp,
+	xfs_extnum_t		idx,
 	xfs_extnum_t		num);
 
 #endif	/* __KERNEL__ */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 3f1383d160e..bea44709afb 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -2754,7 +2754,7 @@ xfs_bmbt_update(
 }
 
 /*
- * Check an extent list, which has just been read, for
+ * Check extent records, which have just been read, for
  * any bit in the extent flag field. ASSERT on debug
  * kernels, as this condition should not occur.
  * Return an error condition (1) if any flags found,
@@ -2763,10 +2763,14 @@ xfs_bmbt_update(
 
 int
 xfs_check_nostate_extents(
-	xfs_bmbt_rec_t		*ep,
+	xfs_ifork_t		*ifp,
+	xfs_extnum_t		idx,
 	xfs_extnum_t		num)
 {
-	for (; num > 0; num--, ep++) {
+	xfs_bmbt_rec_t		*ep;
+
+	for (; num > 0; num--, idx++) {
+		ep = xfs_iext_get_ext(ifp, idx);
 		if ((ep->l0 >>
 		     (64 - BMBT_EXNTFLAG_BITLEN)) != 0) {
 			ASSERT(0);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1d7f5a7e063..6459395a0e4 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -76,16 +76,18 @@ STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
  */
 STATIC void
 xfs_validate_extents(
-	xfs_bmbt_rec_t		*ep,
+	xfs_ifork_t		*ifp,
 	int			nrecs,
 	int			disk,
 	xfs_exntfmt_t		fmt)
 {
+	xfs_bmbt_rec_t		*ep;
 	xfs_bmbt_irec_t		irec;
 	xfs_bmbt_rec_t		rec;
 	int			i;
 
 	for (i = 0; i < nrecs; i++) {
+		ep = xfs_iext_get_ext(ifp, i);
 		rec.l0 = get_unaligned((__uint64_t*)&ep->l0);
 		rec.l1 = get_unaligned((__uint64_t*)&ep->l1);
 		if (disk)
@@ -94,11 +96,10 @@ xfs_validate_extents(
 			xfs_bmbt_get_all(&rec, &irec);
 		if (fmt == XFS_EXTFMT_NOSTATE)
 			ASSERT(irec.br_state == XFS_EXT_NORM);
-		ep++;
 	}
 }
 #else /* DEBUG */
-#define xfs_validate_extents(ep, nrecs, disk, fmt)
+#define xfs_validate_extents(ifp, nrecs, disk, fmt)
 #endif /* DEBUG */
 
 /*
@@ -597,7 +598,6 @@ xfs_iformat_extents(
 	xfs_bmbt_rec_t	*ep, *dp;
 	xfs_ifork_t	*ifp;
 	int		nex;
-	int		real_size;
 	int		size;
 	int		i;
 
@@ -619,23 +619,20 @@ xfs_iformat_extents(
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 
-	real_size = 0;
+	ifp->if_real_bytes = 0;
 	if (nex == 0)
 		ifp->if_u1.if_extents = NULL;
 	else if (nex <= XFS_INLINE_EXTS)
 		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
-	else {
-		ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP);
-		ASSERT(ifp->if_u1.if_extents != NULL);
-		real_size = size;
-	}
+	else
+		xfs_iext_add(ifp, 0, nex);
+
 	ifp->if_bytes = size;
-	ifp->if_real_bytes = real_size;
 	if (size) {
 		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
-		xfs_validate_extents(dp, nex, 1, XFS_EXTFMT_INODE(ip));
-		ep = ifp->if_u1.if_extents;
-		for (i = 0; i < nex; i++, ep++, dp++) {
+		xfs_validate_extents(ifp, nex, 1, XFS_EXTFMT_INODE(ip));
+		for (i = 0; i < nex; i++, dp++) {
+			ep = xfs_iext_get_ext(ifp, i);
 			ep->l0 = INT_GET(get_unaligned((__uint64_t*)&dp->l0),
 								ARCH_CONVERT);
 			ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1),
@@ -646,7 +643,7 @@ xfs_iformat_extents(
 		if (whichfork != XFS_DATA_FORK ||
 			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
 				if (unlikely(xfs_check_nostate_extents(
-				    ifp->if_u1.if_extents, nex))) {
+				    ifp, 0, nex))) {
 					XFS_ERROR_REPORT("xfs_iformat_extents(2)",
 							 XFS_ERRLEVEL_LOW,
 							 ip->i_mount);
@@ -1015,6 +1012,7 @@ xfs_iread_extents(
 {
 	int		error;
 	xfs_ifork_t	*ifp;
+	xfs_extnum_t	nextents;
 	size_t		size;
 
 	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
@@ -1022,26 +1020,24 @@ xfs_iread_extents(
 				 ip->i_mount);
 		return XFS_ERROR(EFSCORRUPTED);
 	}
-	size = XFS_IFORK_NEXTENTS(ip, whichfork) * (uint)sizeof(xfs_bmbt_rec_t);
+	nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
+	size = nextents * sizeof(xfs_bmbt_rec_t);
 	ifp = XFS_IFORK_PTR(ip, whichfork);
+
 	/*
 	 * We know that the size is valid (it's checked in iformat_btree)
 	 */
-	ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP);
-	ASSERT(ifp->if_u1.if_extents != NULL);
 	ifp->if_lastex = NULLEXTNUM;
-	ifp->if_bytes = ifp->if_real_bytes = (int)size;
+	ifp->if_bytes = ifp->if_real_bytes = 0;
 	ifp->if_flags |= XFS_IFEXTENTS;
+	xfs_iext_add(ifp, 0, nextents);
 	error = xfs_bmap_read_extents(tp, ip, whichfork);
 	if (error) {
-		kmem_free(ifp->if_u1.if_extents, size);
-		ifp->if_u1.if_extents = NULL;
-		ifp->if_bytes = ifp->if_real_bytes = 0;
+		xfs_iext_destroy(ifp);
 		ifp->if_flags &= ~XFS_IFEXTENTS;
 		return error;
 	}
-	xfs_validate_extents((xfs_bmbt_rec_t *)ifp->if_u1.if_extents,
-		XFS_IFORK_NEXTENTS(ip, whichfork), 0, XFS_EXTFMT_INODE(ip));
+	xfs_validate_extents(ifp, nextents, 0, XFS_EXTFMT_INODE(ip));
 	return 0;
 }
 
@@ -2475,92 +2471,6 @@ xfs_iroot_realloc(
 }
 
 
-/*
- * This is called when the amount of space needed for if_extents
- * is increased or decreased.  The change in size is indicated by
- * the number of extents that need to be added or deleted in the
- * ext_diff parameter.
- *
- * If the amount of space needed has decreased below the size of the
- * inline buffer, then switch to using the inline buffer.  Otherwise,
- * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
- * to what is needed.
- *
- * ip -- the inode whose if_extents area is changing
- * ext_diff -- the change in the number of extents, positive or negative,
- *	 requested for the if_extents array.
- */
-void
-xfs_iext_realloc(
-	xfs_inode_t	*ip,
-	int		ext_diff,
-	int		whichfork)
-{
-	int		byte_diff;
-	xfs_ifork_t	*ifp;
-	int		new_size;
-	uint		rnew_size;
-
-	if (ext_diff == 0) {
-		return;
-	}
-
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	byte_diff = ext_diff * (uint)sizeof(xfs_bmbt_rec_t);
-	new_size = (int)ifp->if_bytes + byte_diff;
-	ASSERT(new_size >= 0);
-
-	if (new_size == 0) {
-		if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) {
-			ASSERT(ifp->if_real_bytes != 0);
-			kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
-		}
-		ifp->if_u1.if_extents = NULL;
-		rnew_size = 0;
-	} else if (new_size <= sizeof(ifp->if_u2.if_inline_ext)) {
-		/*
-		 * If the valid extents can fit in if_inline_ext,
-		 * copy them from the malloc'd vector and free it.
-		 */
-		if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) {
-			/*
-			 * For now, empty files are format EXTENTS,
-			 * so the if_extents pointer is null.
-			 */
-			if (ifp->if_u1.if_extents) {
-				memcpy(ifp->if_u2.if_inline_ext,
-					ifp->if_u1.if_extents, new_size);
-				kmem_free(ifp->if_u1.if_extents,
-					  ifp->if_real_bytes);
-			}
-			ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
-		}
-		rnew_size = 0;
-	} else {
-		rnew_size = new_size;
-		if ((rnew_size & (rnew_size - 1)) != 0)
-			rnew_size = xfs_iroundup(rnew_size);
-		/*
-		 * Stuck with malloc/realloc.
-		 */
-		if (ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext) {
-			ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
-				kmem_alloc(rnew_size, KM_SLEEP);
-			memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
-			      sizeof(ifp->if_u2.if_inline_ext));
-		} else if (rnew_size != ifp->if_real_bytes) {
-			ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
-			  kmem_realloc(ifp->if_u1.if_extents,
-					rnew_size,
-					ifp->if_real_bytes,
-					KM_NOFS);
-		}
-	}
-	ifp->if_real_bytes = rnew_size;
-	ifp->if_bytes = new_size;
-}
-
-
 /*
  * This is called when the amount of space needed for if_data
  * is increased or decreased.  The change in size is indicated by
@@ -2723,9 +2633,7 @@ xfs_idestroy_fork(
 		   (ifp->if_u1.if_extents != NULL) &&
 		   (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)) {
 		ASSERT(ifp->if_real_bytes != 0);
-		kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
-		ifp->if_u1.if_extents = NULL;
-		ifp->if_real_bytes = 0;
+		xfs_iext_destroy(ifp);
 	}
 	ASSERT(ifp->if_u1.if_extents == NULL ||
 	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
@@ -2902,16 +2810,15 @@ xfs_iextents_copy(
 	 * the delayed ones.  There must be at least one
 	 * non-delayed extent.
 	 */
-	ep = ifp->if_u1.if_extents;
 	dest_ep = buffer;
 	copied = 0;
 	for (i = 0; i < nrecs; i++) {
+		ep = xfs_iext_get_ext(ifp, i);
 		start_block = xfs_bmbt_get_startblock(ep);
 		if (ISNULLSTARTBLOCK(start_block)) {
 			/*
 			 * It's a delayed allocation extent, so skip it.
 			 */
-			ep++;
 			continue;
 		}
 
@@ -2921,11 +2828,10 @@ xfs_iextents_copy(
 		put_unaligned(INT_GET(ep->l1, ARCH_CONVERT),
 			      (__uint64_t*)&dest_ep->l1);
 		dest_ep++;
-		ep++;
 		copied++;
 	}
 	ASSERT(copied != 0);
-	xfs_validate_extents(buffer, copied, 1, XFS_EXTFMT_INODE(ip));
+	xfs_validate_extents(ifp, copied, 1, XFS_EXTFMT_INODE(ip));
 
 	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
 }
@@ -2995,8 +2901,10 @@ xfs_iflush_fork(
 	case XFS_DINODE_FMT_EXTENTS:
 		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
 		       !(iip->ili_format.ilf_fields & extflag[whichfork]));
-		ASSERT((ifp->if_u1.if_extents != NULL) || (ifp->if_bytes == 0));
-		ASSERT((ifp->if_u1.if_extents == NULL) || (ifp->if_bytes > 0));
+		ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) ||
+			(ifp->if_bytes == 0));
+		ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) ||
+			(ifp->if_bytes > 0));
 		if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
 		    (ifp->if_bytes > 0)) {
 			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
@@ -3704,3 +3612,327 @@ xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra)
 		     NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);
 }
 #endif
+
+/*
+ * Return a pointer to the extent record at file index idx.
+ */
+xfs_bmbt_rec_t *
+xfs_iext_get_ext(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	idx)		/* index of target extent */
+{
+	ASSERT(idx >= 0);
+	if (ifp->if_bytes) {
+		return &ifp->if_u1.if_extents[idx];
+	} else {
+		return NULL;
+	}
+}
+
+/*
+ * Insert new item(s) into the extent records for incore inode
+ * fork 'ifp'.  'count' new items are inserted at index 'idx'.
+ */
+void
+xfs_iext_insert(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	idx,		/* starting index of new items */
+	xfs_extnum_t	count,		/* number of inserted items */
+	xfs_bmbt_irec_t	*new)		/* items to insert */
+{
+	xfs_bmbt_rec_t	*ep;		/* extent record pointer */
+	xfs_extnum_t	i;		/* extent record index */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+	xfs_iext_add(ifp, idx, count);
+	for (i = idx; i < idx + count; i++, new++) {
+		ep = xfs_iext_get_ext(ifp, i);
+		xfs_bmbt_set_all(ep, new);
+	}
+}
+
+/*
+ * This is called when the amount of space required for incore file
+ * extents needs to be increased. The ext_diff parameter stores the
+ * number of new extents being added and the idx parameter contains
+ * the extent index where the new extents will be added. If the new
+ * extents are being appended, then we just need to (re)allocate and
+ * initialize the space. Otherwise, if the new extents are being
+ * inserted into the middle of the existing entries, a bit more work
+ * is required to make room for the new extents to be inserted. The
+ * caller is responsible for filling in the new extent entries upon
+ * return.
+ */
+void
+xfs_iext_add(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	idx,		/* index to begin adding exts */
+	int		ext_diff)	/* nubmer of extents to add */
+{
+	int		byte_diff;	/* new bytes being added */
+	int		new_size;	/* size of extents after adding */
+	xfs_extnum_t	nextents;	/* number of extents in file */
+
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	ASSERT((idx >= 0) && (idx <= nextents));
+	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
+	new_size = ifp->if_bytes + byte_diff;
+	/*
+	 * If the new number of extents (nextents + ext_diff)
+	 * fits inside the inode, then continue to use the inline
+	 * extent buffer.
+	 */
+	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
+		if (idx < nextents) {
+			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
+				&ifp->if_u2.if_inline_ext[idx],
+				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
+			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
+		}
+		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+		ifp->if_real_bytes = 0;
+	}
+	/*
+	 * Otherwise use a linear (direct) extent list.
+	 * If the extents are currently inside the inode,
+	 * xfs_iext_realloc_direct will switch us from
+	 * inline to direct extent allocation mode.
+	 */
+	else {
+		xfs_iext_realloc_direct(ifp, new_size);
+		if (idx < nextents) {
+			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
+				&ifp->if_u1.if_extents[idx],
+				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
+			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
+		}
+	}
+	ifp->if_bytes = new_size;
+}
+
+/*
+ * This is called when the amount of space required for incore file
+ * extents needs to be decreased. The ext_diff parameter stores the
+ * number of extents to be removed and the idx parameter contains
+ * the extent index where the extents will be removed from.
+ */
+void
+xfs_iext_remove(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	idx,		/* index to begin removing exts */
+	int		ext_diff)	/* number of extents to remove */
+{
+	xfs_extnum_t	nextents;	/* number of extents in file */
+	int		new_size;	/* size of extents after removal */
+
+	ASSERT(ext_diff > 0);
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
+
+	if (new_size == 0) {
+		xfs_iext_destroy(ifp);
+	} else if (ifp->if_real_bytes) {
+		xfs_iext_remove_direct(ifp, idx, ext_diff);
+	} else {
+		xfs_iext_remove_inline(ifp, idx, ext_diff);
+	}
+	ifp->if_bytes = new_size;
+}
+
+/*
+ * This removes ext_diff extents from the inline buffer, beginning
+ * at extent index idx.
+ */
+void
+xfs_iext_remove_inline(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	idx,		/* index to begin removing exts */
+	int		ext_diff)	/* number of extents to remove */
+{
+	int		nextents;	/* number of extents in file */
+
+	ASSERT(idx < XFS_INLINE_EXTS);
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	ASSERT(((nextents - ext_diff) > 0) &&
+		(nextents - ext_diff) < XFS_INLINE_EXTS);
+
+	if (idx + ext_diff < nextents) {
+		memmove(&ifp->if_u2.if_inline_ext[idx],
+			&ifp->if_u2.if_inline_ext[idx + ext_diff],
+			(nextents - (idx + ext_diff)) *
+			 sizeof(xfs_bmbt_rec_t));
+		memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
+			0, ext_diff * sizeof(xfs_bmbt_rec_t));
+	} else {
+		memset(&ifp->if_u2.if_inline_ext[idx], 0,
+			ext_diff * sizeof(xfs_bmbt_rec_t));
+	}
+}
+
+/*
+ * This removes ext_diff extents from a linear (direct) extent list,
+ * beginning at extent index idx. If the extents are being removed
+ * from the end of the list (ie. truncate) then we just need to re-
+ * allocate the list to remove the extra space. Otherwise, if the
+ * extents are being removed from the middle of the existing extent
+ * entries, then we first need to move the extent records beginning
+ * at idx + ext_diff up in the list to overwrite the records being
+ * removed, then remove the extra space via kmem_realloc.
+ */
+void
+xfs_iext_remove_direct(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	idx,		/* index to begin removing exts */
+	int		ext_diff)	/* number of extents to remove */
+{
+	xfs_extnum_t	nextents;	/* number of extents in file */
+	int		new_size;	/* size of extents after removal */
+
+	new_size = ifp->if_bytes -
+		(ext_diff * sizeof(xfs_bmbt_rec_t));
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+
+	if (new_size == 0) {
+		xfs_iext_destroy(ifp);
+		return;
+	}
+	/* Move extents up in the list (if needed) */
+	if (idx + ext_diff < nextents) {
+		memmove(&ifp->if_u1.if_extents[idx],
+			&ifp->if_u1.if_extents[idx + ext_diff],
+			(nextents - (idx + ext_diff)) *
+			 sizeof(xfs_bmbt_rec_t));
+	}
+	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
+		0, ext_diff * sizeof(xfs_bmbt_rec_t));
+	/*
+	 * Reallocate the direct extent list. If the extents
+	 * will fit inside the inode then xfs_iext_realloc_direct
+	 * will switch from direct to inline extent allocation
+	 * mode for us.
+	 */
+	xfs_iext_realloc_direct(ifp, new_size);
+	ifp->if_bytes = new_size;
+}
+
+/*
+ * Create, destroy, or resize a linear (direct) block of extents.
+ */
+void
+xfs_iext_realloc_direct(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	int		new_size)	/* new size of extents */
+{
+	int		rnew_size;	/* real new size of extents */
+
+	rnew_size = new_size;
+
+	/* Free extent records */
+	if (new_size == 0) {
+		xfs_iext_destroy(ifp);
+	}
+	/* Resize direct extent list and zero any new bytes */
+	else if (ifp->if_real_bytes) {
+		/* Check if extents will fit inside the inode */
+		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
+			xfs_iext_direct_to_inline(ifp, new_size /
+				(uint)sizeof(xfs_bmbt_rec_t));
+			ifp->if_bytes = new_size;
+			return;
+		}
+		if ((new_size & (new_size - 1)) != 0) {
+			rnew_size = xfs_iroundup(new_size);
+		}
+		if (rnew_size != ifp->if_real_bytes) {
+			ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
+				kmem_realloc(ifp->if_u1.if_extents,
+						rnew_size,
+						ifp->if_real_bytes,
+						KM_SLEEP);
+		}
+		if (rnew_size > ifp->if_real_bytes) {
+			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
+				(uint)sizeof(xfs_bmbt_rec_t)], 0,
+				rnew_size - ifp->if_real_bytes);
+		}
+	}
+	/*
+	 * Switch from the inline extent buffer to a direct
+	 * extent list. Be sure to include the inline extent
+	 * bytes in new_size.
+	 */
+	else {
+		new_size += ifp->if_bytes;
+		if ((new_size & (new_size - 1)) != 0) {
+			rnew_size = xfs_iroundup(new_size);
+		}
+		xfs_iext_inline_to_direct(ifp, rnew_size);
+	}
+	ifp->if_real_bytes = rnew_size;
+	ifp->if_bytes = new_size;
+}
+
+/*
+ * Switch from linear (direct) extent records to inline buffer.
+ */
+void
+xfs_iext_direct_to_inline(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	nextents)	/* number of extents in file */
+{
+	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+	ASSERT(nextents <= XFS_INLINE_EXTS);
+	/*
+	 * The inline buffer was zeroed when we switched
+	 * from inline to direct extent allocation mode,
+	 * so we don't need to clear it here.
+	 */
+	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
+		nextents * sizeof(xfs_bmbt_rec_t));
+	kmem_free(ifp->if_u1.if_extents, KM_SLEEP);
+	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+	ifp->if_real_bytes = 0;
+}
+
+/*
+ * Switch from inline buffer to linear (direct) extent records.
+ * new_size should already be rounded up to the next power of 2
+ * by the caller (when appropriate), so use new_size as it is.
+ * However, since new_size may be rounded up, we can't update
+ * if_bytes here. It is the caller's responsibility to update
+ * if_bytes upon return.
+ */
+void
+xfs_iext_inline_to_direct(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	int		new_size)	/* number of extents in file */
+{
+	ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
+		kmem_alloc(new_size, KM_SLEEP);
+	memset(ifp->if_u1.if_extents, 0, new_size);
+	if (ifp->if_bytes) {
+		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
+			ifp->if_bytes);
+		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
+			sizeof(xfs_bmbt_rec_t));
+	}
+	ifp->if_real_bytes = new_size;
+}
+
+/*
+ * Free incore file extents.
+ */
+void
+xfs_iext_destroy(
+	xfs_ifork_t	*ifp)		/* inode fork pointer */
+{
+	if (ifp->if_real_bytes) {
+		kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
+	} else if (ifp->if_bytes) {
+		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
+			sizeof(xfs_bmbt_rec_t));
+	}
+	ifp->if_u1.if_extents = NULL;
+	ifp->if_real_bytes = 0;
+	ifp->if_bytes = 0;
+}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 1cfbcf18ce8..740b73fabd2 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -70,12 +70,6 @@ typedef struct xfs_ifork {
  */
 #define	XFS_IMAP_LOOKUP		0x1
 
-/*
- * Maximum number of extent pointers in if_u1.if_extents.
- */
-#define	XFS_MAX_INCORE_EXTENTS	32768
-
-
 #ifdef __KERNEL__
 struct bhv_desc;
 struct cred;
@@ -440,6 +434,18 @@ xfs_inode_t	*xfs_vtoi(struct vnode *vp);
 
 void		xfs_synchronize_atime(xfs_inode_t *);
 
+xfs_bmbt_rec_t	*xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
+void		xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t,
+				xfs_bmbt_irec_t *);
+void		xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int);
+void		xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int);
+void		xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int);
+void		xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int);
+void		xfs_iext_realloc_direct(xfs_ifork_t *, int);
+void		xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t);
+void		xfs_iext_inline_to_direct(xfs_ifork_t *, int);
+void		xfs_iext_destroy(xfs_ifork_t *);
+
 #define xfs_ipincount(ip)	((unsigned int) atomic_read(&ip->i_pincount))
 
 #ifdef DEBUG
-- 
cgit v1.2.3


From 0293ce3a9fd1b34c933a96577a8ba737b681cf75 Mon Sep 17 00:00:00 2001
From: Mandy Kirkconnell <alkirkco@sgi.com>
Date: Tue, 14 Mar 2006 13:30:23 +1100
Subject: [XFS] 929045 567344 This mod introduces multi-level in-core file
 extent functionality, building upon the new layout introduced in mod
 xfs-linux:xfs-kern:207390a.  The new multi-level extent allocations are only
 required for heavily fragmented files, so the old-style linear extent list is
 used on files until the extents reach a pre-determined size of 4k. 4k buffers
 are used because this is the system page size on Linux i386 and systems with
 larger page sizes don't seem to gain much, if anything, by using their native
 page size as the extent buffer size. Also, using 4k extent buffers everywhere
 provides a consistent interface for CXFS across different platforms.  The 4k
 extent buffers are managed by an indirection array (xfs_ext_irec_t) which is
 basically just a pointer array with a bit of extra information to keep track
 of the number of extents in each buffer as well as the extent offset of each
 buffer.  Major changes include:  - Add multi-level in-core file extent
 functionality to the xfs_iext_ subroutines introduced in mod:
 xfs-linux:xfs-kern:207390a  - Introduce 13 new subroutines which add
 functionality for multi-level   in-core file extents:	
 xfs_iext_add_indirect_multi() xfs_iext_remove_indirect()	  
 xfs_iext_realloc_indirect() xfs_iext_indirect_to_direct()	     
 xfs_iext_bno_to_irec() xfs_iext_idx_to_irec()	       xfs_iext_irec_init()
 xfs_iext_irec_new()	    xfs_iext_irec_remove() xfs_iext_irec_compact() 
 xfs_iext_irec_compact_pages() xfs_iext_irec_compact_full()	    
 xfs_iext_irec_update_extoffs()

SGI-PV: 928864
SGI-Modid: xfs-linux-melb:xfs-kern:207393a

Signed-off-by: Mandy Kirkconnell <alkirkco@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_bmap.c       |  59 +++-
 fs/xfs/xfs_bmap.h       |  18 ++
 fs/xfs/xfs_bmap_btree.h |   8 -
 fs/xfs/xfs_inode.c      | 715 +++++++++++++++++++++++++++++++++++++++++++++++-
 fs/xfs/xfs_inode.h      |  52 +++-
 5 files changed, 826 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 53c47a181f8..81a95b684b6 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3423,6 +3423,7 @@ xfs_bmap_local_to_extents(
 		xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
 		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
 		xfs_iext_add(ifp, 0, 1);
+		ASSERT((ifp->if_flags & (XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFEXTENTS);
 		ep = xfs_iext_get_ext(ifp, 0);
 		xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
 		xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork);
@@ -3551,6 +3552,54 @@ xfs_bmap_do_search_extents(
 	return ep;
 }
 
+/*
+ * Call xfs_bmap_do_search_extents() to search for the extent
+ * record containing block bno. If in multi-level in-core extent
+ * allocation mode, find and extract the target extent buffer,
+ * otherwise just use the direct extent list.
+ */
+xfs_bmbt_rec_t *			/* pointer to found extent entry */
+xfs_bmap_search_multi_extents(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_fileoff_t	bno,		/* block number searched for */
+	int		*eofp,		/* out: end of file found */
+	xfs_extnum_t	*lastxp,	/* out: last extent index */
+	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
+	xfs_bmbt_irec_t	*prevp)		/* out: previous extent entry found */
+{
+	xfs_bmbt_rec_t	*base;		/* base of extent records */
+	xfs_bmbt_rec_t	*ep;		/* extent record pointer */
+	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
+	xfs_extnum_t	lastx;		/* last extent index */
+	xfs_extnum_t	nextents;	/* number of file extents */
+
+	/*
+	 * For multi-level extent allocation mode, find the
+	 * target extent list and pass only the contiguous
+	 * list to xfs_bmap_do_search_extents. Convert lastx
+	 * from a file extent index to an index within the
+	 * target extent list.
+	 */
+	if (ifp->if_flags & XFS_IFEXTIREC) {
+		int	erp_idx = 0;
+		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
+		base = erp->er_extbuf;
+		nextents = erp->er_extcount;
+		lastx = ifp->if_lastex - erp->er_extoff;
+	} else {
+		base = &ifp->if_u1.if_extents[0];
+		nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+		lastx = ifp->if_lastex;
+	}
+	ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno,
+					eofp, lastxp, gotp, prevp);
+	/* Convert lastx back to file-based index */
+	if (ifp->if_flags & XFS_IFEXTIREC) {
+		*lastxp += erp->er_extoff;
+	}
+	return ep;
+}
+
 /*
  * Search the extents list for the inode, for the extent containing bno.
  * If bno lies in a hole, point to the next entry.  If bno lies past eof,
@@ -3569,20 +3618,14 @@ xfs_bmap_search_extents(
 	xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
 {
 	xfs_ifork_t	*ifp;		/* inode fork pointer */
-	xfs_bmbt_rec_t  *base;          /* base of extent list */
-	xfs_extnum_t    lastx;          /* last extent index used */
-	xfs_extnum_t    nextents;       /* number of file extents */
 	xfs_bmbt_rec_t  *ep;            /* extent record pointer */
 	int		rt;		/* realtime flag    */
 
 	XFS_STATS_INC(xs_look_exlist);
 	ifp = XFS_IFORK_PTR(ip, whichfork);
-	lastx = ifp->if_lastex;
-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	base = &ifp->if_u1.if_extents[0];
 
-	ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp,
-					  lastxp, gotp, prevp);
+	ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
+
 	rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
 	if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) {
                 cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld "
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 4c05f95452c..011ccaa9a1c 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -352,6 +352,24 @@ xfs_check_nostate_extents(
 	xfs_extnum_t		idx,
 	xfs_extnum_t		num);
 
+/*
+ * Call xfs_bmap_do_search_extents() to search for the extent
+ * record containing block bno. If in multi-level in-core extent
+ * allocation mode, find and extract the target extent buffer,
+ * otherwise just use the direct extent list.
+ */
+xfs_bmbt_rec_t *
+xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *,
+			xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
+
+/*
+ * Search an extent list for the extent which includes block
+ * bno.
+ */
+xfs_bmbt_rec_t *xfs_bmap_do_search_extents(xfs_bmbt_rec_t *,
+			xfs_extnum_t, xfs_extnum_t, xfs_fileoff_t, int *,
+			xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
+
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index e095a2d344a..6478cfa0e53 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -372,14 +372,6 @@ extern int xfs_bmbt_get_rec(struct xfs_btree_cur *, xfs_fileoff_t *,
 				xfs_exntst_t *, int *);
 #endif
 
-/*
- * Search an extent list for the extent which includes block
- * bno.
- */
-xfs_bmbt_rec_t *xfs_bmap_do_search_extents(xfs_bmbt_rec_t *,
-			xfs_extnum_t, xfs_extnum_t, xfs_fileoff_t, int *,
-			xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
-
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 6459395a0e4..580fa075803 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2630,8 +2630,9 @@ xfs_idestroy_fork(
 			ifp->if_real_bytes = 0;
 		}
 	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
-		   (ifp->if_u1.if_extents != NULL) &&
-		   (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)) {
+		   ((ifp->if_flags & XFS_IFEXTIREC) ||
+		    ((ifp->if_u1.if_extents != NULL) &&
+		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
 		ASSERT(ifp->if_real_bytes != 0);
 		xfs_iext_destroy(ifp);
 	}
@@ -3622,7 +3623,16 @@ xfs_iext_get_ext(
 	xfs_extnum_t	idx)		/* index of target extent */
 {
 	ASSERT(idx >= 0);
-	if (ifp->if_bytes) {
+	if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
+		return ifp->if_u1.if_ext_irec->er_extbuf;
+	} else if (ifp->if_flags & XFS_IFEXTIREC) {
+		xfs_ext_irec_t	*erp;		/* irec pointer */
+		int		erp_idx = 0;	/* irec index */
+		xfs_extnum_t	page_idx = idx;	/* ext index in target list */
+
+		erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
+		return &erp->er_extbuf[page_idx];
+	} else if (ifp->if_bytes) {
 		return &ifp->if_u1.if_extents[idx];
 	} else {
 		return NULL;
@@ -3691,6 +3701,7 @@ xfs_iext_add(
 		}
 		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
 		ifp->if_real_bytes = 0;
+		ifp->if_lastex = nextents + ext_diff;
 	}
 	/*
 	 * Otherwise use a linear (direct) extent list.
@@ -3698,7 +3709,7 @@ xfs_iext_add(
 	 * xfs_iext_realloc_direct will switch us from
 	 * inline to direct extent allocation mode.
 	 */
-	else {
+	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
 		xfs_iext_realloc_direct(ifp, new_size);
 		if (idx < nextents) {
 			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
@@ -3707,14 +3718,182 @@ xfs_iext_add(
 			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
 		}
 	}
+	/* Indirection array */
+	else {
+		xfs_ext_irec_t	*erp;
+		int		erp_idx = 0;
+		int		page_idx = idx;
+
+		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
+		if (ifp->if_flags & XFS_IFEXTIREC) {
+			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
+		} else {
+			xfs_iext_irec_init(ifp);
+			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+			erp = ifp->if_u1.if_ext_irec;
+		}
+		/* Extents fit in target extent page */
+		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
+			if (page_idx < erp->er_extcount) {
+				memmove(&erp->er_extbuf[page_idx + ext_diff],
+					&erp->er_extbuf[page_idx],
+					(erp->er_extcount - page_idx) *
+					sizeof(xfs_bmbt_rec_t));
+				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
+			}
+			erp->er_extcount += ext_diff;
+			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
+		}
+		/* Insert a new extent page */
+		else if (erp) {
+			xfs_iext_add_indirect_multi(ifp,
+				erp_idx, page_idx, ext_diff);
+		}
+		/*
+		 * If extent(s) are being appended to the last page in
+		 * the indirection array and the new extent(s) don't fit
+		 * in the page, then erp is NULL and erp_idx is set to
+		 * the next index needed in the indirection array.
+		 */
+		else {
+			int	count = ext_diff;
+
+			while (count) {
+				erp = xfs_iext_irec_new(ifp, erp_idx);
+				erp->er_extcount = count;
+				count -= MIN(count, (int)XFS_LINEAR_EXTS);
+				if (count) {
+					erp_idx++;
+				}
+			}
+		}
+	}
 	ifp->if_bytes = new_size;
 }
 
+/*
+ * This is called when incore extents are being added to the indirection
+ * array and the new extents do not fit in the target extent list. The
+ * erp_idx parameter contains the irec index for the target extent list
+ * in the indirection array, and the idx parameter contains the extent
+ * index within the list. The number of extents being added is stored
+ * in the count parameter.
+ *
+ *    |-------|   |-------|
+ *    |       |   |       |    idx - number of extents before idx
+ *    |  idx  |   | count |
+ *    |       |   |       |    count - number of extents being inserted at idx
+ *    |-------|   |-------|
+ *    | count |   | nex2  |    nex2 - number of extents after idx + count
+ *    |-------|   |-------|
+ */
+void
+xfs_iext_add_indirect_multi(
+	xfs_ifork_t	*ifp,			/* inode fork pointer */
+	int		erp_idx,		/* target extent irec index */
+	xfs_extnum_t	idx,			/* index within target list */
+	int		count)			/* new extents being added */
+{
+	int		byte_diff;		/* new bytes being added */
+	xfs_ext_irec_t	*erp;			/* pointer to irec entry */
+	xfs_extnum_t	ext_diff;		/* number of extents to add */
+	xfs_extnum_t	ext_cnt;		/* new extents still needed */
+	xfs_extnum_t	nex2;			/* extents after idx + count */
+	xfs_bmbt_rec_t	*nex2_ep = NULL;	/* temp list for nex2 extents */
+	int		nlists;			/* number of irec's (lists) */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	erp = &ifp->if_u1.if_ext_irec[erp_idx];
+	nex2 = erp->er_extcount - idx;
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+
+	/*
+	 * Save second part of target extent list
+	 * (all extents past */
+	if (nex2) {
+		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
+		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP);
+		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
+		erp->er_extcount -= nex2;
+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
+		memset(&erp->er_extbuf[idx], 0, byte_diff);
+	}
+
+	/*
+	 * Add the new extents to the end of the target
+	 * list, then allocate new irec record(s) and
+	 * extent buffer(s) as needed to store the rest
+	 * of the new extents.
+	 */
+	ext_cnt = count;
+	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
+	if (ext_diff) {
+		erp->er_extcount += ext_diff;
+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
+		ext_cnt -= ext_diff;
+	}
+	while (ext_cnt) {
+		erp_idx++;
+		erp = xfs_iext_irec_new(ifp, erp_idx);
+		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
+		erp->er_extcount = ext_diff;
+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
+		ext_cnt -= ext_diff;
+	}
+
+	/* Add nex2 extents back to indirection array */
+	if (nex2) {
+		xfs_extnum_t	ext_avail;
+		int		i;
+
+		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
+		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
+		i = 0;
+		/*
+		 * If nex2 extents fit in the current page, append
+		 * nex2_ep after the new extents.
+		 */
+		if (nex2 <= ext_avail) {
+			i = erp->er_extcount;
+		}
+		/*
+		 * Otherwise, check if space is available in the
+		 * next page.
+		 */
+		else if ((erp_idx < nlists - 1) &&
+			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
+			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
+			erp_idx++;
+			erp++;
+			/* Create a hole for nex2 extents */
+			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
+				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
+		}
+		/*
+		 * Final choice, create a new extent page for
+		 * nex2 extents.
+		 */
+		else {
+			erp_idx++;
+			erp = xfs_iext_irec_new(ifp, erp_idx);
+		}
+		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
+		kmem_free(nex2_ep, byte_diff);
+		erp->er_extcount += nex2;
+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
+	}
+}
+
 /*
  * This is called when the amount of space required for incore file
  * extents needs to be decreased. The ext_diff parameter stores the
  * number of extents to be removed and the idx parameter contains
  * the extent index where the extents will be removed from.
+ *
+ * If the amount of space needed has decreased below the linear
+ * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
+ * extent array.  Otherwise, use kmem_realloc() to adjust the
+ * size to what is needed.
  */
 void
 xfs_iext_remove(
@@ -3731,6 +3910,8 @@ xfs_iext_remove(
 
 	if (new_size == 0) {
 		xfs_iext_destroy(ifp);
+	} else if (ifp->if_flags & XFS_IFEXTIREC) {
+		xfs_iext_remove_indirect(ifp, idx, ext_diff);
 	} else if (ifp->if_real_bytes) {
 		xfs_iext_remove_direct(ifp, idx, ext_diff);
 	} else {
@@ -3751,6 +3932,7 @@ xfs_iext_remove_inline(
 {
 	int		nextents;	/* number of extents in file */
 
+	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
 	ASSERT(idx < XFS_INLINE_EXTS);
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 	ASSERT(((nextents - ext_diff) > 0) &&
@@ -3788,6 +3970,7 @@ xfs_iext_remove_direct(
 	xfs_extnum_t	nextents;	/* number of extents in file */
 	int		new_size;	/* size of extents after removal */
 
+	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
 	new_size = ifp->if_bytes -
 		(ext_diff * sizeof(xfs_bmbt_rec_t));
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
@@ -3815,6 +3998,84 @@ xfs_iext_remove_direct(
 	ifp->if_bytes = new_size;
 }
 
+/*
+ * This is called when incore extents are being removed from the
+ * indirection array and the extents being removed span multiple extent
+ * buffers. The idx parameter contains the file extent index where we
+ * want to begin removing extents, and the count parameter contains
+ * how many extents need to be removed.
+ *
+ *    |-------|   |-------|
+ *    | nex1  |   |       |    nex1 - number of extents before idx
+ *    |-------|   | count |
+ *    |       |   |       |    count - number of extents being removed at idx
+ *    | count |   |-------|
+ *    |       |   | nex2  |    nex2 - number of extents after idx + count
+ *    |-------|   |-------|
+ */
+void
+xfs_iext_remove_indirect(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	idx,		/* index to begin removing extents */
+	int		count)		/* number of extents to remove */
+{
+	xfs_ext_irec_t	*erp;		/* indirection array pointer */
+	int		erp_idx = 0;	/* indirection array index */
+	xfs_extnum_t	ext_cnt;	/* extents left to remove */
+	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
+	xfs_extnum_t	nex1;		/* number of extents before idx */
+	xfs_extnum_t	nex2;		/* extents after idx + count */
+	int		nlists;		/* entries in indirecton array */
+	int		page_idx = idx;	/* index in target extent list */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);
+	ASSERT(erp != NULL);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	nex1 = page_idx;
+	ext_cnt = count;
+	while (ext_cnt) {
+		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
+		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
+		/*
+		 * Check for deletion of entire list;
+		 * xfs_iext_irec_remove() updates extent offsets.
+		 */
+		if (ext_diff == erp->er_extcount) {
+			xfs_iext_irec_remove(ifp, erp_idx);
+			ext_cnt -= ext_diff;
+			nex1 = 0;
+			if (ext_cnt) {
+				ASSERT(erp_idx < ifp->if_real_bytes /
+					XFS_IEXT_BUFSZ);
+				erp = &ifp->if_u1.if_ext_irec[erp_idx];
+				nex1 = 0;
+				continue;
+			} else {
+				break;
+			}
+		}
+		/* Move extents up (if needed) */
+		if (nex2) {
+			memmove(&erp->er_extbuf[nex1],
+				&erp->er_extbuf[nex1 + ext_diff],
+				nex2 * sizeof(xfs_bmbt_rec_t));
+		}
+		/* Zero out rest of page */
+		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
+			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
+		/* Update remaining counters */
+		erp->er_extcount -= ext_diff;
+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
+		ext_cnt -= ext_diff;
+		nex1 = 0;
+		erp_idx++;
+		erp++;
+	}
+	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
+	xfs_iext_irec_compact(ifp);
+}
+
 /*
  * Create, destroy, or resize a linear (direct) block of extents.
  */
@@ -3827,6 +4088,10 @@ xfs_iext_realloc_direct(
 
 	rnew_size = new_size;
 
+	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
+		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
+		 (new_size != ifp->if_real_bytes)));
+
 	/* Free extent records */
 	if (new_size == 0) {
 		xfs_iext_destroy(ifp);
@@ -3919,6 +4184,60 @@ xfs_iext_inline_to_direct(
 	ifp->if_real_bytes = new_size;
 }
 
+/*
+ * Resize an extent indirection array to new_size bytes.
+ */
+void
+xfs_iext_realloc_indirect(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	int		new_size)	/* new indirection array size */
+{
+	int		nlists;		/* number of irec's (ex lists) */
+	int		size;		/* current indirection array size */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	size = nlists * sizeof(xfs_ext_irec_t);
+	ASSERT(ifp->if_real_bytes);
+	ASSERT((new_size >= 0) && (new_size != size));
+	if (new_size == 0) {
+		xfs_iext_destroy(ifp);
+	} else {
+		ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
+			kmem_realloc(ifp->if_u1.if_ext_irec,
+				new_size, size, KM_SLEEP);
+	}
+}
+
+/*
+ * Switch from indirection array to linear (direct) extent allocations.
+ */
+void
+xfs_iext_indirect_to_direct(
+	 xfs_ifork_t	*ifp)		/* inode fork pointer */
+{
+	xfs_bmbt_rec_t	*ep;		/* extent record pointer */
+	xfs_extnum_t	nextents;	/* number of extents in file */
+	int		size;		/* size of file extents */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	ASSERT(nextents <= XFS_LINEAR_EXTS);
+	size = nextents * sizeof(xfs_bmbt_rec_t);
+
+	xfs_iext_irec_compact_full(ifp);
+	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
+
+	ep = ifp->if_u1.if_ext_irec->er_extbuf;
+	kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t));
+	ifp->if_flags &= ~XFS_IFEXTIREC;
+	ifp->if_u1.if_extents = ep;
+	ifp->if_bytes = size;
+	if (nextents < XFS_LINEAR_EXTS) {
+		xfs_iext_realloc_direct(ifp, size);
+	}
+}
+
 /*
  * Free incore file extents.
  */
@@ -3926,7 +4245,16 @@ void
 xfs_iext_destroy(
 	xfs_ifork_t	*ifp)		/* inode fork pointer */
 {
-	if (ifp->if_real_bytes) {
+	if (ifp->if_flags & XFS_IFEXTIREC) {
+		int	erp_idx;
+		int	nlists;
+
+		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
+			xfs_iext_irec_remove(ifp, erp_idx);
+		}
+		ifp->if_flags &= ~XFS_IFEXTIREC;
+	} else if (ifp->if_real_bytes) {
 		kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
 	} else if (ifp->if_bytes) {
 		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
@@ -3936,3 +4264,380 @@ xfs_iext_destroy(
 	ifp->if_real_bytes = 0;
 	ifp->if_bytes = 0;
 }
+
+/*
+ * Return a pointer to the indirection array entry containing the
+ * extent record for filesystem block bno. Store the index of the
+ * target irec in *erp_idxp.
+ */
+xfs_ext_irec_t *
+xfs_iext_bno_to_irec(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_fileoff_t	bno,		/* block number to search for */
+	int		*erp_idxp)	/* irec index of target ext list */
+{
+	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
+	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
+	xfs_extnum_t	erp_idx;	/* indirection array index */
+	int		nlists;		/* number of extent irec's (lists) */
+	int		high;		/* binary search upper limit */
+	int		low;		/* binary search lower limit */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	erp_idx = 0;
+	low = 0;
+	high = nlists - 1;
+	while (low <= high) {
+		erp_idx = (low + high) >> 1;
+		erp = &ifp->if_u1.if_ext_irec[erp_idx];
+		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
+		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
+			high = erp_idx - 1;
+		} else if (erp_next && bno >=
+			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
+			low = erp_idx + 1;
+		} else {
+			break;
+		}
+	}
+	*erp_idxp = erp_idx;
+	return erp;
+}
+
+/*
+ * Return a pointer to the indirection array entry containing the
+ * extent record at file extent index *idxp. Store the index of the
+ * target irec in *erp_idxp and store the page index of the target
+ * extent record in *idxp.
+ */
+xfs_ext_irec_t *
+xfs_iext_idx_to_irec(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
+	int		*erp_idxp,	/* pointer to target irec */
+	int		realloc)	/* new bytes were just added */
+{
+	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
+	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
+	int		erp_idx;	/* indirection array index */
+	int		nlists;		/* number of irec's (ex lists) */
+	int		high;		/* binary search upper limit */
+	int		low;		/* binary search lower limit */
+	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	ASSERT(page_idx >= 0 && page_idx <=
+		ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	erp_idx = 0;
+	low = 0;
+	high = nlists - 1;
+
+	/* Binary search extent irec's */
+	while (low <= high) {
+		erp_idx = (low + high) >> 1;
+		erp = &ifp->if_u1.if_ext_irec[erp_idx];
+		prev = erp_idx > 0 ? erp - 1 : NULL;
+		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
+		     realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
+			high = erp_idx - 1;
+		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
+			   (page_idx == erp->er_extoff + erp->er_extcount &&
+			    !realloc)) {
+			low = erp_idx + 1;
+		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
+			   erp->er_extcount == XFS_LINEAR_EXTS) {
+			ASSERT(realloc);
+			page_idx = 0;
+			erp_idx++;
+			erp = erp_idx < nlists ? erp + 1 : NULL;
+			break;
+		} else {
+			page_idx -= erp->er_extoff;
+			break;
+		}
+	}
+	*idxp = page_idx;
+	*erp_idxp = erp_idx;
+	return(erp);
+}
+
+/*
+ * Allocate and initialize an indirection array once the space needed
+ * for incore extents increases above XFS_IEXT_BUFSZ.
+ */
+void
+xfs_iext_irec_init(
+	xfs_ifork_t	*ifp)		/* inode fork pointer */
+{
+	xfs_ext_irec_t	*erp;		/* indirection array pointer */
+	xfs_extnum_t	nextents;	/* number of extents in file */
+
+	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	ASSERT(nextents <= XFS_LINEAR_EXTS);
+
+	erp = (xfs_ext_irec_t *)
+		kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP);
+
+	if (nextents == 0) {
+		ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
+			kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP);
+	} else if (!ifp->if_real_bytes) {
+		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
+	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
+		xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
+	}
+	erp->er_extbuf = ifp->if_u1.if_extents;
+	erp->er_extcount = nextents;
+	erp->er_extoff = 0;
+
+	ifp->if_flags |= XFS_IFEXTIREC;
+	ifp->if_real_bytes = XFS_IEXT_BUFSZ;
+	ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
+	ifp->if_u1.if_ext_irec = erp;
+
+	return;
+}
+
+/*
+ * Allocate and initialize a new entry in the indirection array.
+ */
+xfs_ext_irec_t *
+xfs_iext_irec_new(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	int		erp_idx)	/* index for new irec */
+{
+	xfs_ext_irec_t	*erp;		/* indirection array pointer */
+	int		i;		/* loop counter */
+	int		nlists;		/* number of irec's (ex lists) */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+
+	/* Resize indirection array */
+	xfs_iext_realloc_indirect(ifp, ++nlists *
+				  sizeof(xfs_ext_irec_t));
+	/*
+	 * Move records down in the array so the
+	 * new page can use erp_idx.
+	 */
+	erp = ifp->if_u1.if_ext_irec;
+	for (i = nlists - 1; i > erp_idx; i--) {
+		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
+	}
+	ASSERT(i == erp_idx);
+
+	/* Initialize new extent record */
+	erp = ifp->if_u1.if_ext_irec;
+	erp[erp_idx].er_extbuf = (xfs_bmbt_rec_t *)
+		kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP);
+	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
+	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
+	erp[erp_idx].er_extcount = 0;
+	erp[erp_idx].er_extoff = erp_idx > 0 ?
+		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
+	return (&erp[erp_idx]);
+}
+
+/*
+ * Remove a record from the indirection array.
+ */
+void
+xfs_iext_irec_remove(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	int		erp_idx)	/* irec index to remove */
+{
+	xfs_ext_irec_t	*erp;		/* indirection array pointer */
+	int		i;		/* loop counter */
+	int		nlists;		/* number of irec's (ex lists) */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	erp = &ifp->if_u1.if_ext_irec[erp_idx];
+	if (erp->er_extbuf) {
+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
+			-erp->er_extcount);
+		kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ);
+	}
+	/* Compact extent records */
+	erp = ifp->if_u1.if_ext_irec;
+	for (i = erp_idx; i < nlists - 1; i++) {
+		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
+	}
+	/*
+	 * Manually free the last extent record from the indirection
+	 * array.  A call to xfs_iext_realloc_indirect() with a size
+	 * of zero would result in a call to xfs_iext_destroy() which
+	 * would in turn call this function again, creating a nasty
+	 * infinite loop.
+	 */
+	if (--nlists) {
+		xfs_iext_realloc_indirect(ifp,
+			nlists * sizeof(xfs_ext_irec_t));
+	} else {
+		kmem_free(ifp->if_u1.if_ext_irec,
+			sizeof(xfs_ext_irec_t));
+	}
+	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
+}
+
+/*
+ * This is called to clean up large amounts of unused memory allocated
+ * by the indirection array.  Before compacting anything though, verify
+ * that the indirection array is still needed and switch back to the
+ * linear extent list (or even the inline buffer) if possible.  The
+ * compaction policy is as follows:
+ *
+ *    Full Compaction: Extents fit into a single page (or inline buffer)
+ *    Full Compaction: Extents occupy less than 10% of allocated space
+ * Partial Compaction: Extents occupy > 10% and < 50% of allocated space
+ *      No Compaction: Extents occupy at least 50% of allocated space
+ */
+void
+xfs_iext_irec_compact(
+	xfs_ifork_t	*ifp)		/* inode fork pointer */
+{
+	xfs_extnum_t	nextents;	/* number of extents in file */
+	int		nlists;		/* number of irec's (ex lists) */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+
+	if (nextents == 0) {
+		xfs_iext_destroy(ifp);
+	} else if (nextents <= XFS_INLINE_EXTS) {
+		xfs_iext_indirect_to_direct(ifp);
+		xfs_iext_direct_to_inline(ifp, nextents);
+	} else if (nextents <= XFS_LINEAR_EXTS) {
+		xfs_iext_indirect_to_direct(ifp);
+	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) {
+		xfs_iext_irec_compact_full(ifp);
+	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
+		xfs_iext_irec_compact_pages(ifp);
+	}
+}
+
+/*
+ * Combine extents from neighboring extent pages.
+ */
+void
+xfs_iext_irec_compact_pages(
+	xfs_ifork_t	*ifp)		/* inode fork pointer */
+{
+	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
+	int		erp_idx = 0;	/* indirection array index */
+	int		nlists;		/* number of irec's (ex lists) */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	while (erp_idx < nlists - 1) {
+		erp = &ifp->if_u1.if_ext_irec[erp_idx];
+		erp_next = erp + 1;
+		if (erp_next->er_extcount <=
+		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
+			memmove(&erp->er_extbuf[erp->er_extcount],
+				erp_next->er_extbuf, erp_next->er_extcount *
+				sizeof(xfs_bmbt_rec_t));
+			erp->er_extcount += erp_next->er_extcount;
+			/*
+			 * Free page before removing extent record
+			 * so er_extoffs don't get modified in
+			 * xfs_iext_irec_remove.
+			 */
+			kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ);
+			erp_next->er_extbuf = NULL;
+			xfs_iext_irec_remove(ifp, erp_idx + 1);
+			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+		} else {
+			erp_idx++;
+		}
+	}
+}
+
+/*
+ * Fully compact the extent records managed by the indirection array.
+ */
+void
+xfs_iext_irec_compact_full(
+	xfs_ifork_t	*ifp)			/* inode fork pointer */
+{
+	xfs_bmbt_rec_t	*ep, *ep_next;		/* extent record pointers */
+	xfs_ext_irec_t	*erp, *erp_next;	/* extent irec pointers */
+	int		erp_idx = 0;		/* extent irec index */
+	int		ext_avail;		/* empty entries in ex list */
+	int		ext_diff;		/* number of exts to add */
+	int		nlists;			/* number of irec's (ex lists) */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	erp = ifp->if_u1.if_ext_irec;
+	ep = &erp->er_extbuf[erp->er_extcount];
+	erp_next = erp + 1;
+	ep_next = erp_next->er_extbuf;
+	while (erp_idx < nlists - 1) {
+		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
+		ext_diff = MIN(ext_avail, erp_next->er_extcount);
+		memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
+		erp->er_extcount += ext_diff;
+		erp_next->er_extcount -= ext_diff;
+		/* Remove next page */
+		if (erp_next->er_extcount == 0) {
+			/*
+			 * Free page before removing extent record
+			 * so er_extoffs don't get modified in
+			 * xfs_iext_irec_remove.
+			 */
+			kmem_free(erp_next->er_extbuf,
+				erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
+			erp_next->er_extbuf = NULL;
+			xfs_iext_irec_remove(ifp, erp_idx + 1);
+			erp = &ifp->if_u1.if_ext_irec[erp_idx];
+			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+		/* Update next page */
+		} else {
+			/* Move rest of page up to become next new page */
+			memmove(erp_next->er_extbuf, ep_next,
+				erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
+			ep_next = erp_next->er_extbuf;
+			memset(&ep_next[erp_next->er_extcount], 0,
+				(XFS_LINEAR_EXTS - erp_next->er_extcount) *
+				sizeof(xfs_bmbt_rec_t));
+		}
+		if (erp->er_extcount == XFS_LINEAR_EXTS) {
+			erp_idx++;
+			if (erp_idx < nlists)
+				erp = &ifp->if_u1.if_ext_irec[erp_idx];
+			else
+				break;
+		}
+		ep = &erp->er_extbuf[erp->er_extcount];
+		erp_next = erp + 1;
+		ep_next = erp_next->er_extbuf;
+	}
+}
+
+/*
+ * This is called to update the er_extoff field in the indirection
+ * array when extents have been added or removed from one of the
+ * extent lists. erp_idx contains the irec index to begin updating
+ * at and ext_diff contains the number of extents that were added
+ * or removed.
+ */
+void
+xfs_iext_irec_update_extoffs(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	int		erp_idx,	/* irec index to update */
+	int		ext_diff)	/* number of new extents */
+{
+	int		i;		/* loop counter */
+	int		nlists;		/* number of irec's (ex lists */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	for (i = erp_idx; i < nlists; i++) {
+		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
+	}
+}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 740b73fabd2..3c1df1d642f 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -24,11 +24,38 @@
 #define	XFS_DATA_FORK	0
 #define	XFS_ATTR_FORK	1
 
+/*
+ * The following xfs_ext_irec_t struct introduces a second (top) level
+ * to the in-core extent allocation scheme. These structs are allocated
+ * in a contiguous block, creating an indirection array where each entry
+ * (irec) contains a pointer to a buffer of in-core extent records which
+ * it manages. Each extent buffer is 4k in size, since 4k is the system
+ * page size on Linux i386 and systems with larger page sizes don't seem
+ * to gain much, if anything, by using their native page size as the
+ * extent buffer size. Also, using 4k extent buffers everywhere provides
+ * a consistent interface for CXFS across different platforms.
+ *
+ * There is currently no limit on the number of irec's (extent lists)
+ * allowed, so heavily fragmented files may require an indirection array
+ * which spans multiple system pages of memory. The number of extents
+ * which would require this amount of contiguous memory is very large
+ * and should not cause problems in the foreseeable future. However,
+ * if the memory needed for the contiguous array ever becomes a problem,
+ * it is possible that a third level of indirection may be required.
+ */
+typedef struct xfs_ext_irec {
+	xfs_bmbt_rec_t	*er_extbuf;	/* block of extent records */
+	xfs_extnum_t	er_extoff;	/* extent offset in file */
+	xfs_extnum_t	er_extcount;	/* number of extents in page/block */
+} xfs_ext_irec_t;
+
 /*
  * File incore extent information, present for each of data & attr forks.
  */
-#define	XFS_INLINE_EXTS	2
-#define	XFS_INLINE_DATA	32
+#define	XFS_IEXT_BUFSZ		4096
+#define	XFS_LINEAR_EXTS		(XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t))
+#define	XFS_INLINE_EXTS		2
+#define	XFS_INLINE_DATA		32
 typedef struct xfs_ifork {
 	int			if_bytes;	/* bytes in if_u1 */
 	int			if_real_bytes;	/* bytes allocated in if_u1 */
@@ -39,6 +66,7 @@ typedef struct xfs_ifork {
 	xfs_extnum_t		if_lastex;	/* last if_extents used */
 	union {
 		xfs_bmbt_rec_t	*if_extents;	/* linear map file exts */
+		xfs_ext_irec_t	*if_ext_irec;	/* irec map file exts */
 		char		*if_data;	/* inline file data */
 	} if_u1;
 	union {
@@ -61,9 +89,10 @@ typedef struct xfs_ifork {
 /*
  * Per-fork incore inode flags.
  */
-#define	XFS_IFINLINE	0x0001	/* Inline data is read in */
-#define	XFS_IFEXTENTS	0x0002	/* All extent pointers are read in */
-#define	XFS_IFBROOT	0x0004	/* i_broot points to the bmap b-tree root */
+#define	XFS_IFINLINE	0x01	/* Inline data is read in */
+#define	XFS_IFEXTENTS	0x02	/* All extent pointers are read in */
+#define	XFS_IFBROOT	0x04	/* i_broot points to the bmap b-tree root */
+#define	XFS_IFEXTIREC	0x08	/* Indirection array of extent blocks */
 
 /*
  * Flags for xfs_imap() and xfs_dilocate().
@@ -438,13 +467,26 @@ xfs_bmbt_rec_t	*xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
 void		xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t,
 				xfs_bmbt_irec_t *);
 void		xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int);
+void		xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int);
 void		xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int);
 void		xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int);
 void		xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int);
+void		xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int);
 void		xfs_iext_realloc_direct(xfs_ifork_t *, int);
+void		xfs_iext_realloc_indirect(xfs_ifork_t *, int);
+void		xfs_iext_indirect_to_direct(xfs_ifork_t *);
 void		xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t);
 void		xfs_iext_inline_to_direct(xfs_ifork_t *, int);
 void		xfs_iext_destroy(xfs_ifork_t *);
+xfs_ext_irec_t	*xfs_iext_bno_to_irec(xfs_ifork_t *, xfs_fileoff_t, int *);
+xfs_ext_irec_t	*xfs_iext_idx_to_irec(xfs_ifork_t *, xfs_extnum_t *, int *, int);
+void		xfs_iext_irec_init(xfs_ifork_t *);
+xfs_ext_irec_t *xfs_iext_irec_new(xfs_ifork_t *, int);
+void		xfs_iext_irec_remove(xfs_ifork_t *, int);
+void		xfs_iext_irec_compact(xfs_ifork_t *);
+void		xfs_iext_irec_compact_pages(xfs_ifork_t *);
+void		xfs_iext_irec_compact_full(xfs_ifork_t *);
+void		xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
 
 #define xfs_ipincount(ip)	((unsigned int) atomic_read(&ip->i_pincount))
 
-- 
cgit v1.2.3


From 1f6553f9f9b6e41375c605769a75bd1646685a1b Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:30:48 +1100
Subject: [XFS] Dynamically allocate local kiocb structures in readv/writev
 routines to reduce stack footprint.

SGI-PV: 947312
SGI-Modid: xfs-linux-melb:xfs-kern:25358a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_file.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index ced4404339c..269995ddfbd 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -145,17 +145,22 @@ __linvfs_readv(
 {
 	struct inode	*inode = file->f_mapping->host;
 	vnode_t		*vp = LINVFS_GET_VP(inode);
-	struct		kiocb kiocb;
+	struct kiocb	*kiocb;
 	ssize_t		rval;
 
-	init_sync_kiocb(&kiocb, file);
-	kiocb.ki_pos = *ppos;
+	kiocb = kmalloc(sizeof(*kiocb), GFP_KERNEL);
+	if (unlikely(!kiocb))
+		return -ENOMEM;
+
+	init_sync_kiocb(kiocb, file);
+	kiocb->ki_pos = *ppos;
 
 	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
-	VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
+	VOP_READ(vp, kiocb, iov, nr_segs, &kiocb->ki_pos, ioflags, NULL, rval);
 
-	*ppos = kiocb.ki_pos;
+	*ppos = kiocb->ki_pos;
+	kfree(kiocb);
 	return rval;
 }
 
@@ -190,17 +195,22 @@ __linvfs_writev(
 {
 	struct inode	*inode = file->f_mapping->host;
 	vnode_t		*vp = LINVFS_GET_VP(inode);
-	struct		kiocb kiocb;
+	struct kiocb	*kiocb;
 	ssize_t		rval;
 
-	init_sync_kiocb(&kiocb, file);
-	kiocb.ki_pos = *ppos;
+	kiocb = kmalloc(sizeof(*kiocb), GFP_KERNEL);
+	if (unlikely(!kiocb))
+		return -ENOMEM;
+
+	init_sync_kiocb(kiocb, file);
+	kiocb->ki_pos = *ppos;
 	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
 
-	VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
+	VOP_WRITE(vp, kiocb, iov, nr_segs, &kiocb->ki_pos, ioflags, NULL, rval);
 
-	*ppos = kiocb.ki_pos;
+	*ppos = kiocb->ki_pos;
+	kfree(kiocb);
 	return rval;
 }
 
@@ -435,7 +445,7 @@ linvfs_ioctl(
 	unsigned long	arg)
 {
 	int		error;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode	*inode = filp->f_dentry->d_inode;
 	vnode_t		*vp = LINVFS_GET_VP(inode);
 
 	VOP_IOCTL(vp, inode, filp, 0, cmd, (void __user *)arg, error);
@@ -457,7 +467,7 @@ linvfs_ioctl_invis(
 	unsigned long	arg)
 {
 	int		error;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode	*inode = filp->f_dentry->d_inode;
 	vnode_t		*vp = LINVFS_GET_VP(inode);
 
 	ASSERT(vp);
-- 
cgit v1.2.3


From f6d75cbed997dffb41e3d473bd4c0f899abc3776 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:32:24 +1100
Subject: [XFS] Dynamically allocate xfs_dir2_put_args_t structure to reduce
 stack pressure in xfs_dir2_leaf_getdents routine.

SGI-PV: 947312
SGI-Modid: xfs-linux-melb:xfs-kern:25359a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_dir2_leaf.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index d342b6b5523..e4e07c8f7a7 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -766,7 +766,7 @@ xfs_dir2_leaf_getdents(
 	xfs_dir2_data_entry_t	*dep;		/* data entry */
 	xfs_dir2_data_unused_t	*dup;		/* unused entry */
 	int			eof;		/* reached end of directory */
-	int			error=0;		/* error return value */
+	int			error = 0;	/* error return value */
 	int			i;		/* temporary loop index */
 	int			j;		/* temporary loop index */
 	int			length;		/* temporary length value */
@@ -778,8 +778,8 @@ xfs_dir2_leaf_getdents(
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	xfs_dir2_off_t		newoff;		/* new curoff after new blk */
 	int			nmap;		/* mappings to ask xfs_bmapi */
-	xfs_dir2_put_args_t	p;		/* formatting arg bundle */
-	char			*ptr=NULL;		/* pointer to current data */
+	xfs_dir2_put_args_t	*p;		/* formatting arg bundle */
+	char			*ptr = NULL;	/* pointer to current data */
 	int			ra_current;	/* number of read-ahead blks */
 	int			ra_index;	/* *map index for read-ahead */
 	int			ra_offset;	/* map entry offset for ra */
@@ -797,9 +797,10 @@ xfs_dir2_leaf_getdents(
 	/*
 	 * Setup formatting arguments.
 	 */
-	p.dbp = dbp;
-	p.put = put;
-	p.uio = uio;
+	p = kmem_alloc(sizeof(*p), KM_SLEEP);
+	p->dbp = dbp;
+	p->put = put;
+	p->uio = uio;
 	/*
 	 * Set up to bmap a number of blocks based on the caller's
 	 * buffer size, the directory block size, and the filesystem
@@ -1092,24 +1093,24 @@ xfs_dir2_leaf_getdents(
 		 */
 		dep = (xfs_dir2_data_entry_t *)ptr;
 
-		p.namelen = dep->namelen;
+		p->namelen = dep->namelen;
 
-		length = XFS_DIR2_DATA_ENTSIZE(p.namelen);
+		length = XFS_DIR2_DATA_ENTSIZE(p->namelen);
 
-		p.cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length);
+		p->cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length);
 
-		p.ino = INT_GET(dep->inumber, ARCH_CONVERT);
+		p->ino = INT_GET(dep->inumber, ARCH_CONVERT);
 #if XFS_BIG_INUMS
-		p.ino += mp->m_inoadd;
+		p->ino += mp->m_inoadd;
 #endif
-		p.name = (char *)dep->name;
+		p->name = (char *)dep->name;
 
-		error = p.put(&p);
+		error = p->put(p);
 
 		/*
 		 * Won't fit.  Return to caller.
 		 */
-		if (!p.done) {
+		if (!p->done) {
 			eof = 0;
 			break;
 		}
@@ -1129,6 +1130,7 @@ xfs_dir2_leaf_getdents(
 	else
 		uio->uio_offset = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff);
 	kmem_free(map, map_size * sizeof(*map));
+	kmem_free(p, sizeof(*p));
 	if (bp)
 		xfs_da_brelse(tp, bp);
 	return error;
-- 
cgit v1.2.3


From 8f79405527b50fe27cffcb7081890b5c68439b4f Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:32:41 +1100
Subject: [XFS] Reduce complexity in xfs_trans_init by pushing complex macros
 out into functions and hence reduce the stack footprint there.

SGI-PV: 947312
SGI-Modid: xfs-linux-melb:xfs-kern:25360a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_trans.c | 187 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 153 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index d3d714e6b32..6a2a1e07505 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -55,9 +55,140 @@ STATIC void	xfs_trans_committed(xfs_trans_t *, int);
 STATIC void	xfs_trans_chunk_committed(xfs_log_item_chunk_t *, xfs_lsn_t, int);
 STATIC void	xfs_trans_free(xfs_trans_t *);
 
-kmem_zone_t		*xfs_trans_zone;
+kmem_zone_t	*xfs_trans_zone;
 
 
+/*
+ * Reservation functions here avoid a huge stack in xfs_trans_init
+ * due to register overflow from temporaries in the calculations.
+ */
+
+STATIC uint
+xfs_calc_write_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+}
+
+STATIC uint
+xfs_calc_itruncate_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+}
+
+STATIC uint
+xfs_calc_rename_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+}
+
+STATIC uint
+xfs_calc_link_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_LINK_LOG_RES(mp);
+}
+
+STATIC uint
+xfs_calc_remove_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+}
+
+STATIC uint
+xfs_calc_symlink_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+}
+
+STATIC uint
+xfs_calc_create_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+}
+
+STATIC uint
+xfs_calc_mkdir_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+}
+
+STATIC uint
+xfs_calc_ifree_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+}
+
+STATIC uint
+xfs_calc_ichange_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+}
+
+STATIC uint
+xfs_calc_growdata_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_GROWDATA_LOG_RES(mp);
+}
+
+STATIC uint
+xfs_calc_growrtalloc_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_GROWRTALLOC_LOG_RES(mp);
+}
+
+STATIC uint
+xfs_calc_growrtzero_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_GROWRTZERO_LOG_RES(mp);
+}
+
+STATIC uint
+xfs_calc_growrtfree_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_GROWRTFREE_LOG_RES(mp);
+}
+
+STATIC uint
+xfs_calc_swrite_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_SWRITE_LOG_RES(mp);
+}
+
+STATIC uint
+xfs_calc_writeid_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_WRITEID_LOG_RES(mp);
+}
+
+STATIC uint
+xfs_calc_addafork_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+}
+
+STATIC uint
+xfs_calc_attrinval_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_ATTRINVAL_LOG_RES(mp);
+}
+
+STATIC uint
+xfs_calc_attrset_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+}
+
+STATIC uint
+xfs_calc_attrrm_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+}
+
+STATIC uint
+xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
+{
+	return XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp);
+}
+
 /*
  * Initialize the precomputed transaction reservation values
  * in the mount structure.
@@ -69,39 +200,27 @@ xfs_trans_init(
 	xfs_trans_reservations_t	*resp;
 
 	resp = &(mp->m_reservations);
-	resp->tr_write =
-		(uint)(XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
-	resp->tr_itruncate =
-		(uint)(XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
-	resp->tr_rename =
-		(uint)(XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
-	resp->tr_link = (uint)XFS_CALC_LINK_LOG_RES(mp);
-	resp->tr_remove =
-		(uint)(XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
-	resp->tr_symlink =
-		(uint)(XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
-	resp->tr_create =
-		(uint)(XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
-	resp->tr_mkdir =
-		(uint)(XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
-	resp->tr_ifree =
-		(uint)(XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
-	resp->tr_ichange =
-		(uint)(XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
-	resp->tr_growdata = (uint)XFS_CALC_GROWDATA_LOG_RES(mp);
-	resp->tr_swrite = (uint)XFS_CALC_SWRITE_LOG_RES(mp);
-	resp->tr_writeid = (uint)XFS_CALC_WRITEID_LOG_RES(mp);
-	resp->tr_addafork =
-		(uint)(XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
-	resp->tr_attrinval = (uint)XFS_CALC_ATTRINVAL_LOG_RES(mp);
-	resp->tr_attrset =
-		(uint)(XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
-	resp->tr_attrrm =
-		(uint)(XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
-	resp->tr_clearagi = (uint)XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp);
-	resp->tr_growrtalloc = (uint)XFS_CALC_GROWRTALLOC_LOG_RES(mp);
-	resp->tr_growrtzero = (uint)XFS_CALC_GROWRTZERO_LOG_RES(mp);
-	resp->tr_growrtfree = (uint)XFS_CALC_GROWRTFREE_LOG_RES(mp);
+	resp->tr_write = xfs_calc_write_reservation(mp);
+	resp->tr_itruncate = xfs_calc_itruncate_reservation(mp);
+	resp->tr_rename = xfs_calc_rename_reservation(mp);
+	resp->tr_link = xfs_calc_link_reservation(mp);
+	resp->tr_remove = xfs_calc_remove_reservation(mp);
+	resp->tr_symlink = xfs_calc_symlink_reservation(mp);
+	resp->tr_create = xfs_calc_create_reservation(mp);
+	resp->tr_mkdir = xfs_calc_mkdir_reservation(mp);
+	resp->tr_ifree = xfs_calc_ifree_reservation(mp);
+	resp->tr_ichange = xfs_calc_ichange_reservation(mp);
+	resp->tr_growdata = xfs_calc_growdata_reservation(mp);
+	resp->tr_swrite = xfs_calc_swrite_reservation(mp);
+	resp->tr_writeid = xfs_calc_writeid_reservation(mp);
+	resp->tr_addafork = xfs_calc_addafork_reservation(mp);
+	resp->tr_attrinval = xfs_calc_attrinval_reservation(mp);
+	resp->tr_attrset = xfs_calc_attrset_reservation(mp);
+	resp->tr_attrrm = xfs_calc_attrrm_reservation(mp);
+	resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp);
+	resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp);
+	resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp);
+	resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp);
 }
 
 /*
-- 
cgit v1.2.3


From 9b94c2eddf407ad8faa5672ffa691e2076167564 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:32:54 +1100
Subject: [XFS] Take a dentry structure off the stack into the data segment.

SGI-PV: 947312
SGI-Modid: xfs-linux-melb:xfs-kern:25361a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_export.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 821bd12dd85..53ed9911236 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -25,6 +25,8 @@
 #include "xfs_mount.h"
 #include "xfs_export.h"
 
+STATIC struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, };
+
 /*
  * XFS encodes and decodes the fileid portion of NFS filehandles
  * itself instead of letting the generic NFS code do it.  This
@@ -160,11 +162,6 @@ linvfs_get_parent(
 	int			error;
 	vnode_t			*vp, *cvp;
 	struct dentry		*parent;
-	struct dentry		dotdot;
-
-	dotdot.d_name.name = "..";
-	dotdot.d_name.len = 2;
-	dotdot.d_inode = NULL;
 
 	cvp = NULL;
 	vp = LINVFS_GET_VP(child->d_inode);
-- 
cgit v1.2.3


From 220b5284139be6ecbc39b353fd76f0923eccc3d6 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:33:36 +1100
Subject: [XFS] Dynamically allocate vattr in places it makes sense to do so,
 to reduce stack use.  Also re-use vattr in some places so that multiple
 copies are not held on-stack.

SGI-PV: 947312
SGI-Modid: xfs-linux-melb:xfs-kern:25369a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_file.c  |  13 +++-
 fs/xfs/linux-2.6/xfs_ioctl.c | 128 +++++++++++++++++-------------
 fs/xfs/linux-2.6/xfs_iops.c  | 180 +++++++++++++++++++++++++------------------
 fs/xfs/linux-2.6/xfs_vnode.c |  29 ++++---
 fs/xfs/linux-2.6/xfs_vnode.h |   1 +
 5 files changed, 209 insertions(+), 142 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 269995ddfbd..ce8fe40e162 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -420,7 +420,7 @@ linvfs_file_mmap(
 {
 	struct inode	*ip = filp->f_dentry->d_inode;
 	vnode_t		*vp = LINVFS_GET_VP(ip);
-	vattr_t		va = { .va_mask = XFS_AT_UPDATIME };
+	vattr_t		*vattr;
 	int		error;
 
 	vma->vm_ops = &linvfs_file_vm_ops;
@@ -431,9 +431,14 @@ linvfs_file_mmap(
 	}
 #endif /* CONFIG_XFS_DMAPI */
 
-	VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error);
-	if (!error)
-		vn_revalidate(vp);	/* update Linux inode flags */
+	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
+	if (unlikely(!vattr))
+		return -ENOMEM;
+	vattr->va_mask = XFS_AT_UPDATIME;
+	VOP_SETATTR(vp, vattr, XFS_AT_UPDATIME, NULL, error);
+	if (likely(!error))
+		__vn_revalidate(vp, vattr);	/* update flags */
+	kfree(vattr);
 	return 0;
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4db47790415..f182721ec9a 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -1160,105 +1160,129 @@ xfs_ioc_xattr(
 	void			__user *arg)
 {
 	struct fsxattr		fa;
-	vattr_t			va;
-	int			error;
+	struct vattr		*vattr;
+	int			error = 0;
 	int			attr_flags;
 	unsigned int		flags;
 
+	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
+	if (unlikely(!vattr))
+		return -ENOMEM;
+
 	switch (cmd) {
 	case XFS_IOC_FSGETXATTR: {
-		va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \
-			     XFS_AT_NEXTENTS | XFS_AT_PROJID;
-		VOP_GETATTR(vp, &va, 0, NULL, error);
-		if (error)
-			return -error;
+		vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \
+				 XFS_AT_NEXTENTS | XFS_AT_PROJID;
+		VOP_GETATTR(vp, vattr, 0, NULL, error);
+		if (unlikely(error)) {
+			error = -error;
+			break;
+		}
 
-		fa.fsx_xflags	= va.va_xflags;
-		fa.fsx_extsize	= va.va_extsize;
-		fa.fsx_nextents = va.va_nextents;
-		fa.fsx_projid	= va.va_projid;
+		fa.fsx_xflags	= vattr->va_xflags;
+		fa.fsx_extsize	= vattr->va_extsize;
+		fa.fsx_nextents = vattr->va_nextents;
+		fa.fsx_projid	= vattr->va_projid;
 
-		if (copy_to_user(arg, &fa, sizeof(fa)))
-			return -XFS_ERROR(EFAULT);
-		return 0;
+		if (copy_to_user(arg, &fa, sizeof(fa))) {
+			error = -EFAULT;
+			break;
+		}
+		break;
 	}
 
 	case XFS_IOC_FSSETXATTR: {
-		if (copy_from_user(&fa, arg, sizeof(fa)))
-			return -XFS_ERROR(EFAULT);
+		if (copy_from_user(&fa, arg, sizeof(fa))) {
+			error = -EFAULT;
+			break;
+		}
 
 		attr_flags = 0;
 		if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
 			attr_flags |= ATTR_NONBLOCK;
 
-		va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
-		va.va_xflags  = fa.fsx_xflags;
-		va.va_extsize = fa.fsx_extsize;
-		va.va_projid  = fa.fsx_projid;
+		vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
+		vattr->va_xflags  = fa.fsx_xflags;
+		vattr->va_extsize = fa.fsx_extsize;
+		vattr->va_projid  = fa.fsx_projid;
 
-		VOP_SETATTR(vp, &va, attr_flags, NULL, error);
-		if (!error)
-			vn_revalidate(vp);	/* update Linux inode flags */
-		return -error;
+		VOP_SETATTR(vp, vattr, attr_flags, NULL, error);
+		if (likely(!error))
+			__vn_revalidate(vp, vattr);	/* update flags */
+		error = -error;
+		break;
 	}
 
 	case XFS_IOC_FSGETXATTRA: {
-		va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \
-			     XFS_AT_ANEXTENTS | XFS_AT_PROJID;
-		VOP_GETATTR(vp, &va, 0, NULL, error);
-		if (error)
-			return -error;
+		vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \
+				 XFS_AT_ANEXTENTS | XFS_AT_PROJID;
+		VOP_GETATTR(vp, vattr, 0, NULL, error);
+		if (unlikely(error)) {
+			error = -error;
+			break;
+		}
 
-		fa.fsx_xflags	= va.va_xflags;
-		fa.fsx_extsize	= va.va_extsize;
-		fa.fsx_nextents = va.va_anextents;
-		fa.fsx_projid	= va.va_projid;
+		fa.fsx_xflags	= vattr->va_xflags;
+		fa.fsx_extsize	= vattr->va_extsize;
+		fa.fsx_nextents = vattr->va_anextents;
+		fa.fsx_projid	= vattr->va_projid;
 
-		if (copy_to_user(arg, &fa, sizeof(fa)))
-			return -XFS_ERROR(EFAULT);
-		return 0;
+		if (copy_to_user(arg, &fa, sizeof(fa))) {
+			error = -EFAULT;
+			break;
+		}
+		break;
 	}
 
 	case XFS_IOC_GETXFLAGS: {
 		flags = xfs_di2lxflags(ip->i_d.di_flags);
 		if (copy_to_user(arg, &flags, sizeof(flags)))
-			return -XFS_ERROR(EFAULT);
-		return 0;
+			error = -EFAULT;
+		break;
 	}
 
 	case XFS_IOC_SETXFLAGS: {
-		if (copy_from_user(&flags, arg, sizeof(flags)))
-			return -XFS_ERROR(EFAULT);
+		if (copy_from_user(&flags, arg, sizeof(flags))) {
+			error = -EFAULT;
+			break;
+		}
 
 		if (flags & ~(LINUX_XFLAG_IMMUTABLE | LINUX_XFLAG_APPEND | \
 			      LINUX_XFLAG_NOATIME | LINUX_XFLAG_NODUMP | \
-			      LINUX_XFLAG_SYNC))
-			return -XFS_ERROR(EOPNOTSUPP);
+			      LINUX_XFLAG_SYNC)) {
+			error = -EOPNOTSUPP;
+			break;
+		}
 
 		attr_flags = 0;
 		if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
 			attr_flags |= ATTR_NONBLOCK;
 
-		va.va_mask = XFS_AT_XFLAGS;
-		va.va_xflags = xfs_merge_ioc_xflags(flags,
-				xfs_ip2xflags(ip));
+		vattr->va_mask = XFS_AT_XFLAGS;
+		vattr->va_xflags = xfs_merge_ioc_xflags(flags,
+							xfs_ip2xflags(ip));
 
-		VOP_SETATTR(vp, &va, attr_flags, NULL, error);
-		if (!error)
-			vn_revalidate(vp);	/* update Linux inode flags */
-		return -error;
+		VOP_SETATTR(vp, vattr, attr_flags, NULL, error);
+		if (likely(!error))
+			__vn_revalidate(vp, vattr);	/* update flags */
+		error = -error;
+		break;
 	}
 
 	case XFS_IOC_GETVERSION: {
 		flags = LINVFS_GET_IP(vp)->i_generation;
 		if (copy_to_user(arg, &flags, sizeof(flags)))
-			return -XFS_ERROR(EFAULT);
-		return 0;
+			error = -EFAULT;
+		break;
 	}
 
 	default:
-		return -ENOTTY;
+		error = -ENOTTY;
+		break;
 	}
+
+	kfree(vattr);
+	return error;
 }
 
 STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index d7f6f2d8ac8..b1219195c6e 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -198,22 +198,22 @@ xfs_ichgtime_fast(
  * Pull the link count and size up from the xfs inode to the linux inode
  */
 STATIC void
-validate_fields(
-	struct inode	*ip)
+__linvfs_validate_fields(
+	struct inode	*ip,
+	struct vattr	*vattr)
 {
 	vnode_t		*vp = LINVFS_GET_VP(ip);
-	vattr_t		va;
 	int		error;
 
-	va.va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS;
-	VOP_GETATTR(vp, &va, ATTR_LAZY, NULL, error);
-	if (likely(!error)) {
-		ip->i_nlink = va.va_nlink;
-		ip->i_blocks = va.va_nblocks;
+	vattr->va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS;
+	VOP_GETATTR(vp, vattr, ATTR_LAZY, NULL, error);
+  	if (likely(!error)) {
+		ip->i_nlink = vattr->va_nlink;
+		ip->i_blocks = vattr->va_nblocks;
 
-		/* we're under i_mutex so i_size can't change under us */
-		if (i_size_read(ip) != va.va_size)
-			i_size_write(ip, va.va_size);
+		/* we're under i_sem so i_size can't change under us */
+		if (i_size_read(ip) != vattr->va_size)
+			i_size_write(ip, vattr->va_size);
 	}
 }
 
@@ -224,7 +224,7 @@ validate_fields(
  * inode, of course, such that log replay can't cause these to be lost).
  */
 STATIC int
-linvfs_init_security(
+__linvfs_init_security(
 	struct vnode	*vp,
 	struct inode	*dir)
 {
@@ -257,23 +257,23 @@ linvfs_init_security(
  * XXX(hch):  nfsd is broken, better fix it instead.
  */
 STATIC inline int
-has_fs_struct(struct task_struct *task)
+__linvfs_has_fs_struct(struct task_struct *task)
 {
 	return (task->fs != init_task.fs);
 }
 
 STATIC inline void
-cleanup_inode(
+__linvfs_cleanup_inode(
 	vnode_t		*dvp,
 	vnode_t		*vp,
-	struct dentry	*dentry,	
+	struct dentry	*dentry,
 	int		mode)
 {
 	struct dentry   teardown = {};
-	int             err2;
+	int             error;
 
 	/* Oh, the horror.
-	 * If we can't add the ACL or we fail in 
+	 * If we can't add the ACL or we fail in
 	 * linvfs_init_security we must back out.
 	 * ENOSPC can hit here, among other things.
 	 */
@@ -281,9 +281,9 @@ cleanup_inode(
 	teardown.d_name = dentry->d_name;
 
 	if (S_ISDIR(mode))
-	  	VOP_RMDIR(dvp, &teardown, NULL, err2);
+	  	VOP_RMDIR(dvp, &teardown, NULL, error);
 	else
-		VOP_REMOVE(dvp, &teardown, NULL, err2);
+		VOP_REMOVE(dvp, &teardown, NULL, error);
 	VN_RELE(vp);
 }
 
@@ -295,7 +295,7 @@ linvfs_mknod(
 	dev_t		rdev)
 {
 	struct inode	*ip;
-	vattr_t		va;
+	vattr_t		*vattr;
 	vnode_t		*vp = NULL, *dvp = LINVFS_GET_VP(dir);
 	xfs_acl_t	*default_acl = NULL;
 	attrexists_t	test_default_acl = _ACL_DEFAULT_EXISTS;
@@ -305,70 +305,76 @@ linvfs_mknod(
 	 * Irix uses Missed'em'V split, but doesn't want to see
 	 * the upper 5 bits of (14bit) major.
 	 */
-	if (!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)
+	if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
 		return -EINVAL;
 
-	if (test_default_acl && test_default_acl(dvp)) {
-		if (!_ACL_ALLOC(default_acl))
+	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
+	if (unlikely(!vattr))
+		return -ENOMEM;
+
+	if (unlikely(test_default_acl && test_default_acl(dvp))) {
+		if (!_ACL_ALLOC(default_acl)) {
+			kfree(vattr);
 			return -ENOMEM;
+		}
 		if (!_ACL_GET_DEFAULT(dvp, default_acl)) {
 			_ACL_FREE(default_acl);
 			default_acl = NULL;
 		}
 	}
 
-	if (IS_POSIXACL(dir) && !default_acl && has_fs_struct(current))
+	if (IS_POSIXACL(dir) && !default_acl && __linvfs_has_fs_struct(current))
 		mode &= ~current->fs->umask;
 
-	memset(&va, 0, sizeof(va));
-	va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
-	va.va_mode = mode;
+	memset(vattr, 0, sizeof(*vattr));
+	vattr->va_mask = XFS_AT_TYPE|XFS_AT_MODE;
+	vattr->va_mode = mode;
 
 	switch (mode & S_IFMT) {
 	case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
-		va.va_rdev = sysv_encode_dev(rdev);
-		va.va_mask |= XFS_AT_RDEV;
+		vattr->va_rdev = sysv_encode_dev(rdev);
+		vattr->va_mask |= XFS_AT_RDEV;
 		/*FALLTHROUGH*/
 	case S_IFREG:
-		VOP_CREATE(dvp, dentry, &va, &vp, NULL, error);
+		VOP_CREATE(dvp, dentry, vattr, &vp, NULL, error);
 		break;
 	case S_IFDIR:
-		VOP_MKDIR(dvp, dentry, &va, &vp, NULL, error);
+		VOP_MKDIR(dvp, dentry, vattr, &vp, NULL, error);
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 
-	if (!error)
-	{
-		error = linvfs_init_security(vp, dir);
+	if (unlikely(!error)) {
+		error = __linvfs_init_security(vp, dir);
 		if (error)
-			cleanup_inode(dvp, vp, dentry, mode);
+			__linvfs_cleanup_inode(dvp, vp, dentry, mode);
 	}
 
-	if (default_acl) {
+	if (unlikely(default_acl)) {
 		if (!error) {
-			error = _ACL_INHERIT(vp, &va, default_acl);
-			if (!error) 
+			error = _ACL_INHERIT(vp, vattr, default_acl);
+			if (!error)
 				VMODIFY(vp);
 			else
-				cleanup_inode(dvp, vp, dentry, mode);
+				__linvfs_cleanup_inode(dvp, vp, dentry, mode);
 		}
 		_ACL_FREE(default_acl);
 	}
 
-	if (!error) {
+	if (likely(!error)) {
 		ASSERT(vp);
 		ip = LINVFS_GET_IP(vp);
 
 		if (S_ISCHR(mode) || S_ISBLK(mode))
 			ip->i_rdev = rdev;
 		else if (S_ISDIR(mode))
-			validate_fields(ip);
+			__linvfs_validate_fields(ip, vattr);
 		d_instantiate(dentry, ip);
-		validate_fields(dir);
+		__linvfs_validate_fields(dir, vattr);
 	}
+	kfree(vattr);
 	return -error;
 }
 
@@ -423,22 +429,28 @@ linvfs_link(
 	struct inode	*ip;	/* inode of guy being linked to */
 	vnode_t		*tdvp;	/* target directory for new name/link */
 	vnode_t		*vp;	/* vp of name being linked */
+	vattr_t		*vattr;
 	int		error;
 
 	ip = old_dentry->d_inode;	/* inode being linked to */
 	if (S_ISDIR(ip->i_mode))
 		return -EPERM;
 
+	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
+	if (unlikely(!vattr))
+		return -ENOMEM;
+
 	tdvp = LINVFS_GET_VP(dir);
 	vp = LINVFS_GET_VP(ip);
 
 	VOP_LINK(tdvp, vp, dentry, NULL, error);
-	if (!error) {
+	if (likely(!error)) {
 		VMODIFY(tdvp);
 		VN_HOLD(vp);
-		validate_fields(ip);
+		__linvfs_validate_fields(ip, vattr);
 		d_instantiate(dentry, ip);
 	}
+	kfree(vattr);
 	return -error;
 }
 
@@ -449,17 +461,22 @@ linvfs_unlink(
 {
 	struct inode	*inode;
 	vnode_t		*dvp;	/* directory containing name to remove */
+	vattr_t		*vattr;
 	int		error;
 
+	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
+	if (unlikely(!vattr))
+		return -ENOMEM;
+
 	inode = dentry->d_inode;
 	dvp = LINVFS_GET_VP(dir);
 
 	VOP_REMOVE(dvp, dentry, NULL, error);
-	if (!error) {
-		validate_fields(dir);	/* For size only */
-		validate_fields(inode);
+	if (likely(!error)) {
+		__linvfs_validate_fields(dir, vattr);	/* size needs update */
+		__linvfs_validate_fields(inode, vattr);
 	}
-
+	kfree(vattr);
 	return -error;
 }
 
@@ -470,7 +487,7 @@ linvfs_symlink(
 	const char	*symname)
 {
 	struct inode	*ip;
-	vattr_t		va;
+	vattr_t		*vattr;
 	vnode_t		*dvp;	/* directory containing name of symlink */
 	vnode_t		*cvp;	/* used to lookup symlink to put in dentry */
 	int		error;
@@ -478,22 +495,27 @@ linvfs_symlink(
 	dvp = LINVFS_GET_VP(dir);
 	cvp = NULL;
 
-	memset(&va, 0, sizeof(va));
-	va.va_mode = S_IFLNK |
+	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
+	if (unlikely(!vattr))
+		return -ENOMEM;
+
+	memset(vattr, 0, sizeof(*vattr));
+	vattr->va_mode = S_IFLNK |
 		(irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
-	va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
+	vattr->va_mask = XFS_AT_TYPE|XFS_AT_MODE;
 
 	error = 0;
-	VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
+	VOP_SYMLINK(dvp, dentry, vattr, (char *)symname, &cvp, NULL, error);
 	if (likely(!error && cvp)) {
-		error = linvfs_init_security(cvp, dir);
+		error = __linvfs_init_security(cvp, dir);
 		if (likely(!error)) {
 			ip = LINVFS_GET_IP(cvp);
 			d_instantiate(dentry, ip);
-			validate_fields(dir);
-			validate_fields(ip);
+			__linvfs_validate_fields(dir, vattr);
+			__linvfs_validate_fields(ip, vattr);
 		}
 	}
+	kfree(vattr);
 	return -error;
 }
 
@@ -504,13 +526,19 @@ linvfs_rmdir(
 {
 	struct inode	*inode = dentry->d_inode;
 	vnode_t		*dvp = LINVFS_GET_VP(dir);
+	vattr_t		*vattr;
 	int		error;
 
+	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
+	if (unlikely(!vattr))
+		return -ENOMEM;
+
 	VOP_RMDIR(dvp, dentry, NULL, error);
-	if (!error) {
-		validate_fields(inode);
-		validate_fields(dir);
+	if (likely(!error)) {
+		__linvfs_validate_fields(inode, vattr);
+		__linvfs_validate_fields(dir, vattr);
 	}
+	kfree(vattr);
 	return -error;
 }
 
@@ -524,22 +552,26 @@ linvfs_rename(
 	struct inode	*new_inode = ndentry->d_inode;
 	vnode_t		*fvp;	/* from directory */
 	vnode_t		*tvp;	/* target directory */
+	vattr_t		*vattr;
 	int		error;
 
+	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
+	if (unlikely(!vattr))
+		return -ENOMEM;
+
 	fvp = LINVFS_GET_VP(odir);
 	tvp = LINVFS_GET_VP(ndir);
 
 	VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error);
-	if (error)
-		return -error;
-
-	if (new_inode)
-		validate_fields(new_inode);
-
-	validate_fields(odir);
-	if (ndir != odir)
-		validate_fields(ndir);
-	return 0;
+	if (likely(!error)) {
+		if (new_inode)
+			__linvfs_validate_fields(new_inode, vattr);
+		__linvfs_validate_fields(odir, vattr);
+		if (ndir != odir)
+			__linvfs_validate_fields(ndir, vattr);
+	}
+	kfree(vattr);
+	return -error;
 }
 
 /*
@@ -653,11 +685,10 @@ linvfs_setattr(
 	struct inode	*inode = dentry->d_inode;
 	unsigned int	ia_valid = attr->ia_valid;
 	vnode_t		*vp = LINVFS_GET_VP(inode);
-	vattr_t		vattr;
+	vattr_t		vattr = { 0 };
 	int		flags = 0;
 	int		error;
 
-	memset(&vattr, 0, sizeof(vattr_t));
 	if (ia_valid & ATTR_UID) {
 		vattr.va_mask |= XFS_AT_UID;
 		vattr.va_uid = attr->ia_uid;
@@ -699,10 +730,9 @@ linvfs_setattr(
 #endif
 
 	VOP_SETATTR(vp, &vattr, flags, NULL, error);
-	if (error)
-		return -error;
-	vn_revalidate(vp);
-	return error;
+	if (likely(!error))
+		__vn_revalidate(vp, &vattr);
+	return -error;
 }
 
 STATIC void
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 260dd8415dd..225e7dd8b21 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -83,7 +83,7 @@ vn_initialize(
 	vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
 #endif	/* XFS_VNODE_TRACE */
 
-	vn_trace_exit(vp, "vn_initialize", (inst_t *)__return_address);
+	vn_trace_exit(vp, __FUNCTION__, (inst_t *)__return_address);
 	return vp;
 }
 
@@ -129,24 +129,31 @@ vn_revalidate_core(
  * Revalidate the Linux inode from the vnode.
  */
 int
-vn_revalidate(
-	struct vnode	*vp)
+__vn_revalidate(
+	struct vnode	*vp,
+	struct vattr	*vattr)
 {
-	vattr_t		va;
 	int		error;
 
-	vn_trace_entry(vp, "vn_revalidate", (inst_t *)__return_address);
-	ASSERT(vp->v_fbhv != NULL);
-
-	va.va_mask = XFS_AT_STAT|XFS_AT_XFLAGS;
-	VOP_GETATTR(vp, &va, 0, NULL, error);
-	if (!error) {
-		vn_revalidate_core(vp, &va);
+	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+	vattr->va_mask = XFS_AT_STAT | XFS_AT_XFLAGS;
+	VOP_GETATTR(vp, vattr, 0, NULL, error);
+	if (likely(!error)) {
+		vn_revalidate_core(vp, vattr);
 		VUNMODIFY(vp);
 	}
 	return -error;
 }
 
+int
+vn_revalidate(
+	struct vnode	*vp)
+{
+	vattr_t		vattr;
+
+	return __vn_revalidate(vp, &vattr);
+}
+
 /*
  * Add a reference to a referenced vnode.
  */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 0fe2419461d..0cf92ca80ce 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -490,6 +490,7 @@ typedef struct vnode_map {
 			 (vmap).v_ino	 = (vp)->v_inode.i_ino; }
 
 extern int	vn_revalidate(struct vnode *);
+extern int	__vn_revalidate(struct vnode *, vattr_t *);
 extern void	vn_revalidate_core(struct vnode *, vattr_t *);
 
 extern void	vn_iowait(struct vnode *vp);
-- 
cgit v1.2.3


From 39269e29d4aad04252e0debec4c9b01bac16a257 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:33:50 +1100
Subject: [XFS] Reduce xfs_bmapi stack use by removing some local state
 variables, and directly testing flags instead.

SGI-PV: 947312
SGI-Modid: xfs-linux-melb:xfs-kern:25370a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_bmap.c | 79 ++++++++++++++++++++++++++-----------------------------
 1 file changed, 37 insertions(+), 42 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 81a95b684b6..da8fa0cd79c 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4629,10 +4629,6 @@ xfs_bmapi(
 	xfs_btree_cur_t	*cur;		/* bmap btree cursor */
 	xfs_fileoff_t	end;		/* end of mapped file region */
 	int		eof;		/* we've hit the end of extents */
-	char		contig;		/* allocation must be one extent */
-	char		delay;		/* this request is for delayed alloc */
-	char		exact;		/* don't do all of wasdelayed extent */
-	char		convert;	/* unwritten extent I/O completion */
 	xfs_bmbt_rec_t	*ep;		/* extent record pointer */
 	int		error;		/* error return */
 	xfs_bmbt_irec_t	got;		/* current file extent record */
@@ -4651,13 +4647,9 @@ xfs_bmapi(
 	int		tmp_logflags;	/* temp flags holder */
 	int		whichfork;	/* data or attr fork */
 	char		inhole;		/* current location is hole in file */
-	char		stateless;	/* ignore state flag set */
-	char		trim;		/* output trimmed to match range */
-	char		userdata;	/* allocating non-metadata */
 	char		wasdelay;	/* old extent was delayed */
 	char		wr;		/* this is a write request */
 	char		rt;		/* this is a realtime file */
-	char		rsvd;		/* OK to allocate reserved blocks */
 #ifdef DEBUG
 	xfs_fileoff_t	orig_bno;	/* original block number value */
 	int		orig_flags;	/* original flags arg value */
@@ -4694,15 +4686,8 @@ xfs_bmapi(
 		XFS_STATS_INC(xs_blk_mapw);
 	else
 		XFS_STATS_INC(xs_blk_mapr);
-	delay = (flags & XFS_BMAPI_DELAY) != 0;
-	trim = (flags & XFS_BMAPI_ENTIRE) == 0;
-	userdata = (flags & XFS_BMAPI_METADATA) == 0;
-	convert = (flags & XFS_BMAPI_CONVERT) != 0;
-	exact = (flags & XFS_BMAPI_EXACT) != 0;
-	rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
-	contig = (flags & XFS_BMAPI_CONTIG) != 0;
 	/*
-	 * stateless is used to combine extents which
+	 * IGSTATE flag is used to combine extents which
 	 * differ only due to the state of the extents.
 	 * This technique is used from xfs_getbmap()
 	 * when the caller does not wish to see the
@@ -4718,10 +4703,9 @@ xfs_bmapi(
 	 * xfs_strat_comp(), where the xfs_bmapi() call
 	 * is transactioned, and the extents combined.
 	 */
-	stateless = (flags & XFS_BMAPI_IGSTATE) != 0;
-	if (stateless && wr)	/* if writing unwritten space, no */
-		wr = 0;		/* allocations are allowed */
-	ASSERT(wr || !delay);
+	if ((flags & XFS_BMAPI_IGSTATE) && wr)	/* if writing unwritten space */
+		wr = 0;				/* no allocations are allowed */
+	ASSERT(wr || !(flags & XFS_BMAPI_DELAY));
 	logflags = 0;
 	nallocs = 0;
 	cur = NULL;
@@ -4756,7 +4740,7 @@ xfs_bmapi(
 		if (eof && !wr)
 			got.br_startoff = end;
 		inhole = eof || got.br_startoff > bno;
-		wasdelay = wr && !inhole && !delay &&
+		wasdelay = wr && !inhole && !(flags & XFS_BMAPI_DELAY) &&
 			ISNULLSTARTBLOCK(got.br_startblock);
 		/*
 		 * First, deal with the hole before the allocated space
@@ -4768,7 +4752,7 @@ xfs_bmapi(
 			 * allocate the stuff asked for in this bmap call
 			 * but that wouldn't be as good.
 			 */
-			if (wasdelay && !exact) {
+			if (wasdelay && !(flags & XFS_BMAPI_EXACT)) {
 				alen = (xfs_extlen_t)got.br_blockcount;
 				aoff = got.br_startoff;
 				if (lastx != NULLEXTNUM && lastx) {
@@ -4790,8 +4774,8 @@ xfs_bmapi(
 							got.br_startoff - bno);
 				aoff = bno;
 			}
-			minlen = contig ? alen : 1;
-			if (delay) {
+			minlen = (flags & XFS_BMAPI_CONTIG) ? alen : 1;
+			if (flags & XFS_BMAPI_DELAY) {
 				xfs_extlen_t	extsz;
 
 				/* Figure out the extent size, adjust alen */
@@ -4804,7 +4788,9 @@ xfs_bmapi(
 				if (extsz) {
 					error = xfs_bmap_extsize_align(mp,
 							&got, &prev, extsz,
-							rt, eof, delay, convert,
+							rt, eof,
+							flags&XFS_BMAPI_DELAY,
+							flags&XFS_BMAPI_CONVERT,
 							&aoff, &alen);
 					ASSERT(!error);
 				}
@@ -4842,24 +4828,29 @@ xfs_bmapi(
 				if (rt) {
 					error = xfs_mod_incore_sb(mp,
 							XFS_SBS_FREXTENTS,
-							-(extsz), rsvd);
+							-(extsz), (flags &
+							XFS_BMAPI_RSVBLOCKS));
 				} else {
 					error = xfs_mod_incore_sb(mp,
 							XFS_SBS_FDBLOCKS,
-							-(alen), rsvd);
+							-(alen), (flags &
+							XFS_BMAPI_RSVBLOCKS));
 				}
 				if (!error) {
 					error = xfs_mod_incore_sb(mp,
 							XFS_SBS_FDBLOCKS,
-							-(indlen), rsvd);
+							-(indlen), (flags &
+							XFS_BMAPI_RSVBLOCKS));
 					if (error && rt)
 						xfs_mod_incore_sb(mp,
 							XFS_SBS_FREXTENTS,
-							extsz, rsvd);
+							extsz, (flags &
+							XFS_BMAPI_RSVBLOCKS));
 					else if (error)
 						xfs_mod_incore_sb(mp,
 							XFS_SBS_FDBLOCKS,
-							alen, rsvd);
+							alen, (flags &
+							XFS_BMAPI_RSVBLOCKS));
 				}
 
 				if (error) {
@@ -4892,7 +4883,7 @@ xfs_bmapi(
 				/* Indicate if this is the first user data
 				 * in the file, or just any user data.
 				 */
-				if (userdata) {
+				if (!(flags & XFS_BMAPI_METADATA)) {
 					bma.userdata = (aoff == 0) ?
 						XFS_ALLOC_INITIAL_USER_DATA :
 						XFS_ALLOC_USERDATA;
@@ -4904,7 +4895,7 @@ xfs_bmapi(
 				bma.firstblock = *firstblock;
 				bma.alen = alen;
 				bma.off = aoff;
-				bma.conv = convert;
+				bma.conv = (flags & XFS_BMAPI_CONVERT);
 				bma.wasdel = wasdelay;
 				bma.minlen = minlen;
 				bma.low = flist->xbf_low;
@@ -4915,7 +4906,8 @@ xfs_bmapi(
 				 * is larger than a stripe unit.
 				 */
 				if (mp->m_dalign && alen >= mp->m_dalign &&
-				    userdata && whichfork == XFS_DATA_FORK) {
+				    (!(flags & XFS_BMAPI_METADATA)) &&
+				    (whichfork == XFS_DATA_FORK)) {
 					if ((error = xfs_bmap_isaeof(ip, aoff,
 							whichfork, &bma.aeof)))
 						goto error0;
@@ -4978,7 +4970,7 @@ xfs_bmapi(
 			}
 			error = xfs_bmap_add_extent(ip, lastx, &cur, &got,
 				firstblock, flist, &tmp_logflags, whichfork,
-				rsvd);
+				(flags & XFS_BMAPI_RSVBLOCKS));
 			logflags |= tmp_logflags;
 			if (error)
 				goto error0;
@@ -4990,7 +4982,7 @@ xfs_bmapi(
 			ASSERT(got.br_startoff + got.br_blockcount >=
 				aoff + alen);
 #ifdef DEBUG
-			if (delay) {
+			if (flags & XFS_BMAPI_DELAY) {
 				ASSERT(ISNULLSTARTBLOCK(got.br_startblock));
 				ASSERT(STARTBLOCKVAL(got.br_startblock) > 0);
 			}
@@ -5019,14 +5011,15 @@ xfs_bmapi(
 		 * Then deal with the allocated space we found.
 		 */
 		ASSERT(ep != NULL);
-		if (trim && (got.br_startoff + got.br_blockcount > obno)) {
+		if (!(flags & XFS_BMAPI_ENTIRE) &&
+		    (got.br_startoff + got.br_blockcount > obno)) {
 			if (obno > bno)
 				bno = obno;
 			ASSERT((bno >= obno) || (n == 0));
 			ASSERT(bno < end);
 			mval->br_startoff = bno;
 			if (ISNULLSTARTBLOCK(got.br_startblock)) {
-				ASSERT(!wr || delay);
+				ASSERT(!wr || (flags & XFS_BMAPI_DELAY));
 				mval->br_startblock = DELAYSTARTBLOCK;
 			} else
 				mval->br_startblock =
@@ -5048,7 +5041,7 @@ xfs_bmapi(
 		} else {
 			*mval = got;
 			if (ISNULLSTARTBLOCK(mval->br_startblock)) {
-				ASSERT(!wr || delay);
+				ASSERT(!wr || (flags & XFS_BMAPI_DELAY));
 				mval->br_startblock = DELAYSTARTBLOCK;
 			}
 		}
@@ -5074,7 +5067,7 @@ xfs_bmapi(
 			mval->br_state = XFS_EXT_NORM;
 			error = xfs_bmap_add_extent(ip, lastx, &cur, mval,
 				firstblock, flist, &tmp_logflags, whichfork,
-				rsvd);
+				(flags & XFS_BMAPI_RSVBLOCKS));
 			logflags |= tmp_logflags;
 			if (error)
 				goto error0;
@@ -5091,9 +5084,10 @@ xfs_bmapi(
 				continue;
 		}
 
-		ASSERT(!trim ||
+		ASSERT((flags & XFS_BMAPI_ENTIRE) ||
 		       ((mval->br_startoff + mval->br_blockcount) <= end));
-		ASSERT(!trim || (mval->br_blockcount <= len) ||
+		ASSERT((flags & XFS_BMAPI_ENTIRE) ||
+		       (mval->br_blockcount <= len) ||
 		       (mval->br_startoff < obno));
 		bno = mval->br_startoff + mval->br_blockcount;
 		len = end - bno;
@@ -5108,7 +5102,8 @@ xfs_bmapi(
 			   mval[-1].br_startblock != HOLESTARTBLOCK &&
 			   mval->br_startblock ==
 			   mval[-1].br_startblock + mval[-1].br_blockcount &&
-			   (stateless || mval[-1].br_state == mval->br_state)) {
+			   ((flags & XFS_BMAPI_IGSTATE) ||
+				mval[-1].br_state == mval->br_state)) {
 			ASSERT(mval->br_startoff ==
 			       mval[-1].br_startoff + mval[-1].br_blockcount);
 			mval[-1].br_blockcount += mval->br_blockcount;
-- 
cgit v1.2.3


From a365bdd5e8fae9c592b9e4851d931016f9fdd868 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:34:16 +1100
Subject: [XFS] Reduce stack usage within xfs_bmapi by rearranging some code,
 splitting realtime/btree allocators apart.  Based on Glens original patches.

SGI-PV: 947312
SGI-Modid: xfs-linux-melb:xfs-kern:25372a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_bmap.c | 668 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 345 insertions(+), 323 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index da8fa0cd79c..9f0ed4869d4 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2294,25 +2294,15 @@ xfs_bmap_extsize_align(
 
 #define XFS_ALLOC_GAP_UNITS	4
 
-/*
- * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
- * It figures out where to ask the underlying allocator to put the new extent.
- */
 STATIC int
-xfs_bmap_alloc(
+xfs_bmap_adjacent(
 	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
 {
 	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
-	xfs_alloctype_t	atype=0;	/* type for allocation routines */
-	int		error;		/* error return value */
 	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
 	xfs_mount_t	*mp;		/* mount point structure */
 	int		nullfb;		/* true if ap->firstblock isn't set */
 	int		rt;		/* true if inode is realtime */
-	xfs_extlen_t	prod = 0;	/* product factor for allocators */
-	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
-	xfs_extlen_t	align;		/* minimum allocation alignment */
-	xfs_rtblock_t	rtx;
 
 #define	ISVALID(x,y)	\
 	(rt ? \
@@ -2321,75 +2311,10 @@ xfs_bmap_alloc(
 		XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
 		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
 
-	/*
-	 * Set up variables.
-	 */
 	mp = ap->ip->i_mount;
 	nullfb = ap->firstblock == NULLFSBLOCK;
 	rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
 	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
-	if (rt) {
-		align = ap->ip->i_d.di_extsize ?
-			ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
-		/* Set prod to match the extent size */
-		prod = align / mp->m_sb.sb_rextsize;
-
-		error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
-						align, rt, ap->eof, 0,
-						ap->conv, &ap->off, &ap->alen);
-		if (error)
-			return error;
-		ASSERT(ap->alen);
-		ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
-
-		/*
-		 * If the offset & length are not perfectly aligned
-		 * then kill prod, it will just get us in trouble.
-		 */
-		if (do_mod(ap->off, align) || ap->alen % align)
-			prod = 1;
-		/*
-		 * Set ralen to be the actual requested length in rtextents.
-		 */
-		ralen = ap->alen / mp->m_sb.sb_rextsize;
-		/*
-		 * If the old value was close enough to MAXEXTLEN that
-		 * we rounded up to it, cut it back so it's valid again.
-		 * Note that if it's a really large request (bigger than
-		 * MAXEXTLEN), we don't hear about that number, and can't
-		 * adjust the starting point to match it.
-		 */
-		if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
-			ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
-		/*
-		 * If it's an allocation to an empty file at offset 0,
-		 * pick an extent that will space things out in the rt area.
-		 */
-		if (ap->eof && ap->off == 0) {
-			error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
-			if (error)
-				return error;
-			ap->rval = rtx * mp->m_sb.sb_rextsize;
-		} else
-			ap->rval = 0;
-	} else {
-		align = (ap->userdata && ap->ip->i_d.di_extsize &&
-			(ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
-			ap->ip->i_d.di_extsize : 0;
-		if (unlikely(align)) {
-			error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
-							align, rt,
-							ap->eof, 0, ap->conv,
-							&ap->off, &ap->alen);
-			ASSERT(!error);
-			ASSERT(ap->alen);
-		}
-		if (nullfb)
-			ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
-		else
-			ap->rval = ap->firstblock;
-	}
-
 	/*
 	 * If allocating at eof, and there's a previous real block,
 	 * try to use it's last block as our starting point.
@@ -2514,281 +2439,378 @@ xfs_bmap_alloc(
 		else if (gotbno != NULLFSBLOCK)
 			ap->rval = gotbno;
 	}
+#undef ISVALID
+	return 0;
+}
+
+STATIC int
+xfs_bmap_rtalloc(
+	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
+{
+	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
+	int		error;		/* error return value */
+	xfs_mount_t	*mp;		/* mount point structure */
+	xfs_extlen_t	prod = 0;	/* product factor for allocators */
+	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
+	xfs_extlen_t	align;		/* minimum allocation alignment */
+	xfs_rtblock_t	rtx;		/* realtime extent number */
+	xfs_rtblock_t	rtb;
+
+	mp = ap->ip->i_mount;
+	align = ap->ip->i_d.di_extsize ?
+		ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
+	prod = align / mp->m_sb.sb_rextsize;
+	error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+					align, 1, ap->eof, 0,
+					ap->conv, &ap->off, &ap->alen);
+	if (error)
+		return error;
+	ASSERT(ap->alen);
+	ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
+
+	/*
+	 * If the offset & length are not perfectly aligned
+	 * then kill prod, it will just get us in trouble.
+	 */
+	if (do_mod(ap->off, align) || ap->alen % align)
+		prod = 1;
+	/*
+	 * Set ralen to be the actual requested length in rtextents.
+	 */
+	ralen = ap->alen / mp->m_sb.sb_rextsize;
+	/*
+	 * If the old value was close enough to MAXEXTLEN that
+	 * we rounded up to it, cut it back so it's valid again.
+	 * Note that if it's a really large request (bigger than
+	 * MAXEXTLEN), we don't hear about that number, and can't
+	 * adjust the starting point to match it.
+	 */
+	if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
+		ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
+	/*
+	 * If it's an allocation to an empty file at offset 0,
+	 * pick an extent that will space things out in the rt area.
+	 */
+	if (ap->eof && ap->off == 0) {
+		error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
+		if (error)
+			return error;
+		ap->rval = rtx * mp->m_sb.sb_rextsize;
+	} else {
+		ap->rval = 0;
+	}
+
+	xfs_bmap_adjacent(ap);
+
+	/*
+	 * Realtime allocation, done through xfs_rtallocate_extent.
+	 */
+	atype = ap->rval == 0 ?  XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
+	do_div(ap->rval, mp->m_sb.sb_rextsize);
+	rtb = ap->rval;
+	ap->alen = ralen;
+	if ((error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, ap->alen,
+				&ralen, atype, ap->wasdel, prod, &rtb)))
+		return error;
+	if (rtb == NULLFSBLOCK && prod > 1 &&
+	    (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1,
+					   ap->alen, &ralen, atype,
+					   ap->wasdel, 1, &rtb)))
+		return error;
+	ap->rval = rtb;
+	if (ap->rval != NULLFSBLOCK) {
+		ap->rval *= mp->m_sb.sb_rextsize;
+		ralen *= mp->m_sb.sb_rextsize;
+		ap->alen = ralen;
+		ap->ip->i_d.di_nblocks += ralen;
+		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
+		if (ap->wasdel)
+			ap->ip->i_delayed_blks -= ralen;
+		/*
+		 * Adjust the disk quota also. This was reserved
+		 * earlier.
+		 */
+		XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
+			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
+					XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
+	} else {
+		ap->alen = 0;
+	}
+	return 0;
+}
+
+STATIC int
+xfs_bmap_btalloc(
+	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
+{
+	xfs_mount_t	*mp;		/* mount point structure */
+	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
+	xfs_extlen_t	align;		/* minimum allocation alignment */
+	xfs_agnumber_t	ag;
+	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
+	xfs_agnumber_t	startag;
+	xfs_alloc_arg_t	args;
+	xfs_extlen_t	blen;
+	xfs_extlen_t	delta;
+	xfs_extlen_t	longest;
+	xfs_extlen_t	need;
+	xfs_extlen_t	nextminlen = 0;
+	xfs_perag_t	*pag;
+	int		nullfb;		/* true if ap->firstblock isn't set */
+	int		isaligned;
+	int		notinit;
+	int		tryagain;
+	int		error;
+
+	mp = ap->ip->i_mount;
+	align = (ap->userdata && ap->ip->i_d.di_extsize &&
+		(ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
+		ap->ip->i_d.di_extsize : 0;
+	if (unlikely(align)) {
+		error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+						align, 0, ap->eof, 0, ap->conv,
+						&ap->off, &ap->alen);
+		ASSERT(!error);
+		ASSERT(ap->alen);
+	}
+	nullfb = ap->firstblock == NULLFSBLOCK;
+	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
+	if (nullfb)
+		ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+	else
+		ap->rval = ap->firstblock;
+
+	xfs_bmap_adjacent(ap);
+
 	/*
 	 * If allowed, use ap->rval; otherwise must use firstblock since
 	 * it's in the right allocation group.
 	 */
-	if (nullfb || rt || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno)
+	if (nullfb || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno)
 		;
 	else
 		ap->rval = ap->firstblock;
 	/*
-	 * Realtime allocation, done through xfs_rtallocate_extent.
+	 * Normal allocation, done through xfs_alloc_vextent.
 	 */
-	if (rt) {
-#ifndef __KERNEL__
-		ASSERT(0);
-#else
-		xfs_rtblock_t	rtb;
-
-		atype = ap->rval == 0 ?
-			XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
-		do_div(ap->rval, mp->m_sb.sb_rextsize);
-		rtb = ap->rval;
-		ap->alen = ralen;
-		if ((error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, ap->alen,
-				&ralen, atype, ap->wasdel, prod, &rtb)))
-			return error;
-		if (rtb == NULLFSBLOCK && prod > 1 &&
-		    (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1,
-						   ap->alen, &ralen, atype,
-						   ap->wasdel, 1, &rtb)))
-			return error;
-		ap->rval = rtb;
-		if (ap->rval != NULLFSBLOCK) {
-			ap->rval *= mp->m_sb.sb_rextsize;
-			ralen *= mp->m_sb.sb_rextsize;
-			ap->alen = ralen;
-			ap->ip->i_d.di_nblocks += ralen;
-			xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
-			if (ap->wasdel)
-				ap->ip->i_delayed_blks -= ralen;
+	tryagain = isaligned = 0;
+	args.tp = ap->tp;
+	args.mp = mp;
+	args.fsbno = ap->rval;
+	args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
+	blen = 0;
+	if (nullfb) {
+		args.type = XFS_ALLOCTYPE_START_BNO;
+		args.total = ap->total;
+		/*
+		 * Find the longest available space.
+		 * We're going to try for the whole allocation at once.
+		 */
+		startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
+		notinit = 0;
+		down_read(&mp->m_peraglock);
+		while (blen < ap->alen) {
+			pag = &mp->m_perag[ag];
+			if (!pag->pagf_init &&
+			    (error = xfs_alloc_pagf_init(mp, args.tp,
+				    ag, XFS_ALLOC_FLAG_TRYLOCK))) {
+				up_read(&mp->m_peraglock);
+				return error;
+			}
 			/*
-			 * Adjust the disk quota also. This was reserved
-			 * earlier.
+			 * See xfs_alloc_fix_freelist...
 			 */
-			XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
-				ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
-						XFS_TRANS_DQ_RTBCOUNT,
-				(long) ralen);
-		} else
-			ap->alen = 0;
-#endif	/* __KERNEL__ */
+			if (pag->pagf_init) {
+				need = XFS_MIN_FREELIST_PAG(pag, mp);
+				delta = need > pag->pagf_flcount ?
+					need - pag->pagf_flcount : 0;
+				longest = (pag->pagf_longest > delta) ?
+					(pag->pagf_longest - delta) :
+					(pag->pagf_flcount > 0 ||
+					 pag->pagf_longest > 0);
+				if (blen < longest)
+					blen = longest;
+			} else
+				notinit = 1;
+			if (++ag == mp->m_sb.sb_agcount)
+				ag = 0;
+			if (ag == startag)
+				break;
+		}
+		up_read(&mp->m_peraglock);
+		/*
+		 * Since the above loop did a BUF_TRYLOCK, it is
+		 * possible that there is space for this request.
+		 */
+		if (notinit || blen < ap->minlen)
+			args.minlen = ap->minlen;
+		/*
+		 * If the best seen length is less than the request
+		 * length, use the best as the minimum.
+		 */
+		else if (blen < ap->alen)
+			args.minlen = blen;
+		/*
+		 * Otherwise we've seen an extent as big as alen,
+		 * use that as the minimum.
+		 */
+		else
+			args.minlen = ap->alen;
+	} else if (ap->low) {
+		args.type = XFS_ALLOCTYPE_FIRST_AG;
+		args.total = args.minlen = ap->minlen;
+	} else {
+		args.type = XFS_ALLOCTYPE_NEAR_BNO;
+		args.total = ap->total;
+		args.minlen = ap->minlen;
+	}
+	if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&
+		    (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {
+		args.prod = ap->ip->i_d.di_extsize;
+		if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
+			args.mod = (xfs_extlen_t)(args.prod - args.mod);
+	} else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) {
+		args.prod = 1;
+		args.mod = 0;
+	} else {
+		args.prod = NBPP >> mp->m_sb.sb_blocklog;
+		if ((args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod))))
+			args.mod = (xfs_extlen_t)(args.prod - args.mod);
 	}
 	/*
-	 * Normal allocation, done through xfs_alloc_vextent.
+	 * If we are not low on available data blocks, and the
+	 * underlying logical volume manager is a stripe, and
+	 * the file offset is zero then try to allocate data
+	 * blocks on stripe unit boundary.
+	 * NOTE: ap->aeof is only set if the allocation length
+	 * is >= the stripe unit and the allocation offset is
+	 * at the end of file.
 	 */
-	else {
-		xfs_agnumber_t	ag;
-		xfs_alloc_arg_t	args;
-		xfs_extlen_t	blen;
-		xfs_extlen_t	delta;
-		int		isaligned;
-		xfs_extlen_t	longest;
-		xfs_extlen_t	need;
-		xfs_extlen_t	nextminlen=0;
-		int		notinit;
-		xfs_perag_t	*pag;
-		xfs_agnumber_t	startag;
-		int		tryagain;
-
-		tryagain = isaligned = 0;
-		args.tp = ap->tp;
-		args.mp = mp;
-		args.fsbno = ap->rval;
-		args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
-		blen = 0;
-		if (nullfb) {
-			args.type = XFS_ALLOCTYPE_START_BNO;
-			args.total = ap->total;
-			/*
-			 * Find the longest available space.
-			 * We're going to try for the whole allocation at once.
-			 */
-			startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
-			notinit = 0;
-			down_read(&mp->m_peraglock);
-			while (blen < ap->alen) {
-				pag = &mp->m_perag[ag];
-				if (!pag->pagf_init &&
-				    (error = xfs_alloc_pagf_init(mp, args.tp,
-					    ag, XFS_ALLOC_FLAG_TRYLOCK))) {
-					up_read(&mp->m_peraglock);
-					return error;
-				}
-				/*
-				 * See xfs_alloc_fix_freelist...
-				 */
-				if (pag->pagf_init) {
-					need = XFS_MIN_FREELIST_PAG(pag, mp);
-					delta = need > pag->pagf_flcount ?
-						need - pag->pagf_flcount : 0;
-					longest = (pag->pagf_longest > delta) ?
-						(pag->pagf_longest - delta) :
-						(pag->pagf_flcount > 0 ||
-						 pag->pagf_longest > 0);
-					if (blen < longest)
-						blen = longest;
-				} else
-					notinit = 1;
-				if (++ag == mp->m_sb.sb_agcount)
-					ag = 0;
-				if (ag == startag)
-					break;
-			}
-			up_read(&mp->m_peraglock);
+	if (!ap->low && ap->aeof) {
+		if (!ap->off) {
+			args.alignment = mp->m_dalign;
+			atype = args.type;
+			isaligned = 1;
 			/*
-			 * Since the above loop did a BUF_TRYLOCK, it is
-			 * possible that there is space for this request.
+			 * Adjust for alignment
 			 */
-			if (notinit || blen < ap->minlen)
-				args.minlen = ap->minlen;
+			if (blen > args.alignment && blen <= ap->alen)
+				args.minlen = blen - args.alignment;
+			args.minalignslop = 0;
+		} else {
 			/*
-			 * If the best seen length is less than the request
-			 * length, use the best as the minimum.
+			 * First try an exact bno allocation.
+			 * If it fails then do a near or start bno
+			 * allocation with alignment turned on.
 			 */
-			else if (blen < ap->alen)
-				args.minlen = blen;
+			atype = args.type;
+			tryagain = 1;
+			args.type = XFS_ALLOCTYPE_THIS_BNO;
+			args.alignment = 1;
 			/*
-			 * Otherwise we've seen an extent as big as alen,
-			 * use that as the minimum.
+			 * Compute the minlen+alignment for the
+			 * next case.  Set slop so that the value
+			 * of minlen+alignment+slop doesn't go up
+			 * between the calls.
 			 */
+			if (blen > mp->m_dalign && blen <= ap->alen)
+				nextminlen = blen - mp->m_dalign;
 			else
-				args.minlen = ap->alen;
-		} else if (ap->low) {
-			args.type = XFS_ALLOCTYPE_FIRST_AG;
-			args.total = args.minlen = ap->minlen;
-		} else {
-			args.type = XFS_ALLOCTYPE_NEAR_BNO;
-			args.total = ap->total;
-			args.minlen = ap->minlen;
-		}
-		if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&
-			    (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {
-			args.prod = ap->ip->i_d.di_extsize;
-			if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
-				args.mod = (xfs_extlen_t)(args.prod - args.mod);
-		} else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) {
-			args.prod = 1;
-			args.mod = 0;
-		} else {
-			args.prod = NBPP >> mp->m_sb.sb_blocklog;
-			if ((args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod))))
-				args.mod = (xfs_extlen_t)(args.prod - args.mod);
+				nextminlen = args.minlen;
+			if (nextminlen + mp->m_dalign > args.minlen + 1)
+				args.minalignslop =
+					nextminlen + mp->m_dalign -
+					args.minlen - 1;
+			else
+				args.minalignslop = 0;
 		}
+	} else {
+		args.alignment = 1;
+		args.minalignslop = 0;
+	}
+	args.minleft = ap->minleft;
+	args.wasdel = ap->wasdel;
+	args.isfl = 0;
+	args.userdata = ap->userdata;
+	if ((error = xfs_alloc_vextent(&args)))
+		return error;
+	if (tryagain && args.fsbno == NULLFSBLOCK) {
 		/*
-		 * If we are not low on available data blocks, and the
-		 * underlying logical volume manager is a stripe, and
-		 * the file offset is zero then try to allocate data
-		 * blocks on stripe unit boundary.
-		 * NOTE: ap->aeof is only set if the allocation length
-		 * is >= the stripe unit and the allocation offset is
-		 * at the end of file.
+		 * Exact allocation failed. Now try with alignment
+		 * turned on.
 		 */
-		if (!ap->low && ap->aeof) {
-			if (!ap->off) {
-				args.alignment = mp->m_dalign;
-				atype = args.type;
-				isaligned = 1;
-				/*
-				 * Adjust for alignment
-				 */
-				if (blen > args.alignment && blen <= ap->alen)
-					args.minlen = blen - args.alignment;
-				args.minalignslop = 0;
-			} else {
-				/*
-				 * First try an exact bno allocation.
-				 * If it fails then do a near or start bno
-				 * allocation with alignment turned on.
-				 */
-				atype = args.type;
-				tryagain = 1;
-				args.type = XFS_ALLOCTYPE_THIS_BNO;
-				args.alignment = 1;
-				/*
-				 * Compute the minlen+alignment for the
-				 * next case.  Set slop so that the value
-				 * of minlen+alignment+slop doesn't go up
-				 * between the calls.
-				 */
-				if (blen > mp->m_dalign && blen <= ap->alen)
-					nextminlen = blen - mp->m_dalign;
-				else
-					nextminlen = args.minlen;
-				if (nextminlen + mp->m_dalign > args.minlen + 1)
-					args.minalignslop =
-						nextminlen + mp->m_dalign -
-						args.minlen - 1;
-				else
-					args.minalignslop = 0;
-			}
-		} else {
-			args.alignment = 1;
-			args.minalignslop = 0;
-		}
-		args.minleft = ap->minleft;
-		args.wasdel = ap->wasdel;
-		args.isfl = 0;
-		args.userdata = ap->userdata;
+		args.type = atype;
+		args.fsbno = ap->rval;
+		args.alignment = mp->m_dalign;
+		args.minlen = nextminlen;
+		args.minalignslop = 0;
+		isaligned = 1;
+		if ((error = xfs_alloc_vextent(&args)))
+			return error;
+	}
+	if (isaligned && args.fsbno == NULLFSBLOCK) {
+		/*
+		 * allocation failed, so turn off alignment and
+		 * try again.
+		 */
+		args.type = atype;
+		args.fsbno = ap->rval;
+		args.alignment = 0;
+		if ((error = xfs_alloc_vextent(&args)))
+			return error;
+	}
+	if (args.fsbno == NULLFSBLOCK && nullfb &&
+	    args.minlen > ap->minlen) {
+		args.minlen = ap->minlen;
+		args.type = XFS_ALLOCTYPE_START_BNO;
+		args.fsbno = ap->rval;
 		if ((error = xfs_alloc_vextent(&args)))
 			return error;
-		if (tryagain && args.fsbno == NULLFSBLOCK) {
-			/*
-			 * Exact allocation failed. Now try with alignment
-			 * turned on.
-			 */
-			args.type = atype;
-			args.fsbno = ap->rval;
-			args.alignment = mp->m_dalign;
-			args.minlen = nextminlen;
-			args.minalignslop = 0;
-			isaligned = 1;
-			if ((error = xfs_alloc_vextent(&args)))
-				return error;
-		}
-		if (isaligned && args.fsbno == NULLFSBLOCK) {
-			/*
-			 * allocation failed, so turn off alignment and
-			 * try again.
-			 */
-			args.type = atype;
-			args.fsbno = ap->rval;
-			args.alignment = 0;
-			if ((error = xfs_alloc_vextent(&args)))
-				return error;
-		}
-		if (args.fsbno == NULLFSBLOCK && nullfb &&
-		    args.minlen > ap->minlen) {
-			args.minlen = ap->minlen;
-			args.type = XFS_ALLOCTYPE_START_BNO;
-			args.fsbno = ap->rval;
-			if ((error = xfs_alloc_vextent(&args)))
-				return error;
-		}
-		if (args.fsbno == NULLFSBLOCK && nullfb) {
-			args.fsbno = 0;
-			args.type = XFS_ALLOCTYPE_FIRST_AG;
-			args.total = ap->minlen;
-			args.minleft = 0;
-			if ((error = xfs_alloc_vextent(&args)))
-				return error;
-			ap->low = 1;
-		}
-		if (args.fsbno != NULLFSBLOCK) {
-			ap->firstblock = ap->rval = args.fsbno;
-			ASSERT(nullfb || fb_agno == args.agno ||
-			       (ap->low && fb_agno < args.agno));
-			ap->alen = args.len;
-			ap->ip->i_d.di_nblocks += args.len;
-			xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
-			if (ap->wasdel)
-				ap->ip->i_delayed_blks -= args.len;
-			/*
-			 * Adjust the disk quota also. This was reserved
-			 * earlier.
-			 */
-			XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
-				ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
-						XFS_TRANS_DQ_BCOUNT,
-				(long) args.len);
-		} else {
-			ap->rval = NULLFSBLOCK;
-			ap->alen = 0;
-		}
+	}
+	if (args.fsbno == NULLFSBLOCK && nullfb) {
+		args.fsbno = 0;
+		args.type = XFS_ALLOCTYPE_FIRST_AG;
+		args.total = ap->minlen;
+		args.minleft = 0;
+		if ((error = xfs_alloc_vextent(&args)))
+			return error;
+		ap->low = 1;
+	}
+	if (args.fsbno != NULLFSBLOCK) {
+		ap->firstblock = ap->rval = args.fsbno;
+		ASSERT(nullfb || fb_agno == args.agno ||
+		       (ap->low && fb_agno < args.agno));
+		ap->alen = args.len;
+		ap->ip->i_d.di_nblocks += args.len;
+		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
+		if (ap->wasdel)
+			ap->ip->i_delayed_blks -= args.len;
+		/*
+		 * Adjust the disk quota also. This was reserved
+		 * earlier.
+		 */
+		XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
+			ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
+					XFS_TRANS_DQ_BCOUNT,
+			(long) args.len);
+	} else {
+		ap->rval = NULLFSBLOCK;
+		ap->alen = 0;
 	}
 	return 0;
-#undef	ISVALID
+}
+
+/*
+ * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
+ * It figures out where to ask the underlying allocator to put the new extent.
+ */
+STATIC int
+xfs_bmap_alloc(
+	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
+{
+	if ((ap->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && ap->userdata)
+		return xfs_bmap_rtalloc(ap);
+	return xfs_bmap_btalloc(ap);
 }
 
 /*
-- 
cgit v1.2.3


From b8b0f546569871b365a5e3b3cc3f667af658dd49 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:47:32 +1100
Subject: [XFS] Remove a couple of no-longer-used macros/types from XFS.

SGI-PV: 950556
SGI-Modid: xfs-linux-melb:xfs-kern:25377a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_mount.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 29cfcf0c11b..ebd73960e9d 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -505,11 +505,6 @@ xfs_preferred_iosize(xfs_mount_t *mp)
 #define XFS_CORRUPT_INCORE	0x8	/* Corrupt in-memory data structures */
 #define XFS_SHUTDOWN_REMOTE_REQ 0x10	/* Shutdown came from remote cell */
 
-/*
- * xflags for xfs_syncsub
- */
-#define XFS_XSYNC_RELOC		0x01
-
 /*
  * Flags for xfs_mountfs
  */
-- 
cgit v1.2.3


From e4c573bb6a8477a26b3d5471fd116d258760a13a Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 13:54:26 +1100
Subject: [XFS] Switch over from linvfs names for address space ops for
 consistent naming.

SGI-PV: 950556
SGI-Modid: xfs-linux-melb:xfs-kern:25378a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c  | 63 +++++++++++++++++++++-----------------------
 fs/xfs/linux-2.6/xfs_aops.h  |  4 +--
 fs/xfs/linux-2.6/xfs_iops.c  |  2 +-
 fs/xfs/linux-2.6/xfs_super.c |  4 +--
 4 files changed, 35 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 4b6bfdb8251..2b610c7ba32 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -43,7 +43,6 @@
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
-
 STATIC void
 xfs_count_page_state(
 	struct page		*page,
@@ -68,8 +67,6 @@ xfs_count_page_state(
 	} while ((bh = bh->b_this_page) != head);
 }
 
-
-
 #if defined(XFS_RW_TRACE)
 void
 xfs_page_trace(
@@ -1095,7 +1092,7 @@ error:
  */
 
 STATIC int
-linvfs_writepage(
+xfs_vm_writepage(
 	struct page		*page,
 	struct writeback_control *wbc)
 {
@@ -1181,7 +1178,7 @@ out_unlock:
  *    free them and we should come back later via writepage.
  */
 STATIC int
-linvfs_release_page(
+xfs_vm_release_page(
 	struct page		*page,
 	gfp_t			gfp_mask)
 {
@@ -1223,7 +1220,7 @@ free_buffers:
 }
 
 STATIC int
-__linvfs_get_block(
+__xfs_get_block(
 	struct inode		*inode,
 	sector_t		iblock,
 	unsigned long		blocks,
@@ -1304,30 +1301,30 @@ __linvfs_get_block(
 }
 
 int
-linvfs_get_block(
+xfs_get_block(
 	struct inode		*inode,
 	sector_t		iblock,
 	struct buffer_head	*bh_result,
 	int			create)
 {
-	return __linvfs_get_block(inode, iblock, 0, bh_result,
+	return __xfs_get_block(inode, iblock, 0, bh_result,
 					create, 0, BMAPI_WRITE);
 }
 
 STATIC int
-linvfs_get_blocks_direct(
+xfs_get_blocks_direct(
 	struct inode		*inode,
 	sector_t		iblock,
 	unsigned long		max_blocks,
 	struct buffer_head	*bh_result,
 	int			create)
 {
-	return __linvfs_get_block(inode, iblock, max_blocks, bh_result,
+	return __xfs_get_block(inode, iblock, max_blocks, bh_result,
 					create, 1, BMAPI_WRITE|BMAPI_DIRECT);
 }
 
 STATIC void
-linvfs_end_io_direct(
+xfs_end_io_direct(
 	struct kiocb	*iocb,
 	loff_t		offset,
 	ssize_t		size,
@@ -1365,7 +1362,7 @@ linvfs_end_io_direct(
 }
 
 STATIC ssize_t
-linvfs_direct_IO(
+xfs_vm_direct_IO(
 	int			rw,
 	struct kiocb		*iocb,
 	const struct iovec	*iov,
@@ -1389,8 +1386,8 @@ linvfs_direct_IO(
 	ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
 		iomap.iomap_target->bt_bdev,
 		iov, offset, nr_segs,
-		linvfs_get_blocks_direct,
-		linvfs_end_io_direct);
+		xfs_get_blocks_direct,
+		xfs_end_io_direct);
 
 	if (unlikely(ret <= 0 && iocb->private))
 		xfs_destroy_ioend(iocb->private);
@@ -1398,17 +1395,17 @@ linvfs_direct_IO(
 }
 
 STATIC int
-linvfs_prepare_write(
+xfs_vm_prepare_write(
 	struct file		*file,
 	struct page		*page,
 	unsigned int		from,
 	unsigned int		to)
 {
-	return block_prepare_write(page, from, to, linvfs_get_block);
+	return block_prepare_write(page, from, to, xfs_get_block);
 }
 
 STATIC sector_t
-linvfs_bmap(
+xfs_vm_bmap(
 	struct address_space	*mapping,
 	sector_t		block)
 {
@@ -1416,34 +1413,34 @@ linvfs_bmap(
 	vnode_t			*vp = LINVFS_GET_VP(inode);
 	int			error;
 
-	vn_trace_entry(vp, "linvfs_bmap", (inst_t *)__return_address);
+	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
 
 	VOP_RWLOCK(vp, VRWLOCK_READ);
 	VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error);
 	VOP_RWUNLOCK(vp, VRWLOCK_READ);
-	return generic_block_bmap(mapping, block, linvfs_get_block);
+	return generic_block_bmap(mapping, block, xfs_get_block);
 }
 
 STATIC int
-linvfs_readpage(
+xfs_vm_readpage(
 	struct file		*unused,
 	struct page		*page)
 {
-	return mpage_readpage(page, linvfs_get_block);
+	return mpage_readpage(page, xfs_get_block);
 }
 
 STATIC int
-linvfs_readpages(
+xfs_vm_readpages(
 	struct file		*unused,
 	struct address_space	*mapping,
 	struct list_head	*pages,
 	unsigned		nr_pages)
 {
-	return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block);
+	return mpage_readpages(mapping, pages, nr_pages, xfs_get_block);
 }
 
 STATIC int
-linvfs_invalidate_page(
+xfs_vm_invalidate_page(
 	struct page		*page,
 	unsigned long		offset)
 {
@@ -1452,16 +1449,16 @@ linvfs_invalidate_page(
 	return block_invalidatepage(page, offset);
 }
 
-struct address_space_operations linvfs_aops = {
-	.readpage		= linvfs_readpage,
-	.readpages		= linvfs_readpages,
-	.writepage		= linvfs_writepage,
+struct address_space_operations xfs_address_space_operations = {
+	.readpage		= xfs_vm_readpage,
+	.readpages		= xfs_vm_readpages,
+	.writepage		= xfs_vm_writepage,
 	.sync_page		= block_sync_page,
-	.releasepage		= linvfs_release_page,
-	.invalidatepage		= linvfs_invalidate_page,
-	.prepare_write		= linvfs_prepare_write,
+	.releasepage		= xfs_vm_release_page,
+	.invalidatepage		= xfs_vm_invalidate_page,
+	.prepare_write		= xfs_vm_prepare_write,
 	.commit_write		= generic_commit_write,
-	.bmap			= linvfs_bmap,
-	.direct_IO		= linvfs_direct_IO,
+	.bmap			= xfs_vm_bmap,
+	.direct_IO		= xfs_vm_direct_IO,
 	.migratepage		= buffer_migrate_page,
 };
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 55339dd5a30..795699f121d 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -40,7 +40,7 @@ typedef struct xfs_ioend {
 	struct work_struct	io_work;	/* xfsdatad work queue */
 } xfs_ioend_t;
 
-extern struct address_space_operations linvfs_aops;
-extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
+extern struct address_space_operations xfs_address_space_operations;
+extern int xfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
 #endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index b1219195c6e..52b02bd5c28 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -739,7 +739,7 @@ STATIC void
 linvfs_truncate(
 	struct inode	*inode)
 {
-	block_truncate_page(inode->i_mapping, inode->i_size, linvfs_get_block);
+	block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_block);
 }
 
 STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 4d8613f65c2..fb76c53f9c4 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -129,7 +129,7 @@ xfs_set_inodeops(
 	case S_IFREG:
 		inode->i_op = &linvfs_file_inode_operations;
 		inode->i_fop = &linvfs_file_operations;
-		inode->i_mapping->a_ops = &linvfs_aops;
+		inode->i_mapping->a_ops = &xfs_address_space_operations;
 		break;
 	case S_IFDIR:
 		inode->i_op = &linvfs_dir_inode_operations;
@@ -138,7 +138,7 @@ xfs_set_inodeops(
 	case S_IFLNK:
 		inode->i_op = &linvfs_symlink_inode_operations;
 		if (inode->i_blocks)
-			inode->i_mapping->a_ops = &linvfs_aops;
+			inode->i_mapping->a_ops = &xfs_address_space_operations;
 		break;
 	default:
 		inode->i_op = &linvfs_file_inode_operations;
-- 
cgit v1.2.3


From 3562fd45658fbb696f4546479332d5249c3ad90f Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 14:00:35 +1100
Subject: [XFS] Switch over from linvfs names for file operations for
 consistent naming.

SGI-PV: 950556
SGI-Modid: xfs-linux-melb:xfs-kern:25379a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_file.c  | 140 +++++++++++++++++++++----------------------
 fs/xfs/linux-2.6/xfs_ioctl.c |   2 +-
 fs/xfs/linux-2.6/xfs_iops.h  |   6 +-
 fs/xfs/linux-2.6/xfs_super.c |   4 +-
 4 files changed, 76 insertions(+), 76 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index ce8fe40e162..b050e407942 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -43,13 +43,13 @@
 #include <linux/dcache.h>
 #include <linux/smp_lock.h>
 
-static struct vm_operations_struct linvfs_file_vm_ops;
+static struct vm_operations_struct xfs_file_vm_ops;
 #ifdef CONFIG_XFS_DMAPI
-static struct vm_operations_struct linvfs_dmapi_file_vm_ops;
+static struct vm_operations_struct xfs_dmapi_file_vm_ops;
 #endif
 
 STATIC inline ssize_t
-__linvfs_read(
+__xfs_file_read(
 	struct kiocb		*iocb,
 	char			__user *buf,
 	int			ioflags,
@@ -71,28 +71,28 @@ __linvfs_read(
 
 
 STATIC ssize_t
-linvfs_aio_read(
+xfs_file_aio_read(
 	struct kiocb		*iocb,
 	char			__user *buf,
 	size_t			count,
 	loff_t			pos)
 {
-	return __linvfs_read(iocb, buf, IO_ISAIO, count, pos);
+	return __xfs_file_read(iocb, buf, IO_ISAIO, count, pos);
 }
 
 STATIC ssize_t
-linvfs_aio_read_invis(
+xfs_file_aio_read_invis(
 	struct kiocb		*iocb,
 	char			__user *buf,
 	size_t			count,
 	loff_t			pos)
 {
-	return __linvfs_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+	return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
 }
 
 
 STATIC inline ssize_t
-__linvfs_write(
+__xfs_file_write(
 	struct kiocb	*iocb,
 	const char	__user *buf,
 	int		ioflags,
@@ -115,28 +115,28 @@ __linvfs_write(
 
 
 STATIC ssize_t
-linvfs_aio_write(
+xfs_file_aio_write(
 	struct kiocb		*iocb,
 	const char		__user *buf,
 	size_t			count,
 	loff_t			pos)
 {
-	return __linvfs_write(iocb, buf, IO_ISAIO, count, pos);
+	return __xfs_file_write(iocb, buf, IO_ISAIO, count, pos);
 }
 
 STATIC ssize_t
-linvfs_aio_write_invis(
+xfs_file_aio_write_invis(
 	struct kiocb		*iocb,
 	const char		__user *buf,
 	size_t			count,
 	loff_t			pos)
 {
-	return __linvfs_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+	return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
 }
 
 
 STATIC inline ssize_t
-__linvfs_readv(
+__xfs_file_readv(
 	struct file		*file,
 	const struct iovec 	*iov,
 	int			ioflags,
@@ -165,28 +165,28 @@ __linvfs_readv(
 }
 
 STATIC ssize_t
-linvfs_readv(
+xfs_file_readv(
 	struct file		*file,
 	const struct iovec 	*iov,
 	unsigned long		nr_segs,
 	loff_t			*ppos)
 {
-	return __linvfs_readv(file, iov, 0, nr_segs, ppos);
+	return __xfs_file_readv(file, iov, 0, nr_segs, ppos);
 }
 
 STATIC ssize_t
-linvfs_readv_invis(
+xfs_file_readv_invis(
 	struct file		*file,
 	const struct iovec 	*iov,
 	unsigned long		nr_segs,
 	loff_t			*ppos)
 {
-	return __linvfs_readv(file, iov, IO_INVIS, nr_segs, ppos);
+	return __xfs_file_readv(file, iov, IO_INVIS, nr_segs, ppos);
 }
 
 
 STATIC inline ssize_t
-__linvfs_writev(
+__xfs_file_writev(
 	struct file		*file,
 	const struct iovec 	*iov,
 	int			ioflags,
@@ -216,27 +216,27 @@ __linvfs_writev(
 
 
 STATIC ssize_t
-linvfs_writev(
+xfs_file_writev(
 	struct file		*file,
 	const struct iovec 	*iov,
 	unsigned long		nr_segs,
 	loff_t			*ppos)
 {
-	return __linvfs_writev(file, iov, 0, nr_segs, ppos);
+	return __xfs_file_writev(file, iov, 0, nr_segs, ppos);
 }
 
 STATIC ssize_t
-linvfs_writev_invis(
+xfs_file_writev_invis(
 	struct file		*file,
 	const struct iovec 	*iov,
 	unsigned long		nr_segs,
 	loff_t			*ppos)
 {
-	return __linvfs_writev(file, iov, IO_INVIS, nr_segs, ppos);
+	return __xfs_file_writev(file, iov, IO_INVIS, nr_segs, ppos);
 }
 
 STATIC ssize_t
-linvfs_sendfile(
+xfs_file_sendfile(
 	struct file		*filp,
 	loff_t			*ppos,
 	size_t			count,
@@ -252,7 +252,7 @@ linvfs_sendfile(
 
 
 STATIC int
-linvfs_open(
+xfs_file_open(
 	struct inode	*inode,
 	struct file	*filp)
 {
@@ -269,7 +269,7 @@ linvfs_open(
 
 
 STATIC int
-linvfs_release(
+xfs_file_release(
 	struct inode	*inode,
 	struct file	*filp)
 {
@@ -283,7 +283,7 @@ linvfs_release(
 
 
 STATIC int
-linvfs_fsync(
+xfs_file_fsync(
 	struct file	*filp,
 	struct dentry	*dentry,
 	int		datasync)
@@ -302,7 +302,7 @@ linvfs_fsync(
 }
 
 /*
- * linvfs_readdir maps to VOP_READDIR().
+ * xfs_file_readdir maps to VOP_READDIR().
  * We need to build a uio, cred, ...
  */
 
@@ -311,7 +311,7 @@ linvfs_fsync(
 #ifdef CONFIG_XFS_DMAPI
 
 STATIC struct page *
-linvfs_filemap_nopage(
+xfs_vm_nopage(
 	struct vm_area_struct	*area,
 	unsigned long		address,
 	int			*type)
@@ -334,7 +334,7 @@ linvfs_filemap_nopage(
 
 
 STATIC int
-linvfs_readdir(
+xfs_file_readdir(
 	struct file	*filp,
 	void		*dirent,
 	filldir_t	filldir)
@@ -414,7 +414,7 @@ done:
 
 
 STATIC int
-linvfs_file_mmap(
+xfs_file_mmap(
 	struct file	*filp,
 	struct vm_area_struct *vma)
 {
@@ -423,11 +423,11 @@ linvfs_file_mmap(
 	vattr_t		*vattr;
 	int		error;
 
-	vma->vm_ops = &linvfs_file_vm_ops;
+	vma->vm_ops = &xfs_file_vm_ops;
 
 #ifdef CONFIG_XFS_DMAPI
 	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
-		vma->vm_ops = &linvfs_dmapi_file_vm_ops;
+		vma->vm_ops = &xfs_dmapi_file_vm_ops;
 	}
 #endif /* CONFIG_XFS_DMAPI */
 
@@ -444,7 +444,7 @@ linvfs_file_mmap(
 
 
 STATIC long
-linvfs_ioctl(
+xfs_file_ioctl(
 	struct file	*filp,
 	unsigned int	cmd,
 	unsigned long	arg)
@@ -466,7 +466,7 @@ linvfs_ioctl(
 }
 
 STATIC long
-linvfs_ioctl_invis(
+xfs_file_ioctl_invis(
 	struct file	*filp,
 	unsigned int	cmd,
 	unsigned long	arg)
@@ -491,7 +491,7 @@ linvfs_ioctl_invis(
 #ifdef CONFIG_XFS_DMAPI
 #ifdef HAVE_VMOP_MPROTECT
 STATIC int
-linvfs_mprotect(
+xfs_vm_mprotect(
 	struct vm_area_struct *vma,
 	unsigned int	newflags)
 {
@@ -518,7 +518,7 @@ linvfs_mprotect(
  * it back online.
  */
 STATIC int
-linvfs_open_exec(
+xfs_file_open_exec(
 	struct inode	*inode)
 {
 	vnode_t		*vp = LINVFS_GET_VP(inode);
@@ -542,69 +542,69 @@ open_exec_out:
 }
 #endif /* HAVE_FOP_OPEN_EXEC */
 
-struct file_operations linvfs_file_operations = {
+struct file_operations xfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
-	.readv		= linvfs_readv,
-	.writev		= linvfs_writev,
-	.aio_read	= linvfs_aio_read,
-	.aio_write	= linvfs_aio_write,
-	.sendfile	= linvfs_sendfile,
-	.unlocked_ioctl	= linvfs_ioctl,
+	.readv		= xfs_file_readv,
+	.writev		= xfs_file_writev,
+	.aio_read	= xfs_file_aio_read,
+	.aio_write	= xfs_file_aio_write,
+	.sendfile	= xfs_file_sendfile,
+	.unlocked_ioctl	= xfs_file_ioctl,
 #ifdef CONFIG_COMPAT
-	.compat_ioctl	= linvfs_compat_ioctl,
+	.compat_ioctl	= xfs_file_compat_ioctl,
 #endif
-	.mmap		= linvfs_file_mmap,
-	.open		= linvfs_open,
-	.release	= linvfs_release,
-	.fsync		= linvfs_fsync,
+	.mmap		= xfs_file_mmap,
+	.open		= xfs_file_open,
+	.release	= xfs_file_release,
+	.fsync		= xfs_file_fsync,
 #ifdef HAVE_FOP_OPEN_EXEC
-	.open_exec	= linvfs_open_exec,
+	.open_exec	= xfs_file_open_exec,
 #endif
 };
 
-struct file_operations linvfs_invis_file_operations = {
+struct file_operations xfs_invis_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
-	.readv		= linvfs_readv_invis,
-	.writev		= linvfs_writev_invis,
-	.aio_read	= linvfs_aio_read_invis,
-	.aio_write	= linvfs_aio_write_invis,
-	.sendfile	= linvfs_sendfile,
-	.unlocked_ioctl	= linvfs_ioctl_invis,
+	.readv		= xfs_file_readv_invis,
+	.writev		= xfs_file_writev_invis,
+	.aio_read	= xfs_file_aio_read_invis,
+	.aio_write	= xfs_file_aio_write_invis,
+	.sendfile	= xfs_file_sendfile,
+	.unlocked_ioctl	= xfs_file_ioctl_invis,
 #ifdef CONFIG_COMPAT
-	.compat_ioctl	= linvfs_compat_invis_ioctl,
+	.compat_ioctl	= xfs_file_compat_invis_ioctl,
 #endif
-	.mmap		= linvfs_file_mmap,
-	.open		= linvfs_open,
-	.release	= linvfs_release,
-	.fsync		= linvfs_fsync,
+	.mmap		= xfs_file_mmap,
+	.open		= xfs_file_open,
+	.release	= xfs_file_release,
+	.fsync		= xfs_file_fsync,
 };
 
 
-struct file_operations linvfs_dir_operations = {
+struct file_operations xfs_dir_file_operations = {
 	.read		= generic_read_dir,
-	.readdir	= linvfs_readdir,
-	.unlocked_ioctl	= linvfs_ioctl,
+	.readdir	= xfs_file_readdir,
+	.unlocked_ioctl	= xfs_file_ioctl,
 #ifdef CONFIG_COMPAT
-	.compat_ioctl	= linvfs_compat_ioctl,
+	.compat_ioctl	= xfs_file_compat_ioctl,
 #endif
-	.fsync		= linvfs_fsync,
+	.fsync		= xfs_file_fsync,
 };
 
-static struct vm_operations_struct linvfs_file_vm_ops = {
+static struct vm_operations_struct xfs_file_vm_ops = {
 	.nopage		= filemap_nopage,
 	.populate	= filemap_populate,
 };
 
 #ifdef CONFIG_XFS_DMAPI
-static struct vm_operations_struct linvfs_dmapi_file_vm_ops = {
-	.nopage		= linvfs_filemap_nopage,
+static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
+	.nopage		= xfs_vm_nopage,
 	.populate	= filemap_populate,
 #ifdef HAVE_VMOP_MPROTECT
-	.mprotect	= linvfs_mprotect,
+	.mprotect	= xfs_vm_mprotect,
 #endif
 };
 #endif /* CONFIG_XFS_DMAPI */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index f182721ec9a..e435ad17419 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -344,7 +344,7 @@ xfs_open_by_handle(
 		return -XFS_ERROR(-PTR_ERR(filp));
 	}
 	if (inode->i_mode & S_IFREG)
-		filp->f_op = &linvfs_invis_file_operations;
+		filp->f_op = &xfs_invis_file_operations;
 
 	fd_install(new_fd, filp);
 	return new_fd;
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 6899a6b4a50..8b5275e4b83 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -22,9 +22,9 @@ extern struct inode_operations linvfs_file_inode_operations;
 extern struct inode_operations linvfs_dir_inode_operations;
 extern struct inode_operations linvfs_symlink_inode_operations;
 
-extern struct file_operations linvfs_file_operations;
-extern struct file_operations linvfs_invis_file_operations;
-extern struct file_operations linvfs_dir_operations;
+extern struct file_operations xfs_file_operations;
+extern struct file_operations xfs_dir_file_operations;
+extern struct file_operations xfs_invis_file_operations;
 
 extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
                         int, unsigned int, void __user *);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index fb76c53f9c4..9eac4b49a19 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -128,12 +128,12 @@ xfs_set_inodeops(
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFREG:
 		inode->i_op = &linvfs_file_inode_operations;
-		inode->i_fop = &linvfs_file_operations;
+		inode->i_fop = &xfs_file_operations;
 		inode->i_mapping->a_ops = &xfs_address_space_operations;
 		break;
 	case S_IFDIR:
 		inode->i_op = &linvfs_dir_inode_operations;
-		inode->i_fop = &linvfs_dir_operations;
+		inode->i_fop = &xfs_dir_file_operations;
 		break;
 	case S_IFLNK:
 		inode->i_op = &linvfs_symlink_inode_operations;
-- 
cgit v1.2.3


From 416c6d5bcfe8ac2c65a955be62bc42d8b8d5b014 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 14:00:51 +1100
Subject: [XFS] Switch over from linvfs names for inode operations for
 consistent naming.

SGI-PV: 950556
SGI-Modid: xfs-linux-melb:xfs-kern:25381a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_iops.c  | 160 +++++++++++++++++++++----------------------
 fs/xfs/linux-2.6/xfs_iops.h  |   6 +-
 fs/xfs/linux-2.6/xfs_super.c |   8 +--
 3 files changed, 87 insertions(+), 87 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 52b02bd5c28..93b9e6e43f2 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -198,7 +198,7 @@ xfs_ichgtime_fast(
  * Pull the link count and size up from the xfs inode to the linux inode
  */
 STATIC void
-__linvfs_validate_fields(
+xfs_validate_fields(
 	struct inode	*ip,
 	struct vattr	*vattr)
 {
@@ -224,7 +224,7 @@ __linvfs_validate_fields(
  * inode, of course, such that log replay can't cause these to be lost).
  */
 STATIC int
-__linvfs_init_security(
+xfs_init_security(
 	struct vnode	*vp,
 	struct inode	*dir)
 {
@@ -257,13 +257,13 @@ __linvfs_init_security(
  * XXX(hch):  nfsd is broken, better fix it instead.
  */
 STATIC inline int
-__linvfs_has_fs_struct(struct task_struct *task)
+xfs_has_fs_struct(struct task_struct *task)
 {
 	return (task->fs != init_task.fs);
 }
 
 STATIC inline void
-__linvfs_cleanup_inode(
+xfs_cleanup_inode(
 	vnode_t		*dvp,
 	vnode_t		*vp,
 	struct dentry	*dentry,
@@ -274,7 +274,7 @@ __linvfs_cleanup_inode(
 
 	/* Oh, the horror.
 	 * If we can't add the ACL or we fail in
-	 * linvfs_init_security we must back out.
+	 * xfs_init_security we must back out.
 	 * ENOSPC can hit here, among other things.
 	 */
 	teardown.d_inode = LINVFS_GET_IP(vp);
@@ -288,7 +288,7 @@ __linvfs_cleanup_inode(
 }
 
 STATIC int
-linvfs_mknod(
+xfs_vn_mknod(
 	struct inode	*dir,
 	struct dentry	*dentry,
 	int		mode,
@@ -323,7 +323,7 @@ linvfs_mknod(
 		}
 	}
 
-	if (IS_POSIXACL(dir) && !default_acl && __linvfs_has_fs_struct(current))
+	if (IS_POSIXACL(dir) && !default_acl && xfs_has_fs_struct(current))
 		mode &= ~current->fs->umask;
 
 	memset(vattr, 0, sizeof(*vattr));
@@ -347,9 +347,9 @@ linvfs_mknod(
 	}
 
 	if (unlikely(!error)) {
-		error = __linvfs_init_security(vp, dir);
+		error = xfs_init_security(vp, dir);
 		if (error)
-			__linvfs_cleanup_inode(dvp, vp, dentry, mode);
+			xfs_cleanup_inode(dvp, vp, dentry, mode);
 	}
 
 	if (unlikely(default_acl)) {
@@ -358,7 +358,7 @@ linvfs_mknod(
 			if (!error)
 				VMODIFY(vp);
 			else
-				__linvfs_cleanup_inode(dvp, vp, dentry, mode);
+				xfs_cleanup_inode(dvp, vp, dentry, mode);
 		}
 		_ACL_FREE(default_acl);
 	}
@@ -370,35 +370,35 @@ linvfs_mknod(
 		if (S_ISCHR(mode) || S_ISBLK(mode))
 			ip->i_rdev = rdev;
 		else if (S_ISDIR(mode))
-			__linvfs_validate_fields(ip, vattr);
+			xfs_validate_fields(ip, vattr);
 		d_instantiate(dentry, ip);
-		__linvfs_validate_fields(dir, vattr);
+		xfs_validate_fields(dir, vattr);
 	}
 	kfree(vattr);
 	return -error;
 }
 
 STATIC int
-linvfs_create(
+xfs_vn_create(
 	struct inode	*dir,
 	struct dentry	*dentry,
 	int		mode,
 	struct nameidata *nd)
 {
-	return linvfs_mknod(dir, dentry, mode, 0);
+	return xfs_vn_mknod(dir, dentry, mode, 0);
 }
 
 STATIC int
-linvfs_mkdir(
+xfs_vn_mkdir(
 	struct inode	*dir,
 	struct dentry	*dentry,
 	int		mode)
 {
-	return linvfs_mknod(dir, dentry, mode|S_IFDIR, 0);
+	return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
 }
 
 STATIC struct dentry *
-linvfs_lookup(
+xfs_vn_lookup(
 	struct inode	*dir,
 	struct dentry	*dentry,
 	struct nameidata *nd)
@@ -421,7 +421,7 @@ linvfs_lookup(
 }
 
 STATIC int
-linvfs_link(
+xfs_vn_link(
 	struct dentry	*old_dentry,
 	struct inode	*dir,
 	struct dentry	*dentry)
@@ -447,7 +447,7 @@ linvfs_link(
 	if (likely(!error)) {
 		VMODIFY(tdvp);
 		VN_HOLD(vp);
-		__linvfs_validate_fields(ip, vattr);
+		xfs_validate_fields(ip, vattr);
 		d_instantiate(dentry, ip);
 	}
 	kfree(vattr);
@@ -455,7 +455,7 @@ linvfs_link(
 }
 
 STATIC int
-linvfs_unlink(
+xfs_vn_unlink(
 	struct inode	*dir,
 	struct dentry	*dentry)
 {
@@ -473,15 +473,15 @@ linvfs_unlink(
 
 	VOP_REMOVE(dvp, dentry, NULL, error);
 	if (likely(!error)) {
-		__linvfs_validate_fields(dir, vattr);	/* size needs update */
-		__linvfs_validate_fields(inode, vattr);
+		xfs_validate_fields(dir, vattr);	/* size needs update */
+		xfs_validate_fields(inode, vattr);
 	}
 	kfree(vattr);
 	return -error;
 }
 
 STATIC int
-linvfs_symlink(
+xfs_vn_symlink(
 	struct inode	*dir,
 	struct dentry	*dentry,
 	const char	*symname)
@@ -507,12 +507,12 @@ linvfs_symlink(
 	error = 0;
 	VOP_SYMLINK(dvp, dentry, vattr, (char *)symname, &cvp, NULL, error);
 	if (likely(!error && cvp)) {
-		error = __linvfs_init_security(cvp, dir);
+		error = xfs_init_security(cvp, dir);
 		if (likely(!error)) {
 			ip = LINVFS_GET_IP(cvp);
 			d_instantiate(dentry, ip);
-			__linvfs_validate_fields(dir, vattr);
-			__linvfs_validate_fields(ip, vattr);
+			xfs_validate_fields(dir, vattr);
+			xfs_validate_fields(ip, vattr);
 		}
 	}
 	kfree(vattr);
@@ -520,7 +520,7 @@ linvfs_symlink(
 }
 
 STATIC int
-linvfs_rmdir(
+xfs_vn_rmdir(
 	struct inode	*dir,
 	struct dentry	*dentry)
 {
@@ -535,15 +535,15 @@ linvfs_rmdir(
 
 	VOP_RMDIR(dvp, dentry, NULL, error);
 	if (likely(!error)) {
-		__linvfs_validate_fields(inode, vattr);
-		__linvfs_validate_fields(dir, vattr);
+		xfs_validate_fields(inode, vattr);
+		xfs_validate_fields(dir, vattr);
 	}
 	kfree(vattr);
 	return -error;
 }
 
 STATIC int
-linvfs_rename(
+xfs_vn_rename(
 	struct inode	*odir,
 	struct dentry	*odentry,
 	struct inode	*ndir,
@@ -565,10 +565,10 @@ linvfs_rename(
 	VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error);
 	if (likely(!error)) {
 		if (new_inode)
-			__linvfs_validate_fields(new_inode, vattr);
-		__linvfs_validate_fields(odir, vattr);
+			xfs_validate_fields(new_inode, vattr);
+		xfs_validate_fields(odir, vattr);
 		if (ndir != odir)
-			__linvfs_validate_fields(ndir, vattr);
+			xfs_validate_fields(ndir, vattr);
 	}
 	kfree(vattr);
 	return -error;
@@ -580,7 +580,7 @@ linvfs_rename(
  * uio is kmalloced for this reason...
  */
 STATIC void *
-linvfs_follow_link(
+xfs_vn_follow_link(
 	struct dentry		*dentry,
 	struct nameidata	*nd)
 {
@@ -631,7 +631,7 @@ linvfs_follow_link(
 }
 
 STATIC void
-linvfs_put_link(
+xfs_vn_put_link(
 	struct dentry	*dentry,
 	struct nameidata *nd,
 	void		*p)
@@ -644,7 +644,7 @@ linvfs_put_link(
 
 #ifdef CONFIG_XFS_POSIX_ACL
 STATIC int
-linvfs_permission(
+xfs_vn_permission(
 	struct inode	*inode,
 	int		mode,
 	struct nameidata *nd)
@@ -657,11 +657,11 @@ linvfs_permission(
 	return -error;
 }
 #else
-#define linvfs_permission NULL
+#define xfs_vn_permission NULL
 #endif
 
 STATIC int
-linvfs_getattr(
+xfs_vn_getattr(
 	struct vfsmount	*mnt,
 	struct dentry	*dentry,
 	struct kstat	*stat)
@@ -678,7 +678,7 @@ linvfs_getattr(
 }
 
 STATIC int
-linvfs_setattr(
+xfs_vn_setattr(
 	struct dentry	*dentry,
 	struct iattr	*attr)
 {
@@ -736,14 +736,14 @@ linvfs_setattr(
 }
 
 STATIC void
-linvfs_truncate(
+xfs_vn_truncate(
 	struct inode	*inode)
 {
 	block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_block);
 }
 
 STATIC int
-linvfs_setxattr(
+xfs_vn_setxattr(
 	struct dentry	*dentry,
 	const char	*name,
 	const void	*data,
@@ -774,7 +774,7 @@ linvfs_setxattr(
 }
 
 STATIC ssize_t
-linvfs_getxattr(
+xfs_vn_getxattr(
 	struct dentry	*dentry,
 	const char	*name,
 	void		*data,
@@ -804,7 +804,7 @@ linvfs_getxattr(
 }
 
 STATIC ssize_t
-linvfs_listxattr(
+xfs_vn_listxattr(
 	struct dentry		*dentry,
 	char			*data,
 	size_t			size)
@@ -824,7 +824,7 @@ linvfs_listxattr(
 }
 
 STATIC int
-linvfs_removexattr(
+xfs_vn_removexattr(
 	struct dentry	*dentry,
 	const char	*name)
 {
@@ -846,45 +846,45 @@ linvfs_removexattr(
 }
 
 
-struct inode_operations linvfs_file_inode_operations = {
-	.permission		= linvfs_permission,
-	.truncate		= linvfs_truncate,
-	.getattr		= linvfs_getattr,
-	.setattr		= linvfs_setattr,
-	.setxattr		= linvfs_setxattr,
-	.getxattr		= linvfs_getxattr,
-	.listxattr		= linvfs_listxattr,
-	.removexattr		= linvfs_removexattr,
+struct inode_operations xfs_inode_operations = {
+	.permission		= xfs_vn_permission,
+	.truncate		= xfs_vn_truncate,
+	.getattr		= xfs_vn_getattr,
+	.setattr		= xfs_vn_setattr,
+	.setxattr		= xfs_vn_setxattr,
+	.getxattr		= xfs_vn_getxattr,
+	.listxattr		= xfs_vn_listxattr,
+	.removexattr		= xfs_vn_removexattr,
 };
 
-struct inode_operations linvfs_dir_inode_operations = {
-	.create			= linvfs_create,
-	.lookup			= linvfs_lookup,
-	.link			= linvfs_link,
-	.unlink			= linvfs_unlink,
-	.symlink		= linvfs_symlink,
-	.mkdir			= linvfs_mkdir,
-	.rmdir			= linvfs_rmdir,
-	.mknod			= linvfs_mknod,
-	.rename			= linvfs_rename,
-	.permission		= linvfs_permission,
-	.getattr		= linvfs_getattr,
-	.setattr		= linvfs_setattr,
-	.setxattr		= linvfs_setxattr,
-	.getxattr		= linvfs_getxattr,
-	.listxattr		= linvfs_listxattr,
-	.removexattr		= linvfs_removexattr,
+struct inode_operations xfs_dir_inode_operations = {
+	.create			= xfs_vn_create,
+	.lookup			= xfs_vn_lookup,
+	.link			= xfs_vn_link,
+	.unlink			= xfs_vn_unlink,
+	.symlink		= xfs_vn_symlink,
+	.mkdir			= xfs_vn_mkdir,
+	.rmdir			= xfs_vn_rmdir,
+	.mknod			= xfs_vn_mknod,
+	.rename			= xfs_vn_rename,
+	.permission		= xfs_vn_permission,
+	.getattr		= xfs_vn_getattr,
+	.setattr		= xfs_vn_setattr,
+	.setxattr		= xfs_vn_setxattr,
+	.getxattr		= xfs_vn_getxattr,
+	.listxattr		= xfs_vn_listxattr,
+	.removexattr		= xfs_vn_removexattr,
 };
 
-struct inode_operations linvfs_symlink_inode_operations = {
+struct inode_operations xfs_symlink_inode_operations = {
 	.readlink		= generic_readlink,
-	.follow_link		= linvfs_follow_link,
-	.put_link		= linvfs_put_link,
-	.permission		= linvfs_permission,
-	.getattr		= linvfs_getattr,
-	.setattr		= linvfs_setattr,
-	.setxattr		= linvfs_setxattr,
-	.getxattr		= linvfs_getxattr,
-	.listxattr		= linvfs_listxattr,
-	.removexattr		= linvfs_removexattr,
+	.follow_link		= xfs_vn_follow_link,
+	.put_link		= xfs_vn_put_link,
+	.permission		= xfs_vn_permission,
+	.getattr		= xfs_vn_getattr,
+	.setattr		= xfs_vn_setattr,
+	.setxattr		= xfs_vn_setxattr,
+	.getxattr		= xfs_vn_getxattr,
+	.listxattr		= xfs_vn_listxattr,
+	.removexattr		= xfs_vn_removexattr,
 };
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 8b5275e4b83..a8417d7af5f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -18,9 +18,9 @@
 #ifndef __XFS_IOPS_H__
 #define __XFS_IOPS_H__
 
-extern struct inode_operations linvfs_file_inode_operations;
-extern struct inode_operations linvfs_dir_inode_operations;
-extern struct inode_operations linvfs_symlink_inode_operations;
+extern struct inode_operations xfs_inode_operations;
+extern struct inode_operations xfs_dir_inode_operations;
+extern struct inode_operations xfs_symlink_inode_operations;
 
 extern struct file_operations xfs_file_operations;
 extern struct file_operations xfs_dir_file_operations;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 9eac4b49a19..cdbaea9e857 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -127,21 +127,21 @@ xfs_set_inodeops(
 {
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFREG:
-		inode->i_op = &linvfs_file_inode_operations;
+		inode->i_op = &xfs_inode_operations;
 		inode->i_fop = &xfs_file_operations;
 		inode->i_mapping->a_ops = &xfs_address_space_operations;
 		break;
 	case S_IFDIR:
-		inode->i_op = &linvfs_dir_inode_operations;
+		inode->i_op = &xfs_dir_inode_operations;
 		inode->i_fop = &xfs_dir_file_operations;
 		break;
 	case S_IFLNK:
-		inode->i_op = &linvfs_symlink_inode_operations;
+		inode->i_op = &xfs_symlink_inode_operations;
 		if (inode->i_blocks)
 			inode->i_mapping->a_ops = &xfs_address_space_operations;
 		break;
 	default:
-		inode->i_op = &linvfs_file_inode_operations;
+		inode->i_op = &xfs_inode_operations;
 		init_special_inode(inode, inode->i_mode, inode->i_rdev);
 		break;
 	}
-- 
cgit v1.2.3


From a50cd2692617cfb796140a62c0082bce0a7306c7 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 14:06:18 +1100
Subject: [XFS] Switch over from linvfs names for sb/quotactl operations for
 consistent naming.

SGI-PV: 950556
SGI-Modid: xfs-linux-melb:xfs-kern:25382a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_export.c  |  18 +++----
 fs/xfs/linux-2.6/xfs_ioctl32.c |  10 ++--
 fs/xfs/linux-2.6/xfs_ioctl32.h |   4 +-
 fs/xfs/linux-2.6/xfs_super.c   | 104 ++++++++++++++++++++---------------------
 fs/xfs/linux-2.6/xfs_super.h   |   2 +-
 5 files changed, 69 insertions(+), 69 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 53ed9911236..391c1353cd4 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -39,7 +39,7 @@ STATIC struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, };
  */
 
 STATIC struct dentry *
-linvfs_decode_fh(
+xfs_fs_decode_fh(
 	struct super_block	*sb,
 	__u32			*fh,
 	int			fh_len,
@@ -85,7 +85,7 @@ linvfs_decode_fh(
 
 
 STATIC int
-linvfs_encode_fh(
+xfs_fs_encode_fh(
 	struct dentry		*dentry,
 	__u32			*fh,
 	int			*max_len,
@@ -132,7 +132,7 @@ linvfs_encode_fh(
 }
 
 STATIC struct dentry *
-linvfs_get_dentry(
+xfs_fs_get_dentry(
 	struct super_block	*sb,
 	void			*data)
 {
@@ -156,7 +156,7 @@ linvfs_get_dentry(
 }
 
 STATIC struct dentry *
-linvfs_get_parent(
+xfs_fs_get_parent(
 	struct dentry		*child)
 {
 	int			error;
@@ -177,9 +177,9 @@ linvfs_get_parent(
 	return parent;
 }
 
-struct export_operations linvfs_export_ops = {
-	.decode_fh		= linvfs_decode_fh,
-	.encode_fh		= linvfs_encode_fh,
-	.get_parent		= linvfs_get_parent,
-	.get_dentry		= linvfs_get_dentry,
+struct export_operations xfs_export_operations = {
+	.decode_fh		= xfs_fs_decode_fh,
+	.encode_fh		= xfs_fs_encode_fh,
+	.get_parent		= xfs_fs_get_parent,
+	.get_dentry		= xfs_fs_get_dentry,
 };
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index a7c9ba1a9f7..e9da0bde36a 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -107,7 +107,7 @@ xfs_ioctl32_bulkstat(
 #endif
 
 STATIC long
-__linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
+xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
 {
 	int		error;
 	struct		inode *inode = f->f_dentry->d_inode;
@@ -196,19 +196,19 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
 }
 
 long
-linvfs_compat_ioctl(
+xfs_file_compat_ioctl(
 	struct file		*f,
 	unsigned		cmd,
 	unsigned long		arg)
 {
-	return __linvfs_compat_ioctl(0, f, cmd, arg);
+	return xfs_compat_ioctl(0, f, cmd, arg);
 }
 
 long
-linvfs_compat_invis_ioctl(
+xfs_file_compat_invis_ioctl(
 	struct file		*f,
 	unsigned		cmd,
 	unsigned long		arg)
 {
-	return __linvfs_compat_ioctl(IO_INVIS, f, cmd, arg);
+	return xfs_compat_ioctl(IO_INVIS, f, cmd, arg);
 }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 011c273bec5..8bdb33ffc03 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -18,7 +18,7 @@
 #ifndef __XFS_IOCTL32_H__
 #define __XFS_IOCTL32_H__
 
-extern long linvfs_compat_ioctl(struct file *, unsigned, unsigned long);
-extern long linvfs_compat_invis_ioctl(struct file *f, unsigned, unsigned long);
+extern long xfs_file_compat_ioctl(struct file *, unsigned, unsigned long);
+extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned);
 
 #endif /* __XFS_IOCTL32_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index cdbaea9e857..352aa3d40c1 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -59,8 +59,8 @@
 #include <linux/writeback.h>
 #include <linux/kthread.h>
 
-STATIC struct quotactl_ops linvfs_qops;
-STATIC struct super_operations linvfs_sops;
+STATIC struct quotactl_ops xfs_quotactl_operations;
+STATIC struct super_operations xfs_super_operations;
 STATIC kmem_zone_t *xfs_vnode_zone;
 STATIC kmem_zone_t *xfs_ioend_zone;
 mempool_t *xfs_ioend_pool;
@@ -332,7 +332,7 @@ xfs_blkdev_issue_flush(
 }
 
 STATIC struct inode *
-linvfs_alloc_inode(
+xfs_fs_alloc_inode(
 	struct super_block	*sb)
 {
 	vnode_t			*vp;
@@ -344,14 +344,14 @@ linvfs_alloc_inode(
 }
 
 STATIC void
-linvfs_destroy_inode(
+xfs_fs_destroy_inode(
 	struct inode		*inode)
 {
 	kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode));
 }
 
 STATIC void
-linvfs_inode_init_once(
+xfs_fs_inode_init_once(
 	void			*vnode,
 	kmem_zone_t		*zonep,
 	unsigned long		flags)
@@ -367,7 +367,7 @@ xfs_init_zones(void)
 	xfs_vnode_zone = kmem_zone_init_flags(sizeof(vnode_t), "xfs_vnode_t",
 					KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
 					KM_ZONE_SPREAD,
-					linvfs_inode_init_once);
+					xfs_fs_inode_init_once);
 	if (!xfs_vnode_zone)
 		goto out;
 
@@ -405,7 +405,7 @@ xfs_destroy_zones(void)
  * since this is when the inode itself becomes flushable.
  */
 STATIC int
-linvfs_write_inode(
+xfs_fs_write_inode(
 	struct inode		*inode,
 	int			sync)
 {
@@ -429,13 +429,13 @@ linvfs_write_inode(
 }
 
 STATIC void
-linvfs_clear_inode(
+xfs_fs_clear_inode(
 	struct inode		*inode)
 {
 	vnode_t			*vp = LINVFS_GET_VP(inode);
 	int			error, cache;
 
-	vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address);
+	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
 
 	XFS_STATS_INC(vn_rele);
 	XFS_STATS_INC(vn_remove);
@@ -608,7 +608,7 @@ xfssyncd(
 }
 
 STATIC int
-linvfs_start_syncd(
+xfs_fs_start_syncd(
 	vfs_t			*vfsp)
 {
 	vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
@@ -620,20 +620,20 @@ linvfs_start_syncd(
 }
 
 STATIC void
-linvfs_stop_syncd(
+xfs_fs_stop_syncd(
 	vfs_t			*vfsp)
 {
 	kthread_stop(vfsp->vfs_sync_task);
 }
 
 STATIC void
-linvfs_put_super(
+xfs_fs_put_super(
 	struct super_block	*sb)
 {
 	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
 	int			error;
 
-	linvfs_stop_syncd(vfsp);
+	xfs_fs_stop_syncd(vfsp);
 	VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error);
 	if (!error)
 		VFS_UNMOUNT(vfsp, 0, NULL, error);
@@ -647,7 +647,7 @@ linvfs_put_super(
 }
 
 STATIC void
-linvfs_write_super(
+xfs_fs_write_super(
 	struct super_block	*sb)
 {
 	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
@@ -663,7 +663,7 @@ linvfs_write_super(
 }
 
 STATIC int
-linvfs_sync_super(
+xfs_fs_sync_super(
 	struct super_block	*sb,
 	int			wait)
 {
@@ -702,7 +702,7 @@ linvfs_sync_super(
 }
 
 STATIC int
-linvfs_statfs(
+xfs_fs_statfs(
 	struct super_block	*sb,
 	struct kstatfs		*statp)
 {
@@ -714,7 +714,7 @@ linvfs_statfs(
 }
 
 STATIC int
-linvfs_remount(
+xfs_fs_remount(
 	struct super_block	*sb,
 	int			*flags,
 	char			*options)
@@ -731,14 +731,14 @@ linvfs_remount(
 }
 
 STATIC void
-linvfs_freeze_fs(
+xfs_fs_lockfs(
 	struct super_block	*sb)
 {
 	VFS_FREEZE(LINVFS_GET_VFS(sb));
 }
 
 STATIC int
-linvfs_show_options(
+xfs_fs_show_options(
 	struct seq_file		*m,
 	struct vfsmount		*mnt)
 {
@@ -750,7 +750,7 @@ linvfs_show_options(
 }
 
 STATIC int
-linvfs_quotasync(
+xfs_fs_quotasync(
 	struct super_block	*sb,
 	int			type)
 {
@@ -762,7 +762,7 @@ linvfs_quotasync(
 }
 
 STATIC int
-linvfs_getxstate(
+xfs_fs_getxstate(
 	struct super_block	*sb,
 	struct fs_quota_stat	*fqs)
 {
@@ -774,7 +774,7 @@ linvfs_getxstate(
 }
 
 STATIC int
-linvfs_setxstate(
+xfs_fs_setxstate(
 	struct super_block	*sb,
 	unsigned int		flags,
 	int			op)
@@ -787,7 +787,7 @@ linvfs_setxstate(
 }
 
 STATIC int
-linvfs_getxquota(
+xfs_fs_getxquota(
 	struct super_block	*sb,
 	int			type,
 	qid_t			id,
@@ -803,7 +803,7 @@ linvfs_getxquota(
 }
 
 STATIC int
-linvfs_setxquota(
+xfs_fs_setxquota(
 	struct super_block	*sb,
 	int			type,
 	qid_t			id,
@@ -819,7 +819,7 @@ linvfs_setxquota(
 }
 
 STATIC int
-linvfs_fill_super(
+xfs_fs_fill_super(
 	struct super_block	*sb,
 	void			*data,
 	int			silent)
@@ -844,10 +844,10 @@ linvfs_fill_super(
 
 	sb_min_blocksize(sb, BBSIZE);
 #ifdef CONFIG_XFS_EXPORT
-	sb->s_export_op = &linvfs_export_ops;
+	sb->s_export_op = &xfs_export_operations;
 #endif
-	sb->s_qcop = &linvfs_qops;
-	sb->s_op = &linvfs_sops;
+	sb->s_qcop = &xfs_quotactl_operations;
+	sb->s_op = &xfs_super_operations;
 
 	VFS_MOUNT(vfsp, args, NULL, error);
 	if (error) {
@@ -880,7 +880,7 @@ linvfs_fill_super(
 		error = EINVAL;
 		goto fail_vnrele;
 	}
-	if ((error = linvfs_start_syncd(vfsp)))
+	if ((error = xfs_fs_start_syncd(vfsp)))
 		goto fail_vnrele;
 	vn_trace_exit(rootvp, __FUNCTION__, (inst_t *)__return_address);
 
@@ -905,41 +905,41 @@ fail_vfsop:
 }
 
 STATIC struct super_block *
-linvfs_get_sb(
+xfs_fs_get_sb(
 	struct file_system_type	*fs_type,
 	int			flags,
 	const char		*dev_name,
 	void			*data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, linvfs_fill_super);
-}
-
-STATIC struct super_operations linvfs_sops = {
-	.alloc_inode		= linvfs_alloc_inode,
-	.destroy_inode		= linvfs_destroy_inode,
-	.write_inode		= linvfs_write_inode,
-	.clear_inode		= linvfs_clear_inode,
-	.put_super		= linvfs_put_super,
-	.write_super		= linvfs_write_super,
-	.sync_fs		= linvfs_sync_super,
-	.write_super_lockfs	= linvfs_freeze_fs,
-	.statfs			= linvfs_statfs,
-	.remount_fs		= linvfs_remount,
-	.show_options		= linvfs_show_options,
+	return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
+}
+
+STATIC struct super_operations xfs_super_operations = {
+	.alloc_inode		= xfs_fs_alloc_inode,
+	.destroy_inode		= xfs_fs_destroy_inode,
+	.write_inode		= xfs_fs_write_inode,
+	.clear_inode		= xfs_fs_clear_inode,
+	.put_super		= xfs_fs_put_super,
+	.write_super		= xfs_fs_write_super,
+	.sync_fs		= xfs_fs_sync_super,
+	.write_super_lockfs	= xfs_fs_lockfs,
+	.statfs			= xfs_fs_statfs,
+	.remount_fs		= xfs_fs_remount,
+	.show_options		= xfs_fs_show_options,
 };
 
-STATIC struct quotactl_ops linvfs_qops = {
-	.quota_sync		= linvfs_quotasync,
-	.get_xstate		= linvfs_getxstate,
-	.set_xstate		= linvfs_setxstate,
-	.get_xquota		= linvfs_getxquota,
-	.set_xquota		= linvfs_setxquota,
+STATIC struct quotactl_ops xfs_quotactl_operations = {
+	.quota_sync		= xfs_fs_quotasync,
+	.get_xstate		= xfs_fs_getxstate,
+	.set_xstate		= xfs_fs_setxstate,
+	.get_xquota		= xfs_fs_getxquota,
+	.set_xquota		= xfs_fs_setxquota,
 };
 
 STATIC struct file_system_type xfs_fs_type = {
 	.owner			= THIS_MODULE,
 	.name			= "xfs",
-	.get_sb			= linvfs_get_sb,
+	.get_sb			= xfs_fs_get_sb,
 	.kill_sb		= kill_block_super,
 	.fs_flags		= FS_REQUIRES_DEV,
 };
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index df59408dca0..5e1b525cb1e 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -120,6 +120,6 @@ extern int  xfs_blkdev_get(struct xfs_mount *, const char *,
 extern void xfs_blkdev_put(struct block_device *);
 extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
 
-extern struct export_operations linvfs_export_ops;
+extern struct export_operations xfs_export_operations;
 
 #endif	/* __XFS_SUPER_H__ */
-- 
cgit v1.2.3


From f020b67f3c0c071ed5b2281bfa717125c8a39ad6 Mon Sep 17 00:00:00 2001
From: Mandy Kirkconnell <alkirkco@sgi.com>
Date: Tue, 14 Mar 2006 14:07:24 +1100
Subject: [XFS] Fix assert to check that in-core extents are inline only.

SGI-PV: 950678
SGI-Modid: xfs-linux-melb:xfs-kern:207634a

Signed-off-by: Mandy Kirkconnell <alkirkco@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_bmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 9f0ed4869d4..64a02eaf1df 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3414,7 +3414,8 @@ xfs_bmap_local_to_extents(
 
 		args.tp = tp;
 		args.mp = ip->i_mount;
-		ASSERT(ifp->if_flags & XFS_IFINLINE);
+		ASSERT((ifp->if_flags &
+			(XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
 		/*
 		 * Allocate a block.  We know we need only one, since the
 		 * file currently fits in an inode.
@@ -3445,7 +3446,6 @@ xfs_bmap_local_to_extents(
 		xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
 		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
 		xfs_iext_add(ifp, 0, 1);
-		ASSERT((ifp->if_flags & (XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFEXTENTS);
 		ep = xfs_iext_get_ext(ifp, 0);
 		xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
 		xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork);
-- 
cgit v1.2.3


From f30a1211119741d2c1063ad613bec8434fb9d099 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 14:07:36 +1100
Subject: [XFS] Dynamically allocate the xfs_dinode_core_t structure to reduce
 our stack footprint in xfs_ialloc_ag_alloc.

SGI-PV: 947312
SGI-Modid: xfs-linux-melb:xfs-kern:25420a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_ialloc.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 8f3fae1aa98..0024892841a 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -138,8 +138,6 @@ xfs_ialloc_ag_alloc(
 	int		version;	/* inode version number to use */
 	int		isaligned;	/* inode allocation at stripe unit */
 					/* boundary */
-	xfs_dinode_core_t dic;          /* a dinode_core to copy to new */
-					/* inodes */
 
 	args.tp = tp;
 	args.mp = tp->t_mountp;
@@ -250,10 +248,6 @@ xfs_ialloc_ag_alloc(
 	else
 		version = XFS_DINODE_VERSION_1;
 
-	memset(&dic, 0, sizeof(xfs_dinode_core_t));
-	INT_SET(dic.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
-	INT_SET(dic.di_version, ARCH_CONVERT, version);
-
 	for (j = 0; j < nbufs; j++) {
 		/*
 		 * Get the block.
@@ -266,12 +260,13 @@ xfs_ialloc_ag_alloc(
 		ASSERT(fbuf);
 		ASSERT(!XFS_BUF_GETERROR(fbuf));
 		/*
-		 * Loop over the inodes in this buffer.
+		 * Set initial values for the inodes in this buffer.
 		 */
-
+		xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
 		for (i = 0; i < ninodes; i++) {
 			free = XFS_MAKE_IPTR(args.mp, fbuf, i);
-			memcpy(&(free->di_core), &dic, sizeof(xfs_dinode_core_t));
+			INT_SET(free->di_core.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
+			INT_SET(free->di_core.di_version, ARCH_CONVERT, version);
 			INT_SET(free->di_next_unlinked, ARCH_CONVERT, NULLAGINO);
 			xfs_ialloc_log_di(tp, fbuf, i,
 				XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
-- 
cgit v1.2.3


From 524fbf5dd1b25acffe6f8a4ed5f3cce1023cfdb8 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 14 Mar 2006 14:07:53 +1100
Subject: [XFS] Revert kiocb and vattr stack changes, theory is the AIO rework
 will help here and vattr may be small enough.

SGI-PV: 947312
SGI-Modid: xfs-linux-melb:xfs-kern:25423a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_file.c | 42 +++++++--------------
 fs/xfs/linux-2.6/xfs_iops.c | 89 ++++++++++++++-------------------------------
 2 files changed, 42 insertions(+), 89 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index b050e407942..c271c993649 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -145,22 +145,17 @@ __xfs_file_readv(
 {
 	struct inode	*inode = file->f_mapping->host;
 	vnode_t		*vp = LINVFS_GET_VP(inode);
-	struct kiocb	*kiocb;
+	struct kiocb	kiocb;
 	ssize_t		rval;
 
-	kiocb = kmalloc(sizeof(*kiocb), GFP_KERNEL);
-	if (unlikely(!kiocb))
-		return -ENOMEM;
-
-	init_sync_kiocb(kiocb, file);
-	kiocb->ki_pos = *ppos;
+	init_sync_kiocb(&kiocb, file);
+	kiocb.ki_pos = *ppos;
 
 	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
-	VOP_READ(vp, kiocb, iov, nr_segs, &kiocb->ki_pos, ioflags, NULL, rval);
+	VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
 
-	*ppos = kiocb->ki_pos;
-	kfree(kiocb);
+	*ppos = kiocb.ki_pos;
 	return rval;
 }
 
@@ -195,22 +190,17 @@ __xfs_file_writev(
 {
 	struct inode	*inode = file->f_mapping->host;
 	vnode_t		*vp = LINVFS_GET_VP(inode);
-	struct kiocb	*kiocb;
+	struct kiocb	kiocb;
 	ssize_t		rval;
 
-	kiocb = kmalloc(sizeof(*kiocb), GFP_KERNEL);
-	if (unlikely(!kiocb))
-		return -ENOMEM;
-
-	init_sync_kiocb(kiocb, file);
-	kiocb->ki_pos = *ppos;
+	init_sync_kiocb(&kiocb, file);
+	kiocb.ki_pos = *ppos;
 	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
 
-	VOP_WRITE(vp, kiocb, iov, nr_segs, &kiocb->ki_pos, ioflags, NULL, rval);
+	VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
 
-	*ppos = kiocb->ki_pos;
-	kfree(kiocb);
+	*ppos = kiocb.ki_pos;
 	return rval;
 }
 
@@ -420,7 +410,7 @@ xfs_file_mmap(
 {
 	struct inode	*ip = filp->f_dentry->d_inode;
 	vnode_t		*vp = LINVFS_GET_VP(ip);
-	vattr_t		*vattr;
+	vattr_t		vattr;
 	int		error;
 
 	vma->vm_ops = &xfs_file_vm_ops;
@@ -431,14 +421,10 @@ xfs_file_mmap(
 	}
 #endif /* CONFIG_XFS_DMAPI */
 
-	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
-	if (unlikely(!vattr))
-		return -ENOMEM;
-	vattr->va_mask = XFS_AT_UPDATIME;
-	VOP_SETATTR(vp, vattr, XFS_AT_UPDATIME, NULL, error);
+	vattr.va_mask = XFS_AT_UPDATIME;
+	VOP_SETATTR(vp, &vattr, XFS_AT_UPDATIME, NULL, error);
 	if (likely(!error))
-		__vn_revalidate(vp, vattr);	/* update flags */
-	kfree(vattr);
+		__vn_revalidate(vp, &vattr);	/* update flags */
 	return 0;
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 93b9e6e43f2..0a508580e57 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -295,7 +295,7 @@ xfs_vn_mknod(
 	dev_t		rdev)
 {
 	struct inode	*ip;
-	vattr_t		*vattr;
+	vattr_t		vattr = { 0 };
 	vnode_t		*vp = NULL, *dvp = LINVFS_GET_VP(dir);
 	xfs_acl_t	*default_acl = NULL;
 	attrexists_t	test_default_acl = _ACL_DEFAULT_EXISTS;
@@ -308,13 +308,8 @@ xfs_vn_mknod(
 	if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
 		return -EINVAL;
 
-	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
-	if (unlikely(!vattr))
-		return -ENOMEM;
-
 	if (unlikely(test_default_acl && test_default_acl(dvp))) {
 		if (!_ACL_ALLOC(default_acl)) {
-			kfree(vattr);
 			return -ENOMEM;
 		}
 		if (!_ACL_GET_DEFAULT(dvp, default_acl)) {
@@ -326,20 +321,19 @@ xfs_vn_mknod(
 	if (IS_POSIXACL(dir) && !default_acl && xfs_has_fs_struct(current))
 		mode &= ~current->fs->umask;
 
-	memset(vattr, 0, sizeof(*vattr));
-	vattr->va_mask = XFS_AT_TYPE|XFS_AT_MODE;
-	vattr->va_mode = mode;
+	vattr.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
+	vattr.va_mode = mode;
 
 	switch (mode & S_IFMT) {
 	case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
-		vattr->va_rdev = sysv_encode_dev(rdev);
-		vattr->va_mask |= XFS_AT_RDEV;
+		vattr.va_rdev = sysv_encode_dev(rdev);
+		vattr.va_mask |= XFS_AT_RDEV;
 		/*FALLTHROUGH*/
 	case S_IFREG:
-		VOP_CREATE(dvp, dentry, vattr, &vp, NULL, error);
+		VOP_CREATE(dvp, dentry, &vattr, &vp, NULL, error);
 		break;
 	case S_IFDIR:
-		VOP_MKDIR(dvp, dentry, vattr, &vp, NULL, error);
+		VOP_MKDIR(dvp, dentry, &vattr, &vp, NULL, error);
 		break;
 	default:
 		error = EINVAL;
@@ -354,7 +348,7 @@ xfs_vn_mknod(
 
 	if (unlikely(default_acl)) {
 		if (!error) {
-			error = _ACL_INHERIT(vp, vattr, default_acl);
+			error = _ACL_INHERIT(vp, &vattr, default_acl);
 			if (!error)
 				VMODIFY(vp);
 			else
@@ -370,11 +364,10 @@ xfs_vn_mknod(
 		if (S_ISCHR(mode) || S_ISBLK(mode))
 			ip->i_rdev = rdev;
 		else if (S_ISDIR(mode))
-			xfs_validate_fields(ip, vattr);
+			xfs_validate_fields(ip, &vattr);
 		d_instantiate(dentry, ip);
-		xfs_validate_fields(dir, vattr);
+		xfs_validate_fields(dir, &vattr);
 	}
-	kfree(vattr);
 	return -error;
 }
 
@@ -429,17 +422,13 @@ xfs_vn_link(
 	struct inode	*ip;	/* inode of guy being linked to */
 	vnode_t		*tdvp;	/* target directory for new name/link */
 	vnode_t		*vp;	/* vp of name being linked */
-	vattr_t		*vattr;
+	vattr_t		vattr;
 	int		error;
 
 	ip = old_dentry->d_inode;	/* inode being linked to */
 	if (S_ISDIR(ip->i_mode))
 		return -EPERM;
 
-	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
-	if (unlikely(!vattr))
-		return -ENOMEM;
-
 	tdvp = LINVFS_GET_VP(dir);
 	vp = LINVFS_GET_VP(ip);
 
@@ -447,10 +436,9 @@ xfs_vn_link(
 	if (likely(!error)) {
 		VMODIFY(tdvp);
 		VN_HOLD(vp);
-		xfs_validate_fields(ip, vattr);
+		xfs_validate_fields(ip, &vattr);
 		d_instantiate(dentry, ip);
 	}
-	kfree(vattr);
 	return -error;
 }
 
@@ -461,22 +449,17 @@ xfs_vn_unlink(
 {
 	struct inode	*inode;
 	vnode_t		*dvp;	/* directory containing name to remove */
-	vattr_t		*vattr;
+	vattr_t		vattr;
 	int		error;
 
-	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
-	if (unlikely(!vattr))
-		return -ENOMEM;
-
 	inode = dentry->d_inode;
 	dvp = LINVFS_GET_VP(dir);
 
 	VOP_REMOVE(dvp, dentry, NULL, error);
 	if (likely(!error)) {
-		xfs_validate_fields(dir, vattr);	/* size needs update */
-		xfs_validate_fields(inode, vattr);
+		xfs_validate_fields(dir, &vattr);	/* size needs update */
+		xfs_validate_fields(inode, &vattr);
 	}
-	kfree(vattr);
 	return -error;
 }
 
@@ -487,7 +470,7 @@ xfs_vn_symlink(
 	const char	*symname)
 {
 	struct inode	*ip;
-	vattr_t		*vattr;
+	vattr_t		vattr = { 0 };
 	vnode_t		*dvp;	/* directory containing name of symlink */
 	vnode_t		*cvp;	/* used to lookup symlink to put in dentry */
 	int		error;
@@ -495,27 +478,21 @@ xfs_vn_symlink(
 	dvp = LINVFS_GET_VP(dir);
 	cvp = NULL;
 
-	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
-	if (unlikely(!vattr))
-		return -ENOMEM;
-
-	memset(vattr, 0, sizeof(*vattr));
-	vattr->va_mode = S_IFLNK |
+	vattr.va_mode = S_IFLNK |
 		(irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
-	vattr->va_mask = XFS_AT_TYPE|XFS_AT_MODE;
+	vattr.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
 
 	error = 0;
-	VOP_SYMLINK(dvp, dentry, vattr, (char *)symname, &cvp, NULL, error);
+	VOP_SYMLINK(dvp, dentry, &vattr, (char *)symname, &cvp, NULL, error);
 	if (likely(!error && cvp)) {
 		error = xfs_init_security(cvp, dir);
 		if (likely(!error)) {
 			ip = LINVFS_GET_IP(cvp);
 			d_instantiate(dentry, ip);
-			xfs_validate_fields(dir, vattr);
-			xfs_validate_fields(ip, vattr);
+			xfs_validate_fields(dir, &vattr);
+			xfs_validate_fields(ip, &vattr);
 		}
 	}
-	kfree(vattr);
 	return -error;
 }
 
@@ -526,19 +503,14 @@ xfs_vn_rmdir(
 {
 	struct inode	*inode = dentry->d_inode;
 	vnode_t		*dvp = LINVFS_GET_VP(dir);
-	vattr_t		*vattr;
+	vattr_t		vattr;
 	int		error;
 
-	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
-	if (unlikely(!vattr))
-		return -ENOMEM;
-
 	VOP_RMDIR(dvp, dentry, NULL, error);
 	if (likely(!error)) {
-		xfs_validate_fields(inode, vattr);
-		xfs_validate_fields(dir, vattr);
+		xfs_validate_fields(inode, &vattr);
+		xfs_validate_fields(dir, &vattr);
 	}
-	kfree(vattr);
 	return -error;
 }
 
@@ -552,25 +524,20 @@ xfs_vn_rename(
 	struct inode	*new_inode = ndentry->d_inode;
 	vnode_t		*fvp;	/* from directory */
 	vnode_t		*tvp;	/* target directory */
-	vattr_t		*vattr;
+	vattr_t		vattr;
 	int		error;
 
-	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
-	if (unlikely(!vattr))
-		return -ENOMEM;
-
 	fvp = LINVFS_GET_VP(odir);
 	tvp = LINVFS_GET_VP(ndir);
 
 	VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error);
 	if (likely(!error)) {
 		if (new_inode)
-			xfs_validate_fields(new_inode, vattr);
-		xfs_validate_fields(odir, vattr);
+			xfs_validate_fields(new_inode, &vattr);
+		xfs_validate_fields(odir, &vattr);
 		if (ndir != odir)
-			xfs_validate_fields(ndir, vattr);
+			xfs_validate_fields(ndir, &vattr);
 	}
-	kfree(vattr);
 	return -error;
 }
 
-- 
cgit v1.2.3


From 8867bc9bf0aed7181aa72c7c938c6ce830b75166 Mon Sep 17 00:00:00 2001
From: Mandy Kirkconnell <alkirkco@sgi.com>
Date: Fri, 17 Mar 2006 17:25:04 +1100
Subject: [XFS] There are a few problems with the new
 xfs_bmap_search_multi_extents() wrapper function that I introduced in mod
 xfs-linux:xfs-kern:207393a. The function was added as a wrapper around
 xfs_bmap_do_search_extents() to avoid breaking the top-of-tree CXFS
 interface.  The idea of the function was basically to extract the target
 extent buffer (if muli- level extent allocation mode), then call
 xfs_bmap_do_search_extents() with either a pointer to the first extent in the
 target buffer or a pointer to the first extent in the file, depending on
 which extent mode was being used.  However, in addition to locating the
 target extent record for block bno, xfs_bmap_do_search_extents() also sets
 four parameters needed by the caller: *lastx, *eofp, *gotp, *prevp. Passing
 only the target extent buffer to xfs_bmap_do_search_extents() causes *eofp to
 be set incorrectly if the extent is at the end of the target list but there
 are actually more extents in the next er_extbuf. Likewise, if the extent is
 the first one in the buffer but NOT the first in the file, *prevp is
 incorrectly set to NULL.  Adding the needed functionality to
 xfs_bmap_search_multi_extents() to re-set any incorrectly set fields is
 redundant and makes the call to xfs_bmap_do_search_extents() not make much
 sense when multi-level extent allocation mode is being used.  This mod
 basically extracts the two functional components from
 xfs_bmap_do_search_extents(), with the intent of obsoleting/removing
 xfs_bmap_do_search_extents() after the CXFS mult-level in-core extent changes
 are checked in.  The two components are:  1) The binary search to locate the
 target extent record, and 2) Setting the four parameters needed by the caller
 (*lastx, *eofp, *gotp, *prevp).  Component 1: I created a new function in
 xfs_inode.c called xfs_iext_bno_to_ext(), which executes the binary search to
 find the target extent record. xfs_bmap_search_multi_extents() has been
 modified to call xfs_iext_bno_to_ext() rather than
 xfs_bmap_do_search_extents().  Component 2: The parameter setting
 functionality has been added to xfs_bmap_search_multi_extents(), eliminating
 the need for xfs_bmap_do_search_extents().  These changes make the removal of
 xfs_bmap_do_search_extents() trival once the CXFS changes are in place. They
 also allow us to maintain the current XFS interface, using the new search
 function introduced in mod xfs-linux:xfs-kern:207393a.

SGI-PV: 928864
SGI-Modid: xfs-linux-melb:xfs-kern:207866a

Signed-off-by: Mandy Kirkconnell <alkirkco@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_bmap.c  | 57 ++++++++++++++++++++++--------------------
 fs/xfs/xfs_inode.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/xfs/xfs_inode.h |  1 +
 3 files changed, 102 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 64a02eaf1df..2d702e4a74a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3575,10 +3575,11 @@ xfs_bmap_do_search_extents(
 }
 
 /*
- * Call xfs_bmap_do_search_extents() to search for the extent
- * record containing block bno. If in multi-level in-core extent
- * allocation mode, find and extract the target extent buffer,
- * otherwise just use the direct extent list.
+ * Search the extent records for the entry containing block bno.
+ * If bno lies in a hole, point to the next entry.  If bno lies
+ * past eof, *eofp will be set, and *prevp will contain the last
+ * entry (null if none).  Else, *lastxp will be set to the index
+ * of the found entry; *gotp will contain the entry.
  */
 xfs_bmbt_rec_t *			/* pointer to found extent entry */
 xfs_bmap_search_multi_extents(
@@ -3589,36 +3590,38 @@ xfs_bmap_search_multi_extents(
 	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
 	xfs_bmbt_irec_t	*prevp)		/* out: previous extent entry found */
 {
-	xfs_bmbt_rec_t	*base;		/* base of extent records */
 	xfs_bmbt_rec_t	*ep;		/* extent record pointer */
-	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
 	xfs_extnum_t	lastx;		/* last extent index */
-	xfs_extnum_t	nextents;	/* number of file extents */
 
 	/*
-	 * For multi-level extent allocation mode, find the
-	 * target extent list and pass only the contiguous
-	 * list to xfs_bmap_do_search_extents. Convert lastx
-	 * from a file extent index to an index within the
-	 * target extent list.
+	 * Initialize the extent entry structure to catch access to
+	 * uninitialized br_startblock field.
 	 */
-	if (ifp->if_flags & XFS_IFEXTIREC) {
-		int	erp_idx = 0;
-		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
-		base = erp->er_extbuf;
-		nextents = erp->er_extcount;
-		lastx = ifp->if_lastex - erp->er_extoff;
-	} else {
-		base = &ifp->if_u1.if_extents[0];
-		nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-		lastx = ifp->if_lastex;
+	gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
+	gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
+	gotp->br_state = XFS_EXT_INVALID;
+#if XFS_BIG_BLKNOS
+	gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
+#else
+	gotp->br_startblock = 0xffffa5a5;
+#endif
+	prevp->br_startoff = NULLFILEOFF;
+
+	ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
+	if (lastx > 0) {
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
 	}
-	ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno,
-					eofp, lastxp, gotp, prevp);
-	/* Convert lastx back to file-based index */
-	if (ifp->if_flags & XFS_IFEXTIREC) {
-		*lastxp += erp->er_extoff;
+	if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
+		xfs_bmbt_get_all(ep, gotp);
+		*eofp = 0;
+	} else {
+		if (lastx > 0) {
+			*gotp = *prevp;
+		}
+		*eofp = 1;
+		ep = NULL;
 	}
+	*lastxp = lastx;
 	return ep;
 }
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 580fa075803..a16df2d435f 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -4265,12 +4265,81 @@ xfs_iext_destroy(
 	ifp->if_bytes = 0;
 }
 
+/*
+ * Return a pointer to the extent record for file system block bno.
+ */
+xfs_bmbt_rec_t *			/* pointer to found extent record */
+xfs_iext_bno_to_ext(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_fileoff_t	bno,		/* block number to search for */
+	xfs_extnum_t	*idxp)		/* index of target extent */
+{
+	xfs_bmbt_rec_t	*base;		/* pointer to first extent */
+	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
+	xfs_bmbt_rec_t	*ep = NULL;	/* pointer to target extent */
+	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
+	int		high;		/* upper boundry in search */
+	xfs_extnum_t	idx = 0;	/* index of target extent */
+	int		low;		/* lower boundry in search */
+	xfs_extnum_t	nextents;	/* number of file extents */
+	xfs_fileoff_t	startoff = 0;	/* start offset of extent */
+
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	if (nextents == 0) {
+		*idxp = 0;
+		return NULL;
+	}
+	low = 0;
+	if (ifp->if_flags & XFS_IFEXTIREC) {
+		/* Find target extent list */
+		int	erp_idx = 0;
+		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
+		base = erp->er_extbuf;
+		high = erp->er_extcount - 1;
+	} else {
+		base = ifp->if_u1.if_extents;
+		high = nextents - 1;
+	}
+	/* Binary search extent records */
+	while (low <= high) {
+		idx = (low + high) >> 1;
+		ep = base + idx;
+		startoff = xfs_bmbt_get_startoff(ep);
+		blockcount = xfs_bmbt_get_blockcount(ep);
+		if (bno < startoff) {
+			high = idx - 1;
+		} else if (bno >= startoff + blockcount) {
+			low = idx + 1;
+		} else {
+			/* Convert back to file-based extent index */
+			if (ifp->if_flags & XFS_IFEXTIREC) {
+				idx += erp->er_extoff;
+			}
+			*idxp = idx;
+			return ep;
+		}
+	}
+	/* Convert back to file-based extent index */
+	if (ifp->if_flags & XFS_IFEXTIREC) {
+		idx += erp->er_extoff;
+	}
+	if (bno >= startoff + blockcount) {
+		if (++idx == nextents) {
+			ep = NULL;
+		} else {
+			ep = xfs_iext_get_ext(ifp, idx);
+		}
+	}
+	*idxp = idx;
+	return ep;
+}
+
 /*
  * Return a pointer to the indirection array entry containing the
  * extent record for filesystem block bno. Store the index of the
  * target irec in *erp_idxp.
  */
-xfs_ext_irec_t *
+xfs_ext_irec_t *			/* pointer to found extent record */
 xfs_iext_bno_to_irec(
 	xfs_ifork_t	*ifp,		/* inode fork pointer */
 	xfs_fileoff_t	bno,		/* block number to search for */
@@ -4278,7 +4347,7 @@ xfs_iext_bno_to_irec(
 {
 	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
 	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
-	xfs_extnum_t	erp_idx;	/* indirection array index */
+	int		erp_idx;	/* indirection array index */
 	int		nlists;		/* number of extent irec's (lists) */
 	int		high;		/* binary search upper limit */
 	int		low;		/* binary search lower limit */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 3c1df1d642f..006396764cb 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -478,6 +478,7 @@ void		xfs_iext_indirect_to_direct(xfs_ifork_t *);
 void		xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t);
 void		xfs_iext_inline_to_direct(xfs_ifork_t *, int);
 void		xfs_iext_destroy(xfs_ifork_t *);
+xfs_bmbt_rec_t	*xfs_iext_bno_to_ext(xfs_ifork_t *, xfs_fileoff_t, int *);
 xfs_ext_irec_t	*xfs_iext_bno_to_irec(xfs_ifork_t *, xfs_fileoff_t, int *);
 xfs_ext_irec_t	*xfs_iext_idx_to_irec(xfs_ifork_t *, xfs_extnum_t *, int *, int);
 void		xfs_iext_irec_init(xfs_ifork_t *);
-- 
cgit v1.2.3


From ec86dc02fdc062d0d298814b1e78b482ab38caf7 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:25:36 +1100
Subject: [XFS] Complete transition away from linvfs naming convention,
 finally.

SGI-PV: 947038
SGI-Modid: xfs-linux-melb:xfs-kern:25474a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c    | 12 +++++-----
 fs/xfs/linux-2.6/xfs_export.c  | 10 ++++----
 fs/xfs/linux-2.6/xfs_file.c    | 30 ++++++++++++------------
 fs/xfs/linux-2.6/xfs_fs_subr.c |  6 ++---
 fs/xfs/linux-2.6/xfs_ioctl.c   |  8 +++----
 fs/xfs/linux-2.6/xfs_ioctl32.c |  2 +-
 fs/xfs/linux-2.6/xfs_iops.c    | 48 +++++++++++++++++++-------------------
 fs/xfs/linux-2.6/xfs_linux.h   |  2 +-
 fs/xfs/linux-2.6/xfs_lrw.c     |  2 +-
 fs/xfs/linux-2.6/xfs_super.c   | 52 +++++++++++++++++++-----------------------
 fs/xfs/linux-2.6/xfs_super.h   |  5 ----
 fs/xfs/linux-2.6/xfs_vfs.c     | 17 +++++++++++++-
 fs/xfs/linux-2.6/xfs_vfs.h     |  3 ++-
 fs/xfs/linux-2.6/xfs_vnode.c   |  6 ++---
 fs/xfs/linux-2.6/xfs_vnode.h   | 32 +++++++++++++++-----------
 fs/xfs/xfs_attr.c              |  4 ++--
 fs/xfs/xfs_dfrag.c             |  4 ++--
 fs/xfs/xfs_iget.c              |  6 ++---
 fs/xfs/xfs_inode.c             |  4 ++--
 fs/xfs/xfs_vnodeops.c          |  4 ++--
 20 files changed, 135 insertions(+), 122 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 2b610c7ba32..448912f0e75 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -76,7 +76,7 @@ xfs_page_trace(
 	int		mask)
 {
 	xfs_inode_t	*ip;
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 	loff_t		isize = i_size_read(inode);
 	loff_t		offset = page_offset(page);
 	int		delalloc = -1, unmapped = -1, unwritten = -1;
@@ -214,7 +214,7 @@ xfs_alloc_ioend(
 	ioend->io_uptodate = 1; /* cleared if any I/O fails */
 	ioend->io_list = NULL;
 	ioend->io_type = type;
-	ioend->io_vnode = LINVFS_GET_VP(inode);
+	ioend->io_vnode = vn_from_inode(inode);
 	ioend->io_buffer_head = NULL;
 	ioend->io_buffer_tail = NULL;
 	atomic_inc(&ioend->io_vnode->v_iocount);
@@ -239,7 +239,7 @@ xfs_map_blocks(
 	xfs_iomap_t		*mapp,
 	int			flags)
 {
-	vnode_t			*vp = LINVFS_GET_VP(inode);
+	vnode_t			*vp = vn_from_inode(inode);
 	int			error, nmaps = 1;
 
 	VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error);
@@ -1229,7 +1229,7 @@ __xfs_get_block(
 	int			direct,
 	bmapi_flags_t		flags)
 {
-	vnode_t			*vp = LINVFS_GET_VP(inode);
+	vnode_t			*vp = vn_from_inode(inode);
 	xfs_iomap_t		iomap;
 	xfs_off_t		offset;
 	ssize_t			size;
@@ -1371,7 +1371,7 @@ xfs_vm_direct_IO(
 {
 	struct file	*file = iocb->ki_filp;
 	struct inode	*inode = file->f_mapping->host;
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 	xfs_iomap_t	iomap;
 	int		maps = 1;
 	int		error;
@@ -1410,7 +1410,7 @@ xfs_vm_bmap(
 	sector_t		block)
 {
 	struct inode		*inode = (struct inode *)mapping->host;
-	vnode_t			*vp = LINVFS_GET_VP(inode);
+	vnode_t			*vp = vn_from_inode(inode);
 	int			error;
 
 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 391c1353cd4..b768ea910bb 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -97,7 +97,7 @@ xfs_fs_encode_fh(
 	int			len;
 	int			is64 = 0;
 #if XFS_BIG_INUMS
-	vfs_t			*vfs = LINVFS_GET_VFS(inode->i_sb);
+	vfs_t			*vfs = vfs_from_sb(inode->i_sb);
 
 	if (!(vfs->vfs_flag & VFS_32BITINODES)) {
 		/* filesystem may contain 64bit inode numbers */
@@ -139,14 +139,14 @@ xfs_fs_get_dentry(
 	vnode_t			*vp;
 	struct inode		*inode;
 	struct dentry		*result;
-	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
+	vfs_t			*vfsp = vfs_from_sb(sb);
 	int			error;
 
 	VFS_VGET(vfsp, &vp, (fid_t *)data, error);
 	if (error || vp == NULL)
 		return ERR_PTR(-ESTALE) ;
 
-	inode = LINVFS_GET_IP(vp);
+	inode = vn_to_inode(vp);
 	result = d_alloc_anon(inode);
         if (!result) {
 		iput(inode);
@@ -164,12 +164,12 @@ xfs_fs_get_parent(
 	struct dentry		*parent;
 
 	cvp = NULL;
-	vp = LINVFS_GET_VP(child->d_inode);
+	vp = vn_from_inode(child->d_inode);
 	VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error);
 	if (unlikely(error))
 		return ERR_PTR(-error);
 
-	parent = d_alloc_anon(LINVFS_GET_IP(cvp));
+	parent = d_alloc_anon(vn_to_inode(cvp));
 	if (unlikely(!parent)) {
 		VN_RELE(cvp);
 		return ERR_PTR(-ENOMEM);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index c271c993649..185567a6a56 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -58,7 +58,7 @@ __xfs_file_read(
 {
 	struct iovec		iov = {buf, count};
 	struct file		*file = iocb->ki_filp;
-	vnode_t			*vp = LINVFS_GET_VP(file->f_dentry->d_inode);
+	vnode_t			*vp = vn_from_inode(file->f_dentry->d_inode);
 	ssize_t			rval;
 
 	BUG_ON(iocb->ki_pos != pos);
@@ -102,7 +102,7 @@ __xfs_file_write(
 	struct iovec	iov = {(void __user *)buf, count};
 	struct file	*file = iocb->ki_filp;
 	struct inode	*inode = file->f_mapping->host;
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 	ssize_t		rval;
 
 	BUG_ON(iocb->ki_pos != pos);
@@ -144,7 +144,7 @@ __xfs_file_readv(
 	loff_t			*ppos)
 {
 	struct inode	*inode = file->f_mapping->host;
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 	struct kiocb	kiocb;
 	ssize_t		rval;
 
@@ -189,7 +189,7 @@ __xfs_file_writev(
 	loff_t			*ppos)
 {
 	struct inode	*inode = file->f_mapping->host;
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 	struct kiocb	kiocb;
 	ssize_t		rval;
 
@@ -233,7 +233,7 @@ xfs_file_sendfile(
 	read_actor_t		actor,
 	void			*target)
 {
-	vnode_t			*vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
+	vnode_t			*vp = vn_from_inode(filp->f_dentry->d_inode);
 	ssize_t			rval;
 
 	VOP_SENDFILE(vp, filp, ppos, 0, count, actor, target, NULL, rval);
@@ -246,7 +246,7 @@ xfs_file_open(
 	struct inode	*inode,
 	struct file	*filp)
 {
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 	int		error;
 
 	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
@@ -263,7 +263,7 @@ xfs_file_release(
 	struct inode	*inode,
 	struct file	*filp)
 {
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 	int		error = 0;
 
 	if (vp)
@@ -279,7 +279,7 @@ xfs_file_fsync(
 	int		datasync)
 {
 	struct inode	*inode = dentry->d_inode;
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 	int		error;
 	int		flags = FSYNC_WAIT;
 
@@ -307,7 +307,7 @@ xfs_vm_nopage(
 	int			*type)
 {
 	struct inode	*inode = area->vm_file->f_dentry->d_inode;
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 	xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
 	int		error;
 
@@ -340,7 +340,7 @@ xfs_file_readdir(
 	xfs_off_t	start_offset, curr_offset;
 	xfs_dirent_t	*dbp = NULL;
 
-	vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
+	vp = vn_from_inode(filp->f_dentry->d_inode);
 	ASSERT(vp);
 
 	/* Try fairly hard to get memory */
@@ -409,7 +409,7 @@ xfs_file_mmap(
 	struct vm_area_struct *vma)
 {
 	struct inode	*ip = filp->f_dentry->d_inode;
-	vnode_t		*vp = LINVFS_GET_VP(ip);
+	vnode_t		*vp = vn_from_inode(ip);
 	vattr_t		vattr;
 	int		error;
 
@@ -437,7 +437,7 @@ xfs_file_ioctl(
 {
 	int		error;
 	struct inode	*inode = filp->f_dentry->d_inode;
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 
 	VOP_IOCTL(vp, inode, filp, 0, cmd, (void __user *)arg, error);
 	VMODIFY(vp);
@@ -459,7 +459,7 @@ xfs_file_ioctl_invis(
 {
 	int		error;
 	struct inode	*inode = filp->f_dentry->d_inode;
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 
 	ASSERT(vp);
 	VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error);
@@ -481,7 +481,7 @@ xfs_vm_mprotect(
 	struct vm_area_struct *vma,
 	unsigned int	newflags)
 {
-	vnode_t		*vp = LINVFS_GET_VP(vma->vm_file->f_dentry->d_inode);
+	vnode_t		*vp = vn_from_inode(vma->vm_file->f_dentry->d_inode);
 	int		error = 0;
 
 	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
@@ -507,7 +507,7 @@ STATIC int
 xfs_file_open_exec(
 	struct inode	*inode)
 {
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 	xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
 	int		error = 0;
 	xfs_inode_t	*ip;
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 4fa4b1a5187..575f2a790f3 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -57,7 +57,7 @@ fs_tosspages(
 	int		fiopt)
 {
 	vnode_t		*vp = BHV_TO_VNODE(bdp);
-	struct inode	*ip = LINVFS_GET_IP(vp);
+	struct inode	*ip = vn_to_inode(vp);
 
 	if (VN_CACHED(vp))
 		truncate_inode_pages(ip->i_mapping, first);
@@ -76,7 +76,7 @@ fs_flushinval_pages(
 	int		fiopt)
 {
 	vnode_t		*vp = BHV_TO_VNODE(bdp);
-	struct inode	*ip = LINVFS_GET_IP(vp);
+	struct inode	*ip = vn_to_inode(vp);
 
 	if (VN_CACHED(vp)) {
 		filemap_write_and_wait(ip->i_mapping);
@@ -98,7 +98,7 @@ fs_flush_pages(
 	int		fiopt)
 {
 	vnode_t		*vp = BHV_TO_VNODE(bdp);
-	struct inode	*ip = LINVFS_GET_IP(vp);
+	struct inode	*ip = vn_to_inode(vp);
 
 	if (VN_CACHED(vp)) {
 		filemap_fdatawrite(ip->i_mapping);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index e435ad17419..84478491609 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -138,7 +138,7 @@ xfs_find_handle(
 	}
 
 	/* we need the vnode */
-	vp = LINVFS_GET_VP(inode);
+	vp = vn_from_inode(inode);
 
 	/* now we can grab the fsid */
 	memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t));
@@ -256,7 +256,7 @@ xfs_vget_fsop_handlereq(
 	}
 
 	vpp = XFS_ITOV(ip);
-	inodep = LINVFS_GET_IP(vpp);
+	inodep = vn_to_inode(vpp);
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
 	*vp = vpp;
@@ -715,7 +715,7 @@ xfs_ioctl(
 	xfs_inode_t		*ip;
 	xfs_mount_t		*mp;
 
-	vp = LINVFS_GET_VP(inode);
+	vp = vn_from_inode(inode);
 
 	vn_trace_entry(vp, "xfs_ioctl", (inst_t *)__return_address);
 
@@ -1270,7 +1270,7 @@ xfs_ioc_xattr(
 	}
 
 	case XFS_IOC_GETVERSION: {
-		flags = LINVFS_GET_IP(vp)->i_generation;
+		flags = vn_to_inode(vp)->i_generation;
 		if (copy_to_user(arg, &flags, sizeof(flags)))
 			error = -EFAULT;
 		break;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index e9da0bde36a..b6321abd9a8 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -111,7 +111,7 @@ xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
 {
 	int		error;
 	struct		inode *inode = f->f_dentry->d_inode;
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_to_inode(inode);
 
 	switch (cmd) {
 	case XFS_IOC_DIOINFO:
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 0a508580e57..64c3395074d 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -106,7 +106,7 @@ xfs_ichgtime(
 	xfs_inode_t	*ip,
 	int		flags)
 {
-	struct inode	*inode = LINVFS_GET_IP(XFS_ITOV(ip));
+	struct inode	*inode = vn_to_inode(XFS_ITOV(ip));
 	timespec_t	tv;
 
 	nanotime(&tv);
@@ -202,7 +202,7 @@ xfs_validate_fields(
 	struct inode	*ip,
 	struct vattr	*vattr)
 {
-	vnode_t		*vp = LINVFS_GET_VP(ip);
+	vnode_t		*vp = vn_from_inode(ip);
 	int		error;
 
 	vattr->va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS;
@@ -228,7 +228,7 @@ xfs_init_security(
 	struct vnode	*vp,
 	struct inode	*dir)
 {
-	struct inode	*ip = LINVFS_GET_IP(vp);
+	struct inode	*ip = vn_to_inode(vp);
 	size_t		length;
 	void		*value;
 	char		*name;
@@ -277,7 +277,7 @@ xfs_cleanup_inode(
 	 * xfs_init_security we must back out.
 	 * ENOSPC can hit here, among other things.
 	 */
-	teardown.d_inode = LINVFS_GET_IP(vp);
+	teardown.d_inode = vn_to_inode(vp);
 	teardown.d_name = dentry->d_name;
 
 	if (S_ISDIR(mode))
@@ -296,7 +296,7 @@ xfs_vn_mknod(
 {
 	struct inode	*ip;
 	vattr_t		vattr = { 0 };
-	vnode_t		*vp = NULL, *dvp = LINVFS_GET_VP(dir);
+	vnode_t		*vp = NULL, *dvp = vn_from_inode(dir);
 	xfs_acl_t	*default_acl = NULL;
 	attrexists_t	test_default_acl = _ACL_DEFAULT_EXISTS;
 	int		error;
@@ -359,7 +359,7 @@ xfs_vn_mknod(
 
 	if (likely(!error)) {
 		ASSERT(vp);
-		ip = LINVFS_GET_IP(vp);
+		ip = vn_to_inode(vp);
 
 		if (S_ISCHR(mode) || S_ISBLK(mode))
 			ip->i_rdev = rdev;
@@ -396,7 +396,7 @@ xfs_vn_lookup(
 	struct dentry	*dentry,
 	struct nameidata *nd)
 {
-	struct vnode	*vp = LINVFS_GET_VP(dir), *cvp;
+	struct vnode	*vp = vn_from_inode(dir), *cvp;
 	int		error;
 
 	if (dentry->d_name.len >= MAXNAMELEN)
@@ -410,7 +410,7 @@ xfs_vn_lookup(
 		return NULL;
 	}
 
-	return d_splice_alias(LINVFS_GET_IP(cvp), dentry);
+	return d_splice_alias(vn_to_inode(cvp), dentry);
 }
 
 STATIC int
@@ -429,8 +429,8 @@ xfs_vn_link(
 	if (S_ISDIR(ip->i_mode))
 		return -EPERM;
 
-	tdvp = LINVFS_GET_VP(dir);
-	vp = LINVFS_GET_VP(ip);
+	tdvp = vn_from_inode(dir);
+	vp = vn_from_inode(ip);
 
 	VOP_LINK(tdvp, vp, dentry, NULL, error);
 	if (likely(!error)) {
@@ -453,7 +453,7 @@ xfs_vn_unlink(
 	int		error;
 
 	inode = dentry->d_inode;
-	dvp = LINVFS_GET_VP(dir);
+	dvp = vn_from_inode(dir);
 
 	VOP_REMOVE(dvp, dentry, NULL, error);
 	if (likely(!error)) {
@@ -475,7 +475,7 @@ xfs_vn_symlink(
 	vnode_t		*cvp;	/* used to lookup symlink to put in dentry */
 	int		error;
 
-	dvp = LINVFS_GET_VP(dir);
+	dvp = vn_from_inode(dir);
 	cvp = NULL;
 
 	vattr.va_mode = S_IFLNK |
@@ -487,7 +487,7 @@ xfs_vn_symlink(
 	if (likely(!error && cvp)) {
 		error = xfs_init_security(cvp, dir);
 		if (likely(!error)) {
-			ip = LINVFS_GET_IP(cvp);
+			ip = vn_to_inode(cvp);
 			d_instantiate(dentry, ip);
 			xfs_validate_fields(dir, &vattr);
 			xfs_validate_fields(ip, &vattr);
@@ -502,7 +502,7 @@ xfs_vn_rmdir(
 	struct dentry	*dentry)
 {
 	struct inode	*inode = dentry->d_inode;
-	vnode_t		*dvp = LINVFS_GET_VP(dir);
+	vnode_t		*dvp = vn_from_inode(dir);
 	vattr_t		vattr;
 	int		error;
 
@@ -527,8 +527,8 @@ xfs_vn_rename(
 	vattr_t		vattr;
 	int		error;
 
-	fvp = LINVFS_GET_VP(odir);
-	tvp = LINVFS_GET_VP(ndir);
+	fvp = vn_from_inode(odir);
+	tvp = vn_from_inode(ndir);
 
 	VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error);
 	if (likely(!error)) {
@@ -573,7 +573,7 @@ xfs_vn_follow_link(
 		return NULL;
 	}
 
-	vp = LINVFS_GET_VP(dentry->d_inode);
+	vp = vn_from_inode(dentry->d_inode);
 
 	iov.iov_base = link;
 	iov.iov_len = MAXPATHLEN;
@@ -616,7 +616,7 @@ xfs_vn_permission(
 	int		mode,
 	struct nameidata *nd)
 {
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 	int		error;
 
 	mode <<= 6;		/* convert from linux to vnode access bits */
@@ -634,7 +634,7 @@ xfs_vn_getattr(
 	struct kstat	*stat)
 {
 	struct inode	*inode = dentry->d_inode;
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 	int		error = 0;
 
 	if (unlikely(vp->v_flag & VMODIFIED))
@@ -651,7 +651,7 @@ xfs_vn_setattr(
 {
 	struct inode	*inode = dentry->d_inode;
 	unsigned int	ia_valid = attr->ia_valid;
-	vnode_t		*vp = LINVFS_GET_VP(inode);
+	vnode_t		*vp = vn_from_inode(inode);
 	vattr_t		vattr = { 0 };
 	int		flags = 0;
 	int		error;
@@ -717,7 +717,7 @@ xfs_vn_setxattr(
 	size_t		size,
 	int		flags)
 {
-	vnode_t		*vp = LINVFS_GET_VP(dentry->d_inode);
+	vnode_t		*vp = vn_from_inode(dentry->d_inode);
 	char		*attr = (char *)name;
 	attrnames_t	*namesp;
 	int		xflags = 0;
@@ -747,7 +747,7 @@ xfs_vn_getxattr(
 	void		*data,
 	size_t		size)
 {
-	vnode_t		*vp = LINVFS_GET_VP(dentry->d_inode);
+	vnode_t		*vp = vn_from_inode(dentry->d_inode);
 	char		*attr = (char *)name;
 	attrnames_t	*namesp;
 	int		xflags = 0;
@@ -776,7 +776,7 @@ xfs_vn_listxattr(
 	char			*data,
 	size_t			size)
 {
-	vnode_t			*vp = LINVFS_GET_VP(dentry->d_inode);
+	vnode_t			*vp = vn_from_inode(dentry->d_inode);
 	int			error, xflags = ATTR_KERNAMELS;
 	ssize_t			result;
 
@@ -795,7 +795,7 @@ xfs_vn_removexattr(
 	struct dentry	*dentry,
 	const char	*name)
 {
-	vnode_t		*vp = LINVFS_GET_VP(dentry->d_inode);
+	vnode_t		*vp = vn_from_inode(dentry->d_inode);
 	char		*attr = (char *)name;
 	attrnames_t	*namesp;
 	int		xflags = 0;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index bd88ccb0cad..1fe09f2d651 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -234,7 +234,7 @@ BUFFER_FNS(PrivateStart, unwritten);
 #define xfs_sort(a,n,s,fn)	sort(a,n,s,fn,NULL)
 #define xfs_stack_trace()	dump_stack()
 #define xfs_itruncate_data(ip, off)	\
-	(-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
+	(-vmtruncate(vn_to_inode(XFS_ITOV(ip)), (off)))
 #define xfs_statvfs_fsid(statp, mp)	\
 	({ u64 id = huge_encode_dev((mp)->m_ddev_targp->bt_dev); \
 	   __kernel_fsid_t *fsid = &(statp)->f_fsid;	\
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index e0ab45fbfeb..2ede4bb7ecd 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -432,7 +432,7 @@ xfs_zero_eof(
 	xfs_fsize_t	isize,		/* current inode size */
 	xfs_fsize_t	end_size)	/* terminal inode size */
 {
-	struct inode	*ip = LINVFS_GET_IP(vp);
+	struct inode	*ip = vn_to_inode(vp);
 	xfs_fileoff_t	start_zero_fsb;
 	xfs_fileoff_t	end_zero_fsb;
 	xfs_fileoff_t	zero_count_fsb;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 352aa3d40c1..d9d28a965ba 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -153,7 +153,7 @@ xfs_revalidate_inode(
 	vnode_t			*vp,
 	xfs_inode_t		*ip)
 {
-	struct inode		*inode = LINVFS_GET_IP(vp);
+	struct inode		*inode = vn_to_inode(vp);
 
 	inode->i_mode	= ip->i_d.di_mode;
 	inode->i_nlink	= ip->i_d.di_nlink;
@@ -210,7 +210,7 @@ xfs_initialize_vnode(
 	int			unlock)
 {
 	xfs_inode_t		*ip = XFS_BHVTOI(inode_bhv);
-	struct inode		*inode = LINVFS_GET_IP(vp);
+	struct inode		*inode = vn_to_inode(vp);
 
 	if (!inode_bhv->bd_vobj) {
 		vp->v_vfsp = bhvtovfs(bdp);
@@ -228,7 +228,7 @@ xfs_initialize_vnode(
 	if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) {
 		xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
 		xfs_set_inodeops(inode);
-	
+
 		ip->i_flags &= ~XFS_INEW;
 		barrier();
 
@@ -340,14 +340,14 @@ xfs_fs_alloc_inode(
 	vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
 	if (unlikely(!vp))
 		return NULL;
-	return LINVFS_GET_IP(vp);
+	return vn_to_inode(vp);
 }
 
 STATIC void
 xfs_fs_destroy_inode(
 	struct inode		*inode)
 {
-	kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode));
+	kmem_zone_free(xfs_vnode_zone, vn_from_inode(inode));
 }
 
 STATIC void
@@ -358,7 +358,7 @@ xfs_fs_inode_init_once(
 {
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 		      SLAB_CTOR_CONSTRUCTOR)
-		inode_init_once(LINVFS_GET_IP((vnode_t *)vnode));
+		inode_init_once(vn_to_inode((vnode_t *)vnode));
 }
 
 STATIC int
@@ -409,7 +409,7 @@ xfs_fs_write_inode(
 	struct inode		*inode,
 	int			sync)
 {
-	vnode_t			*vp = LINVFS_GET_VP(inode);
+	vnode_t			*vp = vn_from_inode(inode);
 	int			error = 0, flags = FLUSH_INODE;
 
 	if (vp) {
@@ -432,7 +432,7 @@ STATIC void
 xfs_fs_clear_inode(
 	struct inode		*inode)
 {
-	vnode_t			*vp = LINVFS_GET_VP(inode);
+	vnode_t			*vp = vn_from_inode(inode);
 	int			error, cache;
 
 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
@@ -511,7 +511,7 @@ void
 xfs_flush_inode(
 	xfs_inode_t	*ip)
 {
-	struct inode	*inode = LINVFS_GET_IP(XFS_ITOV(ip));
+	struct inode	*inode = vn_to_inode(XFS_ITOV(ip));
 	struct vfs	*vfs = XFS_MTOVFS(ip->i_mount);
 
 	igrab(inode);
@@ -536,7 +536,7 @@ void
 xfs_flush_device(
 	xfs_inode_t	*ip)
 {
-	struct inode	*inode = LINVFS_GET_IP(XFS_ITOV(ip));
+	struct inode	*inode = vn_to_inode(XFS_ITOV(ip));
 	struct vfs	*vfs = XFS_MTOVFS(ip->i_mount);
 
 	igrab(inode);
@@ -630,7 +630,7 @@ STATIC void
 xfs_fs_put_super(
 	struct super_block	*sb)
 {
-	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
+	vfs_t			*vfsp = vfs_from_sb(sb);
 	int			error;
 
 	xfs_fs_stop_syncd(vfsp);
@@ -650,7 +650,7 @@ STATIC void
 xfs_fs_write_super(
 	struct super_block	*sb)
 {
-	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
+	vfs_t			*vfsp = vfs_from_sb(sb);
 	int			error;
 
 	if (sb->s_flags & MS_RDONLY) {
@@ -667,7 +667,7 @@ xfs_fs_sync_super(
 	struct super_block	*sb,
 	int			wait)
 {
-	vfs_t		*vfsp = LINVFS_GET_VFS(sb);
+	vfs_t		*vfsp = vfs_from_sb(sb);
 	int		error;
 	int		flags = SYNC_FSDATA;
 
@@ -706,7 +706,7 @@ xfs_fs_statfs(
 	struct super_block	*sb,
 	struct kstatfs		*statp)
 {
-	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
+	vfs_t			*vfsp = vfs_from_sb(sb);
 	int			error;
 
 	VFS_STATVFS(vfsp, statp, NULL, error);
@@ -719,7 +719,7 @@ xfs_fs_remount(
 	int			*flags,
 	char			*options)
 {
-	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
+	vfs_t			*vfsp = vfs_from_sb(sb);
 	struct xfs_mount_args	*args = xfs_args_allocate(sb);
 	int			error;
 
@@ -734,7 +734,7 @@ STATIC void
 xfs_fs_lockfs(
 	struct super_block	*sb)
 {
-	VFS_FREEZE(LINVFS_GET_VFS(sb));
+	VFS_FREEZE(vfs_from_sb(sb));
 }
 
 STATIC int
@@ -742,7 +742,7 @@ xfs_fs_show_options(
 	struct seq_file		*m,
 	struct vfsmount		*mnt)
 {
-	struct vfs		*vfsp = LINVFS_GET_VFS(mnt->mnt_sb);
+	struct vfs		*vfsp = vfs_from_sb(mnt->mnt_sb);
 	int			error;
 
 	VFS_SHOWARGS(vfsp, m, error);
@@ -754,7 +754,7 @@ xfs_fs_quotasync(
 	struct super_block	*sb,
 	int			type)
 {
-	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
+	struct vfs		*vfsp = vfs_from_sb(sb);
 	int			error;
 
 	VFS_QUOTACTL(vfsp, Q_XQUOTASYNC, 0, (caddr_t)NULL, error);
@@ -766,7 +766,7 @@ xfs_fs_getxstate(
 	struct super_block	*sb,
 	struct fs_quota_stat	*fqs)
 {
-	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
+	struct vfs		*vfsp = vfs_from_sb(sb);
 	int			error;
 
 	VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error);
@@ -779,7 +779,7 @@ xfs_fs_setxstate(
 	unsigned int		flags,
 	int			op)
 {
-	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
+	struct vfs		*vfsp = vfs_from_sb(sb);
 	int			error;
 
 	VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error);
@@ -793,7 +793,7 @@ xfs_fs_getxquota(
 	qid_t			id,
 	struct fs_disk_quota	*fdq)
 {
-	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
+	struct vfs		*vfsp = vfs_from_sb(sb);
 	int			error, getmode;
 
 	getmode = (type == USRQUOTA) ? Q_XGETQUOTA :
@@ -809,7 +809,7 @@ xfs_fs_setxquota(
 	qid_t			id,
 	struct fs_disk_quota	*fdq)
 {
-	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
+	struct vfs		*vfsp = vfs_from_sb(sb);
 	int			error, setmode;
 
 	setmode = (type == USRQUOTA) ? Q_XSETQLIM :
@@ -825,15 +825,11 @@ xfs_fs_fill_super(
 	int			silent)
 {
 	vnode_t			*rootvp;
-	struct vfs		*vfsp = vfs_allocate();
+	struct vfs		*vfsp = vfs_allocate(sb);
 	struct xfs_mount_args	*args = xfs_args_allocate(sb);
 	struct kstatfs		statvfs;
 	int			error, error2;
 
-	vfsp->vfs_super = sb;
-	LINVFS_SET_VFS(sb, vfsp);
-	if (sb->s_flags & MS_RDONLY)
-		vfsp->vfs_flag |= VFS_RDONLY;
 	bhv_insert_all_vfsops(vfsp);
 
 	VFS_PARSEARGS(vfsp, (char *)data, args, 0, error);
@@ -871,7 +867,7 @@ xfs_fs_fill_super(
 	if (error)
 		goto fail_unmount;
 
-	sb->s_root = d_alloc_root(LINVFS_GET_IP(rootvp));
+	sb->s_root = d_alloc_root(vn_to_inode(rootvp));
 	if (!sb->s_root) {
 		error = ENOMEM;
 		goto fail_vnrele;
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 5e1b525cb1e..376b96cb513 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -98,11 +98,6 @@ extern void xfs_qm_exit(void);
 				XFS_DMAPI_STRING \
 				XFS_DBG_STRING /* DBG must be last */
 
-#define LINVFS_GET_VFS(s) \
-	(vfs_t *)((s)->s_fs_info)
-#define LINVFS_SET_VFS(s, vfsp) \
-	((s)->s_fs_info = vfsp)
-
 struct xfs_inode;
 struct xfs_mount;
 struct xfs_buftarg;
diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c
index a6b4084bda8..6f7c9f7a862 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.c
+++ b/fs/xfs/linux-2.6/xfs_vfs.c
@@ -227,7 +227,8 @@ vfs_freeze(
 }
 
 vfs_t *
-vfs_allocate( void )
+vfs_allocate(
+	struct super_block	*sb)
 {
 	struct vfs		*vfsp;
 
@@ -236,9 +237,23 @@ vfs_allocate( void )
 	INIT_LIST_HEAD(&vfsp->vfs_sync_list);
 	spin_lock_init(&vfsp->vfs_sync_lock);
 	init_waitqueue_head(&vfsp->vfs_wait_single_sync_task);
+
+	vfsp->vfs_super = sb;
+	sb->s_fs_info = vfsp;
+
+	if (sb->s_flags & MS_RDONLY)
+		vfsp->vfs_flag |= VFS_RDONLY;
+
 	return vfsp;
 }
 
+vfs_t *
+vfs_from_sb(
+	struct super_block	*sb)
+{
+	return (vfs_t *)sb->s_fs_info;
+}
+
 void
 vfs_deallocate(
 	struct vfs		*vfsp)
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 57caf9eddee..8fed356db05 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -193,7 +193,8 @@ typedef struct bhv_vfsops {
 #define vfs_bhv_set_custom(b,o)	( (b)->bhv_custom = (void *)(o))
 #define vfs_bhv_clr_custom(b)	( (b)->bhv_custom = NULL )
 
-extern vfs_t *vfs_allocate(void);
+extern vfs_t *vfs_allocate(struct super_block *);
+extern vfs_t *vfs_from_sb(struct super_block *);
 extern void vfs_deallocate(vfs_t *);
 extern void vfs_insertops(vfs_t *, bhv_vfsops_t *);
 extern void vfs_insertbhv(vfs_t *, bhv_desc_t *, vfsops_t *, void *);
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 225e7dd8b21..d27c25b27cc 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -58,7 +58,7 @@ struct vnode *
 vn_initialize(
 	struct inode	*inode)
 {
-	struct vnode	*vp = LINVFS_GET_VP(inode);
+	struct vnode	*vp = vn_from_inode(inode);
 
 	XFS_STATS_INC(vn_active);
 	XFS_STATS_INC(vn_alloc);
@@ -97,7 +97,7 @@ vn_revalidate_core(
 	struct vnode	*vp,
 	vattr_t		*vap)
 {
-	struct inode	*inode = LINVFS_GET_IP(vp);
+	struct inode	*inode = vn_to_inode(vp);
 
 	inode->i_mode	    = vap->va_mode;
 	inode->i_nlink	    = vap->va_nlink;
@@ -166,7 +166,7 @@ vn_hold(
 	XFS_STATS_INC(vn_hold);
 
 	VN_LOCK(vp);
-	inode = igrab(LINVFS_GET_IP(vp));
+	inode = igrab(vn_to_inode(vp));
 	ASSERT(inode);
 	VN_UNLOCK(vp, 0);
 
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 0cf92ca80ce..06f5845e956 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -116,8 +116,14 @@ typedef enum {
 /*
  * Vnode to Linux inode mapping.
  */
-#define LINVFS_GET_VP(inode)	((vnode_t *)list_entry(inode, vnode_t, v_inode))
-#define LINVFS_GET_IP(vp)	(&(vp)->v_inode)
+static inline struct vnode *vn_from_inode(struct inode *inode)
+{
+	return (vnode_t *)list_entry(inode, vnode_t, v_inode);
+}
+static inline struct inode *vn_to_inode(struct vnode *vnode)
+{
+	return &vnode->v_inode;
+}
 
 /*
  * Vnode flags.
@@ -498,7 +504,7 @@ extern void	vn_iowake(struct vnode *vp);
 
 static inline int vn_count(struct vnode *vp)
 {
-	return atomic_read(&LINVFS_GET_IP(vp)->i_count);
+	return atomic_read(&vn_to_inode(vp)->i_count);
 }
 
 /*
@@ -512,16 +518,16 @@ extern vnode_t	*vn_hold(struct vnode *);
 	  vn_trace_hold(vp, __FILE__, __LINE__, (inst_t *)__return_address))
 #define VN_RELE(vp)		\
 	  (vn_trace_rele(vp, __FILE__, __LINE__, (inst_t *)__return_address), \
-	   iput(LINVFS_GET_IP(vp)))
+	   iput(vn_to_inode(vp)))
 #else
 #define VN_HOLD(vp)		((void)vn_hold(vp))
-#define VN_RELE(vp)		(iput(LINVFS_GET_IP(vp)))
+#define VN_RELE(vp)		(iput(vn_to_inode(vp)))
 #endif
 
 static inline struct vnode *vn_grab(struct vnode *vp)
 {
-	struct inode *inode = igrab(LINVFS_GET_IP(vp));
-	return inode ? LINVFS_GET_VP(inode) : NULL;
+	struct inode *inode = igrab(vn_to_inode(vp));
+	return inode ? vn_from_inode(inode) : NULL;
 }
 
 /*
@@ -529,7 +535,7 @@ static inline struct vnode *vn_grab(struct vnode *vp)
  */
 #define VNAME(dentry)		((char *) (dentry)->d_name.name)
 #define VNAMELEN(dentry)	((dentry)->d_name.len)
-#define VNAME_TO_VNODE(dentry)	(LINVFS_GET_VP((dentry)->d_inode))
+#define VNAME_TO_VNODE(dentry)	(vn_from_inode((dentry)->d_inode))
 
 /*
  * Vnode spinlock manipulation.
@@ -558,12 +564,12 @@ static __inline__ void vn_flagclr(struct vnode *vp, uint flag)
  */
 static inline void vn_mark_bad(struct vnode *vp)
 {
-	make_bad_inode(LINVFS_GET_IP(vp));
+	make_bad_inode(vn_to_inode(vp));
 }
 
 static inline int VN_BAD(struct vnode *vp)
 {
-	return is_bad_inode(LINVFS_GET_IP(vp));
+	return is_bad_inode(vn_to_inode(vp));
 }
 
 /*
@@ -588,9 +594,9 @@ static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
 /*
  * Some useful predicates.
  */
-#define VN_MAPPED(vp)	mapping_mapped(LINVFS_GET_IP(vp)->i_mapping)
-#define VN_CACHED(vp)	(LINVFS_GET_IP(vp)->i_mapping->nrpages)
-#define VN_DIRTY(vp)	mapping_tagged(LINVFS_GET_IP(vp)->i_mapping, \
+#define VN_MAPPED(vp)	mapping_mapped(vn_to_inode(vp)->i_mapping)
+#define VN_CACHED(vp)	(vn_to_inode(vp)->i_mapping->nrpages)
+#define VN_DIRTY(vp)	mapping_tagged(vn_to_inode(vp)->i_mapping, \
 					PAGECACHE_TAG_DIRTY)
 #define VMODIFY(vp)	VN_FLAGSET(vp, VMODIFIED)
 #define VUNMODIFY(vp)	VN_FLAGCLR(vp, VMODIFIED)
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index e5e91e9c7e8..acf1b7c9f1d 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -2522,7 +2522,7 @@ attr_user_capable(
 	struct vnode	*vp,
 	cred_t		*cred)
 {
-	struct inode	*inode = LINVFS_GET_IP(vp);
+	struct inode	*inode = vn_to_inode(vp);
 
 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 		return -EPERM;
@@ -2540,7 +2540,7 @@ attr_trusted_capable(
 	struct vnode	*vp,
 	cred_t		*cred)
 {
-	struct inode	*inode = LINVFS_GET_IP(vp);
+	struct inode	*inode = vn_to_inode(vp);
 
 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 		return -EPERM;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index c6191d00ad2..4968a6358e6 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -83,7 +83,7 @@ xfs_swapext(
 
 	/* Pull information for the target fd */
 	if (((fp = fget((int)sxp->sx_fdtarget)) == NULL) ||
-	    ((vp = LINVFS_GET_VP(fp->f_dentry->d_inode)) == NULL))  {
+	    ((vp = vn_from_inode(fp->f_dentry->d_inode)) == NULL))  {
 		error = XFS_ERROR(EINVAL);
 		goto error0;
 	}
@@ -95,7 +95,7 @@ xfs_swapext(
 	}
 
 	if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) ||
-	    ((tvp = LINVFS_GET_VP(tfp->f_dentry->d_inode)) == NULL)) {
+	    ((tvp = vn_from_inode(tfp->f_dentry->d_inode)) == NULL)) {
 		error = XFS_ERROR(EINVAL);
 		goto error0;
 	}
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 8e380a1fb79..3ce35a6f700 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -258,7 +258,7 @@ again:
 				goto finish_inode;
 
 			} else if (vp != inode_vp) {
-				struct inode *inode = LINVFS_GET_IP(inode_vp);
+				struct inode *inode = vn_to_inode(inode_vp);
 
 				/* The inode is being torn down, pause and
 				 * try again.
@@ -495,7 +495,7 @@ retry:
 	if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) {
 		xfs_inode_t	*ip;
 
-		vp = LINVFS_GET_VP(inode);
+		vp = vn_from_inode(inode);
 		if (inode->i_state & I_NEW) {
 			vn_initialize(inode);
 			error = xfs_iget_core(vp, mp, tp, ino, flags,
@@ -617,7 +617,7 @@ xfs_iput_new(xfs_inode_t	*ip,
 	     uint		lock_flags)
 {
 	vnode_t		*vp = XFS_ITOV(ip);
-	struct inode	*inode = LINVFS_GET_IP(vp);
+	struct inode	*inode = vn_to_inode(vp);
 
 	vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address);
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a16df2d435f..86a8451d10f 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2723,7 +2723,7 @@ xfs_iunpin(
 
 		/* make sync come back and flush this inode */
 		if (vp) {
-			struct inode	*inode = LINVFS_GET_IP(vp);
+			struct inode	*inode = vn_to_inode(vp);
 
 			if (!(inode->i_state & I_NEW))
 				mark_inode_dirty_sync(inode);
@@ -3519,7 +3519,7 @@ xfs_iaccess(
 {
 	int		error;
 	mode_t		orgmode = mode;
-	struct inode	*inode = LINVFS_GET_IP(XFS_ITOV(ip));
+	struct inode	*inode = vn_to_inode(XFS_ITOV(ip));
 
 	if (mode & S_IWUSR) {
 		umode_t		imode = inode->i_mode;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index eaab355f5a8..8b5a44fe286 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1556,7 +1556,7 @@ xfs_release(
 			if ((error = xfs_inactive_free_eofblocks(mp, ip)))
 				return error;
 			/* Update linux inode block count after free above */
-			LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp,
+			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
 				ip->i_d.di_nblocks + ip->i_delayed_blks);
 		}
 	}
@@ -1637,7 +1637,7 @@ xfs_inactive(
 			if ((error = xfs_inactive_free_eofblocks(mp, ip)))
 				return VN_INACTIVE_CACHE;
 			/* Update linux inode block count after free above */
-			LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp,
+			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
 				ip->i_d.di_nblocks + ip->i_delayed_blks);
 		}
 		goto out;
-- 
cgit v1.2.3


From 2ddd5928d01ca8eb49f55166411b64a5844a8959 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:25:46 +1100
Subject: [XFS] Correct the dquot reservation component for the link
 transation.

SGI-PV: 904196
SGI-Modid: xfs-linux-melb:xfs-kern:25476a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_trans.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 6a2a1e07505..2918956553a 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -84,7 +84,7 @@ xfs_calc_rename_reservation(xfs_mount_t *mp)
 STATIC uint
 xfs_calc_link_reservation(xfs_mount_t *mp)
 {
-	return XFS_CALC_LINK_LOG_RES(mp);
+	return XFS_CALC_LINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
 }
 
 STATIC uint
-- 
cgit v1.2.3


From b12dd34298cf0cff9f337f667045335140873039 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:26:04 +1100
Subject: [XFS] Fix an infinite loop issue in bulkstat when a corrupt inode is
 detected.  Thanks to Roger Willcocks.

SGI-PV: 951054
SGI-Modid: xfs-linux-melb:xfs-kern:25477a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_inode.c       | 28 ++++++++++++++--------------
 fs/xfs/xfs_inode.h       |  7 ++++---
 fs/xfs/xfs_itable.c      |  5 ++++-
 fs/xfs/xfs_log_recover.c |  2 +-
 fs/xfs/xfs_vfsops.c      |  2 +-
 5 files changed, 24 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 86a8451d10f..b1e95707e7c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -253,7 +253,8 @@ xfs_itobp(
 	xfs_inode_t	*ip,
 	xfs_dinode_t	**dipp,
 	xfs_buf_t	**bpp,
-	xfs_daddr_t	bno)
+	xfs_daddr_t	bno,
+	uint		imap_flags)
 {
 	xfs_buf_t	*bp;
 	int		error;
@@ -269,10 +270,9 @@ xfs_itobp(
 		 * inode on disk.
 		 */
 		imap.im_blkno = bno;
-		error = xfs_imap(mp, tp, ip->i_ino, &imap, XFS_IMAP_LOOKUP);
-		if (error != 0) {
+		if ((error = xfs_imap(mp, tp, ip->i_ino, &imap,
+					XFS_IMAP_LOOKUP | imap_flags)))
 			return error;
-		}
 
 		/*
 		 * If the inode number maps to a block outside the bounds
@@ -336,9 +336,10 @@ xfs_itobp(
 	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
 	 */
 #ifdef DEBUG
-	ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog;
+	ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 :
+		(BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog);
 #else
-	ni = 1;
+	ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : 1;
 #endif
 	for (i = 0; i < ni; i++) {
 		int		di_ok;
@@ -868,9 +869,8 @@ xfs_iread(
 	 * return NULL as well.  Set i_blkno to 0 so that xfs_itobp() will
 	 * know that this is a new incore inode.
 	 */
-	error = xfs_itobp(mp, tp, ip, &dip, &bp, bno);
-
-	if (error != 0) {
+	error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, 0);
+	if (error) {
 		kmem_zone_free(xfs_inode_zone, ip);
 		return error;
 	}
@@ -1895,7 +1895,7 @@ xfs_iunlink(
 		 * Here we put the head pointer into our next pointer,
 		 * and then we fall through to point the head at us.
 		 */
-		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
+		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
 		if (error) {
 			return error;
 		}
@@ -2004,7 +2004,7 @@ xfs_iunlink_remove(
 		 * of dealing with the buffer when there is no need to
 		 * change it.
 		 */
-		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
+		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
 		if (error) {
 			cmn_err(CE_WARN,
 				"xfs_iunlink_remove: xfs_itobp()  returned an error %d on %s.  Returning error.",
@@ -2066,7 +2066,7 @@ xfs_iunlink_remove(
 		 * Now last_ibp points to the buffer previous to us on
 		 * the unlinked list.  Pull us from the list.
 		 */
-		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
+		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
 		if (error) {
 			cmn_err(CE_WARN,
 				"xfs_iunlink_remove: xfs_itobp()  returned an error %d on %s.  Returning error.",
@@ -3023,8 +3023,8 @@ xfs_iflush(
 	/*
 	 * Get the buffer containing the on-disk inode.
 	 */
-	error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0);
-	if (error != 0) {
+	error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0);
+	if (error) {
 		xfs_ifunlock(ip);
 		return error;
 	}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 006396764cb..39ef9c36ea5 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -95,9 +95,10 @@ typedef struct xfs_ifork {
 #define	XFS_IFEXTIREC	0x08	/* Indirection array of extent blocks */
 
 /*
- * Flags for xfs_imap() and xfs_dilocate().
+ * Flags for xfs_itobp(), xfs_imap() and xfs_dilocate().
  */
-#define	XFS_IMAP_LOOKUP		0x1
+#define XFS_IMAP_LOOKUP		0x1
+#define XFS_IMAP_BULKSTAT	0x2
 
 #ifdef __KERNEL__
 struct bhv_desc;
@@ -421,7 +422,7 @@ int		xfs_finish_reclaim_all(struct xfs_mount *, int);
  */
 int		xfs_itobp(struct xfs_mount *, struct xfs_trans *,
 			  xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **,
-			  xfs_daddr_t);
+			  xfs_daddr_t, uint);
 int		xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
 			  xfs_inode_t **, xfs_daddr_t);
 int		xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index c59450e1be4..32247b6bfee 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -562,7 +562,8 @@ xfs_bulkstat(
 						if (bp)
 							xfs_buf_relse(bp);
 						error = xfs_itobp(mp, NULL, ip,
-								  &dip, &bp, bno);
+								&dip, &bp, bno,
+								XFS_IMAP_BULKSTAT);
 						if (!error)
 							clustidx = ip->i_boffset / mp->m_sb.sb_inodesize;
 						kmem_zone_free(xfs_inode_zone, ip);
@@ -570,6 +571,8 @@ xfs_bulkstat(
 								   mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
 								   XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
 							bp = NULL;
+							ubleft = 0;
+							rval = error;
 							break;
 						}
 					}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 7d46cbd6a07..add13f507ed 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3249,7 +3249,7 @@ xlog_recover_process_iunlinks(
 					 * next inode in the bucket.
 					 */
 					error = xfs_itobp(mp, NULL, ip, &dip,
-							&ibp, 0);
+							&ibp, 0, 0);
 					ASSERT(error || (dip != NULL));
 				}
 
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 5dd84fe609c..811a4261fa2 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -1229,7 +1229,7 @@ xfs_sync_inodes(
 					xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
 					error = xfs_itobp(mp, NULL, ip,
-							  &dip, &bp, 0);
+							  &dip, &bp, 0, 0);
 					if (!error) {
 						xfs_buf_relse(bp);
 					} else {
-- 
cgit v1.2.3


From a13828b167532a2145c9e3f563a99f810500c7b4 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:26:14 +1100
Subject: [XFS] Cleanup references to i_sem.

SGI-PV: 904196
SGI-Modid: xfs-linux-melb:xfs-kern:25480a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_lrw.c | 41 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 2ede4bb7ecd..ba07b83f988 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -249,9 +249,8 @@ xfs_read(
 	if (n < size)
 		size = n;
 
-	if (XFS_FORCED_SHUTDOWN(mp)) {
+	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
-	}
 
 	if (unlikely(ioflags & IO_ISDIRECT))
 		mutex_lock(&inode->i_mutex);
@@ -267,7 +266,7 @@ xfs_read(
 					dmflags, &locktype);
 		if (ret) {
 			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-			goto unlock_isem;
+			goto unlock_mutex;
 		}
 	}
 
@@ -281,7 +280,7 @@ xfs_read(
 
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
-unlock_isem:
+unlock_mutex:
 	if (unlikely(ioflags & IO_ISDIRECT))
 		mutex_unlock(&inode->i_mutex);
 	return ret;
@@ -573,7 +572,7 @@ xfs_write(
 	vrwlock_t		locktype;
 	size_t			ocount = 0, count;
 	loff_t			pos;
-	int			need_isem = 1, need_flush = 0;
+	int			need_i_mutex = 1, need_flush = 0;
 
 	XFS_STATS_INC(xs_write_calls);
 
@@ -622,14 +621,14 @@ xfs_write(
 			return XFS_ERROR(-EINVAL);
 
 		if (!VN_CACHED(vp) && pos < i_size_read(inode))
-			need_isem = 0;
+			need_i_mutex = 0;
 
 		if (VN_CACHED(vp))
 			need_flush = 1;
 	}
 
 relock:
-	if (need_isem) {
+	if (need_i_mutex) {
 		iolock = XFS_IOLOCK_EXCL;
 		locktype = VRWLOCK_WRITE;
 
@@ -651,7 +650,7 @@ start:
 					S_ISBLK(inode->i_mode));
 	if (error) {
 		xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
-		goto out_unlock_isem;
+		goto out_unlock_mutex;
 	}
 
 	new_size = pos + count;
@@ -663,7 +662,7 @@ start:
 		loff_t		savedsize = pos;
 		int		dmflags = FILP_DELAY_FLAG(file);
 
-		if (need_isem)
+		if (need_i_mutex)
 			dmflags |= DM_FLAGS_IMUX;
 
 		xfs_iunlock(xip, XFS_ILOCK_EXCL);
@@ -672,7 +671,7 @@ start:
 				      dmflags, &locktype);
 		if (error) {
 			xfs_iunlock(xip, iolock);
-			goto out_unlock_isem;
+			goto out_unlock_mutex;
 		}
 		xfs_ilock(xip, XFS_ILOCK_EXCL);
 		eventsent = 1;
@@ -710,7 +709,7 @@ start:
 					isize, pos + count);
 		if (error) {
 			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
-			goto out_unlock_isem;
+			goto out_unlock_mutex;
 		}
 	}
 	xfs_iunlock(xip, XFS_ILOCK_EXCL);
@@ -731,7 +730,7 @@ start:
 			error = -remove_suid(file->f_dentry);
 		if (unlikely(error)) {
 			xfs_iunlock(xip, iolock);
-			goto out_unlock_isem;
+			goto out_unlock_mutex;
 		}
 	}
 
@@ -747,14 +746,14 @@ retry:
 					-1, FI_REMAPF_LOCKED);
 		}
 
-		if (need_isem) {
+		if (need_i_mutex) {
 			/* demote the lock now the cached pages are gone */
 			XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
 			mutex_unlock(&inode->i_mutex);
 
 			iolock = XFS_IOLOCK_SHARED;
 			locktype = VRWLOCK_WRITE_DIRECT;
-			need_isem = 0;
+			need_i_mutex = 0;
 		}
 
  		xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs,
@@ -772,7 +771,7 @@ retry:
 			pos += ret;
 			count -= ret;
 
-			need_isem = 1;
+			need_i_mutex = 1;
 			ioflags &= ~IO_ISDIRECT;
 			xfs_iunlock(xip, iolock);
 			goto relock;
@@ -794,14 +793,14 @@ retry:
 	    !(ioflags & IO_INVIS)) {
 
 		xfs_rwunlock(bdp, locktype);
-		if (need_isem)
+		if (need_i_mutex)
 			mutex_unlock(&inode->i_mutex);
 		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
 				DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
 				0, 0, 0); /* Delay flag intentionally  unused */
 		if (error)
 			goto out_nounlocks;
-		if (need_isem)
+		if (need_i_mutex)
 			mutex_lock(&inode->i_mutex);
 		xfs_rwlock(bdp, locktype);
 		pos = xip->i_d.di_size;
@@ -905,9 +904,9 @@ retry:
 			if (error)
 				goto out_unlock_internal;
 		}
-	
+
 		xfs_rwunlock(bdp, locktype);
-		if (need_isem)
+		if (need_i_mutex)
 			mutex_unlock(&inode->i_mutex);
 
 		error = sync_page_range(inode, mapping, pos, ret);
@@ -918,8 +917,8 @@ retry:
 
  out_unlock_internal:
 	xfs_rwunlock(bdp, locktype);
- out_unlock_isem:
-	if (need_isem)
+ out_unlock_mutex:
+	if (need_i_mutex)
 		mutex_unlock(&inode->i_mutex);
  out_nounlocks:
 	return -error;
-- 
cgit v1.2.3


From 238f4c5468656e3e8b1d39d75c1e4fd73592c1ea Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:26:25 +1100
Subject: [XFS] Make couple names consitent, be more defensive on releasepage
 (and prep for nobh, someday, maybe).

SGI-PV: 904196
SGI-Modid: xfs-linux-melb:xfs-kern:25481a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 448912f0e75..d9bf130c63b 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1178,7 +1178,7 @@ out_unlock:
  *    free them and we should come back later via writepage.
  */
 STATIC int
-xfs_vm_release_page(
+xfs_vm_releasepage(
 	struct page		*page,
 	gfp_t			gfp_mask)
 {
@@ -1191,6 +1191,9 @@ xfs_vm_release_page(
 
 	xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask);
 
+	if (!page_has_buffers(page))
+		return 0;
+
 	xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
 	if (!delalloc && !unwritten)
 		goto free_buffers;
@@ -1440,7 +1443,7 @@ xfs_vm_readpages(
 }
 
 STATIC int
-xfs_vm_invalidate_page(
+xfs_vm_invalidatepage(
 	struct page		*page,
 	unsigned long		offset)
 {
@@ -1454,8 +1457,8 @@ struct address_space_operations xfs_address_space_operations = {
 	.readpages		= xfs_vm_readpages,
 	.writepage		= xfs_vm_writepage,
 	.sync_page		= block_sync_page,
-	.releasepage		= xfs_vm_release_page,
-	.invalidatepage		= xfs_vm_invalidate_page,
+	.releasepage		= xfs_vm_releasepage,
+	.invalidatepage		= xfs_vm_invalidatepage,
 	.prepare_write		= xfs_vm_prepare_write,
 	.commit_write		= generic_commit_write,
 	.bmap			= xfs_vm_bmap,
-- 
cgit v1.2.3


From ce9d37c257ceba5b4d089c544e4673546f647565 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:26:34 +1100
Subject: [XFS] Merge Yingpings fix for a vn_count assert failure during QA -
 another ENOSPC condition.

SGI-PV: 950784
SGI-Modid: xfs-linux-melb:xfs-kern:25482a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_iops.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 64c3395074d..af487437bd7 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -491,6 +491,8 @@ xfs_vn_symlink(
 			d_instantiate(dentry, ip);
 			xfs_validate_fields(dir, &vattr);
 			xfs_validate_fields(ip, &vattr);
+		} else {
+			xfs_cleanup_inode(dvp, cvp, dentry, 0);
 		}
 	}
 	return -error;
-- 
cgit v1.2.3


From 9cea236492ebabb9545564eb039aa0f477a05c96 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:26:41 +1100
Subject: [XFS] Flush and invalidate dirty pages at the start of a direct read
 also, else we can hit a delalloc-extents-via-direct-io BUG.

SGI-PV: 949916
SGI-Modid: xfs-linux-melb:xfs-kern:25483a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_lrw.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index ba07b83f988..2a936bd38ce 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -270,6 +270,10 @@ xfs_read(
 		}
 	}
 
+	if (unlikely((ioflags & IO_ISDIRECT) && VN_CACHED(vp)))
+		VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(*offset)),
+						-1, FI_REMAPF_LOCKED);
+
 	xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
 				(void *)iovp, segs, *offset, ioflags);
 	ret = __generic_file_aio_read(iocb, iovp, segs, offset);
-- 
cgit v1.2.3


From 70e73f59755867383edf563d5a5cbea614c0fd49 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:26:52 +1100
Subject: [XFS] endianess annotations for xfs_dir2_data_hdr structure.

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25484a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_da_btree.c   |   2 +-
 fs/xfs/xfs_dir2_block.c |  19 ++++----
 fs/xfs/xfs_dir2_data.c  | 123 +++++++++++++++++++++++++-----------------------
 fs/xfs/xfs_dir2_data.h  |   6 +--
 fs/xfs/xfs_dir2_leaf.c  |  24 +++++-----
 fs/xfs/xfs_dir2_node.c  |  18 +++----
 6 files changed, 99 insertions(+), 93 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 473671fa5c1..96517d29e22 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2204,7 +2204,7 @@ xfs_da_do_buf(
 		data = rbp->data;
 		free = rbp->data;
 		magic = INT_GET(info->magic, ARCH_CONVERT);
-		magic1 = INT_GET(data->hdr.magic, ARCH_CONVERT);
+		magic1 = be32_to_cpu(data->hdr.magic);
 		if (unlikely(
 		    XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
 				   (magic != XFS_DIR_LEAF_MAGIC) &&
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 31bc99faa70..35a03f29b2f 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -100,8 +100,7 @@ xfs_dir2_block_addname(
 	/*
 	 * Check the magic number, corrupted if wrong.
 	 */
-	if (unlikely(INT_GET(block->hdr.magic, ARCH_CONVERT)
-						!= XFS_DIR2_BLOCK_MAGIC)) {
+	if (unlikely(be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)) {
 		XFS_CORRUPTION_ERROR("xfs_dir2_block_addname",
 				     XFS_ERRLEVEL_LOW, mp, block);
 		xfs_da_brelse(tp, bp);
@@ -138,7 +137,7 @@ xfs_dir2_block_addname(
 		 */
 		else {
 			dup = (xfs_dir2_data_unused_t *)
-			      ((char *)block + INT_GET(bf[0].offset, ARCH_CONVERT));
+			      ((char *)block + be16_to_cpu(bf[0].offset));
 			if (dup == enddup) {
 				/*
 				 * It is the biggest freespace, is it too small
@@ -149,10 +148,10 @@ xfs_dir2_block_addname(
 					 * Yes, we use the second-largest
 					 * entry instead if it works.
 					 */
-					if (INT_GET(bf[1].length, ARCH_CONVERT) >= len)
+					if (be16_to_cpu(bf[1].length) >= len)
 						dup = (xfs_dir2_data_unused_t *)
 						      ((char *)block +
-						       INT_GET(bf[1].offset, ARCH_CONVERT));
+						       be16_to_cpu(bf[1].offset));
 					else
 						dup = NULL;
 				}
@@ -172,9 +171,9 @@ xfs_dir2_block_addname(
 	 * If there are stale entries we'll use one for the leaf.
 	 * Is the biggest entry enough to avoid compaction?
 	 */
-	else if (INT_GET(bf[0].length, ARCH_CONVERT) >= len) {
+	else if (be16_to_cpu(bf[0].length) >= len) {
 		dup = (xfs_dir2_data_unused_t *)
-		      ((char *)block + INT_GET(bf[0].offset, ARCH_CONVERT));
+		      ((char *)block + be16_to_cpu(bf[0].offset));
 		compact = 0;
 	}
 	/*
@@ -935,7 +934,7 @@ xfs_dir2_leaf_to_block(
 		goto out;
 	}
 	block = dbp->data;
-	ASSERT(INT_GET(block->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC);
+	ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_DATA_MAGIC);
 	/*
 	 * Size of the "leaf" area in the block.
 	 */
@@ -956,7 +955,7 @@ xfs_dir2_leaf_to_block(
 	/*
 	 * Start converting it to block form.
 	 */
-	INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_BLOCK_MAGIC);
+	block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
 	needlog = 1;
 	needscan = 0;
 	/*
@@ -1095,7 +1094,7 @@ xfs_dir2_sf_to_block(
 		return error;
 	}
 	block = bp->data;
-	INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_BLOCK_MAGIC);
+	block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
 	/*
 	 * Compute size of block "tail" area.
 	 */
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 5b7c47e2f14..fd6b7c03018 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -70,11 +70,11 @@ xfs_dir2_data_check(
 
 	mp = dp->i_mount;
 	d = bp->data;
-	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
-	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 	bf = d->hdr.bestfree;
 	p = (char *)d->u;
-	if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+	if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
 		btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
 		lep = XFS_DIR2_BLOCK_LEAF_P(btp);
 		endp = (char *)lep;
@@ -96,8 +96,8 @@ xfs_dir2_data_check(
 		ASSERT(!bf[2].offset);
 		freeseen |= 1 << 2;
 	}
-	ASSERT(INT_GET(bf[0].length, ARCH_CONVERT) >= INT_GET(bf[1].length, ARCH_CONVERT));
-	ASSERT(INT_GET(bf[1].length, ARCH_CONVERT) >= INT_GET(bf[2].length, ARCH_CONVERT));
+	ASSERT(be16_to_cpu(bf[0].length) >= be16_to_cpu(bf[1].length));
+	ASSERT(be16_to_cpu(bf[1].length) >= be16_to_cpu(bf[2].length));
 	/*
 	 * Loop over the data/unused entries.
 	 */
@@ -117,8 +117,10 @@ xfs_dir2_data_check(
 				i = (int)(dfp - bf);
 				ASSERT((freeseen & (1 << i)) == 0);
 				freeseen |= 1 << i;
-			} else
-				ASSERT(INT_GET(dup->length, ARCH_CONVERT) <= INT_GET(bf[2].length, ARCH_CONVERT));
+			} else {
+				ASSERT(INT_GET(dup->length, ARCH_CONVERT) <=
+				       be16_to_cpu(bf[2].length));
+			}
 			p += INT_GET(dup->length, ARCH_CONVERT);
 			lastfree = 1;
 			continue;
@@ -136,7 +138,7 @@ xfs_dir2_data_check(
 		       (char *)dep - (char *)d);
 		count++;
 		lastfree = 0;
-		if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+		if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
 			addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
 				(xfs_dir2_data_aoff_t)
 				((char *)dep - (char *)d));
@@ -154,7 +156,7 @@ xfs_dir2_data_check(
 	 * Need to have seen all the entries and all the bestfree slots.
 	 */
 	ASSERT(freeseen == 7);
-	if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+	if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
 		for (i = stale = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) {
 			if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
 				stale++;
@@ -190,8 +192,8 @@ xfs_dir2_data_freefind(
 	 * Check order, non-overlapping entries, and if we find the
 	 * one we're looking for it has to be exact.
 	 */
-	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
-	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 	for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0;
 	     dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT];
 	     dfp++) {
@@ -201,23 +203,24 @@ xfs_dir2_data_freefind(
 			continue;
 		}
 		ASSERT(seenzero == 0);
-		if (INT_GET(dfp->offset, ARCH_CONVERT) == off) {
+		if (be16_to_cpu(dfp->offset) == off) {
 			matched = 1;
-			ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(dup->length, ARCH_CONVERT));
-		} else if (off < INT_GET(dfp->offset, ARCH_CONVERT))
-			ASSERT(off + INT_GET(dup->length, ARCH_CONVERT) <= INT_GET(dfp->offset, ARCH_CONVERT));
+			ASSERT(be16_to_cpu(dfp->length) == INT_GET(dup->length, ARCH_CONVERT));
+		} else if (off < be16_to_cpu(dfp->offset))
+			ASSERT(off + INT_GET(dup->length, ARCH_CONVERT) <= be16_to_cpu(dfp->offset));
 		else
-			ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) + INT_GET(dfp->length, ARCH_CONVERT) <= off);
-		ASSERT(matched || INT_GET(dfp->length, ARCH_CONVERT) >= INT_GET(dup->length, ARCH_CONVERT));
+			ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
+		ASSERT(matched || be16_to_cpu(dfp->length) >= INT_GET(dup->length, ARCH_CONVERT));
 		if (dfp > &d->hdr.bestfree[0])
-			ASSERT(INT_GET(dfp[-1].length, ARCH_CONVERT) >= INT_GET(dfp[0].length, ARCH_CONVERT));
+			ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
 	}
 #endif
 	/*
 	 * If this is smaller than the smallest bestfree entry,
 	 * it can't be there since they're sorted.
 	 */
-	if (INT_GET(dup->length, ARCH_CONVERT) < INT_GET(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length, ARCH_CONVERT))
+	if (INT_GET(dup->length, ARCH_CONVERT) <
+	    be16_to_cpu(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length))
 		return NULL;
 	/*
 	 * Look at the three bestfree entries for our guy.
@@ -227,7 +230,7 @@ xfs_dir2_data_freefind(
 	     dfp++) {
 		if (!dfp->offset)
 			return NULL;
-		if (INT_GET(dfp->offset, ARCH_CONVERT) == off)
+		if (be16_to_cpu(dfp->offset) == off)
 			return dfp;
 	}
 	/*
@@ -249,29 +252,29 @@ xfs_dir2_data_freeinsert(
 	xfs_dir2_data_free_t	new;		/* new bestfree entry */
 
 #ifdef __KERNEL__
-	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
-	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 #endif
 	dfp = d->hdr.bestfree;
-	INT_COPY(new.length, dup->length, ARCH_CONVERT);
-	INT_SET(new.offset, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dup - (char *)d));
+	new.length = dup->length;
+	new.offset = cpu_to_be16((char *)dup - (char *)d);
 	/*
 	 * Insert at position 0, 1, or 2; or not at all.
 	 */
-	if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[0].length, ARCH_CONVERT)) {
+	if (be16_to_cpu(new.length) > be16_to_cpu(dfp[0].length)) {
 		dfp[2] = dfp[1];
 		dfp[1] = dfp[0];
 		dfp[0] = new;
 		*loghead = 1;
 		return &dfp[0];
 	}
-	if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[1].length, ARCH_CONVERT)) {
+	if (be16_to_cpu(new.length) > be16_to_cpu(dfp[1].length)) {
 		dfp[2] = dfp[1];
 		dfp[1] = new;
 		*loghead = 1;
 		return &dfp[1];
 	}
-	if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[2].length, ARCH_CONVERT)) {
+	if (be16_to_cpu(new.length) > be16_to_cpu(dfp[2].length)) {
 		dfp[2] = new;
 		*loghead = 1;
 		return &dfp[2];
@@ -289,8 +292,8 @@ xfs_dir2_data_freeremove(
 	int			*loghead)	/* out: log data header */
 {
 #ifdef __KERNEL__
-	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
-	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 #endif
 	/*
 	 * It's the first entry, slide the next 2 up.
@@ -334,8 +337,8 @@ xfs_dir2_data_freescan(
 	char			*p;		/* current entry pointer */
 
 #ifdef __KERNEL__
-	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
-	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 #endif
 	/*
 	 * Start by clearing the table.
@@ -348,7 +351,7 @@ xfs_dir2_data_freescan(
 	p = (char *)d->u;
 	if (aendp)
 		endp = aendp;
-	else if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+	else if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
 		btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
 		endp = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
 	} else
@@ -415,8 +418,8 @@ xfs_dir2_data_init(
 	 * Initialize the header.
 	 */
 	d = bp->data;
-	INT_SET(d->hdr.magic, ARCH_CONVERT, XFS_DIR2_DATA_MAGIC);
-	INT_SET(d->hdr.bestfree[0].offset, ARCH_CONVERT, (xfs_dir2_data_off_t)sizeof(d->hdr));
+	d->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
+	d->hdr.bestfree[0].offset = cpu_to_be16(sizeof(d->hdr));
 	for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
 		d->hdr.bestfree[i].length = 0;
 		d->hdr.bestfree[i].offset = 0;
@@ -428,7 +431,7 @@ xfs_dir2_data_init(
 	INT_SET(dup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
 
 	t=mp->m_dirblksize - (uint)sizeof(d->hdr);
-	INT_SET(d->hdr.bestfree[0].length, ARCH_CONVERT, t);
+	d->hdr.bestfree[0].length = cpu_to_be16(t);
 	INT_SET(dup->length, ARCH_CONVERT, t);
 	INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT,
 		(xfs_dir2_data_off_t)((char *)dup - (char *)d));
@@ -453,8 +456,8 @@ xfs_dir2_data_log_entry(
 	xfs_dir2_data_t		*d;		/* data block pointer */
 
 	d = bp->data;
-	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
-	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 	xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d),
 		(uint)((char *)(XFS_DIR2_DATA_ENTRY_TAG_P(dep) + 1) -
 		       (char *)d - 1));
@@ -471,8 +474,8 @@ xfs_dir2_data_log_header(
 	xfs_dir2_data_t		*d;		/* data block pointer */
 
 	d = bp->data;
-	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
-	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 	xfs_da_log_buf(tp, bp, (uint)((char *)&d->hdr - (char *)d),
 		(uint)(sizeof(d->hdr) - 1));
 }
@@ -489,8 +492,8 @@ xfs_dir2_data_log_unused(
 	xfs_dir2_data_t		*d;		/* data block pointer */
 
 	d = bp->data;
-	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
-	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 	/*
 	 * Log the first part of the unused entry.
 	 */
@@ -533,12 +536,12 @@ xfs_dir2_data_make_free(
 	/*
 	 * Figure out where the end of the data area is.
 	 */
-	if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC)
+	if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC)
 		endptr = (char *)d + mp->m_dirblksize;
 	else {
 		xfs_dir2_block_tail_t	*btp;	/* block tail */
 
-		ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+		ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 		btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
 		endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
 	}
@@ -586,7 +589,7 @@ xfs_dir2_data_make_free(
 		 * since the third bestfree is there, there might be more
 		 * entries.
 		 */
-		needscan = d->hdr.bestfree[2].length;
+		needscan = (d->hdr.bestfree[2].length != 0);
 		/*
 		 * Fix up the new big freespace.
 		 */
@@ -614,7 +617,7 @@ xfs_dir2_data_make_free(
 			 */
 			dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp);
 			ASSERT(dfp == &d->hdr.bestfree[0]);
-			ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(prevdup->length, ARCH_CONVERT));
+			ASSERT(be16_to_cpu(dfp->length) == INT_GET(prevdup->length, ARCH_CONVERT));
 			ASSERT(!dfp[1].length);
 			ASSERT(!dfp[2].length);
 		}
@@ -640,8 +643,10 @@ xfs_dir2_data_make_free(
 		/*
 		 * Otherwise we need a scan if the new entry is big enough.
 		 */
-		else
-			needscan = INT_GET(prevdup->length, ARCH_CONVERT) > INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT);
+		else {
+			needscan = INT_GET(prevdup->length, ARCH_CONVERT) >
+				   be16_to_cpu(d->hdr.bestfree[2].length);
+		}
 	}
 	/*
 	 * The following entry is free, merge with it.
@@ -666,8 +671,10 @@ xfs_dir2_data_make_free(
 		/*
 		 * Otherwise we need a scan if the new entry is big enough.
 		 */
-		else
-			needscan = INT_GET(newdup->length, ARCH_CONVERT) > INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT);
+		else {
+			needscan = INT_GET(newdup->length, ARCH_CONVERT) >
+				   be16_to_cpu(d->hdr.bestfree[2].length);
+		}
 	}
 	/*
 	 * Neither neighbor is free.  Make a new entry.
@@ -707,8 +714,8 @@ xfs_dir2_data_use_free(
 	int			oldlen;		/* old unused entry's length */
 
 	d = bp->data;
-	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
-	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
+	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 	ASSERT(INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG);
 	ASSERT(offset >= (char *)dup - (char *)d);
 	ASSERT(offset + len <= (char *)dup + INT_GET(dup->length, ARCH_CONVERT) - (char *)d);
@@ -718,7 +725,7 @@ xfs_dir2_data_use_free(
 	 */
 	dfp = xfs_dir2_data_freefind(d, dup);
 	oldlen = INT_GET(dup->length, ARCH_CONVERT);
-	ASSERT(dfp || oldlen <= INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT));
+	ASSERT(dfp || oldlen <= be16_to_cpu(d->hdr.bestfree[2].length));
 	/*
 	 * Check for alignment with front and back of the entry.
 	 */
@@ -732,7 +739,7 @@ xfs_dir2_data_use_free(
 	 */
 	if (matchfront && matchback) {
 		if (dfp) {
-			needscan = d->hdr.bestfree[2].offset;
+			needscan = (d->hdr.bestfree[2].offset != 0);
 			if (!needscan)
 				xfs_dir2_data_freeremove(d, dfp, needlogp);
 		}
@@ -755,8 +762,8 @@ xfs_dir2_data_use_free(
 			xfs_dir2_data_freeremove(d, dfp, needlogp);
 			dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
 			ASSERT(dfp != NULL);
-			ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(newdup->length, ARCH_CONVERT));
-			ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) == (char *)newdup - (char *)d);
+			ASSERT(be16_to_cpu(dfp->length) == INT_GET(newdup->length, ARCH_CONVERT));
+			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d);
 			/*
 			 * If we got inserted at the last slot,
 			 * that means we don't know if there was a better
@@ -783,8 +790,8 @@ xfs_dir2_data_use_free(
 			xfs_dir2_data_freeremove(d, dfp, needlogp);
 			dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
 			ASSERT(dfp != NULL);
-			ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(newdup->length, ARCH_CONVERT));
-			ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) == (char *)newdup - (char *)d);
+			ASSERT(be16_to_cpu(dfp->length) == INT_GET(newdup->length, ARCH_CONVERT));
+			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d);
 			/*
 			 * If we got inserted at the last slot,
 			 * that means we don't know if there was a better
@@ -819,7 +826,7 @@ xfs_dir2_data_use_free(
 		 * the 2 new will work.
 		 */
 		if (dfp) {
-			needscan = d->hdr.bestfree[2].length;
+			needscan = (d->hdr.bestfree[2].length != 0);
 			if (!needscan) {
 				xfs_dir2_data_freeremove(d, dfp, needlogp);
 				(void)xfs_dir2_data_freeinsert(d, newdup,
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index 5e3a7f9ec73..dd8b8652363 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -65,8 +65,8 @@ struct xfs_trans;
  * The freespace will be formatted as a xfs_dir2_data_unused_t.
  */
 typedef struct xfs_dir2_data_free {
-	xfs_dir2_data_off_t	offset;		/* start of freespace */
-	xfs_dir2_data_off_t	length;		/* length of freespace */
+	__be16			offset;		/* start of freespace */
+	__be16			length;		/* length of freespace */
 } xfs_dir2_data_free_t;
 
 /*
@@ -75,7 +75,7 @@ typedef struct xfs_dir2_data_free {
  * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
  */
 typedef struct xfs_dir2_data_hdr {
-	__uint32_t		magic;		/* XFS_DIR2_DATA_MAGIC */
+	__be32			magic;		/* XFS_DIR2_DATA_MAGIC */
 						/* or XFS_DIR2_BLOCK_MAGIC */
 	xfs_dir2_data_free_t	bestfree[XFS_DIR2_DATA_FD_COUNT];
 } xfs_dir2_data_hdr_t;
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index e4e07c8f7a7..8d7f154d190 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -133,7 +133,7 @@ xfs_dir2_block_to_leaf(
 	/*
 	 * Fix up the block header, make it a data block.
 	 */
-	INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_DATA_MAGIC);
+	block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
 	if (needscan)
 		xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog,
 			NULL);
@@ -143,7 +143,7 @@ xfs_dir2_block_to_leaf(
 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
 	INT_SET(ltp->bestcount, ARCH_CONVERT, 1);
 	bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
-	INT_COPY(bestsp[0], block->hdr.bestfree[0].length, ARCH_CONVERT);
+	bestsp[0] =  block->hdr.bestfree[0].length;
 	/*
 	 * Log the data header and leaf bests table.
 	 */
@@ -372,7 +372,7 @@ xfs_dir2_leaf_addname(
 		else
 			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
 		data = dbp->data;
-		INT_COPY(bestsp[use_block], data->hdr.bestfree[0].length, ARCH_CONVERT);
+		bestsp[use_block] = data->hdr.bestfree[0].length;
 		grown = 1;
 	}
 	/*
@@ -394,7 +394,7 @@ xfs_dir2_leaf_addname(
 	 * Point to the biggest freespace in our data block.
 	 */
 	dup = (xfs_dir2_data_unused_t *)
-	      ((char *)data + INT_GET(data->hdr.bestfree[0].offset, ARCH_CONVERT));
+	      ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset));
 	ASSERT(INT_GET(dup->length, ARCH_CONVERT) >= length);
 	needscan = needlog = 0;
 	/*
@@ -427,8 +427,8 @@ xfs_dir2_leaf_addname(
 	 * If the bests table needs to be changed, do it.
 	 * Log the change unless we've already done that.
 	 */
-	if (INT_GET(bestsp[use_block], ARCH_CONVERT) != INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) {
-		INT_COPY(bestsp[use_block], data->hdr.bestfree[0].length, ARCH_CONVERT);
+	if (INT_GET(bestsp[use_block], ARCH_CONVERT) != be16_to_cpu(data->hdr.bestfree[0].length)) {
+		bestsp[use_block] = data->hdr.bestfree[0].length;
 		if (!grown)
 			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
 	}
@@ -1477,7 +1477,7 @@ xfs_dir2_leaf_removename(
 	dep = (xfs_dir2_data_entry_t *)
 	      ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)));
 	needscan = needlog = 0;
-	oldbest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT);
+	oldbest = be16_to_cpu(data->hdr.bestfree[0].length);
 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
 	bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
 	ASSERT(INT_GET(bestsp[db], ARCH_CONVERT) == oldbest);
@@ -1506,15 +1506,15 @@ xfs_dir2_leaf_removename(
 	 * If the longest freespace in the data block has changed,
 	 * put the new value in the bests table and log that.
 	 */
-	if (INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) != oldbest) {
-		INT_COPY(bestsp[db], data->hdr.bestfree[0].length, ARCH_CONVERT);
+	if (be16_to_cpu(data->hdr.bestfree[0].length) != oldbest) {
+		bestsp[db] = data->hdr.bestfree[0].length;
 		xfs_dir2_leaf_log_bests(tp, lbp, db, db);
 	}
 	xfs_dir2_data_check(dp, dbp);
 	/*
 	 * If the data block is now empty then get rid of the data block.
 	 */
-	if (INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) ==
+	if (be16_to_cpu(data->hdr.bestfree[0].length) ==
 	    mp->m_dirblksize - (uint)sizeof(data->hdr)) {
 		ASSERT(db != mp->m_dirdatablk);
 		if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
@@ -1708,7 +1708,7 @@ xfs_dir2_leaf_trim_data(
 	}
 #ifdef DEBUG
 	data = dbp->data;
-	ASSERT(INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC);
+	ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
 #endif
 	/* this seems to be an error
 	 * data is only valid if DEBUG is defined?
@@ -1717,7 +1717,7 @@ xfs_dir2_leaf_trim_data(
 
 	leaf = lbp->data;
 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
-	ASSERT(INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) ==
+	ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) ==
 	       mp->m_dirblksize - (uint)sizeof(data->hdr));
 	ASSERT(db == INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
 	/*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 641f8633d25..e47bde74eec 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -894,7 +894,7 @@ xfs_dir2_leafn_remove(
 	dbp = dblk->bp;
 	data = dbp->data;
 	dep = (xfs_dir2_data_entry_t *)((char *)data + off);
-	longest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT);
+	longest = be16_to_cpu(data->hdr.bestfree[0].length);
 	needlog = needscan = 0;
 	xfs_dir2_data_make_free(tp, dbp, off,
 		XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
@@ -911,7 +911,7 @@ xfs_dir2_leafn_remove(
 	 * If the longest data block freespace changes, need to update
 	 * the corresponding freeblock entry.
 	 */
-	if (longest < INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) {
+	if (longest < be16_to_cpu(data->hdr.bestfree[0].length)) {
 		int		error;		/* error return value */
 		xfs_dabuf_t	*fbp;		/* freeblock buffer */
 		xfs_dir2_db_t	fdb;		/* freeblock block number */
@@ -937,7 +937,7 @@ xfs_dir2_leafn_remove(
 		 * Calculate which entry we need to fix.
 		 */
 		findex = XFS_DIR2_DB_TO_FDINDEX(mp, db);
-		longest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT);
+		longest = be16_to_cpu(data->hdr.bestfree[0].length);
 		/*
 		 * If the data block is now empty we can get rid of it
 		 * (usually).
@@ -1649,7 +1649,7 @@ xfs_dir2_node_addname_int(
 		 * change again.
 		 */
 		data = dbp->data;
-		INT_COPY(free->bests[findex], data->hdr.bestfree[0].length, ARCH_CONVERT);
+		free->bests[findex] = data->hdr.bestfree[0].length;
 		logfree = 1;
 	}
 	/*
@@ -1677,12 +1677,12 @@ xfs_dir2_node_addname_int(
 		data = dbp->data;
 		logfree = 0;
 	}
-	ASSERT(INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) >= length);
+	ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) >= length);
 	/*
 	 * Point to the existing unused space.
 	 */
 	dup = (xfs_dir2_data_unused_t *)
-	      ((char *)data + INT_GET(data->hdr.bestfree[0].offset, ARCH_CONVERT));
+	      ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset));
 	needscan = needlog = 0;
 	/*
 	 * Mark the first part of the unused space, inuse for us.
@@ -1713,8 +1713,8 @@ xfs_dir2_node_addname_int(
 	/*
 	 * If the freespace entry is now wrong, update it.
 	 */
-	if (INT_GET(free->bests[findex], ARCH_CONVERT) != INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) {
-		INT_COPY(free->bests[findex], data->hdr.bestfree[0].length, ARCH_CONVERT);
+	if (INT_GET(free->bests[findex], ARCH_CONVERT) != be16_to_cpu(data->hdr.bestfree[0].length)) {
+		free->bests[findex] = data->hdr.bestfree[0].length;
 		logfree = 1;
 	}
 	/*
@@ -1900,7 +1900,7 @@ xfs_dir2_node_replace(
 		 * Point to the data entry.
 		 */
 		data = state->extrablk.bp->data;
-		ASSERT(INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC);
+		ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
 		dep = (xfs_dir2_data_entry_t *)
 		      ((char *)data +
 		       XFS_DIR2_DATAPTR_TO_OFF(state->mp, INT_GET(lep->address, ARCH_CONVERT)));
-- 
cgit v1.2.3


From 0ba962ef7128d9276b8f95196382d5b9e2ad841d Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:27:07 +1100
Subject: [XFS] endianess annotations for xfs_dir2_free_hdr_t

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25485a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_da_btree.c  |  2 +-
 fs/xfs/xfs_dir2_leaf.c |  6 ++--
 fs/xfs/xfs_dir2_node.c | 90 +++++++++++++++++++++++++-------------------------
 fs/xfs/xfs_dir2_node.h | 10 +++---
 4 files changed, 54 insertions(+), 54 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 96517d29e22..2f4acb9d9b9 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2213,7 +2213,7 @@ xfs_da_do_buf(
 				   (magic != XFS_DIR2_LEAFN_MAGIC) &&
 				   (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
 				   (magic1 != XFS_DIR2_DATA_MAGIC) &&
-				   (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC),
+				   (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC),
 				mp, XFS_ERRTAG_DA_READ_BUF,
 				XFS_RANDOM_DA_READ_BUF))) {
 			xfs_buftrace("DA READ ERROR", rbp->bps[0]);
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 8d7f154d190..bd425925ee2 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1816,7 +1816,7 @@ xfs_dir2_node_to_leaf(
 		return error;
 	}
 	free = fbp->data;
-	ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+	ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 	ASSERT(!free->hdr.firstdb);
 	/*
 	 * Now see if the leafn and free data will fit in a leaf1.
@@ -1824,7 +1824,7 @@ xfs_dir2_node_to_leaf(
 	 */
 	if ((uint)sizeof(leaf->hdr) +
 	    (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT)) * (uint)sizeof(leaf->ents[0]) +
-	    INT_GET(free->hdr.nvalid, ARCH_CONVERT) * (uint)sizeof(leaf->bests[0]) +
+	    be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) +
 	    (uint)sizeof(leaf->tail) >
 	    mp->m_dirblksize) {
 		xfs_da_brelse(tp, fbp);
@@ -1843,7 +1843,7 @@ xfs_dir2_node_to_leaf(
 	 * Set up the leaf tail from the freespace block.
 	 */
 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
-	INT_COPY(ltp->bestcount, free->hdr.nvalid, ARCH_CONVERT);
+	ltp->bestcount = free->hdr.nvalid;
 	/*
 	 * Set up the leaf bests table.
 	 */
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index e47bde74eec..b051e2a09f3 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -76,7 +76,7 @@ xfs_dir2_free_log_bests(
 	xfs_dir2_free_t		*free;		/* freespace structure */
 
 	free = bp->data;
-	ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+	ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 	xfs_da_log_buf(tp, bp,
 		(uint)((char *)&free->bests[first] - (char *)free),
 		(uint)((char *)&free->bests[last] - (char *)free +
@@ -94,7 +94,7 @@ xfs_dir2_free_log_header(
 	xfs_dir2_free_t		*free;		/* freespace structure */
 
 	free = bp->data;
-	ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+	ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 	xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free),
 		(uint)(sizeof(xfs_dir2_free_hdr_t) - 1));
 }
@@ -121,7 +121,7 @@ xfs_dir2_leaf_to_node(
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	int			n;		/* count of live freespc ents */
 	xfs_dir2_data_off_t	off;		/* freespace entry value */
-	xfs_dir2_data_off_t	*to;		/* pointer to freespace entry */
+	__be16			*to;		/* pointer to freespace entry */
 	xfs_trans_t		*tp;		/* transaction pointer */
 
 	xfs_dir2_trace_args_b("leaf_to_node", args, lbp);
@@ -149,10 +149,10 @@ xfs_dir2_leaf_to_node(
 	/*
 	 * Initialize the freespace block header.
 	 */
-	INT_SET(free->hdr.magic, ARCH_CONVERT, XFS_DIR2_FREE_MAGIC);
+	free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
 	free->hdr.firstdb = 0;
 	ASSERT(INT_GET(ltp->bestcount, ARCH_CONVERT) <= (uint)dp->i_d.di_size / mp->m_dirblksize);
-	INT_COPY(free->hdr.nvalid, ltp->bestcount, ARCH_CONVERT);
+	free->hdr.nvalid = ltp->bestcount;
 	/*
 	 * Copy freespace entries from the leaf block to the new block.
 	 * Count active entries.
@@ -161,16 +161,16 @@ xfs_dir2_leaf_to_node(
 	     i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++, from++, to++) {
 		if ((off = INT_GET(*from, ARCH_CONVERT)) != NULLDATAOFF)
 			n++;
-		INT_SET(*to, ARCH_CONVERT, off);
+		*to = cpu_to_be16(off);
 	}
-	INT_SET(free->hdr.nused, ARCH_CONVERT, n);
+	free->hdr.nused = cpu_to_be32(n);
 	INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAFN_MAGIC);
 	/*
 	 * Log everything.
 	 */
 	xfs_dir2_leaf_log_header(tp, lbp);
 	xfs_dir2_free_log_header(tp, fbp);
-	xfs_dir2_free_log_bests(tp, fbp, 0, INT_GET(free->hdr.nvalid, ARCH_CONVERT) - 1);
+	xfs_dir2_free_log_bests(tp, fbp, 0, be32_to_cpu(free->hdr.nvalid) - 1);
 	xfs_da_buf_done(fbp);
 	xfs_dir2_leafn_check(dp, lbp);
 	return 0;
@@ -443,7 +443,7 @@ xfs_dir2_leafn_lookup_int(
 		curdb = -1;
 		length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
 		if ((free = (curbp ? curbp->data : NULL)))
-			ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+			ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 	}
 	/*
 	 * For others, it's a data block buffer, get the block number.
@@ -506,15 +506,15 @@ xfs_dir2_leafn_lookup_int(
 					}
 					curfdb = newfdb;
 					free = curbp->data;
-					ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) ==
+					ASSERT(be32_to_cpu(free->hdr.magic) ==
 					       XFS_DIR2_FREE_MAGIC);
-					ASSERT((INT_GET(free->hdr.firstdb, ARCH_CONVERT) %
+					ASSERT((be32_to_cpu(free->hdr.firstdb) %
 						XFS_DIR2_MAX_FREE_BESTS(mp)) ==
 					       0);
-					ASSERT(INT_GET(free->hdr.firstdb, ARCH_CONVERT) <= curdb);
+					ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
 					ASSERT(curdb <
-					       INT_GET(free->hdr.firstdb, ARCH_CONVERT) +
-					       INT_GET(free->hdr.nvalid, ARCH_CONVERT));
+					       be32_to_cpu(free->hdr.firstdb) +
+					       be32_to_cpu(free->hdr.nvalid));
 				}
 				/*
 				 * Get the index for our entry.
@@ -523,12 +523,12 @@ xfs_dir2_leafn_lookup_int(
 				/*
 				 * If it has room, return it.
 				 */
-				if (unlikely(INT_GET(free->bests[fi], ARCH_CONVERT) == NULLDATAOFF)) {
+				if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
 					XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
 							 XFS_ERRLEVEL_LOW, mp);
 					return XFS_ERROR(EFSCORRUPTED);
 				}
-				if (INT_GET(free->bests[fi], ARCH_CONVERT) >= length) {
+				if (be16_to_cpu(free->bests[fi]) >= length) {
 					*indexp = index;
 					state->extravalid = 1;
 					state->extrablk.bp = curbp;
@@ -929,8 +929,8 @@ xfs_dir2_leafn_remove(
 			return error;
 		}
 		free = fbp->data;
-		ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
-		ASSERT(INT_GET(free->hdr.firstdb, ARCH_CONVERT) ==
+		ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+		ASSERT(be32_to_cpu(free->hdr.firstdb) ==
 		       XFS_DIR2_MAX_FREE_BESTS(mp) *
 		       (fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
 		/*
@@ -969,7 +969,7 @@ xfs_dir2_leafn_remove(
 			/*
 			 * One less used entry in the free table.
 			 */
-			INT_MOD(free->hdr.nused, ARCH_CONVERT, -1);
+			free->hdr.nused = cpu_to_be32(-1);
 			xfs_dir2_free_log_header(tp, fbp);
 			/*
 			 * If this was the last entry in the table, we can
@@ -977,21 +977,21 @@ xfs_dir2_leafn_remove(
 			 * entries at the end referring to non-existent
 			 * data blocks, get those too.
 			 */
-			if (findex == INT_GET(free->hdr.nvalid, ARCH_CONVERT) - 1) {
+			if (findex == be32_to_cpu(free->hdr.nvalid) - 1) {
 				int	i;		/* free entry index */
 
 				for (i = findex - 1;
-				     i >= 0 && INT_GET(free->bests[i], ARCH_CONVERT) == NULLDATAOFF;
+				     i >= 0 && be16_to_cpu(free->bests[i]) == NULLDATAOFF;
 				     i--)
 					continue;
-				INT_SET(free->hdr.nvalid, ARCH_CONVERT, i + 1);
+				free->hdr.nvalid = cpu_to_be32(i + 1);
 				logfree = 0;
 			}
 			/*
 			 * Not the last entry, just punch it out.
 			 */
 			else {
-				INT_SET(free->bests[findex], ARCH_CONVERT, NULLDATAOFF);
+				free->bests[findex] = cpu_to_be16(NULLDATAOFF);
 				logfree = 1;
 			}
 			/*
@@ -1017,7 +1017,7 @@ xfs_dir2_leafn_remove(
 		 * the new value.
 		 */
 		else {
-			INT_SET(free->bests[findex], ARCH_CONVERT, longest);
+			free->bests[findex] = cpu_to_be16(longest);
 			logfree = 1;
 		}
 		/*
@@ -1397,7 +1397,7 @@ xfs_dir2_node_addname_int(
 		 */
 		ifbno = fblk->blkno;
 		free = fbp->data;
-		ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+		ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 		findex = fblk->index;
 		/*
 		 * This means the free entry showed that the data block had
@@ -1405,10 +1405,10 @@ xfs_dir2_node_addname_int(
 		 * Use that data block.
 		 */
 		if (findex >= 0) {
-			ASSERT(findex < INT_GET(free->hdr.nvalid, ARCH_CONVERT));
-			ASSERT(INT_GET(free->bests[findex], ARCH_CONVERT) != NULLDATAOFF);
-			ASSERT(INT_GET(free->bests[findex], ARCH_CONVERT) >= length);
-			dbno = INT_GET(free->hdr.firstdb, ARCH_CONVERT) + findex;
+			ASSERT(findex < be32_to_cpu(free->hdr.nvalid));
+			ASSERT(be16_to_cpu(free->bests[findex]) != NULLDATAOFF);
+			ASSERT(be16_to_cpu(free->bests[findex]) >= length);
+			dbno = be32_to_cpu(free->hdr.firstdb) + findex;
 		}
 		/*
 		 * The data block looked at didn't have enough room.
@@ -1481,20 +1481,20 @@ xfs_dir2_node_addname_int(
 				continue;
 			}
 			free = fbp->data;
-			ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+			ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 			findex = 0;
 		}
 		/*
 		 * Look at the current free entry.  Is it good enough?
 		 */
-		if (INT_GET(free->bests[findex], ARCH_CONVERT) != NULLDATAOFF &&
-		    INT_GET(free->bests[findex], ARCH_CONVERT) >= length)
-			dbno = INT_GET(free->hdr.firstdb, ARCH_CONVERT) + findex;
+		if (be16_to_cpu(free->bests[findex]) != NULLDATAOFF &&
+		    be16_to_cpu(free->bests[findex]) >= length)
+			dbno = be32_to_cpu(free->hdr.firstdb) + findex;
 		else {
 			/*
 			 * Are we done with the freeblock?
 			 */
-			if (++findex == INT_GET(free->hdr.nvalid, ARCH_CONVERT)) {
+			if (++findex == be32_to_cpu(free->hdr.nvalid)) {
 				/*
 				 * Drop the block.
 				 */
@@ -1608,15 +1608,15 @@ xfs_dir2_node_addname_int(
 			 * its first slot as our empty slot.
 			 */
 			free = fbp->data;
-			INT_SET(free->hdr.magic, ARCH_CONVERT, XFS_DIR2_FREE_MAGIC);
-			INT_SET(free->hdr.firstdb, ARCH_CONVERT,
+			free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
+			free->hdr.firstdb = cpu_to_be32(
 				(fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
 				XFS_DIR2_MAX_FREE_BESTS(mp));
 			free->hdr.nvalid = 0;
 			free->hdr.nused = 0;
 		} else {
 			free = fbp->data;
-			ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+			ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 		}
 
 		/*
@@ -1627,20 +1627,20 @@ xfs_dir2_node_addname_int(
 		 * If it's after the end of the current entries in the
 		 * freespace block, extend that table.
 		 */
-		if (findex >= INT_GET(free->hdr.nvalid, ARCH_CONVERT)) {
+		if (findex >= be32_to_cpu(free->hdr.nvalid)) {
 			ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp));
-			INT_SET(free->hdr.nvalid, ARCH_CONVERT, findex + 1);
+			free->hdr.nvalid = cpu_to_be32(findex + 1);
 			/*
 			 * Tag new entry so nused will go up.
 			 */
-			INT_SET(free->bests[findex], ARCH_CONVERT, NULLDATAOFF);
+			free->bests[findex] = cpu_to_be16(NULLDATAOFF);
 		}
 		/*
 		 * If this entry was for an empty data block
 		 * (this should always be true) then update the header.
 		 */
-		if (INT_GET(free->bests[findex], ARCH_CONVERT) == NULLDATAOFF) {
-			INT_MOD(free->hdr.nused, ARCH_CONVERT, +1);
+		if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) {
+			be32_add(&free->hdr.nused, 1);
 			xfs_dir2_free_log_header(tp, fbp);
 		}
 		/*
@@ -1713,7 +1713,7 @@ xfs_dir2_node_addname_int(
 	/*
 	 * If the freespace entry is now wrong, update it.
 	 */
-	if (INT_GET(free->bests[findex], ARCH_CONVERT) != be16_to_cpu(data->hdr.bestfree[0].length)) {
+	if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(data->hdr.bestfree[0].length)) {
 		free->bests[findex] = data->hdr.bestfree[0].length;
 		logfree = 1;
 	}
@@ -1966,11 +1966,11 @@ xfs_dir2_node_trim_free(
 		return 0;
 	}
 	free = bp->data;
-	ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+	ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 	/*
 	 * If there are used entries, there's nothing to do.
 	 */
-	if (INT_GET(free->hdr.nused, ARCH_CONVERT) > 0) {
+	if (be32_to_cpu(free->hdr.nused) > 0) {
 		xfs_da_brelse(tp, bp);
 		*rvalp = 0;
 		return 0;
diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h
index 0ab8fbd5951..c7c870ee785 100644
--- a/fs/xfs/xfs_dir2_node.h
+++ b/fs/xfs/xfs_dir2_node.h
@@ -41,15 +41,15 @@ struct xfs_trans;
 #define	XFS_DIR2_FREE_MAGIC	0x58443246	/* XD2F */
 
 typedef	struct xfs_dir2_free_hdr {
-	__uint32_t		magic;		/* XFS_DIR2_FREE_MAGIC */
-	__int32_t		firstdb;	/* db of first entry */
-	__int32_t		nvalid;		/* count of valid entries */
-	__int32_t		nused;		/* count of used entries */
+	__be32			magic;		/* XFS_DIR2_FREE_MAGIC */
+	__be32			firstdb;	/* db of first entry */
+	__be32			nvalid;		/* count of valid entries */
+	__be32			nused;		/* count of used entries */
 } xfs_dir2_free_hdr_t;
 
 typedef struct xfs_dir2_free {
 	xfs_dir2_free_hdr_t	hdr;		/* block header */
-	xfs_dir2_data_off_t	bests[1];	/* best free counts */
+	__be16			bests[1];	/* best free counts */
 						/* unused entries are -1 */
 } xfs_dir2_free_t;
 
-- 
cgit v1.2.3


From 68b3a1024a7cda4afaacb3d25e6ac234ddfc0834 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:27:19 +1100
Subject: [XFS] endianess annotations for XFS_DIR2_LEAF_BESTS_P

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25486a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_dir2_block.c |  4 ++--
 fs/xfs/xfs_dir2_leaf.c  | 30 +++++++++++++++---------------
 fs/xfs/xfs_dir2_leaf.h  |  5 ++---
 fs/xfs/xfs_dir2_node.c  |  4 ++--
 4 files changed, 21 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 35a03f29b2f..e9d298f06ca 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -880,7 +880,7 @@ xfs_dir2_leaf_to_block(
 	xfs_dabuf_t		*lbp,		/* leaf buffer */
 	xfs_dabuf_t		*dbp)		/* data buffer */
 {
-	xfs_dir2_data_off_t	*bestsp;	/* leaf bests table */
+	__be16			*bestsp;	/* leaf bests table */
 	xfs_dir2_block_t	*block;		/* block structure */
 	xfs_dir2_block_tail_t	*btp;		/* block tail */
 	xfs_inode_t		*dp;		/* incore directory inode */
@@ -914,7 +914,7 @@ xfs_dir2_leaf_to_block(
 	 */
 	while (dp->i_d.di_size > mp->m_dirblksize) {
 		bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
-		if (INT_GET(bestsp[INT_GET(ltp->bestcount, ARCH_CONVERT) - 1], ARCH_CONVERT) ==
+		if (be16_to_cpu(bestsp[INT_GET(ltp->bestcount, ARCH_CONVERT) - 1]) ==
 		    mp->m_dirblksize - (uint)sizeof(block->hdr)) {
 			if ((error =
 			    xfs_dir2_leaf_trim_data(args, lbp,
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index bd425925ee2..568e0feca03 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -66,7 +66,7 @@ xfs_dir2_block_to_leaf(
 	xfs_da_args_t		*args,		/* operation arguments */
 	xfs_dabuf_t		*dbp)		/* input block's buffer */
 {
-	xfs_dir2_data_off_t	*bestsp;	/* leaf's bestsp entries */
+	__be16			*bestsp;	/* leaf's bestsp entries */
 	xfs_dablk_t		blkno;		/* leaf block's bno */
 	xfs_dir2_block_t	*block;		/* block structure */
 	xfs_dir2_leaf_entry_t	*blp;		/* block's leaf entries */
@@ -163,7 +163,7 @@ int						/* error */
 xfs_dir2_leaf_addname(
 	xfs_da_args_t		*args)		/* operation arguments */
 {
-	xfs_dir2_data_off_t	*bestsp;	/* freespace table in leaf */
+	__be16			*bestsp;	/* freespace table in leaf */
 	int			compact;	/* need to compact leaves */
 	xfs_dir2_data_t		*data;		/* data block structure */
 	xfs_dabuf_t		*dbp;		/* data block buffer */
@@ -228,8 +228,8 @@ xfs_dir2_leaf_addname(
 			continue;
 		i = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
 		ASSERT(i < INT_GET(ltp->bestcount, ARCH_CONVERT));
-		ASSERT(INT_GET(bestsp[i], ARCH_CONVERT) != NULLDATAOFF);
-		if (INT_GET(bestsp[i], ARCH_CONVERT) >= length) {
+		ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF);
+		if (be16_to_cpu(bestsp[i]) >= length) {
 			use_block = i;
 			break;
 		}
@@ -242,9 +242,9 @@ xfs_dir2_leaf_addname(
 			/*
 			 * Remember a block we see that's missing.
 			 */
-			if (INT_GET(bestsp[i], ARCH_CONVERT) == NULLDATAOFF && use_block == -1)
+			if (be16_to_cpu(bestsp[i]) == NULLDATAOFF && use_block == -1)
 				use_block = i;
-			else if (INT_GET(bestsp[i], ARCH_CONVERT) >= length) {
+			else if (be16_to_cpu(bestsp[i]) >= length) {
 				use_block = i;
 				break;
 			}
@@ -260,7 +260,7 @@ xfs_dir2_leaf_addname(
 	 * Now kill use_block if it refers to a missing block, so we
 	 * can use it as an indication of allocation needed.
 	 */
-	if (use_block != -1 && INT_GET(bestsp[use_block], ARCH_CONVERT) == NULLDATAOFF)
+	if (use_block != -1 && be16_to_cpu(bestsp[use_block]) == NULLDATAOFF)
 		use_block = -1;
 	/*
 	 * If we don't have enough free bytes but we can make enough
@@ -427,7 +427,7 @@ xfs_dir2_leaf_addname(
 	 * If the bests table needs to be changed, do it.
 	 * Log the change unless we've already done that.
 	 */
-	if (INT_GET(bestsp[use_block], ARCH_CONVERT) != be16_to_cpu(data->hdr.bestfree[0].length)) {
+	if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(data->hdr.bestfree[0].length)) {
 		bestsp[use_block] = data->hdr.bestfree[0].length;
 		if (!grown)
 			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
@@ -1203,8 +1203,8 @@ xfs_dir2_leaf_log_bests(
 	int			first,		/* first entry to log */
 	int			last)		/* last entry to log */
 {
-	xfs_dir2_data_off_t	*firstb;	/* pointer to first entry */
-	xfs_dir2_data_off_t	*lastb;		/* pointer to last entry */
+	__be16			*firstb;	/* pointer to first entry */
+	__be16			*lastb;		/* pointer to last entry */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
 
@@ -1437,7 +1437,7 @@ int						/* error */
 xfs_dir2_leaf_removename(
 	xfs_da_args_t		*args)		/* operation arguments */
 {
-	xfs_dir2_data_off_t	*bestsp;	/* leaf block best freespace */
+	__be16			*bestsp;	/* leaf block best freespace */
 	xfs_dir2_data_t		*data;		/* data block structure */
 	xfs_dir2_db_t		db;		/* data block number */
 	xfs_dabuf_t		*dbp;		/* data block buffer */
@@ -1480,7 +1480,7 @@ xfs_dir2_leaf_removename(
 	oldbest = be16_to_cpu(data->hdr.bestfree[0].length);
 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
 	bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
-	ASSERT(INT_GET(bestsp[db], ARCH_CONVERT) == oldbest);
+	ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
 	/*
 	 * Mark the former data entry unused.
 	 */
@@ -1542,7 +1542,7 @@ xfs_dir2_leaf_removename(
 			 * Look for the last active entry (i).
 			 */
 			for (i = db - 1; i > 0; i--) {
-				if (INT_GET(bestsp[i], ARCH_CONVERT) != NULLDATAOFF)
+				if (be16_to_cpu(bestsp[i]) != NULLDATAOFF)
 					break;
 			}
 			/*
@@ -1555,7 +1555,7 @@ xfs_dir2_leaf_removename(
 			xfs_dir2_leaf_log_tail(tp, lbp);
 			xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
 		} else
-			INT_SET(bestsp[db], ARCH_CONVERT, NULLDATAOFF);
+			bestsp[db] = cpu_to_be16(NULLDATAOFF);
 	}
 	/*
 	 * If the data block was not the first one, drop it.
@@ -1684,7 +1684,7 @@ xfs_dir2_leaf_trim_data(
 	xfs_dabuf_t		*lbp,		/* leaf buffer */
 	xfs_dir2_db_t		db)		/* data block number */
 {
-	xfs_dir2_data_off_t	*bestsp;	/* leaf bests table */
+	__be16			*bestsp;	/* leaf bests table */
 #ifdef DEBUG
 	xfs_dir2_data_t		*data;		/* data block structure */
 #endif
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
index 1393993d61e..7fb32e954ce 100644
--- a/fs/xfs/xfs_dir2_leaf.h
+++ b/fs/xfs/xfs_dir2_leaf.h
@@ -105,11 +105,10 @@ xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
  * Get address of the bests array in the single-leaf block.
  */
 #define	XFS_DIR2_LEAF_BESTS_P(ltp)	xfs_dir2_leaf_bests_p(ltp)
-static inline xfs_dir2_data_off_t *
+static inline __be16 *
 xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
 {
-	return (xfs_dir2_data_off_t *)
-		(ltp) - INT_GET((ltp)->bestcount, ARCH_CONVERT);
+	return (__be16 *)(ltp) - INT_GET((ltp)->bestcount, ARCH_CONVERT);
 }
 
 /*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index b051e2a09f3..ddbe453fade 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -114,7 +114,7 @@ xfs_dir2_leaf_to_node(
 	xfs_dabuf_t		*fbp;		/* freespace buffer */
 	xfs_dir2_db_t		fdb;		/* freespace block number */
 	xfs_dir2_free_t		*free;		/* freespace structure */
-	xfs_dir2_data_off_t	*from;		/* pointer to freespace entry */
+	__be16			*from;		/* pointer to freespace entry */
 	int			i;		/* leaf freespace index */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
@@ -159,7 +159,7 @@ xfs_dir2_leaf_to_node(
 	 */
 	for (i = n = 0, from = XFS_DIR2_LEAF_BESTS_P(ltp), to = free->bests;
 	     i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++, from++, to++) {
-		if ((off = INT_GET(*from, ARCH_CONVERT)) != NULLDATAOFF)
+		if ((off = be16_to_cpu(*from)) != NULLDATAOFF)
 			n++;
 		*to = cpu_to_be16(off);
 	}
-- 
cgit v1.2.3


From afbcb3f91903bcd34d470efe64b3738257178667 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:27:28 +1100
Subject: [XFS] endianess annotations for xfs_dir2_leaf_tail_t

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25487a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_dir2_block.c |  4 ++--
 fs/xfs/xfs_dir2_leaf.c  | 34 +++++++++++++++++-----------------
 fs/xfs/xfs_dir2_leaf.h  |  4 ++--
 fs/xfs/xfs_dir2_node.c  |  4 ++--
 4 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index e9d298f06ca..f70640c6b70 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -914,11 +914,11 @@ xfs_dir2_leaf_to_block(
 	 */
 	while (dp->i_d.di_size > mp->m_dirblksize) {
 		bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
-		if (be16_to_cpu(bestsp[INT_GET(ltp->bestcount, ARCH_CONVERT) - 1]) ==
+		if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
 		    mp->m_dirblksize - (uint)sizeof(block->hdr)) {
 			if ((error =
 			    xfs_dir2_leaf_trim_data(args, lbp,
-				    (xfs_dir2_db_t)(INT_GET(ltp->bestcount, ARCH_CONVERT) - 1))))
+				    (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
 				goto out;
 		} else {
 			error = 0;
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 568e0feca03..9abecd207c4 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -141,7 +141,7 @@ xfs_dir2_block_to_leaf(
 	 * Set up leaf tail and bests table.
 	 */
 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
-	INT_SET(ltp->bestcount, ARCH_CONVERT, 1);
+	ltp->bestcount = cpu_to_be32(1);
 	bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
 	bestsp[0] =  block->hdr.bestfree[0].length;
 	/*
@@ -227,7 +227,7 @@ xfs_dir2_leaf_addname(
 		if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
 			continue;
 		i = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
-		ASSERT(i < INT_GET(ltp->bestcount, ARCH_CONVERT));
+		ASSERT(i < be32_to_cpu(ltp->bestcount));
 		ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF);
 		if (be16_to_cpu(bestsp[i]) >= length) {
 			use_block = i;
@@ -238,7 +238,7 @@ xfs_dir2_leaf_addname(
 	 * Didn't find a block yet, linear search all the data blocks.
 	 */
 	if (use_block == -1) {
-		for (i = 0; i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++) {
+		for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) {
 			/*
 			 * Remember a block we see that's missing.
 			 */
@@ -358,13 +358,13 @@ xfs_dir2_leaf_addname(
 		 * If we're adding a new data block on the end we need to
 		 * extend the bests table.  Copy it up one entry.
 		 */
-		if (use_block >= INT_GET(ltp->bestcount, ARCH_CONVERT)) {
+		if (use_block >= be32_to_cpu(ltp->bestcount)) {
 			bestsp--;
 			memmove(&bestsp[0], &bestsp[1],
-				INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(bestsp[0]));
-			INT_MOD(ltp->bestcount, ARCH_CONVERT, +1);
+				be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
+			be32_add(&ltp->bestcount, 1);
 			xfs_dir2_leaf_log_tail(tp, lbp);
-			xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+			xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
 		}
 		/*
 		 * If we're filling in a previously empty block just log it.
@@ -1537,7 +1537,7 @@ xfs_dir2_leaf_removename(
 		 * If this is the last data block then compact the
 		 * bests table by getting rid of entries.
 		 */
-		if (db == INT_GET(ltp->bestcount, ARCH_CONVERT) - 1) {
+		if (db == be32_to_cpu(ltp->bestcount) - 1) {
 			/*
 			 * Look for the last active entry (i).
 			 */
@@ -1550,10 +1550,10 @@ xfs_dir2_leaf_removename(
 			 * end are removed.
 			 */
 			memmove(&bestsp[db - i], bestsp,
-				(INT_GET(ltp->bestcount, ARCH_CONVERT) - (db - i)) * sizeof(*bestsp));
-			INT_MOD(ltp->bestcount, ARCH_CONVERT, -(db - i));
+				(be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
+			be32_add(&ltp->bestcount, -(db - i));
 			xfs_dir2_leaf_log_tail(tp, lbp);
-			xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+			xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
 		} else
 			bestsp[db] = cpu_to_be16(NULLDATAOFF);
 	}
@@ -1719,7 +1719,7 @@ xfs_dir2_leaf_trim_data(
 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
 	ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) ==
 	       mp->m_dirblksize - (uint)sizeof(data->hdr));
-	ASSERT(db == INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+	ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
 	/*
 	 * Get rid of the data block.
 	 */
@@ -1732,10 +1732,10 @@ xfs_dir2_leaf_trim_data(
 	 * Eliminate the last bests entry from the table.
 	 */
 	bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
-	INT_MOD(ltp->bestcount, ARCH_CONVERT, -1);
-	memmove(&bestsp[1], &bestsp[0], INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(*bestsp));
+	be32_add(&ltp->bestcount, -1);
+	memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
 	xfs_dir2_leaf_log_tail(tp, lbp);
-	xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+	xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
 	return 0;
 }
 
@@ -1848,8 +1848,8 @@ xfs_dir2_node_to_leaf(
 	 * Set up the leaf bests table.
 	 */
 	memcpy(XFS_DIR2_LEAF_BESTS_P(ltp), free->bests,
-		INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(leaf->bests[0]));
-	xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+		be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0]));
+	xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
 	xfs_dir2_leaf_log_tail(tp, lbp);
 	xfs_dir2_leaf_check(dp, lbp);
 	/*
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
index 7fb32e954ce..fcd3b7dea0f 100644
--- a/fs/xfs/xfs_dir2_leaf.h
+++ b/fs/xfs/xfs_dir2_leaf.h
@@ -62,7 +62,7 @@ typedef struct xfs_dir2_leaf_entry {
  * Leaf block tail.
  */
 typedef struct xfs_dir2_leaf_tail {
-	__uint32_t		bestcount;
+	__be32			bestcount;
 } xfs_dir2_leaf_tail_t;
 
 /*
@@ -108,7 +108,7 @@ xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
 static inline __be16 *
 xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
 {
-	return (__be16 *)(ltp) - INT_GET((ltp)->bestcount, ARCH_CONVERT);
+	return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
 }
 
 /*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index ddbe453fade..c32894cb622 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -151,14 +151,14 @@ xfs_dir2_leaf_to_node(
 	 */
 	free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
 	free->hdr.firstdb = 0;
-	ASSERT(INT_GET(ltp->bestcount, ARCH_CONVERT) <= (uint)dp->i_d.di_size / mp->m_dirblksize);
+	ASSERT(be32_to_cpu(ltp->bestcount) <= (uint)dp->i_d.di_size / mp->m_dirblksize);
 	free->hdr.nvalid = ltp->bestcount;
 	/*
 	 * Copy freespace entries from the leaf block to the new block.
 	 * Count active entries.
 	 */
 	for (i = n = 0, from = XFS_DIR2_LEAF_BESTS_P(ltp), to = free->bests;
-	     i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++, from++, to++) {
+	     i < be32_to_cpu(ltp->bestcount); i++, from++, to++) {
 		if ((off = be16_to_cpu(*from)) != NULLDATAOFF)
 			n++;
 		*to = cpu_to_be16(off);
-- 
cgit v1.2.3


From ad354eb34eb354eedc483d1e89e17710165bd2db Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:27:37 +1100
Subject: [XFS] endianess annotations for xfs_dir2_data_unused_t

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25489a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_dir2_block.c | 27 ++++++++++----------
 fs/xfs/xfs_dir2_data.c  | 68 ++++++++++++++++++++++++-------------------------
 fs/xfs/xfs_dir2_data.h  |  8 +++---
 fs/xfs/xfs_dir2_leaf.c  | 12 ++++-----
 fs/xfs/xfs_dir2_sf.c    |  4 +--
 5 files changed, 58 insertions(+), 61 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index f70640c6b70..8b8aed77acd 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -130,7 +130,7 @@ xfs_dir2_block_addname(
 		 * the space before the first leaf entry needs to be free so it
 		 * can be expanded to hold the pointer to the new entry.
 		 */
-		if (INT_GET(enddup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG)
+		if (be16_to_cpu(enddup->freetag) != XFS_DIR2_DATA_FREE_TAG)
 			dup = enddup = NULL;
 		/*
 		 * Check out the biggest freespace and see if it's the same one.
@@ -143,7 +143,7 @@ xfs_dir2_block_addname(
 				 * It is the biggest freespace, is it too small
 				 * to hold the new leaf too?
 				 */
-				if (INT_GET(dup->length, ARCH_CONVERT) < len + (uint)sizeof(*blp)) {
+				if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) {
 					/*
 					 * Yes, we use the second-largest
 					 * entry instead if it works.
@@ -160,7 +160,7 @@ xfs_dir2_block_addname(
 				 * Not the same free entry,
 				 * just check its length.
 				 */
-				if (INT_GET(dup->length, ARCH_CONVERT) < len) {
+				if (be16_to_cpu(dup->length) < len) {
 					dup = NULL;
 				}
 			}
@@ -192,8 +192,8 @@ xfs_dir2_block_addname(
 		 * If it's not free then the data will go where the
 		 * leaf data starts now, if it works at all.
 		 */
-		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
-			if (INT_GET(dup->length, ARCH_CONVERT) + (INT_GET(btp->stale, ARCH_CONVERT) - 1) *
+		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+			if (be16_to_cpu(dup->length) + (INT_GET(btp->stale, ARCH_CONVERT) - 1) *
 			    (uint)sizeof(*blp) < len)
 				dup = NULL;
 		} else if ((INT_GET(btp->stale, ARCH_CONVERT) - 1) * (uint)sizeof(*blp) < len)
@@ -310,7 +310,7 @@ xfs_dir2_block_addname(
 		 */
 		xfs_dir2_data_use_free(tp, bp, enddup,
 			(xfs_dir2_data_aoff_t)
-			((char *)enddup - (char *)block + INT_GET(enddup->length, ARCH_CONVERT) -
+			((char *)enddup - (char *)block + be16_to_cpu(enddup->length) -
 			 sizeof(*blp)),
 			(xfs_dir2_data_aoff_t)sizeof(*blp),
 			&needlog, &needscan);
@@ -484,8 +484,8 @@ xfs_dir2_block_getdents(
 		/*
 		 * Unused, skip it.
 		 */
-		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
-			ptr += INT_GET(dup->length, ARCH_CONVERT);
+		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+			ptr += be16_to_cpu(dup->length);
 			continue;
 		}
 
@@ -948,7 +948,8 @@ xfs_dir2_leaf_to_block(
 	/*
 	 * If it's not free or is too short we can't do it.
 	 */
-	if (INT_GET(dup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG || INT_GET(dup->length, ARCH_CONVERT) < size) {
+	if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG ||
+	    be16_to_cpu(dup->length) < size) {
 		error = 0;
 		goto out;
 	}
@@ -1122,7 +1123,7 @@ xfs_dir2_sf_to_block(
 	 */
 	xfs_dir2_data_use_free(tp, bp, dup,
 		(xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
-		INT_GET(dup->length, ARCH_CONVERT), &needlog, &needscan);
+		be16_to_cpu(dup->length), &needlog, &needscan);
 	/*
 	 * Create entry for .
 	 */
@@ -1175,15 +1176,15 @@ xfs_dir2_sf_to_block(
 		if (offset < newoffset) {
 			dup = (xfs_dir2_data_unused_t *)
 			      ((char *)block + offset);
-			INT_SET(dup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
-			INT_SET(dup->length, ARCH_CONVERT, newoffset - offset);
+			dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
+			dup->length = cpu_to_be16(newoffset - offset);
 			INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT,
 				(xfs_dir2_data_off_t)
 				((char *)dup - (char *)block));
 			xfs_dir2_data_log_unused(tp, bp, dup);
 			(void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
 				dup, &dummy);
-			offset += INT_GET(dup->length, ARCH_CONVERT);
+			offset += be16_to_cpu(dup->length);
 			continue;
 		}
 		/*
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index fd6b7c03018..48b4906c8e8 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -108,7 +108,7 @@ xfs_dir2_data_check(
 		 * If we find it, account for that, else make sure it
 		 * doesn't need to be there.
 		 */
-		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
 			ASSERT(lastfree == 0);
 			ASSERT(INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT) ==
 			       (char *)dup - (char *)d);
@@ -118,10 +118,10 @@ xfs_dir2_data_check(
 				ASSERT((freeseen & (1 << i)) == 0);
 				freeseen |= 1 << i;
 			} else {
-				ASSERT(INT_GET(dup->length, ARCH_CONVERT) <=
+				ASSERT(be16_to_cpu(dup->length) <=
 				       be16_to_cpu(bf[2].length));
 			}
-			p += INT_GET(dup->length, ARCH_CONVERT);
+			p += be16_to_cpu(dup->length);
 			lastfree = 1;
 			continue;
 		}
@@ -205,12 +205,12 @@ xfs_dir2_data_freefind(
 		ASSERT(seenzero == 0);
 		if (be16_to_cpu(dfp->offset) == off) {
 			matched = 1;
-			ASSERT(be16_to_cpu(dfp->length) == INT_GET(dup->length, ARCH_CONVERT));
+			ASSERT(dfp->length == dup->length);
 		} else if (off < be16_to_cpu(dfp->offset))
-			ASSERT(off + INT_GET(dup->length, ARCH_CONVERT) <= be16_to_cpu(dfp->offset));
+			ASSERT(off + be16_to_cpu(dup->length) <= be16_to_cpu(dfp->offset));
 		else
 			ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
-		ASSERT(matched || be16_to_cpu(dfp->length) >= INT_GET(dup->length, ARCH_CONVERT));
+		ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length));
 		if (dfp > &d->hdr.bestfree[0])
 			ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
 	}
@@ -219,7 +219,7 @@ xfs_dir2_data_freefind(
 	 * If this is smaller than the smallest bestfree entry,
 	 * it can't be there since they're sorted.
 	 */
-	if (INT_GET(dup->length, ARCH_CONVERT) <
+	if (be16_to_cpu(dup->length) <
 	    be16_to_cpu(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length))
 		return NULL;
 	/*
@@ -364,11 +364,11 @@ xfs_dir2_data_freescan(
 		/*
 		 * If it's a free entry, insert it.
 		 */
-		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
 			ASSERT((char *)dup - (char *)d ==
 			       INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT));
 			xfs_dir2_data_freeinsert(d, dup, loghead);
-			p += INT_GET(dup->length, ARCH_CONVERT);
+			p += be16_to_cpu(dup->length);
 		}
 		/*
 		 * For active entries, check their tags and skip them.
@@ -428,11 +428,11 @@ xfs_dir2_data_init(
 	 * Set up an unused entry for the block's body.
 	 */
 	dup = &d->u[0].unused;
-	INT_SET(dup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
+	dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 
 	t=mp->m_dirblksize - (uint)sizeof(d->hdr);
 	d->hdr.bestfree[0].length = cpu_to_be16(t);
-	INT_SET(dup->length, ARCH_CONVERT, t);
+	dup->length = cpu_to_be16(t);
 	INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT,
 		(xfs_dir2_data_off_t)((char *)dup - (char *)d));
 	/*
@@ -554,7 +554,7 @@ xfs_dir2_data_make_free(
 
 		tagp = (xfs_dir2_data_off_t *)((char *)d + offset) - 1;
 		prevdup = (xfs_dir2_data_unused_t *)((char *)d + INT_GET(*tagp, ARCH_CONVERT));
-		if (INT_GET(prevdup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG)
+		if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
 			prevdup = NULL;
 	} else
 		prevdup = NULL;
@@ -565,7 +565,7 @@ xfs_dir2_data_make_free(
 	if ((char *)d + offset + len < endptr) {
 		postdup =
 			(xfs_dir2_data_unused_t *)((char *)d + offset + len);
-		if (INT_GET(postdup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG)
+		if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
 			postdup = NULL;
 	} else
 		postdup = NULL;
@@ -593,7 +593,7 @@ xfs_dir2_data_make_free(
 		/*
 		 * Fix up the new big freespace.
 		 */
-		INT_MOD(prevdup->length, ARCH_CONVERT, len + INT_GET(postdup->length, ARCH_CONVERT));
+		be16_add(&prevdup->length, len + be16_to_cpu(postdup->length));
 		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(prevdup), ARCH_CONVERT,
 			(xfs_dir2_data_off_t)((char *)prevdup - (char *)d));
 		xfs_dir2_data_log_unused(tp, bp, prevdup);
@@ -617,7 +617,7 @@ xfs_dir2_data_make_free(
 			 */
 			dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp);
 			ASSERT(dfp == &d->hdr.bestfree[0]);
-			ASSERT(be16_to_cpu(dfp->length) == INT_GET(prevdup->length, ARCH_CONVERT));
+			ASSERT(dfp->length == prevdup->length);
 			ASSERT(!dfp[1].length);
 			ASSERT(!dfp[2].length);
 		}
@@ -627,7 +627,7 @@ xfs_dir2_data_make_free(
 	 */
 	else if (prevdup) {
 		dfp = xfs_dir2_data_freefind(d, prevdup);
-		INT_MOD(prevdup->length, ARCH_CONVERT, len);
+		be16_add(&prevdup->length, len);
 		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(prevdup), ARCH_CONVERT,
 			(xfs_dir2_data_off_t)((char *)prevdup - (char *)d));
 		xfs_dir2_data_log_unused(tp, bp, prevdup);
@@ -644,7 +644,7 @@ xfs_dir2_data_make_free(
 		 * Otherwise we need a scan if the new entry is big enough.
 		 */
 		else {
-			needscan = INT_GET(prevdup->length, ARCH_CONVERT) >
+			needscan = be16_to_cpu(prevdup->length) >
 				   be16_to_cpu(d->hdr.bestfree[2].length);
 		}
 	}
@@ -654,8 +654,8 @@ xfs_dir2_data_make_free(
 	else if (postdup) {
 		dfp = xfs_dir2_data_freefind(d, postdup);
 		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
-		INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
-		INT_SET(newdup->length, ARCH_CONVERT, len + INT_GET(postdup->length, ARCH_CONVERT));
+		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
+		newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
 		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT,
 			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
 		xfs_dir2_data_log_unused(tp, bp, newdup);
@@ -672,7 +672,7 @@ xfs_dir2_data_make_free(
 		 * Otherwise we need a scan if the new entry is big enough.
 		 */
 		else {
-			needscan = INT_GET(newdup->length, ARCH_CONVERT) >
+			needscan = be16_to_cpu(newdup->length) >
 				   be16_to_cpu(d->hdr.bestfree[2].length);
 		}
 	}
@@ -681,8 +681,8 @@ xfs_dir2_data_make_free(
 	 */
 	else {
 		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
-		INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
-		INT_SET(newdup->length, ARCH_CONVERT, len);
+		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
+		newdup->length = cpu_to_be16(len);
 		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT,
 			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
 		xfs_dir2_data_log_unused(tp, bp, newdup);
@@ -716,15 +716,15 @@ xfs_dir2_data_use_free(
 	d = bp->data;
 	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
 	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
-	ASSERT(INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG);
+	ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
 	ASSERT(offset >= (char *)dup - (char *)d);
-	ASSERT(offset + len <= (char *)dup + INT_GET(dup->length, ARCH_CONVERT) - (char *)d);
+	ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d);
 	ASSERT((char *)dup - (char *)d == INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT));
 	/*
 	 * Look up the entry in the bestfree table.
 	 */
 	dfp = xfs_dir2_data_freefind(d, dup);
-	oldlen = INT_GET(dup->length, ARCH_CONVERT);
+	oldlen = be16_to_cpu(dup->length);
 	ASSERT(dfp || oldlen <= be16_to_cpu(d->hdr.bestfree[2].length));
 	/*
 	 * Check for alignment with front and back of the entry.
@@ -750,8 +750,8 @@ xfs_dir2_data_use_free(
 	 */
 	else if (matchfront) {
 		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
-		INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
-		INT_SET(newdup->length, ARCH_CONVERT, oldlen - len);
+		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
+		newdup->length = cpu_to_be16(oldlen - len);
 		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT,
 			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
 		xfs_dir2_data_log_unused(tp, bp, newdup);
@@ -762,7 +762,7 @@ xfs_dir2_data_use_free(
 			xfs_dir2_data_freeremove(d, dfp, needlogp);
 			dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
 			ASSERT(dfp != NULL);
-			ASSERT(be16_to_cpu(dfp->length) == INT_GET(newdup->length, ARCH_CONVERT));
+			ASSERT(dfp->length == newdup->length);
 			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d);
 			/*
 			 * If we got inserted at the last slot,
@@ -778,8 +778,7 @@ xfs_dir2_data_use_free(
 	 */
 	else if (matchback) {
 		newdup = dup;
-		INT_SET(newdup->length, ARCH_CONVERT, (xfs_dir2_data_off_t)
-			(((char *)d + offset) - (char *)newdup));
+		newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
 		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT,
 			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
 		xfs_dir2_data_log_unused(tp, bp, newdup);
@@ -790,7 +789,7 @@ xfs_dir2_data_use_free(
 			xfs_dir2_data_freeremove(d, dfp, needlogp);
 			dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
 			ASSERT(dfp != NULL);
-			ASSERT(be16_to_cpu(dfp->length) == INT_GET(newdup->length, ARCH_CONVERT));
+			ASSERT(dfp->length == newdup->length);
 			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d);
 			/*
 			 * If we got inserted at the last slot,
@@ -806,14 +805,13 @@ xfs_dir2_data_use_free(
 	 */
 	else {
 		newdup = dup;
-		INT_SET(newdup->length, ARCH_CONVERT, (xfs_dir2_data_off_t)
-			(((char *)d + offset) - (char *)newdup));
+		newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
 		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT,
 			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
-		INT_SET(newdup2->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
-		INT_SET(newdup2->length, ARCH_CONVERT, oldlen - len - INT_GET(newdup->length, ARCH_CONVERT));
+		newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
+		newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
 		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup2), ARCH_CONVERT,
 			(xfs_dir2_data_off_t)((char *)newdup2 - (char *)d));
 		xfs_dir2_data_log_unused(tp, bp, newdup2);
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index dd8b8652363..28adddb48e8 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -97,10 +97,10 @@ typedef struct xfs_dir2_data_entry {
  * Tag appears as the last 2 bytes.
  */
 typedef struct xfs_dir2_data_unused {
-	__uint16_t		freetag;	/* XFS_DIR2_DATA_FREE_TAG */
-	xfs_dir2_data_off_t	length;		/* total free length */
+	__be16			freetag;	/* XFS_DIR2_DATA_FREE_TAG */
+	__be16			length;		/* total free length */
 						/* variable offset */
-	xfs_dir2_data_off_t	tag;		/* starting offset of us */
+	__be16			tag;		/* starting offset of us */
 } xfs_dir2_data_unused_t;
 
 typedef union {
@@ -151,7 +151,7 @@ static inline xfs_dir2_data_off_t *
 xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
 {
 	return (xfs_dir2_data_off_t *) \
-		 ((char *)(dup) + INT_GET((dup)->length, ARCH_CONVERT) \
+		 ((char *)(dup) + be16_to_cpu((dup)->length) \
 				- (uint)sizeof(xfs_dir2_data_off_t));
 }
 
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 9abecd207c4..cbd371d9e98 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -395,7 +395,7 @@ xfs_dir2_leaf_addname(
 	 */
 	dup = (xfs_dir2_data_unused_t *)
 	      ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset));
-	ASSERT(INT_GET(dup->length, ARCH_CONVERT) >= length);
+	ASSERT(be16_to_cpu(dup->length) >= length);
 	needscan = needlog = 0;
 	/*
 	 * Mark the initial part of our freespace in use for the new entry.
@@ -1047,11 +1047,10 @@ xfs_dir2_leaf_getdents(
 				while ((char *)ptr - (char *)data < byteoff) {
 					dup = (xfs_dir2_data_unused_t *)ptr;
 
-					if (INT_GET(dup->freetag, ARCH_CONVERT)
+					if (be16_to_cpu(dup->freetag)
 						  == XFS_DIR2_DATA_FREE_TAG) {
 
-						length = INT_GET(dup->length,
-								 ARCH_CONVERT);
+						length = be16_to_cpu(dup->length);
 						ptr += length;
 						continue;
 					}
@@ -1080,9 +1079,8 @@ xfs_dir2_leaf_getdents(
 		/*
 		 * No, it's unused, skip over it.
 		 */
-		if (INT_GET(dup->freetag, ARCH_CONVERT)
-						== XFS_DIR2_DATA_FREE_TAG) {
-			length = INT_GET(dup->length, ARCH_CONVERT);
+		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+			length = be16_to_cpu(dup->length);
 			ptr += length;
 			curoff += length;
 			continue;
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index ec8e7476c8b..3a571d8cf1f 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -220,8 +220,8 @@ xfs_dir2_block_to_sf(
 		 * If it's unused, just skip over it.
 		 */
 		dup = (xfs_dir2_data_unused_t *)ptr;
-		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
-			ptr += INT_GET(dup->length, ARCH_CONVERT);
+		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+			ptr += be16_to_cpu(dup->length);
 			continue;
 		}
 		dep = (xfs_dir2_data_entry_t *)ptr;
-- 
cgit v1.2.3


From 1fba9f7fe2164553557e26583e6feb5299cf9f76 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:27:47 +1100
Subject: [XFS] endianess annotations for XFS_DIR2_DATA_UNUSED_TAG_P

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25490a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_dir2_block.c |  3 +--
 fs/xfs/xfs_dir2_data.c  | 41 ++++++++++++++++++++---------------------
 fs/xfs/xfs_dir2_data.h  |  7 +++----
 3 files changed, 24 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 8b8aed77acd..2c1fcdc7a76 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -1178,8 +1178,7 @@ xfs_dir2_sf_to_block(
 			      ((char *)block + offset);
 			dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 			dup->length = cpu_to_be16(newoffset - offset);
-			INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT,
-				(xfs_dir2_data_off_t)
+			*XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16(
 				((char *)dup - (char *)block));
 			xfs_dir2_data_log_unused(tp, bp, dup);
 			(void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 48b4906c8e8..acb61132dec 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -110,7 +110,7 @@ xfs_dir2_data_check(
 		 */
 		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
 			ASSERT(lastfree == 0);
-			ASSERT(INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT) ==
+			ASSERT(be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)) ==
 			       (char *)dup - (char *)d);
 			dfp = xfs_dir2_data_freefind(d, dup);
 			if (dfp) {
@@ -366,7 +366,7 @@ xfs_dir2_data_freescan(
 		 */
 		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
 			ASSERT((char *)dup - (char *)d ==
-			       INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT));
+			       be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)));
 			xfs_dir2_data_freeinsert(d, dup, loghead);
 			p += be16_to_cpu(dup->length);
 		}
@@ -433,8 +433,7 @@ xfs_dir2_data_init(
 	t=mp->m_dirblksize - (uint)sizeof(d->hdr);
 	d->hdr.bestfree[0].length = cpu_to_be16(t);
 	dup->length = cpu_to_be16(t);
-	INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT,
-		(xfs_dir2_data_off_t)((char *)dup - (char *)d));
+	*XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16((char *)dup - (char *)d);
 	/*
 	 * Log it and return it.
 	 */
@@ -594,8 +593,8 @@ xfs_dir2_data_make_free(
 		 * Fix up the new big freespace.
 		 */
 		be16_add(&prevdup->length, len + be16_to_cpu(postdup->length));
-		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(prevdup), ARCH_CONVERT,
-			(xfs_dir2_data_off_t)((char *)prevdup - (char *)d));
+		*XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) =
+			cpu_to_be16((char *)prevdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, prevdup);
 		if (!needscan) {
 			/*
@@ -628,8 +627,8 @@ xfs_dir2_data_make_free(
 	else if (prevdup) {
 		dfp = xfs_dir2_data_freefind(d, prevdup);
 		be16_add(&prevdup->length, len);
-		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(prevdup), ARCH_CONVERT,
-			(xfs_dir2_data_off_t)((char *)prevdup - (char *)d));
+		*XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) =
+			cpu_to_be16((char *)prevdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, prevdup);
 		/*
 		 * If the previous entry was in the table, the new entry
@@ -656,8 +655,8 @@ xfs_dir2_data_make_free(
 		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
-		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT,
-			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		/*
 		 * If the following entry was in the table, the new entry
@@ -683,8 +682,8 @@ xfs_dir2_data_make_free(
 		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup->length = cpu_to_be16(len);
-		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT,
-			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		(void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
 	}
@@ -719,7 +718,7 @@ xfs_dir2_data_use_free(
 	ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
 	ASSERT(offset >= (char *)dup - (char *)d);
 	ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d);
-	ASSERT((char *)dup - (char *)d == INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P(dup), ARCH_CONVERT));
+	ASSERT((char *)dup - (char *)d == be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)));
 	/*
 	 * Look up the entry in the bestfree table.
 	 */
@@ -752,8 +751,8 @@ xfs_dir2_data_use_free(
 		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup->length = cpu_to_be16(oldlen - len);
-		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT,
-			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		/*
 		 * If it was in the table, remove it and add the new one.
@@ -779,8 +778,8 @@ xfs_dir2_data_use_free(
 	else if (matchback) {
 		newdup = dup;
 		newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
-		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT,
-			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		/*
 		 * If it was in the table, remove it and add the new one.
@@ -806,14 +805,14 @@ xfs_dir2_data_use_free(
 	else {
 		newdup = dup;
 		newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
-		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup), ARCH_CONVERT,
-			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
 		newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
-		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P(newdup2), ARCH_CONVERT,
-			(xfs_dir2_data_off_t)((char *)newdup2 - (char *)d));
+		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup2) =
+			cpu_to_be16((char *)newdup2 - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup2);
 		/*
 		 * If the old entry was in the table, we need to scan
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index 28adddb48e8..479b59f50db 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -147,12 +147,11 @@ xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
  */
 #define	XFS_DIR2_DATA_UNUSED_TAG_P(dup) \
 	xfs_dir2_data_unused_tag_p(dup)
-static inline xfs_dir2_data_off_t *
+static inline __be16 *
 xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
 {
-	return (xfs_dir2_data_off_t *) \
-		 ((char *)(dup) + be16_to_cpu((dup)->length) \
-				- (uint)sizeof(xfs_dir2_data_off_t));
+	return (__be16 *)((char *)dup +
+			be16_to_cpu(dup->length) - sizeof(__be16));
 }
 
 /*
-- 
cgit v1.2.3


From e922fffa4188ef6207cd3afef7f4d33bf4a9ca64 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:27:56 +1100
Subject: [XFS] endianess annotations for xfs_dir2_block_tail_t

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25491a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_dir2_block.c | 52 ++++++++++++++++++++++++-------------------------
 fs/xfs/xfs_dir2_block.h |  7 +++----
 fs/xfs/xfs_dir2_data.c  | 10 +++++-----
 fs/xfs/xfs_dir2_leaf.c  |  6 +++---
 fs/xfs/xfs_dir2_sf.c    |  2 +-
 5 files changed, 38 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 2c1fcdc7a76..6f3b99dfe55 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -193,10 +193,10 @@ xfs_dir2_block_addname(
 		 * leaf data starts now, if it works at all.
 		 */
 		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-			if (be16_to_cpu(dup->length) + (INT_GET(btp->stale, ARCH_CONVERT) - 1) *
+			if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) *
 			    (uint)sizeof(*blp) < len)
 				dup = NULL;
-		} else if ((INT_GET(btp->stale, ARCH_CONVERT) - 1) * (uint)sizeof(*blp) < len)
+		} else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len)
 			dup = NULL;
 		else
 			dup = (xfs_dir2_data_unused_t *)blp;
@@ -242,7 +242,7 @@ xfs_dir2_block_addname(
 		int	fromidx;		/* source leaf index */
 		int	toidx;			/* target leaf index */
 
-		for (fromidx = toidx = INT_GET(btp->count, ARCH_CONVERT) - 1,
+		for (fromidx = toidx = be32_to_cpu(btp->count) - 1,
 			highstale = lfloghigh = -1;
 		     fromidx >= 0;
 		     fromidx--) {
@@ -259,15 +259,15 @@ xfs_dir2_block_addname(
 				blp[toidx] = blp[fromidx];
 			toidx--;
 		}
-		lfloglow = toidx + 1 - (INT_GET(btp->stale, ARCH_CONVERT) - 1);
-		lfloghigh -= INT_GET(btp->stale, ARCH_CONVERT) - 1;
-		INT_MOD(btp->count, ARCH_CONVERT, -(INT_GET(btp->stale, ARCH_CONVERT) - 1));
+		lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1);
+		lfloghigh -= be32_to_cpu(btp->stale) - 1;
+		be32_add(&btp->count, -(be32_to_cpu(btp->stale) - 1));
 		xfs_dir2_data_make_free(tp, bp,
 			(xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
-			(xfs_dir2_data_aoff_t)((INT_GET(btp->stale, ARCH_CONVERT) - 1) * sizeof(*blp)),
+			(xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
 			&needlog, &needscan);
-		blp += INT_GET(btp->stale, ARCH_CONVERT) - 1;
-		INT_SET(btp->stale, ARCH_CONVERT, 1);
+		blp += be32_to_cpu(btp->stale) - 1;
+		btp->stale = cpu_to_be32(1);
 		/*
 		 * If we now need to rebuild the bestfree map, do so.
 		 * This needs to happen before the next call to use_free.
@@ -282,14 +282,14 @@ xfs_dir2_block_addname(
 	 * Set leaf logging boundaries to impossible state.
 	 * For the no-stale case they're set explicitly.
 	 */
-	else if (INT_GET(btp->stale, ARCH_CONVERT)) {
-		lfloglow = INT_GET(btp->count, ARCH_CONVERT);
+	else if (btp->stale) {
+		lfloglow = be32_to_cpu(btp->count);
 		lfloghigh = -1;
 	}
 	/*
 	 * Find the slot that's first lower than our hash value, -1 if none.
 	 */
-	for (low = 0, high = INT_GET(btp->count, ARCH_CONVERT) - 1; low <= high; ) {
+	for (low = 0, high = be32_to_cpu(btp->count) - 1; low <= high; ) {
 		mid = (low + high) >> 1;
 		if ((hash = INT_GET(blp[mid].hashval, ARCH_CONVERT)) == args->hashval)
 			break;
@@ -317,7 +317,7 @@ xfs_dir2_block_addname(
 		/*
 		 * Update the tail (entry count).
 		 */
-		INT_MOD(btp->count, ARCH_CONVERT, +1);
+		be32_add(&btp->count, 1);
 		/*
 		 * If we now need to rebuild the bestfree map, do so.
 		 * This needs to happen before the next call to use_free.
@@ -349,7 +349,7 @@ xfs_dir2_block_addname(
 		     lowstale--)
 			continue;
 		for (highstale = mid + 1;
-		     highstale < INT_GET(btp->count, ARCH_CONVERT) &&
+		     highstale < be32_to_cpu(btp->count) &&
 			INT_GET(blp[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR &&
 			(lowstale < 0 || mid - lowstale > highstale - mid);
 		     highstale++)
@@ -358,7 +358,7 @@ xfs_dir2_block_addname(
 		 * Move entries toward the low-numbered stale entry.
 		 */
 		if (lowstale >= 0 &&
-		    (highstale == INT_GET(btp->count, ARCH_CONVERT) ||
+		    (highstale == be32_to_cpu(btp->count) ||
 		     mid - lowstale <= highstale - mid)) {
 			if (mid - lowstale)
 				memmove(&blp[lowstale], &blp[lowstale + 1],
@@ -370,7 +370,7 @@ xfs_dir2_block_addname(
 		 * Move entries toward the high-numbered stale entry.
 		 */
 		else {
-			ASSERT(highstale < INT_GET(btp->count, ARCH_CONVERT));
+			ASSERT(highstale < be32_to_cpu(btp->count));
 			mid++;
 			if (highstale - mid)
 				memmove(&blp[mid + 1], &blp[mid],
@@ -378,7 +378,7 @@ xfs_dir2_block_addname(
 			lfloglow = MIN(mid, lfloglow);
 			lfloghigh = MAX(highstale, lfloghigh);
 		}
-		INT_MOD(btp->stale, ARCH_CONVERT, -1);
+		be32_add(&btp->stale, -1);
 	}
 	/*
 	 * Point to the new data entry.
@@ -673,7 +673,7 @@ xfs_dir2_block_lookup_int(
 	 * Loop doing a binary search for our hash value.
 	 * Find our entry, ENOENT if it's not there.
 	 */
-	for (low = 0, high = INT_GET(btp->count, ARCH_CONVERT) - 1; ; ) {
+	for (low = 0, high = be32_to_cpu(btp->count) - 1; ; ) {
 		ASSERT(low <= high);
 		mid = (low + high) >> 1;
 		if ((hash = INT_GET(blp[mid].hashval, ARCH_CONVERT)) == args->hashval)
@@ -716,7 +716,7 @@ xfs_dir2_block_lookup_int(
 			*entno = mid;
 			return 0;
 		}
-	} while (++mid < INT_GET(btp->count, ARCH_CONVERT) && INT_GET(blp[mid].hashval, ARCH_CONVERT) == hash);
+	} while (++mid < be32_to_cpu(btp->count) && INT_GET(blp[mid].hashval, ARCH_CONVERT) == hash);
 	/*
 	 * No match, release the buffer and return ENOENT.
 	 */
@@ -777,7 +777,7 @@ xfs_dir2_block_removename(
 	/*
 	 * Fix up the block tail.
 	 */
-	INT_MOD(btp->stale, ARCH_CONVERT, +1);
+	be32_add(&btp->stale, 1);
 	xfs_dir2_block_log_tail(tp, bp);
 	/*
 	 * Remove the leaf entry by marking it stale.
@@ -968,7 +968,7 @@ xfs_dir2_leaf_to_block(
 	 * Initialize the block tail.
 	 */
 	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	INT_SET(btp->count, ARCH_CONVERT, INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT));
+	btp->count = cpu_to_be32(INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT));
 	btp->stale = 0;
 	xfs_dir2_block_log_tail(tp, dbp);
 	/*
@@ -980,8 +980,8 @@ xfs_dir2_leaf_to_block(
 			continue;
 		lep[to++] = leaf->ents[from];
 	}
-	ASSERT(to == INT_GET(btp->count, ARCH_CONVERT));
-	xfs_dir2_block_log_leaf(tp, dbp, 0, INT_GET(btp->count, ARCH_CONVERT) - 1);
+	ASSERT(to == be32_to_cpu(btp->count));
+	xfs_dir2_block_log_leaf(tp, dbp, 0, be32_to_cpu(btp->count) - 1);
 	/*
 	 * Scan the bestfree if we need it and log the data block header.
 	 */
@@ -1114,7 +1114,7 @@ xfs_dir2_sf_to_block(
 	 * Fill in the tail.
 	 */
 	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	INT_SET(btp->count, ARCH_CONVERT, INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2);	/* ., .. */
+	btp->count = cpu_to_be32(INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2);	/* ., .. */
 	btp->stale = 0;
 	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
 	endoffset = (uint)((char *)blp - (char *)block);
@@ -1211,13 +1211,13 @@ xfs_dir2_sf_to_block(
 	/*
 	 * Sort the leaf entries by hash value.
 	 */
-	xfs_sort(blp, INT_GET(btp->count, ARCH_CONVERT), sizeof(*blp), xfs_dir2_block_sort);
+	xfs_sort(blp, be32_to_cpu(btp->count), sizeof(*blp), xfs_dir2_block_sort);
 	/*
 	 * Log the leaf entry area and tail.
 	 * Already logged the header in data_init, ignore needlog.
 	 */
 	ASSERT(needscan == 0);
-	xfs_dir2_block_log_leaf(tp, bp, 0, INT_GET(btp->count, ARCH_CONVERT) - 1);
+	xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1);
 	xfs_dir2_block_log_tail(tp, bp);
 	xfs_dir2_data_check(dp, bp);
 	xfs_da_buf_done(bp);
diff --git a/fs/xfs/xfs_dir2_block.h b/fs/xfs/xfs_dir2_block.h
index a2e5cb98a83..6722effd0b2 100644
--- a/fs/xfs/xfs_dir2_block.h
+++ b/fs/xfs/xfs_dir2_block.h
@@ -43,8 +43,8 @@ struct xfs_trans;
 #define	XFS_DIR2_BLOCK_MAGIC	0x58443242	/* XD2B: for one block dirs */
 
 typedef struct xfs_dir2_block_tail {
-	__uint32_t	count;			/* count of leaf entries */
-	__uint32_t	stale;			/* count of stale lf entries */
+	__be32		count;			/* count of leaf entries */
+	__be32		stale;			/* count of stale lf entries */
 } xfs_dir2_block_tail_t;
 
 /*
@@ -75,8 +75,7 @@ xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
 static inline struct xfs_dir2_leaf_entry *
 xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp)
 {
-	return (((struct xfs_dir2_leaf_entry *)
-		(btp)) - INT_GET((btp)->count, ARCH_CONVERT));
+	return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
 }
 
 /*
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index acb61132dec..0af757873be 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -143,12 +143,12 @@ xfs_dir2_data_check(
 				(xfs_dir2_data_aoff_t)
 				((char *)dep - (char *)d));
 			hash = xfs_da_hashname((char *)dep->name, dep->namelen);
-			for (i = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) {
+			for (i = 0; i < be32_to_cpu(btp->count); i++) {
 				if (INT_GET(lep[i].address, ARCH_CONVERT) == addr &&
 				    INT_GET(lep[i].hashval, ARCH_CONVERT) == hash)
 					break;
 			}
-			ASSERT(i < INT_GET(btp->count, ARCH_CONVERT));
+			ASSERT(i < be32_to_cpu(btp->count));
 		}
 		p += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
 	}
@@ -157,14 +157,14 @@ xfs_dir2_data_check(
 	 */
 	ASSERT(freeseen == 7);
 	if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
-		for (i = stale = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) {
+		for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
 			if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
 				stale++;
 			if (i > 0)
 				ASSERT(INT_GET(lep[i].hashval, ARCH_CONVERT) >= INT_GET(lep[i - 1].hashval, ARCH_CONVERT));
 		}
-		ASSERT(count == INT_GET(btp->count, ARCH_CONVERT) - INT_GET(btp->stale, ARCH_CONVERT));
-		ASSERT(stale == INT_GET(btp->stale, ARCH_CONVERT));
+		ASSERT(count == be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
+		ASSERT(stale == be32_to_cpu(btp->stale));
 	}
 }
 #endif
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index cbd371d9e98..63e81dc2737 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -111,13 +111,13 @@ xfs_dir2_block_to_leaf(
 	/*
 	 * Set the counts in the leaf header.
 	 */
-	INT_COPY(leaf->hdr.count, btp->count, ARCH_CONVERT); /* INT_: type change */
-	INT_COPY(leaf->hdr.stale, btp->stale, ARCH_CONVERT); /* INT_: type change */
+	leaf->hdr.count = btp->count;
+	leaf->hdr.stale = btp->stale;
 	/*
 	 * Could compact these but I think we always do the conversion
 	 * after squeezing out stale entries.
 	 */
-	memcpy(leaf->ents, blp, INT_GET(btp->count, ARCH_CONVERT) * sizeof(xfs_dir2_leaf_entry_t));
+	memcpy(leaf->ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
 	xfs_dir2_leaf_log_ents(tp, lbp, 0, INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1);
 	needscan = 0;
 	needlog = 1;
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 3a571d8cf1f..35dd003051c 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -98,7 +98,7 @@ xfs_dir2_block_sfsize(
 	/*
 	 * Iterate over the block's data entries by using the leaf pointers.
 	 */
-	for (i = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) {
+	for (i = 0; i < be32_to_cpu(btp->count); i++) {
 		if ((addr = INT_GET(blp[i].address, ARCH_CONVERT)) == XFS_DIR2_NULL_DATAPTR)
 			continue;
 		/*
-- 
cgit v1.2.3


From a818e5de7e21ddaa7352bb8c9fc785c7b4f3019f Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:28:07 +1100
Subject: [XFS] endianess annotations for xfs_dir2_leaf_hdr_t

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25492a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_da_btree.c   |   4 +-
 fs/xfs/xfs_dir2_block.c |   6 +--
 fs/xfs/xfs_dir2_leaf.c  |  74 ++++++++++++++++----------------
 fs/xfs/xfs_dir2_leaf.h  |   4 +-
 fs/xfs/xfs_dir2_node.c  | 112 ++++++++++++++++++++++++------------------------
 5 files changed, 100 insertions(+), 100 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 2f4acb9d9b9..c443cbaaaa9 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -366,7 +366,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		ASSERT(XFS_DIR_IS_V2(mp));
 		ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
 		leaf = (xfs_dir2_leaf_t *)oldroot;
-		size = (int)((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] -
+		size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
 			     (char *)leaf);
 	}
 	memcpy(node, oldroot, size);
@@ -1798,7 +1798,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 		ASSERT(XFS_DIR_IS_V2(mp));
 		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
 		dead_level = 0;
-		dead_hash = INT_GET(dead_leaf2->ents[INT_GET(dead_leaf2->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+		dead_hash = INT_GET(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval, ARCH_CONVERT);
 	} else {
 		ASSERT(INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 		dead_node = (xfs_da_intnode_t *)dead_info;
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 6f3b99dfe55..1dd1a7694e4 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -939,7 +939,7 @@ xfs_dir2_leaf_to_block(
 	 * Size of the "leaf" area in the block.
 	 */
 	size = (uint)sizeof(block->tail) +
-	       (uint)sizeof(*lep) * (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT));
+	       (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
 	/*
 	 * Look at the last data entry.
 	 */
@@ -968,14 +968,14 @@ xfs_dir2_leaf_to_block(
 	 * Initialize the block tail.
 	 */
 	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	btp->count = cpu_to_be32(INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT));
+	btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
 	btp->stale = 0;
 	xfs_dir2_block_log_tail(tp, dbp);
 	/*
 	 * Initialize the block leaf area.  We compact out stale entries.
 	 */
 	lep = XFS_DIR2_BLOCK_LEAF_P(btp);
-	for (from = to = 0; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) {
+	for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
 		if (INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
 			continue;
 		lep[to++] = leaf->ents[from];
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 63e81dc2737..0d366ab4db1 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -111,14 +111,14 @@ xfs_dir2_block_to_leaf(
 	/*
 	 * Set the counts in the leaf header.
 	 */
-	leaf->hdr.count = btp->count;
-	leaf->hdr.stale = btp->stale;
+	leaf->hdr.count = cpu_to_be16(be32_to_cpu(btp->count));
+	leaf->hdr.stale = cpu_to_be16(be32_to_cpu(btp->stale));
 	/*
 	 * Could compact these but I think we always do the conversion
 	 * after squeezing out stale entries.
 	 */
 	memcpy(leaf->ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
-	xfs_dir2_leaf_log_ents(tp, lbp, 0, INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1);
+	xfs_dir2_leaf_log_ents(tp, lbp, 0, be16_to_cpu(leaf->hdr.count) - 1);
 	needscan = 0;
 	needlog = 1;
 	/*
@@ -222,7 +222,7 @@ xfs_dir2_leaf_addname(
 	 * in a data block, improving the lookup of those entries.
 	 */
 	for (use_block = -1, lep = &leaf->ents[index];
-	     index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
+	     index < be16_to_cpu(leaf->hdr.count) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
 	     index++, lep++) {
 		if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
 			continue;
@@ -266,15 +266,15 @@ xfs_dir2_leaf_addname(
 	 * If we don't have enough free bytes but we can make enough
 	 * by compacting out stale entries, we'll do that.
 	 */
-	if ((char *)bestsp - (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] < needbytes &&
-	    INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1) {
+	if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < needbytes &&
+	    be16_to_cpu(leaf->hdr.stale) > 1) {
 		compact = 1;
 	}
 	/*
 	 * Otherwise if we don't have enough free bytes we need to
 	 * convert to node form.
 	 */
-	else if ((char *)bestsp - (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] <
+	else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
 		 needbytes) {
 		/*
 		 * Just checking or no space reservation, give up.
@@ -330,8 +330,8 @@ xfs_dir2_leaf_addname(
 	 * There are stale entries, so we'll need log-low and log-high
 	 * impossibly bad values later.
 	 */
-	else if (INT_GET(leaf->hdr.stale, ARCH_CONVERT)) {
-		lfloglow = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+	else if (be16_to_cpu(leaf->hdr.stale)) {
+		lfloglow = be16_to_cpu(leaf->hdr.count);
 		lfloghigh = -1;
 	}
 	/*
@@ -440,15 +440,15 @@ xfs_dir2_leaf_addname(
 		/*
 		 * lep is still good as the index leaf entry.
 		 */
-		if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT))
+		if (index < be16_to_cpu(leaf->hdr.count))
 			memmove(lep + 1, lep,
-				(INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep));
+				(be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
 		/*
 		 * Record low and high logging indices for the leaf.
 		 */
 		lfloglow = index;
-		lfloghigh = INT_GET(leaf->hdr.count, ARCH_CONVERT);
-		INT_MOD(leaf->hdr.count, ARCH_CONVERT, +1);
+		lfloghigh = be16_to_cpu(leaf->hdr.count);
+		be16_add(&leaf->hdr.count, 1);
 	}
 	/*
 	 * There are stale entries.
@@ -478,7 +478,7 @@ xfs_dir2_leaf_addname(
 			 * lowstale entry would be better.
 			 */
 			for (highstale = index;
-			     highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) &&
+			     highstale < be16_to_cpu(leaf->hdr.count) &&
 				INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) !=
 				XFS_DIR2_NULL_DATAPTR &&
 				(lowstale < 0 ||
@@ -490,7 +490,7 @@ xfs_dir2_leaf_addname(
 		 * If the low one is better, use it.
 		 */
 		if (lowstale >= 0 &&
-		    (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
+		    (highstale == be16_to_cpu(leaf->hdr.count) ||
 		     index - lowstale - 1 < highstale - index)) {
 			ASSERT(index - lowstale - 1 >= 0);
 			ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) ==
@@ -526,7 +526,7 @@ xfs_dir2_leaf_addname(
 			lfloglow = MIN(index, lfloglow);
 			lfloghigh = MAX(highstale, lfloghigh);
 		}
-		INT_MOD(leaf->hdr.stale, ARCH_CONVERT, -1);
+		be16_add(&leaf->hdr.stale, -1);
 	}
 	/*
 	 * Fill in the new leaf entry.
@@ -569,24 +569,24 @@ xfs_dir2_leaf_check(
 	 * Should factor in the size of the bests table as well.
 	 * We can deduce a value for that from di_size.
 	 */
-	ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
+	ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
 	/*
 	 * Leaves and bests don't overlap.
 	 */
-	ASSERT((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] <=
+	ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <=
 	       (char *)XFS_DIR2_LEAF_BESTS_P(ltp));
 	/*
 	 * Check hash value order, count stale entries.
 	 */
-	for (i = stale = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); i++) {
-		if (i + 1 < INT_GET(leaf->hdr.count, ARCH_CONVERT))
+	for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
+		if (i + 1 < be16_to_cpu(leaf->hdr.count))
 			ASSERT(INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) <=
 			       INT_GET(leaf->ents[i + 1].hashval, ARCH_CONVERT));
 		if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
 			stale++;
 	}
-	ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == stale);
+	ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
 }
 #endif	/* DEBUG */
 
@@ -611,7 +611,7 @@ xfs_dir2_leaf_compact(
 	/*
 	 * Compress out the stale entries in place.
 	 */
-	for (from = to = 0, loglow = -1; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) {
+	for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) {
 		if (INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
 			continue;
 		/*
@@ -627,8 +627,8 @@ xfs_dir2_leaf_compact(
 	/*
 	 * Update and log the header, log the leaf entries.
 	 */
-	ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == from - to);
-	INT_MOD(leaf->hdr.count, ARCH_CONVERT, -(INT_GET(leaf->hdr.stale, ARCH_CONVERT)));
+	ASSERT(be16_to_cpu(leaf->hdr.stale) == from - to);
+	be16_add(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale)));
 	leaf->hdr.stale = 0;
 	xfs_dir2_leaf_log_header(args->trans, bp);
 	if (loglow != -1)
@@ -662,7 +662,7 @@ xfs_dir2_leaf_compact_x1(
 	int		to;		/* destination copy index */
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1);
+	ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
 	index = *indexp;
 	/*
 	 * Find the first stale entry before our index, if any.
@@ -677,7 +677,7 @@ xfs_dir2_leaf_compact_x1(
 	 * Stop if the answer would be worse than lowstale.
 	 */
 	for (highstale = index;
-	     highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) &&
+	     highstale < be16_to_cpu(leaf->hdr.count) &&
 		INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR &&
 		(lowstale < 0 || index - lowstale > highstale - index);
 	     highstale++)
@@ -686,7 +686,7 @@ xfs_dir2_leaf_compact_x1(
 	 * Pick the better of lowstale and highstale.
 	 */
 	if (lowstale >= 0 &&
-	    (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
+	    (highstale == be16_to_cpu(leaf->hdr.count) ||
 	     index - lowstale <= highstale - index))
 		keepstale = lowstale;
 	else
@@ -695,7 +695,7 @@ xfs_dir2_leaf_compact_x1(
 	 * Copy the entries in place, removing all the stale entries
 	 * except keepstale.
 	 */
-	for (from = to = 0; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) {
+	for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
 		/*
 		 * Notice the new value of index.
 		 */
@@ -730,8 +730,8 @@ xfs_dir2_leaf_compact_x1(
 	/*
 	 * Adjust the leaf header values.
 	 */
-	INT_MOD(leaf->hdr.count, ARCH_CONVERT, -(from - to));
-	INT_SET(leaf->hdr.stale, ARCH_CONVERT, 1);
+	be16_add(&leaf->hdr.count, -(from - to));
+	leaf->hdr.stale = cpu_to_be16(1);
 	/*
 	 * Remember the low/high stale value only in the "right"
 	 * direction.
@@ -739,8 +739,8 @@ xfs_dir2_leaf_compact_x1(
 	if (lowstale >= newindex)
 		lowstale = -1;
 	else
-		highstale = INT_GET(leaf->hdr.count, ARCH_CONVERT);
-	*highlogp = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1;
+		highstale = be16_to_cpu(leaf->hdr.count);
+	*highlogp = be16_to_cpu(leaf->hdr.count) - 1;
 	*lowstalep = lowstale;
 	*highstalep = highstale;
 }
@@ -1373,7 +1373,7 @@ xfs_dir2_leaf_lookup_int(
 	 * looking to match the name.
 	 */
 	for (lep = &leaf->ents[index], dbp = NULL, curdb = -1;
-	     index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
+	     index < be16_to_cpu(leaf->hdr.count) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
 	     lep++, index++) {
 		/*
 		 * Skip over stale leaf entries.
@@ -1488,7 +1488,7 @@ xfs_dir2_leaf_removename(
 	/*
 	 * We just mark the leaf entry stale by putting a null in it.
 	 */
-	INT_MOD(leaf->hdr.stale, ARCH_CONVERT, +1);
+	be16_add(&leaf->hdr.stale, 1);
 	xfs_dir2_leaf_log_header(tp, lbp);
 	INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR);
 	xfs_dir2_leaf_log_ents(tp, lbp, index, index);
@@ -1645,7 +1645,7 @@ xfs_dir2_leaf_search_hash(
 	 * Note, the table cannot be empty, so we have to go through the loop.
 	 * Binary search the leaf entries looking for our hash value.
 	 */
-	for (lep = leaf->ents, low = 0, high = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1,
+	for (lep = leaf->ents, low = 0, high = be16_to_cpu(leaf->hdr.count) - 1,
 		hashwant = args->hashval;
 	     low <= high; ) {
 		mid = (low + high) >> 1;
@@ -1821,7 +1821,7 @@ xfs_dir2_node_to_leaf(
 	 * If not, release the buffer and give up.
 	 */
 	if ((uint)sizeof(leaf->hdr) +
-	    (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT)) * (uint)sizeof(leaf->ents[0]) +
+	    (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)) * (uint)sizeof(leaf->ents[0]) +
 	    be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) +
 	    (uint)sizeof(leaf->tail) >
 	    mp->m_dirblksize) {
@@ -1832,7 +1832,7 @@ xfs_dir2_node_to_leaf(
 	 * If the leaf has any stale entries in it, compress them out.
 	 * The compact routine will log the header.
 	 */
-	if (INT_GET(leaf->hdr.stale, ARCH_CONVERT))
+	if (be16_to_cpu(leaf->hdr.stale))
 		xfs_dir2_leaf_compact(args, lbp);
 	else
 		xfs_dir2_leaf_log_header(tp, lbp);
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
index fcd3b7dea0f..a1db51b845b 100644
--- a/fs/xfs/xfs_dir2_leaf.h
+++ b/fs/xfs/xfs_dir2_leaf.h
@@ -46,8 +46,8 @@ typedef	__uint32_t	xfs_dir2_dataptr_t;
  */
 typedef struct xfs_dir2_leaf_hdr {
 	xfs_da_blkinfo_t	info;		/* header for da routines */
-	__uint16_t		count;		/* count of entries */
-	__uint16_t		stale;		/* count of stale entries */
+	__be16			count;		/* count of entries */
+	__be16			stale;		/* count of stale entries */
 } xfs_dir2_leaf_hdr_t;
 
 /*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index c32894cb622..40e4a2e2776 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -217,14 +217,14 @@ xfs_dir2_leafn_add(
 	 * a compact.
 	 */
 
-	if (INT_GET(leaf->hdr.count, ARCH_CONVERT) == XFS_DIR2_MAX_LEAF_ENTS(mp)) {
+	if (be16_to_cpu(leaf->hdr.count) == XFS_DIR2_MAX_LEAF_ENTS(mp)) {
 		if (!leaf->hdr.stale)
 			return XFS_ERROR(ENOSPC);
-		compact = INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1;
+		compact = be16_to_cpu(leaf->hdr.stale) > 1;
 	} else
 		compact = 0;
 	ASSERT(index == 0 || INT_GET(leaf->ents[index - 1].hashval, ARCH_CONVERT) <= args->hashval);
-	ASSERT(index == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
+	ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
 	       INT_GET(leaf->ents[index].hashval, ARCH_CONVERT) >= args->hashval);
 
 	if (args->justcheck)
@@ -242,7 +242,7 @@ xfs_dir2_leafn_add(
 	 * Set impossible logging indices for this case.
 	 */
 	else if (leaf->hdr.stale) {
-		lfloglow = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		lfloglow = be16_to_cpu(leaf->hdr.count);
 		lfloghigh = -1;
 	}
 	/*
@@ -250,12 +250,12 @@ xfs_dir2_leafn_add(
 	 */
 	if (!leaf->hdr.stale) {
 		lep = &leaf->ents[index];
-		if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT))
+		if (index < be16_to_cpu(leaf->hdr.count))
 			memmove(lep + 1, lep,
-				(INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep));
+				(be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
 		lfloglow = index;
-		lfloghigh = INT_GET(leaf->hdr.count, ARCH_CONVERT);
-		INT_MOD(leaf->hdr.count, ARCH_CONVERT, +1);
+		lfloghigh = be16_to_cpu(leaf->hdr.count);
+		be16_add(&leaf->hdr.count, 1);
 	}
 	/*
 	 * There are stale entries.  We'll use one for the new entry.
@@ -281,7 +281,7 @@ xfs_dir2_leafn_add(
 			 * lowstale already found.
 			 */
 			for (highstale = index;
-			     highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) &&
+			     highstale < be16_to_cpu(leaf->hdr.count) &&
 				INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) !=
 				XFS_DIR2_NULL_DATAPTR &&
 				(lowstale < 0 ||
@@ -294,7 +294,7 @@ xfs_dir2_leafn_add(
 		 * Shift entries up toward the stale slot.
 		 */
 		if (lowstale >= 0 &&
-		    (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
+		    (highstale == be16_to_cpu(leaf->hdr.count) ||
 		     index - lowstale - 1 < highstale - index)) {
 			ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) ==
 			       XFS_DIR2_NULL_DATAPTR);
@@ -323,7 +323,7 @@ xfs_dir2_leafn_add(
 			lfloglow = MIN(index, lfloglow);
 			lfloghigh = MAX(highstale, lfloghigh);
 		}
-		INT_MOD(leaf->hdr.stale, ARCH_CONVERT, -1);
+		be16_add(&leaf->hdr.stale, -1);
 	}
 	/*
 	 * Insert the new entry, log everything.
@@ -353,16 +353,16 @@ xfs_dir2_leafn_check(
 	leaf = bp->data;
 	mp = dp->i_mount;
 	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
-	ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
-	for (i = stale = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); i++) {
-		if (i + 1 < INT_GET(leaf->hdr.count, ARCH_CONVERT)) {
+	ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
+	for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
+		if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
 			ASSERT(INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) <=
 			       INT_GET(leaf->ents[i + 1].hashval, ARCH_CONVERT));
 		}
 		if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
 			stale++;
 	}
-	ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == stale);
+	ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
 }
 #endif	/* DEBUG */
 
@@ -380,10 +380,10 @@ xfs_dir2_leafn_lasthash(
 	leaf = bp->data;
 	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
 	if (count)
-		*count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		*count = be16_to_cpu(leaf->hdr.count);
 	if (!leaf->hdr.count)
 		return 0;
-	return INT_GET(leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+	return INT_GET(leaf->ents[be16_to_cpu(leaf->hdr.count) - 1].hashval, ARCH_CONVERT);
 }
 
 /*
@@ -421,7 +421,7 @@ xfs_dir2_leafn_lookup_int(
 	leaf = bp->data;
 	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
 #ifdef __KERNEL__
-	ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) > 0);
+	ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
 #endif
 	xfs_dir2_leafn_check(dp, bp);
 	/*
@@ -456,7 +456,7 @@ xfs_dir2_leafn_lookup_int(
 	 * Loop over leaf entries with the right hash value.
 	 */
 	for (lep = &leaf->ents[index];
-	     index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
+	     index < be16_to_cpu(leaf->hdr.count) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
 	     lep++, index++) {
 		/*
 		 * Skip stale leaf entries.
@@ -619,7 +619,7 @@ xfs_dir2_leafn_lookup_int(
 	 * Return the final index, that will be the insertion point.
 	 */
 	*indexp = index;
-	ASSERT(index == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent);
+	ASSERT(index == be16_to_cpu(leaf->hdr.count) || args->oknoent);
 	return XFS_ERROR(ENOENT);
 }
 
@@ -657,12 +657,12 @@ xfs_dir2_leafn_moveents(
 	 * destination leaf entries, open up a hole in the destination
 	 * to hold the new entries.
 	 */
-	if (start_d < INT_GET(leaf_d->hdr.count, ARCH_CONVERT)) {
+	if (start_d < be16_to_cpu(leaf_d->hdr.count)) {
 		memmove(&leaf_d->ents[start_d + count], &leaf_d->ents[start_d],
-			(INT_GET(leaf_d->hdr.count, ARCH_CONVERT) - start_d) *
+			(be16_to_cpu(leaf_d->hdr.count) - start_d) *
 			sizeof(xfs_dir2_leaf_entry_t));
 		xfs_dir2_leaf_log_ents(tp, bp_d, start_d + count,
-			count + INT_GET(leaf_d->hdr.count, ARCH_CONVERT) - 1);
+			count + be16_to_cpu(leaf_d->hdr.count) - 1);
 	}
 	/*
 	 * If the source has stale leaves, count the ones in the copy range
@@ -687,7 +687,7 @@ xfs_dir2_leafn_moveents(
 	 * If there are source entries after the ones we copied,
 	 * delete the ones we copied by sliding the next ones down.
 	 */
-	if (start_s + count < INT_GET(leaf_s->hdr.count, ARCH_CONVERT)) {
+	if (start_s + count < be16_to_cpu(leaf_s->hdr.count)) {
 		memmove(&leaf_s->ents[start_s], &leaf_s->ents[start_s + count],
 			count * sizeof(xfs_dir2_leaf_entry_t));
 		xfs_dir2_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1);
@@ -695,10 +695,10 @@ xfs_dir2_leafn_moveents(
 	/*
 	 * Update the headers and log them.
 	 */
-	INT_MOD(leaf_s->hdr.count, ARCH_CONVERT, -(count));
-	INT_MOD(leaf_s->hdr.stale, ARCH_CONVERT, -(stale));
-	INT_MOD(leaf_d->hdr.count, ARCH_CONVERT, count);
-	INT_MOD(leaf_d->hdr.stale, ARCH_CONVERT, stale);
+	be16_add(&leaf_s->hdr.count, -(count));
+	be16_add(&leaf_s->hdr.stale, -(stale));
+	be16_add(&leaf_d->hdr.count, count);
+	be16_add(&leaf_d->hdr.stale, stale);
 	xfs_dir2_leaf_log_header(tp, bp_s);
 	xfs_dir2_leaf_log_header(tp, bp_d);
 	xfs_dir2_leafn_check(args->dp, bp_s);
@@ -721,11 +721,11 @@ xfs_dir2_leafn_order(
 	leaf2 = leaf2_bp->data;
 	ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
 	ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
-	if (INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0 &&
-	    INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0 &&
+	if (be16_to_cpu(leaf1->hdr.count) > 0 &&
+	    be16_to_cpu(leaf2->hdr.count) > 0 &&
 	    (INT_GET(leaf2->ents[0].hashval, ARCH_CONVERT) < INT_GET(leaf1->ents[0].hashval, ARCH_CONVERT) ||
-	     INT_GET(leaf2->ents[INT_GET(leaf2->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT) <
-	     INT_GET(leaf1->ents[INT_GET(leaf1->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT)))
+	     INT_GET(leaf2->ents[be16_to_cpu(leaf2->hdr.count) - 1].hashval, ARCH_CONVERT) <
+	     INT_GET(leaf1->ents[be16_to_cpu(leaf1->hdr.count) - 1].hashval, ARCH_CONVERT)))
 		return 1;
 	return 0;
 }
@@ -768,9 +768,9 @@ xfs_dir2_leafn_rebalance(
 	}
 	leaf1 = blk1->bp->data;
 	leaf2 = blk2->bp->data;
-	oldsum = INT_GET(leaf1->hdr.count, ARCH_CONVERT) + INT_GET(leaf2->hdr.count, ARCH_CONVERT);
+	oldsum = be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count);
 #ifdef DEBUG
-	oldstale = INT_GET(leaf1->hdr.stale, ARCH_CONVERT) + INT_GET(leaf2->hdr.stale, ARCH_CONVERT);
+	oldstale = be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale);
 #endif
 	mid = oldsum >> 1;
 	/*
@@ -780,8 +780,8 @@ xfs_dir2_leafn_rebalance(
 	if (oldsum & 1) {
 		xfs_dahash_t	midhash;	/* middle entry hash value */
 
-		if (mid >= INT_GET(leaf1->hdr.count, ARCH_CONVERT))
-			midhash = INT_GET(leaf2->ents[mid - INT_GET(leaf1->hdr.count, ARCH_CONVERT)].hashval, ARCH_CONVERT);
+		if (mid >= be16_to_cpu(leaf1->hdr.count))
+			midhash = INT_GET(leaf2->ents[mid - be16_to_cpu(leaf1->hdr.count)].hashval, ARCH_CONVERT);
 		else
 			midhash = INT_GET(leaf1->ents[mid].hashval, ARCH_CONVERT);
 		isleft = args->hashval <= midhash;
@@ -797,30 +797,30 @@ xfs_dir2_leafn_rebalance(
 	 * Calculate moved entry count.  Positive means left-to-right,
 	 * negative means right-to-left.  Then move the entries.
 	 */
-	count = INT_GET(leaf1->hdr.count, ARCH_CONVERT) - mid + (isleft == 0);
+	count = be16_to_cpu(leaf1->hdr.count) - mid + (isleft == 0);
 	if (count > 0)
 		xfs_dir2_leafn_moveents(args, blk1->bp,
-			INT_GET(leaf1->hdr.count, ARCH_CONVERT) - count, blk2->bp, 0, count);
+			be16_to_cpu(leaf1->hdr.count) - count, blk2->bp, 0, count);
 	else if (count < 0)
 		xfs_dir2_leafn_moveents(args, blk2->bp, 0, blk1->bp,
-			INT_GET(leaf1->hdr.count, ARCH_CONVERT), count);
-	ASSERT(INT_GET(leaf1->hdr.count, ARCH_CONVERT) + INT_GET(leaf2->hdr.count, ARCH_CONVERT) == oldsum);
-	ASSERT(INT_GET(leaf1->hdr.stale, ARCH_CONVERT) + INT_GET(leaf2->hdr.stale, ARCH_CONVERT) == oldstale);
+			be16_to_cpu(leaf1->hdr.count), count);
+	ASSERT(be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count) == oldsum);
+	ASSERT(be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale) == oldstale);
 	/*
 	 * Mark whether we're inserting into the old or new leaf.
 	 */
-	if (INT_GET(leaf1->hdr.count, ARCH_CONVERT) < INT_GET(leaf2->hdr.count, ARCH_CONVERT))
+	if (be16_to_cpu(leaf1->hdr.count) < be16_to_cpu(leaf2->hdr.count))
 		state->inleaf = swap;
-	else if (INT_GET(leaf1->hdr.count, ARCH_CONVERT) > INT_GET(leaf2->hdr.count, ARCH_CONVERT))
+	else if (be16_to_cpu(leaf1->hdr.count) > be16_to_cpu(leaf2->hdr.count))
 		state->inleaf = !swap;
 	else
 		state->inleaf =
-			swap ^ (blk1->index <= INT_GET(leaf1->hdr.count, ARCH_CONVERT));
+			swap ^ (blk1->index <= be16_to_cpu(leaf1->hdr.count));
 	/*
 	 * Adjust the expected index for insertion.
 	 */
 	if (!state->inleaf)
-		blk2->index = blk1->index - INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+		blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count);
 	
 	/* 
 	 * Finally sanity check just to make sure we are not returning a negative index 
@@ -883,7 +883,7 @@ xfs_dir2_leafn_remove(
 	 * Kill the leaf entry by marking it stale.
 	 * Log the leaf block changes.
 	 */
-	INT_MOD(leaf->hdr.stale, ARCH_CONVERT, +1);
+	be16_add(&leaf->hdr.stale, 1);
 	xfs_dir2_leaf_log_header(tp, bp);
 	INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR);
 	xfs_dir2_leaf_log_ents(tp, bp, index, index);
@@ -1039,7 +1039,7 @@ xfs_dir2_leafn_remove(
 	*rval =
 		((uint)sizeof(leaf->hdr) +
 		 (uint)sizeof(leaf->ents[0]) *
-		 (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT))) <
+		 (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale))) <
 		mp->m_dir_magicpct;
 	return 0;
 }
@@ -1140,7 +1140,7 @@ xfs_dir2_leafn_toosmall(
 	info = blk->bp->data;
 	ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
 	leaf = (xfs_dir2_leaf_t *)info;
-	count = INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT);
+	count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
 	bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]);
 	if (bytes > (state->blocksize >> 1)) {
 		/*
@@ -1194,11 +1194,11 @@ xfs_dir2_leafn_toosmall(
 		 * Count bytes in the two blocks combined.
 		 */
 		leaf = (xfs_dir2_leaf_t *)info;
-		count = INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT);
+		count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
 		bytes = state->blocksize - (state->blocksize >> 2);
 		leaf = bp->data;
 		ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
-		count += INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT);
+		count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
 		bytes -= count * (uint)sizeof(leaf->ents[0]);
 		/*
 		 * Fits with at least 25% to spare.
@@ -1262,21 +1262,21 @@ xfs_dir2_leafn_unbalance(
 	 * If there are any stale leaf entries, take this opportunity
 	 * to purge them.
 	 */
-	if (INT_GET(drop_leaf->hdr.stale, ARCH_CONVERT))
+	if (drop_leaf->hdr.stale)
 		xfs_dir2_leaf_compact(args, drop_blk->bp);
-	if (INT_GET(save_leaf->hdr.stale, ARCH_CONVERT))
+	if (save_leaf->hdr.stale)
 		xfs_dir2_leaf_compact(args, save_blk->bp);
 	/*
 	 * Move the entries from drop to the appropriate end of save.
 	 */
-	drop_blk->hashval = INT_GET(drop_leaf->ents[INT_GET(drop_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+	drop_blk->hashval = INT_GET(drop_leaf->ents[be16_to_cpu(drop_leaf->hdr.count) - 1].hashval, ARCH_CONVERT);
 	if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp))
 		xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, 0,
-			INT_GET(drop_leaf->hdr.count, ARCH_CONVERT));
+			be16_to_cpu(drop_leaf->hdr.count));
 	else
 		xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp,
-			INT_GET(save_leaf->hdr.count, ARCH_CONVERT), INT_GET(drop_leaf->hdr.count, ARCH_CONVERT));
-	save_blk->hashval = INT_GET(save_leaf->ents[INT_GET(save_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+			be16_to_cpu(save_leaf->hdr.count), be16_to_cpu(drop_leaf->hdr.count));
+	save_blk->hashval = INT_GET(save_leaf->ents[be16_to_cpu(save_leaf->hdr.count) - 1].hashval, ARCH_CONVERT);
 	xfs_dir2_leafn_check(args->dp, save_blk->bp);
 }
 
-- 
cgit v1.2.3


From 3c1f9c158050259cf3965cf900916ec49a288972 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:28:18 +1100
Subject: [XFS] endianess annotations for xfs_dir2_leaf_entry_t

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25493a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_da_btree.c   |  2 +-
 fs/xfs/xfs_dir2_block.c | 52 +++++++++++++++++++++++---------------------
 fs/xfs/xfs_dir2_data.c  |  8 +++----
 fs/xfs/xfs_dir2_leaf.c  | 55 ++++++++++++++++++++++++-----------------------
 fs/xfs/xfs_dir2_leaf.h  |  4 ++--
 fs/xfs/xfs_dir2_node.c  | 57 +++++++++++++++++++++++++------------------------
 fs/xfs/xfs_dir2_sf.c    |  2 +-
 7 files changed, 93 insertions(+), 87 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index c443cbaaaa9..31796c7faba 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1798,7 +1798,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 		ASSERT(XFS_DIR_IS_V2(mp));
 		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
 		dead_level = 0;
-		dead_hash = INT_GET(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval, ARCH_CONVERT);
+		dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
 	} else {
 		ASSERT(INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 		dead_node = (xfs_da_intnode_t *)dead_info;
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 1dd1a7694e4..fa372b2b433 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -246,7 +246,7 @@ xfs_dir2_block_addname(
 			highstale = lfloghigh = -1;
 		     fromidx >= 0;
 		     fromidx--) {
-			if (INT_GET(blp[fromidx].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) {
+			if (be32_to_cpu(blp[fromidx].address) == XFS_DIR2_NULL_DATAPTR) {
 				if (highstale == -1)
 					highstale = toidx;
 				else {
@@ -291,14 +291,14 @@ xfs_dir2_block_addname(
 	 */
 	for (low = 0, high = be32_to_cpu(btp->count) - 1; low <= high; ) {
 		mid = (low + high) >> 1;
-		if ((hash = INT_GET(blp[mid].hashval, ARCH_CONVERT)) == args->hashval)
+		if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval)
 			break;
 		if (hash < args->hashval)
 			low = mid + 1;
 		else
 			high = mid - 1;
 	}
-	while (mid >= 0 && INT_GET(blp[mid].hashval, ARCH_CONVERT) >= args->hashval) {
+	while (mid >= 0 && be32_to_cpu(blp[mid].hashval) >= args->hashval) {
 		mid--;
 	}
 	/*
@@ -345,12 +345,12 @@ xfs_dir2_block_addname(
 	else {
 		for (lowstale = mid;
 		     lowstale >= 0 &&
-			INT_GET(blp[lowstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR;
+			be32_to_cpu(blp[lowstale].address) != XFS_DIR2_NULL_DATAPTR;
 		     lowstale--)
 			continue;
 		for (highstale = mid + 1;
 		     highstale < be32_to_cpu(btp->count) &&
-			INT_GET(blp[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR &&
+			be32_to_cpu(blp[highstale].address) != XFS_DIR2_NULL_DATAPTR &&
 			(lowstale < 0 || mid - lowstale > highstale - mid);
 		     highstale++)
 			continue;
@@ -387,8 +387,9 @@ xfs_dir2_block_addname(
 	/*
 	 * Fill in the leaf entry.
 	 */
-	INT_SET(blp[mid].hashval, ARCH_CONVERT, args->hashval);
-	INT_SET(blp[mid].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block));
+	blp[mid].hashval = cpu_to_be32(args->hashval);
+	blp[mid].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+				(char *)dep - (char *)block));
 	xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
 	/*
 	 * Mark space for the data entry used.
@@ -621,7 +622,7 @@ xfs_dir2_block_lookup(
 	 * Get the offset from the leaf entry, to point to the data.
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(blp[ent].address, ARCH_CONVERT)));
+	      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address)));
 	/*
 	 * Fill in inode number, release the block.
 	 */
@@ -676,7 +677,7 @@ xfs_dir2_block_lookup_int(
 	for (low = 0, high = be32_to_cpu(btp->count) - 1; ; ) {
 		ASSERT(low <= high);
 		mid = (low + high) >> 1;
-		if ((hash = INT_GET(blp[mid].hashval, ARCH_CONVERT)) == args->hashval)
+		if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval)
 			break;
 		if (hash < args->hashval)
 			low = mid + 1;
@@ -691,7 +692,7 @@ xfs_dir2_block_lookup_int(
 	/*
 	 * Back up to the first one with the right hash value.
 	 */
-	while (mid > 0 && INT_GET(blp[mid - 1].hashval, ARCH_CONVERT) == args->hashval) {
+	while (mid > 0 && be32_to_cpu(blp[mid - 1].hashval) == args->hashval) {
 		mid--;
 	}
 	/*
@@ -699,7 +700,7 @@ xfs_dir2_block_lookup_int(
 	 * right hash value looking for our name.
 	 */
 	do {
-		if ((addr = INT_GET(blp[mid].address, ARCH_CONVERT)) == XFS_DIR2_NULL_DATAPTR)
+		if ((addr = be32_to_cpu(blp[mid].address)) == XFS_DIR2_NULL_DATAPTR)
 			continue;
 		/*
 		 * Get pointer to the entry from the leaf.
@@ -716,7 +717,7 @@ xfs_dir2_block_lookup_int(
 			*entno = mid;
 			return 0;
 		}
-	} while (++mid < be32_to_cpu(btp->count) && INT_GET(blp[mid].hashval, ARCH_CONVERT) == hash);
+	} while (++mid < be32_to_cpu(btp->count) && be32_to_cpu(blp[mid].hashval) == hash);
 	/*
 	 * No match, release the buffer and return ENOENT.
 	 */
@@ -766,7 +767,7 @@ xfs_dir2_block_removename(
 	 * Point to the data entry using the leaf entry.
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(blp[ent].address, ARCH_CONVERT)));
+	      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address)));
 	/*
 	 * Mark the data entry's space free.
 	 */
@@ -782,7 +783,7 @@ xfs_dir2_block_removename(
 	/*
 	 * Remove the leaf entry by marking it stale.
 	 */
-	INT_SET(blp[ent].address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR);
+	blp[ent].address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
 	xfs_dir2_block_log_leaf(tp, bp, ent, ent);
 	/*
 	 * Fix up bestfree, log the header if necessary.
@@ -842,7 +843,7 @@ xfs_dir2_block_replace(
 	 * Point to the data entry we need to change.
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(blp[ent].address, ARCH_CONVERT)));
+	      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address)));
 	ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) != args->inumber);
 	/*
 	 * Change the inode number to the new value.
@@ -867,8 +868,8 @@ xfs_dir2_block_sort(
 
 	la = a;
 	lb = b;
-	return INT_GET(la->hashval, ARCH_CONVERT) < INT_GET(lb->hashval, ARCH_CONVERT) ? -1 :
-		(INT_GET(la->hashval, ARCH_CONVERT) > INT_GET(lb->hashval, ARCH_CONVERT) ? 1 : 0);
+	return be32_to_cpu(la->hashval) < be32_to_cpu(lb->hashval) ? -1 :
+		(be32_to_cpu(la->hashval) > be32_to_cpu(lb->hashval) ? 1 : 0);
 }
 
 /*
@@ -976,7 +977,7 @@ xfs_dir2_leaf_to_block(
 	 */
 	lep = XFS_DIR2_BLOCK_LEAF_P(btp);
 	for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
-		if (INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+		if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
 			continue;
 		lep[to++] = leaf->ents[from];
 	}
@@ -1135,8 +1136,9 @@ xfs_dir2_sf_to_block(
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
 	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
 	xfs_dir2_data_log_entry(tp, bp, dep);
-	INT_SET(blp[0].hashval, ARCH_CONVERT, xfs_dir_hash_dot);
-	INT_SET(blp[0].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block));
+	blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
+	blp[0].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+				(char *)dep - (char *)block));
 	/*
 	 * Create entry for ..
 	 */
@@ -1148,8 +1150,9 @@ xfs_dir2_sf_to_block(
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
 	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
 	xfs_dir2_data_log_entry(tp, bp, dep);
-	INT_SET(blp[1].hashval, ARCH_CONVERT, xfs_dir_hash_dotdot);
-	INT_SET(blp[1].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block));
+	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
+	blp[1].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+				(char *)dep - (char *)block));
 	offset = XFS_DIR2_DATA_FIRST_OFFSET;
 	/*
 	 * Loop over existing entries, stuff them in.
@@ -1197,8 +1200,9 @@ xfs_dir2_sf_to_block(
 		tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
 		INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
 		xfs_dir2_data_log_entry(tp, bp, dep);
-		INT_SET(blp[2 + i].hashval, ARCH_CONVERT, xfs_da_hashname((char *)sfep->name, sfep->namelen));
-		INT_SET(blp[2 + i].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp,
+		blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname(
+					(char *)sfep->name, sfep->namelen));
+		blp[2 + i].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
 						 (char *)dep - (char *)block));
 		offset = (int)((char *)(tagp + 1) - (char *)block);
 		if (++i == INT_GET(sfp->hdr.count, ARCH_CONVERT))
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 0af757873be..084b134361a 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -144,8 +144,8 @@ xfs_dir2_data_check(
 				((char *)dep - (char *)d));
 			hash = xfs_da_hashname((char *)dep->name, dep->namelen);
 			for (i = 0; i < be32_to_cpu(btp->count); i++) {
-				if (INT_GET(lep[i].address, ARCH_CONVERT) == addr &&
-				    INT_GET(lep[i].hashval, ARCH_CONVERT) == hash)
+				if (be32_to_cpu(lep[i].address) == addr &&
+				    be32_to_cpu(lep[i].hashval) == hash)
 					break;
 			}
 			ASSERT(i < be32_to_cpu(btp->count));
@@ -158,10 +158,10 @@ xfs_dir2_data_check(
 	ASSERT(freeseen == 7);
 	if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
 		for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
-			if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+			if (be32_to_cpu(lep[i].address) == XFS_DIR2_NULL_DATAPTR)
 				stale++;
 			if (i > 0)
-				ASSERT(INT_GET(lep[i].hashval, ARCH_CONVERT) >= INT_GET(lep[i - 1].hashval, ARCH_CONVERT));
+				ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval));
 		}
 		ASSERT(count == be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
 		ASSERT(stale == be32_to_cpu(btp->stale));
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 0d366ab4db1..752fc67354e 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -222,11 +222,11 @@ xfs_dir2_leaf_addname(
 	 * in a data block, improving the lookup of those entries.
 	 */
 	for (use_block = -1, lep = &leaf->ents[index];
-	     index < be16_to_cpu(leaf->hdr.count) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
+	     index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
 	     index++, lep++) {
-		if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
 			continue;
-		i = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+		i = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
 		ASSERT(i < be32_to_cpu(ltp->bestcount));
 		ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF);
 		if (be16_to_cpu(bestsp[i]) >= length) {
@@ -468,7 +468,7 @@ xfs_dir2_leaf_addname(
 			 */
 			for (lowstale = index - 1;
 			     lowstale >= 0 &&
-				INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) !=
+				be32_to_cpu(leaf->ents[lowstale].address) !=
 				XFS_DIR2_NULL_DATAPTR;
 			     lowstale--)
 				continue;
@@ -479,7 +479,7 @@ xfs_dir2_leaf_addname(
 			 */
 			for (highstale = index;
 			     highstale < be16_to_cpu(leaf->hdr.count) &&
-				INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) !=
+				be32_to_cpu(leaf->ents[highstale].address) !=
 				XFS_DIR2_NULL_DATAPTR &&
 				(lowstale < 0 ||
 				 index - lowstale - 1 >= highstale - index);
@@ -493,7 +493,7 @@ xfs_dir2_leaf_addname(
 		    (highstale == be16_to_cpu(leaf->hdr.count) ||
 		     index - lowstale - 1 < highstale - index)) {
 			ASSERT(index - lowstale - 1 >= 0);
-			ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) ==
+			ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
 			       XFS_DIR2_NULL_DATAPTR);
 			/*
 			 * Copy entries up to cover the stale entry
@@ -512,7 +512,7 @@ xfs_dir2_leaf_addname(
 		 */
 		else {
 			ASSERT(highstale - index >= 0);
-			ASSERT(INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) ==
+			ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
 			       XFS_DIR2_NULL_DATAPTR);
 			/*
 			 * Copy entries down to copver the stale entry
@@ -531,8 +531,9 @@ xfs_dir2_leaf_addname(
 	/*
 	 * Fill in the new leaf entry.
 	 */
-	INT_SET(lep->hashval, ARCH_CONVERT, args->hashval);
-	INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block, INT_GET(*tagp, ARCH_CONVERT)));
+	lep->hashval = cpu_to_be32(args->hashval);
+	lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block,
+				INT_GET(*tagp, ARCH_CONVERT)));
 	/*
 	 * Log the leaf fields and give up the buffers.
 	 */
@@ -581,9 +582,9 @@ xfs_dir2_leaf_check(
 	 */
 	for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
 		if (i + 1 < be16_to_cpu(leaf->hdr.count))
-			ASSERT(INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) <=
-			       INT_GET(leaf->ents[i + 1].hashval, ARCH_CONVERT));
-		if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+			ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
+			       be32_to_cpu(leaf->ents[i + 1].hashval));
+		if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
 			stale++;
 	}
 	ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
@@ -612,7 +613,7 @@ xfs_dir2_leaf_compact(
 	 * Compress out the stale entries in place.
 	 */
 	for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) {
-		if (INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+		if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
 			continue;
 		/*
 		 * Only actually copy the entries that are different.
@@ -669,7 +670,7 @@ xfs_dir2_leaf_compact_x1(
 	 */
 	for (lowstale = index - 1;
 	     lowstale >= 0 &&
-		INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR;
+		be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR;
 	     lowstale--)
 		continue;
 	/*
@@ -678,7 +679,7 @@ xfs_dir2_leaf_compact_x1(
 	 */
 	for (highstale = index;
 	     highstale < be16_to_cpu(leaf->hdr.count) &&
-		INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR &&
+		be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR &&
 		(lowstale < 0 || index - lowstale > highstale - index);
 	     highstale++)
 		continue;
@@ -702,7 +703,7 @@ xfs_dir2_leaf_compact_x1(
 		if (index == from)
 			newindex = to;
 		if (from != keepstale &&
-		    INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) {
+		    be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) {
 			if (from == to)
 				*lowlogp = to;
 			continue;
@@ -1314,7 +1315,7 @@ xfs_dir2_leaf_lookup(
 	 */
 	dep = (xfs_dir2_data_entry_t *)
 	      ((char *)dbp->data +
-	       XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, INT_GET(lep->address, ARCH_CONVERT)));
+	       XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address)));
 	/*
 	 * Return the found inode number.
 	 */
@@ -1373,17 +1374,17 @@ xfs_dir2_leaf_lookup_int(
 	 * looking to match the name.
 	 */
 	for (lep = &leaf->ents[index], dbp = NULL, curdb = -1;
-	     index < be16_to_cpu(leaf->hdr.count) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
+	     index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
 	     lep++, index++) {
 		/*
 		 * Skip over stale leaf entries.
 		 */
-		if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
 			continue;
 		/*
 		 * Get the new data block number.
 		 */
-		newdb = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+		newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
 		/*
 		 * If it's not the same as the old data block number,
 		 * need to pitch the old one and read the new one.
@@ -1406,7 +1407,7 @@ xfs_dir2_leaf_lookup_int(
 		 */
 		dep = (xfs_dir2_data_entry_t *)
 		      ((char *)dbp->data +
-		       XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)));
+		       XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)));
 		/*
 		 * If it matches then return it.
 		 */
@@ -1471,9 +1472,9 @@ xfs_dir2_leaf_removename(
 	 * Point to the leaf entry, use that to point to the data entry.
 	 */
 	lep = &leaf->ents[index];
-	db = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+	db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)));
+	      ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)));
 	needscan = needlog = 0;
 	oldbest = be16_to_cpu(data->hdr.bestfree[0].length);
 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
@@ -1490,7 +1491,7 @@ xfs_dir2_leaf_removename(
 	 */
 	be16_add(&leaf->hdr.stale, 1);
 	xfs_dir2_leaf_log_header(tp, lbp);
-	INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR);
+	lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
 	xfs_dir2_leaf_log_ents(tp, lbp, index, index);
 	/*
 	 * Scan the freespace in the data block again if necessary,
@@ -1604,7 +1605,7 @@ xfs_dir2_leaf_replace(
 	 */
 	dep = (xfs_dir2_data_entry_t *)
 	      ((char *)dbp->data +
-	       XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, INT_GET(lep->address, ARCH_CONVERT)));
+	       XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address)));
 	ASSERT(args->inumber != INT_GET(dep->inumber, ARCH_CONVERT));
 	/*
 	 * Put the new inode number in, log it.
@@ -1649,7 +1650,7 @@ xfs_dir2_leaf_search_hash(
 		hashwant = args->hashval;
 	     low <= high; ) {
 		mid = (low + high) >> 1;
-		if ((hash = INT_GET(lep[mid].hashval, ARCH_CONVERT)) == hashwant)
+		if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant)
 			break;
 		if (hash < hashwant)
 			low = mid + 1;
@@ -1660,7 +1661,7 @@ xfs_dir2_leaf_search_hash(
 	 * Found one, back up through all the equal hash values.
 	 */
 	if (hash == hashwant) {
-		while (mid > 0 && INT_GET(lep[mid - 1].hashval, ARCH_CONVERT) == hashwant) {
+		while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) {
 			mid--;
 		}
 	}
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
index a1db51b845b..f57ca116241 100644
--- a/fs/xfs/xfs_dir2_leaf.h
+++ b/fs/xfs/xfs_dir2_leaf.h
@@ -54,8 +54,8 @@ typedef struct xfs_dir2_leaf_hdr {
  * Leaf block entry.
  */
 typedef struct xfs_dir2_leaf_entry {
-	xfs_dahash_t		hashval;	/* hash value of name */
-	xfs_dir2_dataptr_t	address;	/* address of data entry */
+	__be32			hashval;	/* hash value of name */
+	__be32			address;	/* address of data entry */
 } xfs_dir2_leaf_entry_t;
 
 /*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 40e4a2e2776..062e4a99209 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -223,9 +223,9 @@ xfs_dir2_leafn_add(
 		compact = be16_to_cpu(leaf->hdr.stale) > 1;
 	} else
 		compact = 0;
-	ASSERT(index == 0 || INT_GET(leaf->ents[index - 1].hashval, ARCH_CONVERT) <= args->hashval);
+	ASSERT(index == 0 || be32_to_cpu(leaf->ents[index - 1].hashval) <= args->hashval);
 	ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
-	       INT_GET(leaf->ents[index].hashval, ARCH_CONVERT) >= args->hashval);
+	       be32_to_cpu(leaf->ents[index].hashval) >= args->hashval);
 
 	if (args->justcheck)
 		return 0;
@@ -271,7 +271,7 @@ xfs_dir2_leafn_add(
 			 */
 			for (lowstale = index - 1;
 			     lowstale >= 0 &&
-				INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) !=
+				be32_to_cpu(leaf->ents[lowstale].address) !=
 				XFS_DIR2_NULL_DATAPTR;
 			     lowstale--)
 				continue;
@@ -282,7 +282,7 @@ xfs_dir2_leafn_add(
 			 */
 			for (highstale = index;
 			     highstale < be16_to_cpu(leaf->hdr.count) &&
-				INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) !=
+				be32_to_cpu(leaf->ents[highstale].address) !=
 				XFS_DIR2_NULL_DATAPTR &&
 				(lowstale < 0 ||
 				 index - lowstale - 1 >= highstale - index);
@@ -296,7 +296,7 @@ xfs_dir2_leafn_add(
 		if (lowstale >= 0 &&
 		    (highstale == be16_to_cpu(leaf->hdr.count) ||
 		     index - lowstale - 1 < highstale - index)) {
-			ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) ==
+			ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
 			       XFS_DIR2_NULL_DATAPTR);
 			ASSERT(index - lowstale - 1 >= 0);
 			if (index - lowstale - 1 > 0)
@@ -312,7 +312,7 @@ xfs_dir2_leafn_add(
 		 * Shift entries down toward the stale slot.
 		 */
 		else {
-			ASSERT(INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) ==
+			ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
 			       XFS_DIR2_NULL_DATAPTR);
 			ASSERT(highstale - index >= 0);
 			if (highstale - index > 0)
@@ -328,8 +328,9 @@ xfs_dir2_leafn_add(
 	/*
 	 * Insert the new entry, log everything.
 	 */
-	INT_SET(lep->hashval, ARCH_CONVERT, args->hashval);
-	INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_DB_OFF_TO_DATAPTR(mp, args->blkno, args->index));
+	lep->hashval = cpu_to_be32(args->hashval);
+	lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp,
+				args->blkno, args->index));
 	xfs_dir2_leaf_log_header(tp, bp);
 	xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh);
 	xfs_dir2_leafn_check(dp, bp);
@@ -356,10 +357,10 @@ xfs_dir2_leafn_check(
 	ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
 	for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
 		if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
-			ASSERT(INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) <=
-			       INT_GET(leaf->ents[i + 1].hashval, ARCH_CONVERT));
+			ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
+			       be32_to_cpu(leaf->ents[i + 1].hashval));
 		}
-		if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+		if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
 			stale++;
 	}
 	ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
@@ -383,7 +384,7 @@ xfs_dir2_leafn_lasthash(
 		*count = be16_to_cpu(leaf->hdr.count);
 	if (!leaf->hdr.count)
 		return 0;
-	return INT_GET(leaf->ents[be16_to_cpu(leaf->hdr.count) - 1].hashval, ARCH_CONVERT);
+	return be32_to_cpu(leaf->ents[be16_to_cpu(leaf->hdr.count) - 1].hashval);
 }
 
 /*
@@ -456,17 +457,17 @@ xfs_dir2_leafn_lookup_int(
 	 * Loop over leaf entries with the right hash value.
 	 */
 	for (lep = &leaf->ents[index];
-	     index < be16_to_cpu(leaf->hdr.count) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
+	     index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
 	     lep++, index++) {
 		/*
 		 * Skip stale leaf entries.
 		 */
-		if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
 			continue;
 		/*
 		 * Pull the data block number from the entry.
 		 */
-		newdb = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+		newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
 		/*
 		 * For addname, we're looking for a place to put the new entry.
 		 * We want to use a data block with an entry of equal
@@ -572,7 +573,7 @@ xfs_dir2_leafn_lookup_int(
 			 */
 			dep = (xfs_dir2_data_entry_t *)
 			      ((char *)curbp->data +
-			       XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)));
+			       XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)));
 			/*
 			 * Compare the entry, return it if it matches.
 			 */
@@ -672,7 +673,7 @@ xfs_dir2_leafn_moveents(
 		int	i;			/* temp leaf index */
 
 		for (i = start_s, stale = 0; i < start_s + count; i++) {
-			if (INT_GET(leaf_s->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+			if (be32_to_cpu(leaf_s->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
 				stale++;
 		}
 	} else
@@ -723,9 +724,9 @@ xfs_dir2_leafn_order(
 	ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
 	if (be16_to_cpu(leaf1->hdr.count) > 0 &&
 	    be16_to_cpu(leaf2->hdr.count) > 0 &&
-	    (INT_GET(leaf2->ents[0].hashval, ARCH_CONVERT) < INT_GET(leaf1->ents[0].hashval, ARCH_CONVERT) ||
-	     INT_GET(leaf2->ents[be16_to_cpu(leaf2->hdr.count) - 1].hashval, ARCH_CONVERT) <
-	     INT_GET(leaf1->ents[be16_to_cpu(leaf1->hdr.count) - 1].hashval, ARCH_CONVERT)))
+	    (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) ||
+	     be32_to_cpu(leaf2->ents[be16_to_cpu(leaf2->hdr.count) - 1].hashval) <
+	     be32_to_cpu(leaf1->ents[be16_to_cpu(leaf1->hdr.count) - 1].hashval)))
 		return 1;
 	return 0;
 }
@@ -781,9 +782,9 @@ xfs_dir2_leafn_rebalance(
 		xfs_dahash_t	midhash;	/* middle entry hash value */
 
 		if (mid >= be16_to_cpu(leaf1->hdr.count))
-			midhash = INT_GET(leaf2->ents[mid - be16_to_cpu(leaf1->hdr.count)].hashval, ARCH_CONVERT);
+			midhash = be32_to_cpu(leaf2->ents[mid - be16_to_cpu(leaf1->hdr.count)].hashval);
 		else
-			midhash = INT_GET(leaf1->ents[mid].hashval, ARCH_CONVERT);
+			midhash = be32_to_cpu(leaf1->ents[mid].hashval);
 		isleft = args->hashval <= midhash;
 	}
 	/*
@@ -875,9 +876,9 @@ xfs_dir2_leafn_remove(
 	/*
 	 * Extract the data block and offset from the entry.
 	 */
-	db = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+	db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
 	ASSERT(dblk->blkno == db);
-	off = XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT));
+	off = XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address));
 	ASSERT(dblk->index == off);
 	/*
 	 * Kill the leaf entry by marking it stale.
@@ -885,7 +886,7 @@ xfs_dir2_leafn_remove(
 	 */
 	be16_add(&leaf->hdr.stale, 1);
 	xfs_dir2_leaf_log_header(tp, bp);
-	INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR);
+	lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
 	xfs_dir2_leaf_log_ents(tp, bp, index, index);
 	/*
 	 * Make the data entry free.  Keep track of the longest freespace
@@ -1269,14 +1270,14 @@ xfs_dir2_leafn_unbalance(
 	/*
 	 * Move the entries from drop to the appropriate end of save.
 	 */
-	drop_blk->hashval = INT_GET(drop_leaf->ents[be16_to_cpu(drop_leaf->hdr.count) - 1].hashval, ARCH_CONVERT);
+	drop_blk->hashval = be32_to_cpu(drop_leaf->ents[be16_to_cpu(drop_leaf->hdr.count) - 1].hashval);
 	if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp))
 		xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, 0,
 			be16_to_cpu(drop_leaf->hdr.count));
 	else
 		xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp,
 			be16_to_cpu(save_leaf->hdr.count), be16_to_cpu(drop_leaf->hdr.count));
-	save_blk->hashval = INT_GET(save_leaf->ents[be16_to_cpu(save_leaf->hdr.count) - 1].hashval, ARCH_CONVERT);
+	save_blk->hashval = be32_to_cpu(save_leaf->ents[be16_to_cpu(save_leaf->hdr.count) - 1].hashval);
 	xfs_dir2_leafn_check(args->dp, save_blk->bp);
 }
 
@@ -1903,7 +1904,7 @@ xfs_dir2_node_replace(
 		ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
 		dep = (xfs_dir2_data_entry_t *)
 		      ((char *)data +
-		       XFS_DIR2_DATAPTR_TO_OFF(state->mp, INT_GET(lep->address, ARCH_CONVERT)));
+		       XFS_DIR2_DATAPTR_TO_OFF(state->mp, be32_to_cpu(lep->address)));
 		ASSERT(inum != INT_GET(dep->inumber, ARCH_CONVERT));
 		/*
 		 * Fill in the new inode number and log the entry.
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 35dd003051c..d98a41d1fe6 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -99,7 +99,7 @@ xfs_dir2_block_sfsize(
 	 * Iterate over the block's data entries by using the leaf pointers.
 	 */
 	for (i = 0; i < be32_to_cpu(btp->count); i++) {
-		if ((addr = INT_GET(blp[i].address, ARCH_CONVERT)) == XFS_DIR2_NULL_DATAPTR)
+		if ((addr = be32_to_cpu(blp[i].address)) == XFS_DIR2_NULL_DATAPTR)
 			continue;
 		/*
 		 * Calculate the pointer to the entry at hand.
-- 
cgit v1.2.3


From 3d693c6ed7892d066e8fb3311c6b74f7699326f9 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:28:27 +1100
Subject: [XFS] endianess annotations for XFS_DIR2_DATA_ENTRY_TAG_P

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25494a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_dir2_block.c | 26 +++++++++++++-------------
 fs/xfs/xfs_dir2_data.c  | 10 +++++-----
 fs/xfs/xfs_dir2_data.h  |  7 +++----
 fs/xfs/xfs_dir2_leaf.c  |  6 +++---
 fs/xfs/xfs_dir2_node.c  |  6 +++---
 5 files changed, 27 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index fa372b2b433..25d3a04f2e4 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -81,7 +81,7 @@ xfs_dir2_block_addname(
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	int			needlog;	/* need to log header */
 	int			needscan;	/* need to rescan freespace */
-	xfs_dir2_data_off_t	*tagp;		/* pointer to tag value */
+	__be16			*tagp;		/* pointer to tag value */
 	xfs_trans_t		*tp;		/* transaction structure */
 
 	xfs_dir2_trace_args("block_addname", args);
@@ -120,11 +120,11 @@ xfs_dir2_block_addname(
 		/*
 		 * Tag just before the first leaf entry.
 		 */
-		tagp = (xfs_dir2_data_off_t *)blp - 1;
+		tagp = (__be16 *)blp - 1;
 		/*
 		 * Data object just before the first leaf entry.
 		 */
-		enddup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT));
+		enddup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
 		/*
 		 * If it's not free then can't do this add without cleaning up:
 		 * the space before the first leaf entry needs to be free so it
@@ -183,11 +183,11 @@ xfs_dir2_block_addname(
 		/*
 		 * Tag just before the first leaf entry.
 		 */
-		tagp = (xfs_dir2_data_off_t *)blp - 1;
+		tagp = (__be16 *)blp - 1;
 		/*
 		 * Data object just before the first leaf entry.
 		 */
-		dup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT));
+		dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
 		/*
 		 * If it's not free then the data will go where the
 		 * leaf data starts now, if it works at all.
@@ -404,7 +404,7 @@ xfs_dir2_block_addname(
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, args->namelen);
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
-	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
+	*tagp = cpu_to_be16((char *)dep - (char *)block);
 	/*
 	 * Clean up the bestfree array and log the header, tail, and entry.
 	 */
@@ -896,7 +896,7 @@ xfs_dir2_leaf_to_block(
 	int			needscan;	/* need to scan for bestfree */
 	xfs_dir2_sf_hdr_t	sfh;		/* shortform header */
 	int			size;		/* bytes used */
-	xfs_dir2_data_off_t	*tagp;		/* end of entry (tag) */
+	__be16			*tagp;		/* end of entry (tag) */
 	int			to;		/* block/leaf to index */
 	xfs_trans_t		*tp;		/* transaction pointer */
 
@@ -944,8 +944,8 @@ xfs_dir2_leaf_to_block(
 	/*
 	 * Look at the last data entry.
 	 */
-	tagp = (xfs_dir2_data_off_t *)((char *)block + mp->m_dirblksize) - 1;
-	dup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT));
+	tagp = (__be16 *)((char *)block + mp->m_dirblksize) - 1;
+	dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
 	/*
 	 * If it's not free or is too short we can't do it.
 	 */
@@ -1044,7 +1044,7 @@ xfs_dir2_sf_to_block(
 	int			offset;		/* target block offset */
 	xfs_dir2_sf_entry_t	*sfep;		/* sf entry pointer */
 	xfs_dir2_sf_t		*sfp;		/* shortform structure */
-	xfs_dir2_data_off_t	*tagp;		/* end of data entry */
+	__be16			*tagp;		/* end of data entry */
 	xfs_trans_t		*tp;		/* transaction pointer */
 
 	xfs_dir2_trace_args("sf_to_block", args);
@@ -1134,7 +1134,7 @@ xfs_dir2_sf_to_block(
 	dep->namelen = 1;
 	dep->name[0] = '.';
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
-	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
+	*tagp = cpu_to_be16((char *)dep - (char *)block);
 	xfs_dir2_data_log_entry(tp, bp, dep);
 	blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
 	blp[0].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
@@ -1148,7 +1148,7 @@ xfs_dir2_sf_to_block(
 	dep->namelen = 2;
 	dep->name[0] = dep->name[1] = '.';
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
-	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
+	*tagp = cpu_to_be16((char *)dep - (char *)block);
 	xfs_dir2_data_log_entry(tp, bp, dep);
 	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
 	blp[1].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
@@ -1198,7 +1198,7 @@ xfs_dir2_sf_to_block(
 		dep->namelen = sfep->namelen;
 		memcpy(dep->name, sfep->name, dep->namelen);
 		tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
-		INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
+		*tagp = cpu_to_be16((char *)dep - (char *)block);
 		xfs_dir2_data_log_entry(tp, bp, dep);
 		blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname(
 					(char *)sfep->name, sfep->namelen));
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 084b134361a..bb3d03ff002 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -134,7 +134,7 @@ xfs_dir2_data_check(
 		dep = (xfs_dir2_data_entry_t *)p;
 		ASSERT(dep->namelen != 0);
 		ASSERT(xfs_dir_ino_validate(mp, INT_GET(dep->inumber, ARCH_CONVERT)) == 0);
-		ASSERT(INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT) ==
+		ASSERT(be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)) ==
 		       (char *)dep - (char *)d);
 		count++;
 		lastfree = 0;
@@ -376,7 +376,7 @@ xfs_dir2_data_freescan(
 		else {
 			dep = (xfs_dir2_data_entry_t *)p;
 			ASSERT((char *)dep - (char *)d ==
-			       INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT));
+			       be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)));
 			p += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
 		}
 	}
@@ -549,10 +549,10 @@ xfs_dir2_data_make_free(
 	 * the previous entry and see if it's free.
 	 */
 	if (offset > sizeof(d->hdr)) {
-		xfs_dir2_data_off_t	*tagp;	/* tag just before us */
+		__be16			*tagp;	/* tag just before us */
 
-		tagp = (xfs_dir2_data_off_t *)((char *)d + offset) - 1;
-		prevdup = (xfs_dir2_data_unused_t *)((char *)d + INT_GET(*tagp, ARCH_CONVERT));
+		tagp = (__be16 *)((char *)d + offset) - 1;
+		prevdup = (xfs_dir2_data_unused_t *)((char *)d + be16_to_cpu(*tagp));
 		if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
 			prevdup = NULL;
 	} else
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index 479b59f50db..0847cbb53e1 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -134,12 +134,11 @@ static inline int xfs_dir2_data_entsize(int n)
  * Pointer to an entry's tag word.
  */
 #define	XFS_DIR2_DATA_ENTRY_TAG_P(dep)	xfs_dir2_data_entry_tag_p(dep)
-static inline xfs_dir2_data_off_t *
+static inline __be16 *
 xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
 {
-	return (xfs_dir2_data_off_t *) \
-		 ((char *)(dep) + XFS_DIR2_DATA_ENTSIZE((dep)->namelen) - \
-		  (uint)sizeof(xfs_dir2_data_off_t));
+	return (__be16 *)((char *)dep +
+		XFS_DIR2_DATA_ENTSIZE(dep->namelen) - sizeof(__be16));
 }
 
 /*
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 752fc67354e..b5036512d02 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -187,7 +187,7 @@ xfs_dir2_leaf_addname(
 	int			needbytes;	/* leaf block bytes needed */
 	int			needlog;	/* need to log data header */
 	int			needscan;	/* need to rescan data free */
-	xfs_dir2_data_off_t	*tagp;		/* end of data entry */
+	__be16			*tagp;		/* end of data entry */
 	xfs_trans_t		*tp;		/* transaction pointer */
 	xfs_dir2_db_t		use_block;	/* data block number */
 
@@ -411,7 +411,7 @@ xfs_dir2_leaf_addname(
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, dep->namelen);
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
-	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data));
+	*tagp = cpu_to_be16((char *)dep - (char *)data);
 	/*
 	 * Need to scan fix up the bestfree table.
 	 */
@@ -533,7 +533,7 @@ xfs_dir2_leaf_addname(
 	 */
 	lep->hashval = cpu_to_be32(args->hashval);
 	lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block,
-				INT_GET(*tagp, ARCH_CONVERT)));
+				be16_to_cpu(*tagp)));
 	/*
 	 * Log the leaf fields and give up the buffers.
 	 */
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 062e4a99209..56b7cc89645 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -1379,7 +1379,7 @@ xfs_dir2_node_addname_int(
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	int			needlog;	/* need to log data header */
 	int			needscan;	/* need to rescan data frees */
-	xfs_dir2_data_off_t	*tagp;		/* data entry tag pointer */
+	__be16			*tagp;		/* data entry tag pointer */
 	xfs_trans_t		*tp;		/* transaction pointer */
 
 	dp = args->dp;
@@ -1699,7 +1699,7 @@ xfs_dir2_node_addname_int(
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, dep->namelen);
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
-	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data));
+	*tagp = cpu_to_be16((char *)dep - (char *)data);
 	xfs_dir2_data_log_entry(tp, dbp, dep);
 	/*
 	 * Rescan the block for bestfree if needed.
@@ -1732,7 +1732,7 @@ xfs_dir2_node_addname_int(
 	 * Return the data block and offset in args, then drop the data block.
 	 */
 	args->blkno = (xfs_dablk_t)dbno;
-	args->index = INT_GET(*tagp, ARCH_CONVERT);
+	args->index = be16_to_cpu(*tagp);
 	xfs_da_buf_done(dbp);
 	return 0;
 }
-- 
cgit v1.2.3


From 89da054424a775b4b257556eda8a300be1134d7c Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:28:40 +1100
Subject: [XFS] endianess annotations for xfs_da_blkinfo_t

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25495a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_attr.c       |  17 ++--
 fs/xfs/xfs_attr_leaf.c  |  98 ++++++++---------------
 fs/xfs/xfs_da_btree.c   | 206 +++++++++++++++++++++++-------------------------
 fs/xfs/xfs_da_btree.h   |   8 +-
 fs/xfs/xfs_dir.c        |  16 ++--
 fs/xfs/xfs_dir2_block.c |   2 +-
 fs/xfs/xfs_dir2_leaf.c  |  20 ++---
 fs/xfs/xfs_dir2_node.c  |  28 +++----
 fs/xfs/xfs_dir_leaf.c   |  55 +++++++------
 9 files changed, 205 insertions(+), 245 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index acf1b7c9f1d..36b120d859a 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -1127,8 +1127,7 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
 		return(error);
 	ASSERT(bp != NULL);
 	leaf = bp->data;
-	if (unlikely(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						!= XFS_ATTR_LEAF_MAGIC)) {
+	if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) {
 		XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
 				     context->dp->i_mount, leaf);
 		xfs_da_brelse(NULL, bp);
@@ -1541,8 +1540,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 						     XFS_ATTR_FORK);
 		if (error)
 			goto out;
-		ASSERT(INT_GET(((xfs_attr_leafblock_t *)
-				      bp->data)->hdr.info.magic, ARCH_CONVERT)
+		ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *)
+				      bp->data)->hdr.info.magic)
 						       == XFS_ATTR_LEAF_MAGIC);
 
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
@@ -1763,7 +1762,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 			return(error);
 		if (bp) {
 			node = bp->data;
-			switch (INT_GET(node->hdr.info.magic, ARCH_CONVERT)) {
+			switch (be16_to_cpu(node->hdr.info.magic)) {
 			case XFS_DA_NODE_MAGIC:
 				xfs_attr_trace_l_cn("wrong blk", context, node);
 				xfs_da_brelse(NULL, bp);
@@ -1817,10 +1816,10 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 				return(XFS_ERROR(EFSCORRUPTED));
 			}
 			node = bp->data;
-			if (INT_GET(node->hdr.info.magic, ARCH_CONVERT)
+			if (be16_to_cpu(node->hdr.info.magic)
 							== XFS_ATTR_LEAF_MAGIC)
 				break;
-			if (unlikely(INT_GET(node->hdr.info.magic, ARCH_CONVERT)
+			if (unlikely(be16_to_cpu(node->hdr.info.magic)
 							!= XFS_DA_NODE_MAGIC)) {
 				XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
 						     XFS_ERRLEVEL_LOW,
@@ -1858,7 +1857,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 	 */
 	for (;;) {
 		leaf = bp->data;
-		if (unlikely(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
+		if (unlikely(be16_to_cpu(leaf->hdr.info.magic)
 						!= XFS_ATTR_LEAF_MAGIC)) {
 			XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)",
 					     XFS_ERRLEVEL_LOW,
@@ -1869,7 +1868,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 		error = xfs_attr_leaf_list_int(bp, context);
 		if (error || !leaf->hdr.info.forw)
 			break;	/* not really an error, buffer full or EOF */
-		cursor->blkno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT);
+		cursor->blkno = be32_to_cpu(leaf->hdr.info.forw);
 		xfs_da_brelse(NULL, bp);
 		error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
 					      &bp, XFS_ATTR_FORK);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index fe91eac4e2a..d279d944e03 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -720,8 +720,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
 	int bytes, i;
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 
 	entry = &leaf->entries[0];
 	bytes = sizeof(struct xfs_attr_sf_hdr);
@@ -766,8 +765,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
 	ASSERT(bp != NULL);
 	memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
 	leaf = (xfs_attr_leafblock_t *)tmpbuffer;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
 
 	/*
@@ -875,8 +873,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
 		goto out;
 	node = bp1->data;
 	leaf = bp2->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	/* both on-disk, don't endian-flip twice */
 	node->btree[0].hashval =
 		leaf->entries[INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval;
@@ -920,7 +917,7 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
 	leaf = bp->data;
 	memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
 	hdr = &leaf->hdr;
-	INT_SET(hdr->info.magic, ARCH_CONVERT, XFS_ATTR_LEAF_MAGIC);
+	hdr->info.magic = cpu_to_be16(XFS_ATTR_LEAF_MAGIC);
 	INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount));
 	if (!hdr->firstused) {
 		INT_SET(hdr->firstused, ARCH_CONVERT,
@@ -1004,8 +1001,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	int tablesize, entsize, sum, tmp, i;
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT((args->index >= 0)
 		&& (args->index <= INT_GET(leaf->hdr.count, ARCH_CONVERT)));
 	hdr = &leaf->hdr;
@@ -1079,8 +1075,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 	int tmp, i;
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	hdr = &leaf->hdr;
 	ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE));
 	ASSERT((args->index >= 0)
@@ -1279,10 +1274,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
 	leaf1 = blk1->bp->data;
 	leaf2 = blk2->bp->data;
-	ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
-	ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	args = state->args;
 
 	/*
@@ -1566,7 +1559,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 	 */
 	blk = &state->path.blk[ state->path.active-1 ];
 	info = blk->bp->data;
-	ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
 	leaf = (xfs_attr_leafblock_t *)info;
 	count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
 	bytes = sizeof(xfs_attr_leaf_hdr_t) +
@@ -1588,7 +1581,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 		 * Make altpath point to the block we want to keep and
 		 * path point to the block we want to drop (this one).
 		 */
-		forward = info->forw;
+		forward = (info->forw != 0);
 		memcpy(&state->altpath, &state->path, sizeof(state->path));
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
@@ -1610,13 +1603,12 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 	 * to shrink an attribute list over time.
 	 */
 	/* start with smaller blk num */
-	forward = (INT_GET(info->forw, ARCH_CONVERT)
-					< INT_GET(info->back, ARCH_CONVERT));
+	forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back));
 	for (i = 0; i < 2; forward = !forward, i++) {
 		if (forward)
-			blkno = INT_GET(info->forw, ARCH_CONVERT);
+			blkno = be32_to_cpu(info->forw);
 		else
-			blkno = INT_GET(info->back, ARCH_CONVERT);
+			blkno = be32_to_cpu(info->back);
 		if (blkno == 0)
 			continue;
 		error = xfs_da_read_buf(state->args->trans, state->args->dp,
@@ -1630,8 +1622,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 		bytes  = state->blocksize - (state->blocksize>>2);
 		bytes -= INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT);
 		leaf = bp->data;
-		ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+		ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 		count += INT_GET(leaf->hdr.count, ARCH_CONVERT);
 		bytes -= INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT);
 		bytes -= count * sizeof(xfs_attr_leaf_entry_t);
@@ -1685,8 +1676,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	xfs_mount_t *mp;
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	hdr = &leaf->hdr;
 	mp = args->trans->t_mountp;
 	ASSERT((INT_GET(hdr->count, ARCH_CONVERT) > 0)
@@ -1859,10 +1849,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC);
 	drop_leaf = drop_blk->bp->data;
 	save_leaf = save_blk->bp->data;
-	ASSERT(INT_GET(drop_leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
-	ASSERT(INT_GET(save_leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	drop_hdr = &drop_leaf->hdr;
 	save_hdr = &save_leaf->hdr;
 
@@ -1972,8 +1960,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	xfs_dahash_t hashval;
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT)
 					< (XFS_LBSIZE(args->dp->i_mount)/8));
 
@@ -2090,8 +2077,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	xfs_attr_leaf_name_remote_t *name_rmt;
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT)
 					< (XFS_LBSIZE(args->dp->i_mount)/8));
 	ASSERT(args->index < ((int)INT_GET(leaf->hdr.count, ARCH_CONVERT)));
@@ -2159,10 +2145,8 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 	/*
 	 * Set up environment.
 	 */
-	ASSERT(INT_GET(leaf_s->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
-	ASSERT(INT_GET(leaf_d->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	hdr_s = &leaf_s->hdr;
 	hdr_d = &leaf_d->hdr;
 	ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0)
@@ -2301,10 +2285,8 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
 
 	leaf1 = leaf1_bp->data;
 	leaf2 = leaf2_bp->data;
-	ASSERT((INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC) &&
-	       (INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC));
+	ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) &&
+	       (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC));
 	if (   (INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0)
 	    && (INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0)
 	    && (   (INT_GET(leaf2->entries[ 0 ].hashval, ARCH_CONVERT) <
@@ -2327,8 +2309,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count)
 	xfs_attr_leafblock_t *leaf;
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	if (count)
 		*count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
 	if (!leaf->hdr.count)
@@ -2348,8 +2329,7 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
 	xfs_attr_leaf_name_remote_t *name_rmt;
 	int size;
 
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	if (leaf->entries[index].flags & XFS_ATTR_LOCAL) {
 		name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, index);
 		size = XFS_ATTR_LEAF_ENTSIZE_LOCAL(name_loc->namelen,
@@ -2596,8 +2576,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
 	ASSERT(bp != NULL);
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(args->index < INT_GET(leaf->hdr.count, ARCH_CONVERT));
 	ASSERT(args->index >= 0);
 	entry = &leaf->entries[ args->index ];
@@ -2663,8 +2642,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
 	ASSERT(bp != NULL);
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(args->index < INT_GET(leaf->hdr.count, ARCH_CONVERT));
 	ASSERT(args->index >= 0);
 	entry = &leaf->entries[ args->index ];
@@ -2736,15 +2714,13 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
 	}
 
 	leaf1 = bp1->data;
-	ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(args->index < INT_GET(leaf1->hdr.count, ARCH_CONVERT));
 	ASSERT(args->index >= 0);
 	entry1 = &leaf1->entries[ args->index ];
 
 	leaf2 = bp2->data;
-	ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(args->index2 < INT_GET(leaf2->hdr.count, ARCH_CONVERT));
 	ASSERT(args->index2 >= 0);
 	entry2 = &leaf2->entries[ args->index2 ];
@@ -2842,9 +2818,9 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
 	 * This is a depth-first traversal!
 	 */
 	info = bp->data;
-	if (INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
+	if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) {
 		error = xfs_attr_node_inactive(trans, dp, bp, 1);
-	} else if (INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC) {
+	} else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) {
 		error = xfs_attr_leaf_inactive(trans, dp, bp);
 	} else {
 		error = XFS_ERROR(EIO);
@@ -2892,8 +2868,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
 	}
 
 	node = bp->data;
-	ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT)
-						== XFS_DA_NODE_MAGIC);
+	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	parent_blkno = xfs_da_blkno(bp);	/* save for re-read later */
 	count = INT_GET(node->hdr.count, ARCH_CONVERT);
 	if (!count) {
@@ -2927,12 +2902,10 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
 			 * Invalidate the subtree, however we have to.
 			 */
 			info = child_bp->data;
-			if (INT_GET(info->magic, ARCH_CONVERT)
-							== XFS_DA_NODE_MAGIC) {
+			if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) {
 				error = xfs_attr_node_inactive(trans, dp,
 						child_bp, level+1);
-			} else if (INT_GET(info->magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC) {
+			} else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) {
 				error = xfs_attr_leaf_inactive(trans, dp,
 						child_bp);
 			} else {
@@ -2991,8 +2964,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 	int error, count, size, tmp, i;
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
-						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 
 	/*
 	 * Count the number of "remote" value extents.
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 31796c7faba..4f3bb1cb961 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -126,7 +126,7 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
 	node = bp->data;
 	node->hdr.info.forw = 0;
 	node->hdr.info.back = 0;
-	INT_SET(node->hdr.info.magic, ARCH_CONVERT, XFS_DA_NODE_MAGIC);
+	node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC);
 	node->hdr.info.pad = 0;
 	node->hdr.count = 0;
 	INT_SET(node->hdr.level, ARCH_CONVERT, level);
@@ -290,28 +290,28 @@ xfs_da_split(xfs_da_state_t *state)
 
 	node = oldblk->bp->data;
 	if (node->hdr.info.forw) {
-		if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) == addblk->blkno) {
+		if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
 			bp = addblk->bp;
 		} else {
 			ASSERT(state->extravalid);
 			bp = state->extrablk.bp;
 		}
 		node = bp->data;
-		INT_SET(node->hdr.info.back, ARCH_CONVERT, oldblk->blkno);
+		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
 		xfs_da_log_buf(state->args->trans, bp,
 		    XFS_DA_LOGRANGE(node, &node->hdr.info,
 		    sizeof(node->hdr.info)));
 	}
 	node = oldblk->bp->data;
-	if (INT_GET(node->hdr.info.back, ARCH_CONVERT)) {
-		if (INT_GET(node->hdr.info.back, ARCH_CONVERT) == addblk->blkno) {
+	if (node->hdr.info.back) {
+		if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
 			bp = addblk->bp;
 		} else {
 			ASSERT(state->extravalid);
 			bp = state->extrablk.bp;
 		}
 		node = bp->data;
-		INT_SET(node->hdr.info.forw, ARCH_CONVERT, oldblk->blkno);
+		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
 		xfs_da_log_buf(state->args->trans, bp,
 		    XFS_DA_LOGRANGE(node, &node->hdr.info,
 		    sizeof(node->hdr.info)));
@@ -359,12 +359,12 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	ASSERT(bp != NULL);
 	node = bp->data;
 	oldroot = blk1->bp->data;
-	if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
+	if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) {
 		size = (int)((char *)&oldroot->btree[INT_GET(oldroot->hdr.count, ARCH_CONVERT)] -
 			     (char *)oldroot);
 	} else {
 		ASSERT(XFS_DIR_IS_V2(mp));
-		ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+		ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 		leaf = (xfs_dir2_leaf_t *)oldroot;
 		size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
 			     (char *)leaf);
@@ -392,7 +392,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	INT_SET(node->hdr.count, ARCH_CONVERT, 2);
 
 #ifdef DEBUG
-	if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
+	if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) {
 		ASSERT(blk1->blkno >= mp->m_dirleafblk &&
 		       blk1->blkno < mp->m_dirfreeblk);
 		ASSERT(blk2->blkno >= mp->m_dirleafblk &&
@@ -424,7 +424,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	int useextra;
 
 	node = oldblk->bp->data;
-	ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 
 	/*
 	 * With V2 the extra block is data or freespace.
@@ -524,8 +524,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		node1 = node2;
 		node2 = tmpnode;
 	}
-	ASSERT(INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
-	ASSERT(INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+	ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	count = (INT_GET(node1->hdr.count, ARCH_CONVERT) - INT_GET(node2->hdr.count, ARCH_CONVERT)) / 2;
 	if (count == 0)
 		return;
@@ -622,7 +622,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 
 	node = oldblk->bp->data;
 	mp = state->mp;
-	ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	ASSERT((oldblk->index >= 0) && (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)));
 	ASSERT(newblk->blkno != 0);
 	if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
@@ -768,7 +768,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 	ASSERT(args != NULL);
 	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
 	oldroot = root_blk->bp->data;
-	ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	ASSERT(!oldroot->hdr.info.forw);
 	ASSERT(!oldroot->hdr.info.back);
 
@@ -791,10 +791,10 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 	ASSERT(bp != NULL);
 	blkinfo = bp->data;
 	if (INT_GET(oldroot->hdr.level, ARCH_CONVERT) == 1) {
-		ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
-		       INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
+		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+		       be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC);
 	} else {
-		ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC);
 	}
 	ASSERT(!blkinfo->forw);
 	ASSERT(!blkinfo->back);
@@ -830,7 +830,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 	 */
 	blk = &state->path.blk[ state->path.active-1 ];
 	info = blk->bp->data;
-	ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC);
 	node = (xfs_da_intnode_t *)info;
 	count = INT_GET(node->hdr.count, ARCH_CONVERT);
 	if (count > (state->node_ents >> 1)) {
@@ -849,7 +849,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 		 * Make altpath point to the block we want to keep and
 		 * path point to the block we want to drop (this one).
 		 */
-		forward = info->forw;
+		forward = (info->forw != 0);
 		memcpy(&state->altpath, &state->path, sizeof(state->path));
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
@@ -871,13 +871,12 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 	 * to shrink a directory over time.
 	 */
 	/* start with smaller blk num */
-	forward = (INT_GET(info->forw, ARCH_CONVERT)
-				< INT_GET(info->back, ARCH_CONVERT));
+	forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back));
 	for (i = 0; i < 2; forward = !forward, i++) {
 		if (forward)
-			blkno = INT_GET(info->forw, ARCH_CONVERT);
+			blkno = be32_to_cpu(info->forw);
 		else
-			blkno = INT_GET(info->back, ARCH_CONVERT);
+			blkno = be32_to_cpu(info->back);
 		if (blkno == 0)
 			continue;
 		error = xfs_da_read_buf(state->args->trans, state->args->dp,
@@ -891,7 +890,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 		count -= state->node_ents >> 2;
 		count -= INT_GET(node->hdr.count, ARCH_CONVERT);
 		node = bp->data;
-		ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 		count -= INT_GET(node->hdr.count, ARCH_CONVERT);
 		xfs_da_brelse(state->args->trans, bp);
 		if (count >= 0)
@@ -973,7 +972,7 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
 	}
 	for (blk--, level--; level >= 0; blk--, level--) {
 		node = blk->bp->data;
-		ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 		btree = &node->btree[ blk->index ];
 		if (INT_GET(btree->hashval, ARCH_CONVERT) == lasthash)
 			break;
@@ -1041,8 +1040,8 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 
 	drop_node = drop_blk->bp->data;
 	save_node = save_blk->bp->data;
-	ASSERT(INT_GET(drop_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
-	ASSERT(INT_GET(save_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+	ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	tp = state->args->trans;
 
 	/*
@@ -1138,15 +1137,15 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 			return(error);
 		}
 		curr = blk->bp->data;
-		ASSERT(INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
-		       INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
-		       INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
+		ASSERT(be16_to_cpu(curr->magic) == XFS_DA_NODE_MAGIC ||
+		       be16_to_cpu(curr->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+		       be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC);
 
 		/*
 		 * Search an intermediate node for a match.
 		 */
-		blk->magic = INT_GET(curr->magic, ARCH_CONVERT);
-		if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
+		blk->magic = be16_to_cpu(curr->magic);
+		if (blk->magic == XFS_DA_NODE_MAGIC) {
 			node = blk->bp->data;
 			blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
 
@@ -1193,15 +1192,15 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 				blkno = INT_GET(btree->before, ARCH_CONVERT);
 			}
 		}
-		else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC) {
+		else if (be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC) {
 			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
 			break;
 		}
-		else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
+		else if (be16_to_cpu(curr->magic) == XFS_DIR_LEAF_MAGIC) {
 			blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL);
 			break;
 		}
-		else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
+		else if (be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC) {
 			blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
 			break;
 		}
@@ -1274,8 +1273,8 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
 	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
 	       old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
 	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);
-	ASSERT(old_blk->magic == INT_GET(old_info->magic, ARCH_CONVERT));
-	ASSERT(new_blk->magic == INT_GET(new_info->magic, ARCH_CONVERT));
+	ASSERT(old_blk->magic == be16_to_cpu(old_info->magic));
+	ASSERT(new_blk->magic == be16_to_cpu(new_info->magic));
 	ASSERT(old_blk->magic == new_blk->magic);
 
 	switch (old_blk->magic) {
@@ -1302,47 +1301,44 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
 		/*
 		 * Link new block in before existing block.
 		 */
-		INT_SET(new_info->forw, ARCH_CONVERT, old_blk->blkno);
-		new_info->back = old_info->back; /* INT_: direct copy */
-		if (INT_GET(old_info->back, ARCH_CONVERT)) {
+		new_info->forw = cpu_to_be32(old_blk->blkno);
+		new_info->back = old_info->back;
+		if (old_info->back) {
 			error = xfs_da_read_buf(args->trans, args->dp,
-						INT_GET(old_info->back,
-							ARCH_CONVERT), -1, &bp,
-						args->whichfork);
+						be32_to_cpu(old_info->back),
+						-1, &bp, args->whichfork);
 			if (error)
 				return(error);
 			ASSERT(bp != NULL);
 			tmp_info = bp->data;
-			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(old_info->magic, ARCH_CONVERT));
-			ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == old_blk->blkno);
-			INT_SET(tmp_info->forw, ARCH_CONVERT, new_blk->blkno);
+			ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic));
+			ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
+			tmp_info->forw = cpu_to_be32(new_blk->blkno);
 			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
 			xfs_da_buf_done(bp);
 		}
-		INT_SET(old_info->back, ARCH_CONVERT, new_blk->blkno);
+		old_info->back = cpu_to_be32(new_blk->blkno);
 	} else {
 		/*
 		 * Link new block in after existing block.
 		 */
-		new_info->forw = old_info->forw; /* INT_: direct copy */
-		INT_SET(new_info->back, ARCH_CONVERT, old_blk->blkno);
-		if (INT_GET(old_info->forw, ARCH_CONVERT)) {
+		new_info->forw = old_info->forw;
+		new_info->back = cpu_to_be32(old_blk->blkno);
+		if (old_info->forw) {
 			error = xfs_da_read_buf(args->trans, args->dp,
-						INT_GET(old_info->forw, ARCH_CONVERT), -1, &bp,
-						args->whichfork);
+						be32_to_cpu(old_info->forw),
+						-1, &bp, args->whichfork);
 			if (error)
 				return(error);
 			ASSERT(bp != NULL);
 			tmp_info = bp->data;
-			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
-				    == INT_GET(old_info->magic, ARCH_CONVERT));
-			ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
-				    == old_blk->blkno);
-			INT_SET(tmp_info->back, ARCH_CONVERT, new_blk->blkno);
+			ASSERT(tmp_info->magic == old_info->magic);
+			ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
+			tmp_info->back = cpu_to_be32(new_blk->blkno);
 			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
 			xfs_da_buf_done(bp);
 		}
-		INT_SET(old_info->forw, ARCH_CONVERT, new_blk->blkno);
+		old_info->forw = cpu_to_be32(new_blk->blkno);
 	}
 
 	xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
@@ -1360,8 +1356,8 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
 
 	node1 = node1_bp->data;
 	node2 = node2_bp->data;
-	ASSERT((INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) &&
-	       (INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC));
+	ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) &&
+	       (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC));
 	if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
 	    ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) <
 	      INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
@@ -1381,7 +1377,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
 	xfs_da_intnode_t *node;
 
 	node = bp->data;
-	ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	if (count)
 		*count = INT_GET(node->hdr.count, ARCH_CONVERT);
 	if (!node->hdr.count)
@@ -1411,50 +1407,47 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
 	       save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
 	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
-	ASSERT(save_blk->magic == INT_GET(save_info->magic, ARCH_CONVERT));
-	ASSERT(drop_blk->magic == INT_GET(drop_info->magic, ARCH_CONVERT));
+	ASSERT(save_blk->magic == be16_to_cpu(save_info->magic));
+	ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic));
 	ASSERT(save_blk->magic == drop_blk->magic);
-	ASSERT((INT_GET(save_info->forw, ARCH_CONVERT) == drop_blk->blkno) ||
-	       (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno));
-	ASSERT((INT_GET(drop_info->forw, ARCH_CONVERT) == save_blk->blkno) ||
-	       (INT_GET(drop_info->back, ARCH_CONVERT) == save_blk->blkno));
+	ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
+	       (be32_to_cpu(save_info->back) == drop_blk->blkno));
+	ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
+	       (be32_to_cpu(drop_info->back) == save_blk->blkno));
 
 	/*
 	 * Unlink the leaf block from the doubly linked chain of leaves.
 	 */
-	if (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno) {
-		save_info->back = drop_info->back; /* INT_: direct copy */
-		if (INT_GET(drop_info->back, ARCH_CONVERT)) {
+	if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
+		save_info->back = drop_info->back;
+		if (drop_info->back) {
 			error = xfs_da_read_buf(args->trans, args->dp,
-						INT_GET(drop_info->back,
-							ARCH_CONVERT), -1, &bp,
-						args->whichfork);
+						be32_to_cpu(drop_info->back),
+						-1, &bp, args->whichfork);
 			if (error)
 				return(error);
 			ASSERT(bp != NULL);
 			tmp_info = bp->data;
-			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(save_info->magic, ARCH_CONVERT));
-			ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == drop_blk->blkno);
-			INT_SET(tmp_info->forw, ARCH_CONVERT, save_blk->blkno);
+			ASSERT(tmp_info->magic == save_info->magic);
+			ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
+			tmp_info->forw = cpu_to_be32(save_blk->blkno);
 			xfs_da_log_buf(args->trans, bp, 0,
 						    sizeof(*tmp_info) - 1);
 			xfs_da_buf_done(bp);
 		}
 	} else {
-		save_info->forw = drop_info->forw; /* INT_: direct copy */
-		if (INT_GET(drop_info->forw, ARCH_CONVERT)) {
+		save_info->forw = drop_info->forw;
+		if (drop_info->forw) {
 			error = xfs_da_read_buf(args->trans, args->dp,
-						INT_GET(drop_info->forw, ARCH_CONVERT), -1, &bp,
-						args->whichfork);
+						be32_to_cpu(drop_info->forw),
+						-1, &bp, args->whichfork);
 			if (error)
 				return(error);
 			ASSERT(bp != NULL);
 			tmp_info = bp->data;
-			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
-				    == INT_GET(save_info->magic, ARCH_CONVERT));
-			ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
-				    == drop_blk->blkno);
-			INT_SET(tmp_info->back, ARCH_CONVERT, save_blk->blkno);
+			ASSERT(tmp_info->magic == save_info->magic);
+			ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
+			tmp_info->back = cpu_to_be32(save_blk->blkno);
 			xfs_da_log_buf(args->trans, bp, 0,
 						    sizeof(*tmp_info) - 1);
 			xfs_da_buf_done(bp);
@@ -1497,7 +1490,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 	for (blk = &path->blk[level]; level >= 0; blk--, level--) {
 		ASSERT(blk->bp != NULL);
 		node = blk->bp->data;
-		ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 		if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
 			blk->index++;
 			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
@@ -1536,11 +1529,11 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 			return(error);
 		ASSERT(blk->bp != NULL);
 		info = blk->bp->data;
-		ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
-		       INT_GET(info->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
-		       INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
-		blk->magic = INT_GET(info->magic, ARCH_CONVERT);
-		if (INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
+		ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC ||
+		       be16_to_cpu(info->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+		       be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
+		blk->magic = be16_to_cpu(info->magic);
+		if (blk->magic == XFS_DA_NODE_MAGIC) {
 			node = (xfs_da_intnode_t *)info;
 			blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
 			if (forward)
@@ -1788,19 +1781,19 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	/*
 	 * Get values from the moved block.
 	 */
-	if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
+	if (be16_to_cpu(dead_info->magic) == XFS_DIR_LEAF_MAGIC) {
 		ASSERT(XFS_DIR_IS_V1(mp));
 		dead_leaf = (xfs_dir_leafblock_t *)dead_info;
 		dead_level = 0;
 		dead_hash =
 			INT_GET(dead_leaf->entries[INT_GET(dead_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
-	} else if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
+	} else if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) {
 		ASSERT(XFS_DIR_IS_V2(mp));
 		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
 		dead_level = 0;
 		dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
 	} else {
-		ASSERT(INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+		ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC);
 		dead_node = (xfs_da_intnode_t *)dead_info;
 		dead_level = INT_GET(dead_node->hdr.level, ARCH_CONVERT);
 		dead_hash = INT_GET(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
@@ -1809,19 +1802,19 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	/*
 	 * If the moved block has a left sibling, fix up the pointers.
 	 */
-	if ((sib_blkno = INT_GET(dead_info->back, ARCH_CONVERT))) {
+	if ((sib_blkno = be32_to_cpu(dead_info->back))) {
 		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
 			goto done;
 		sib_info = sib_buf->data;
 		if (unlikely(
-		    INT_GET(sib_info->forw, ARCH_CONVERT) != last_blkno ||
-		    INT_GET(sib_info->magic, ARCH_CONVERT) != INT_GET(dead_info->magic, ARCH_CONVERT))) {
+		    be32_to_cpu(sib_info->forw) != last_blkno ||
+		    sib_info->magic != dead_info->magic)) {
 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
 					 XFS_ERRLEVEL_LOW, mp);
 			error = XFS_ERROR(EFSCORRUPTED);
 			goto done;
 		}
-		INT_SET(sib_info->forw, ARCH_CONVERT, dead_blkno);
+		sib_info->forw = cpu_to_be32(dead_blkno);
 		xfs_da_log_buf(tp, sib_buf,
 			XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
 					sizeof(sib_info->forw)));
@@ -1831,20 +1824,19 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	/*
 	 * If the moved block has a right sibling, fix up the pointers.
 	 */
-	if ((sib_blkno = INT_GET(dead_info->forw, ARCH_CONVERT))) {
+	if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
 		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
 			goto done;
 		sib_info = sib_buf->data;
 		if (unlikely(
-		       INT_GET(sib_info->back, ARCH_CONVERT) != last_blkno
-		    || INT_GET(sib_info->magic, ARCH_CONVERT)
-				!= INT_GET(dead_info->magic, ARCH_CONVERT))) {
+		       be32_to_cpu(sib_info->back) != last_blkno ||
+		       sib_info->magic != dead_info->magic)) {
 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
 					 XFS_ERRLEVEL_LOW, mp);
 			error = XFS_ERROR(EFSCORRUPTED);
 			goto done;
 		}
-		INT_SET(sib_info->back, ARCH_CONVERT, dead_blkno);
+		sib_info->back = cpu_to_be32(dead_blkno);
 		xfs_da_log_buf(tp, sib_buf,
 			XFS_DA_LOGRANGE(sib_info, &sib_info->back,
 					sizeof(sib_info->back)));
@@ -1861,7 +1853,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 			goto done;
 		par_node = par_buf->data;
 		if (unlikely(
-		    INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC ||
+		    be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC ||
 		    (level >= 0 && level != INT_GET(par_node->hdr.level, ARCH_CONVERT) + 1))) {
 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
 					 XFS_ERRLEVEL_LOW, mp);
@@ -1898,7 +1890,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 			continue;
 		if (entno < INT_GET(par_node->hdr.count, ARCH_CONVERT))
 			break;
-		par_blkno = INT_GET(par_node->hdr.info.forw, ARCH_CONVERT);
+		par_blkno = be32_to_cpu(par_node->hdr.info.forw);
 		xfs_da_brelse(tp, par_buf);
 		par_buf = NULL;
 		if (unlikely(par_blkno == 0)) {
@@ -1912,7 +1904,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 		par_node = par_buf->data;
 		if (unlikely(
 		    INT_GET(par_node->hdr.level, ARCH_CONVERT) != level ||
-		    INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)) {
+		    be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) {
 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
 					 XFS_ERRLEVEL_LOW, mp);
 			error = XFS_ERROR(EFSCORRUPTED);
@@ -2203,7 +2195,7 @@ xfs_da_do_buf(
 		info = rbp->data;
 		data = rbp->data;
 		free = rbp->data;
-		magic = INT_GET(info->magic, ARCH_CONVERT);
+		magic = be16_to_cpu(info->magic);
 		magic1 = be32_to_cpu(data->hdr.magic);
 		if (unlikely(
 		    XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 41352113721..e727bf456a1 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -45,10 +45,10 @@ struct zone;
 	(XFS_DIR_IS_V1(mp) ? XFS_DIR_LEAF_MAGIC : XFS_DIR2_LEAFN_MAGIC)
 
 typedef struct xfs_da_blkinfo {
-	xfs_dablk_t forw;			/* previous block in list */
-	xfs_dablk_t back;			/* following block in list */
-	__uint16_t magic;			/* validity check on block */
-	__uint16_t pad;				/* unused */
+	__be32		forw;			/* previous block in list */
+	__be32		back;			/* following block in list */
+	__be16		magic;			/* validity check on block */
+	__be16		pad;			/* unused */
 } xfs_da_blkinfo_t;
 
 /*
diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c
index bb87d2a700a..7a708e993db 100644
--- a/fs/xfs/xfs_dir.c
+++ b/fs/xfs/xfs_dir.c
@@ -634,7 +634,7 @@ xfs_dir_leaf_removename(xfs_da_args_t *args, int *count, int *totallen)
 		return(retval);
 	ASSERT(bp != NULL);
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
 	retval = xfs_dir_leaf_lookup_int(bp, args, &index);
 	if (retval == EEXIST) {
 		(void)xfs_dir_leaf_remove(args->trans, bp, index);
@@ -912,7 +912,7 @@ xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio,
 			return(error);
 		if (bp)
 			leaf = bp->data;
-		if (bp && INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) {
+		if (bp && be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC) {
 			xfs_dir_trace_g_dub("node: block not a leaf",
 						   dp, uio, bno);
 			xfs_da_brelse(trans, bp);
@@ -949,7 +949,7 @@ xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio,
 			if (bp == NULL)
 				return(XFS_ERROR(EFSCORRUPTED));
 			node = bp->data;
-			if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)
+			if (be16_to_cpu(node->hdr.info.magic) != XFS_DA_NODE_MAGIC)
 				break;
 			btree = &node->btree[0];
 			xfs_dir_trace_g_dun("node: node detail", dp, uio, node);
@@ -982,7 +982,7 @@ xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio,
 	 */
 	for (;;) {
 		leaf = bp->data;
-		if (unlikely(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC)) {
+		if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC)) {
 			xfs_dir_trace_g_dul("node: not a leaf", dp, uio, leaf);
 			xfs_da_brelse(trans, bp);
 			XFS_CORRUPTION_ERROR("xfs_dir_node_getdents(1)",
@@ -990,7 +990,7 @@ xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio,
 			return XFS_ERROR(EFSCORRUPTED);
 		}
 		xfs_dir_trace_g_dul("node: leaf detail", dp, uio, leaf);
-		if ((nextbno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT))) {
+		if ((nextbno = be32_to_cpu(leaf->hdr.info.forw))) {
 			nextda = xfs_da_reada_buf(trans, dp, nextbno,
 						  XFS_DATA_FORK);
 		} else
@@ -1125,8 +1125,7 @@ xfs_dir_trace_g_dun(char *where, xfs_inode_t *dp, uio_t *uio,
 		     (void *)((unsigned long)(uio->uio_offset >> 32)),
 		     (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)),
 		     (void *)(unsigned long)uio->uio_resid,
-		     (void *)(unsigned long)
-			INT_GET(node->hdr.info.forw, ARCH_CONVERT),
+		     (void *)(unsigned long)be32_to_cpu(node->hdr.info.forw),
 		     (void *)(unsigned long)
 			INT_GET(node->hdr.count, ARCH_CONVERT),
 		     (void *)(unsigned long)
@@ -1150,8 +1149,7 @@ xfs_dir_trace_g_dul(char *where, xfs_inode_t *dp, uio_t *uio,
 		     (void *)((unsigned long)(uio->uio_offset >> 32)),
 		     (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)),
 		     (void *)(unsigned long)uio->uio_resid,
-		     (void *)(unsigned long)
-			INT_GET(leaf->hdr.info.forw, ARCH_CONVERT),
+		     (void *)(unsigned long)be32_to_cpu(leaf->hdr.info.forw),
 		     (void *)(unsigned long)
 			INT_GET(leaf->hdr.count, ARCH_CONVERT),
 		     (void *)(unsigned long)
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 25d3a04f2e4..aaf5644d8fd 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -905,7 +905,7 @@ xfs_dir2_leaf_to_block(
 	tp = args->trans;
 	mp = dp->i_mount;
 	leaf = lbp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
 	/*
 	 * If there are data blocks other than the first one, take this
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index b5036512d02..08648b18265 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -564,7 +564,7 @@ xfs_dir2_leaf_check(
 
 	leaf = bp->data;
 	mp = dp->i_mount;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
 	/*
 	 * This value is not restrictive enough.
 	 * Should factor in the size of the bests table as well.
@@ -1172,7 +1172,7 @@ xfs_dir2_leaf_init(
 	/*
 	 * Initialize the header.
 	 */
-	INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, magic);
+	leaf->hdr.info.magic = cpu_to_be16(magic);
 	leaf->hdr.info.forw = 0;
 	leaf->hdr.info.back = 0;
 	leaf->hdr.count = 0;
@@ -1208,7 +1208,7 @@ xfs_dir2_leaf_log_bests(
 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
 	ltp = XFS_DIR2_LEAF_TAIL_P(tp->t_mountp, leaf);
 	firstb = XFS_DIR2_LEAF_BESTS_P(ltp) + first;
 	lastb = XFS_DIR2_LEAF_BESTS_P(ltp) + last;
@@ -1231,8 +1231,8 @@ xfs_dir2_leaf_log_ents(
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC ||
-	       INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC ||
+	       be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	firstlep = &leaf->ents[first];
 	lastlep = &leaf->ents[last];
 	xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
@@ -1250,8 +1250,8 @@ xfs_dir2_leaf_log_header(
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC ||
-	       INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC ||
+	       be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
 		(uint)(sizeof(leaf->hdr) - 1));
 }
@@ -1270,7 +1270,7 @@ xfs_dir2_leaf_log_tail(
 
 	mp = tp->t_mountp;
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
 	xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
 		(uint)(mp->m_dirblksize - 1));
@@ -1806,7 +1806,7 @@ xfs_dir2_node_to_leaf(
 		return 0;
 	lbp = state->path.blk[0].bp;
 	leaf = lbp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	/*
 	 * Read the freespace block.
 	 */
@@ -1837,7 +1837,7 @@ xfs_dir2_node_to_leaf(
 		xfs_dir2_leaf_compact(args, lbp);
 	else
 		xfs_dir2_leaf_log_header(tp, lbp);
-	INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAF1_MAGIC);
+	leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAF1_MAGIC);
 	/*
 	 * Set up the leaf tail from the freespace block.
 	 */
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 56b7cc89645..af556f16a0c 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -164,7 +164,7 @@ xfs_dir2_leaf_to_node(
 		*to = cpu_to_be16(off);
 	}
 	free->hdr.nused = cpu_to_be32(n);
-	INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAFN_MAGIC);
+	leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC);
 	/*
 	 * Log everything.
 	 */
@@ -353,7 +353,7 @@ xfs_dir2_leafn_check(
 
 	leaf = bp->data;
 	mp = dp->i_mount;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
 	for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
 		if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
@@ -379,7 +379,7 @@ xfs_dir2_leafn_lasthash(
 	xfs_dir2_leaf_t	*leaf;			/* leaf structure */
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	if (count)
 		*count = be16_to_cpu(leaf->hdr.count);
 	if (!leaf->hdr.count)
@@ -420,7 +420,7 @@ xfs_dir2_leafn_lookup_int(
 	tp = args->trans;
 	mp = dp->i_mount;
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 #ifdef __KERNEL__
 	ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
 #endif
@@ -720,8 +720,8 @@ xfs_dir2_leafn_order(
 
 	leaf1 = leaf1_bp->data;
 	leaf2 = leaf2_bp->data;
-	ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
-	ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	if (be16_to_cpu(leaf1->hdr.count) > 0 &&
 	    be16_to_cpu(leaf2->hdr.count) > 0 &&
 	    (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) ||
@@ -868,7 +868,7 @@ xfs_dir2_leafn_remove(
 	tp = args->trans;
 	mp = dp->i_mount;
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	/*
 	 * Point to the entry we're removing.
 	 */
@@ -1139,7 +1139,7 @@ xfs_dir2_leafn_toosmall(
 	 */
 	blk = &state->path.blk[state->path.active - 1];
 	info = blk->bp->data;
-	ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC);
 	leaf = (xfs_dir2_leaf_t *)info;
 	count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
 	bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]);
@@ -1161,7 +1161,7 @@ xfs_dir2_leafn_toosmall(
 		 * Make altpath point to the block we want to keep and
 		 * path point to the block we want to drop (this one).
 		 */
-		forward = info->forw;
+		forward = (info->forw != 0);
 		memcpy(&state->altpath, &state->path, sizeof(state->path));
 		error = xfs_da_path_shift(state, &state->altpath, forward, 0,
 			&rval);
@@ -1177,9 +1177,9 @@ xfs_dir2_leafn_toosmall(
 	 * We prefer coalescing with the lower numbered sibling so as
 	 * to shrink a directory over time.
 	 */
-	forward = INT_GET(info->forw, ARCH_CONVERT) < INT_GET(info->back, ARCH_CONVERT);
+	forward = be32_to_cpu(info->forw) < be32_to_cpu(info->back);
 	for (i = 0, bp = NULL; i < 2; forward = !forward, i++) {
-		blkno = forward ?INT_GET( info->forw, ARCH_CONVERT) : INT_GET(info->back, ARCH_CONVERT);
+		blkno = forward ? be32_to_cpu(info->forw) : be32_to_cpu(info->back);
 		if (blkno == 0)
 			continue;
 		/*
@@ -1198,7 +1198,7 @@ xfs_dir2_leafn_toosmall(
 		count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
 		bytes = state->blocksize - (state->blocksize >> 2);
 		leaf = bp->data;
-		ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+		ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 		count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
 		bytes -= count * (uint)sizeof(leaf->ents[0]);
 		/*
@@ -1257,8 +1257,8 @@ xfs_dir2_leafn_unbalance(
 	ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
 	drop_leaf = drop_blk->bp->data;
 	save_leaf = save_blk->bp->data;
-	ASSERT(INT_GET(drop_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
-	ASSERT(INT_GET(save_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 	/*
 	 * If there are any stale leaf entries, take this opportunity
 	 * to purge them.
diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
index e83074016ab..0b2eca590b3 100644
--- a/fs/xfs/xfs_dir_leaf.c
+++ b/fs/xfs/xfs_dir_leaf.c
@@ -644,7 +644,7 @@ xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs)
 	ASSERT(bp != NULL);
 	memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
 	leaf = (xfs_dir_leafblock_t *)tmpbuffer;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
 	memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
 
 	/*
@@ -742,7 +742,7 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args)
 	}
 	node = bp1->data;
 	leaf = bp2->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
 	INT_SET(node->btree[0].hashval, ARCH_CONVERT, INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
 	xfs_da_buf_done(bp2);
 	INT_SET(node->btree[0].before, ARCH_CONVERT, blkno);
@@ -781,7 +781,7 @@ xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
 	leaf = bp->data;
 	memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
 	hdr = &leaf->hdr;
-	INT_SET(hdr->info.magic, ARCH_CONVERT, XFS_DIR_LEAF_MAGIC);
+	hdr->info.magic = cpu_to_be16(XFS_DIR_LEAF_MAGIC);
 	INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount));
 	if (!hdr->firstused)
 		INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount) - 1);
@@ -860,7 +860,7 @@ xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index)
 	int tablesize, entsize, sum, i, tmp, error;
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
 	ASSERT((index >= 0) && (index <= INT_GET(leaf->hdr.count, ARCH_CONVERT)));
 	hdr = &leaf->hdr;
 	entsize = XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen);
@@ -940,7 +940,7 @@ xfs_dir_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int index,
 	int tmp, i;
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
 	hdr = &leaf->hdr;
 	ASSERT((mapindex >= 0) && (mapindex < XFS_DIR_LEAF_MAPSIZE));
 	ASSERT((index >= 0) && (index <= INT_GET(hdr->count, ARCH_CONVERT)));
@@ -1097,8 +1097,8 @@ xfs_dir_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	ASSERT(blk2->magic == XFS_DIR_LEAF_MAGIC);
 	leaf1 = blk1->bp->data;
 	leaf2 = blk2->bp->data;
-	ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
-	ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
 
 	/*
 	 * Check ordering of blocks, reverse if it makes things simpler.
@@ -1325,7 +1325,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
 	 */
 	blk = &state->path.blk[ state->path.active-1 ];
 	info = blk->bp->data;
-	ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(info->magic) == XFS_DIR_LEAF_MAGIC);
 	leaf = (xfs_dir_leafblock_t *)info;
 	count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
 	bytes = (uint)sizeof(xfs_dir_leaf_hdr_t) +
@@ -1348,7 +1348,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
 		 * Make altpath point to the block we want to keep and
 		 * path point to the block we want to drop (this one).
 		 */
-		forward = info->forw;
+		forward = (info->forw != 0);
 		memcpy(&state->altpath, &state->path, sizeof(state->path));
 		error = xfs_da_path_shift(state, &state->altpath, forward,
 						 0, &retval);
@@ -1369,12 +1369,12 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
 	 * We prefer coalescing with the lower numbered sibling so as
 	 * to shrink a directory over time.
 	 */
-	forward = (INT_GET(info->forw, ARCH_CONVERT) < INT_GET(info->back, ARCH_CONVERT));	/* start with smaller blk num */
+	forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back));	/* start with smaller blk num */
 	for (i = 0; i < 2; forward = !forward, i++) {
 		if (forward)
-			blkno = INT_GET(info->forw, ARCH_CONVERT);
+			blkno = be32_to_cpu(info->forw);
 		else
-			blkno = INT_GET(info->back, ARCH_CONVERT);
+			blkno = be32_to_cpu(info->back);
 		if (blkno == 0)
 			continue;
 		error = xfs_da_read_buf(state->args->trans, state->args->dp,
@@ -1389,7 +1389,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
 		bytes  = state->blocksize - (state->blocksize>>2);
 		bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
 		leaf = bp->data;
-		ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+		ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
 		count += INT_GET(leaf->hdr.count, ARCH_CONVERT);
 		bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
 		bytes -= count * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
@@ -1447,7 +1447,7 @@ xfs_dir_leaf_remove(xfs_trans_t *trans, xfs_dabuf_t *bp, int index)
 	xfs_mount_t *mp;
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
 	hdr = &leaf->hdr;
 	mp = trans->t_mountp;
 	ASSERT((INT_GET(hdr->count, ARCH_CONVERT) > 0) && (INT_GET(hdr->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)));
@@ -1599,8 +1599,8 @@ xfs_dir_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	ASSERT(save_blk->magic == XFS_DIR_LEAF_MAGIC);
 	drop_leaf = drop_blk->bp->data;
 	save_leaf = save_blk->bp->data;
-	ASSERT(INT_GET(drop_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
-	ASSERT(INT_GET(save_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
 	drop_hdr = &drop_leaf->hdr;
 	save_hdr = &save_leaf->hdr;
 
@@ -1695,7 +1695,7 @@ xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index)
 	xfs_dahash_t hashval;
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
 	ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) < (XFS_LBSIZE(args->dp->i_mount)/8));
 
 	/*
@@ -1782,8 +1782,8 @@ xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
 	/*
 	 * Set up environment.
 	 */
-	ASSERT(INT_GET(leaf_s->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
-	ASSERT(INT_GET(leaf_d->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
 	hdr_s = &leaf_s->hdr;
 	hdr_d = &leaf_d->hdr;
 	ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0) && (INT_GET(hdr_s->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)));
@@ -1883,8 +1883,8 @@ xfs_dir_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
 
 	leaf1 = leaf1_bp->data;
 	leaf2 = leaf2_bp->data;
-	ASSERT((INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) &&
-	       (INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC));
+	ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR_LEAF_MAGIC) &&
+	       (be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR_LEAF_MAGIC));
 	if ((INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0) &&
 	    ((INT_GET(leaf2->entries[ 0 ].hashval, ARCH_CONVERT) <
 	      INT_GET(leaf1->entries[ 0 ].hashval, ARCH_CONVERT)) ||
@@ -1904,7 +1904,7 @@ xfs_dir_leaf_lasthash(xfs_dabuf_t *bp, int *count)
 	xfs_dir_leafblock_t *leaf;
 
 	leaf = bp->data;
-	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
 	if (count)
 		*count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
 	if (!leaf->hdr.count)
@@ -1940,7 +1940,7 @@ xfs_dir_leaf_getdents_int(
 
 	mp = dp->i_mount;
 	leaf = bp->data;
-	if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) {
+	if (be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC) {
 		*eobp = 1;
 		return XFS_ERROR(ENOENT);	/* XXX wrong code */
 	}
@@ -1992,7 +1992,7 @@ xfs_dir_leaf_getdents_int(
 
 	if (i == INT_GET(leaf->hdr.count, ARCH_CONVERT)) {
 		xfs_dir_trace_g_du("leaf: hash not found", dp, uio);
-		if (!INT_GET(leaf->hdr.info.forw, ARCH_CONVERT))
+		if (!leaf->hdr.info.forw)
 			uio->uio_offset =
 				XFS_DA_MAKE_COOKIE(mp, 0, 0, XFS_DA_MAXHASH);
 		/*
@@ -2047,8 +2047,7 @@ xfs_dir_leaf_getdents_int(
 			xfs_dir_trace_g_duc("leaf: middle cookie  ",
 						   dp, uio, p.cook.o);
 
-		} else if ((thishash = INT_GET(leaf->hdr.info.forw,
-							ARCH_CONVERT))) {
+		} else if ((thishash = be32_to_cpu(leaf->hdr.info.forw))) {
 			xfs_dabuf_t *bp2;
 			xfs_dir_leafblock_t *leaf2;
 
@@ -2064,9 +2063,9 @@ xfs_dir_leaf_getdents_int(
 			leaf2 = bp2->data;
 
 			if (unlikely(
-			       (INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT)
+			       (be16_to_cpu(leaf2->hdr.info.magic)
 						!= XFS_DIR_LEAF_MAGIC)
-			    || (INT_GET(leaf2->hdr.info.back, ARCH_CONVERT)
+			    || (be32_to_cpu(leaf2->hdr.info.back)
 						!= bno))) {	/* GROT */
 				XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(3)",
 						     XFS_ERRLEVEL_LOW, mp,
-- 
cgit v1.2.3


From 8f44e047a044df613bbc29837b9556e0c2e42e6b Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:28:47 +1100
Subject: [XFS] remove bogus INT_GET on u8 variables in xfs_dir2_block.c

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25496a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_dir2_block.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index aaf5644d8fd..bd5cee6aa51 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -1101,7 +1101,7 @@ xfs_dir2_sf_to_block(
 	 * Compute size of block "tail" area.
 	 */
 	i = (uint)sizeof(*btp) +
-	    (INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
+	    (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
 	/*
 	 * The whole thing is initialized to free by the init routine.
 	 * Say we're using the leaf and tail area.
@@ -1115,7 +1115,7 @@ xfs_dir2_sf_to_block(
 	 * Fill in the tail.
 	 */
 	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	btp->count = cpu_to_be32(INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2);	/* ., .. */
+	btp->count = cpu_to_be32(sfp->hdr.count + 2);	/* ., .. */
 	btp->stale = 0;
 	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
 	endoffset = (uint)((char *)blp - (char *)block);
@@ -1157,7 +1157,7 @@ xfs_dir2_sf_to_block(
 	/*
 	 * Loop over existing entries, stuff them in.
 	 */
-	if ((i = 0) == INT_GET(sfp->hdr.count, ARCH_CONVERT))
+	if ((i = 0) == sfp->hdr.count)
 		sfep = NULL;
 	else
 		sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
@@ -1205,7 +1205,7 @@ xfs_dir2_sf_to_block(
 		blp[2 + i].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
 						 (char *)dep - (char *)block));
 		offset = (int)((char *)(tagp + 1) - (char *)block);
-		if (++i == INT_GET(sfp->hdr.count, ARCH_CONVERT))
+		if (++i == sfp->hdr.count)
 			sfep = NULL;
 		else
 			sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
-- 
cgit v1.2.3


From 918ae424e18666249cf32f16ba2803061bf1ebb7 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:28:54 +1100
Subject: [XFS] endianess annotations for xfs_attr_leaf_hdr_t

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25497a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_attr.c      |   7 +-
 fs/xfs/xfs_attr_leaf.c | 414 ++++++++++++++++++++++---------------------------
 fs/xfs/xfs_attr_leaf.h |  14 +-
 3 files changed, 198 insertions(+), 237 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 36b120d859a..4a3f3cf6b20 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -1772,8 +1772,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 				leaf = bp->data;
 				if (cursor->hashval >
 				    INT_GET(leaf->entries[
-					 INT_GET(leaf->hdr.count,
-						ARCH_CONVERT)-1].hashval,
+					be16_to_cpu(leaf->hdr.count)-1].hashval,
 							ARCH_CONVERT)) {
 					xfs_attr_trace_l_cl("wrong blk",
 							   context, leaf);
@@ -2289,9 +2288,9 @@ xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
 				: 0,
 		(__psunsigned_t)context->dupcnt,
 		(__psunsigned_t)context->flags,
-		(__psunsigned_t)INT_GET(leaf->hdr.count, ARCH_CONVERT),
+		(__psunsigned_t)be16_to_cpu(leaf->hdr.count),
 		(__psunsigned_t)INT_GET(leaf->entries[0].hashval, ARCH_CONVERT),
-		(__psunsigned_t)INT_GET(leaf->entries[INT_GET(leaf->hdr.count, ARCH_CONVERT)-1].hashval, ARCH_CONVERT));
+		(__psunsigned_t)INT_GET(leaf->entries[be16_to_cpu(leaf->hdr.count)-1].hashval, ARCH_CONVERT));
 }
 
 /*
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index d279d944e03..31b55e58e2c 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -724,7 +724,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
 
 	entry = &leaf->entries[0];
 	bytes = sizeof(struct xfs_attr_sf_hdr);
-	for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++) {
+	for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
 		if (entry->flags & XFS_ATTR_INCOMPLETE)
 			continue;		/* don't copy partial entries */
 		if (!(entry->flags & XFS_ATTR_LOCAL))
@@ -808,7 +808,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
 	nargs.trans = args->trans;
 	nargs.oknoent = 1;
 	entry = &leaf->entries[0];
-	for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++) {
+	for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
 		if (entry->flags & XFS_ATTR_INCOMPLETE)
 			continue;	/* don't copy partial entries */
 		if (!entry->nameidx)
@@ -876,7 +876,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	/* both on-disk, don't endian-flip twice */
 	node->btree[0].hashval =
-		leaf->entries[INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval;
+		leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval;
 	INT_SET(node->btree[0].before, ARCH_CONVERT, blkno);
 	INT_SET(node->hdr.count, ARCH_CONVERT, 1);
 	xfs_da_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1);
@@ -918,18 +918,15 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
 	memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
 	hdr = &leaf->hdr;
 	hdr->info.magic = cpu_to_be16(XFS_ATTR_LEAF_MAGIC);
-	INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount));
+	hdr->firstused = cpu_to_be16(XFS_LBSIZE(dp->i_mount));
 	if (!hdr->firstused) {
-		INT_SET(hdr->firstused, ARCH_CONVERT,
+		hdr->firstused = cpu_to_be16(
 			XFS_LBSIZE(dp->i_mount) - XFS_ATTR_LEAF_NAME_ALIGN);
 	}
 
-	INT_SET(hdr->freemap[0].base, ARCH_CONVERT,
-						sizeof(xfs_attr_leaf_hdr_t));
-	INT_SET(hdr->freemap[0].size, ARCH_CONVERT,
-					  INT_GET(hdr->firstused, ARCH_CONVERT)
-					- INT_GET(hdr->freemap[0].base,
-								ARCH_CONVERT));
+	hdr->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t));
+	hdr->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr->firstused) -
+					   sizeof(xfs_attr_leaf_hdr_t));
 
 	xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
 
@@ -1003,7 +1000,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	leaf = bp->data;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	ASSERT((args->index >= 0)
-		&& (args->index <= INT_GET(leaf->hdr.count, ARCH_CONVERT)));
+		&& (args->index <= be16_to_cpu(leaf->hdr.count)));
 	hdr = &leaf->hdr;
 	entsize = xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
 			   args->trans->t_mountp->m_sb.sb_blocksize, NULL);
@@ -1012,26 +1009,25 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	 * Search through freemap for first-fit on new name length.
 	 * (may need to figure in size of entry struct too)
 	 */
-	tablesize = (INT_GET(hdr->count, ARCH_CONVERT) + 1)
+	tablesize = (be16_to_cpu(hdr->count) + 1)
 					* sizeof(xfs_attr_leaf_entry_t)
 					+ sizeof(xfs_attr_leaf_hdr_t);
 	map = &hdr->freemap[XFS_ATTR_LEAF_MAPSIZE-1];
 	for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE-1; i >= 0; map--, i--) {
-		if (tablesize > INT_GET(hdr->firstused, ARCH_CONVERT)) {
-			sum += INT_GET(map->size, ARCH_CONVERT);
+		if (tablesize > be16_to_cpu(hdr->firstused)) {
+			sum += be16_to_cpu(map->size);
 			continue;
 		}
 		if (!map->size)
 			continue;	/* no space in this map */
 		tmp = entsize;
-		if (INT_GET(map->base, ARCH_CONVERT)
-				< INT_GET(hdr->firstused, ARCH_CONVERT))
+		if (be16_to_cpu(map->base) < be16_to_cpu(hdr->firstused))
 			tmp += sizeof(xfs_attr_leaf_entry_t);
-		if (INT_GET(map->size, ARCH_CONVERT) >= tmp) {
+		if (be16_to_cpu(map->size) >= tmp) {
 			tmp = xfs_attr_leaf_add_work(bp, args, i);
 			return(tmp);
 		}
-		sum += INT_GET(map->size, ARCH_CONVERT);
+		sum += be16_to_cpu(map->size);
 	}
 
 	/*
@@ -1052,7 +1048,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	 * After compaction, the block is guaranteed to have only one
 	 * free region, in freemap[0].  If it is not big enough, give up.
 	 */
-	if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT)
+	if (be16_to_cpu(hdr->freemap[0].size)
 				< (entsize + sizeof(xfs_attr_leaf_entry_t)))
 		return(XFS_ERROR(ENOSPC));
 
@@ -1078,40 +1074,39 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	hdr = &leaf->hdr;
 	ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE));
-	ASSERT((args->index >= 0)
-		&& (args->index <= INT_GET(hdr->count, ARCH_CONVERT)));
+	ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count)));
 
 	/*
 	 * Force open some space in the entry array and fill it in.
 	 */
 	entry = &leaf->entries[args->index];
-	if (args->index < INT_GET(hdr->count, ARCH_CONVERT)) {
-		tmp  = INT_GET(hdr->count, ARCH_CONVERT) - args->index;
+	if (args->index < be16_to_cpu(hdr->count)) {
+		tmp  = be16_to_cpu(hdr->count) - args->index;
 		tmp *= sizeof(xfs_attr_leaf_entry_t);
 		memmove((char *)(entry+1), (char *)entry, tmp);
 		xfs_da_log_buf(args->trans, bp,
 		    XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
 	}
-	INT_MOD(hdr->count, ARCH_CONVERT, 1);
+	be16_add(&hdr->count, 1);
 
 	/*
 	 * Allocate space for the new string (at the end of the run).
 	 */
 	map = &hdr->freemap[mapindex];
 	mp = args->trans->t_mountp;
-	ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
-	ASSERT((INT_GET(map->base, ARCH_CONVERT) & 0x3) == 0);
-	ASSERT(INT_GET(map->size, ARCH_CONVERT) >=
+	ASSERT(be16_to_cpu(map->base) < XFS_LBSIZE(mp));
+	ASSERT((be16_to_cpu(map->base) & 0x3) == 0);
+	ASSERT(be16_to_cpu(map->size) >=
 		xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
 					 mp->m_sb.sb_blocksize, NULL));
-	ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
-	ASSERT((INT_GET(map->size, ARCH_CONVERT) & 0x3) == 0);
-	INT_MOD(map->size, ARCH_CONVERT,
+	ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp));
+	ASSERT((be16_to_cpu(map->size) & 0x3) == 0);
+	be16_add(&map->size,
 		-xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
 					  mp->m_sb.sb_blocksize, &tmp));
 	INT_SET(entry->nameidx, ARCH_CONVERT,
-					INT_GET(map->base, ARCH_CONVERT)
-				      + INT_GET(map->size, ARCH_CONVERT));
+					be16_to_cpu(map->base)
+				      + be16_to_cpu(map->size));
 	INT_SET(entry->hashval, ARCH_CONVERT, args->hashval);
 	entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
 	entry->flags |= (args->flags & ATTR_SECURE) ? XFS_ATTR_SECURE :
@@ -1128,7 +1123,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 	ASSERT((args->index == 0) || (INT_GET(entry->hashval, ARCH_CONVERT)
 						>= INT_GET((entry-1)->hashval,
 							    ARCH_CONVERT)));
-	ASSERT((args->index == INT_GET(hdr->count, ARCH_CONVERT)-1) ||
+	ASSERT((args->index == be16_to_cpu(hdr->count)-1) ||
 	       (INT_GET(entry->hashval, ARCH_CONVERT)
 			    <= (INT_GET((entry+1)->hashval, ARCH_CONVERT))));
 
@@ -1167,27 +1162,23 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 	 * Update the control info for this leaf node
 	 */
 	if (INT_GET(entry->nameidx, ARCH_CONVERT)
-				< INT_GET(hdr->firstused, ARCH_CONVERT)) {
+				< be16_to_cpu(hdr->firstused)) {
 		/* both on-disk, don't endian-flip twice */
 		hdr->firstused = entry->nameidx;
 	}
-	ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT)
-				>= ((INT_GET(hdr->count, ARCH_CONVERT)
-					* sizeof(*entry))+sizeof(*hdr)));
-	tmp = (INT_GET(hdr->count, ARCH_CONVERT)-1)
-					* sizeof(xfs_attr_leaf_entry_t)
+	ASSERT(be16_to_cpu(hdr->firstused) >=
+	       ((be16_to_cpu(hdr->count) * sizeof(*entry)) + sizeof(*hdr)));
+	tmp = (be16_to_cpu(hdr->count)-1) * sizeof(xfs_attr_leaf_entry_t)
 					+ sizeof(xfs_attr_leaf_hdr_t);
 	map = &hdr->freemap[0];
 	for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) {
-		if (INT_GET(map->base, ARCH_CONVERT) == tmp) {
-			INT_MOD(map->base, ARCH_CONVERT,
-					sizeof(xfs_attr_leaf_entry_t));
-			INT_MOD(map->size, ARCH_CONVERT,
-					-sizeof(xfs_attr_leaf_entry_t));
+		if (be16_to_cpu(map->base) == tmp) {
+			be16_add(&map->base, sizeof(xfs_attr_leaf_entry_t));
+			be16_add(&map->size,
+				 -((int)sizeof(xfs_attr_leaf_entry_t)));
 		}
 	}
-	INT_MOD(hdr->usedbytes, ARCH_CONVERT,
-				xfs_attr_leaf_entsize(leaf, args->index));
+	be16_add(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index));
 	xfs_da_log_buf(args->trans, bp,
 		XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
 	return(0);
@@ -1218,28 +1209,25 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
 	hdr_s = &leaf_s->hdr;
 	hdr_d = &leaf_d->hdr;
 	hdr_d->info = hdr_s->info;	/* struct copy */
-	INT_SET(hdr_d->firstused, ARCH_CONVERT, XFS_LBSIZE(mp));
+	hdr_d->firstused = cpu_to_be16(XFS_LBSIZE(mp));
 	/* handle truncation gracefully */
 	if (!hdr_d->firstused) {
-		INT_SET(hdr_d->firstused, ARCH_CONVERT,
+		hdr_d->firstused = cpu_to_be16(
 				XFS_LBSIZE(mp) - XFS_ATTR_LEAF_NAME_ALIGN);
 	}
 	hdr_d->usedbytes = 0;
 	hdr_d->count = 0;
 	hdr_d->holes = 0;
-	INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT,
-					sizeof(xfs_attr_leaf_hdr_t));
-	INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT,
-				INT_GET(hdr_d->firstused, ARCH_CONVERT)
-			      - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
+	hdr_d->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t));
+	hdr_d->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr_d->firstused) -
+					     sizeof(xfs_attr_leaf_hdr_t));
 
 	/*
 	 * Copy all entry's in the same (sorted) order,
 	 * but allocate name/value pairs packed and in sequence.
 	 */
 	xfs_attr_leaf_moveents(leaf_s, 0, leaf_d, 0,
-				(int)INT_GET(hdr_s->count, ARCH_CONVERT), mp);
-
+				be16_to_cpu(hdr_s->count), mp);
 	xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
 
 	kmem_free(tmpbuffer, XFS_LBSIZE(mp));
@@ -1312,22 +1300,21 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	/*
 	 * Move any entries required from leaf to leaf:
 	 */
-	if (count < INT_GET(hdr1->count, ARCH_CONVERT)) {
+	if (count < be16_to_cpu(hdr1->count)) {
 		/*
 		 * Figure the total bytes to be added to the destination leaf.
 		 */
 		/* number entries being moved */
-		count = INT_GET(hdr1->count, ARCH_CONVERT) - count;
-		space  = INT_GET(hdr1->usedbytes, ARCH_CONVERT) - totallen;
+		count = be16_to_cpu(hdr1->count) - count;
+		space  = be16_to_cpu(hdr1->usedbytes) - totallen;
 		space += count * sizeof(xfs_attr_leaf_entry_t);
 
 		/*
 		 * leaf2 is the destination, compact it if it looks tight.
 		 */
-		max  = INT_GET(hdr2->firstused, ARCH_CONVERT)
+		max  = be16_to_cpu(hdr2->firstused)
 						- sizeof(xfs_attr_leaf_hdr_t);
-		max -= INT_GET(hdr2->count, ARCH_CONVERT)
-					* sizeof(xfs_attr_leaf_entry_t);
+		max -= be16_to_cpu(hdr2->count) * sizeof(xfs_attr_leaf_entry_t);
 		if (space > max) {
 			xfs_attr_leaf_compact(args->trans, blk2->bp);
 		}
@@ -1335,13 +1322,12 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		/*
 		 * Move high entries from leaf1 to low end of leaf2.
 		 */
-		xfs_attr_leaf_moveents(leaf1,
-				INT_GET(hdr1->count, ARCH_CONVERT)-count,
+		xfs_attr_leaf_moveents(leaf1, be16_to_cpu(hdr1->count) - count,
 				leaf2, 0, count, state->mp);
 
 		xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
 		xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
-	} else if (count > INT_GET(hdr1->count, ARCH_CONVERT)) {
+	} else if (count > be16_to_cpu(hdr1->count)) {
 		/*
 		 * I assert that since all callers pass in an empty
 		 * second buffer, this code should never execute.
@@ -1351,17 +1337,16 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		 * Figure the total bytes to be added to the destination leaf.
 		 */
 		/* number entries being moved */
-		count -= INT_GET(hdr1->count, ARCH_CONVERT);
-		space  = totallen - INT_GET(hdr1->usedbytes, ARCH_CONVERT);
+		count -= be16_to_cpu(hdr1->count);
+		space  = totallen - be16_to_cpu(hdr1->usedbytes);
 		space += count * sizeof(xfs_attr_leaf_entry_t);
 
 		/*
 		 * leaf1 is the destination, compact it if it looks tight.
 		 */
-		max  = INT_GET(hdr1->firstused, ARCH_CONVERT)
+		max  = be16_to_cpu(hdr1->firstused)
 						- sizeof(xfs_attr_leaf_hdr_t);
-		max -= INT_GET(hdr1->count, ARCH_CONVERT)
-					* sizeof(xfs_attr_leaf_entry_t);
+		max -= be16_to_cpu(hdr1->count) * sizeof(xfs_attr_leaf_entry_t);
 		if (space > max) {
 			xfs_attr_leaf_compact(args->trans, blk1->bp);
 		}
@@ -1370,8 +1355,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		 * Move low entries from leaf2 to high end of leaf1.
 		 */
 		xfs_attr_leaf_moveents(leaf2, 0, leaf1,
-				(int)INT_GET(hdr1->count, ARCH_CONVERT), count,
-				state->mp);
+				be16_to_cpu(hdr1->count), count, state->mp);
 
 		xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
 		xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
@@ -1381,11 +1365,11 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	 * Copy out last hashval in each block for B-tree code.
 	 */
 	blk1->hashval =
-	    INT_GET(leaf1->entries[INT_GET(leaf1->hdr.count,
-				    ARCH_CONVERT)-1].hashval, ARCH_CONVERT);
+	    INT_GET(leaf1->entries[be16_to_cpu(leaf1->hdr.count)-1].hashval,
+			    ARCH_CONVERT);
 	blk2->hashval =
-	    INT_GET(leaf2->entries[INT_GET(leaf2->hdr.count,
-				    ARCH_CONVERT)-1].hashval, ARCH_CONVERT);
+	    INT_GET(leaf2->entries[be16_to_cpu(leaf2->hdr.count)-1].hashval,
+			    ARCH_CONVERT);
 
 	/*
 	 * Adjust the expected index for insertion.
@@ -1399,13 +1383,12 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	 * inserting.  The index/blkno fields refer to the "old" entry,
 	 * while the index2/blkno2 fields refer to the "new" entry.
 	 */
-	if (blk1->index > INT_GET(leaf1->hdr.count, ARCH_CONVERT)) {
+	if (blk1->index > be16_to_cpu(leaf1->hdr.count)) {
 		ASSERT(state->inleaf == 0);
-		blk2->index = blk1->index
-				- INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+		blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count);
 		args->index = args->index2 = blk2->index;
 		args->blkno = args->blkno2 = blk2->blkno;
-	} else if (blk1->index == INT_GET(leaf1->hdr.count, ARCH_CONVERT)) {
+	} else if (blk1->index == be16_to_cpu(leaf1->hdr.count)) {
 		if (state->inleaf) {
 			args->index = blk1->index;
 			args->blkno = blk1->blkno;
@@ -1413,7 +1396,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 			args->blkno2 = blk2->blkno;
 		} else {
 			blk2->index = blk1->index
-				    - INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+				    - be16_to_cpu(leaf1->hdr.count);
 			args->index = args->index2 = blk2->index;
 			args->blkno = args->blkno2 = blk2->blkno;
 		}
@@ -1457,15 +1440,14 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
 	 * Examine entries until we reduce the absolute difference in
 	 * byte usage between the two blocks to a minimum.
 	 */
-	max = INT_GET(hdr1->count, ARCH_CONVERT)
-			+ INT_GET(hdr2->count, ARCH_CONVERT);
+	max = be16_to_cpu(hdr1->count) + be16_to_cpu(hdr2->count);
 	half  = (max+1) * sizeof(*entry);
-	half += INT_GET(hdr1->usedbytes, ARCH_CONVERT)
-				+ INT_GET(hdr2->usedbytes, ARCH_CONVERT)
-				+ xfs_attr_leaf_newentsize(
-						state->args->namelen,
-						state->args->valuelen,
-						state->blocksize, NULL);
+	half += be16_to_cpu(hdr1->usedbytes) +
+		be16_to_cpu(hdr2->usedbytes) +
+		xfs_attr_leaf_newentsize(
+				state->args->namelen,
+				state->args->valuelen,
+				state->blocksize, NULL);
 	half /= 2;
 	lastdelta = state->blocksize;
 	entry = &leaf1->entries[0];
@@ -1491,7 +1473,7 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
 		/*
 		 * Wrap around into the second block if necessary.
 		 */
-		if (count == INT_GET(hdr1->count, ARCH_CONVERT)) {
+		if (count == be16_to_cpu(hdr1->count)) {
 			leaf1 = leaf2;
 			entry = &leaf1->entries[0];
 			index = 0;
@@ -1561,10 +1543,10 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 	info = blk->bp->data;
 	ASSERT(be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
 	leaf = (xfs_attr_leafblock_t *)info;
-	count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+	count = be16_to_cpu(leaf->hdr.count);
 	bytes = sizeof(xfs_attr_leaf_hdr_t) +
 		count * sizeof(xfs_attr_leaf_entry_t) +
-		INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT);
+		be16_to_cpu(leaf->hdr.usedbytes);
 	if (bytes > (state->blocksize >> 1)) {
 		*action = 0;	/* blk over 50%, don't try to join */
 		return(0);
@@ -1618,13 +1600,13 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 		ASSERT(bp != NULL);
 
 		leaf = (xfs_attr_leafblock_t *)info;
-		count  = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		count  = be16_to_cpu(leaf->hdr.count);
 		bytes  = state->blocksize - (state->blocksize>>2);
-		bytes -= INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT);
+		bytes -= be16_to_cpu(leaf->hdr.usedbytes);
 		leaf = bp->data;
 		ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
-		count += INT_GET(leaf->hdr.count, ARCH_CONVERT);
-		bytes -= INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT);
+		count += be16_to_cpu(leaf->hdr.count);
+		bytes -= be16_to_cpu(leaf->hdr.usedbytes);
 		bytes -= count * sizeof(xfs_attr_leaf_entry_t);
 		bytes -= sizeof(xfs_attr_leaf_hdr_t);
 		xfs_da_brelse(state->args->trans, bp);
@@ -1679,16 +1661,15 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	hdr = &leaf->hdr;
 	mp = args->trans->t_mountp;
-	ASSERT((INT_GET(hdr->count, ARCH_CONVERT) > 0)
-		&& (INT_GET(hdr->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)));
+	ASSERT((be16_to_cpu(hdr->count) > 0)
+		&& (be16_to_cpu(hdr->count) < (XFS_LBSIZE(mp)/8)));
 	ASSERT((args->index >= 0)
-		&& (args->index < INT_GET(hdr->count, ARCH_CONVERT)));
-	ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT)
-				>= ((INT_GET(hdr->count, ARCH_CONVERT)
-					* sizeof(*entry))+sizeof(*hdr)));
+		&& (args->index < be16_to_cpu(hdr->count)));
+	ASSERT(be16_to_cpu(hdr->firstused) >=
+	       ((be16_to_cpu(hdr->count) * sizeof(*entry)) + sizeof(*hdr)));
 	entry = &leaf->entries[args->index];
 	ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT)
-				>= INT_GET(hdr->firstused, ARCH_CONVERT));
+				>= be16_to_cpu(hdr->firstused));
 	ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp));
 
 	/*
@@ -1697,33 +1678,30 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	 *    find smallest free region in case we need to replace it,
 	 *    adjust any map that borders the entry table,
 	 */
-	tablesize = INT_GET(hdr->count, ARCH_CONVERT)
-					* sizeof(xfs_attr_leaf_entry_t)
+	tablesize = be16_to_cpu(hdr->count) * sizeof(xfs_attr_leaf_entry_t)
 					+ sizeof(xfs_attr_leaf_hdr_t);
 	map = &hdr->freemap[0];
-	tmp = INT_GET(map->size, ARCH_CONVERT);
+	tmp = be16_to_cpu(map->size);
 	before = after = -1;
 	smallest = XFS_ATTR_LEAF_MAPSIZE - 1;
 	entsize = xfs_attr_leaf_entsize(leaf, args->index);
 	for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) {
-		ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
-		ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
-		if (INT_GET(map->base, ARCH_CONVERT) == tablesize) {
-			INT_MOD(map->base, ARCH_CONVERT,
-					-sizeof(xfs_attr_leaf_entry_t));
-			INT_MOD(map->size, ARCH_CONVERT,
-					sizeof(xfs_attr_leaf_entry_t));
+		ASSERT(be16_to_cpu(map->base) < XFS_LBSIZE(mp));
+		ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp));
+		if (be16_to_cpu(map->base) == tablesize) {
+			be16_add(&map->base,
+				 -((int)sizeof(xfs_attr_leaf_entry_t)));
+			be16_add(&map->size, sizeof(xfs_attr_leaf_entry_t));
 		}
 
-		if ((INT_GET(map->base, ARCH_CONVERT)
-					+ INT_GET(map->size, ARCH_CONVERT))
+		if ((be16_to_cpu(map->base) + be16_to_cpu(map->size))
 				== INT_GET(entry->nameidx, ARCH_CONVERT)) {
 			before = i;
-		} else if (INT_GET(map->base, ARCH_CONVERT)
+		} else if (be16_to_cpu(map->base)
 			== (INT_GET(entry->nameidx, ARCH_CONVERT) + entsize)) {
 			after = i;
-		} else if (INT_GET(map->size, ARCH_CONVERT) < tmp) {
-			tmp = INT_GET(map->size, ARCH_CONVERT);
+		} else if (be16_to_cpu(map->size) < tmp) {
+			tmp = be16_to_cpu(map->size);
 			smallest = i;
 		}
 	}
@@ -1735,30 +1713,29 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	if ((before >= 0) || (after >= 0)) {
 		if ((before >= 0) && (after >= 0)) {
 			map = &hdr->freemap[before];
-			INT_MOD(map->size, ARCH_CONVERT, entsize);
-			INT_MOD(map->size, ARCH_CONVERT,
-				INT_GET(hdr->freemap[after].size,
-							ARCH_CONVERT));
+			be16_add(&map->size, entsize);
+			be16_add(&map->size,
+				 be16_to_cpu(hdr->freemap[after].size));
 			hdr->freemap[after].base = 0;
 			hdr->freemap[after].size = 0;
 		} else if (before >= 0) {
 			map = &hdr->freemap[before];
-			INT_MOD(map->size, ARCH_CONVERT, entsize);
+			be16_add(&map->size, entsize);
 		} else {
 			map = &hdr->freemap[after];
 			/* both on-disk, don't endian flip twice */
 			map->base = entry->nameidx;
-			INT_MOD(map->size, ARCH_CONVERT, entsize);
+			be16_add(&map->size, entsize);
 		}
 	} else {
 		/*
 		 * Replace smallest region (if it is smaller than free'd entry)
 		 */
 		map = &hdr->freemap[smallest];
-		if (INT_GET(map->size, ARCH_CONVERT) < entsize) {
-			INT_SET(map->base, ARCH_CONVERT,
+		if (be16_to_cpu(map->size) < entsize) {
+			map->base = cpu_to_be16(
 					INT_GET(entry->nameidx, ARCH_CONVERT));
-			INT_SET(map->size, ARCH_CONVERT, entsize);
+			map->size = cpu_to_be16(entsize);
 		}
 	}
 
@@ -1766,7 +1743,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	 * Did we remove the first entry?
 	 */
 	if (INT_GET(entry->nameidx, ARCH_CONVERT)
-				== INT_GET(hdr->firstused, ARCH_CONVERT))
+				== be16_to_cpu(hdr->firstused))
 		smallest = 1;
 	else
 		smallest = 0;
@@ -1775,18 +1752,18 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	 * Compress the remaining entries and zero out the removed stuff.
 	 */
 	memset(XFS_ATTR_LEAF_NAME(leaf, args->index), 0, entsize);
-	INT_MOD(hdr->usedbytes, ARCH_CONVERT, -entsize);
+	be16_add(&hdr->usedbytes, -entsize);
 	xfs_da_log_buf(args->trans, bp,
 	     XFS_DA_LOGRANGE(leaf, XFS_ATTR_LEAF_NAME(leaf, args->index),
 				   entsize));
 
-	tmp = (INT_GET(hdr->count, ARCH_CONVERT) - args->index)
+	tmp = (be16_to_cpu(hdr->count) - args->index)
 					* sizeof(xfs_attr_leaf_entry_t);
 	memmove((char *)entry, (char *)(entry+1), tmp);
-	INT_MOD(hdr->count, ARCH_CONVERT, -1);
+	be16_add(&hdr->count, -1);
 	xfs_da_log_buf(args->trans, bp,
 	    XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
-	entry = &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)];
+	entry = &leaf->entries[be16_to_cpu(hdr->count)];
 	memset((char *)entry, 0, sizeof(xfs_attr_leaf_entry_t));
 
 	/*
@@ -1798,18 +1775,17 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	if (smallest) {
 		tmp = XFS_LBSIZE(mp);
 		entry = &leaf->entries[0];
-		for (i = INT_GET(hdr->count, ARCH_CONVERT)-1;
-						i >= 0; entry++, i--) {
+		for (i = be16_to_cpu(hdr->count)-1; i >= 0; entry++, i--) {
 			ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT)
-				>= INT_GET(hdr->firstused, ARCH_CONVERT));
+				>= be16_to_cpu(hdr->firstused));
 			ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT)
 							< XFS_LBSIZE(mp));
 			if (INT_GET(entry->nameidx, ARCH_CONVERT) < tmp)
 				tmp = INT_GET(entry->nameidx, ARCH_CONVERT);
 		}
-		INT_SET(hdr->firstused, ARCH_CONVERT, tmp);
+		hdr->firstused = cpu_to_be16(tmp);
 		if (!hdr->firstused) {
-			INT_SET(hdr->firstused, ARCH_CONVERT,
+			hdr->firstused = cpu_to_be16(
 					tmp - XFS_ATTR_LEAF_NAME_ALIGN);
 		}
 	} else {
@@ -1823,9 +1799,8 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	 * "join" the leaf with a sibling if so.
 	 */
 	tmp  = sizeof(xfs_attr_leaf_hdr_t);
-	tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT)
-					* sizeof(xfs_attr_leaf_entry_t);
-	tmp += INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT);
+	tmp += be16_to_cpu(leaf->hdr.count) * sizeof(xfs_attr_leaf_entry_t);
+	tmp += be16_to_cpu(leaf->hdr.usedbytes);
 	return(tmp < mp->m_attr_magicpct); /* leaf is < 37% full */
 }
 
@@ -1858,9 +1833,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	 * Save last hashval from dying block for later Btree fixup.
 	 */
 	drop_blk->hashval =
-		INT_GET(drop_leaf->entries[INT_GET(drop_leaf->hdr.count,
-						ARCH_CONVERT)-1].hashval,
-								ARCH_CONVERT);
+		INT_GET(drop_leaf->entries[be16_to_cpu(drop_leaf->hdr.count)-1]
+				.hashval, ARCH_CONVERT);
 
 	/*
 	 * Check if we need a temp buffer, or can we do it in place.
@@ -1874,12 +1848,11 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 		 */
 		if (xfs_attr_leaf_order(save_blk->bp, drop_blk->bp)) {
 			xfs_attr_leaf_moveents(drop_leaf, 0, save_leaf, 0,
-			     (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
+			     be16_to_cpu(drop_hdr->count), mp);
 		} else {
 			xfs_attr_leaf_moveents(drop_leaf, 0, save_leaf,
-				  INT_GET(save_hdr->count, ARCH_CONVERT),
-				  (int)INT_GET(drop_hdr->count, ARCH_CONVERT),
-				  mp);
+				  be16_to_cpu(save_hdr->count),
+				  be16_to_cpu(drop_hdr->count), mp);
 		}
 	} else {
 		/*
@@ -1893,28 +1866,24 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 		tmp_hdr = &tmp_leaf->hdr;
 		tmp_hdr->info = save_hdr->info;	/* struct copy */
 		tmp_hdr->count = 0;
-		INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize);
+		tmp_hdr->firstused = cpu_to_be16(state->blocksize);
 		if (!tmp_hdr->firstused) {
-			INT_SET(tmp_hdr->firstused, ARCH_CONVERT,
+			tmp_hdr->firstused = cpu_to_be16(
 				state->blocksize - XFS_ATTR_LEAF_NAME_ALIGN);
 		}
 		tmp_hdr->usedbytes = 0;
 		if (xfs_attr_leaf_order(save_blk->bp, drop_blk->bp)) {
 			xfs_attr_leaf_moveents(drop_leaf, 0, tmp_leaf, 0,
-				(int)INT_GET(drop_hdr->count, ARCH_CONVERT),
-				mp);
+				be16_to_cpu(drop_hdr->count), mp);
 			xfs_attr_leaf_moveents(save_leaf, 0, tmp_leaf,
-				  INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
-				 (int)INT_GET(save_hdr->count, ARCH_CONVERT),
-				 mp);
+				  be16_to_cpu(tmp_leaf->hdr.count),
+				  be16_to_cpu(save_hdr->count), mp);
 		} else {
 			xfs_attr_leaf_moveents(save_leaf, 0, tmp_leaf, 0,
-				(int)INT_GET(save_hdr->count, ARCH_CONVERT),
-				mp);
+				be16_to_cpu(save_hdr->count), mp);
 			xfs_attr_leaf_moveents(drop_leaf, 0, tmp_leaf,
-				INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
-				(int)INT_GET(drop_hdr->count, ARCH_CONVERT),
-				mp);
+				be16_to_cpu(tmp_leaf->hdr.count),
+				be16_to_cpu(drop_hdr->count), mp);
 		}
 		memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize);
 		kmem_free(tmpbuffer, state->blocksize);
@@ -1927,9 +1896,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	 * Copy out last hashval in each block for B-tree code.
 	 */
 	save_blk->hashval =
-		INT_GET(save_leaf->entries[INT_GET(save_leaf->hdr.count,
-						ARCH_CONVERT)-1].hashval,
-								ARCH_CONVERT);
+		INT_GET(save_leaf->entries[be16_to_cpu(save_leaf->hdr.count)-1]
+						.hashval, ARCH_CONVERT);
 }
 
 /*========================================================================
@@ -1961,14 +1929,14 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 
 	leaf = bp->data;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
-	ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT)
+	ASSERT(be16_to_cpu(leaf->hdr.count)
 					< (XFS_LBSIZE(args->dp->i_mount)/8));
 
 	/*
 	 * Binary search.  (note: small blocks will skip this loop)
 	 */
 	hashval = args->hashval;
-	probe = span = INT_GET(leaf->hdr.count, ARCH_CONVERT) / 2;
+	probe = span = be16_to_cpu(leaf->hdr.count) / 2;
 	for (entry = &leaf->entries[probe]; span > 4;
 		   entry = &leaf->entries[probe]) {
 		span /= 2;
@@ -1981,7 +1949,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	}
 	ASSERT((probe >= 0) && 
 	       (!leaf->hdr.count
-	       || (probe < INT_GET(leaf->hdr.count, ARCH_CONVERT))));
+	       || (probe < be16_to_cpu(leaf->hdr.count))));
 	ASSERT((span <= 4) || (INT_GET(entry->hashval, ARCH_CONVERT)
 							== hashval));
 
@@ -1994,12 +1962,12 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 		entry--;
 		probe--;
 	}
-	while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT))
+	while ((probe < be16_to_cpu(leaf->hdr.count))
 		&& (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)) {
 		entry++;
 		probe++;
 	}
-	if ((probe == INT_GET(leaf->hdr.count, ARCH_CONVERT))
+	if ((probe == be16_to_cpu(leaf->hdr.count))
 		    || (INT_GET(entry->hashval, ARCH_CONVERT) != hashval)) {
 		args->index = probe;
 		return(XFS_ERROR(ENOATTR));
@@ -2008,7 +1976,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	/*
 	 * Duplicate keys may be present, so search all of them for a match.
 	 */
-	for (  ; (probe < INT_GET(leaf->hdr.count, ARCH_CONVERT))
+	for (  ; (probe < be16_to_cpu(leaf->hdr.count))
 			&& (INT_GET(entry->hashval, ARCH_CONVERT) == hashval);
 			entry++, probe++) {
 /*
@@ -2078,9 +2046,9 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
 
 	leaf = bp->data;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
-	ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT)
+	ASSERT(be16_to_cpu(leaf->hdr.count)
 					< (XFS_LBSIZE(args->dp->i_mount)/8));
-	ASSERT(args->index < ((int)INT_GET(leaf->hdr.count, ARCH_CONVERT)));
+	ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
 
 	entry = &leaf->entries[args->index];
 	if (entry->flags & XFS_ATTR_LOCAL) {
@@ -2149,26 +2117,25 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 	ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	hdr_s = &leaf_s->hdr;
 	hdr_d = &leaf_d->hdr;
-	ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0)
-				&& (INT_GET(hdr_s->count, ARCH_CONVERT)
-						< (XFS_LBSIZE(mp)/8)));
-	ASSERT(INT_GET(hdr_s->firstused, ARCH_CONVERT) >=
-		((INT_GET(hdr_s->count, ARCH_CONVERT)
+	ASSERT((be16_to_cpu(hdr_s->count) > 0) &&
+	       (be16_to_cpu(hdr_s->count) < (XFS_LBSIZE(mp)/8)));
+	ASSERT(be16_to_cpu(hdr_s->firstused) >=
+		((be16_to_cpu(hdr_s->count)
 					* sizeof(*entry_s))+sizeof(*hdr_s)));
-	ASSERT(INT_GET(hdr_d->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8));
-	ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >=
-		((INT_GET(hdr_d->count, ARCH_CONVERT)
+	ASSERT(be16_to_cpu(hdr_d->count) < (XFS_LBSIZE(mp)/8));
+	ASSERT(be16_to_cpu(hdr_d->firstused) >=
+		((be16_to_cpu(hdr_d->count)
 					* sizeof(*entry_d))+sizeof(*hdr_d)));
 
-	ASSERT(start_s < INT_GET(hdr_s->count, ARCH_CONVERT));
-	ASSERT(start_d <= INT_GET(hdr_d->count, ARCH_CONVERT));
-	ASSERT(count <= INT_GET(hdr_s->count, ARCH_CONVERT));
+	ASSERT(start_s < be16_to_cpu(hdr_s->count));
+	ASSERT(start_d <= be16_to_cpu(hdr_d->count));
+	ASSERT(count <= be16_to_cpu(hdr_s->count));
 
 	/*
 	 * Move the entries in the destination leaf up to make a hole?
 	 */
-	if (start_d < INT_GET(hdr_d->count, ARCH_CONVERT)) {
-		tmp  = INT_GET(hdr_d->count, ARCH_CONVERT) - start_d;
+	if (start_d < be16_to_cpu(hdr_d->count)) {
+		tmp  = be16_to_cpu(hdr_d->count) - start_d;
 		tmp *= sizeof(xfs_attr_leaf_entry_t);
 		entry_s = &leaf_d->entries[start_d];
 		entry_d = &leaf_d->entries[start_d + count];
@@ -2184,7 +2151,7 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 	desti = start_d;
 	for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) {
 		ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT)
-				>= INT_GET(hdr_s->firstused, ARCH_CONVERT));
+				>= be16_to_cpu(hdr_s->firstused));
 		tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i);
 #ifdef GROT
 		/*
@@ -2194,15 +2161,15 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 		 */
 		if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */
 			memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp);
-			INT_MOD(hdr_s->usedbytes, ARCH_CONVERT, -tmp);
-			INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
+			be16_add(&hdr_s->usedbytes, -tmp);
+			be16_add(&hdr_s->count, -1);
 			entry_d--;	/* to compensate for ++ in loop hdr */
 			desti--;
 			if ((start_s + i) < offset)
 				result++;	/* insertion index adjustment */
 		} else {
 #endif /* GROT */
-			INT_MOD(hdr_d->firstused, ARCH_CONVERT, -tmp);
+			be16_add(&hdr_d->firstused, -tmp);
 			/* both on-disk, don't endian flip twice */
 			entry_d->hashval = entry_s->hashval;
 			/* both on-disk, don't endian flip twice */
@@ -2215,14 +2182,14 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 			ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp
 							<= XFS_LBSIZE(mp));
 			memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp);
-			INT_MOD(hdr_s->usedbytes, ARCH_CONVERT, -tmp);
-			INT_MOD(hdr_d->usedbytes, ARCH_CONVERT, tmp);
-			INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
-			INT_MOD(hdr_d->count, ARCH_CONVERT, 1);
-			tmp = INT_GET(hdr_d->count, ARCH_CONVERT)
+			be16_add(&hdr_s->usedbytes, -tmp);
+			be16_add(&hdr_d->usedbytes, tmp);
+			be16_add(&hdr_s->count, -1);
+			be16_add(&hdr_d->count, 1);
+			tmp = be16_to_cpu(hdr_d->count)
 						* sizeof(xfs_attr_leaf_entry_t)
 						+ sizeof(xfs_attr_leaf_hdr_t);
-			ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= tmp);
+			ASSERT(be16_to_cpu(hdr_d->firstused) >= tmp);
 #ifdef GROT
 		}
 #endif /* GROT */
@@ -2231,7 +2198,7 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 	/*
 	 * Zero out the entries we just copied.
 	 */
-	if (start_s == INT_GET(hdr_s->count, ARCH_CONVERT)) {
+	if (start_s == be16_to_cpu(hdr_s->count)) {
 		tmp = count * sizeof(xfs_attr_leaf_entry_t);
 		entry_s = &leaf_s->entries[start_s];
 		ASSERT(((char *)entry_s + tmp) <=
@@ -2242,15 +2209,14 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 		 * Move the remaining entries down to fill the hole,
 		 * then zero the entries at the top.
 		 */
-		tmp  = INT_GET(hdr_s->count, ARCH_CONVERT) - count;
+		tmp  = be16_to_cpu(hdr_s->count) - count;
 		tmp *= sizeof(xfs_attr_leaf_entry_t);
 		entry_s = &leaf_s->entries[start_s + count];
 		entry_d = &leaf_s->entries[start_s];
 		memmove((char *)entry_d, (char *)entry_s, tmp);
 
 		tmp = count * sizeof(xfs_attr_leaf_entry_t);
-		entry_s = &leaf_s->entries[INT_GET(hdr_s->count,
-							ARCH_CONVERT)];
+		entry_s = &leaf_s->entries[be16_to_cpu(hdr_s->count)];
 		ASSERT(((char *)entry_s + tmp) <=
 		       ((char *)leaf_s + XFS_LBSIZE(mp)));
 		memset((char *)entry_s, 0, tmp);
@@ -2259,14 +2225,11 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 	/*
 	 * Fill in the freemap information
 	 */
-	INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT,
-					sizeof(xfs_attr_leaf_hdr_t));
-	INT_MOD(hdr_d->freemap[0].base, ARCH_CONVERT,
-				INT_GET(hdr_d->count, ARCH_CONVERT)
-					* sizeof(xfs_attr_leaf_entry_t));
-	INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT,
-				INT_GET(hdr_d->firstused, ARCH_CONVERT)
-			      - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
+	hdr_d->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t));
+	be16_add(&hdr_d->freemap[0].base, be16_to_cpu(hdr_d->count) *
+			sizeof(xfs_attr_leaf_entry_t));
+	hdr_d->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr_d->firstused)
+			      - be16_to_cpu(hdr_d->freemap[0].base));
 	hdr_d->freemap[1].base = 0;
 	hdr_d->freemap[2].base = 0;
 	hdr_d->freemap[1].size = 0;
@@ -2287,14 +2250,14 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
 	leaf2 = leaf2_bp->data;
 	ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) &&
 	       (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC));
-	if (   (INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0)
-	    && (INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0)
-	    && (   (INT_GET(leaf2->entries[ 0 ].hashval, ARCH_CONVERT) <
-		      INT_GET(leaf1->entries[ 0 ].hashval, ARCH_CONVERT))
-		|| (INT_GET(leaf2->entries[INT_GET(leaf2->hdr.count,
-				ARCH_CONVERT)-1].hashval, ARCH_CONVERT) <
-		      INT_GET(leaf1->entries[INT_GET(leaf1->hdr.count,
-				ARCH_CONVERT)-1].hashval, ARCH_CONVERT))) ) {
+	if ((be16_to_cpu(leaf1->hdr.count) > 0) &&
+	    (be16_to_cpu(leaf2->hdr.count) > 0) &&
+	    ((INT_GET(leaf2->entries[0].hashval, ARCH_CONVERT) <
+	      INT_GET(leaf1->entries[0].hashval, ARCH_CONVERT)) ||
+	     (INT_GET(leaf2->entries[be16_to_cpu(leaf2->hdr.count)-1].hashval,
+		      				ARCH_CONVERT) <
+	      INT_GET(leaf1->entries[be16_to_cpu(leaf1->hdr.count)-1].hashval,
+		      				ARCH_CONVERT)))) {
 		return(1);
 	}
 	return(0);
@@ -2311,11 +2274,11 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count)
 	leaf = bp->data;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
 	if (count)
-		*count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		*count = be16_to_cpu(leaf->hdr.count);
 	if (!leaf->hdr.count)
 		return(0);
-	return(INT_GET(leaf->entries[INT_GET(leaf->hdr.count,
-				ARCH_CONVERT)-1].hashval, ARCH_CONVERT));
+	return (INT_GET(leaf->entries[be16_to_cpu(leaf->hdr.count)-1].hashval,
+				ARCH_CONVERT));
 }
 
 /*
@@ -2392,8 +2355,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 	 */
 	if (context->resynch) {
 		entry = &leaf->entries[0];
-		for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
-							entry++, i++) {
+		for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
 			if (INT_GET(entry->hashval, ARCH_CONVERT)
 							== cursor->hashval) {
 				if (cursor->offset == context->dupcnt) {
@@ -2407,7 +2369,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 				break;
 			}
 		}
-		if (i == INT_GET(leaf->hdr.count, ARCH_CONVERT)) {
+		if (i == be16_to_cpu(leaf->hdr.count)) {
 			xfs_attr_trace_l_c("not found", context);
 			return(0);
 		}
@@ -2421,7 +2383,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 	 * We have found our place, start copying out the new attributes.
 	 */
 	retval = 0;
-	for (  ; (i < INT_GET(leaf->hdr.count, ARCH_CONVERT))
+	for (  ; (i < be16_to_cpu(leaf->hdr.count))
 	     && (retval == 0); entry++, i++) {
 		attrnames_t	*namesp;
 
@@ -2577,7 +2539,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
 
 	leaf = bp->data;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
-	ASSERT(args->index < INT_GET(leaf->hdr.count, ARCH_CONVERT));
+	ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
 	ASSERT(args->index >= 0);
 	entry = &leaf->entries[ args->index ];
 	ASSERT(entry->flags & XFS_ATTR_INCOMPLETE);
@@ -2643,7 +2605,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
 
 	leaf = bp->data;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
-	ASSERT(args->index < INT_GET(leaf->hdr.count, ARCH_CONVERT));
+	ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
 	ASSERT(args->index >= 0);
 	entry = &leaf->entries[ args->index ];
 
@@ -2715,13 +2677,13 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
 
 	leaf1 = bp1->data;
 	ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
-	ASSERT(args->index < INT_GET(leaf1->hdr.count, ARCH_CONVERT));
+	ASSERT(args->index < be16_to_cpu(leaf1->hdr.count));
 	ASSERT(args->index >= 0);
 	entry1 = &leaf1->entries[ args->index ];
 
 	leaf2 = bp2->data;
 	ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
-	ASSERT(args->index2 < INT_GET(leaf2->hdr.count, ARCH_CONVERT));
+	ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count));
 	ASSERT(args->index2 >= 0);
 	entry2 = &leaf2->entries[ args->index2 ];
 
@@ -2971,7 +2933,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 	 */
 	count = 0;
 	entry = &leaf->entries[0];
-	for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++) {
+	for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
 		if (   INT_GET(entry->nameidx, ARCH_CONVERT)
 		    && ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
 			name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i);
@@ -2999,7 +2961,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 	 */
 	lp = list;
 	entry = &leaf->entries[0];
-	for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++) {
+	for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
 		if (   INT_GET(entry->nameidx, ARCH_CONVERT)
 		    && ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
 			name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i);
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 541e34109bb..52b771052c5 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -73,17 +73,17 @@ struct xfs_trans;
 #define XFS_ATTR_LEAF_MAPSIZE	3	/* how many freespace slots */
 
 typedef struct xfs_attr_leaf_map {	/* RLE map of free bytes */
-	__uint16_t	base;	 	/* base of free region */
-	__uint16_t	size;	  	/* length of free region */
+	__be16	base;			  /* base of free region */
+	__be16	size;			  /* length of free region */
 } xfs_attr_leaf_map_t;
 
 typedef struct xfs_attr_leaf_hdr {	/* constant-structure header block */
 	xfs_da_blkinfo_t info;		/* block type, links, etc. */
-	__uint16_t	count;		/* count of active leaf_entry's */
-	__uint16_t	usedbytes;	/* num bytes of names/values stored */
-	__uint16_t	firstused;	/* first used byte in name area */
-	__uint8_t	holes;		/* != 0 if blk needs compaction */
-	__uint8_t	pad1;
+	__be16	count;			/* count of active leaf_entry's */
+	__be16	usedbytes;		/* num bytes of names/values stored */
+	__be16	firstused;		/* first used byte in name area */
+	__u8	holes;			/* != 0 if blk needs compaction */
+	__u8	pad1;
 	xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];
 					/* N largest free regions */
 } xfs_attr_leaf_hdr_t;
-- 
cgit v1.2.3


From 6b19f2d87da9908acf1e0f48b4e79cf8bc833811 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:29:02 +1100
Subject: [XFS] endianess annotations for xfs_attr_leaf_entry_t

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25498a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_attr.c      |  14 +++---
 fs/xfs/xfs_attr_leaf.c | 133 ++++++++++++++++++++++---------------------------
 fs/xfs/xfs_attr_leaf.h |  19 ++++---
 3 files changed, 74 insertions(+), 92 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 4a3f3cf6b20..98d0f4d1060 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -1770,17 +1770,14 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 				break;
 			case XFS_ATTR_LEAF_MAGIC:
 				leaf = bp->data;
-				if (cursor->hashval >
-				    INT_GET(leaf->entries[
-					be16_to_cpu(leaf->hdr.count)-1].hashval,
-							ARCH_CONVERT)) {
+				if (cursor->hashval > be32_to_cpu(leaf->entries[
+				    be16_to_cpu(leaf->hdr.count)-1].hashval)) {
 					xfs_attr_trace_l_cl("wrong blk",
 							   context, leaf);
 					xfs_da_brelse(NULL, bp);
 					bp = NULL;
 				} else if (cursor->hashval <=
-					     INT_GET(leaf->entries[0].hashval,
-							ARCH_CONVERT)) {
+					     be32_to_cpu(leaf->entries[0].hashval)) {
 					xfs_attr_trace_l_cl("maybe wrong blk",
 							   context, leaf);
 					xfs_da_brelse(NULL, bp);
@@ -2289,8 +2286,9 @@ xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
 		(__psunsigned_t)context->dupcnt,
 		(__psunsigned_t)context->flags,
 		(__psunsigned_t)be16_to_cpu(leaf->hdr.count),
-		(__psunsigned_t)INT_GET(leaf->entries[0].hashval, ARCH_CONVERT),
-		(__psunsigned_t)INT_GET(leaf->entries[be16_to_cpu(leaf->hdr.count)-1].hashval, ARCH_CONVERT));
+		(__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
+		(__psunsigned_t)be32_to_cpu(leaf->entries[
+				be16_to_cpu(leaf->hdr.count)-1].hashval));
 }
 
 /*
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 31b55e58e2c..319285c24e4 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -819,7 +819,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
 		nargs.namelen = name_loc->namelen;
 		nargs.value = (char *)&name_loc->nameval[nargs.namelen];
 		nargs.valuelen = INT_GET(name_loc->valuelen, ARCH_CONVERT);
-		nargs.hashval = INT_GET(entry->hashval, ARCH_CONVERT);
+		nargs.hashval = be32_to_cpu(entry->hashval);
 		nargs.flags = (entry->flags & XFS_ATTR_SECURE) ? ATTR_SECURE :
 			      ((entry->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0);
 		xfs_attr_shortform_add(&nargs, forkoff);
@@ -1104,10 +1104,9 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 	be16_add(&map->size,
 		-xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
 					  mp->m_sb.sb_blocksize, &tmp));
-	INT_SET(entry->nameidx, ARCH_CONVERT,
-					be16_to_cpu(map->base)
-				      + be16_to_cpu(map->size));
-	INT_SET(entry->hashval, ARCH_CONVERT, args->hashval);
+	entry->nameidx = cpu_to_be16(be16_to_cpu(map->base) +
+				     be16_to_cpu(map->size));
+	entry->hashval = cpu_to_be32(args->hashval);
 	entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
 	entry->flags |= (args->flags & ATTR_SECURE) ? XFS_ATTR_SECURE :
 			((args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0);
@@ -1120,12 +1119,10 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 	}
 	xfs_da_log_buf(args->trans, bp,
 			  XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
-	ASSERT((args->index == 0) || (INT_GET(entry->hashval, ARCH_CONVERT)
-						>= INT_GET((entry-1)->hashval,
-							    ARCH_CONVERT)));
+	ASSERT((args->index == 0) ||
+	       (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval)));
 	ASSERT((args->index == be16_to_cpu(hdr->count)-1) ||
-	       (INT_GET(entry->hashval, ARCH_CONVERT)
-			    <= (INT_GET((entry+1)->hashval, ARCH_CONVERT))));
+	       (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval)));
 
 	/*
 	 * Copy the attribute name and value into the new space.
@@ -1161,8 +1158,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 	/*
 	 * Update the control info for this leaf node
 	 */
-	if (INT_GET(entry->nameidx, ARCH_CONVERT)
-				< be16_to_cpu(hdr->firstused)) {
+	if (be16_to_cpu(entry->nameidx) < be16_to_cpu(hdr->firstused)) {
 		/* both on-disk, don't endian-flip twice */
 		hdr->firstused = entry->nameidx;
 	}
@@ -1364,12 +1360,10 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	/*
 	 * Copy out last hashval in each block for B-tree code.
 	 */
-	blk1->hashval =
-	    INT_GET(leaf1->entries[be16_to_cpu(leaf1->hdr.count)-1].hashval,
-			    ARCH_CONVERT);
-	blk2->hashval =
-	    INT_GET(leaf2->entries[be16_to_cpu(leaf2->hdr.count)-1].hashval,
-			    ARCH_CONVERT);
+	blk1->hashval = be32_to_cpu(
+		leaf1->entries[be16_to_cpu(leaf1->hdr.count)-1].hashval);
+	blk2->hashval = be32_to_cpu(
+		leaf2->entries[be16_to_cpu(leaf2->hdr.count)-1].hashval);
 
 	/*
 	 * Adjust the expected index for insertion.
@@ -1668,9 +1662,8 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	ASSERT(be16_to_cpu(hdr->firstused) >=
 	       ((be16_to_cpu(hdr->count) * sizeof(*entry)) + sizeof(*hdr)));
 	entry = &leaf->entries[args->index];
-	ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT)
-				>= be16_to_cpu(hdr->firstused));
-	ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp));
+	ASSERT(be16_to_cpu(entry->nameidx) >= be16_to_cpu(hdr->firstused));
+	ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp));
 
 	/*
 	 * Scan through free region table:
@@ -1695,10 +1688,10 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 		}
 
 		if ((be16_to_cpu(map->base) + be16_to_cpu(map->size))
-				== INT_GET(entry->nameidx, ARCH_CONVERT)) {
+				== be16_to_cpu(entry->nameidx)) {
 			before = i;
 		} else if (be16_to_cpu(map->base)
-			== (INT_GET(entry->nameidx, ARCH_CONVERT) + entsize)) {
+			== (be16_to_cpu(entry->nameidx) + entsize)) {
 			after = i;
 		} else if (be16_to_cpu(map->size) < tmp) {
 			tmp = be16_to_cpu(map->size);
@@ -1733,8 +1726,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 		 */
 		map = &hdr->freemap[smallest];
 		if (be16_to_cpu(map->size) < entsize) {
-			map->base = cpu_to_be16(
-					INT_GET(entry->nameidx, ARCH_CONVERT));
+			map->base = cpu_to_be16(be16_to_cpu(entry->nameidx));
 			map->size = cpu_to_be16(entsize);
 		}
 	}
@@ -1742,8 +1734,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	/*
 	 * Did we remove the first entry?
 	 */
-	if (INT_GET(entry->nameidx, ARCH_CONVERT)
-				== be16_to_cpu(hdr->firstused))
+	if (be16_to_cpu(entry->nameidx) == be16_to_cpu(hdr->firstused))
 		smallest = 1;
 	else
 		smallest = 0;
@@ -1776,12 +1767,12 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
 		tmp = XFS_LBSIZE(mp);
 		entry = &leaf->entries[0];
 		for (i = be16_to_cpu(hdr->count)-1; i >= 0; entry++, i--) {
-			ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT)
-				>= be16_to_cpu(hdr->firstused));
-			ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT)
-							< XFS_LBSIZE(mp));
-			if (INT_GET(entry->nameidx, ARCH_CONVERT) < tmp)
-				tmp = INT_GET(entry->nameidx, ARCH_CONVERT);
+			ASSERT(be16_to_cpu(entry->nameidx) >=
+			       be16_to_cpu(hdr->firstused));
+			ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp));
+
+			if (be16_to_cpu(entry->nameidx) < tmp)
+				tmp = be16_to_cpu(entry->nameidx);
 		}
 		hdr->firstused = cpu_to_be16(tmp);
 		if (!hdr->firstused) {
@@ -1832,9 +1823,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	/*
 	 * Save last hashval from dying block for later Btree fixup.
 	 */
-	drop_blk->hashval =
-		INT_GET(drop_leaf->entries[be16_to_cpu(drop_leaf->hdr.count)-1]
-				.hashval, ARCH_CONVERT);
+	drop_blk->hashval = be32_to_cpu(
+		drop_leaf->entries[be16_to_cpu(drop_leaf->hdr.count)-1].hashval);
 
 	/*
 	 * Check if we need a temp buffer, or can we do it in place.
@@ -1895,9 +1885,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	/*
 	 * Copy out last hashval in each block for B-tree code.
 	 */
-	save_blk->hashval =
-		INT_GET(save_leaf->entries[be16_to_cpu(save_leaf->hdr.count)-1]
-						.hashval, ARCH_CONVERT);
+	save_blk->hashval = be32_to_cpu(
+		save_leaf->entries[be16_to_cpu(save_leaf->hdr.count)-1].hashval);
 }
 
 /*========================================================================
@@ -1940,9 +1929,9 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	for (entry = &leaf->entries[probe]; span > 4;
 		   entry = &leaf->entries[probe]) {
 		span /= 2;
-		if (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)
+		if (be32_to_cpu(entry->hashval) < hashval)
 			probe += span;
-		else if (INT_GET(entry->hashval, ARCH_CONVERT) > hashval)
+		else if (be32_to_cpu(entry->hashval) > hashval)
 			probe -= span;
 		else
 			break;
@@ -1950,25 +1939,23 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	ASSERT((probe >= 0) && 
 	       (!leaf->hdr.count
 	       || (probe < be16_to_cpu(leaf->hdr.count))));
-	ASSERT((span <= 4) || (INT_GET(entry->hashval, ARCH_CONVERT)
-							== hashval));
+	ASSERT((span <= 4) || (be32_to_cpu(entry->hashval) == hashval));
 
 	/*
 	 * Since we may have duplicate hashval's, find the first matching
 	 * hashval in the leaf.
 	 */
-	while ((probe > 0) && (INT_GET(entry->hashval, ARCH_CONVERT)
-							>= hashval)) {
+	while ((probe > 0) && (be32_to_cpu(entry->hashval) >= hashval)) {
 		entry--;
 		probe--;
 	}
-	while ((probe < be16_to_cpu(leaf->hdr.count))
-		&& (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)) {
+	while ((probe < be16_to_cpu(leaf->hdr.count)) &&
+	       (be32_to_cpu(entry->hashval) < hashval)) {
 		entry++;
 		probe++;
 	}
-	if ((probe == be16_to_cpu(leaf->hdr.count))
-		    || (INT_GET(entry->hashval, ARCH_CONVERT) != hashval)) {
+	if ((probe == be16_to_cpu(leaf->hdr.count)) ||
+	    (be32_to_cpu(entry->hashval) != hashval)) {
 		args->index = probe;
 		return(XFS_ERROR(ENOATTR));
 	}
@@ -1976,8 +1963,8 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	/*
 	 * Duplicate keys may be present, so search all of them for a match.
 	 */
-	for (  ; (probe < be16_to_cpu(leaf->hdr.count))
-			&& (INT_GET(entry->hashval, ARCH_CONVERT) == hashval);
+	for (  ; (probe < be16_to_cpu(leaf->hdr.count)) &&
+			(be32_to_cpu(entry->hashval) == hashval);
 			entry++, probe++) {
 /*
  * GROT: Add code to remove incomplete entries.
@@ -2150,7 +2137,7 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 	entry_d = &leaf_d->entries[start_d];
 	desti = start_d;
 	for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) {
-		ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT)
+		ASSERT(be16_to_cpu(entry_s->nameidx)
 				>= be16_to_cpu(hdr_s->firstused));
 		tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i);
 #ifdef GROT
@@ -2175,11 +2162,11 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
 			/* both on-disk, don't endian flip twice */
 			entry_d->nameidx = hdr_d->firstused;
 			entry_d->flags = entry_s->flags;
-			ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp
+			ASSERT(be16_to_cpu(entry_d->nameidx) + tmp
 							<= XFS_LBSIZE(mp));
 			memmove(XFS_ATTR_LEAF_NAME(leaf_d, desti),
 				XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp);
-			ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp
+			ASSERT(be16_to_cpu(entry_s->nameidx) + tmp
 							<= XFS_LBSIZE(mp));
 			memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp);
 			be16_add(&hdr_s->usedbytes, -tmp);
@@ -2252,12 +2239,12 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
 	       (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC));
 	if ((be16_to_cpu(leaf1->hdr.count) > 0) &&
 	    (be16_to_cpu(leaf2->hdr.count) > 0) &&
-	    ((INT_GET(leaf2->entries[0].hashval, ARCH_CONVERT) <
-	      INT_GET(leaf1->entries[0].hashval, ARCH_CONVERT)) ||
-	     (INT_GET(leaf2->entries[be16_to_cpu(leaf2->hdr.count)-1].hashval,
-		      				ARCH_CONVERT) <
-	      INT_GET(leaf1->entries[be16_to_cpu(leaf1->hdr.count)-1].hashval,
-		      				ARCH_CONVERT)))) {
+	    ((be32_to_cpu(leaf2->entries[0].hashval) <
+	      be32_to_cpu(leaf1->entries[0].hashval)) ||
+	     (be32_to_cpu(leaf2->entries[
+			be16_to_cpu(leaf2->hdr.count)-1].hashval) <
+	      be32_to_cpu(leaf1->entries[
+			be16_to_cpu(leaf1->hdr.count)-1].hashval)))) {
 		return(1);
 	}
 	return(0);
@@ -2277,8 +2264,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count)
 		*count = be16_to_cpu(leaf->hdr.count);
 	if (!leaf->hdr.count)
 		return(0);
-	return (INT_GET(leaf->entries[be16_to_cpu(leaf->hdr.count)-1].hashval,
-				ARCH_CONVERT));
+	return be32_to_cpu(leaf->entries[be16_to_cpu(leaf->hdr.count)-1].hashval);
 }
 
 /*
@@ -2356,15 +2342,14 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 	if (context->resynch) {
 		entry = &leaf->entries[0];
 		for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
-			if (INT_GET(entry->hashval, ARCH_CONVERT)
-							== cursor->hashval) {
+			if (be32_to_cpu(entry->hashval) == cursor->hashval) {
 				if (cursor->offset == context->dupcnt) {
 					context->dupcnt = 0;
 					break;
 				}
 				context->dupcnt++;
-			} else if (INT_GET(entry->hashval, ARCH_CONVERT)
-							> cursor->hashval) {
+			} else if (be32_to_cpu(entry->hashval) >
+					cursor->hashval) {
 				context->dupcnt = 0;
 				break;
 			}
@@ -2387,8 +2372,8 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 	     && (retval == 0); entry++, i++) {
 		attrnames_t	*namesp;
 
-		if (INT_GET(entry->hashval, ARCH_CONVERT) != cursor->hashval) {
-			cursor->hashval = INT_GET(entry->hashval, ARCH_CONVERT);
+		if (be32_to_cpu(entry->hashval) != cursor->hashval) {
+			cursor->hashval = be32_to_cpu(entry->hashval);
 			cursor->offset = 0;
 		}
 
@@ -2554,7 +2539,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
 		namelen = name_rmt->namelen;
 		name = (char *)name_rmt->name;
 	}
-	ASSERT(INT_GET(entry->hashval, ARCH_CONVERT) == args->hashval);
+	ASSERT(be32_to_cpu(entry->hashval) == args->hashval);
 	ASSERT(namelen == args->namelen);
 	ASSERT(memcmp(name, args->name, namelen) == 0);
 #endif /* DEBUG */
@@ -2706,7 +2691,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
 		namelen2 = name_rmt->namelen;
 		name2 = (char *)name_rmt->name;
 	}
-	ASSERT(INT_GET(entry1->hashval, ARCH_CONVERT) == INT_GET(entry2->hashval, ARCH_CONVERT));
+	ASSERT(be32_to_cpu(entry1->hashval) == be32_to_cpu(entry2->hashval));
 	ASSERT(namelen1 == namelen2);
 	ASSERT(memcmp(name1, name2, namelen1) == 0);
 #endif /* DEBUG */
@@ -2934,8 +2919,8 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 	count = 0;
 	entry = &leaf->entries[0];
 	for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
-		if (   INT_GET(entry->nameidx, ARCH_CONVERT)
-		    && ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
+		if (be16_to_cpu(entry->nameidx) &&
+		    ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
 			name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i);
 			if (name_rmt->valueblk)
 				count++;
@@ -2962,8 +2947,8 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 	lp = list;
 	entry = &leaf->entries[0];
 	for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
-		if (   INT_GET(entry->nameidx, ARCH_CONVERT)
-		    && ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
+		if (be16_to_cpu(entry->nameidx) &&
+		    ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
 			name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i);
 			if (name_rmt->valueblk) {
 				/* both on-disk, don't endian flip twice */
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 52b771052c5..7fbe4880bb8 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -89,10 +89,10 @@ typedef struct xfs_attr_leaf_hdr {	/* constant-structure header block */
 } xfs_attr_leaf_hdr_t;
 
 typedef struct xfs_attr_leaf_entry {	/* sorted on key, not name */
-	xfs_dahash_t	hashval;	/* hash value of name */
-	__uint16_t	nameidx;	/* index into buffer of name/value */
-	__uint8_t	flags;		/* LOCAL/ROOT/SECURE/INCOMPLETE flag */
-	__uint8_t	pad2;		/* unused pad byte */
+	__be32	hashval;		/* hash value of name */
+ 	__be16	nameidx;		/* index into buffer of name/value */
+	__u8	flags;			/* LOCAL/ROOT/SECURE/INCOMPLETE flag */
+	__u8	pad2;			/* unused pad byte */
 } xfs_attr_leaf_entry_t;
 
 typedef struct xfs_attr_leaf_name_local {
@@ -143,8 +143,8 @@ typedef struct xfs_attr_leafblock {
 static inline xfs_attr_leaf_name_remote_t *
 xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
 {
-	return (xfs_attr_leaf_name_remote_t *) &((char *)
-		(leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)];
+	return (xfs_attr_leaf_name_remote_t *)
+		&((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)];
 }
 
 #define XFS_ATTR_LEAF_NAME_LOCAL(leafp,idx)	\
@@ -152,16 +152,15 @@ xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
 static inline xfs_attr_leaf_name_local_t *
 xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
 {
-	return (xfs_attr_leaf_name_local_t *) &((char *)
-		(leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)];
+	return (xfs_attr_leaf_name_local_t *)
+		&((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)];
 }
 
 #define XFS_ATTR_LEAF_NAME(leafp,idx)		\
 	xfs_attr_leaf_name(leafp,idx)
 static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
 {
-	return (&((char *)
-		(leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)]);
+	return &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)];
 }
 
 /*
-- 
cgit v1.2.3


From 053b5758cbc096a3f718858f4da9341afbc56b7d Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:29:09 +1100
Subject: [XFS] endianess annotations for xfs_attr_leaf_name_local_t

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25499a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_attr_leaf.c | 18 ++++++++----------
 fs/xfs/xfs_attr_leaf.h |  6 +++---
 2 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 319285c24e4..3adfc5dcfe7 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -732,11 +732,11 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
 		name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, i);
 		if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX)
 			return(0);
-		if (INT_GET(name_loc->valuelen, ARCH_CONVERT) >= XFS_ATTR_SF_ENTSIZE_MAX)
+		if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX)
 			return(0);
 		bytes += sizeof(struct xfs_attr_sf_entry)-1
 				+ name_loc->namelen
-				+ INT_GET(name_loc->valuelen, ARCH_CONVERT);
+				+ be16_to_cpu(name_loc->valuelen);
 	}
 	if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
 	    (bytes == sizeof(struct xfs_attr_sf_hdr)))
@@ -818,7 +818,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
 		nargs.name = (char *)name_loc->nameval;
 		nargs.namelen = name_loc->namelen;
 		nargs.value = (char *)&name_loc->nameval[nargs.namelen];
-		nargs.valuelen = INT_GET(name_loc->valuelen, ARCH_CONVERT);
+		nargs.valuelen = be16_to_cpu(name_loc->valuelen);
 		nargs.hashval = be32_to_cpu(entry->hashval);
 		nargs.flags = (entry->flags & XFS_ATTR_SECURE) ? ATTR_SECURE :
 			      ((entry->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0);
@@ -1136,10 +1136,10 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 	if (entry->flags & XFS_ATTR_LOCAL) {
 		name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, args->index);
 		name_loc->namelen = args->namelen;
-		INT_SET(name_loc->valuelen, ARCH_CONVERT, args->valuelen);
+		name_loc->valuelen = cpu_to_be16(args->valuelen);
 		memcpy((char *)name_loc->nameval, args->name, args->namelen);
 		memcpy((char *)&name_loc->nameval[args->namelen], args->value,
-				   INT_GET(name_loc->valuelen, ARCH_CONVERT));
+				   be16_to_cpu(name_loc->valuelen));
 	} else {
 		name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index);
 		name_rmt->namelen = args->namelen;
@@ -2042,7 +2042,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
 		name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, args->index);
 		ASSERT(name_loc->namelen == args->namelen);
 		ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0);
-		valuelen = INT_GET(name_loc->valuelen, ARCH_CONVERT);
+		valuelen = be16_to_cpu(name_loc->valuelen);
 		if (args->flags & ATTR_KERNOVAL) {
 			args->valuelen = valuelen;
 			return(0);
@@ -2282,8 +2282,7 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
 	if (leaf->entries[index].flags & XFS_ATTR_LOCAL) {
 		name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, index);
 		size = XFS_ATTR_LEAF_ENTSIZE_LOCAL(name_loc->namelen,
-						   INT_GET(name_loc->valuelen,
-								ARCH_CONVERT));
+						   be16_to_cpu(name_loc->valuelen));
 	} else {
 		name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, index);
 		size = XFS_ATTR_LEAF_ENTSIZE_REMOTE(name_rmt->namelen);
@@ -2402,8 +2401,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 				retval = xfs_attr_put_listent(context, namesp,
 					(char *)name_loc->nameval,
 					(int)name_loc->namelen,
-					(int)INT_GET(name_loc->valuelen,
-								ARCH_CONVERT));
+					be16_to_cpu(name_loc->valuelen));
 			}
 		} else {
 			name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i);
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 7fbe4880bb8..ad6f791ec58 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -96,9 +96,9 @@ typedef struct xfs_attr_leaf_entry {	/* sorted on key, not name */
 } xfs_attr_leaf_entry_t;
 
 typedef struct xfs_attr_leaf_name_local {
-	__uint16_t	valuelen;	/* number of bytes in value */
-	__uint8_t	namelen;	/* length of name bytes */
-	__uint8_t	nameval[1];	/* name/value bytes */
+	__be16	valuelen;		/* number of bytes in value */
+	__u8	namelen;		/* length of name bytes */
+	__u8	nameval[1];		/* name/value bytes */
 } xfs_attr_leaf_name_local_t;
 
 typedef struct xfs_attr_leaf_name_remote {
-- 
cgit v1.2.3


From c0f054e7a44b4bbed8e16dd459f33df63dad98ef Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:29:18 +1100
Subject: [XFS] endianess annotations for xfs_attr_leaf_name_remote_t

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25500a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_attr_leaf.c | 24 ++++++++++--------------
 fs/xfs/xfs_attr_leaf.h |  8 ++++----
 2 files changed, 14 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 3adfc5dcfe7..531417b2c74 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -2006,11 +2006,9 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
 			    ((entry->flags & XFS_ATTR_ROOT) != 0))
 				continue;
 			args->index = probe;
-			args->rmtblkno
-				  = INT_GET(name_rmt->valueblk, ARCH_CONVERT);
+			args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
 			args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount,
-						   INT_GET(name_rmt->valuelen,
-								ARCH_CONVERT));
+						   be32_to_cpu(name_rmt->valuelen));
 			return(XFS_ERROR(EEXIST));
 		}
 	}
@@ -2057,8 +2055,8 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
 		name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index);
 		ASSERT(name_rmt->namelen == args->namelen);
 		ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
-		valuelen = INT_GET(name_rmt->valuelen, ARCH_CONVERT);
-		args->rmtblkno = INT_GET(name_rmt->valueblk, ARCH_CONVERT);
+		valuelen = be32_to_cpu(name_rmt->valuelen);
+		args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
 		args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, valuelen);
 		if (args->flags & ATTR_KERNOVAL) {
 			args->valuelen = valuelen;
@@ -2413,8 +2411,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 				retval = xfs_attr_put_listent(context, namesp,
 					(char *)name_rmt->name,
 					(int)name_rmt->namelen,
-					(int)INT_GET(name_rmt->valuelen,
-								ARCH_CONVERT));
+					be32_to_cpu(name_rmt->valuelen));
 			}
 		}
 		if (retval == 0) {
@@ -2549,8 +2546,8 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
 	if (args->rmtblkno) {
 		ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0);
 		name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index);
-		INT_SET(name_rmt->valueblk, ARCH_CONVERT, args->rmtblkno);
-		INT_SET(name_rmt->valuelen, ARCH_CONVERT, args->valuelen);
+		name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
+		name_rmt->valuelen = cpu_to_be32(args->valuelen);
 		xfs_da_log_buf(args->trans, bp,
 			 XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
 	}
@@ -2703,8 +2700,8 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
 	if (args->rmtblkno) {
 		ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0);
 		name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf1, args->index);
-		INT_SET(name_rmt->valueblk, ARCH_CONVERT, args->rmtblkno);
-		INT_SET(name_rmt->valuelen, ARCH_CONVERT, args->valuelen);
+		name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
+		name_rmt->valuelen = cpu_to_be32(args->valuelen);
 		xfs_da_log_buf(args->trans, bp1,
 			 XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt)));
 	}
@@ -2953,8 +2950,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 				lp->valueblk = name_rmt->valueblk;
 				INT_SET(lp->valuelen, ARCH_CONVERT,
 						XFS_B_TO_FSB(dp->i_mount,
-						    INT_GET(name_rmt->valuelen,
-							      ARCH_CONVERT)));
+						    be32_to_cpu(name_rmt->valuelen)));
 				lp++;
 			}
 		}
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index ad6f791ec58..51c3ee156b2 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -102,10 +102,10 @@ typedef struct xfs_attr_leaf_name_local {
 } xfs_attr_leaf_name_local_t;
 
 typedef struct xfs_attr_leaf_name_remote {
-	xfs_dablk_t	valueblk;	/* block number of value bytes */
-	__uint32_t	valuelen;	/* number of bytes in value */
-	__uint8_t	namelen;	/* length of name bytes */
-	__uint8_t	name[1];	/* name bytes */
+	__be32	valueblk;		/* block number of value bytes */
+	__be32	valuelen;		/* number of bytes in value */
+	__u8	namelen;		/* length of name bytes */
+	__u8	name[1];		/* name bytes */
 } xfs_attr_leaf_name_remote_t;
 
 typedef struct xfs_attr_leafblock {
-- 
cgit v1.2.3


From 3b244aa81ecb06859e0c16abf4c26404c1d86591 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:29:25 +1100
Subject: [XFS] endianess annotations for xfs_attr_shortform_t

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25501a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_attr_leaf.c | 55 +++++++++++++++++++++++---------------------------
 fs/xfs/xfs_attr_sf.h   |  8 ++++----
 fs/xfs/xfs_inode.c     |  2 +-
 3 files changed, 30 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 531417b2c74..37d2e10f874 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -194,7 +194,7 @@ xfs_attr_shortform_create(xfs_da_args_t *args)
 	xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK);
 	hdr = (xfs_attr_sf_hdr_t *)ifp->if_u1.if_data;
 	hdr->count = 0;
-	INT_SET(hdr->totsize, ARCH_CONVERT, sizeof(*hdr));
+	hdr->totsize = cpu_to_be16(sizeof(*hdr));
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
 }
 
@@ -224,8 +224,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
 	ASSERT(ifp->if_flags & XFS_IFINLINE);
 	sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
 	sfe = &sf->list[0];
-	for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT);
-				sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
+	for (i = 0; i < sf->hdr.count; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
 #ifdef DEBUG
 		if (sfe->namelen != args->namelen)
 			continue;
@@ -248,13 +247,13 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
 	sfe = (xfs_attr_sf_entry_t *)((char *)sf + offset);
 
 	sfe->namelen = args->namelen;
-	INT_SET(sfe->valuelen, ARCH_CONVERT, args->valuelen);
+	sfe->valuelen = args->valuelen;
 	sfe->flags = (args->flags & ATTR_SECURE) ? XFS_ATTR_SECURE :
 			((args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0);
 	memcpy(sfe->nameval, args->name, args->namelen);
 	memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen);
-	INT_MOD(sf->hdr.count, ARCH_CONVERT, 1);
-	INT_MOD(sf->hdr.totsize, ARCH_CONVERT, size);
+	sf->hdr.count++;
+	be16_add(&sf->hdr.totsize, size);
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
 
 	xfs_sbversion_add_attr2(mp, args->trans);
@@ -277,7 +276,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 	base = sizeof(xfs_attr_sf_hdr_t);
 	sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data;
 	sfe = &sf->list[0];
-	end = INT_GET(sf->hdr.count, ARCH_CONVERT);
+	end = sf->hdr.count;
 	for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe),
 					base += size, i++) {
 		size = XFS_ATTR_SF_ENTSIZE(sfe);
@@ -300,11 +299,11 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 	 * Fix up the attribute fork data, covering the hole
 	 */
 	end = base + size;
-	totsize = INT_GET(sf->hdr.totsize, ARCH_CONVERT);
+	totsize = be16_to_cpu(sf->hdr.totsize);
 	if (end != totsize)
 		memmove(&((char *)sf)[base], &((char *)sf)[end], totsize - end);
-	INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
-	INT_MOD(sf->hdr.totsize, ARCH_CONVERT, -size);
+	sf->hdr.count--;
+	be16_add(&sf->hdr.totsize, -size);
 
 	/*
 	 * Fix up the start offset of the attribute fork
@@ -360,7 +359,7 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
 	ASSERT(ifp->if_flags & XFS_IFINLINE);
 	sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
 	sfe = &sf->list[0];
-	for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT);
+	for (i = 0; i < sf->hdr.count;
 				sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
 		if (sfe->namelen != args->namelen)
 			continue;
@@ -391,7 +390,7 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
 	ASSERT(args->dp->i_d.di_aformat == XFS_IFINLINE);
 	sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data;
 	sfe = &sf->list[0];
-	for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT);
+	for (i = 0; i < sf->hdr.count;
 				sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
 		if (sfe->namelen != args->namelen)
 			continue;
@@ -404,14 +403,14 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
 		    ((sfe->flags & XFS_ATTR_ROOT) != 0))
 			continue;
 		if (args->flags & ATTR_KERNOVAL) {
-			args->valuelen = INT_GET(sfe->valuelen, ARCH_CONVERT);
+			args->valuelen = sfe->valuelen;
 			return(XFS_ERROR(EEXIST));
 		}
-		if (args->valuelen < INT_GET(sfe->valuelen, ARCH_CONVERT)) {
-			args->valuelen = INT_GET(sfe->valuelen, ARCH_CONVERT);
+		if (args->valuelen < sfe->valuelen) {
+			args->valuelen = sfe->valuelen;
 			return(XFS_ERROR(ERANGE));
 		}
-		args->valuelen = INT_GET(sfe->valuelen, ARCH_CONVERT);
+		args->valuelen = sfe->valuelen;
 		memcpy(args->value, &sfe->nameval[args->namelen],
 						    args->valuelen);
 		return(XFS_ERROR(EEXIST));
@@ -438,7 +437,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 	dp = args->dp;
 	ifp = dp->i_afp;
 	sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
-	size = INT_GET(sf->hdr.totsize, ARCH_CONVERT);
+	size = be16_to_cpu(sf->hdr.totsize);
 	tmpbuffer = kmem_alloc(size, KM_SLEEP);
 	ASSERT(tmpbuffer != NULL);
 	memcpy(tmpbuffer, ifp->if_u1.if_data, size);
@@ -481,11 +480,11 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 	nargs.oknoent = 1;
 
 	sfe = &sf->list[0];
-	for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) {
+	for (i = 0; i < sf->hdr.count; i++) {
 		nargs.name = (char *)sfe->nameval;
 		nargs.namelen = sfe->namelen;
 		nargs.value = (char *)&sfe->nameval[nargs.namelen];
-		nargs.valuelen = INT_GET(sfe->valuelen, ARCH_CONVERT);
+		nargs.valuelen = sfe->valuelen;
 		nargs.hashval = xfs_da_hashname((char *)sfe->nameval,
 						sfe->namelen);
 		nargs.flags = (sfe->flags & XFS_ATTR_SECURE) ? ATTR_SECURE :
@@ -560,10 +559,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 	 * If the buffer is large enough, do not bother with sorting.
 	 * Note the generous fudge factor of 16 overhead bytes per entry.
 	 */
-	if ((dp->i_afp->if_bytes + INT_GET(sf->hdr.count, ARCH_CONVERT) * 16)
-							< context->bufsize) {
-		for (i = 0, sfe = &sf->list[0];
-				i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) {
+	if ((dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize) {
+		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
 			attrnames_t	*namesp;
 
 			if (((context->flags & ATTR_SECURE) != 0) !=
@@ -584,14 +581,13 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 			if (context->flags & ATTR_KERNOVAL) {
 				ASSERT(context->flags & ATTR_KERNAMELS);
 				context->count += namesp->attr_namelen +
-					INT_GET(sfe->namelen, ARCH_CONVERT) + 1;
+					sfe->namelen + 1;
 			}
 			else {
 				if (xfs_attr_put_listent(context, namesp,
 						   (char *)sfe->nameval,
 						   (int)sfe->namelen,
-						   (int)INT_GET(sfe->valuelen,
-								ARCH_CONVERT)))
+						   (int)sfe->valuelen))
 					break;
 			}
 			sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
@@ -603,7 +599,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 	/*
 	 * It didn't all fit, so we have to sort everything on hashval.
 	 */
-	sbsize = INT_GET(sf->hdr.count, ARCH_CONVERT) * sizeof(*sbuf);
+	sbsize = sf->hdr.count * sizeof(*sbuf);
 	sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP);
 
 	/*
@@ -611,8 +607,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 	 * the relevant info from only those that match into a buffer.
 	 */
 	nsbuf = 0;
-	for (i = 0, sfe = &sf->list[0];
-			i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) {
+	for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
 		if (unlikely(
 		    ((char *)sfe < (char *)sf) ||
 		    ((char *)sfe >= ((char *)sf + dp->i_afp->if_bytes)))) {
@@ -696,7 +691,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 		} else {
 			if (xfs_attr_put_listent(context, namesp,
 					sbp->name, sbp->namelen,
-					INT_GET(sbp->valuelen, ARCH_CONVERT)))
+					sbp->valuelen))
 				break;
 		}
 		cursor->offset++;
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h
index ffed6ca81a5..f67f917803b 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/xfs_attr_sf.h
@@ -32,8 +32,8 @@ struct xfs_inode;
  */
 typedef struct xfs_attr_shortform {
 	struct xfs_attr_sf_hdr {	/* constant-structure header block */
-		__uint16_t totsize;	/* total bytes in shortform list */
-		__uint8_t count;	/* count of active entries */
+		__be16	totsize;	/* total bytes in shortform list */
+		__u8	count;	/* count of active entries */
 	} hdr;
 	struct xfs_attr_sf_entry {
 		__uint8_t namelen;	/* actual length of name (no NULL) */
@@ -66,8 +66,8 @@ typedef struct xfs_attr_sf_sort {
 #define XFS_ATTR_SF_NEXTENTRY(sfep)		/* next entry in struct */ \
 	((xfs_attr_sf_entry_t *)((char *)(sfep) + XFS_ATTR_SF_ENTSIZE(sfep)))
 #define XFS_ATTR_SF_TOTSIZE(dp)			/* total space in use */ \
-	(INT_GET(((xfs_attr_shortform_t *)	\
-		((dp)->i_afp->if_u1.if_data))->hdr.totsize, ARCH_CONVERT))
+	(be16_to_cpu(((xfs_attr_shortform_t *)	\
+		((dp)->i_afp->if_u1.if_data))->hdr.totsize))
 
 #if defined(XFS_ATTR_TRACE)
 /*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b1e95707e7c..7d0ded05a46 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -506,7 +506,7 @@ xfs_iformat(
 	switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) {
 	case XFS_DINODE_FMT_LOCAL:
 		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
-		size = (int)INT_GET(atp->hdr.totsize, ARCH_CONVERT);
+		size = be16_to_cpu(atp->hdr.totsize);
 		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
 		break;
 	case XFS_DINODE_FMT_EXTENTS:
-- 
cgit v1.2.3


From 984a081a7c89ea7e1b6f47cbc0e5c8ef67ad6e09 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:29:31 +1100
Subject: [XFS] store xfs_attr_sf_sort in native endian

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25502a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_attr_leaf.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 37d2e10f874..e4071eb1e6c 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -513,11 +513,9 @@ xfs_attr_shortform_compare(const void *a, const void *b)
 
 	sa = (xfs_attr_sf_sort_t *)a;
 	sb = (xfs_attr_sf_sort_t *)b;
-	if (INT_GET(sa->hash, ARCH_CONVERT)
-				< INT_GET(sb->hash, ARCH_CONVERT)) {
+	if (sa->hash < sb->hash) {
 		return(-1);
-	} else if (INT_GET(sa->hash, ARCH_CONVERT)
-				> INT_GET(sb->hash, ARCH_CONVERT)) {
+	} else if (sa->hash > sb->hash) {
 		return(1);
 	} else {
 		return(sa->entno - sb->entno);
@@ -631,8 +629,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 			continue;
 		}
 		sbp->entno = i;
-		INT_SET(sbp->hash, ARCH_CONVERT,
-			xfs_da_hashname((char *)sfe->nameval, sfe->namelen));
+		sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen);
 		sbp->name = (char *)sfe->nameval;
 		sbp->namelen = sfe->namelen;
 		/* These are bytes, and both on-disk, don't endian-flip */
@@ -655,12 +652,12 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 	cursor->initted = 1;
 	cursor->blkno = 0;
 	for (sbp = sbuf, i = 0; i < nsbuf; i++, sbp++) {
-		if (INT_GET(sbp->hash, ARCH_CONVERT) == cursor->hashval) {
+		if (sbp->hash == cursor->hashval) {
 			if (cursor->offset == count) {
 				break;
 			}
 			count++;
-		} else if (INT_GET(sbp->hash, ARCH_CONVERT) > cursor->hashval) {
+		} else if (sbp->hash > cursor->hashval) {
 			break;
 		}
 	}
@@ -680,8 +677,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 			((sbp->flags & XFS_ATTR_ROOT) ? &attr_trusted :
 			  &attr_user);
 
-		if (cursor->hashval != INT_GET(sbp->hash, ARCH_CONVERT)) {
-			cursor->hashval = INT_GET(sbp->hash, ARCH_CONVERT);
+		if (cursor->hashval != sbp->hash) {
+			cursor->hashval = sbp->hash;
 			cursor->offset = 0;
 		}
 		if (context->flags & ATTR_KERNOVAL) {
-- 
cgit v1.2.3


From d7929ff670c802dc68d6149d3d0cc5667e18daec Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:29:36 +1100
Subject: [XFS] store xfs_attr_inactive_list_t in native endian

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25503a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_attr_leaf.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index e4071eb1e6c..b3d5c35b604 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -2938,11 +2938,9 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 		    ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
 			name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i);
 			if (name_rmt->valueblk) {
-				/* both on-disk, don't endian flip twice */
-				lp->valueblk = name_rmt->valueblk;
-				INT_SET(lp->valuelen, ARCH_CONVERT,
-						XFS_B_TO_FSB(dp->i_mount,
-						    be32_to_cpu(name_rmt->valuelen)));
+				lp->valueblk = be32_to_cpu(name_rmt->valueblk);
+				lp->valuelen = XFS_B_TO_FSB(dp->i_mount,
+						    be32_to_cpu(name_rmt->valuelen));
 				lp++;
 			}
 		}
@@ -2955,10 +2953,8 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 	error = 0;
 	for (lp = list, i = 0; i < count; i++, lp++) {
 		tmp = xfs_attr_leaf_freextent(trans, dp,
-						     INT_GET(lp->valueblk,
-								ARCH_CONVERT),
-						     INT_GET(lp->valuelen,
-								ARCH_CONVERT));
+				lp->valueblk, lp->valuelen);
+
 		if (error == 0)
 			error = tmp;	/* save only the 1st errno */
 	}
-- 
cgit v1.2.3


From 403432dcb5daa03c1f1c961adb7d2a5daebea94b Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:29:46 +1100
Subject: [XFS] endianess annotations for xfs_da_node_entry_t

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25504a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_attr.c      | 13 ++++----
 fs/xfs/xfs_attr_leaf.c |  6 ++--
 fs/xfs/xfs_da_btree.c  | 86 +++++++++++++++++++++++++-------------------------
 fs/xfs/xfs_da_btree.h  |  4 +--
 fs/xfs/xfs_dir.c       |  8 ++---
 fs/xfs/xfs_dir_leaf.c  |  2 +-
 6 files changed, 59 insertions(+), 60 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 98d0f4d1060..5d61c628d0d 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -1829,9 +1829,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 				i < INT_GET(node->hdr.count, ARCH_CONVERT);
 								btree++, i++) {
 				if (cursor->hashval
-						<= INT_GET(btree->hashval,
-							    ARCH_CONVERT)) {
-					cursor->blkno = INT_GET(btree->before, ARCH_CONVERT);
+						<= be32_to_cpu(btree->hashval)) {
+					cursor->blkno = be32_to_cpu(btree->before);
 					xfs_attr_trace_l_cb("descending",
 							    context, btree);
 					break;
@@ -2228,8 +2227,8 @@ xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
 		(__psunsigned_t)context->dupcnt,
 		(__psunsigned_t)context->flags,
 		(__psunsigned_t)INT_GET(node->hdr.count, ARCH_CONVERT),
-		(__psunsigned_t)INT_GET(node->btree[0].hashval, ARCH_CONVERT),
-		(__psunsigned_t)INT_GET(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval, ARCH_CONVERT));
+		(__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
+		(__psunsigned_t)be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval));
 }
 
 /*
@@ -2256,8 +2255,8 @@ xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
 				: 0,
 		(__psunsigned_t)context->dupcnt,
 		(__psunsigned_t)context->flags,
-		(__psunsigned_t)INT_GET(btree->hashval, ARCH_CONVERT),
-		(__psunsigned_t)INT_GET(btree->before, ARCH_CONVERT),
+		(__psunsigned_t)be32_to_cpu(btree->hashval),
+		(__psunsigned_t)be32_to_cpu(btree->before),
 		(__psunsigned_t)NULL);
 }
 
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index b3d5c35b604..70594bceffe 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -869,7 +869,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
 	/* both on-disk, don't endian-flip twice */
 	node->btree[0].hashval =
 		leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval;
-	INT_SET(node->btree[0].before, ARCH_CONVERT, blkno);
+	node->btree[0].before = cpu_to_be32(blkno);
 	INT_SET(node->hdr.count, ARCH_CONVERT, 1);
 	xfs_da_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1);
 	error = 0;
@@ -2809,7 +2809,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
 		xfs_da_brelse(*trans, bp);
 		return(0);
 	}
-	child_fsb = INT_GET(node->btree[0].before, ARCH_CONVERT);
+	child_fsb = be32_to_cpu(node->btree[0].before);
 	xfs_da_brelse(*trans, bp);	/* no locks for later trans */
 
 	/*
@@ -2869,7 +2869,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
 				&bp, XFS_ATTR_FORK);
 			if (error)
 				return(error);
-			child_fsb = INT_GET(node->btree[i+1].before, ARCH_CONVERT);
+			child_fsb = be32_to_cpu(node->btree[i+1].before);
 			xfs_da_brelse(*trans, bp);
 		}
 		/*
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 4f3bb1cb961..d32670c09f3 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -385,10 +385,10 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	if (error)
 		return(error);
 	node = bp->data;
-	INT_SET(node->btree[0].hashval, ARCH_CONVERT, blk1->hashval);
-	INT_SET(node->btree[0].before, ARCH_CONVERT, blk1->blkno);
-	INT_SET(node->btree[1].hashval, ARCH_CONVERT, blk2->hashval);
-	INT_SET(node->btree[1].before, ARCH_CONVERT, blk2->blkno);
+	node->btree[0].hashval = cpu_to_be32(blk1->hashval);
+	node->btree[0].before = cpu_to_be32(blk1->blkno);
+	node->btree[1].hashval = cpu_to_be32(blk2->hashval);
+	node->btree[1].before = cpu_to_be32(blk2->blkno);
 	INT_SET(node->hdr.count, ARCH_CONVERT, 2);
 
 #ifdef DEBUG
@@ -517,9 +517,9 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	 * Swap the nodes around if that makes it simpler.
 	 */
 	if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
-	    ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
-	     (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
-	      INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
+	    ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) ||
+	     (be32_to_cpu(node2->btree[INT_GET(node2->hdr.count, ARCH_CONVERT)-1].hashval) <
+	      be32_to_cpu(node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)-1].hashval)))) {
 		tmpnode = node1;
 		node1 = node2;
 		node2 = tmpnode;
@@ -596,8 +596,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	 */
 	node1 = blk1->bp->data;
 	node2 = blk2->bp->data;
-	blk1->hashval = INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
-	blk2->hashval = INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+	blk1->hashval = be32_to_cpu(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval);
+	blk2->hashval = be32_to_cpu(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval);
 
 	/*
 	 * Adjust the expected index for insertion.
@@ -638,8 +638,8 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 		tmp = (INT_GET(node->hdr.count, ARCH_CONVERT) - oldblk->index) * (uint)sizeof(*btree);
 		memmove(btree + 1, btree, tmp);
 	}
-	INT_SET(btree->hashval, ARCH_CONVERT, newblk->hashval);
-	INT_SET(btree->before, ARCH_CONVERT, newblk->blkno);
+	btree->hashval = cpu_to_be32(newblk->hashval);
+	btree->before = cpu_to_be32(newblk->blkno);
 	xfs_da_log_buf(state->args->trans, oldblk->bp,
 		XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
 	INT_MOD(node->hdr.count, ARCH_CONVERT, +1);
@@ -649,7 +649,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	/*
 	 * Copy the last hash value from the oldblk to propagate upwards.
 	 */
-	oldblk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+	oldblk->hashval = be32_to_cpu(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval);
 }
 
 /*========================================================================
@@ -782,7 +782,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 	 * Read in the (only) child block, then copy those bytes into
 	 * the root block's buffer and free the original child block.
 	 */
-	child = INT_GET(oldroot->btree[ 0 ].before, ARCH_CONVERT);
+	child = be32_to_cpu(oldroot->btree[0].before);
 	ASSERT(child != 0);
 	error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
 					     args->whichfork);
@@ -974,14 +974,14 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
 		node = blk->bp->data;
 		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 		btree = &node->btree[ blk->index ];
-		if (INT_GET(btree->hashval, ARCH_CONVERT) == lasthash)
+		if (be32_to_cpu(btree->hashval) == lasthash)
 			break;
 		blk->hashval = lasthash;
-		INT_SET(btree->hashval, ARCH_CONVERT, lasthash);
+		btree->hashval = cpu_to_be32(lasthash);
 		xfs_da_log_buf(state->args->trans, blk->bp,
 				  XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
 
-		lasthash = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+		lasthash = be32_to_cpu(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval);
 	}
 }
 
@@ -1022,7 +1022,7 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
 	 * Copy the last hash value from the block to propagate upwards.
 	 */
 	btree--;
-	drop_blk->hashval = INT_GET(btree->hashval, ARCH_CONVERT);
+	drop_blk->hashval = be32_to_cpu(btree->hashval);
 }
 
 /*
@@ -1048,9 +1048,9 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	 * If the dying block has lower hashvals, then move all the
 	 * elements in the remaining block up to make a hole.
 	 */
-	if ((INT_GET(drop_node->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(save_node->btree[ 0 ].hashval, ARCH_CONVERT)) ||
-	    (INT_GET(drop_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
-	     INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))
+	if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) ||
+	    (be32_to_cpu(drop_node->btree[INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1].hashval) <
+	     be32_to_cpu(save_node->btree[INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval)))
 	{
 		btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ];
 		tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
@@ -1082,7 +1082,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	/*
 	 * Save the last hashval in the remaining block for upward propagation.
 	 */
-	save_blk->hashval = INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+	save_blk->hashval = be32_to_cpu(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval);
 }
 
 /*========================================================================
@@ -1147,7 +1147,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 		blk->magic = be16_to_cpu(curr->magic);
 		if (blk->magic == XFS_DA_NODE_MAGIC) {
 			node = blk->bp->data;
-			blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+			blk->hashval = be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval);
 
 			/*
 			 * Binary search.  (note: small blocks will skip loop)
@@ -1158,25 +1158,25 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 			for (btree = &node->btree[probe]; span > 4;
 				   btree = &node->btree[probe]) {
 				span /= 2;
-				if (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)
+				if (be32_to_cpu(btree->hashval) < hashval)
 					probe += span;
-				else if (INT_GET(btree->hashval, ARCH_CONVERT) > hashval)
+				else if (be32_to_cpu(btree->hashval) > hashval)
 					probe -= span;
 				else
 					break;
 			}
 			ASSERT((probe >= 0) && (probe < max));
-			ASSERT((span <= 4) || (INT_GET(btree->hashval, ARCH_CONVERT) == hashval));
+			ASSERT((span <= 4) || (be32_to_cpu(btree->hashval) == hashval));
 
 			/*
 			 * Since we may have duplicate hashval's, find the first
 			 * matching hashval in the node.
 			 */
-			while ((probe > 0) && (INT_GET(btree->hashval, ARCH_CONVERT) >= hashval)) {
+			while ((probe > 0) && (be32_to_cpu(btree->hashval) >= hashval)) {
 				btree--;
 				probe--;
 			}
-			while ((probe < max) && (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)) {
+			while ((probe < max) && (be32_to_cpu(btree->hashval) < hashval)) {
 				btree++;
 				probe++;
 			}
@@ -1186,10 +1186,10 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 			 */
 			if (probe == max) {
 				blk->index = max-1;
-				blkno = INT_GET(node->btree[ max-1 ].before, ARCH_CONVERT);
+				blkno = be32_to_cpu(node->btree[max-1].before);
 			} else {
 				blk->index = probe;
-				blkno = INT_GET(btree->before, ARCH_CONVERT);
+				blkno = be32_to_cpu(btree->before);
 			}
 		}
 		else if (be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC) {
@@ -1359,10 +1359,10 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
 	ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) &&
 	       (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC));
 	if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
-	    ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) <
-	      INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
-	     (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
-	      INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
+	    ((be32_to_cpu(node2->btree[0].hashval) <
+	      be32_to_cpu(node1->btree[0].hashval)) ||
+	     (be32_to_cpu(node2->btree[INT_GET(node2->hdr.count, ARCH_CONVERT)-1].hashval) <
+	      be32_to_cpu(node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)-1].hashval)))) {
 		return(1);
 	}
 	return(0);
@@ -1382,7 +1382,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
 		*count = INT_GET(node->hdr.count, ARCH_CONVERT);
 	if (!node->hdr.count)
 		return(0);
-	return(INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
+	return be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval);
 }
 
 /*
@@ -1493,11 +1493,11 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 		if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
 			blk->index++;
-			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
+			blkno = be32_to_cpu(node->btree[blk->index].before);
 			break;
 		} else if (!forward && (blk->index > 0)) {
 			blk->index--;
-			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
+			blkno = be32_to_cpu(node->btree[blk->index].before);
 			break;
 		}
 	}
@@ -1535,12 +1535,12 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 		blk->magic = be16_to_cpu(info->magic);
 		if (blk->magic == XFS_DA_NODE_MAGIC) {
 			node = (xfs_da_intnode_t *)info;
-			blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+			blk->hashval = be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval);
 			if (forward)
 				blk->index = 0;
 			else
 				blk->index = INT_GET(node->hdr.count, ARCH_CONVERT)-1;
-			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
+			blkno = be32_to_cpu(node->btree[blk->index].before);
 		} else {
 			ASSERT(level == path->active-1);
 			blk->index = 0;
@@ -1796,7 +1796,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 		ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC);
 		dead_node = (xfs_da_intnode_t *)dead_info;
 		dead_level = INT_GET(dead_node->hdr.level, ARCH_CONVERT);
-		dead_hash = INT_GET(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+		dead_hash = be32_to_cpu(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval);
 	}
 	sib_buf = par_buf = NULL;
 	/*
@@ -1863,7 +1863,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 		level = INT_GET(par_node->hdr.level, ARCH_CONVERT);
 		for (entno = 0;
 		     entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
-		     INT_GET(par_node->btree[entno].hashval, ARCH_CONVERT) < dead_hash;
+		     be32_to_cpu(par_node->btree[entno].hashval) < dead_hash;
 		     entno++)
 			continue;
 		if (unlikely(entno == INT_GET(par_node->hdr.count, ARCH_CONVERT))) {
@@ -1872,7 +1872,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 			error = XFS_ERROR(EFSCORRUPTED);
 			goto done;
 		}
-		par_blkno = INT_GET(par_node->btree[entno].before, ARCH_CONVERT);
+		par_blkno = be32_to_cpu(par_node->btree[entno].before);
 		if (level == dead_level + 1)
 			break;
 		xfs_da_brelse(tp, par_buf);
@@ -1885,7 +1885,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	for (;;) {
 		for (;
 		     entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
-		     INT_GET(par_node->btree[entno].before, ARCH_CONVERT) != last_blkno;
+		     be32_to_cpu(par_node->btree[entno].before) != last_blkno;
 		     entno++)
 			continue;
 		if (entno < INT_GET(par_node->hdr.count, ARCH_CONVERT))
@@ -1915,7 +1915,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	/*
 	 * Update the parent entry pointing to the moved block.
 	 */
-	INT_SET(par_node->btree[entno].before, ARCH_CONVERT, dead_blkno);
+	par_node->btree[entno].before = cpu_to_be32(dead_blkno);
 	xfs_da_log_buf(tp, par_buf,
 		XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
 				sizeof(par_node->btree[entno].before)));
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index e727bf456a1..6343c3a4dba 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -69,8 +69,8 @@ typedef struct xfs_da_intnode {
 		__uint16_t level;	/* level above leaves (leaf == 0) */
 	} hdr;
 	struct xfs_da_node_entry {
-		xfs_dahash_t hashval;	/* hash value for this descendant */
-		xfs_dablk_t before;	/* Btree block before this key */
+		__be32	hashval;	/* hash value for this descendant */
+		__be32	before;		/* Btree block before this key */
 	} btree[1];			/* variable sized array of keys */
 } xfs_da_intnode_t;
 typedef struct xfs_da_node_hdr xfs_da_node_hdr_t;
diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c
index 7a708e993db..8d1975a541b 100644
--- a/fs/xfs/xfs_dir.c
+++ b/fs/xfs/xfs_dir.c
@@ -954,8 +954,8 @@ xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio,
 			btree = &node->btree[0];
 			xfs_dir_trace_g_dun("node: node detail", dp, uio, node);
 			for (i = 0; i < INT_GET(node->hdr.count, ARCH_CONVERT); btree++, i++) {
-				if (INT_GET(btree->hashval, ARCH_CONVERT) >= cookhash) {
-					bno = INT_GET(btree->before, ARCH_CONVERT);
+				if (be32_to_cpu(btree->hashval) >= cookhash) {
+					bno = be32_to_cpu(btree->before);
 					break;
 				}
 			}
@@ -1129,9 +1129,9 @@ xfs_dir_trace_g_dun(char *where, xfs_inode_t *dp, uio_t *uio,
 		     (void *)(unsigned long)
 			INT_GET(node->hdr.count, ARCH_CONVERT),
 		     (void *)(unsigned long)
-			INT_GET(node->btree[0].hashval, ARCH_CONVERT),
+			be32_to_cpu(node->btree[0].hashval),
 		     (void *)(unsigned long)
-			INT_GET(node->btree[last].hashval, ARCH_CONVERT),
+			be32_to_cpu(node->btree[last].hashval),
 		     NULL, NULL, NULL);
 }
 
diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
index 0b2eca590b3..3c58834fa6a 100644
--- a/fs/xfs/xfs_dir_leaf.c
+++ b/fs/xfs/xfs_dir_leaf.c
@@ -745,7 +745,7 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args)
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
 	INT_SET(node->btree[0].hashval, ARCH_CONVERT, INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
 	xfs_da_buf_done(bp2);
-	INT_SET(node->btree[0].before, ARCH_CONVERT, blkno);
+	node->btree[0].before = cpu_to_be32(blkno);
 	INT_SET(node->hdr.count, ARCH_CONVERT, 1);
 	xfs_da_log_buf(args->trans, bp1,
 		XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0])));
-- 
cgit v1.2.3


From fac80cce0ecc6b10ae165af5b6b9b03151083044 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:29:56 +1100
Subject: [XFS] endianess annotations for xfs_da_node_hdr_t

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25505a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_attr.c      |  10 ++--
 fs/xfs/xfs_attr_leaf.c |   4 +-
 fs/xfs/xfs_da_btree.c  | 139 ++++++++++++++++++++++++-------------------------
 fs/xfs/xfs_da_btree.h  |   4 +-
 fs/xfs/xfs_dir.c       |   8 +--
 fs/xfs/xfs_dir_leaf.c  |   6 ++-
 6 files changed, 86 insertions(+), 85 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 5d61c628d0d..093fac476bd 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -1825,8 +1825,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 				return(XFS_ERROR(EFSCORRUPTED));
 			}
 			btree = node->btree;
-			for (i = 0;
-				i < INT_GET(node->hdr.count, ARCH_CONVERT);
+			for (i = 0; i < be16_to_cpu(node->hdr.count);
 								btree++, i++) {
 				if (cursor->hashval
 						<= be32_to_cpu(btree->hashval)) {
@@ -1836,7 +1835,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 					break;
 				}
 			}
-			if (i == INT_GET(node->hdr.count, ARCH_CONVERT)) {
+			if (i == be16_to_cpu(node->hdr.count)) {
 				xfs_da_brelse(NULL, bp);
 				return(0);
 			}
@@ -2226,9 +2225,10 @@ xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
 				: 0,
 		(__psunsigned_t)context->dupcnt,
 		(__psunsigned_t)context->flags,
-		(__psunsigned_t)INT_GET(node->hdr.count, ARCH_CONVERT),
+		(__psunsigned_t)be16_to_cpu(node->hdr.count),
 		(__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
-		(__psunsigned_t)be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval));
+		(__psunsigned_t)be32_to_cpu(node->btree[
+				    be16_to_cpu(node->hdr.count)-1].hashval));
 }
 
 /*
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 70594bceffe..717682747bd 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -870,7 +870,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
 	node->btree[0].hashval =
 		leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval;
 	node->btree[0].before = cpu_to_be32(blkno);
-	INT_SET(node->hdr.count, ARCH_CONVERT, 1);
+	node->hdr.count = cpu_to_be16(1);
 	xfs_da_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1);
 	error = 0;
 out:
@@ -2804,7 +2804,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
 	node = bp->data;
 	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	parent_blkno = xfs_da_blkno(bp);	/* save for re-read later */
-	count = INT_GET(node->hdr.count, ARCH_CONVERT);
+	count = be16_to_cpu(node->hdr.count);
 	if (!count) {
 		xfs_da_brelse(*trans, bp);
 		return(0);
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index d32670c09f3..4bae3a76c67 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -129,7 +129,7 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
 	node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC);
 	node->hdr.info.pad = 0;
 	node->hdr.count = 0;
-	INT_SET(node->hdr.level, ARCH_CONVERT, level);
+	node->hdr.level = cpu_to_be16(level);
 
 	xfs_da_log_buf(tp, bp,
 		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
@@ -360,7 +360,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	node = bp->data;
 	oldroot = blk1->bp->data;
 	if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) {
-		size = (int)((char *)&oldroot->btree[INT_GET(oldroot->hdr.count, ARCH_CONVERT)] -
+		size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
 			     (char *)oldroot);
 	} else {
 		ASSERT(XFS_DIR_IS_V2(mp));
@@ -381,7 +381,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	error = xfs_da_node_create(args,
 		args->whichfork == XFS_DATA_FORK &&
 		XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0,
-		INT_GET(node->hdr.level, ARCH_CONVERT) + 1, &bp, args->whichfork);
+		be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork);
 	if (error)
 		return(error);
 	node = bp->data;
@@ -389,7 +389,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	node->btree[0].before = cpu_to_be32(blk1->blkno);
 	node->btree[1].hashval = cpu_to_be32(blk2->hashval);
 	node->btree[1].before = cpu_to_be32(blk2->blkno);
-	INT_SET(node->hdr.count, ARCH_CONVERT, 2);
+	node->hdr.count = cpu_to_be16(2);
 
 #ifdef DEBUG
 	if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) {
@@ -435,7 +435,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	/*
 	 * Do we have to split the node?
 	 */
-	if ((INT_GET(node->hdr.count, ARCH_CONVERT) + newcount) > state->node_ents) {
+	if ((be16_to_cpu(node->hdr.count) + newcount) > state->node_ents) {
 		/*
 		 * Allocate a new node, add to the doubly linked chain of
 		 * nodes, then move some of our excess entries into it.
@@ -472,7 +472,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	 * If we had double-split op below us, then add the extra block too.
 	 */
 	node = oldblk->bp->data;
-	if (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)) {
+	if (oldblk->index <= be16_to_cpu(node->hdr.count)) {
 		oldblk->index++;
 		xfs_da_node_add(state, oldblk, addblk);
 		if (useextra) {
@@ -516,17 +516,17 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	 * Figure out how many entries need to move, and in which direction.
 	 * Swap the nodes around if that makes it simpler.
 	 */
-	if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
+	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
 	    ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) ||
-	     (be32_to_cpu(node2->btree[INT_GET(node2->hdr.count, ARCH_CONVERT)-1].hashval) <
-	      be32_to_cpu(node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)-1].hashval)))) {
+	     (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
+	      be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
 		tmpnode = node1;
 		node1 = node2;
 		node2 = tmpnode;
 	}
 	ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC);
-	count = (INT_GET(node1->hdr.count, ARCH_CONVERT) - INT_GET(node2->hdr.count, ARCH_CONVERT)) / 2;
+	count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
 	if (count == 0)
 		return;
 	tp = state->args->trans;
@@ -537,7 +537,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		/*
 		 * Move elements in node2 up to make a hole.
 		 */
-		if ((tmp = INT_GET(node2->hdr.count, ARCH_CONVERT)) > 0) {
+		if ((tmp = be16_to_cpu(node2->hdr.count)) > 0) {
 			tmp *= (uint)sizeof(xfs_da_node_entry_t);
 			btree_s = &node2->btree[0];
 			btree_d = &node2->btree[count];
@@ -548,13 +548,12 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		 * Move the req'd B-tree elements from high in node1 to
 		 * low in node2.
 		 */
-		INT_MOD(node2->hdr.count, ARCH_CONVERT, count);
+		be16_add(&node2->hdr.count, count);
 		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
-		btree_s = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT) - count];
+		btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count];
 		btree_d = &node2->btree[0];
 		memcpy(btree_d, btree_s, tmp);
-		INT_MOD(node1->hdr.count, ARCH_CONVERT, -(count));
-
+		be16_add(&node1->hdr.count, -count);
 	} else {
 		/*
 		 * Move the req'd B-tree elements from low in node2 to
@@ -563,21 +562,21 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		count = -count;
 		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
 		btree_s = &node2->btree[0];
-		btree_d = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)];
+		btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)];
 		memcpy(btree_d, btree_s, tmp);
-		INT_MOD(node1->hdr.count, ARCH_CONVERT, count);
+		be16_add(&node1->hdr.count, count);
 		xfs_da_log_buf(tp, blk1->bp,
 			XFS_DA_LOGRANGE(node1, btree_d, tmp));
 
 		/*
 		 * Move elements in node2 down to fill the hole.
 		 */
-		tmp  = INT_GET(node2->hdr.count, ARCH_CONVERT) - count;
+		tmp  = be16_to_cpu(node2->hdr.count) - count;
 		tmp *= (uint)sizeof(xfs_da_node_entry_t);
 		btree_s = &node2->btree[count];
 		btree_d = &node2->btree[0];
 		memmove(btree_d, btree_s, tmp);
-		INT_MOD(node2->hdr.count, ARCH_CONVERT, -(count));
+		be16_add(&node2->hdr.count, -count);
 	}
 
 	/*
@@ -588,7 +587,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	xfs_da_log_buf(tp, blk2->bp,
 		XFS_DA_LOGRANGE(node2, &node2->hdr,
 			sizeof(node2->hdr) +
-			sizeof(node2->btree[0]) * INT_GET(node2->hdr.count, ARCH_CONVERT)));
+			sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count)));
 
 	/*
 	 * Record the last hashval from each block for upward propagation.
@@ -596,15 +595,15 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	 */
 	node1 = blk1->bp->data;
 	node2 = blk2->bp->data;
-	blk1->hashval = be32_to_cpu(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval);
-	blk2->hashval = be32_to_cpu(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval);
+	blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval);
+	blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval);
 
 	/*
 	 * Adjust the expected index for insertion.
 	 */
-	if (blk1->index >= INT_GET(node1->hdr.count, ARCH_CONVERT)) {
-		blk2->index = blk1->index - INT_GET(node1->hdr.count, ARCH_CONVERT);
-		blk1->index = INT_GET(node1->hdr.count, ARCH_CONVERT) + 1;	/* make it invalid */
+	if (blk1->index >= be16_to_cpu(node1->hdr.count)) {
+		blk2->index = blk1->index - be16_to_cpu(node1->hdr.count);
+		blk1->index = be16_to_cpu(node1->hdr.count) + 1;	/* make it invalid */
 	}
 }
 
@@ -623,7 +622,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	node = oldblk->bp->data;
 	mp = state->mp;
 	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
-	ASSERT((oldblk->index >= 0) && (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)));
+	ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
 	ASSERT(newblk->blkno != 0);
 	if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
 		ASSERT(newblk->blkno >= mp->m_dirleafblk &&
@@ -634,22 +633,22 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	 */
 	tmp = 0;
 	btree = &node->btree[ oldblk->index ];
-	if (oldblk->index < INT_GET(node->hdr.count, ARCH_CONVERT)) {
-		tmp = (INT_GET(node->hdr.count, ARCH_CONVERT) - oldblk->index) * (uint)sizeof(*btree);
+	if (oldblk->index < be16_to_cpu(node->hdr.count)) {
+		tmp = (be16_to_cpu(node->hdr.count) - oldblk->index) * (uint)sizeof(*btree);
 		memmove(btree + 1, btree, tmp);
 	}
 	btree->hashval = cpu_to_be32(newblk->hashval);
 	btree->before = cpu_to_be32(newblk->blkno);
 	xfs_da_log_buf(state->args->trans, oldblk->bp,
 		XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
-	INT_MOD(node->hdr.count, ARCH_CONVERT, +1);
+	be16_add(&node->hdr.count, 1);
 	xfs_da_log_buf(state->args->trans, oldblk->bp,
 		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
 
 	/*
 	 * Copy the last hash value from the oldblk to propagate upwards.
 	 */
-	oldblk->hashval = be32_to_cpu(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval);
+	oldblk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1 ].hashval);
 }
 
 /*========================================================================
@@ -775,7 +774,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 	/*
 	 * If the root has more than one child, then don't do anything.
 	 */
-	if (INT_GET(oldroot->hdr.count, ARCH_CONVERT) > 1)
+	if (be16_to_cpu(oldroot->hdr.count) > 1)
 		return(0);
 
 	/*
@@ -790,7 +789,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 		return(error);
 	ASSERT(bp != NULL);
 	blkinfo = bp->data;
-	if (INT_GET(oldroot->hdr.level, ARCH_CONVERT) == 1) {
+	if (be16_to_cpu(oldroot->hdr.level) == 1) {
 		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
 		       be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC);
 	} else {
@@ -832,7 +831,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 	info = blk->bp->data;
 	ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC);
 	node = (xfs_da_intnode_t *)info;
-	count = INT_GET(node->hdr.count, ARCH_CONVERT);
+	count = be16_to_cpu(node->hdr.count);
 	if (count > (state->node_ents >> 1)) {
 		*action = 0;	/* blk over 50%, don't try to join */
 		return(0);	/* blk over 50%, don't try to join */
@@ -888,10 +887,10 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 		node = (xfs_da_intnode_t *)info;
 		count  = state->node_ents;
 		count -= state->node_ents >> 2;
-		count -= INT_GET(node->hdr.count, ARCH_CONVERT);
+		count -= be16_to_cpu(node->hdr.count);
 		node = bp->data;
 		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
-		count -= INT_GET(node->hdr.count, ARCH_CONVERT);
+		count -= be16_to_cpu(node->hdr.count);
 		xfs_da_brelse(state->args->trans, bp);
 		if (count >= 0)
 			break;	/* fits with at least 25% to spare */
@@ -981,7 +980,7 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
 		xfs_da_log_buf(state->args->trans, blk->bp,
 				  XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
 
-		lasthash = be32_to_cpu(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval);
+		lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
 	}
 }
 
@@ -996,25 +995,25 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
 	int tmp;
 
 	node = drop_blk->bp->data;
-	ASSERT(drop_blk->index < INT_GET(node->hdr.count, ARCH_CONVERT));
+	ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count));
 	ASSERT(drop_blk->index >= 0);
 
 	/*
 	 * Copy over the offending entry, or just zero it out.
 	 */
 	btree = &node->btree[drop_blk->index];
-	if (drop_blk->index < (INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
-		tmp  = INT_GET(node->hdr.count, ARCH_CONVERT) - drop_blk->index - 1;
+	if (drop_blk->index < (be16_to_cpu(node->hdr.count)-1)) {
+		tmp  = be16_to_cpu(node->hdr.count) - drop_blk->index - 1;
 		tmp *= (uint)sizeof(xfs_da_node_entry_t);
 		memmove(btree, btree + 1, tmp);
 		xfs_da_log_buf(state->args->trans, drop_blk->bp,
 		    XFS_DA_LOGRANGE(node, btree, tmp));
-		btree = &node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ];
+		btree = &node->btree[be16_to_cpu(node->hdr.count)-1];
 	}
 	memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
 	xfs_da_log_buf(state->args->trans, drop_blk->bp,
 	    XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
-	INT_MOD(node->hdr.count, ARCH_CONVERT, -1);
+	be16_add(&node->hdr.count, -1);
 	xfs_da_log_buf(state->args->trans, drop_blk->bp,
 	    XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
 
@@ -1049,31 +1048,31 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	 * elements in the remaining block up to make a hole.
 	 */
 	if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) ||
-	    (be32_to_cpu(drop_node->btree[INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1].hashval) <
-	     be32_to_cpu(save_node->btree[INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval)))
+	    (be32_to_cpu(drop_node->btree[be16_to_cpu(drop_node->hdr.count)-1].hashval) <
+	     be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval)))
 	{
-		btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ];
-		tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
+		btree = &save_node->btree[be16_to_cpu(drop_node->hdr.count)];
+		tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
 		memmove(btree, &save_node->btree[0], tmp);
 		btree = &save_node->btree[0];
 		xfs_da_log_buf(tp, save_blk->bp,
 			XFS_DA_LOGRANGE(save_node, btree,
-				(INT_GET(save_node->hdr.count, ARCH_CONVERT) + INT_GET(drop_node->hdr.count, ARCH_CONVERT)) *
+				(be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) *
 				sizeof(xfs_da_node_entry_t)));
 	} else {
-		btree = &save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT) ];
+		btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)];
 		xfs_da_log_buf(tp, save_blk->bp,
 			XFS_DA_LOGRANGE(save_node, btree,
-				INT_GET(drop_node->hdr.count, ARCH_CONVERT) *
+				be16_to_cpu(drop_node->hdr.count) *
 				sizeof(xfs_da_node_entry_t)));
 	}
 
 	/*
 	 * Move all the B-tree elements from drop_blk to save_blk.
 	 */
-	tmp = INT_GET(drop_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
+	tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
 	memcpy(btree, &drop_node->btree[0], tmp);
-	INT_MOD(save_node->hdr.count, ARCH_CONVERT, INT_GET(drop_node->hdr.count, ARCH_CONVERT));
+	be16_add(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));
 
 	xfs_da_log_buf(tp, save_blk->bp,
 		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
@@ -1082,7 +1081,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	/*
 	 * Save the last hashval in the remaining block for upward propagation.
 	 */
-	save_blk->hashval = be32_to_cpu(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval);
+	save_blk->hashval = be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval);
 }
 
 /*========================================================================
@@ -1147,12 +1146,12 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 		blk->magic = be16_to_cpu(curr->magic);
 		if (blk->magic == XFS_DA_NODE_MAGIC) {
 			node = blk->bp->data;
-			blk->hashval = be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval);
+			blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
 
 			/*
 			 * Binary search.  (note: small blocks will skip loop)
 			 */
-			max = INT_GET(node->hdr.count, ARCH_CONVERT);
+			max = be16_to_cpu(node->hdr.count);
 			probe = span = max / 2;
 			hashval = args->hashval;
 			for (btree = &node->btree[probe]; span > 4;
@@ -1358,11 +1357,11 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
 	node2 = node2_bp->data;
 	ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) &&
 	       (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC));
-	if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
+	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
 	    ((be32_to_cpu(node2->btree[0].hashval) <
 	      be32_to_cpu(node1->btree[0].hashval)) ||
-	     (be32_to_cpu(node2->btree[INT_GET(node2->hdr.count, ARCH_CONVERT)-1].hashval) <
-	      be32_to_cpu(node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)-1].hashval)))) {
+	     (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
+	      be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
 		return(1);
 	}
 	return(0);
@@ -1379,10 +1378,10 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
 	node = bp->data;
 	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	if (count)
-		*count = INT_GET(node->hdr.count, ARCH_CONVERT);
+		*count = be16_to_cpu(node->hdr.count);
 	if (!node->hdr.count)
 		return(0);
-	return be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval);
+	return be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
 }
 
 /*
@@ -1491,7 +1490,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 		ASSERT(blk->bp != NULL);
 		node = blk->bp->data;
 		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
-		if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
+		if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
 			blk->index++;
 			blkno = be32_to_cpu(node->btree[blk->index].before);
 			break;
@@ -1535,11 +1534,11 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 		blk->magic = be16_to_cpu(info->magic);
 		if (blk->magic == XFS_DA_NODE_MAGIC) {
 			node = (xfs_da_intnode_t *)info;
-			blk->hashval = be32_to_cpu(node->btree[INT_GET(node->hdr.count, ARCH_CONVERT)-1].hashval);
+			blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
 			if (forward)
 				blk->index = 0;
 			else
-				blk->index = INT_GET(node->hdr.count, ARCH_CONVERT)-1;
+				blk->index = be16_to_cpu(node->hdr.count)-1;
 			blkno = be32_to_cpu(node->btree[blk->index].before);
 		} else {
 			ASSERT(level == path->active-1);
@@ -1795,8 +1794,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	} else {
 		ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC);
 		dead_node = (xfs_da_intnode_t *)dead_info;
-		dead_level = INT_GET(dead_node->hdr.level, ARCH_CONVERT);
-		dead_hash = be32_to_cpu(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval);
+		dead_level = be16_to_cpu(dead_node->hdr.level);
+		dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval);
 	}
 	sib_buf = par_buf = NULL;
 	/*
@@ -1854,19 +1853,19 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 		par_node = par_buf->data;
 		if (unlikely(
 		    be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC ||
-		    (level >= 0 && level != INT_GET(par_node->hdr.level, ARCH_CONVERT) + 1))) {
+		    (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
 					 XFS_ERRLEVEL_LOW, mp);
 			error = XFS_ERROR(EFSCORRUPTED);
 			goto done;
 		}
-		level = INT_GET(par_node->hdr.level, ARCH_CONVERT);
+		level = be16_to_cpu(par_node->hdr.level);
 		for (entno = 0;
-		     entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
+		     entno < be16_to_cpu(par_node->hdr.count) &&
 		     be32_to_cpu(par_node->btree[entno].hashval) < dead_hash;
 		     entno++)
 			continue;
-		if (unlikely(entno == INT_GET(par_node->hdr.count, ARCH_CONVERT))) {
+		if (unlikely(entno == be16_to_cpu(par_node->hdr.count))) {
 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
 					 XFS_ERRLEVEL_LOW, mp);
 			error = XFS_ERROR(EFSCORRUPTED);
@@ -1884,11 +1883,11 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	 */
 	for (;;) {
 		for (;
-		     entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
+		     entno < be16_to_cpu(par_node->hdr.count) &&
 		     be32_to_cpu(par_node->btree[entno].before) != last_blkno;
 		     entno++)
 			continue;
-		if (entno < INT_GET(par_node->hdr.count, ARCH_CONVERT))
+		if (entno < be16_to_cpu(par_node->hdr.count))
 			break;
 		par_blkno = be32_to_cpu(par_node->hdr.info.forw);
 		xfs_da_brelse(tp, par_buf);
@@ -1903,7 +1902,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 			goto done;
 		par_node = par_buf->data;
 		if (unlikely(
-		    INT_GET(par_node->hdr.level, ARCH_CONVERT) != level ||
+		    be16_to_cpu(par_node->hdr.level) != level ||
 		    be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) {
 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
 					 XFS_ERRLEVEL_LOW, mp);
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 6343c3a4dba..243a730d5ec 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -65,8 +65,8 @@ typedef struct xfs_da_blkinfo {
 typedef struct xfs_da_intnode {
 	struct xfs_da_node_hdr {	/* constant-structure header block */
 		xfs_da_blkinfo_t info;	/* block type, links, etc. */
-		__uint16_t count;	/* count of active entries */
-		__uint16_t level;	/* level above leaves (leaf == 0) */
+		__be16	count;		/* count of active entries */
+		__be16	level;		/* level above leaves (leaf == 0) */
 	} hdr;
 	struct xfs_da_node_entry {
 		__be32	hashval;	/* hash value for this descendant */
diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c
index 8d1975a541b..9cc702a839a 100644
--- a/fs/xfs/xfs_dir.c
+++ b/fs/xfs/xfs_dir.c
@@ -953,13 +953,13 @@ xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio,
 				break;
 			btree = &node->btree[0];
 			xfs_dir_trace_g_dun("node: node detail", dp, uio, node);
-			for (i = 0; i < INT_GET(node->hdr.count, ARCH_CONVERT); btree++, i++) {
+			for (i = 0; i < be16_to_cpu(node->hdr.count); btree++, i++) {
 				if (be32_to_cpu(btree->hashval) >= cookhash) {
 					bno = be32_to_cpu(btree->before);
 					break;
 				}
 			}
-			if (i == INT_GET(node->hdr.count, ARCH_CONVERT)) {
+			if (i == be16_to_cpu(node->hdr.count)) {
 				xfs_da_brelse(trans, bp);
 				xfs_dir_trace_g_du("node: hash beyond EOF",
 							  dp, uio);
@@ -1118,7 +1118,7 @@ void
 xfs_dir_trace_g_dun(char *where, xfs_inode_t *dp, uio_t *uio,
 			xfs_da_intnode_t *node)
 {
-	int	last = INT_GET(node->hdr.count, ARCH_CONVERT) - 1;
+	int	last = be16_to_cpu(node->hdr.count) - 1;
 
 	xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUN, where,
 		     (void *)dp, (void *)dp->i_mount,
@@ -1127,7 +1127,7 @@ xfs_dir_trace_g_dun(char *where, xfs_inode_t *dp, uio_t *uio,
 		     (void *)(unsigned long)uio->uio_resid,
 		     (void *)(unsigned long)be32_to_cpu(node->hdr.info.forw),
 		     (void *)(unsigned long)
-			INT_GET(node->hdr.count, ARCH_CONVERT),
+			be16_to_cpu(node->hdr.count),
 		     (void *)(unsigned long)
 			be32_to_cpu(node->btree[0].hashval),
 		     (void *)(unsigned long)
diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
index 3c58834fa6a..ac9ac700acf 100644
--- a/fs/xfs/xfs_dir_leaf.c
+++ b/fs/xfs/xfs_dir_leaf.c
@@ -743,10 +743,12 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args)
 	node = bp1->data;
 	leaf = bp2->data;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-	INT_SET(node->btree[0].hashval, ARCH_CONVERT, INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
+	node->btree[0].hashval = cpu_to_be32(
+		INT_GET(leaf->entries[
+			INT_GET(leaf->hdr.count, ARCH_CONVERT)-1].hashval, ARCH_CONVERT));
 	xfs_da_buf_done(bp2);
 	node->btree[0].before = cpu_to_be32(blkno);
-	INT_SET(node->hdr.count, ARCH_CONVERT, 1);
+	node->hdr.count = cpu_to_be16(1);
 	xfs_da_log_buf(args->trans, bp1,
 		XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0])));
 	xfs_da_buf_done(bp1);
-- 
cgit v1.2.3


From b2fc6ad01beb550f75457b7d811ff84dc81b210b Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 17 Mar 2006 17:30:01 +1100
Subject: [XFS] remove bogus INT_GET for u8 variables in xfs_dir_leaf.c

SGI-PV: 943272
SGI-Modid: xfs-linux-melb:xfs-kern:25506a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_dir_leaf.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
index ac9ac700acf..ee88751c3be 100644
--- a/fs/xfs/xfs_dir_leaf.c
+++ b/fs/xfs/xfs_dir_leaf.c
@@ -176,7 +176,7 @@ xfs_dir_shortform_addname(xfs_da_args_t *args)
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
 	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
 	sfe = &sf->list[0];
-	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
+	for (i = sf->hdr.count-1; i >= 0; i--) {
 		if (sfe->namelen == args->namelen &&
 		    args->name[0] == sfe->name[0] &&
 		    memcmp(args->name, sfe->name, args->namelen) == 0)
@@ -193,7 +193,7 @@ xfs_dir_shortform_addname(xfs_da_args_t *args)
 	XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber);
 	sfe->namelen = args->namelen;
 	memcpy(sfe->name, args->name, sfe->namelen);
-	INT_MOD(sf->hdr.count, ARCH_CONVERT, +1);
+	sf->hdr.count++;
 
 	dp->i_d.di_size += size;
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
@@ -227,7 +227,7 @@ xfs_dir_shortform_removename(xfs_da_args_t *args)
 	base = sizeof(xfs_dir_sf_hdr_t);
 	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
 	sfe = &sf->list[0];
-	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
+	for (i = sf->hdr.count-1; i >= 0; i--) {
 		size = XFS_DIR_SF_ENTSIZE_BYENTRY(sfe);
 		if (sfe->namelen == args->namelen &&
 		    sfe->name[0] == args->name[0] &&
@@ -245,7 +245,7 @@ xfs_dir_shortform_removename(xfs_da_args_t *args)
 		memmove(&((char *)sf)[base], &((char *)sf)[base+size],
 					      dp->i_d.di_size - (base+size));
 	}
-	INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
+	sf->hdr.count--;
 
 	xfs_idata_realloc(dp, -size, XFS_DATA_FORK);
 	dp->i_d.di_size -= size;
@@ -288,7 +288,7 @@ xfs_dir_shortform_lookup(xfs_da_args_t *args)
 		return(XFS_ERROR(EEXIST));
 	}
 	sfe = &sf->list[0];
-	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
+	for (i = sf->hdr.count-1; i >= 0; i--) {
 		if (sfe->namelen == args->namelen &&
 		    sfe->name[0] == args->name[0] &&
 		    memcmp(args->name, sfe->name, args->namelen) == 0) {
@@ -375,7 +375,7 @@ xfs_dir_shortform_to_leaf(xfs_da_args_t *iargs)
 		goto out;
 
 	sfe = &sf->list[0];
-	for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) {
+	for (i = 0; i < sf->hdr.count; i++) {
 		args.name = (char *)(sfe->name);
 		args.namelen = sfe->namelen;
 		args.hashval = xfs_da_hashname((char *)(sfe->name),
@@ -428,7 +428,7 @@ xfs_dir_shortform_getdents(xfs_inode_t *dp, uio_t *uio, int *eofp,
 	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
 	cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset);
 	want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset);
-	nsbuf = INT_GET(sf->hdr.count, ARCH_CONVERT) + 2;
+	nsbuf = sf->hdr.count + 2;
 	sbsize = (nsbuf + 1) * sizeof(*sbuf);
 	sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP);
 
@@ -460,8 +460,7 @@ xfs_dir_shortform_getdents(xfs_inode_t *dp, uio_t *uio, int *eofp,
 	/*
 	 * Scan the directory data for the rest of the entries.
 	 */
-	for (i = 0, sfe = &sf->list[0];
-			i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) {
+	for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
 
 		if (unlikely(
 		    ((char *)sfe < (char *)sf) ||
@@ -600,7 +599,7 @@ xfs_dir_shortform_replace(xfs_da_args_t *args)
 	}
 	ASSERT(args->namelen != 1 || args->name[0] != '.');
 	sfe = &sf->list[0];
-	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
+	for (i = sf->hdr.count-1; i >= 0; i--) {
 		if (sfe->namelen == args->namelen &&
 		    sfe->name[0] == args->name[0] &&
 		    memcmp(args->name, sfe->name, args->namelen) == 0) {
-- 
cgit v1.2.3


From 6cc8fef4cbeb0b65d225d7b599c75eb5b40a6534 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Mon, 20 Mar 2006 13:25:48 +1100
Subject: [XFS] Fix compiler warning from xfs_file_compat_invis_ioctl
 prototype.

SGI-PV: 904196
SGI-Modid: xfs-linux-melb:xfs-kern:25509a

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 8bdb33ffc03..02de6e62ee3 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -19,6 +19,6 @@
 #define __XFS_IOCTL32_H__
 
 extern long xfs_file_compat_ioctl(struct file *, unsigned, unsigned long);
-extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned);
+extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned long);
 
 #endif /* __XFS_IOCTL32_H__ */
-- 
cgit v1.2.3


From b92dccf65bab3b6b7deb79ff3321dc256eb0f53b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:03 -0500
Subject: NFS: Fix a busy inodes issue...

The nfs_open_context may live longer than the file descriptor that spawned
it, so it needs to carry a reference to the vfsmount. If not, then
generic_shutdown_super() may end up being called before reads and writes
have been flushed out.

Make a couple of functions static while we're at it...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a77ee95b7ef..8d5b6691ae3 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -973,7 +973,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	return err;
 }
 
-struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred)
+static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred)
 {
 	struct nfs_open_context *ctx;
 
@@ -981,6 +981,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp
 	if (ctx != NULL) {
 		atomic_set(&ctx->count, 1);
 		ctx->dentry = dget(dentry);
+		ctx->vfsmnt = mntget(mnt);
 		ctx->cred = get_rpccred(cred);
 		ctx->state = NULL;
 		ctx->lockowner = current->files;
@@ -1011,6 +1012,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
 		if (ctx->cred != NULL)
 			put_rpccred(ctx->cred);
 		dput(ctx->dentry);
+		mntput(ctx->vfsmnt);
 		kfree(ctx);
 	}
 }
@@ -1019,7 +1021,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
  * Ensure that mmap has a recent RPC credential for use when writing out
  * shared pages
  */
-void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
+static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
@@ -1051,7 +1053,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
 	return ctx;
 }
 
-void nfs_file_clear_open_context(struct file *filp)
+static void nfs_file_clear_open_context(struct file *filp)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data;
@@ -1076,7 +1078,7 @@ int nfs_open(struct inode *inode, struct file *filp)
 	cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
-	ctx = alloc_nfs_open_context(filp->f_dentry, cred);
+	ctx = alloc_nfs_open_context(filp->f_vfsmnt, filp->f_dentry, cred);
 	put_rpccred(cred);
 	if (ctx == NULL)
 		return -ENOMEM;
-- 
cgit v1.2.3


From cd52ed35535ef443f08bf5cd3331d350272885b8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:04 -0500
Subject: NFS: Avoid races between writebacks and truncation

Currently, there is no serialisation between NFS asynchronous writebacks
and truncation at the page level due to the fact that nfs_sync_inode()
cannot lock the pages that it is about to write out.

This means that it is possible to be flushing out data (and calling something
like set_page_writeback()) while the page cache is busy evicting the page.
Oops...

Use the hooks provided in try_to_release_page() to ensure that dirty pages
are always written back to storage before we evict them.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c     | 13 +++++++++++++
 fs/nfs/pagelist.c | 10 ++++++++--
 2 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7a79fbe9f53..387809f2d18 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -316,6 +316,17 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse
 	return status;
 }
 
+static int nfs_invalidate_page(struct page *page, unsigned long offset)
+{
+	/* FIXME: we really should cancel any unstarted writes on this page */
+	return 1;
+}
+
+static int nfs_release_page(struct page *page, gfp_t gfp)
+{
+	return !nfs_wb_page(page->mapping->host, page);
+}
+
 struct address_space_operations nfs_file_aops = {
 	.readpage = nfs_readpage,
 	.readpages = nfs_readpages,
@@ -324,6 +335,8 @@ struct address_space_operations nfs_file_aops = {
 	.writepages = nfs_writepages,
 	.prepare_write = nfs_prepare_write,
 	.commit_write = nfs_commit_write,
+	.invalidatepage = nfs_invalidate_page,
+	.releasepage = nfs_release_page,
 #ifdef CONFIG_NFS_DIRECTIO
 	.direct_IO = nfs_direct_IO,
 #endif
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d53857b148e..d6e076c9dbe 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,6 +85,10 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 	atomic_set(&req->wb_complete, 0);
 	req->wb_index	= page->index;
 	page_cache_get(page);
+	BUG_ON(PagePrivate(page));
+	BUG_ON(!PageLocked(page));
+	BUG_ON(page->mapping->host != inode);
+	SetPagePrivate(page);
 	req->wb_offset  = offset;
 	req->wb_pgbase	= offset;
 	req->wb_bytes   = count;
@@ -147,8 +151,10 @@ void nfs_clear_page_writeback(struct nfs_page *req)
  */
 void nfs_clear_request(struct nfs_page *req)
 {
-	if (req->wb_page) {
-		page_cache_release(req->wb_page);
+	struct page *page = req->wb_page;
+	if (page != NULL) {
+		ClearPagePrivate(page);
+		page_cache_release(page);
 		req->wb_page = NULL;
 	}
 }
-- 
cgit v1.2.3


From 1dd594b21b2d98e56f2b1fe92bb222276b28de41 Mon Sep 17 00:00:00 2001
From: Neil Brown <neilb@suse.de>
Date: Mon, 20 Mar 2006 13:44:04 -0500
Subject: NFS: Fix buglet in fs/nfs/write.c

I've been reading through fs/nfs/write.c trying to track down a bug
that seems to be related to pages loosing a refcount and getting
freed too early (you interested in detail??) and I spotted a little
bug which the following patch should fix.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9449b683550..d6ad449041e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -653,8 +653,11 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 				spin_unlock(&nfsi->req_lock);
 				error = nfs_wait_on_request(req);
 				nfs_release_request(req);
-				if (error < 0)
+				if (error < 0) {
+					if (new)
+						nfs_release_request(new);
 					return ERR_PTR(error);
+				}
 				continue;
 			}
 			spin_unlock(&nfsi->req_lock);
-- 
cgit v1.2.3


From 47831f35b83e43c804215712dd0c834c92e8a441 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:05 -0500
Subject: VFS: Fix __posix_lock_file() copy of private lock area

The struct file_lock->fl_u area must be copied using the fl_copy_lock()
operation.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/locks.c | 53 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/locks.c b/fs/locks.c
index 909eab8fb1d..d2c5306e3db 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -153,6 +153,21 @@ static struct file_lock *locks_alloc_lock(void)
 	return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
 }
 
+static void locks_release_private(struct file_lock *fl)
+{
+	if (fl->fl_ops) {
+		if (fl->fl_ops->fl_release_private)
+			fl->fl_ops->fl_release_private(fl);
+		fl->fl_ops = NULL;
+	}
+	if (fl->fl_lmops) {
+		if (fl->fl_lmops->fl_release_private)
+			fl->fl_lmops->fl_release_private(fl);
+		fl->fl_lmops = NULL;
+	}
+
+}
+
 /* Free a lock which is not in use. */
 static void locks_free_lock(struct file_lock *fl)
 {
@@ -169,18 +184,7 @@ static void locks_free_lock(struct file_lock *fl)
 	if (!list_empty(&fl->fl_link))
 		panic("Attempting to free lock on active lock list");
 
-	if (fl->fl_ops) {
-		if (fl->fl_ops->fl_release_private)
-			fl->fl_ops->fl_release_private(fl);
-		fl->fl_ops = NULL;
-	}
-
-	if (fl->fl_lmops) {
-		if (fl->fl_lmops->fl_release_private)
-			fl->fl_lmops->fl_release_private(fl);
-		fl->fl_lmops = NULL;
-	}
-
+	locks_release_private(fl);
 	kmem_cache_free(filelock_cache, fl);
 }
 
@@ -218,11 +222,27 @@ static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags)
 	locks_init_lock(lock);
 }
 
+static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
+{
+	if (fl->fl_ops) {
+		if (fl->fl_ops->fl_copy_lock)
+			fl->fl_ops->fl_copy_lock(new, fl);
+		new->fl_ops = fl->fl_ops;
+	}
+	if (fl->fl_lmops) {
+		if (fl->fl_lmops->fl_copy_lock)
+			fl->fl_lmops->fl_copy_lock(new, fl);
+		new->fl_lmops = fl->fl_lmops;
+	}
+}
+
 /*
  * Initialize a new lock from an existing file_lock structure.
  */
 void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
 {
+	locks_release_private(new);
+
 	new->fl_owner = fl->fl_owner;
 	new->fl_pid = fl->fl_pid;
 	new->fl_file = fl->fl_file;
@@ -232,10 +252,8 @@ void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
 	new->fl_end = fl->fl_end;
 	new->fl_ops = fl->fl_ops;
 	new->fl_lmops = fl->fl_lmops;
-	if (fl->fl_ops && fl->fl_ops->fl_copy_lock)
-		fl->fl_ops->fl_copy_lock(new, fl);
-	if (fl->fl_lmops && fl->fl_lmops->fl_copy_lock)
-		fl->fl_lmops->fl_copy_lock(new, fl);
+
+	locks_copy_private(new, fl);
 }
 
 EXPORT_SYMBOL(locks_copy_lock);
@@ -904,7 +922,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request)
 				fl->fl_start = request->fl_start;
 				fl->fl_end = request->fl_end;
 				fl->fl_type = request->fl_type;
-				fl->fl_u = request->fl_u;
+				locks_release_private(fl);
+				locks_copy_private(fl, request);
 				request = fl;
 				added = 1;
 			}
-- 
cgit v1.2.3


From 36943fa4b2701b9ef2d60084c85ecbe634aec252 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:05 -0500
Subject: NLM: nlm_alloc_call should not immediately fail on signal

Currently, nlm_alloc_call tests for a signal before it even tries to
allocate memory.
Fix it so that it tries at least once.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 970b6a6aa33..615a988a92a 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -291,14 +291,15 @@ nlmclnt_alloc_call(void)
 {
 	struct nlm_rqst	*call;
 
-	while (!signalled()) {
-		call = (struct nlm_rqst *) kmalloc(sizeof(struct nlm_rqst), GFP_KERNEL);
-		if (call) {
-			memset(call, 0, sizeof(*call));
+	for(;;) {
+		call = kzalloc(sizeof(*call), GFP_KERNEL);
+		if (call != NULL) {
 			locks_init_lock(&call->a_args.lock.fl);
 			locks_init_lock(&call->a_res.lock.fl);
 			return call;
 		}
+		if (signalled())
+			break;
 		printk("nlmclnt_alloc_call: failed, waiting for memory\n");
 		schedule_timeout_interruptible(5*HZ);
 	}
-- 
cgit v1.2.3


From 7bab377fcb495ee2e5a1cd69d235f8d84c76e3af Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:06 -0500
Subject: lockd: Don't expose the process pid to the NLM server

Instead we use the nlm_lockowner->pid.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntlock.c | 10 +++++++++-
 fs/lockd/clntproc.c |  7 +++++--
 fs/lockd/svclock.c  |  1 +
 fs/lockd/xdr.c      | 13 ++++++++-----
 fs/lockd/xdr4.c     | 17 ++++++++++-------
 5 files changed, 33 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index da6354baa0b..8ae79ae4b99 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -125,7 +125,15 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
 	list_for_each_entry(block, &nlm_blocked, b_list) {
 		struct file_lock *fl_blocked = block->b_lock;
 
-		if (!nlm_compare_locks(fl_blocked, fl))
+		if (fl_blocked->fl_start != fl->fl_start)
+			continue;
+		if (fl_blocked->fl_end != fl->fl_end)
+			continue;
+		/*
+		 * Careful! The NLM server will return the 32-bit "pid" that
+		 * we put on the wire: in this case the lockowner "pid".
+		 */
+		if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
 			continue;
 		if (!nlm_cmp_addr(&block->b_host->h_addr, addr))
 			continue;
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 615a988a92a..acc3eb13a02 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -132,8 +132,10 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
 	memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
 	lock->caller  = system_utsname.nodename;
 	lock->oh.data = req->a_owner;
-	lock->oh.len  = sprintf(req->a_owner, "%d@%s",
-				current->pid, system_utsname.nodename);
+	lock->oh.len  = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
+				(unsigned int)fl->fl_u.nfs_fl.owner->pid,
+				system_utsname.nodename);
+	lock->svid = fl->fl_u.nfs_fl.owner->pid;
 	locks_copy_lock(&lock->fl, fl);
 }
 
@@ -159,6 +161,7 @@ nlmclnt_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
 
 	/* set default data area */
 	call->a_args.lock.oh.data = call->a_owner;
+	call->a_args.lock.svid = lock->fl.fl_pid;
 
 	if (lock->oh.len > NLMCLNT_OHSIZE) {
 		void *data = kmalloc(lock->oh.len, GFP_KERNEL);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 9cfced65d4a..a525a141dd3 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -397,6 +397,7 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
 				(long long)fl->fl_end);
 		conflock->caller = "somehost";	/* FIXME */
 		conflock->oh.len = 0;		/* don't return OH info */
+		conflock->svid = fl->fl_pid;
 		conflock->fl = *fl;
 		return nlm_lck_denied;
 	}
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 200fbda2c6d..1e984ab14d3 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -131,10 +131,11 @@ nlm_decode_lock(u32 *p, struct nlm_lock *lock)
 	 || !(p = nlm_decode_fh(p, &lock->fh))
 	 || !(p = nlm_decode_oh(p, &lock->oh)))
 		return NULL;
+	lock->svid  = ntohl(*p++);
 
 	locks_init_lock(fl);
 	fl->fl_owner = current->files;
-	fl->fl_pid   = ntohl(*p++);
+	fl->fl_pid   = (pid_t)lock->svid;
 	fl->fl_flags = FL_POSIX;
 	fl->fl_type  = F_RDLCK;		/* as good as anything else */
 	start = ntohl(*p++);
@@ -174,7 +175,7 @@ nlm_encode_lock(u32 *p, struct nlm_lock *lock)
 	else
 		len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
 
-	*p++ = htonl(fl->fl_pid);
+	*p++ = htonl(lock->svid);
 	*p++ = htonl(start);
 	*p++ = htonl(len);
 
@@ -197,7 +198,7 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp)
 		struct file_lock	*fl = &resp->lock.fl;
 
 		*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
-		*p++ = htonl(fl->fl_pid);
+		*p++ = htonl(resp->lock.svid);
 
 		/* Encode owner handle. */
 		if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -298,7 +299,8 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 
 	memset(lock, 0, sizeof(*lock));
 	locks_init_lock(&lock->fl);
-	lock->fl.fl_pid = ~(u32) 0;
+	lock->svid = ~(u32) 0;
+	lock->fl.fl_pid = (pid_t)lock->svid;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie))
 	 || !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -415,7 +417,8 @@ nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
 		memset(&resp->lock, 0, sizeof(resp->lock));
 		locks_init_lock(fl);
 		excl = ntohl(*p++);
-		fl->fl_pid = ntohl(*p++);
+		resp->lock.svid = ntohl(*p++);
+		fl->fl_pid = (pid_t)resp->lock.svid;
 		if (!(p = nlm_decode_oh(p, &resp->lock.oh)))
 			return -EIO;
 
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index fdcf105a530..906ddc20318 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -130,10 +130,11 @@ nlm4_decode_lock(u32 *p, struct nlm_lock *lock)
 	 || !(p = nlm4_decode_fh(p, &lock->fh))
 	 || !(p = nlm4_decode_oh(p, &lock->oh)))
 		return NULL;
+	lock->svid  = ntohl(*p++);
 
 	locks_init_lock(fl);
 	fl->fl_owner = current->files;
-	fl->fl_pid   = ntohl(*p++);
+	fl->fl_pid   = (pid_t)lock->svid;
 	fl->fl_flags = FL_POSIX;
 	fl->fl_type  = F_RDLCK;		/* as good as anything else */
 	p = xdr_decode_hyper(p, &start);
@@ -167,7 +168,7 @@ nlm4_encode_lock(u32 *p, struct nlm_lock *lock)
 	 || (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
 		return NULL;
 
-	*p++ = htonl(fl->fl_pid);
+	*p++ = htonl(lock->svid);
 
 	start = loff_t_to_s64(fl->fl_start);
 	if (fl->fl_end == OFFSET_MAX)
@@ -198,7 +199,7 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
 		struct file_lock	*fl = &resp->lock.fl;
 
 		*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
-		*p++ = htonl(fl->fl_pid);
+		*p++ = htonl(resp->lock.svid);
 
 		/* Encode owner handle. */
 		if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -212,8 +213,8 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
 		
 		p = xdr_encode_hyper(p, start);
 		p = xdr_encode_hyper(p, len);
-		dprintk("xdr: encode_testres (status %d pid %d type %d start %Ld end %Ld)\n",
-			resp->status, fl->fl_pid, fl->fl_type,
+		dprintk("xdr: encode_testres (status %u pid %d type %d start %Ld end %Ld)\n",
+			resp->status, (int)resp->lock.svid, fl->fl_type,
 			(long long)fl->fl_start,  (long long)fl->fl_end);
 	}
 
@@ -303,7 +304,8 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 
 	memset(lock, 0, sizeof(*lock));
 	locks_init_lock(&lock->fl);
-	lock->fl.fl_pid = ~(u32) 0;
+	lock->svid = ~(u32) 0;
+	lock->fl.fl_pid = (pid_t)lock->svid;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie))
 	 || !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -420,7 +422,8 @@ nlm4clt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
 		memset(&resp->lock, 0, sizeof(resp->lock));
 		locks_init_lock(fl);
 		excl = ntohl(*p++);
-		fl->fl_pid = ntohl(*p++);
+		resp->lock.svid = ntohl(*p++);
+		fl->fl_pid = (pid_t)resp->lock.svid;
 		if (!(p = nlm4_decode_oh(p, &resp->lock.oh)))
 			return -EIO;
 
-- 
cgit v1.2.3


From 755c1e20cd2ad56e5c567fa05769eb98a3eef72b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:06 -0500
Subject: NFS: writes should not clobber utimes() calls

Ensure that we flush out writes in the case when someone calls utimes() in
order to set the file times.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8d5b6691ae3..5746dc841a6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -859,11 +859,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	lock_kernel();
 	nfs_begin_data_update(inode);
-	/* Write all dirty data if we're changing file permissions or size */
-	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
-		filemap_write_and_wait(inode->i_mapping);
-		nfs_wb_all(inode);
-	}
+	/* Write all dirty data */
+	filemap_write_and_wait(inode->i_mapping);
+	nfs_wb_all(inode);
 	/*
 	 * Return any delegations if we're going to change ACLs
 	 */
-- 
cgit v1.2.3


From ca62b9c3f7b8679ada4de94d2ab7098c6860c3d7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:07 -0500
Subject: NFSv4: Don't invalidate cached attributes if change attribute is
 unchanged

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 44 ++++++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5746dc841a6..0e1ef97bcf5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1299,34 +1299,35 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
 		return 0;
 
+	/* Has the inode gone and changed behind our back? */
+	if (nfsi->fileid != fattr->fileid
+			|| (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+		return -EIO;
+	}
+
 	/* Are we in the process of updating data on the server? */
 	data_unstable = nfs_caches_unstable(inode);
 
 	/* Do atomic weak cache consistency updates */
 	nfs_wcc_update_inode(inode, fattr);
 
-	if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
-			nfsi->change_attr != fattr->change_attr) {
+	if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) {
+		if (nfsi->change_attr == fattr->change_attr)
+			goto out;
 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 		if (!data_unstable)
 			nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
 	}
 
-	/* Has the inode gone and changed behind our back? */
-	if (nfsi->fileid != fattr->fileid
-			|| (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
-		return -EIO;
-	}
-
-	cur_size = i_size_read(inode);
- 	new_isize = nfs_size_to_loff_t(fattr->size);
-
 	/* Verify a few of the more important attributes */
 	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 		if (!data_unstable)
 			nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
 	}
+
+	cur_size = i_size_read(inode);
+ 	new_isize = nfs_size_to_loff_t(fattr->size);
 	if (cur_size != new_isize) {
 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 		if (nfsi->npages == 0)
@@ -1343,6 +1344,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 	if (inode->i_nlink != fattr->nlink)
 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 
+out:
 	if (!timespec_equal(&inode->i_atime, &fattr->atime))
 		nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
 
@@ -1481,15 +1483,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		nfsi->cache_change_attribute = jiffies;
 	}
 
-	if ((fattr->valid & NFS_ATTR_FATTR_V4)
-	    && nfsi->change_attr != fattr->change_attr) {
-		dprintk("NFS: change_attr change on server for file %s/%ld\n",
-		       inode->i_sb->s_id, inode->i_ino);
-		nfsi->change_attr = fattr->change_attr;
-		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
-		nfsi->cache_change_attribute = jiffies;
-	}
-
 	/* If ctime has changed we should definitely clear access+acl caches */
 	if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
 		invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
@@ -1519,6 +1512,17 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  		inode->i_blksize = fattr->du.nfs2.blocksize;
  	}
 
+	if ((fattr->valid & NFS_ATTR_FATTR_V4)) {
+		if (nfsi->change_attr != fattr->change_attr) {
+			dprintk("NFS: change_attr change on server for file %s/%ld\n",
+					inode->i_sb->s_id, inode->i_ino);
+			nfsi->change_attr = fattr->change_attr;
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+			nfsi->cache_change_attribute = jiffies;
+		} else
+			invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA);
+	}
+
 	/* Update attrtimeo value if we're out of the unstable period */
 	if (invalid & NFS_INO_INVALID_ATTR) {
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
-- 
cgit v1.2.3


From c8d149f3dbd582a101aa7da7bdd6c3316efd11b4 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Mon, 20 Mar 2006 13:44:07 -0500
Subject: NFS: "const static" vs "static const" in nfs4

My previous "const static" vs "static const" cleanup missed a single case,
patch below takes care of it.

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f8c0066e02e..305bea201cd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2958,7 +2958,7 @@ static void nfs4_delegreturn_release(void *calldata)
 	kfree(calldata);
 }
 
-const static struct rpc_call_ops nfs4_delegreturn_ops = {
+static const struct rpc_call_ops nfs4_delegreturn_ops = {
 	.rpc_call_prepare = nfs4_delegreturn_prepare,
 	.rpc_call_done = nfs4_delegreturn_done,
 	.rpc_release = nfs4_delegreturn_release,
-- 
cgit v1.2.3


From fb374d24f225f38f13dbffb65dd7ec72daf08dba Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:08 -0500
Subject: NFS: reduce the number of false cache invalidations.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 0e1ef97bcf5..24988f430e4 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1328,11 +1328,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 
 	cur_size = i_size_read(inode);
  	new_isize = nfs_size_to_loff_t(fattr->size);
-	if (cur_size != new_isize) {
-		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
-		if (nfsi->npages == 0)
-			nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
-	}
+	if (cur_size != new_isize && nfsi->npages == 0)
+		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
 
 	/* Have any file permissions changed? */
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
-- 
cgit v1.2.3


From 12de3b35ea549c5819f287508d7afab0bf3ac44d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:09 -0500
Subject: SUNRPC: Ensure that rpc_mkpipe returns a refcounted dentry

If not, we cannot guarantee that idmap->idmap_dentry, gss_auth->dentry and
clnt->cl_dentry are valid dentries.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/idmap.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 821edd30333..32c95a0d93f 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -132,6 +132,8 @@ nfs_idmap_delete(struct nfs4_client *clp)
 
 	if (!idmap)
 		return;
+	dput(idmap->idmap_dentry);
+	idmap->idmap_dentry = NULL;
 	rpc_unlink(idmap->idmap_path);
 	clp->cl_idmap = NULL;
 	kfree(idmap);
-- 
cgit v1.2.3


From 967b9281361481aecf323563886ef972ee88c681 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:09 -0500
Subject: NFSv4: Do not call rpciod_down() before call to destroy_nfsv4_state()

The reason is that the idmapper cleanup may call flush_workqueue() on
rpciod_workqueue.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 24988f430e4..b81149eb26e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2025,10 +2025,11 @@ static void nfs4_kill_super(struct super_block *sb)
 
 	if (server->client != NULL && !IS_ERR(server->client))
 		rpc_shutdown_client(server->client);
-	rpciod_down();		/* release rpciod */
 
 	destroy_nfsv4_state(server);
 
+	rpciod_down();
+
 	kfree(server->hostname);
 	kfree(server);
 }
-- 
cgit v1.2.3


From a162a6b804b48c605d1fd35e1861a5d32d00ad3f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:10 -0500
Subject: NFSv4: Kill braindead gcc warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

nfs4_open_revalidate: 'res' may be used uninitialized
nfs4_callback_compound: ‘hdr_res.nops’ may be used uninitialized
			'op_nr’ may be used uninitialized
encode_getattr_res: ‘savep’ may be used uninitialized

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback_xdr.c | 28 ++++++++++++++++------------
 fs/nfs/nfs4proc.c     |  2 +-
 2 files changed, 17 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 7c33b9a81a9..05c38cf40b6 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -330,7 +330,7 @@ static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
 
 static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
 {
-	uint32_t *savep;
+	uint32_t *savep = NULL;
 	unsigned status = res->status;
 	
 	if (unlikely(status != 0))
@@ -358,23 +358,26 @@ static unsigned process_op(struct svc_rqst *rqstp,
 		struct xdr_stream *xdr_in, void *argp,
 		struct xdr_stream *xdr_out, void *resp)
 {
-	struct callback_op *op;
-	unsigned int op_nr;
+	struct callback_op *op = &callback_ops[0];
+	unsigned int op_nr = OP_CB_ILLEGAL;
 	unsigned int status = 0;
 	long maxlen;
 	unsigned res;
 
 	dprintk("%s: start\n", __FUNCTION__);
 	status = decode_op_hdr(xdr_in, &op_nr);
-	if (unlikely(status != 0)) {
-		op_nr = OP_CB_ILLEGAL;
-		op = &callback_ops[0];
-	} else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) {
-		op_nr = OP_CB_ILLEGAL;
-		op = &callback_ops[0];
-		status = htonl(NFS4ERR_OP_ILLEGAL);
-	} else
-		op = &callback_ops[op_nr];
+	if (likely(status == 0)) {
+		switch (op_nr) {
+			case OP_CB_GETATTR:
+			case OP_CB_RECALL:
+				op = &callback_ops[op_nr];
+				break;
+			default:
+				op_nr = OP_CB_ILLEGAL;
+				op = &callback_ops[0];
+				status = htonl(NFS4ERR_OP_ILLEGAL);
+		}
+	}
 
 	maxlen = xdr_out->end - xdr_out->p;
 	if (maxlen > 0 && maxlen < PAGE_SIZE) {
@@ -416,6 +419,7 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp
 	decode_compound_hdr_arg(&xdr_in, &hdr_arg);
 	hdr_res.taglen = hdr_arg.taglen;
 	hdr_res.tag = hdr_arg.tag;
+	hdr_res.nops = NULL;
 	encode_compound_hdr_res(&xdr_out, &hdr_res);
 
 	for (;;) {
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 305bea201cd..77a565eba56 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -908,7 +908,7 @@ out_put_state_owner:
 static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
 {
 	struct nfs4_exception exception = { };
-	struct nfs4_state *res;
+	struct nfs4_state *res = ERR_PTR(-EIO);
 	int err;
 
 	do {
-- 
cgit v1.2.3


From bd6475454c774bd9dbe6078d94bbf72b1d3b65f4 Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Mon, 20 Mar 2006 13:44:10 -0500
Subject: NFS: kzalloc conversion in fs/nfs

this converts fs/nfs to kzalloc() usage.
compile tested with make allyesconfig

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/idmap.c  | 4 +---
 fs/nfs/inode.c  | 6 ++----
 fs/nfs/unlink.c | 3 +--
 fs/nfs/write.c  | 6 ++----
 4 files changed, 6 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 32c95a0d93f..b89d27f93d6 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -101,11 +101,9 @@ nfs_idmap_new(struct nfs4_client *clp)
 
 	if (clp->cl_idmap != NULL)
 		return;
-        if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
+        if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
                 return;
 
-	memset(idmap, 0, sizeof(*idmap));
-
 	snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
 	    "%s/idmap", clp->cl_rpcclient->cl_pathname);
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b81149eb26e..521d1dcb4cf 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1638,10 +1638,9 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 #endif /* CONFIG_NFS_V3 */
 
 	s = ERR_PTR(-ENOMEM);
-	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
 	if (!server)
 		goto out_err;
-	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
 	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -1942,10 +1941,9 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 		return ERR_PTR(-EINVAL);
 	}
 
-	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
 	if (!server)
 		return ERR_PTR(-ENOMEM);
-	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
 	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index a65c7b53d55..0e28189c215 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -163,10 +163,9 @@ nfs_async_unlink(struct dentry *dentry)
 	struct rpc_clnt	*clnt = NFS_CLIENT(dir->d_inode);
 	int		status = -ENOMEM;
 
-	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
 		goto out;
-	memset(data, 0, sizeof(*data));
 
 	data->cred = rpcauth_lookupcred(clnt->cl_auth, 0);
 	if (IS_ERR(data->cred)) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d6ad449041e..92ecf24455c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -100,10 +100,8 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
 			p->pagevec = &p->page_array[0];
 		else {
 			size_t size = ++pagecount * sizeof(struct page *);
-			p->pagevec = kmalloc(size, GFP_NOFS);
-			if (p->pagevec) {
-				memset(p->pagevec, 0, size);
-			} else {
+			p->pagevec = kzalloc(size, GFP_NOFS);
+			if (!p->pagevec) {
 				mempool_free(p, nfs_commit_mempool);
 				p = NULL;
 			}
-- 
cgit v1.2.3


From c9d5128a10a4974f72674ff3463da4db439e8b04 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 20 Mar 2006 13:44:11 -0500
Subject: NFS: sem2mutex idmap.c

semaphore to mutex conversion.

the conversion was generated via scripts, and the result was validated
automatically via a script as well.

build and boot tested.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/idmap.c | 41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index b89d27f93d6..3fab5b0cfc5 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -35,6 +35,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/slab.h>
@@ -74,8 +75,8 @@ struct idmap {
 	struct dentry        *idmap_dentry;
 	wait_queue_head_t     idmap_wq;
 	struct idmap_msg      idmap_im;
-	struct semaphore      idmap_lock;    /* Serializes upcalls */
-	struct semaphore      idmap_im_lock; /* Protects the hashtable */
+	struct mutex          idmap_lock;    /* Serializes upcalls */
+	struct mutex          idmap_im_lock; /* Protects the hashtable */
 	struct idmap_hashtable idmap_user_hash;
 	struct idmap_hashtable idmap_group_hash;
 };
@@ -114,8 +115,8 @@ nfs_idmap_new(struct nfs4_client *clp)
 		return;
 	}
 
-        init_MUTEX(&idmap->idmap_lock);
-        init_MUTEX(&idmap->idmap_im_lock);
+        mutex_init(&idmap->idmap_lock);
+        mutex_init(&idmap->idmap_im_lock);
 	init_waitqueue_head(&idmap->idmap_wq);
 	idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
 	idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
@@ -232,8 +233,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
 	if (namelen >= IDMAP_NAMESZ)
 		return -EINVAL;
 
-	down(&idmap->idmap_lock);
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	he = idmap_lookup_name(h, name, namelen);
 	if (he != NULL) {
@@ -259,11 +260,11 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
 	}
 
 	set_current_state(TASK_UNINTERRUPTIBLE);
-	up(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
 	schedule();
 	current->state = TASK_RUNNING;
 	remove_wait_queue(&idmap->idmap_wq, &wq);
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	if (im->im_status & IDMAP_STATUS_SUCCESS) {
 		*id = im->im_id;
@@ -272,8 +273,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
 
  out:
 	memset(im, 0, sizeof(*im));
-	up(&idmap->idmap_im_lock);
-	up(&idmap->idmap_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_lock);
 	return (ret);
 }
 
@@ -293,8 +294,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
 
 	im = &idmap->idmap_im;
 
-	down(&idmap->idmap_lock);
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	he = idmap_lookup_id(h, id);
 	if (he != 0) {
@@ -320,11 +321,11 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
 	}
 
 	set_current_state(TASK_UNINTERRUPTIBLE);
-	up(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
 	schedule();
 	current->state = TASK_RUNNING;
 	remove_wait_queue(&idmap->idmap_wq, &wq);
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	if (im->im_status & IDMAP_STATUS_SUCCESS) {
 		if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
@@ -335,8 +336,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
 
  out:
 	memset(im, 0, sizeof(*im));
-	up(&idmap->idmap_im_lock);
-	up(&idmap->idmap_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_lock);
 	return ret;
 }
 
@@ -380,7 +381,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
         if (copy_from_user(&im_in, src, mlen) != 0)
 		return (-EFAULT);
 
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	ret = mlen;
 	im->im_status = im_in.im_status;
@@ -440,7 +441,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 		idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
 	ret = mlen;
 out:
-	up(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
 	return ret;
 }
 
@@ -452,10 +453,10 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 
 	if (msg->errno >= 0)
 		return;
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 	im->im_status = IDMAP_STATUS_LOOKUPFAIL;
 	wake_up(&idmap->idmap_wq);
-	up(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
 }
 
 /* 
-- 
cgit v1.2.3


From b4629fe2f094b719847f31be1ee5ab38300038b2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:12 -0500
Subject: VFS: New /proc file /proc/self/mountstats

Create a new file under /proc/self, called mountstats, where mounted file
systems can export information (configuration options, performance counters,
and so on).  Use a mechanism similar to /proc/mounts and s_ops->show_options.

This mechanism does not violate namespace security, and is safe to use while
other processes are unmounting file systems.

Thanks to Mike Waychison for his review and comments.

Test-plan:
Test concurrent mount/unmount operations while cat'ing /proc/self/mountstats.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/namespace.c | 38 ++++++++++++++++++++++++++++++++++++++
 fs/proc/base.c | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 39c81a8d631..71e75bcf4d2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -399,6 +399,44 @@ struct seq_operations mounts_op = {
 	.show	= show_vfsmnt
 };
 
+static int show_vfsstat(struct seq_file *m, void *v)
+{
+	struct vfsmount *mnt = v;
+	int err = 0;
+
+	/* device */
+	if (mnt->mnt_devname) {
+		seq_puts(m, "device ");
+		mangle(m, mnt->mnt_devname);
+	} else
+		seq_puts(m, "no device");
+
+	/* mount point */
+	seq_puts(m, " mounted on ");
+	seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+	seq_putc(m, ' ');
+
+	/* file system type */
+	seq_puts(m, "with fstype ");
+	mangle(m, mnt->mnt_sb->s_type->name);
+
+	/* optional statistics */
+	if (mnt->mnt_sb->s_op->show_stats) {
+		seq_putc(m, ' ');
+		err = mnt->mnt_sb->s_op->show_stats(m, mnt);
+	}
+
+	seq_putc(m, '\n');
+	return err;
+}
+
+struct seq_operations mountstats_op = {
+	.start	= m_start,
+	.next	= m_next,
+	.stop	= m_stop,
+	.show	= show_vfsstat,
+};
+
 /**
  * may_umount_tree - check if a mount tree is busy
  * @mnt: root of mount tree
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 20feb7568de..8f1f49ceebe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -104,6 +104,7 @@ enum pid_directory_inos {
 	PROC_TGID_MAPS,
 	PROC_TGID_NUMA_MAPS,
 	PROC_TGID_MOUNTS,
+	PROC_TGID_MOUNTSTATS,
 	PROC_TGID_WCHAN,
 #ifdef CONFIG_MMU
 	PROC_TGID_SMAPS,
@@ -144,6 +145,7 @@ enum pid_directory_inos {
 	PROC_TID_MAPS,
 	PROC_TID_NUMA_MAPS,
 	PROC_TID_MOUNTS,
+	PROC_TID_MOUNTSTATS,
 	PROC_TID_WCHAN,
 #ifdef CONFIG_MMU
 	PROC_TID_SMAPS,
@@ -201,6 +203,7 @@ static struct pid_entry tgid_base_stuff[] = {
 	E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO),
+	E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR),
 #ifdef CONFIG_MMU
 	E(PROC_TGID_SMAPS,     "smaps",   S_IFREG|S_IRUGO),
 #endif
@@ -732,6 +735,38 @@ static struct file_operations proc_mounts_operations = {
 	.poll		= mounts_poll,
 };
 
+extern struct seq_operations mountstats_op;
+static int mountstats_open(struct inode *inode, struct file *file)
+{
+	struct task_struct *task = proc_task(inode);
+	int ret = seq_open(file, &mountstats_op);
+
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		struct namespace *namespace;
+		task_lock(task);
+		namespace = task->namespace;
+		if (namespace)
+			get_namespace(namespace);
+		task_unlock(task);
+
+		if (namespace)
+			m->private = namespace;
+		else {
+			seq_release(inode, file);
+			ret = -EINVAL;
+		}
+	}
+	return ret;
+}
+
+static struct file_operations proc_mountstats_operations = {
+	.open		= mountstats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= mounts_release,
+};
+
 #define PROC_BLOCK_SIZE	(3*1024)		/* 4K page size but our output routines use some slack for overruns */
 
 static ssize_t proc_info_read(struct file * file, char __user * buf,
@@ -1730,6 +1765,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
 			inode->i_fop = &proc_smaps_operations;
 			break;
 #endif
+		case PROC_TID_MOUNTSTATS:
+		case PROC_TGID_MOUNTSTATS:
+			inode->i_fop = &proc_mountstats_operations;
+			break;
 #ifdef CONFIG_SECURITY
 		case PROC_TID_ATTR:
 			inode->i_nlink = 2;
-- 
cgit v1.2.3


From 7a480e250c7ca9187275d8574ae9e48a6b602cb9 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:12 -0500
Subject: NFS: show retransmit settings when displaying mount options

Sometimes it's important to know the exact RPC retransmit settings the
kernel is using for an NFS mount point.  Add this facility to the NFS
client's show_options method.

Test plan:
Set various retransmit settings via the mount command, and check that the
settings are reflected in /proc/mounts.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 521d1dcb4cf..48683d4bf0f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -396,6 +396,9 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 
 	nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
 
+	server->retrans_timeo = timeparms.to_initval;
+	server->retrans_count = timeparms.to_retries;
+
 	/* create transport and client */
 	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
 	if (IS_ERR(xprt)) {
@@ -629,6 +632,8 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 			proto = buf;
 	}
 	seq_printf(m, ",proto=%s", proto);
+	seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
+	seq_printf(m, ",retrans=%u", nfss->retrans_count);
 	seq_puts(m, ",addr=");
 	seq_escape(m, nfss->hostname, " \t\n\\");
 	return 0;
@@ -1800,6 +1805,9 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 
 	nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
 
+	server->retrans_timeo = timeparms.to_initval;
+	server->retrans_count = timeparms.to_retries;
+
 	clp = nfs4_get_client(&server->addr.sin_addr);
 	if (!clp) {
 		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
-- 
cgit v1.2.3


From c8bded96aa8735823e53c95a26177987ebb19a90 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:13 -0500
Subject: NFS: clean up some mount options

Get rid of "lock" and "posix", and spell out "vers=".

Test plan:
None.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 48683d4bf0f..827d69255b1 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -591,10 +591,9 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 	} nfs_info[] = {
 		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
 		{ NFS_MOUNT_INTR, ",intr", "" },
-		{ NFS_MOUNT_POSIX, ",posix", "" },
 		{ NFS_MOUNT_NOCTO, ",nocto", "" },
 		{ NFS_MOUNT_NOAC, ",noac", "" },
-		{ NFS_MOUNT_NONLM, ",nolock", ",lock" },
+		{ NFS_MOUNT_NONLM, ",nolock", "" },
 		{ NFS_MOUNT_NOACL, ",noacl", "" },
 		{ 0, NULL, NULL }
 	};
@@ -603,7 +602,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 	char buf[12];
 	char *proto;
 
-	seq_printf(m, ",v%d", nfss->rpc_ops->version);
+	seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
 	seq_printf(m, ",rsize=%d", nfss->rsize);
 	seq_printf(m, ",wsize=%d", nfss->wsize);
 	if (nfss->acregmin != 3*HZ)
-- 
cgit v1.2.3


From d9ef5a8c26aab09762afce43df64736720b4860e Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:13 -0500
Subject: NFS: introduce mechanism for tracking NFS client metrics

Add a per-superblock performance counter facility to the NFS client.  This
facility mimics the counters available for block devices and for
networking.  Expose these new counters via the new /proc/self/mountstats
interface.

Thanks to Andrew Morton and Trond Myklebust for their review and comments.

Test plan:
fsx and iozone on UP and SMP systems, with and without pre-emption.  Watch
for memory overwrite bugs, and performance loss (significantly more CPU
required per op).

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c  | 103 +++++++++++++++++++++++++++++++++++---
 fs/nfs/iostat.h | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 249 insertions(+), 6 deletions(-)
 create mode 100644 fs/nfs/iostat.h

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 827d69255b1..86b756f44e2 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -42,6 +42,7 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 #define NFS_PARANOIA 1
@@ -65,6 +66,7 @@ static void nfs_clear_inode(struct inode *);
 static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct super_block *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
+static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
 static void nfs_zap_acl_cache(struct inode *);
 
 static struct rpc_program	nfs_program;
@@ -78,6 +80,7 @@ static struct super_operations nfs_sops = {
 	.clear_inode	= nfs_clear_inode,
 	.umount_begin	= nfs_umount_begin,
 	.show_options	= nfs_show_options,
+	.show_stats	= nfs_show_stats,
 };
 
 /*
@@ -290,6 +293,12 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
 	}
 	sb->s_root->d_op = server->rpc_ops->dentry_ops;
 
+	server->io_stats = nfs_alloc_iostats();
+	if (!server->io_stats) {
+		no_root_error = -ENOMEM;
+		goto out_no_root;
+	}
+
 	/* Get some general file system info */
 	if (server->namelen == 0 &&
 	    server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
@@ -582,7 +591,7 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf)
 
 }
 
-static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
 {
 	static struct proc_nfs_info {
 		int flag;
@@ -598,20 +607,19 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 		{ 0, NULL, NULL }
 	};
 	struct proc_nfs_info *nfs_infop;
-	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
 	char buf[12];
 	char *proto;
 
 	seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
 	seq_printf(m, ",rsize=%d", nfss->rsize);
 	seq_printf(m, ",wsize=%d", nfss->wsize);
-	if (nfss->acregmin != 3*HZ)
+	if (nfss->acregmin != 3*HZ || showdefaults)
 		seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
-	if (nfss->acregmax != 60*HZ)
+	if (nfss->acregmax != 60*HZ || showdefaults)
 		seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
-	if (nfss->acdirmin != 30*HZ)
+	if (nfss->acdirmin != 30*HZ || showdefaults)
 		seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
-	if (nfss->acdirmax != 60*HZ)
+	if (nfss->acdirmax != 60*HZ || showdefaults)
 		seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
 	for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
 		if (nfss->flags & nfs_infop->flag)
@@ -633,8 +641,89 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 	seq_printf(m, ",proto=%s", proto);
 	seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
 	seq_printf(m, ",retrans=%u", nfss->retrans_count);
+}
+
+static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+
+	nfs_show_mount_options(m, nfss, 0);
+
 	seq_puts(m, ",addr=");
 	seq_escape(m, nfss->hostname, " \t\n\\");
+
+	return 0;
+}
+
+static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
+{
+	int i, cpu;
+	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+	struct rpc_auth *auth = nfss->client->cl_auth;
+	struct nfs_iostats totals = { };
+
+	seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS);
+
+	/*
+	 * Display all mount option settings
+	 */
+	seq_printf(m, "\n\topts:\t");
+	seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
+	seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
+	seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : "");
+	seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
+	nfs_show_mount_options(m, nfss, 1);
+
+	seq_printf(m, "\n\tcaps:\t");
+	seq_printf(m, "caps=0x%x", nfss->caps);
+	seq_printf(m, ",wtmult=%d", nfss->wtmult);
+	seq_printf(m, ",dtsize=%d", nfss->dtsize);
+	seq_printf(m, ",bsize=%d", nfss->bsize);
+	seq_printf(m, ",namelen=%d", nfss->namelen);
+
+#ifdef CONFIG_NFS_V4
+	if (nfss->rpc_ops->version == 4) {
+		seq_printf(m, "\n\tnfsv4:\t");
+		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
+		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+		seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
+	}
+#endif
+
+	/*
+	 * Display security flavor in effect for this mount
+	 */
+	seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor);
+	if (auth->au_flavor)
+		seq_printf(m, ",pseudoflavor=%d", auth->au_flavor);
+
+	/*
+	 * Display superblock I/O counters
+	 */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		struct nfs_iostats *stats;
+
+		if (!cpu_possible(cpu))
+			continue;
+
+		preempt_disable();
+		stats = per_cpu_ptr(nfss->io_stats, cpu);
+
+		for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+			totals.events[i] += stats->events[i];
+		for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+			totals.bytes[i] += stats->bytes[i];
+
+		preempt_enable();
+	}
+
+	seq_printf(m, "\n\tevents:\t");
+	for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+		seq_printf(m, "%lu ", totals.events[i]);
+	seq_printf(m, "\n\tbytes:\t");
+	for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+		seq_printf(m, "%Lu ", totals.bytes[i]);
+
 	return 0;
 }
 
@@ -1742,6 +1831,7 @@ static struct super_operations nfs4_sops = {
 	.clear_inode	= nfs4_clear_inode,
 	.umount_begin	= nfs_umount_begin,
 	.show_options	= nfs_show_options,
+	.show_stats	= nfs_show_stats,
 };
 
 /*
@@ -2015,6 +2105,7 @@ out_err:
 out_free:
 	kfree(server->mnt_path);
 	kfree(server->hostname);
+	nfs_free_iostats(server->io_stats);
 	kfree(server);
 	return s;
 }
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
new file mode 100644
index 00000000000..dc080e50ec5
--- /dev/null
+++ b/fs/nfs/iostat.h
@@ -0,0 +1,152 @@
+/*
+ *  linux/fs/nfs/iostat.h
+ *
+ *  Declarations for NFS client per-mount statistics
+ *
+ *  Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com>
+ *
+ *  NFS client per-mount statistics provide information about the health of
+ *  the NFS client and the health of each NFS mount point.  Generally these
+ *  are not for detailed problem diagnosis, but simply to indicate that there
+ *  is a problem.
+ *
+ *  These counters are not meant to be human-readable, but are meant to be
+ *  integrated into system monitoring tools such as "sar" and "iostat".  As
+ *  such, the counters are sampled by the tools over time, and are never
+ *  zeroed after a file system is mounted.  Moving averages can be computed
+ *  by the tools by taking the difference between two instantaneous samples
+ *  and dividing that by the time between the samples.
+ */
+
+#ifndef _NFS_IOSTAT
+#define _NFS_IOSTAT
+
+#define NFS_IOSTAT_VERS		"1.0"
+
+/*
+ * NFS byte counters
+ *
+ * 1.  SERVER - the number of payload bytes read from or written to the
+ *     server by the NFS client via an NFS READ or WRITE request.
+ *
+ * 2.  NORMAL - the number of bytes read or written by applications via
+ *     the read(2) and write(2) system call interfaces.
+ *
+ * 3.  DIRECT - the number of bytes read or written from files opened
+ *     with the O_DIRECT flag.
+ *
+ * These counters give a view of the data throughput into and out of the NFS
+ * client.  Comparing the number of bytes requested by an application with the
+ * number of bytes the client requests from the server can provide an
+ * indication of client efficiency (per-op, cache hits, etc).
+ *
+ * These counters can also help characterize which access methods are in
+ * use.  DIRECT by itself shows whether there is any O_DIRECT traffic.
+ * NORMAL + DIRECT shows how much data is going through the system call
+ * interface.  A large amount of SERVER traffic without much NORMAL or
+ * DIRECT traffic shows that applications are using mapped files.
+ *
+ * NFS page counters
+ *
+ * These count the number of pages read or written via nfs_readpage(),
+ * nfs_readpages(), or their write equivalents.
+ */
+enum nfs_stat_bytecounters {
+	NFSIOS_NORMALREADBYTES = 0,
+	NFSIOS_NORMALWRITTENBYTES,
+	NFSIOS_DIRECTREADBYTES,
+	NFSIOS_DIRECTWRITTENBYTES,
+	NFSIOS_SERVERREADBYTES,
+	NFSIOS_SERVERWRITTENBYTES,
+	NFSIOS_READPAGES,
+	NFSIOS_WRITEPAGES,
+	__NFSIOS_BYTESMAX,
+};
+
+/*
+ * NFS event counters
+ *
+ * These counters provide a low-overhead way of monitoring client activity
+ * without enabling NFS trace debugging.  The counters show the rate at
+ * which VFS requests are made, and how often the client invalidates its
+ * data and attribute caches.  This allows system administrators to monitor
+ * such things as how close-to-open is working, and answer questions such
+ * as "why are there so many GETATTR requests on the wire?"
+ *
+ * They also count anamolous events such as short reads and writes, silly
+ * renames due to close-after-delete, and operations that change the size
+ * of a file (such operations can often be the source of data corruption
+ * if applications aren't using file locking properly).
+ */
+enum nfs_stat_eventcounters {
+	NFSIOS_INODEREVALIDATE = 0,
+	NFSIOS_DENTRYREVALIDATE,
+	NFSIOS_DATAINVALIDATE,
+	NFSIOS_ATTRINVALIDATE,
+	NFSIOS_VFSOPEN,
+	NFSIOS_VFSLOOKUP,
+	NFSIOS_VFSACCESS,
+	NFSIOS_VFSUPDATEPAGE,
+	NFSIOS_VFSREADPAGE,
+	NFSIOS_VFSREADPAGES,
+	NFSIOS_VFSWRITEPAGE,
+	NFSIOS_VFSWRITEPAGES,
+	NFSIOS_VFSGETDENTS,
+	NFSIOS_VFSSETATTR,
+	NFSIOS_VFSFLUSH,
+	NFSIOS_VFSFSYNC,
+	NFSIOS_VFSLOCK,
+	NFSIOS_VFSRELEASE,
+	NFSIOS_CONGESTIONWAIT,
+	NFSIOS_SETATTRTRUNC,
+	NFSIOS_EXTENDWRITE,
+	NFSIOS_SILLYRENAME,
+	NFSIOS_SHORTREAD,
+	NFSIOS_SHORTWRITE,
+	__NFSIOS_COUNTSMAX,
+};
+
+#ifdef __KERNEL__
+
+#include <linux/percpu.h>
+#include <linux/cache.h>
+
+struct nfs_iostats {
+	unsigned long long	bytes[__NFSIOS_BYTESMAX];
+	unsigned long		events[__NFSIOS_COUNTSMAX];
+} ____cacheline_aligned;
+
+static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat)
+{
+	struct nfs_iostats *iostats;
+	int cpu;
+
+	cpu = get_cpu();
+	iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu);
+	iostats->events[stat] ++;
+	put_cpu_no_resched();
+}
+
+static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+	struct nfs_iostats *iostats;
+	int cpu;
+
+	cpu = get_cpu();
+	iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu);
+	iostats->bytes[stat] += addend;
+	put_cpu_no_resched();
+}
+
+static inline struct nfs_iostats *nfs_alloc_iostats(void)
+{
+	return alloc_percpu(struct nfs_iostats);
+}
+
+static inline void nfs_free_iostats(struct nfs_iostats *stats)
+{
+	free_percpu(stats);
+}
+
+#endif
+#endif
-- 
cgit v1.2.3


From 91d5b47023b608227d605d1e916b29dd0215bff7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:14 -0500
Subject: NFS: add I/O performance counters

Invoke the byte and event counter macros where we want to count bytes and
events.

Clean-up: fix a possible NULL dereference in nfs_lock, and simplify
nfs_file_open.

Test-plan:
fsx and iozone on UP and SMP systems, with and without pre-emption.  Watch
for memory overwrite bugs, and performance loss (significantly more CPU
required per op).

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c    |  8 ++++++++
 fs/nfs/direct.c |  7 +++++++
 fs/nfs/file.c   | 20 +++++++++-----------
 fs/nfs/inode.c  |  8 ++++++++
 fs/nfs/read.c   | 12 ++++++++++++
 fs/nfs/write.c  | 18 ++++++++++++++++++
 6 files changed, 62 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a1554bead69..151b8dd0ac3 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -34,6 +34,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFS_PARANOIA 1
 /* #define NFS_DEBUG_VERBOSE 1 */
@@ -507,6 +508,8 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	struct nfs_fattr fattr;
 	long		res;
 
+	nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
+
 	lock_kernel();
 
 	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
@@ -713,6 +716,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
 	parent = dget_parent(dentry);
 	lock_kernel();
 	dir = parent->d_inode;
+	nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
 	inode = dentry->d_inode;
 
 	if (!inode) {
@@ -844,6 +848,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 
 	dfprintk(VFS, "NFS: lookup(%s/%s)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
+	nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
 
 	res = ERR_PTR(-ENAMETOOLONG);
 	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
@@ -1241,6 +1246,7 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 	dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name, 
 		atomic_read(&dentry->d_count));
+	nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
 
 #ifdef NFS_PARANOIA
 if (!dentry->d_inode)
@@ -1640,6 +1646,8 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
 	struct rpc_cred *cred;
 	int res = 0;
 
+	nfs_inc_stats(inode, NFSIOS_VFSACCESS);
+
 	if (mask == 0)
 		goto out;
 	/* Is this sys_access() ? */
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4e9b3a1b36c..fc07ce4885d 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -54,6 +54,8 @@
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY		NFSDBG_VFS
 #define MAX_DIRECTIO_SIZE	(4096UL << PAGE_SHIFT)
 
@@ -67,6 +69,7 @@ struct nfs_direct_req {
 	struct kref		kref;		/* release manager */
 	struct list_head	list;		/* nfs_read_data structs */
 	wait_queue_head_t	wait;		/* wait for i/o completion */
+	struct inode *		inode;		/* target file of I/O */
 	struct page **		pages;		/* pages in our buffer */
 	unsigned int		npages;		/* count of pages */
 	atomic_t		complete,	/* i/os we're waiting for */
@@ -357,7 +360,9 @@ static ssize_t nfs_direct_read_seg(struct inode *inode,
 
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
+	dreq->inode = inode;
 
+	nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
 	rpc_clnt_sigmask(clnt, &oldset);
 	nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
 				 file_offset);
@@ -572,6 +577,7 @@ static ssize_t nfs_direct_write(struct inode *inode,
                         return page_count;
                 }
 
+		nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, size);
 		result = nfs_direct_write_seg(inode, ctx, user_addr, size,
 				file_offset, pages, page_count);
 		nfs_free_user_pages(pages, page_count, 0);
@@ -581,6 +587,7 @@ static ssize_t nfs_direct_write(struct inode *inode,
 				break;
 			return result;
 		}
+		nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
 		tot_bytes += result;
 		file_offset += result;
 		if (result < size)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 387809f2d18..1cf07e4ad13 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -32,6 +32,7 @@
 #include <asm/system.h>
 
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_FILE
 
@@ -102,18 +103,15 @@ static int nfs_check_flags(int flags)
 static int
 nfs_file_open(struct inode *inode, struct file *filp)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
-	int (*open)(struct inode *, struct file *);
 	int res;
 
 	res = nfs_check_flags(filp->f_flags);
 	if (res)
 		return res;
 
+	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 	lock_kernel();
-	/* Do NFSv4 open() call */
-	if ((open = server->rpc_ops->file_open) != NULL)
-		res = open(inode, filp);
+	res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
 	unlock_kernel();
 	return res;
 }
@@ -124,6 +122,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
 	/* Ensure that dirty pages are flushed out with the right creds */
 	if (filp->f_mode & FMODE_WRITE)
 		filemap_fdatawrite(filp->f_mapping);
+	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
 	return NFS_PROTO(inode)->file_release(inode, filp);
 }
 
@@ -199,6 +198,7 @@ nfs_file_flush(struct file *file)
 
 	if ((file->f_mode & FMODE_WRITE) == 0)
 		return 0;
+	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
 	lock_kernel();
 	/* Ensure that data+attribute caches are up to date after close() */
 	status = nfs_wb_all(inode);
@@ -229,6 +229,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
 		(unsigned long) count, (unsigned long) pos);
 
 	result = nfs_revalidate_file(inode, iocb->ki_filp);
+	nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
 	if (!result)
 		result = generic_file_aio_read(iocb, buf, count, pos);
 	return result;
@@ -282,6 +283,7 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 
 	dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
 
+	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
 	lock_kernel();
 	status = nfs_wb_all(inode);
 	if (!status) {
@@ -378,6 +380,7 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
 	if (!count)
 		goto out;
 
+	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
 	result = generic_file_aio_write(iocb, buf, count, pos);
 out:
 	return result;
@@ -517,9 +520,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 			inode->i_sb->s_id, inode->i_ino,
 			fl->fl_type, fl->fl_flags,
 			(long long)fl->fl_start, (long long)fl->fl_end);
-
-	if (!inode)
-		return -EINVAL;
+	nfs_inc_stats(inode, NFSIOS_VFSLOCK);
 
 	/* No mandatory locks over NFS */
 	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
@@ -544,9 +545,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
 			inode->i_sb->s_id, inode->i_ino,
 			fl->fl_type, fl->fl_flags);
 
-	if (!inode)
-		return -EINVAL;
-
 	/*
 	 * No BSD flocks over NFS allowed.
 	 * Note: we could try to fake a POSIX lock request here by
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 86b756f44e2..8ee74111e51 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -753,6 +753,8 @@ static void nfs_zap_caches_locked(struct inode *inode)
 	struct nfs_inode *nfsi = NFS_I(inode);
 	int mode = inode->i_mode;
 
+	nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
+
 	NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
 	NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
 
@@ -940,6 +942,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 	struct nfs_fattr fattr;
 	int error;
 
+	nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
+
 	if (attr->ia_valid & ATTR_SIZE) {
 		if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
 			attr->ia_valid &= ~ATTR_SIZE;
@@ -993,6 +997,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
 		spin_unlock(&inode->i_lock);
 	}
 	if ((attr->ia_valid & ATTR_SIZE) != 0) {
+		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
 		inode->i_size = attr->ia_size;
 		vmtruncate(inode, attr->ia_size);
 	}
@@ -1278,6 +1283,7 @@ int nfs_attribute_timeout(struct inode *inode)
  */
 int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 {
+	nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
 	if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
 			&& !nfs_attribute_timeout(inode))
 		return NFS_STALE(inode) ? -ESTALE : 0;
@@ -1294,6 +1300,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+		nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
 		if (S_ISREG(inode->i_mode))
 			nfs_sync_mapping(mapping);
 		invalidate_inode_pages2(mapping);
@@ -1615,6 +1622,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 
 	/* Update attrtimeo value if we're out of the unstable period */
 	if (invalid & NFS_INO_INVALID_ATTR) {
+		nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = jiffies;
 	} else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 05eb43fadf8..ae3ddd24cf8 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,6 +31,8 @@
 
 #include <asm/system.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
 static int nfs_pagein_one(struct list_head *, struct inode *);
@@ -133,6 +135,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 		}
 		count -= result;
 		rdata->args.pgbase += result;
+		nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
+
 		/* Note: result == 0 should only happen if we're caching
 		 * a write that extends the file and punches a hole.
 		 */
@@ -458,8 +462,11 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata)
 	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
 		task->tk_pid, status);
 
+	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
+
 	/* Is this a short read? */
 	if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+		nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
 		/* Has the server at least made some progress? */
 		if (resp->count != 0) {
 			/* Yes, so retry the read at the end of the data */
@@ -491,6 +498,9 @@ int nfs_readpage(struct file *file, struct page *page)
 
 	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
 		page, PAGE_CACHE_SIZE, page->index);
+	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+	nfs_add_stats(inode, NFSIOS_READPAGES, 1);
+
 	/*
 	 * Try to flush any pending writes to the file..
 	 *
@@ -570,6 +580,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 			inode->i_sb->s_id,
 			(long long)NFS_FILEID(inode),
 			nr_pages);
+	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
 
 	if (filp == NULL) {
 		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
@@ -582,6 +593,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 	if (!list_empty(&head)) {
 		int err = nfs_pagein_list(&head, server->rpages);
 		if (!ret)
+			nfs_add_stats(inode, NFSIOS_READPAGES, err);
 			ret = err;
 	}
 	put_nfs_open_context(desc.ctx);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 92ecf24455c..e7c8361cf20 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -63,6 +63,7 @@
 #include <linux/smp_lock.h>
 
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
@@ -134,6 +135,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
 	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
 	if (i_size >= end)
 		return;
+	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
 	i_size_write(inode, end);
 }
 
@@ -223,6 +225,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
 	        wdata->args.pgbase += result;
 		written += result;
 		count -= result;
+		nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
 	} while (count);
 	/* Update file length */
 	nfs_grow_file(page, offset, written);
@@ -279,6 +282,9 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 	int priority = wb_priority(wbc);
 	int err;
 
+	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
+	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
+
 	/*
 	 * Note: We need to ensure that we have a reference to the inode
 	 *       if we are to do asynchronous writes. If not, waiting
@@ -343,6 +349,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	struct inode *inode = mapping->host;
 	int err;
 
+	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
+
 	err = generic_writepages(mapping, wbc);
 	if (err)
 		return err;
@@ -354,6 +362,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
 	if (err < 0)
 		goto out;
+	nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
 	wbc->nr_to_write -= err;
 	if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
 		err = nfs_wait_on_requests(inode, 0, 0);
@@ -596,6 +605,9 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
 
 	if (!bdi_write_congested(bdi))
 		return 0;
+
+	nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT);
+
 	if (intr) {
 		struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
 		sigset_t oldset;
@@ -749,6 +761,8 @@ int nfs_updatepage(struct file *file, struct page *page,
 	struct nfs_page	*req;
 	int		status = 0;
 
+	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
+
 	dprintk("NFS:      nfs_updatepage(%s/%s %d@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
 		file->f_dentry->d_name.name, count,
@@ -1152,6 +1166,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
 	dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
 		task->tk_pid, task->tk_status);
 
+	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
 		/* We tried a write call, but the server did not
@@ -1177,6 +1193,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
 	if (task->tk_status >= 0 && resp->count < argp->count) {
 		static unsigned long    complain;
 
+		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+
 		/* Has the server at least made some progress? */
 		if (resp->count != 0) {
 			/* Was this an NFSv2 write or an NFSv3 stable write? */
-- 
cgit v1.2.3


From 006ea73e5fa82915d0ac7a3f15ee7c688433236d Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:14 -0500
Subject: NFS: add hooks to account for NFSERR_JUKEBOX errors

Make an inode or an nfs_server struct available in the logic that handles
JUKEBOX/DELAY type errors so the NFS client can account for them.

This patch is split out from the main nfs iostat patch to highlight minor
architectural changes required to support this statistic.

Test plan:
None.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/iostat.h   | 19 +++++++++++++++----
 fs/nfs/nfs3proc.c | 13 ++++++++-----
 fs/nfs/nfs4proc.c |  5 ++++-
 3 files changed, 27 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index dc080e50ec5..7a749515331 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -103,6 +103,7 @@ enum nfs_stat_eventcounters {
 	NFSIOS_SILLYRENAME,
 	NFSIOS_SHORTREAD,
 	NFSIOS_SHORTWRITE,
+	NFSIOS_DELAY,
 	__NFSIOS_COUNTSMAX,
 };
 
@@ -116,28 +117,38 @@ struct nfs_iostats {
 	unsigned long		events[__NFSIOS_COUNTSMAX];
 } ____cacheline_aligned;
 
-static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat)
+static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat)
 {
 	struct nfs_iostats *iostats;
 	int cpu;
 
 	cpu = get_cpu();
-	iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu);
+	iostats = per_cpu_ptr(server->io_stats, cpu);
 	iostats->events[stat] ++;
 	put_cpu_no_resched();
 }
 
-static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend)
+static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat)
+{
+	nfs_inc_server_stats(NFS_SERVER(inode), stat);
+}
+
+static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend)
 {
 	struct nfs_iostats *iostats;
 	int cpu;
 
 	cpu = get_cpu();
-	iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu);
+	iostats = per_cpu_ptr(server->io_stats, cpu);
 	iostats->bytes[stat] += addend;
 	put_cpu_no_resched();
 }
 
+static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+	nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
+}
+
 static inline struct nfs_iostats *nfs_alloc_iostats(void)
 {
 	return alloc_percpu(struct nfs_iostats);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ed67567f055..7204ba5b2bf 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -19,6 +19,8 @@
 #include <linux/smp_lock.h>
 #include <linux/nfs_mount.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
 extern struct rpc_procinfo nfs3_procedures[];
@@ -58,10 +60,11 @@ nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, i
 		nfs3_rpc_wrapper(clnt, msg, flags)
 
 static int
-nfs3_async_handle_jukebox(struct rpc_task *task)
+nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
 {
 	if (task->tk_status != -EJUKEBOX)
 		return 0;
+	nfs_inc_stats(inode, NFSIOS_DELAY);
 	task->tk_status = 0;
 	rpc_restart_call(task);
 	rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
@@ -447,7 +450,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
 	struct rpc_message *msg = &task->tk_msg;
 	struct nfs_fattr	*dir_attr;
 
-	if (nfs3_async_handle_jukebox(task))
+	if (nfs3_async_handle_jukebox(task, dir->d_inode))
 		return 1;
 	if (msg->rpc_argp) {
 		dir_attr = (struct nfs_fattr*)msg->rpc_resp;
@@ -748,7 +751,7 @@ static void nfs3_read_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;
 
-	if (nfs3_async_handle_jukebox(task))
+	if (nfs3_async_handle_jukebox(task, data->inode))
 		return;
 	/* Call back common NFS readpage processing */
 	if (task->tk_status >= 0)
@@ -786,7 +789,7 @@ static void nfs3_write_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
 
-	if (nfs3_async_handle_jukebox(task))
+	if (nfs3_async_handle_jukebox(task, data->inode))
 		return;
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
@@ -833,7 +836,7 @@ static void nfs3_commit_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
 
-	if (nfs3_async_handle_jukebox(task))
+	if (nfs3_async_handle_jukebox(task, data->inode))
 		return;
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 77a565eba56..f1ff4fa6cce 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,6 +51,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
@@ -2755,8 +2756,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 				rpc_wake_up_task(task);
 			task->tk_status = 0;
 			return -EAGAIN;
-		case -NFS4ERR_GRACE:
 		case -NFS4ERR_DELAY:
+			nfs_inc_server_stats((struct nfs_server *) server,
+						NFSIOS_DELAY);
+		case -NFS4ERR_GRACE:
 			rpc_delay(task, NFS4_POLL_RETRY_MAX);
 			task->tk_status = 0;
 			return -EAGAIN;
-- 
cgit v1.2.3


From 67ec9f46b889bfb1ab0a4e307d53929d5f0692bf Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:15 -0500
Subject: NFS: report how long an NFS file system has been mounted

Add a field in nfs_server to record a timestamp when a mount succeeds.
Report the number of seconds the file system has been mounted via
nfs_show_stats().

Test plan:
Mount an NFS file system, watch the mountstats reports and compare with
clock time.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8ee74111e51..1b2d782374b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -299,6 +299,9 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
 		goto out_no_root;
 	}
 
+	/* mount time stamp, in seconds */
+	server->mount_time = jiffies;
+
 	/* Get some general file system info */
 	if (server->namelen == 0 &&
 	    server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
@@ -674,6 +677,8 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 	seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
 	nfs_show_mount_options(m, nfss, 1);
 
+	seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
+
 	seq_printf(m, "\n\tcaps:\t");
 	seq_printf(m, "caps=0x%x", nfss->caps);
 	seq_printf(m, ",wtmult=%d", nfss->wtmult);
-- 
cgit v1.2.3


From 4ece3a2d18fd7fe1d4972284a8c98c569020093f Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:22 -0500
Subject: NFS: add RPC I/O statistics to /proc/self/mountstats

NFS client now shows various RPC I/O metrics in /proc/self/mountstats.

Test plan:
Mount/umount while doing "cat /proc/self/mountstats", multiple iterations
of connectathon locking suite.  Test with NFS version 2, 3, and 4.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1b2d782374b..b9f35ca266c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -26,6 +26,7 @@
 #include <linux/unistd.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/metrics.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
 #include <linux/nfs4_mount.h>
@@ -728,6 +729,9 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 	seq_printf(m, "\n\tbytes:\t");
 	for (i = 0; i < __NFSIOS_BYTESMAX; i++)
 		seq_printf(m, "%Lu ", totals.bytes[i]);
+	seq_printf(m, "\n");
+
+	rpc_print_iostats(m, nfss->client);
 
 	return 0;
 }
-- 
cgit v1.2.3


From cc0175c1dc1de8f6af0eb0631dcc5b999a6fcc42 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:22 -0500
Subject: SUNRPC: display human-readable procedure name in rpc_iostats output

Add fields to the rpc_procinfo struct that allow the display of a
human-readable name for each procedure in the rpc_iostats output.

Also fix it so that the NFSv4 stats are broken up correctly by
sub-procedure number.  NFSv4 uses only two real RPC procedures:
NULL, and COMPOUND.

Test plan:
Mount with NFSv2, NFSv3, and NFSv4, and do "cat /proc/self/mountstats".

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/mon.c         | 4 ++++
 fs/lockd/xdr.c         | 4 +++-
 fs/lockd/xdr4.c        | 4 +++-
 fs/nfs/mount_clnt.c    | 4 ++++
 fs/nfs/nfs2xdr.c       | 4 +++-
 fs/nfs/nfs3xdr.c       | 6 +++++-
 fs/nfs/nfs4xdr.c       | 2 ++
 fs/nfsd/nfs4callback.c | 2 ++
 8 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 0edc03e6796..84ee39e6a3a 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -214,12 +214,16 @@ static struct rpc_procinfo	nsm_procedures[] = {
 		.p_encode	= (kxdrproc_t) xdr_encode_mon,
 		.p_decode	= (kxdrproc_t) xdr_decode_stat_res,
 		.p_bufsiz	= MAX(SM_mon_sz, SM_monres_sz) << 2,
+		.p_statidx	= SM_MON,
+		.p_name		= "MONITOR",
 	},
 [SM_UNMON] = {
 		.p_proc		= SM_UNMON,
 		.p_encode	= (kxdrproc_t) xdr_encode_unmon,
 		.p_decode	= (kxdrproc_t) xdr_decode_stat,
 		.p_bufsiz	= MAX(SM_mon_id_sz, SM_unmonres_sz) << 2,
+		.p_statidx	= SM_UNMON,
+		.p_name		= "UNMONITOR",
 	},
 };
 
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 1e984ab14d3..766ce06146b 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -546,7 +546,9 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
 	.p_proc      = NLMPROC_##proc,					\
 	.p_encode    = (kxdrproc_t) nlmclt_encode_##argtype,		\
 	.p_decode    = (kxdrproc_t) nlmclt_decode_##restype,		\
-	.p_bufsiz    = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2	\
+	.p_bufsiz    = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2,	\
+	.p_statidx   = NLMPROC_##proc,					\
+	.p_name      = #proc,						\
 	}
 
 static struct rpc_procinfo	nlm_procedures[] = {
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 906ddc20318..36eb175ec33 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -551,7 +551,9 @@ nlm4clt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
 	.p_proc      = NLMPROC_##proc,					\
 	.p_encode    = (kxdrproc_t) nlm4clt_encode_##argtype,		\
 	.p_decode    = (kxdrproc_t) nlm4clt_decode_##restype,		\
-	.p_bufsiz    = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2	\
+	.p_bufsiz    = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2,	\
+	.p_statidx   = NLMPROC_##proc,					\
+	.p_name      = #proc,						\
 	}
 
 static struct rpc_procinfo	nlm4_procedures[] = {
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index db99b8f678f..4a134035822 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -137,6 +137,8 @@ static struct rpc_procinfo	mnt_procedures[] = {
 	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,	
 	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus,
 	  .p_bufsiz		= MNT_dirpath_sz << 2,
+	  .p_statidx		= MNTPROC_MNT,
+	  .p_name		= "MOUNT",
 	},
 };
 
@@ -146,6 +148,8 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,
 	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus3,
 	  .p_bufsiz		= MNT_dirpath_sz << 2,
+	  .p_statidx		= MOUNTPROC3_MNT,
+	  .p_name		= "MOUNT",
 	},
 };
 
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 7fc0560c89c..8cdc792ff3c 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -682,7 +682,9 @@ nfs_stat_to_errno(int stat)
 	.p_encode   =  (kxdrproc_t) nfs_xdr_##argtype,			\
 	.p_decode   =  (kxdrproc_t) nfs_xdr_##restype,			\
 	.p_bufsiz   =  MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2,	\
-	.p_timer    =  timer						\
+	.p_timer    =  timer,						\
+	.p_statidx  =  NFSPROC_##proc,					\
+	.p_name     =  #proc,						\
 	}
 struct rpc_procinfo	nfs_procedures[] = {
     PROC(GETATTR,	fhandle,	attrstat, 1),
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b6c0b5012bc..2d8701a230f 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1109,7 +1109,9 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
 	.p_encode    = (kxdrproc_t) nfs3_xdr_##argtype,			\
 	.p_decode    = (kxdrproc_t) nfs3_xdr_##restype,			\
 	.p_bufsiz    = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2,	\
-	.p_timer     = timer						\
+	.p_timer     = timer,						\
+	.p_statidx   = NFS3PROC_##proc,					\
+	.p_name      = #proc,						\
 	}
 
 struct rpc_procinfo	nfs3_procedures[] = {
@@ -1150,6 +1152,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 		.p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
 		.p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2,
 		.p_timer = 1,
+		.p_name = "GETACL",
 	},
 	[ACLPROC3_SETACL] = {
 		.p_proc = ACLPROC3_SETACL,
@@ -1157,6 +1160,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 		.p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
 		.p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2,
 		.p_timer = 0,
+		.p_name = "SETACL",
 	},
 };
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4bbf5ef5778..b95675349ba 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4344,6 +4344,8 @@ nfs_stat_to_errno(int stat)
 	.p_encode = (kxdrproc_t) nfs4_xdr_##argtype,		\
 	.p_decode = (kxdrproc_t) nfs4_xdr_##restype,		\
 	.p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,	\
+	.p_statidx = NFSPROC4_CLNT_##proc,			\
+	.p_name   = #proc,					\
     }
 
 struct rpc_procinfo	nfs4_procedures[] = {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index d828662d737..4f391cbf2fd 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -326,6 +326,8 @@ out:
         .p_encode = (kxdrproc_t) nfs4_xdr_##argtype,                    \
         .p_decode = (kxdrproc_t) nfs4_xdr_##restype,                    \
         .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,  \
+        .p_statidx = NFSPROC4_CB_##call,				\
+	.p_name   = #proc,                                              \
 }
 
 static struct rpc_procinfo     nfs4_cb_procedures[] = {
-- 
cgit v1.2.3


From dead28da8e3fb32601d38fb32b7021122e0a3d21 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:23 -0500
Subject: SUNRPC: eliminate rpc_call()

Clean-up: replace rpc_call() helper with direct call to rpc_call_sync.

This makes NFSv2 and NFSv3 synchronous calls more computationally
efficient, and reduces stack consumption in functions that used to
invoke rpc_call more than once.

Test plan:
Compile kernel with CONFIG_NFS enabled.  Connectathon on NFS version 2,
version 3, and version 4 mount points.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/mon.c      |   7 ++-
 fs/nfs/mount_clnt.c |  13 +++--
 fs/nfs/nfs3acl.c    |  16 ++++--
 fs/nfs/nfs3proc.c   | 140 ++++++++++++++++++++++++++++++++++++++--------------
 fs/nfs/proc.c       | 107 ++++++++++++++++++++++++++++++---------
 5 files changed, 215 insertions(+), 68 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 84ee39e6a3a..5dd52b70859 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -35,6 +35,10 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
 	struct rpc_clnt	*clnt;
 	int		status;
 	struct nsm_args	args;
+	struct rpc_message msg = {
+		.rpc_argp	= &args,
+		.rpc_resp	= res,
+	};
 
 	clnt = nsm_create();
 	if (IS_ERR(clnt)) {
@@ -49,7 +53,8 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
 	args.proc = NLMPROC_NSM_NOTIFY;
 	memset(res, 0, sizeof(*res));
 
-	status = rpc_call(clnt, proc, &args, res, 0);
+	msg.rpc_proc = &clnt->cl_procinfo[proc];
+	status = rpc_call_sync(clnt, &msg, 0);
 	if (status < 0)
 		printk(KERN_DEBUG "nsm_mon_unmon: rpc failed, status=%d\n",
 			status);
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 4a134035822..c44d87bdddb 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -49,9 +49,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
 	struct mnt_fhstatus	result = {
 		.fh		= fh
 	};
+	struct rpc_message msg	= {
+		.rpc_argp	= path,
+		.rpc_resp	= &result,
+	};
 	char			hostname[32];
 	int			status;
-	int			call;
 
 	dprintk("NFS:      nfs_mount(%08x:%s)\n",
 			(unsigned)ntohl(addr->sin_addr.s_addr), path);
@@ -61,8 +64,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
 	if (IS_ERR(mnt_clnt))
 		return PTR_ERR(mnt_clnt);
 
-	call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT;
-	status = rpc_call(mnt_clnt, call, path, &result, 0);
+	if (version == NFS_MNT3_VERSION)
+		msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
+	else
+		msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
+
+	status = rpc_call_sync(mnt_clnt, &msg, 0);
 	return status < 0? status : (result.status? -EACCES : 0);
 }
 
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 6a5bbc0ae94..33287879bd2 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -190,6 +190,10 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
 	struct nfs3_getaclres res = {
 		.fattr =	&fattr,
 	};
+	struct rpc_message msg = {
+		.rpc_argp	= &args,
+		.rpc_resp	= &res,
+	};
 	struct posix_acl *acl;
 	int status, count;
 
@@ -218,8 +222,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
 		return NULL;
 
 	dprintk("NFS call getacl\n");
-	status = rpc_call(server->client_acl, ACLPROC3_GETACL,
-			  &args, &res, 0);
+	msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
+	status = rpc_call_sync(server->client_acl, &msg, 0);
 	dprintk("NFS reply getacl: %d\n", status);
 
 	/* pages may have been allocated at the xdr layer. */
@@ -286,6 +290,10 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
 		.acl_access = acl,
 		.pages = pages,
 	};
+	struct rpc_message msg = {
+		.rpc_argp	= &args,
+		.rpc_resp	= &fattr,
+	};
 	int status, count;
 
 	status = -EOPNOTSUPP;
@@ -306,8 +314,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
 
 	dprintk("NFS call setacl\n");
 	nfs_begin_data_update(inode);
-	status = rpc_call(server->client_acl, ACLPROC3_SETACL,
-			  &args, &fattr, 0);
+	msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
+	status = rpc_call_sync(server->client_acl, &msg, 0);
 	spin_lock(&inode->i_lock);
 	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
 	spin_unlock(&inode->i_lock);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7204ba5b2bf..740f8b1ab04 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -43,21 +43,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 	return res;
 }
 
-static inline int
-nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
-{
-	struct rpc_message msg = {
-		.rpc_proc	= &clnt->cl_procinfo[proc],
-		.rpc_argp	= argp,
-		.rpc_resp	= resp,
-	};
-	return nfs3_rpc_wrapper(clnt, &msg, flags);
-}
-
-#define rpc_call(clnt, proc, argp, resp, flags) \
-		nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags)
-#define rpc_call_sync(clnt, msg, flags) \
-		nfs3_rpc_wrapper(clnt, msg, flags)
+#define rpc_call_sync(clnt, msg, flags)	nfs3_rpc_wrapper(clnt, msg, flags)
 
 static int
 nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
@@ -75,14 +61,21 @@ static int
 do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
 		 struct nfs_fsinfo *info)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_FSINFO],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= info,
+	};
 	int	status;
 
 	dprintk("%s: call  fsinfo\n", __FUNCTION__);
 	nfs_fattr_init(info->fattr);
-	status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0);
+	status = rpc_call_sync(client, &msg, 0);
 	dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
 	if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
-		status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
+		msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+		msg.rpc_resp = info->fattr;
+		status = rpc_call_sync(client, &msg, 0);
 		dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	}
 	return status;
@@ -110,12 +103,16 @@ static int
 nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 		struct nfs_fattr *fattr)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_GETATTR],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= fattr,
+	};
 	int	status;
 
 	dprintk("NFS call  getattr\n");
 	nfs_fattr_init(fattr);
-	status = rpc_call(server->client, NFS3PROC_GETATTR,
-			  fhandle, fattr, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply getattr: %d\n", status);
 	return status;
 }
@@ -129,11 +126,16 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 		.fh		= NFS_FH(inode),
 		.sattr		= sattr,
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_SETATTR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= fattr,
+	};
 	int	status;
 
 	dprintk("NFS call  setattr\n");
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status == 0)
 		nfs_setattr_update_inode(inode, sattr);
 	dprintk("NFS reply setattr: %d\n", status);
@@ -155,15 +157,23 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
 		.fh		= fhandle,
 		.fattr		= fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_LOOKUP],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  lookup %s\n", name->name);
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
-	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
-		status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
-			 fhandle, fattr, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
+		msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+		msg.rpc_argp = fhandle;
+		msg.rpc_resp = fattr;
+		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	}
 	dprintk("NFS reply lookup: %d\n", status);
 	if (status >= 0)
 		status = nfs_refresh_inode(dir, &dir_attr);
@@ -183,7 +193,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_ACCESS],
 		.rpc_argp	= &arg,
 		.rpc_resp	= &res,
-		.rpc_cred	= entry->cred
+		.rpc_cred	= entry->cred,
 	};
 	int mode = entry->mask;
 	int status;
@@ -229,12 +239,16 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
 		.pglen		= pglen,
 		.pages		= &page
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_READLINK],
+		.rpc_argp	= &args,
+		.rpc_resp	= &fattr,
+	};
 	int			status;
 
 	dprintk("NFS call  readlink\n");
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
-			  &args, &fattr, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_refresh_inode(inode, &fattr);
 	dprintk("NFS reply readlink: %d\n", status);
 	return status;
@@ -330,6 +344,11 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_CREATE],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	mode_t mode = sattr->ia_mode;
 	int status;
 
@@ -346,8 +365,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 again:
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
-	nfs_post_op_update_inode(dir, &dir_attr);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	nfs_refresh_inode(dir, &dir_attr);
 
 	/* If the server doesn't support the exclusive creation semantics,
 	 * try again with simple 'guarded' mode. */
@@ -477,12 +496,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
 		.fromattr	= &old_dir_attr,
 		.toattr		= &new_dir_attr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_RENAME],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
 	nfs_fattr_init(&old_dir_attr);
 	nfs_fattr_init(&new_dir_attr);
-	status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
 	nfs_post_op_update_inode(old_dir, &old_dir_attr);
 	nfs_post_op_update_inode(new_dir, &new_dir_attr);
 	dprintk("NFS reply rename: %d\n", status);
@@ -503,12 +527,17 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 		.dir_attr	= &dir_attr,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_LINK],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  link %s\n", name->name);
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	nfs_post_op_update_inode(inode, &fattr);
 	dprintk("NFS reply link: %d\n", status);
@@ -534,6 +563,11 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 		.fh		= fhandle,
 		.fattr		= fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_SYMLINK],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	if (path->len > NFS3_MAXPATHLEN)
@@ -541,7 +575,7 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
@@ -563,6 +597,11 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_MKDIR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int mode = sattr->ia_mode;
 	int status;
 
@@ -572,7 +611,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	if (status != 0)
 		goto out;
@@ -594,11 +633,16 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
 		.name		= name->name,
 		.len		= name->len
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_RMDIR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &dir_attr,
+	};
 	int			status;
 
 	dprintk("NFS call  rmdir %s\n", name->name);
 	nfs_fattr_init(&dir_attr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	dprintk("NFS reply rmdir: %d\n", status);
 	return status;
@@ -675,6 +719,11 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		.fh		= &fh,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_MKNOD],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	mode_t mode = sattr->ia_mode;
 	int status;
 
@@ -693,7 +742,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	if (status != 0)
 		goto out;
@@ -710,11 +759,16 @@ static int
 nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 		 struct nfs_fsstat *stat)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_FSSTAT],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= stat,
+	};
 	int	status;
 
 	dprintk("NFS call  fsstat\n");
 	nfs_fattr_init(stat->fattr);
-	status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply statfs: %d\n", status);
 	return status;
 }
@@ -723,11 +777,16 @@ static int
 nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 		 struct nfs_fsinfo *info)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_FSINFO],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= info,
+	};
 	int	status;
 
 	dprintk("NFS call  fsinfo\n");
 	nfs_fattr_init(info->fattr);
-	status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+	status = rpc_call_sync(server->client_sys, &msg, 0);
 	dprintk("NFS reply fsinfo: %d\n", status);
 	return status;
 }
@@ -736,11 +795,16 @@ static int
 nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 		   struct nfs_pathconf *info)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_PATHCONF],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= info,
+	};
 	int	status;
 
 	dprintk("NFS call  pathconf\n");
 	nfs_fattr_init(info->fattr);
-	status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply pathconf: %d\n", status);
 	return status;
 }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f5150d71c03..2b051ab8bea 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -58,16 +58,23 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 {
 	struct nfs_fattr *fattr = info->fattr;
 	struct nfs2_fsstat fsinfo;
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_GETATTR],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= fattr,
+	};
 	int status;
 
 	dprintk("%s: call getattr\n", __FUNCTION__);
 	nfs_fattr_init(fattr);
-	status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
+	status = rpc_call_sync(server->client_sys, &msg, 0);
 	dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
 	dprintk("%s: call statfs\n", __FUNCTION__);
-	status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
+	msg.rpc_resp = &fsinfo;
+	status = rpc_call_sync(server->client_sys, &msg, 0);
 	dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
@@ -90,12 +97,16 @@ static int
 nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 		struct nfs_fattr *fattr)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_GETATTR],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= fattr,
+	};
 	int	status;
 
 	dprintk("NFS call  getattr\n");
 	nfs_fattr_init(fattr);
-	status = rpc_call(server->client, NFSPROC_GETATTR,
-				fhandle, fattr, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply getattr: %d\n", status);
 	return status;
 }
@@ -109,6 +120,11 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 		.fh	= NFS_FH(inode),
 		.sattr	= sattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_SETATTR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= fattr,
+	};
 	int	status;
 
 	/* Mask out the non-modebit related stuff from attr->ia_mode */
@@ -116,7 +132,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 
 	dprintk("NFS call  setattr\n");
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status == 0)
 		nfs_setattr_update_inode(inode, sattr);
 	dprintk("NFS reply setattr: %d\n", status);
@@ -136,11 +152,16 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name,
 		.fh		= fhandle,
 		.fattr		= fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_LOOKUP],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  lookup %s\n", name->name);
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	dprintk("NFS reply lookup: %d\n", status);
 	return status;
 }
@@ -154,10 +175,14 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page,
 		.pglen		= pglen,
 		.pages		= &page
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_READLINK],
+		.rpc_argp	= &args,
+	};
 	int			status;
 
 	dprintk("NFS call  readlink\n");
-	status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	dprintk("NFS reply readlink: %d\n", status);
 	return status;
 }
@@ -233,11 +258,16 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_CREATE],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	nfs_fattr_init(&fattr);
 	dprintk("NFS call  create %s\n", dentry->d_name.name);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	if (status == 0)
 		status = nfs_instantiate(dentry, &fhandle, &fattr);
 	dprintk("NFS reply create: %d\n", status);
@@ -263,6 +293,11 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_CREATE],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int status, mode;
 
 	dprintk("NFS call  mknod %s\n", dentry->d_name.name);
@@ -277,13 +312,13 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	}
 
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 
 	if (status == -EINVAL && S_ISFIFO(mode)) {
 		sattr->ia_mode = mode;
 		nfs_fattr_init(&fattr);
-		status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	}
 	if (status == 0)
 		status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -302,8 +337,6 @@ nfs_proc_remove(struct inode *dir, struct qstr *name)
 	struct rpc_message	msg = { 
 		.rpc_proc	= &nfs_procedures[NFSPROC_REMOVE],
 		.rpc_argp	= &arg,
-		.rpc_resp	= NULL,
-		.rpc_cred	= NULL
 	};
 	int			status;
 
@@ -355,10 +388,14 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
 		.toname		= new_name->name,
 		.tolen		= new_name->len
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_RENAME],
+		.rpc_argp	= &arg,
+	};
 	int			status;
 
 	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
-	status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
 	nfs_mark_for_revalidate(old_dir);
 	nfs_mark_for_revalidate(new_dir);
 	dprintk("NFS reply rename: %d\n", status);
@@ -374,10 +411,14 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 		.toname		= name->name,
 		.tolen		= name->len
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_LINK],
+		.rpc_argp	= &arg,
+	};
 	int			status;
 
 	dprintk("NFS call  link %s\n", name->name);
-	status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_mark_for_revalidate(inode);
 	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply link: %d\n", status);
@@ -397,6 +438,10 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 		.tolen		= path->len,
 		.sattr		= sattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_SYMLINK],
+		.rpc_argp	= &arg,
+	};
 	int			status;
 
 	if (path->len > NFS2_MAXPATHLEN)
@@ -404,7 +449,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
 	nfs_fattr_init(fattr);
 	fhandle->size = 0;
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
@@ -425,11 +470,16 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_MKDIR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 	if (status == 0)
 		status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -445,10 +495,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
 		.name		= name->name,
 		.len		= name->len
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_RMDIR],
+		.rpc_argp	= &arg,
+	};
 	int			status;
 
 	dprintk("NFS call  rmdir %s\n", name->name);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply rmdir: %d\n", status);
 	return status;
@@ -470,13 +524,12 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 		.fh		= NFS_FH(dir),
 		.cookie		= cookie,
 		.count		= count,
-		.pages		= &page
+		.pages		= &page,
 	};
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs_procedures[NFSPROC_READDIR],
 		.rpc_argp	= &arg,
-		.rpc_resp	= NULL,
-		.rpc_cred	= cred
+		.rpc_cred	= cred,
 	};
 	int			status;
 
@@ -495,11 +548,16 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 			struct nfs_fsstat *stat)
 {
 	struct nfs2_fsstat fsinfo;
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_STATFS],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= &fsinfo,
+	};
 	int	status;
 
 	dprintk("NFS call  statfs\n");
 	nfs_fattr_init(stat->fattr);
-	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply statfs: %d\n", status);
 	if (status)
 		goto out;
@@ -518,11 +576,16 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 			struct nfs_fsinfo *info)
 {
 	struct nfs2_fsstat fsinfo;
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_STATFS],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= &fsinfo,
+	};
 	int	status;
 
 	dprintk("NFS call  fsinfo\n");
 	nfs_fattr_init(info->fattr);
-	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply fsinfo: %d\n", status);
 	if (status)
 		goto out;
-- 
cgit v1.2.3


From 1e7cb3dc12dbbac690d78c84f9c7cb11132ed121 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:24 -0500
Subject: NFS: directory trace messages

Reuse NFSDBG_DIRCACHE and NFSDBG_LOOKUPCACHE to provide additional
diagnostic messages that trace the operation of the NFS client's
directory behavior.  A few new messages are now generated when NFSDBG_VFS
is active, as well, to trace normal VFS activity.  This compromise
provides better trace debugging for those who use pre-built kernels,
without adding a lot of extra noise to the standard debug settings.

Test-plan:
Enable NFS trace debugging with flags 1, 2, or 4.  You should be able to
see different types of trace messages with each flag setting.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 96 +++++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 66 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 151b8dd0ac3..609185a15c9 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -130,6 +130,9 @@ nfs_opendir(struct inode *inode, struct file *filp)
 {
 	int res = 0;
 
+	dfprintk(VFS, "NFS: opendir(%s/%ld)\n",
+			inode->i_sb->s_id, inode->i_ino);
+
 	lock_kernel();
 	/* Call generic open code in order to cache credentials */
 	if (!res)
@@ -173,7 +176,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 	unsigned long	timestamp;
 	int		error;
 
-	dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index);
+	dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
+			__FUNCTION__, (long long)desc->entry->cookie,
+			page->index);
 
  again:
 	timestamp = jiffies;
@@ -245,7 +250,8 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
 			status;
 
 	while((status = dir_decode(desc)) == 0) {
-		dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie);
+		dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n",
+				__FUNCTION__, (unsigned long long)entry->cookie);
 		if (entry->prev_cookie == *desc->dir_cookie)
 			break;
 		if (loop_count++ > 200) {
@@ -253,7 +259,6 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
 			schedule();
 		}
 	}
-	dfprintk(VFS, "NFS: find_dirent() returns %d\n", status);
 	return status;
 }
 
@@ -277,7 +282,8 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
 		if (status)
 			break;
 
-		dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index);
+		dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n",
+				(unsigned long long)entry->cookie, desc->current_index);
 
 		if (desc->file->f_pos == desc->current_index) {
 			*desc->dir_cookie = entry->cookie;
@@ -289,7 +295,6 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
 			schedule();
 		}
 	}
-	dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status);
 	return status;
 }
 
@@ -304,7 +309,9 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
 	struct page	*page;
 	int		status;
 
-	dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index);
+	dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n",
+			__FUNCTION__, desc->page_index,
+			(long long) *desc->dir_cookie);
 
 	page = read_cache_page(inode->i_mapping, desc->page_index,
 			       (filler_t *)nfs_readdir_filler, desc);
@@ -325,7 +332,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
 	if (status < 0)
 		dir_page_release(desc);
  out:
-	dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status);
+	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status);
 	return status;
  read_error:
 	page_cache_release(page);
@@ -347,13 +354,15 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 
 	/* Always search-by-index from the beginning of the cache */
 	if (*desc->dir_cookie == 0) {
-		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos);
+		dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
+				(long long)desc->file->f_pos);
 		desc->page_index = 0;
 		desc->entry->cookie = desc->entry->prev_cookie = 0;
 		desc->entry->eof = 0;
 		desc->current_index = 0;
 	} else
-		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+		dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
+				(unsigned long long)*desc->dir_cookie);
 
 	for (;;) {
 		res = find_dirent_page(desc);
@@ -366,7 +375,8 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 			schedule();
 		}
 	}
-	dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res);
+
+	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, res);
 	return res;
 }
 
@@ -391,7 +401,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 	int		loop_count = 0,
 			res;
 
-	dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie);
+	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
+			(unsigned long long)entry->cookie);
 
 	for(;;) {
 		unsigned d_type = DT_UNKNOWN;
@@ -428,7 +439,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 	dir_page_release(desc);
 	if (dentry != NULL)
 		dput(dentry);
-	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res);
+	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
+			(unsigned long long)*desc->dir_cookie, res);
 	return res;
 }
 
@@ -454,7 +466,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	struct page	*page = NULL;
 	int		status;
 
-	dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
+			(unsigned long long)*desc->dir_cookie);
 
 	page = alloc_page(GFP_HIGHUSER);
 	if (!page) {
@@ -486,7 +499,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	desc->entry->cookie = desc->entry->prev_cookie = 0;
 	desc->entry->eof = 0;
  out:
-	dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status);
+	dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
+			__FUNCTION__, status);
 	return status;
  out_release:
 	dir_page_release(desc);
@@ -508,6 +522,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	struct nfs_fattr fattr;
 	long		res;
 
+	dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n",
+			dentry->d_parent->d_name.name, dentry->d_name.name,
+			(long long)filp->f_pos);
 	nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
 
 	lock_kernel();
@@ -569,9 +586,12 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		}
 	}
 	unlock_kernel();
-	if (res < 0)
-		return res;
-	return 0;
+	if (res > 0)
+		res = 0;
+	dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n",
+			dentry->d_parent->d_name.name, dentry->d_name.name,
+			res);
+	return res;
 }
 
 loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
@@ -602,6 +622,10 @@ out:
  */
 int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
 {
+	dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n",
+			dentry->d_parent->d_name.name, dentry->d_name.name,
+			datasync);
+
 	return 0;
 }
 
@@ -726,8 +750,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
 	}
 
 	if (is_bad_inode(inode)) {
-		dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
-			dentry->d_parent->d_name.name, dentry->d_name.name);
+		dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
+				__FUNCTION__, dentry->d_parent->d_name.name,
+				dentry->d_name.name);
 		goto out_bad;
 	}
 
@@ -759,6 +784,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
  out_valid:
 	unlock_kernel();
 	dput(parent);
+	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
+			__FUNCTION__, dentry->d_parent->d_name.name,
+			dentry->d_name.name);
 	return 1;
 out_zap_parent:
 	nfs_zap_caches(dir);
@@ -775,6 +803,9 @@ out_zap_parent:
 	d_drop(dentry);
 	unlock_kernel();
 	dput(parent);
+	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
+			__FUNCTION__, dentry->d_parent->d_name.name,
+			dentry->d_name.name);
 	return 0;
 }
 
@@ -917,6 +948,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 	struct dentry *res = NULL;
 	int error;
 
+	dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+
 	/* Check that we are indeed trying to open this file */
 	if (!is_atomic_open(dir, nd))
 		goto no_open;
@@ -1124,8 +1158,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	int error;
 	int open_flags = 0;
 
-	dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id, 
-		dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
 	attr.ia_mode = mode;
 	attr.ia_valid = ATTR_MODE;
@@ -1158,8 +1192,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 	struct iattr attr;
 	int status;
 
-	dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id,
-		dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
@@ -1191,8 +1225,8 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	struct iattr attr;
 	int error;
 
-	dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id,
-		dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
 	attr.ia_valid = ATTR_MODE;
 	attr.ia_mode = mode | S_IFDIR;
@@ -1217,8 +1251,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	int error;
 
-	dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id,
-		dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
 	lock_kernel();
 	nfs_begin_data_update(dir);
@@ -1274,8 +1308,8 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
 		sillycounter++;
 		sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
 
-		dfprintk(VFS, "trying to rename %s to %s\n",
-			 dentry->d_name.name, silly);
+		dfprintk(VFS, "NFS: trying to rename %s to %s\n",
+				dentry->d_name.name, silly);
 		
 		sdentry = lookup_one_len(silly, dentry->d_parent, slen);
 		/*
@@ -1687,13 +1721,15 @@ force_lookup:
 		res = PTR_ERR(cred);
 	unlock_kernel();
 out:
+	dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
+		inode->i_sb->s_id, inode->i_ino, mask, res);
 	return res;
 out_notsup:
 	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	if (res == 0)
 		res = generic_permission(inode, mask, NULL);
 	unlock_kernel();
-	return res;
+	goto out;
 }
 
 /*
-- 
cgit v1.2.3


From 15dadef9460ad8d3b1d5ede1c1697dc79af44a72 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@citi.umich.edu>
Date: Mon, 20 Mar 2006 13:44:24 -0500
Subject: lockd: clean up nlmsvc_lock

Slightly more consistent dprintk error reporting, consolidate some up()'s.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svclock.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index a525a141dd3..42dd105456c 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -300,6 +300,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 	struct file_lock	*conflock;
 	struct nlm_block	*block;
 	int			error;
+	u32			ret;
 
 	dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
 				file->f_file->f_dentry->d_inode->i_sb->s_id,
@@ -329,24 +330,28 @@ again:
 		dprintk("lockd: posix_lock_file returned %d\n", -error);
 		switch(-error) {
 		case 0:
-			return nlm_granted;
+			ret = nlm_granted;
+			goto out;
 		case EDEADLK:
-			return nlm_deadlock;
+			ret = nlm_deadlock;
+			goto out;
 		case EAGAIN:
-			return nlm_lck_denied;
+			ret = nlm_lck_denied;
+			goto out;
 		default:			/* includes ENOLCK */
-			return nlm_lck_denied_nolocks;
+			ret = nlm_lck_denied_nolocks;
+			goto out;
 		}
 	}
 
 	if (!wait) {
-		up(&file->f_sema);
-		return nlm_lck_denied;
+		ret = nlm_lck_denied;
+		goto out_unlock;
 	}
 
 	if (posix_locks_deadlock(&lock->fl, conflock)) {
-		up(&file->f_sema);
-		return nlm_deadlock;
+		ret = nlm_deadlock;
+		goto out_unlock;
 	}
 
 	/* If we don't have a block, create and initialize it. Then
@@ -371,8 +376,12 @@ again:
 		posix_block_lock(conflock, &block->b_call.a_args.lock.fl);
 	}
 
+	ret = nlm_lck_blocked;
+out_unlock:
 	up(&file->f_sema);
-	return nlm_lck_blocked;
+out:
+	dprintk("lockd: nlmsvc_lock returned %u\n", ret);
+	return ret;
 }
 
 /*
@@ -535,8 +544,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 		dprintk("lockd: lock still blocked\n");
 		nlmsvc_insert_block(block, NLM_NEVER);
 		posix_block_lock(conflock, &lock->fl);
-		up(&file->f_sema);
-		return;
+		goto out_unlock;
 	}
 
 	/* Alright, no conflicting lock. Now lock it for real. If the
@@ -547,8 +555,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 		printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
 				-error, __FUNCTION__);
 		nlmsvc_insert_block(block, 10 * HZ);
-		up(&file->f_sema);
-		return;
+		goto out_unlock;
 	}
 
 callback:
@@ -565,6 +572,7 @@ callback:
 	if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
 						&nlmsvc_grant_ops) < 0)
 		nlm_release_host(block->b_call.a_host);
+out_unlock:
 	up(&file->f_sema);
 }
 
-- 
cgit v1.2.3


From 5de0e5024a4e21251fd80dbfdb83316ce97086bc Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@citi.umich.edu>
Date: Mon, 20 Mar 2006 13:44:25 -0500
Subject: lockd: simplify nlmsvc_grant_blocked

Reorganize nlmsvc_grant_blocked() to make full use of posix_lock_file().  Note
that there's no need for separate calls to posix_test_lock(),
posix_locks_deadlock(), or posix_block_lock().

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svclock.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 42dd105456c..58bbfede94e 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -519,7 +519,6 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 {
 	struct nlm_file		*file = block->b_file;
 	struct nlm_lock		*lock = &block->b_call.a_args.lock;
-	struct file_lock	*conflock;
 	int			error;
 
 	dprintk("lockd: grant blocked lock %p\n", block);
@@ -539,19 +538,15 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 	}
 
 	/* Try the lock operation again */
-	if ((conflock = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
-		/* Bummer, we blocked again */
+	error = posix_lock_file(file->f_file, &lock->fl);
+	switch (error) {
+	case 0:
+		break;
+	case -EAGAIN:
 		dprintk("lockd: lock still blocked\n");
 		nlmsvc_insert_block(block, NLM_NEVER);
-		posix_block_lock(conflock, &lock->fl);
 		goto out_unlock;
-	}
-
-	/* Alright, no conflicting lock. Now lock it for real. If the
-	 * following yields an error, this is most probably due to low
-	 * memory. Retry the lock in a few seconds.
-	 */
-	if ((error = posix_lock_file(file->f_file, &lock->fl)) < 0) {
+	default:
 		printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
 				-error, __FUNCTION__);
 		nlmsvc_insert_block(block, 10 * HZ);
-- 
cgit v1.2.3


From a85f193e2fb7d53e48ae6a9d9ea990bfb4cea555 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@citi.umich.edu>
Date: Mon, 20 Mar 2006 13:44:25 -0500
Subject: lockd: make nlmsvc_lock use only posix_lock_file

Reorganize nlmsvc_lock() to make full use of posix_lock_file(), which does
eveything nlmsvc_lock() needs - no need to call posix_test_lock(),
posix_locks_deadlock(), or posix_block_lock() separately.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svclock.c | 23 ++++-------------------
 1 file changed, 4 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 58bbfede94e..f5398097b84 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -297,7 +297,6 @@ u32
 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
 {
-	struct file_lock	*conflock;
 	struct nlm_block	*block;
 	int			error;
 	u32			ret;
@@ -320,14 +319,15 @@ again:
 	/* Lock file against concurrent access */
 	down(&file->f_sema);
 
-	if (!(conflock = posix_test_lock(file->f_file, &lock->fl))) {
-		error = posix_lock_file(file->f_file, &lock->fl);
+	error = posix_lock_file(file->f_file, &lock->fl);
+
+	dprintk("lockd: posix_lock_file returned %d\n", error);
 
+	if (error != -EAGAIN) {
 		if (block)
 			nlmsvc_delete_block(block, 0);
 		up(&file->f_sema);
 
-		dprintk("lockd: posix_lock_file returned %d\n", -error);
 		switch(-error) {
 		case 0:
 			ret = nlm_granted;
@@ -335,9 +335,6 @@ again:
 		case EDEADLK:
 			ret = nlm_deadlock;
 			goto out;
-		case EAGAIN:
-			ret = nlm_lck_denied;
-			goto out;
 		default:			/* includes ENOLCK */
 			ret = nlm_lck_denied_nolocks;
 			goto out;
@@ -349,11 +346,6 @@ again:
 		goto out_unlock;
 	}
 
-	if (posix_locks_deadlock(&lock->fl, conflock)) {
-		ret = nlm_deadlock;
-		goto out_unlock;
-	}
-
 	/* If we don't have a block, create and initialize it. Then
 	 * retry because we may have slept in kmalloc. */
 	/* We have to release f_sema as nlmsvc_create_block may try to
@@ -369,13 +361,6 @@ again:
 	/* Append to list of blocked */
 	nlmsvc_insert_block(block, NLM_NEVER);
 
-	if (list_empty(&block->b_call.a_args.lock.fl.fl_block)) {
-		/* Now add block to block list of the conflicting lock
-		   if we haven't done so. */
-		dprintk("lockd: blocking on this lock.\n");
-		posix_block_lock(conflock, &block->b_call.a_args.lock.fl);
-	}
-
 	ret = nlm_lck_blocked;
 out_unlock:
 	up(&file->f_sema);
-- 
cgit v1.2.3


From 2e0af86f618c697b44e2d67dff151256c58201c4 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@citi.umich.edu>
Date: Mon, 20 Mar 2006 13:44:26 -0500
Subject: locks: remove unused posix_block_lock

posix_lock_file() is used to add a blocked lock to Lockd's block, so
posix_block_lock() is no longer needed.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/locks.c | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'fs')

diff --git a/fs/locks.c b/fs/locks.c
index d2c5306e3db..cb940b142c5 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1954,21 +1954,6 @@ void locks_remove_flock(struct file *filp)
 	unlock_kernel();
 }
 
-/**
- *	posix_block_lock - blocks waiting for a file lock
- *	@blocker: the lock which is blocking
- *	@waiter: the lock which conflicts and has to wait
- *
- * lockd needs to block waiting for locks.
- */
-void
-posix_block_lock(struct file_lock *blocker, struct file_lock *waiter)
-{
-	locks_insert_block(blocker, waiter);
-}
-
-EXPORT_SYMBOL(posix_block_lock);
-
 /**
  *	posix_unblock_lock - stop waiting for a file lock
  *      @filp:   how the file was opened
-- 
cgit v1.2.3


From 8dc7c3115b611c00006eac3ee5b108296432aab7 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@citi.umich.edu>
Date: Mon, 20 Mar 2006 13:44:26 -0500
Subject: locks,lockd: fix race in nlmsvc_testlock

posix_test_lock() returns a pointer to a struct file_lock which is unprotected
and can be removed while in use by the caller.  Move the conflicting lock from
the return to a parameter, and copy the conflicting lock.

In most cases the caller ends up putting the copy of the conflicting lock on
the stack.  On i386, sizeof(struct file_lock) appears to be about 100 bytes.
We're assuming that's reasonable.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svclock.c  | 12 +++++-------
 fs/locks.c          | 21 +++++++++++++--------
 fs/nfs/file.c       |  7 +++----
 fs/nfsd/nfs4state.c | 13 ++++++-------
 4 files changed, 27 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index f5398097b84..d683dd022e0 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -376,8 +376,6 @@ u32
 nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
 				       struct nlm_lock *conflock)
 {
-	struct file_lock	*fl;
-
 	dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
 				file->f_file->f_dentry->d_inode->i_sb->s_id,
 				file->f_file->f_dentry->d_inode->i_ino,
@@ -385,14 +383,14 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
 
-	if ((fl = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
+	if (posix_test_lock(file->f_file, &lock->fl, &conflock->fl)) {
 		dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
-				fl->fl_type, (long long)fl->fl_start,
-				(long long)fl->fl_end);
+				conflock->fl.fl_type,
+				(long long)conflock->fl.fl_start,
+				(long long)conflock->fl.fl_end);
 		conflock->caller = "somehost";	/* FIXME */
 		conflock->oh.len = 0;		/* don't return OH info */
-		conflock->svid = fl->fl_pid;
-		conflock->fl = *fl;
+		conflock->svid = conflock->fl.fl_pid;
 		return nlm_lck_denied;
 	}
 
diff --git a/fs/locks.c b/fs/locks.c
index cb940b142c5..231b23c12c3 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -672,8 +672,9 @@ static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *w
 	return result;
 }
 
-struct file_lock *
-posix_test_lock(struct file *filp, struct file_lock *fl)
+int
+posix_test_lock(struct file *filp, struct file_lock *fl,
+		struct file_lock *conflock)
 {
 	struct file_lock *cfl;
 
@@ -684,9 +685,13 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
 		if (posix_locks_conflict(cfl, fl))
 			break;
 	}
+	if (cfl) {
+		locks_copy_lock(conflock, cfl);
+		unlock_kernel();
+		return 1;
+	}
 	unlock_kernel();
-
-	return (cfl);
+	return 0;
 }
 
 EXPORT_SYMBOL(posix_test_lock);
@@ -1563,7 +1568,7 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
  */
 int fcntl_getlk(struct file *filp, struct flock __user *l)
 {
-	struct file_lock *fl, file_lock;
+	struct file_lock *fl, cfl, file_lock;
 	struct flock flock;
 	int error;
 
@@ -1587,7 +1592,7 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
 		else
 		  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
 	} else {
-		fl = posix_test_lock(filp, &file_lock);
+		fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
 	}
  
 	flock.l_type = F_UNLCK;
@@ -1717,7 +1722,7 @@ out:
  */
 int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
 {
-	struct file_lock *fl, file_lock;
+	struct file_lock *fl, cfl, file_lock;
 	struct flock64 flock;
 	int error;
 
@@ -1741,7 +1746,7 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
 		else
 		  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
 	} else {
-		fl = posix_test_lock(filp, &file_lock);
+		fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
 	}
  
 	flock.l_type = F_UNLCK;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 1cf07e4ad13..ee140c53dba 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -392,15 +392,14 @@ out_swapfile:
 
 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
 {
-	struct file_lock *cfl;
+	struct file_lock cfl;
 	struct inode *inode = filp->f_mapping->host;
 	int status = 0;
 
 	lock_kernel();
 	/* Try local locking first */
-	cfl = posix_test_lock(filp, fl);
-	if (cfl != NULL) {
-		locks_copy_lock(fl, cfl);
+	if (posix_test_lock(filp, fl, &cfl)) {
+		locks_copy_lock(fl, &cfl);
 		goto out;
 	}
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1143cfb6454..f6ab762bea9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2639,7 +2639,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	struct nfs4_stateid *lock_stp;
 	struct file *filp;
 	struct file_lock file_lock;
-	struct file_lock *conflock;
+	struct file_lock conflock;
 	int status = 0;
 	unsigned int strhashval;
 
@@ -2775,11 +2775,11 @@ conflicting_lock:
 	/* XXX There is a race here. Future patch needed to provide 
 	 * an atomic posix_lock_and_test_file
 	 */
-	if (!(conflock = posix_test_lock(filp, &file_lock))) {
+	if (!posix_test_lock(filp, &file_lock, &conflock)) {
 		status = nfserr_serverfault;
 		goto out;
 	}
-	nfs4_set_lock_denied(conflock, &lock->lk_denied);
+	nfs4_set_lock_denied(&conflock, &lock->lk_denied);
 out:
 	if (status && lock->lk_is_new && lock_sop)
 		release_stateowner(lock_sop);
@@ -2800,7 +2800,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	struct inode *inode;
 	struct file file;
 	struct file_lock file_lock;
-	struct file_lock *conflicting_lock;
+	struct file_lock conflock;
 	int status;
 
 	if (nfs4_in_grace())
@@ -2864,10 +2864,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	file.f_dentry = current_fh->fh_dentry;
 
 	status = nfs_ok;
-	conflicting_lock = posix_test_lock(&file, &file_lock);
-	if (conflicting_lock) {
+	if (posix_test_lock(&file, &file_lock, &conflock)) {
 		status = nfserr_denied;
-		nfs4_set_lock_denied(conflicting_lock, &lockt->lt_denied);
+		nfs4_set_lock_denied(&conflock, &lockt->lt_denied);
 	}
 out:
 	nfs4_unlock_state();
-- 
cgit v1.2.3


From 7117bf3dfb10b534a017260d9fc643bc1d0afd2a Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Mon, 20 Mar 2006 13:44:26 -0500
Subject: lockd: Remove FL_LOCKD flag

Currently lockd identifies its own locks using the FL_LOCKD flag.  This
doesn't scale well to multiple lock managers--if we did this in nfsv4 too,
for example, we'd be left with only one free flag bit.

Instead, we just check whether the file manager ops (fl_lmops) set on this
lock are our own.

The only use for this is in nlm_traverse_locks, which uses it to find locks
that need cleaning up when freeing a host or a file.

In the long run it might be nice to do reference counting instead of
traversing all the locks like this....

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svclock.c | 2 --
 fs/lockd/svcsubs.c | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index d683dd022e0..d50946dcddd 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -313,8 +313,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 	/* Get existing block (in case client is busy-waiting) */
 	block = nlmsvc_lookup_block(file, lock, 0);
 
-	lock->fl.fl_flags |= FL_LOCKD;
-
 again:
 	/* Lock file against concurrent access */
 	down(&file->f_sema);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 62f4a385177..601e5b3dfe2 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -182,7 +182,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, int action)
 again:
 	file->f_locks = 0;
 	for (fl = inode->i_flock; fl; fl = fl->fl_next) {
-		if (!(fl->fl_flags & FL_LOCKD))
+		if (fl->fl_lmops != &nlmsvc_lock_operations)
 			continue;
 
 		/* update current lock count */
-- 
cgit v1.2.3


From 788e7a89a03e364855583c0ab4649b94925efbb9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:27 -0500
Subject: NFS: Cleanup of NFS write code in preparation for asynchronous
 o_direct

This patch inverts the callback hierarchy for NFS write calls.

Instead of having the NFSv2/v3/v4-specific code set up the RPC callback
ops, we allow the original caller to do so. This allows for more
flexibility w.r.t. how to set up and tear down the nfs_write_data
structure while still allowing the NFSv3/v4 code to perform error
handling.

The greater flexibility is needed by the asynchronous O_DIRECT code, which
wants to be able to hold on to the original nfs_write_data structures after
the WRITE RPC call has completed in order to be able to replay them if the
COMMIT call determines that the server has rebooted.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs3proc.c | 66 ++++++++++-----------------------------
 fs/nfs/nfs4proc.c | 54 ++++++++------------------------
 fs/nfs/proc.c     | 24 +++------------
 fs/nfs/write.c    | 92 +++++++++++++++++++++++++++++++++++++++----------------
 4 files changed, 101 insertions(+), 135 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 740f8b1ab04..c4f7de8830e 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -849,29 +849,17 @@ nfs3_proc_read_setup(struct nfs_read_data *data)
 	rpc_call_setup(task, &msg, 0);
 }
 
-static void nfs3_write_done(struct rpc_task *task, void *calldata)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
-
 	if (nfs3_async_handle_jukebox(task, data->inode))
-		return;
+		return -EAGAIN;
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_writeback_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs3_write_ops = {
-	.rpc_call_done = nfs3_write_done,
-	.rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs3_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_write_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			stable;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_WRITE],
 		.rpc_argp	= &data->args,
@@ -879,45 +867,28 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how)
 		.rpc_cred	= data->cred,
 	};
 
+	data->args.stable = NFS_UNSTABLE;
 	if (how & FLUSH_STABLE) {
-		if (!NFS_I(inode)->ncommit)
-			stable = NFS_FILE_SYNC;
-		else
-			stable = NFS_DATA_SYNC;
-	} else
-		stable = NFS_UNSTABLE;
-	data->args.stable = stable;
-
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+		data->args.stable = NFS_FILE_SYNC;
+		if (NFS_I(data->inode)->ncommit)
+			data->args.stable = NFS_DATA_SYNC;
+	}
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs3_commit_done(struct rpc_task *task, void *calldata)
+static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
-
 	if (nfs3_async_handle_jukebox(task, data->inode))
-		return;
+		return -EAGAIN;
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_commit_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs3_commit_ops = {
-	.rpc_call_done = nfs3_commit_done,
-	.rpc_release = nfs_commit_release,
-};
-
-static void
-nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_COMMIT],
 		.rpc_argp	= &data->args,
@@ -925,12 +896,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
 		.rpc_cred	= data->cred,
 	};
 
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
 static int
@@ -970,7 +936,9 @@ struct nfs_rpc_ops	nfs_v3_clientops = {
 	.decode_dirent	= nfs3_decode_dirent,
 	.read_setup	= nfs3_proc_read_setup,
 	.write_setup	= nfs3_proc_write_setup,
+	.write_done	= nfs3_write_done,
 	.commit_setup	= nfs3_proc_commit_setup,
+	.commit_done	= nfs3_commit_done,
 	.file_open	= nfs_open,
 	.file_release	= nfs_release,
 	.lock		= nfs3_proc_lock,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f1ff4fa6cce..ef4dc315ecc 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2388,32 +2388,23 @@ nfs4_proc_read_setup(struct nfs_read_data *data)
 	rpc_call_setup(task, &msg, 0);
 }
 
-static void nfs4_write_done(struct rpc_task *task, void *calldata)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
 	struct inode *inode = data->inode;
 	
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
 		rpc_restart_call(task);
-		return;
+		return -EAGAIN;
 	}
 	if (task->tk_status >= 0) {
 		renew_lease(NFS_SERVER(inode), data->timestamp);
 		nfs_post_op_update_inode(inode, data->res.fattr);
 	}
-	/* Call back common NFS writeback processing */
-	nfs_writeback_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs4_write_ops = {
-	.rpc_call_done = nfs4_write_done,
-	.rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs4_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task	*task = &data->task;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
 		.rpc_argp = &data->args,
@@ -2423,7 +2414,6 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 	struct inode *inode = data->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
 	int stable;
-	int flags;
 	
 	if (how & FLUSH_STABLE) {
 		if (!NFS_I(inode)->ncommit)
@@ -2438,57 +2428,37 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 
 	data->timestamp   = jiffies;
 
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs4_commit_done(struct rpc_task *task, void *calldata)
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
 	struct inode *inode = data->inode;
 	
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
 		rpc_restart_call(task);
-		return;
+		return -EAGAIN;
 	}
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(inode, data->res.fattr);
-	/* Call back common NFS writeback processing */
-	nfs_commit_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs4_commit_ops = {
-	.rpc_call_done = nfs4_commit_done,
-	.rpc_release = nfs_commit_release,
-};
-
-static void
-nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task	*task = &data->task;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
 		.rpc_cred = data->cred,
 	};	
-	struct inode *inode = data->inode;
-	struct nfs_server *server = NFS_SERVER(inode);
-	int flags;
+	struct nfs_server *server = NFS_SERVER(data->inode);
 	
 	data->args.bitmask = server->attr_bitmask;
 	data->res.server = server;
 
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data);
-	rpc_call_setup(task, &msg, 0);	
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
 /*
@@ -3648,7 +3618,9 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.decode_dirent	= nfs4_decode_dirent,
 	.read_setup	= nfs4_proc_read_setup,
 	.write_setup	= nfs4_proc_write_setup,
+	.write_done	= nfs4_write_done,
 	.commit_setup	= nfs4_proc_commit_setup,
+	.commit_done	= nfs4_commit_done,
 	.file_open      = nfs_open,
 	.file_release   = nfs_release,
 	.lock		= nfs4_proc_lock,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 2b051ab8bea..608aa5932a1 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -654,26 +654,15 @@ nfs_proc_read_setup(struct nfs_read_data *data)
 	rpc_call_setup(task, &msg, 0);
 }
 
-static void nfs_write_done(struct rpc_task *task, void *calldata)
+static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
-
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_writeback_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs_write_ops = {
-	.rpc_call_done = nfs_write_done,
-	.rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs_proc_write_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs_procedures[NFSPROC_WRITE],
 		.rpc_argp	= &data->args,
@@ -684,12 +673,8 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how)
 	/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
 	data->args.stable = NFS_FILE_SYNC;
 
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
 static void
@@ -736,6 +721,7 @@ struct nfs_rpc_ops	nfs_v2_clientops = {
 	.decode_dirent	= nfs_decode_dirent,
 	.read_setup	= nfs_proc_read_setup,
 	.write_setup	= nfs_proc_write_setup,
+	.write_done	= nfs_write_done,
 	.commit_setup	= nfs_proc_commit_setup,
 	.file_open	= nfs_open,
 	.file_release	= nfs_release,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e7c8361cf20..5912274ff1a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -77,12 +77,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
 					    struct inode *,
 					    struct page *,
 					    unsigned int, unsigned int);
-static void nfs_writeback_done_partial(struct nfs_write_data *, int);
-static void nfs_writeback_done_full(struct nfs_write_data *, int);
+static int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
 static int nfs_wait_on_write_congestion(struct address_space *, int);
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
 static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
 			   unsigned int npages, int how);
+static const struct rpc_call_ops nfs_write_partial_ops;
+static const struct rpc_call_ops nfs_write_full_ops;
+static const struct rpc_call_ops nfs_commit_ops;
 
 static kmem_cache_t *nfs_wdata_cachep;
 mempool_t *nfs_wdata_mempool;
@@ -872,10 +874,12 @@ static inline int flush_task_priority(int how)
  */
 static void nfs_write_rpcsetup(struct nfs_page *req,
 		struct nfs_write_data *data,
+		const struct rpc_call_ops *call_ops,
 		unsigned int count, unsigned int offset,
 		int how)
 {
 	struct inode		*inode;
+	int flags;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
@@ -896,6 +900,9 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	data->res.verf    = &data->verf;
 	nfs_fattr_init(&data->fattr);
 
+	/* Set up the initial task struct.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
 	NFS_PROTO(inode)->write_setup(data, how);
 
 	data->task.tk_priority = flush_task_priority(how);
@@ -959,14 +966,15 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
 		list_del_init(&data->pages);
 
 		data->pagevec[0] = page;
-		data->complete = nfs_writeback_done_partial;
 
 		if (nbytes > wsize) {
-			nfs_write_rpcsetup(req, data, wsize, offset, how);
+			nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+					wsize, offset, how);
 			offset += wsize;
 			nbytes -= wsize;
 		} else {
-			nfs_write_rpcsetup(req, data, nbytes, offset, how);
+			nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+					nbytes, offset, how);
 			nbytes = 0;
 		}
 		nfs_execute_write(data);
@@ -1020,9 +1028,8 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
 	}
 	req = nfs_list_entry(data->pages.next);
 
-	data->complete = nfs_writeback_done_full;
 	/* Set up the argument struct */
-	nfs_write_rpcsetup(req, data, count, 0, how);
+	nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
 
 	nfs_execute_write(data);
 	return 0;
@@ -1066,8 +1073,9 @@ nfs_flush_list(struct list_head *head, int wpages, int how)
 /*
  * Handle a write reply that flushed part of a page.
  */
-static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 {
+	struct nfs_write_data	*data = calldata;
 	struct nfs_page		*req = data->req;
 	struct page		*page = req->wb_page;
 
@@ -1077,11 +1085,14 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
 		req->wb_bytes,
 		(long long)req_offset(req));
 
-	if (status < 0) {
+	if (nfs_writeback_done(task, data) != 0)
+		return;
+
+	if (task->tk_status < 0) {
 		ClearPageUptodate(page);
 		SetPageError(page);
-		req->wb_context->error = status;
-		dprintk(", error = %d\n", status);
+		req->wb_context->error = task->tk_status;
+		dprintk(", error = %d\n", task->tk_status);
 	} else {
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 		if (data->verf.committed < NFS_FILE_SYNC) {
@@ -1102,6 +1113,11 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
 		nfs_writepage_release(req);
 }
 
+static const struct rpc_call_ops nfs_write_partial_ops = {
+	.rpc_call_done = nfs_writeback_done_partial,
+	.rpc_release = nfs_writedata_release,
+};
+
 /*
  * Handle a write reply that flushes a whole page.
  *
@@ -1109,11 +1125,15 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
  *	  writebacks since the page->count is kept > 1 for as long
  *	  as the page has a write request pending.
  */
-static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 {
+	struct nfs_write_data	*data = calldata;
 	struct nfs_page		*req;
 	struct page		*page;
 
+	if (nfs_writeback_done(task, data) != 0)
+		return;
+
 	/* Update attributes as result of writeback. */
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
@@ -1126,13 +1146,13 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
 			req->wb_bytes,
 			(long long)req_offset(req));
 
-		if (status < 0) {
+		if (task->tk_status < 0) {
 			ClearPageUptodate(page);
 			SetPageError(page);
-			req->wb_context->error = status;
+			req->wb_context->error = task->tk_status;
 			end_page_writeback(page);
 			nfs_inode_remove_request(req);
-			dprintk(", error = %d\n", status);
+			dprintk(", error = %d\n", task->tk_status);
 			goto next;
 		}
 		end_page_writeback(page);
@@ -1154,18 +1174,28 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
 	}
 }
 
+static const struct rpc_call_ops nfs_write_full_ops = {
+	.rpc_call_done = nfs_writeback_done_full,
+	.rpc_release = nfs_writedata_release,
+};
+
+
 /*
  * This function is called when the WRITE call is complete.
  */
-void nfs_writeback_done(struct rpc_task *task, void *calldata)
+static int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data	*data = calldata;
 	struct nfs_writeargs	*argp = &data->args;
 	struct nfs_writeres	*resp = &data->res;
+	int status;
 
 	dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
 		task->tk_pid, task->tk_status);
 
+	/* Call the NFS version-specific code */
+	status = NFS_PROTO(data->inode)->write_done(task, data);
+	if (status != 0)
+		return status;
 	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -1210,7 +1240,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
 				argp->stable = NFS_FILE_SYNC;
 			}
 			rpc_restart_call(task);
-			return;
+			return -EAGAIN;
 		}
 		if (time_before(complain, jiffies)) {
 			printk(KERN_WARNING
@@ -1221,11 +1251,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
 		/* Can't do anything about it except throw an error. */
 		task->tk_status = -EIO;
 	}
-
-	/*
-	 * Process the nfs_page list
-	 */
-	data->complete(data, task->tk_status);
+	return 0;
 }
 
 
@@ -1239,10 +1265,12 @@ void nfs_commit_release(void *wdata)
  * Set up the argument/result storage required for the RPC call.
  */
 static void nfs_commit_rpcsetup(struct list_head *head,
-		struct nfs_write_data *data, int how)
+		struct nfs_write_data *data,
+		int how)
 {
 	struct nfs_page		*first;
 	struct inode		*inode;
+	int flags;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
@@ -1262,7 +1290,10 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	data->res.fattr   = &data->fattr;
 	data->res.verf    = &data->verf;
 	nfs_fattr_init(&data->fattr);
-	
+
+	/* Set up the initial task struct.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data);
 	NFS_PROTO(inode)->commit_setup(data, how);
 
 	data->task.tk_priority = flush_task_priority(how);
@@ -1303,7 +1334,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 /*
  * COMMIT call returned
  */
-void nfs_commit_done(struct rpc_task *task, void *calldata)
+static void nfs_commit_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
 	struct nfs_page		*req;
@@ -1312,6 +1343,10 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
         dprintk("NFS: %4d nfs_commit_done (status %d)\n",
                                 task->tk_pid, task->tk_status);
 
+	/* Call the NFS version-specific code */
+	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+		return;
+
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
 		nfs_list_remove_request(req);
@@ -1345,6 +1380,11 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
 	}
 	sub_page_state(nr_unstable,res);
 }
+
+static const struct rpc_call_ops nfs_commit_ops = {
+	.rpc_call_done = nfs_commit_done,
+	.rpc_release = nfs_commit_release,
+};
 #endif
 
 static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
-- 
cgit v1.2.3


From ec06c096edec0755534c7126f4caded69de131c2 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:27 -0500
Subject: NFS: Cleanup of NFS read code

Same callback hierarchy inversion as for the NFS write calls. This patch is
not strictly speaking needed by the O_DIRECT code, but avoids confusing
differences between the asynchronous read and write code.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c   | 17 ++++++++++++----
 fs/nfs/nfs3proc.c | 27 ++++++--------------------
 fs/nfs/nfs4proc.c | 33 +++++++++----------------------
 fs/nfs/proc.c     | 25 +++++-------------------
 fs/nfs/read.c     | 58 +++++++++++++++++++++++++++++++++++++++----------------
 5 files changed, 74 insertions(+), 86 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index fc07ce4885d..3f87a72bd13 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -218,14 +218,17 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
  * until the RPCs complete.  This could be long *after* we are woken up in
  * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
  */
-static void nfs_direct_read_result(struct nfs_read_data *data, int status)
+static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 {
+	struct nfs_read_data *data = calldata;
 	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
 
-	if (likely(status >= 0))
+	if (nfs_readpage_result(task, data) != 0)
+		return;
+	if (likely(task->tk_status >= 0))
 		atomic_add(data->res.count, &dreq->count);
 	else
-		atomic_set(&dreq->error, status);
+		atomic_set(&dreq->error, task->tk_status);
 
 	if (unlikely(atomic_dec_and_test(&dreq->complete))) {
 		nfs_free_user_pages(dreq->pages, dreq->npages, 1);
@@ -234,6 +237,11 @@ static void nfs_direct_read_result(struct nfs_read_data *data, int status)
 	}
 }
 
+static const struct rpc_call_ops nfs_read_direct_ops = {
+	.rpc_call_done = nfs_direct_read_result,
+	.rpc_release = nfs_readdata_release,
+};
+
 /**
  * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
  * @dreq: address of nfs_direct_req struct for this request
@@ -280,10 +288,11 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
 		data->res.eof = 0;
 		data->res.count = bytes;
 
+		rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+				&nfs_read_direct_ops, data);
 		NFS_PROTO(inode)->read_setup(data);
 
 		data->task.tk_cookie = (unsigned long) inode;
-		data->complete = nfs_direct_read_result;
 
 		lock_kernel();
 		rpc_execute(&data->task);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index c4f7de8830e..cf186f0d2b3 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -811,29 +811,18 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
 
-static void nfs3_read_done(struct rpc_task *task, void *calldata)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_read_data *data = calldata;
-
 	if (nfs3_async_handle_jukebox(task, data->inode))
-		return;
+		return -EAGAIN;
 	/* Call back common NFS readpage processing */
 	if (task->tk_status >= 0)
 		nfs_refresh_inode(data->inode, &data->fattr);
-	nfs_readpage_result(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs3_read_ops = {
-	.rpc_call_done = nfs3_read_done,
-	.rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs3_proc_read_setup(struct nfs_read_data *data)
+static void nfs3_proc_read_setup(struct nfs_read_data *data)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_READ],
 		.rpc_argp	= &data->args,
@@ -841,12 +830,7 @@ nfs3_proc_read_setup(struct nfs_read_data *data)
 		.rpc_cred	= data->cred,
 	};
 
-	/* N.B. Do we need to test? Never called for swapfile inode */
-	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
 static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
@@ -935,6 +919,7 @@ struct nfs_rpc_ops	nfs_v3_clientops = {
 	.pathconf	= nfs3_proc_pathconf,
 	.decode_dirent	= nfs3_decode_dirent,
 	.read_setup	= nfs3_proc_read_setup,
+	.read_done	= nfs3_read_done,
 	.write_setup	= nfs3_proc_write_setup,
 	.write_done	= nfs3_write_done,
 	.commit_setup	= nfs3_proc_commit_setup,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ef4dc315ecc..bad1eae5608 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2345,47 +2345,31 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 	return err;
 }
 
-static void nfs4_read_done(struct rpc_task *task, void *calldata)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_read_data *data = calldata;
-	struct inode *inode = data->inode;
+	struct nfs_server *server = NFS_SERVER(data->inode);
 
-	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+	if (nfs4_async_handle_error(task, server) == -EAGAIN) {
 		rpc_restart_call(task);
-		return;
+		return -EAGAIN;
 	}
 	if (task->tk_status > 0)
-		renew_lease(NFS_SERVER(inode), data->timestamp);
-	/* Call back common NFS readpage processing */
-	nfs_readpage_result(task, calldata);
+		renew_lease(server, data->timestamp);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs4_read_ops = {
-	.rpc_call_done = nfs4_read_done,
-	.rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs4_proc_read_setup(struct nfs_read_data *data)
+static void nfs4_proc_read_setup(struct nfs_read_data *data)
 {
-	struct rpc_task	*task = &data->task;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
 		.rpc_cred = data->cred,
 	};
-	struct inode *inode = data->inode;
-	int flags;
 
 	data->timestamp   = jiffies;
 
-	/* N.B. Do we need to test? Never called for swapfile inode */
-	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
 static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
@@ -3617,6 +3601,7 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.pathconf	= nfs4_proc_pathconf,
 	.decode_dirent	= nfs4_decode_dirent,
 	.read_setup	= nfs4_proc_read_setup,
+	.read_done	= nfs4_read_done,
 	.write_setup	= nfs4_proc_write_setup,
 	.write_done	= nfs4_write_done,
 	.commit_setup	= nfs4_proc_commit_setup,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 608aa5932a1..9dd85cac2df 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -613,10 +613,8 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
 
-static void nfs_read_done(struct rpc_task *task, void *calldata)
+static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_read_data *data = calldata;
-
 	if (task->tk_status >= 0) {
 		nfs_refresh_inode(data->inode, data->res.fattr);
 		/* Emulate the eof flag, which isn't normally needed in NFSv2
@@ -625,20 +623,11 @@ static void nfs_read_done(struct rpc_task *task, void *calldata)
 		if (data->args.offset + data->args.count >= data->res.fattr->size)
 			data->res.eof = 1;
 	}
-	nfs_readpage_result(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs_read_ops = {
-	.rpc_call_done = nfs_read_done,
-	.rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs_proc_read_setup(struct nfs_read_data *data)
+static void nfs_proc_read_setup(struct nfs_read_data *data)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs_procedures[NFSPROC_READ],
 		.rpc_argp	= &data->args,
@@ -646,12 +635,7 @@ nfs_proc_read_setup(struct nfs_read_data *data)
 		.rpc_cred	= data->cred,
 	};
 
-	/* N.B. Do we need to test? Never called for swapfile inode */
-	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
 static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
@@ -720,6 +704,7 @@ struct nfs_rpc_ops	nfs_v2_clientops = {
 	.pathconf	= nfs_proc_pathconf,
 	.decode_dirent	= nfs_decode_dirent,
 	.read_setup	= nfs_proc_read_setup,
+	.read_done	= nfs_read_done,
 	.write_setup	= nfs_proc_write_setup,
 	.write_done	= nfs_write_done,
 	.commit_setup	= nfs_proc_commit_setup,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index ae3ddd24cf8..2da255f0247 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -36,8 +36,8 @@
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
 static int nfs_pagein_one(struct list_head *, struct inode *);
-static void nfs_readpage_result_partial(struct nfs_read_data *, int);
-static void nfs_readpage_result_full(struct nfs_read_data *, int);
+static const struct rpc_call_ops nfs_read_partial_ops;
+static const struct rpc_call_ops nfs_read_full_ops;
 
 static kmem_cache_t *nfs_rdata_cachep;
 mempool_t *nfs_rdata_mempool;
@@ -200,9 +200,11 @@ static void nfs_readpage_release(struct nfs_page *req)
  * Set up the NFS read request struct
  */
 static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+		const struct rpc_call_ops *call_ops,
 		unsigned int count, unsigned int offset)
 {
 	struct inode		*inode;
+	int flags;
 
 	data->req	  = req;
 	data->inode	  = inode = req->wb_context->dentry->d_inode;
@@ -220,6 +222,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	data->res.eof     = 0;
 	nfs_fattr_init(&data->fattr);
 
+	/* Set up the initial task struct. */
+	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
 	NFS_PROTO(inode)->read_setup(data);
 
 	data->task.tk_cookie = (unsigned long)inode;
@@ -307,14 +312,15 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
 		list_del_init(&data->pages);
 
 		data->pagevec[0] = page;
-		data->complete = nfs_readpage_result_partial;
 
 		if (nbytes > rsize) {
-			nfs_read_rpcsetup(req, data, rsize, offset);
+			nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+					rsize, offset);
 			offset += rsize;
 			nbytes -= rsize;
 		} else {
-			nfs_read_rpcsetup(req, data, nbytes, offset);
+			nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+					nbytes, offset);
 			nbytes = 0;
 		}
 		nfs_execute_read(data);
@@ -360,8 +366,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
 	}
 	req = nfs_list_entry(data->pages.next);
 
-	data->complete = nfs_readpage_result_full;
-	nfs_read_rpcsetup(req, data, count, 0);
+	nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
 
 	nfs_execute_read(data);
 	return 0;
@@ -395,12 +400,15 @@ nfs_pagein_list(struct list_head *head, int rpages)
 /*
  * Handle a read reply that fills part of a page.
  */
-static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
 {
+	struct nfs_read_data *data = calldata;
 	struct nfs_page *req = data->req;
 	struct page *page = req->wb_page;
  
-	if (status >= 0) {
+	if (nfs_readpage_result(task, data) != 0)
+		return;
+	if (task->tk_status >= 0) {
 		unsigned int request = data->args.count;
 		unsigned int result = data->res.count;
 
@@ -419,20 +427,28 @@ static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
 	}
 }
 
+static const struct rpc_call_ops nfs_read_partial_ops = {
+	.rpc_call_done = nfs_readpage_result_partial,
+	.rpc_release = nfs_readdata_release,
+};
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 {
+	struct nfs_read_data *data = calldata;
 	unsigned int count = data->res.count;
 
+	if (nfs_readpage_result(task, data) != 0)
+		return;
 	while (!list_empty(&data->pages)) {
 		struct nfs_page *req = nfs_list_entry(data->pages.next);
 		struct page *page = req->wb_page;
 		nfs_list_remove_request(req);
 
-		if (status >= 0) {
+		if (task->tk_status >= 0) {
 			if (count < PAGE_CACHE_SIZE) {
 				if (count < req->wb_bytes)
 					memclear_highpage_flush(page,
@@ -448,19 +464,27 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
 	}
 }
 
+static const struct rpc_call_ops nfs_read_full_ops = {
+	.rpc_call_done = nfs_readpage_result_full,
+	.rpc_release = nfs_readdata_release,
+};
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-void nfs_readpage_result(struct rpc_task *task, void *calldata)
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_read_data *data = calldata;
 	struct nfs_readargs *argp = &data->args;
 	struct nfs_readres *resp = &data->res;
-	int status = task->tk_status;
+	int status;
 
 	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
-		task->tk_pid, status);
+		task->tk_pid, task->tk_status);
+
+	status = NFS_PROTO(data->inode)->read_done(task, data);
+	if (status != 0)
+		return status;
 
 	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
 
@@ -474,14 +498,14 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata)
 			argp->pgbase += resp->count;
 			argp->count -= resp->count;
 			rpc_restart_call(task);
-			return;
+			return -EAGAIN;
 		}
 		task->tk_status = -EIO;
 	}
 	spin_lock(&data->inode->i_lock);
 	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
 	spin_unlock(&data->inode->i_lock);
-	data->complete(data, status);
+	return 0;
 }
 
 /*
-- 
cgit v1.2.3


From b8a32e2b8b7fefff994c89d398b6ac920a195b43 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:28 -0500
Subject: NFS: clean up NFS client's a_ops->direct_IO method

The NFS client's a_ops->direct_IO method, nfs_direct_IO, is required to
be present to allow NFS files to be opened with O_DIRECT, but is never
called because the NFS client shunts reads and writes to files opened
with O_DIRECT directly to its own routines.

Gut the nfs_direct_IO function.  This eliminates the only part of the
NFS client's direct I/O path that requires support for multi-segment
iovs, allowing further simplification in subsequent patches.

Test plan:
Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.  Millions
of fsx-odirect ops.  OraSim.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 70 +++++++++++++++++++--------------------------------------
 1 file changed, 23 insertions(+), 47 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 3f87a72bd13..8096d326bd7 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -78,6 +78,29 @@ struct nfs_direct_req {
 };
 
 
+/**
+ * nfs_direct_IO - NFS address space operation for direct I/O
+ * @rw: direction (read or write)
+ * @iocb: target I/O control block
+ * @iov: array of vectors that define I/O buffer
+ * @pos: offset in file to begin the operation
+ * @nr_segs: size of iovec array
+ *
+ * The presence of this routine in the address space ops vector means
+ * the NFS client supports direct I/O.  However, we shunt off direct
+ * read and write requests before the VFS gets them, so this method
+ * should never be called.
+ */
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+{
+	struct dentry *dentry = iocb->ki_filp->f_dentry;
+
+	dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
+			dentry->d_name.name, (long long) pos, nr_segs);
+
+	return -EINVAL;
+}
+
 /**
  * nfs_get_user_pages - find and set up pages underlying user's buffer
  * rw: direction (read or write)
@@ -605,53 +628,6 @@ static ssize_t nfs_direct_write(struct inode *inode,
 	return tot_bytes;
 }
 
-/**
- * nfs_direct_IO - NFS address space operation for direct I/O
- * rw: direction (read or write)
- * @iocb: target I/O control block
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- */
-ssize_t
-nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-		loff_t file_offset, unsigned long nr_segs)
-{
-	ssize_t result = -EINVAL;
-	struct file *file = iocb->ki_filp;
-	struct nfs_open_context *ctx;
-	struct dentry *dentry = file->f_dentry;
-	struct inode *inode = dentry->d_inode;
-
-	/*
-	 * No support for async yet
-	 */
-	if (!is_sync_kiocb(iocb))
-		return result;
-
-	ctx = (struct nfs_open_context *)file->private_data;
-	switch (rw) {
-	case READ:
-		dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n",
-				dentry->d_name.name, file_offset, nr_segs);
-
-		result = nfs_direct_read(inode, ctx, iov,
-						file_offset, nr_segs);
-		break;
-	case WRITE:
-		dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n",
-				dentry->d_name.name, file_offset, nr_segs);
-
-		result = nfs_direct_write(inode, ctx, iov,
-						file_offset, nr_segs);
-		break;
-	default:
-		break;
-	}
-	return result;
-}
-
 /**
  * nfs_file_direct_read - file direct read operation for NFS files
  * @iocb: target I/O control block
-- 
cgit v1.2.3


From d4cc948ba97980c55a308eab167a695109796456 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:28 -0500
Subject: NFS: update comments and function definitions in fs/nfs/direct.c

Update to latest coding style standards.  Remove block comments on
statically defined functions, and place function definitions all on
one line.

Test plan:
Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 125 +++++++-------------------------------------------------
 1 file changed, 15 insertions(+), 110 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8096d326bd7..d38c3dc052a 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -101,16 +101,7 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
 	return -EINVAL;
 }
 
-/**
- * nfs_get_user_pages - find and set up pages underlying user's buffer
- * rw: direction (read or write)
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * @pages: returned array of page struct pointers underlying user's buffer
- */
-static inline int
-nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
-		struct page ***pages)
+static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
 {
 	int result = -ENOMEM;
 	unsigned long page_count;
@@ -147,14 +138,7 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
 	return result;
 }
 
-/**
- * nfs_free_user_pages - tear down page struct array
- * @pages: array of page struct pointers underlying target buffer
- * @npages: number of pages in the array
- * @do_dirty: dirty the pages as we release them
- */
-static void
-nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
 {
 	int i;
 	for (i = 0; i < npages; i++) {
@@ -166,22 +150,13 @@ nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
 	kfree(pages);
 }
 
-/**
- * nfs_direct_req_release - release  nfs_direct_req structure for direct read
- * @kref: kref object embedded in an nfs_direct_req structure
- *
- */
 static void nfs_direct_req_release(struct kref *kref)
 {
 	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
 	kmem_cache_free(nfs_direct_cachep, dreq);
 }
 
-/**
- * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
- * @count: count of bytes for the read request
- * @rsize: local rsize setting
- *
+/*
  * Note we also set the number of requests we have in the dreq when we are
  * done.  This prevents races with I/O completion so we will always wait
  * until all requests have been dispatched and completed.
@@ -232,11 +207,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
 	return dreq;
 }
 
-/**
- * nfs_direct_read_result - handle a read reply for a direct read request
- * @data: address of NFS READ operation control block
- * @status: status of this NFS READ operation
- *
+/*
  * We must hold a reference to all the pages in this direct read request
  * until the RPCs complete.  This could be long *after* we are woken up in
  * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
@@ -265,21 +236,11 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
 	.rpc_release = nfs_readdata_release,
 };
 
-/**
- * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
- * @dreq: address of nfs_direct_req struct for this request
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- *
+/*
  * For each nfs_read_data struct that was allocated on the list, dispatch
  * an NFS READ operation
  */
-static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
-		struct inode *inode, struct nfs_open_context *ctx,
-		unsigned long user_addr, size_t count, loff_t file_offset)
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset)
 {
 	struct list_head *list = &dreq->list;
 	struct page **pages = dreq->pages;
@@ -337,11 +298,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
 	} while (count != 0);
 }
 
-/**
- * nfs_direct_read_wait - wait for I/O completion for direct reads
- * @dreq: request on which we are to wait
- * @intr: whether or not this wait can be interrupted
- *
+/*
  * Collects and returns the final error value/byte-count.
  */
 static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
@@ -364,22 +321,7 @@ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
 	return (ssize_t) result;
 }
 
-/**
- * nfs_direct_read_seg - Read in one iov segment.  Generate separate
- *                        read RPCs for each "rsize" bytes.
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * @nr_pages: number of pages in the array
- *
- */
-static ssize_t nfs_direct_read_seg(struct inode *inode,
-		struct nfs_open_context *ctx, unsigned long user_addr,
-		size_t count, loff_t file_offset, struct page **pages,
-		unsigned int nr_pages)
+static ssize_t nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages)
 {
 	ssize_t result;
 	sigset_t oldset;
@@ -404,22 +346,11 @@ static ssize_t nfs_direct_read_seg(struct inode *inode,
 	return result;
 }
 
-/**
- * nfs_direct_read - For each iov segment, map the user's buffer
- *                   then generate read RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
+/*
  * We've already pushed out any non-direct writes so that this read
  * will see them when we read from the server.
  */
-static ssize_t
-nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
-		const struct iovec *iov, loff_t file_offset,
-		unsigned long nr_segs)
+static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs)
 {
 	ssize_t tot_bytes = 0;
 	unsigned long seg = 0;
@@ -457,21 +388,7 @@ nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
 	return tot_bytes;
 }
 
-/**
- * nfs_direct_write_seg - Write out one iov segment.  Generate separate
- *                        write RPCs for each "wsize" bytes, then commit.
- * @inode: target inode
- * @ctx: target file open context
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * nr_pages: size of pages array
- */
-static ssize_t nfs_direct_write_seg(struct inode *inode,
-		struct nfs_open_context *ctx, unsigned long user_addr,
-		size_t count, loff_t file_offset, struct page **pages,
-		int nr_pages)
+static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages)
 {
 	const unsigned int wsize = NFS_SERVER(inode)->wsize;
 	size_t request;
@@ -573,22 +490,12 @@ sync_retry:
 	goto retry;
 }
 
-/**
- * nfs_direct_write - For each iov segment, map the user's buffer
- *                    then generate write and commit RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
+/*
  * Upon return, generic_file_direct_IO invalidates any cached pages
  * that non-direct readers might access, so they will pick up these
  * writes immediately.
  */
-static ssize_t nfs_direct_write(struct inode *inode,
-		struct nfs_open_context *ctx, const struct iovec *iov,
-		loff_t file_offset, unsigned long nr_segs)
+static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs)
 {
 	ssize_t tot_bytes = 0;
 	unsigned long seg = 0;
@@ -649,8 +556,7 @@ static ssize_t nfs_direct_write(struct inode *inode,
  * client must read the updated atime from the server back into its
  * cache.
  */
-ssize_t
-nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
 {
 	ssize_t retval = -EINVAL;
 	loff_t *ppos = &iocb->ki_pos;
@@ -717,8 +623,7 @@ out:
  * Note that O_APPEND is not supported for NFS direct writes, as there
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
-ssize_t
-nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
 {
 	ssize_t retval;
 	struct file *file = iocb->ki_filp;
-- 
cgit v1.2.3


From 5dd602f20688e08c85ac91e0451c4e6321ed25d7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:29 -0500
Subject: NFS: use size_t type for holding rsize bytes in NFS O_DIRECT read
 path

size_t is used for holding byte counts, so use it for variables storing rsize.
Note that the write path will be updated as we add support for async
O_DIRECT writes.

Test plan:
Need to verify that existing comparisons against new size_t variables behave
correctly.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d38c3dc052a..8f5d2dfd5a8 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -161,7 +161,7 @@ static void nfs_direct_req_release(struct kref *kref)
  * done.  This prevents races with I/O completion so we will always wait
  * until all requests have been dispatched and completed.
  */
-static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
+static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
 {
 	struct list_head *list;
 	struct nfs_direct_req *dreq;
@@ -244,14 +244,14 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, struct inode *
 {
 	struct list_head *list = &dreq->list;
 	struct page **pages = dreq->pages;
+	size_t rsize = NFS_SERVER(inode)->rsize;
 	unsigned int curpage, pgbase;
-	unsigned int rsize = NFS_SERVER(inode)->rsize;
 
 	curpage = 0;
 	pgbase = user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_read_data *data;
-		unsigned int bytes;
+		size_t bytes;
 
 		bytes = rsize;
 		if (count < rsize)
-- 
cgit v1.2.3


From 0cdd80d07fb0f558dfdb30f6e0b9905f5e5475f1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:29 -0500
Subject: NFS: remove support for multi-segment iovs in the direct read path

Eliminate the persistent use of automatic storage in all parts of the NFS
client's direct read path to pave the way for introducing support for aio
against files opened with the O_DIRECT flag.

Test plan:
Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.
Millions of fsx-odirect ops.  OraSim.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 66 +++++++++++++--------------------------------------------
 1 file changed, 15 insertions(+), 51 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8f5d2dfd5a8..6ecde9602f9 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -321,7 +321,7 @@ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
 	return (ssize_t) result;
 }
 
-static ssize_t nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages)
+static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages)
 {
 	ssize_t result;
 	sigset_t oldset;
@@ -346,48 +346,6 @@ static ssize_t nfs_direct_read_seg(struct inode *inode, struct nfs_open_context
 	return result;
 }
 
-/*
- * We've already pushed out any non-direct writes so that this read
- * will see them when we read from the server.
- */
-static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs)
-{
-	ssize_t tot_bytes = 0;
-	unsigned long seg = 0;
-
-	while ((seg < nr_segs) && (tot_bytes >= 0)) {
-		ssize_t result;
-		int page_count;
-		struct page **pages;
-		const struct iovec *vec = &iov[seg++];
-		unsigned long user_addr = (unsigned long) vec->iov_base;
-		size_t size = vec->iov_len;
-
-                page_count = nfs_get_user_pages(READ, user_addr, size, &pages);
-                if (page_count < 0) {
-                        nfs_free_user_pages(pages, 0, 0);
-			if (tot_bytes > 0)
-				break;
-                        return page_count;
-                }
-
-		result = nfs_direct_read_seg(inode, ctx, user_addr, size,
-				file_offset, pages, page_count);
-
-		if (result <= 0) {
-			if (tot_bytes > 0)
-				break;
-			return result;
-		}
-		tot_bytes += result;
-		file_offset += result;
-		if (result < size)
-			break;
-	}
-
-	return tot_bytes;
-}
-
 static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages)
 {
 	const unsigned int wsize = NFS_SERVER(inode)->wsize;
@@ -559,16 +517,13 @@ static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ct
 ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
 {
 	ssize_t retval = -EINVAL;
-	loff_t *ppos = &iocb->ki_pos;
+	int page_count;
+	struct page **pages;
 	struct file *file = iocb->ki_filp;
 	struct nfs_open_context *ctx =
 			(struct nfs_open_context *) file->private_data;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
-	struct iovec iov = {
-		.iov_base = buf,
-		.iov_len = count,
-	};
 
 	dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
@@ -580,7 +535,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count,
 	if (count < 0)
 		goto out;
 	retval = -EFAULT;
-	if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len))
+	if (!access_ok(VERIFY_WRITE, buf, count))
 		goto out;
 	retval = 0;
 	if (!count)
@@ -590,9 +545,18 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count,
 	if (retval)
 		goto out;
 
-	retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
+	page_count = nfs_get_user_pages(READ, (unsigned long) buf,
+						count, &pages);
+	if (page_count < 0) {
+		nfs_free_user_pages(pages, 0, 0);
+		retval = page_count;
+		goto out;
+	}
+
+	retval = nfs_direct_read(inode, ctx, (unsigned long) buf, count, pos,
+						pages, page_count);
 	if (retval > 0)
-		*ppos = pos + retval;
+		iocb->ki_pos = pos + retval;
 
 out:
 	return retval;
-- 
cgit v1.2.3


From 99514f8fdda2beef1ca922b7f9d89c1a2c57fec0 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:30 -0500
Subject: NFS: make iocb available everywhere in direct read path

Pass the iocb argument all the way down to the direct read request
scheduler, and make it available in nfs_direct_read_result.

Test plan:
Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.
Millions of fsx-odirect ops.  OraSim.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 6ecde9602f9..6cbddc51acb 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -68,6 +68,8 @@ static kmem_cache_t *nfs_direct_cachep;
 struct nfs_direct_req {
 	struct kref		kref;		/* release manager */
 	struct list_head	list;		/* nfs_read_data structs */
+	struct file *		filp;		/* file descriptor */
+	struct kiocb *		iocb;		/* controlling i/o request */
 	wait_queue_head_t	wait;		/* wait for i/o completion */
 	struct inode *		inode;		/* target file of I/O */
 	struct page **		pages;		/* pages in our buffer */
@@ -240,8 +242,12 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
  * For each nfs_read_data struct that was allocated on the list, dispatch
  * an NFS READ operation
  */
-static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset)
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t file_offset)
 {
+	struct file *file = dreq->filp;
+	struct inode *inode = file->f_mapping->host;
+	struct nfs_open_context *ctx = (struct nfs_open_context *)
+							file->private_data;
 	struct list_head *list = &dreq->list;
 	struct page **pages = dreq->pages;
 	size_t rsize = NFS_SERVER(inode)->rsize;
@@ -321,10 +327,11 @@ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
 	return (ssize_t) result;
 }
 
-static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages)
+static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages)
 {
 	ssize_t result;
 	sigset_t oldset;
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
 
@@ -335,11 +342,11 @@ static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
 	dreq->inode = inode;
+	dreq->filp = iocb->ki_filp;
 
 	nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
 	rpc_clnt_sigmask(clnt, &oldset);
-	nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
-				 file_offset);
+	nfs_direct_read_schedule(dreq, user_addr, count, file_offset);
 	result = nfs_direct_read_wait(dreq, clnt->cl_intr);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
@@ -520,10 +527,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count,
 	int page_count;
 	struct page **pages;
 	struct file *file = iocb->ki_filp;
-	struct nfs_open_context *ctx =
-			(struct nfs_open_context *) file->private_data;
 	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
 
 	dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
@@ -553,7 +557,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count,
 		goto out;
 	}
 
-	retval = nfs_direct_read(inode, ctx, (unsigned long) buf, count, pos,
+	retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
 						pages, page_count);
 	if (retval > 0)
 		iocb->ki_pos = pos + retval;
-- 
cgit v1.2.3


From 487b83723ed4d4eaafd5109f36560da4f15c6578 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:30 -0500
Subject: NFS: support EIOCBQUEUED return in direct read path

For async iocb's, the NFS direct read path should return EIOCBQUEUED and
call aio_complete when all the requested reads are finished.  The
synchronous part of the NFS direct read path behaves exactly as it was
before.

Test plan:
aio-stress with "-O".  OraSim.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 6cbddc51acb..094456c3df9 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -177,6 +177,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
 	kref_init(&dreq->kref);
 	init_waitqueue_head(&dreq->wait);
 	INIT_LIST_HEAD(&dreq->list);
+	dreq->iocb = NULL;
 	atomic_set(&dreq->count, 0);
 	atomic_set(&dreq->error, 0);
 
@@ -213,6 +214,10 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
  * We must hold a reference to all the pages in this direct read request
  * until the RPCs complete.  This could be long *after* we are woken up in
  * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
+ *
+ * In addition, synchronous I/O uses a stack-allocated iocb.  Thus we
+ * can't trust the iocb is still valid here if this is a synchronous
+ * request.  If the waiter is woken prematurely, the iocb is long gone.
  */
 static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 {
@@ -228,7 +233,13 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 
 	if (unlikely(atomic_dec_and_test(&dreq->complete))) {
 		nfs_free_user_pages(dreq->pages, dreq->npages, 1);
-		wake_up(&dreq->wait);
+		if (dreq->iocb) {
+			long res = atomic_read(&dreq->error);
+			if (!res)
+				res = atomic_read(&dreq->count);
+			aio_complete(dreq->iocb, res, 0);
+		} else
+			wake_up(&dreq->wait);
 		kref_put(&dreq->kref, nfs_direct_req_release);
 	}
 }
@@ -309,8 +320,13 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long
  */
 static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
 {
-	int result = 0;
+	int result = -EIOCBQUEUED;
+
+	/* Async requests don't wait here */
+ 	if (dreq->iocb)
+		goto out;
 
+	result = 0;
 	if (intr) {
 		result = wait_event_interruptible(dreq->wait,
 					(atomic_read(&dreq->complete) == 0));
@@ -323,6 +339,7 @@ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
 	if (!result)
 		result = atomic_read(&dreq->count);
 
+out:
 	kref_put(&dreq->kref, nfs_direct_req_release);
 	return (ssize_t) result;
 }
@@ -343,6 +360,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 	dreq->npages = nr_pages;
 	dreq->inode = inode;
 	dreq->filp = iocb->ki_filp;
+	if (!is_sync_kiocb(iocb))
+		dreq->iocb = iocb;
 
 	nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
 	rpc_clnt_sigmask(clnt, &oldset);
@@ -534,8 +553,6 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count,
 		file->f_dentry->d_name.name,
 		(unsigned long) count, (long long) pos);
 
-	if (!is_sync_kiocb(iocb))
-		goto out;
 	if (count < 0)
 		goto out;
 	retval = -EFAULT;
-- 
cgit v1.2.3


From bc0fb201b34b12e2d16e8cbd5bb078c1db936304 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:31 -0500
Subject: NFS: create common routine for waiting for direct I/O to complete

We're about to add asynchrony to the NFS direct write path.  Begin by
abstracting out the common pieces in the read path.

The first piece is nfs_direct_read_wait, which works the same whether the
process is waiting for a read or a write.

Test plan:
Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 57 ++++++++++++++++++++++++++-------------------------------
 1 file changed, 26 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 094456c3df9..2593f47eaff 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -158,6 +158,30 @@ static void nfs_direct_req_release(struct kref *kref)
 	kmem_cache_free(nfs_direct_cachep, dreq);
 }
 
+/*
+ * Collects and returns the final error value/byte-count.
+ */
+static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
+{
+	int result = -EIOCBQUEUED;
+
+	/* Async requests don't wait here */
+	if (dreq->iocb)
+		goto out;
+
+	result = wait_event_interruptible(dreq->wait,
+					(atomic_read(&dreq->complete) == 0));
+
+	if (!result)
+		result = atomic_read(&dreq->error);
+	if (!result)
+		result = atomic_read(&dreq->count);
+
+out:
+	kref_put(&dreq->kref, nfs_direct_req_release);
+	return (ssize_t) result;
+}
+
 /*
  * Note we also set the number of requests we have in the dreq when we are
  * done.  This prevents races with I/O completion so we will always wait
@@ -213,7 +237,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
 /*
  * We must hold a reference to all the pages in this direct read request
  * until the RPCs complete.  This could be long *after* we are woken up in
- * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
+ * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
  *
  * In addition, synchronous I/O uses a stack-allocated iocb.  Thus we
  * can't trust the iocb is still valid here if this is a synchronous
@@ -315,35 +339,6 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long
 	} while (count != 0);
 }
 
-/*
- * Collects and returns the final error value/byte-count.
- */
-static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
-{
-	int result = -EIOCBQUEUED;
-
-	/* Async requests don't wait here */
- 	if (dreq->iocb)
-		goto out;
-
-	result = 0;
-	if (intr) {
-		result = wait_event_interruptible(dreq->wait,
-					(atomic_read(&dreq->complete) == 0));
-	} else {
-		wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
-	}
-
-	if (!result)
-		result = atomic_read(&dreq->error);
-	if (!result)
-		result = atomic_read(&dreq->count);
-
-out:
-	kref_put(&dreq->kref, nfs_direct_req_release);
-	return (ssize_t) result;
-}
-
 static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages)
 {
 	ssize_t result;
@@ -366,7 +361,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 	nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
 	rpc_clnt_sigmask(clnt, &oldset);
 	nfs_direct_read_schedule(dreq, user_addr, count, file_offset);
-	result = nfs_direct_read_wait(dreq, clnt->cl_intr);
+	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
 	return result;
-- 
cgit v1.2.3


From 93619e5989173614bef0013b0bb8a3fe3dbd5a95 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:31 -0500
Subject: NFS: create common routine for allocating nfs_direct_req

Factor out a small common piece of the path that allocate nfs_direct_req
structures.

Test plan:
Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 2593f47eaff..489f736d0f5 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -152,6 +152,24 @@ static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
 	kfree(pages);
 }
 
+static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
+{
+	struct nfs_direct_req *dreq;
+
+	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+	if (!dreq)
+		return NULL;
+
+	kref_init(&dreq->kref);
+	init_waitqueue_head(&dreq->wait);
+	INIT_LIST_HEAD(&dreq->list);
+	dreq->iocb = NULL;
+	atomic_set(&dreq->count, 0);
+	atomic_set(&dreq->error, 0);
+
+	return dreq;
+}
+
 static void nfs_direct_req_release(struct kref *kref)
 {
 	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
@@ -194,17 +212,10 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
 	unsigned int reads = 0;
 	unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
-	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+	dreq = nfs_direct_req_alloc();
 	if (!dreq)
 		return NULL;
 
-	kref_init(&dreq->kref);
-	init_waitqueue_head(&dreq->wait);
-	INIT_LIST_HEAD(&dreq->list);
-	dreq->iocb = NULL;
-	atomic_set(&dreq->count, 0);
-	atomic_set(&dreq->error, 0);
-
 	list = &dreq->list;
 	for(;;) {
 		struct nfs_read_data *data = nfs_readdata_alloc(rpages);
-- 
cgit v1.2.3


From 63ab46abc70b01cb0711301f5ddb08c1c0bb9b1c Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:31 -0500
Subject: NFS: create common routine for handling direct I/O completion

Factor out the common piece of completing an NFS direct I/O request.

Test plan:
Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 46 ++++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 489f736d0f5..4df21ce28e1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -200,6 +200,30 @@ out:
 	return (ssize_t) result;
 }
 
+/*
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete.  This could be long *after* we are woken up in
+ * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
+ *
+ * In addition, synchronous I/O uses a stack-allocated iocb.  Thus we
+ * can't trust the iocb is still valid here if this is a synchronous
+ * request.  If the waiter is woken prematurely, the iocb is long gone.
+ */
+static void nfs_direct_complete(struct nfs_direct_req *dreq)
+{
+	nfs_free_user_pages(dreq->pages, dreq->npages, 1);
+
+	if (dreq->iocb) {
+		long res = atomic_read(&dreq->error);
+		if (!res)
+			res = atomic_read(&dreq->count);
+		aio_complete(dreq->iocb, res, 0);
+	} else
+		wake_up(&dreq->wait);
+
+	kref_put(&dreq->kref, nfs_direct_req_release);
+}
+
 /*
  * Note we also set the number of requests we have in the dreq when we are
  * done.  This prevents races with I/O completion so we will always wait
@@ -245,15 +269,6 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
 	return dreq;
 }
 
-/*
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete.  This could be long *after* we are woken up in
- * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
- *
- * In addition, synchronous I/O uses a stack-allocated iocb.  Thus we
- * can't trust the iocb is still valid here if this is a synchronous
- * request.  If the waiter is woken prematurely, the iocb is long gone.
- */
 static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;
@@ -266,17 +281,8 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 	else
 		atomic_set(&dreq->error, task->tk_status);
 
-	if (unlikely(atomic_dec_and_test(&dreq->complete))) {
-		nfs_free_user_pages(dreq->pages, dreq->npages, 1);
-		if (dreq->iocb) {
-			long res = atomic_read(&dreq->error);
-			if (!res)
-				res = atomic_read(&dreq->count);
-			aio_complete(dreq->iocb, res, 0);
-		} else
-			wake_up(&dreq->wait);
-		kref_put(&dreq->kref, nfs_direct_req_release);
-	}
+	if (unlikely(atomic_dec_and_test(&dreq->complete)))
+		nfs_direct_complete(dreq);
 }
 
 static const struct rpc_call_ops nfs_read_direct_ops = {
-- 
cgit v1.2.3


From 462d5b3296b56289efec426499a83faad4c08d9e Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:32 -0500
Subject: NFS: make direct write path generate write requests concurrently

Duplicate infrastructure from direct read path that will allow write
path to generate multiple write requests concurrently.  This will
enable us to add support for aio in this path.

Temporarily we will lose the ability to do UNSTABLE writes followed by
a COMMIT in the direct write path.  However, all applications I am
aware of that use NFS O_DIRECT currently write in relatively small
chunks, so this should not be inconvenient in any way.

Test plan:
Millions of fsx-odirect ops. OraSim.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 240 +++++++++++++++++++++++++++++++++++++-------------------
 fs/nfs/write.c  |   3 +-
 2 files changed, 160 insertions(+), 83 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4df21ce28e1..dea3239cdde 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -384,106 +384,185 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 	return result;
 }
 
-static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages)
+static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
 {
-	const unsigned int wsize = NFS_SERVER(inode)->wsize;
-	size_t request;
-	int curpage, need_commit;
-	ssize_t result, tot_bytes;
-	struct nfs_writeverf first_verf;
-	struct nfs_write_data *wdata;
-
-	wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
-	if (!wdata)
-		return -ENOMEM;
+	struct list_head *list;
+	struct nfs_direct_req *dreq;
+	unsigned int writes = 0;
+	unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
-	wdata->inode = inode;
-	wdata->cred = ctx->cred;
-	wdata->args.fh = NFS_FH(inode);
-	wdata->args.context = ctx;
-	wdata->args.stable = NFS_UNSTABLE;
-	if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
-		wdata->args.stable = NFS_FILE_SYNC;
-	wdata->res.fattr = &wdata->fattr;
-	wdata->res.verf = &wdata->verf;
+	dreq = nfs_direct_req_alloc();
+	if (!dreq)
+		return NULL;
+
+	list = &dreq->list;
+	for(;;) {
+		struct nfs_write_data *data = nfs_writedata_alloc(wpages);
+
+		if (unlikely(!data)) {
+			while (!list_empty(list)) {
+				data = list_entry(list->next,
+						  struct nfs_write_data, pages);
+				list_del(&data->pages);
+				nfs_writedata_free(data);
+			}
+			kref_put(&dreq->kref, nfs_direct_req_release);
+			return NULL;
+		}
+
+		INIT_LIST_HEAD(&data->pages);
+		list_add(&data->pages, list);
+
+		data->req = (struct nfs_page *) dreq;
+		writes++;
+		if (nbytes <= wsize)
+			break;
+		nbytes -= wsize;
+	}
+	kref_get(&dreq->kref);
+	atomic_set(&dreq->complete, writes);
+	return dreq;
+}
+
+/*
+ * Collects and returns the final error value/byte-count.
+ */
+static ssize_t nfs_direct_write_wait(struct nfs_direct_req *dreq, int intr)
+{
+	int result = 0;
+
+	if (intr) {
+		result = wait_event_interruptible(dreq->wait,
+					(atomic_read(&dreq->complete) == 0));
+	} else {
+		wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
+	}
+
+	if (!result)
+		result = atomic_read(&dreq->error);
+	if (!result)
+		result = atomic_read(&dreq->count);
+
+	kref_put(&dreq->kref, nfs_direct_req_release);
+	return (ssize_t) result;
+}
+
+static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
+{
+	struct nfs_write_data *data = calldata;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+	int status = task->tk_status;
+
+	if (nfs_writeback_done(task, data) != 0)
+		return;
+	/* If the server fell back to an UNSTABLE write, it's an error. */
+	if (unlikely(data->res.verf->committed != NFS_FILE_SYNC))
+		status = -EIO;
+
+	if (likely(status >= 0))
+		atomic_add(data->res.count, &dreq->count);
+	else
+		atomic_set(&dreq->error, status);
+
+	if (unlikely(atomic_dec_and_test(&dreq->complete)))
+		nfs_direct_complete(dreq);
+}
+
+static const struct rpc_call_ops nfs_write_direct_ops = {
+	.rpc_call_done = nfs_direct_write_result,
+	.rpc_release = nfs_writedata_release,
+};
+
+/*
+ * For each nfs_write_data struct that was allocated on the list, dispatch
+ * an NFS WRITE operation
+ *
+ * XXX: For now, support only FILE_SYNC writes.  Later we may add
+ *      support for UNSTABLE + COMMIT.
+ */
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset)
+{
+	struct list_head *list = &dreq->list;
+	struct page **pages = dreq->pages;
+	size_t wsize = NFS_SERVER(inode)->wsize;
+	unsigned int curpage, pgbase;
 
-	nfs_begin_data_update(inode);
-retry:
-	need_commit = 0;
-	tot_bytes = 0;
 	curpage = 0;
-	request = count;
-	wdata->args.pgbase = user_addr & ~PAGE_MASK;
-	wdata->args.offset = file_offset;
+	pgbase = user_addr & ~PAGE_MASK;
 	do {
-		wdata->args.count = request;
-		if (wdata->args.count > wsize)
-			wdata->args.count = wsize;
-		wdata->args.pages = &pages[curpage];
+		struct nfs_write_data *data;
+		size_t bytes;
+
+		bytes = wsize;
+		if (count < wsize)
+			bytes = count;
+
+		data = list_entry(list->next, struct nfs_write_data, pages);
+		list_del_init(&data->pages);
+
+		data->inode = inode;
+		data->cred = ctx->cred;
+		data->args.fh = NFS_FH(inode);
+		data->args.context = ctx;
+		data->args.offset = file_offset;
+		data->args.pgbase = pgbase;
+		data->args.pages = &pages[curpage];
+		data->args.count = bytes;
+		data->res.fattr = &data->fattr;
+		data->res.count = bytes;
+
+		rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+				&nfs_write_direct_ops, data);
+		NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE);
 
-		dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
-			wdata->args.count, (long long) wdata->args.offset,
-			user_addr + tot_bytes, wdata->args.pgbase, curpage);
+		data->task.tk_priority = RPC_PRIORITY_NORMAL;
+		data->task.tk_cookie = (unsigned long) inode;
 
 		lock_kernel();
-		result = NFS_PROTO(inode)->write(wdata);
+		rpc_execute(&data->task);
 		unlock_kernel();
 
-		if (result <= 0) {
-			if (tot_bytes > 0)
-				break;
-			goto out;
-		}
+		dfprintk(VFS, "NFS: %4d initiated direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+				data->task.tk_pid,
+				inode->i_sb->s_id,
+				(long long)NFS_FILEID(inode),
+				bytes,
+				(unsigned long long)data->args.offset);
 
-		if (tot_bytes == 0)
-			memcpy(&first_verf.verifier, &wdata->verf.verifier,
-						sizeof(first_verf.verifier));
-		if (wdata->verf.committed != NFS_FILE_SYNC) {
-			need_commit = 1;
-			if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
-					sizeof(first_verf.verifier)))
-				goto sync_retry;
-		}
+		file_offset += bytes;
+		pgbase += bytes;
+		curpage += pgbase >> PAGE_SHIFT;
+		pgbase &= ~PAGE_MASK;
 
-		tot_bytes += result;
+		count -= bytes;
+	} while (count != 0);
+}
 
-		/* in case of a short write: stop now, let the app recover */
-		if (result < wdata->args.count)
-			break;
+static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages)
+{
+	ssize_t result;
+	sigset_t oldset;
+	struct rpc_clnt *clnt = NFS_CLIENT(inode);
+	struct nfs_direct_req *dreq;
 
-		wdata->args.offset += result;
-		wdata->args.pgbase += result;
-		curpage += wdata->args.pgbase >> PAGE_SHIFT;
-		wdata->args.pgbase &= ~PAGE_MASK;
-		request -= result;
-	} while (request != 0);
+	dreq = nfs_direct_write_alloc(count, NFS_SERVER(inode)->wsize);
+	if (!dreq)
+		return -ENOMEM;
 
-	/*
-	 * Commit data written so far, even in the event of an error
-	 */
-	if (need_commit) {
-		wdata->args.count = tot_bytes;
-		wdata->args.offset = file_offset;
+	dreq->pages = pages;
+	dreq->npages = nr_pages;
 
-		lock_kernel();
-		result = NFS_PROTO(inode)->commit(wdata);
-		unlock_kernel();
+	nfs_begin_data_update(inode);
 
-		if (result < 0 || memcmp(&first_verf.verifier,
-					 &wdata->verf.verifier,
-					 sizeof(first_verf.verifier)) != 0)
-			goto sync_retry;
-	}
-	result = tot_bytes;
+	rpc_clnt_sigmask(clnt, &oldset);
+	nfs_direct_write_schedule(dreq, inode, ctx, user_addr, count,
+				  file_offset);
+	result = nfs_direct_write_wait(dreq, clnt->cl_intr);
+	rpc_clnt_sigunmask(clnt, &oldset);
 
-out:
 	nfs_end_data_update(inode);
-	nfs_writedata_free(wdata);
-	return result;
 
-sync_retry:
-	wdata->args.stable = NFS_FILE_SYNC;
-	goto retry;
+	return result;
 }
 
 /*
@@ -515,7 +594,6 @@ static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ct
 		nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, size);
 		result = nfs_direct_write_seg(inode, ctx, user_addr, size,
 				file_offset, pages, page_count);
-		nfs_free_user_pages(pages, page_count, 0);
 
 		if (result <= 0) {
 			if (tot_bytes > 0)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5912274ff1a..875f5b06053 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -77,7 +77,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
 					    struct inode *,
 					    struct page *,
 					    unsigned int, unsigned int);
-static int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
 static int nfs_wait_on_write_congestion(struct address_space *, int);
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
 static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
@@ -1183,7 +1182,7 @@ static const struct rpc_call_ops nfs_write_full_ops = {
 /*
  * This function is called when the WRITE call is complete.
  */
-static int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
+int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 {
 	struct nfs_writeargs	*argp = &data->args;
 	struct nfs_writeres	*resp = &data->res;
-- 
cgit v1.2.3


From 47989d7454398827500d0e73766270986a3b488f Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:32 -0500
Subject: NFS: remove support for multi-segment iovs in the direct write path

Eliminate the persistent use of automatic storage in all parts of the
NFS client's direct write path to pave the way for introducing support
for aio against files opened with the O_DIRECT flag.

Test plan:
Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.
Millions of fsx-odirect ops.  OraSim.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 68 +++++++++++++++------------------------------------------
 1 file changed, 17 insertions(+), 51 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index dea3239cdde..9a7d4590705 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -510,6 +510,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode
 		data->args.count = bytes;
 		data->res.fattr = &data->fattr;
 		data->res.count = bytes;
+		data->res.verf = &data->verf;
 
 		rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
 				&nfs_write_direct_ops, data);
@@ -538,7 +539,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode
 	} while (count != 0);
 }
 
-static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages)
+static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages)
 {
 	ssize_t result;
 	sigset_t oldset;
@@ -552,6 +553,8 @@ static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
 
+	nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
+
 	nfs_begin_data_update(inode);
 
 	rpc_clnt_sigmask(clnt, &oldset);
@@ -565,50 +568,6 @@ static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context
 	return result;
 }
 
-/*
- * Upon return, generic_file_direct_IO invalidates any cached pages
- * that non-direct readers might access, so they will pick up these
- * writes immediately.
- */
-static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs)
-{
-	ssize_t tot_bytes = 0;
-	unsigned long seg = 0;
-
-	while ((seg < nr_segs) && (tot_bytes >= 0)) {
-		ssize_t result;
-		int page_count;
-		struct page **pages;
-		const struct iovec *vec = &iov[seg++];
-		unsigned long user_addr = (unsigned long) vec->iov_base;
-		size_t size = vec->iov_len;
-
-                page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages);
-                if (page_count < 0) {
-                        nfs_free_user_pages(pages, 0, 0);
-			if (tot_bytes > 0)
-				break;
-                        return page_count;
-                }
-
-		nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, size);
-		result = nfs_direct_write_seg(inode, ctx, user_addr, size,
-				file_offset, pages, page_count);
-
-		if (result <= 0) {
-			if (tot_bytes > 0)
-				break;
-			return result;
-		}
-		nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
-		tot_bytes += result;
-		file_offset += result;
-		if (result < size)
-			break;
-	}
-	return tot_bytes;
-}
-
 /**
  * nfs_file_direct_read - file direct read operation for NFS files
  * @iocb: target I/O control block
@@ -701,14 +660,13 @@ out:
 ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
 {
 	ssize_t retval;
+	int page_count;
+	struct page **pages;
 	struct file *file = iocb->ki_filp;
 	struct nfs_open_context *ctx =
 			(struct nfs_open_context *) file->private_data;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
-	struct iovec iov = {
-		.iov_base = (char __user *)buf,
-	};
 
 	dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
@@ -729,17 +687,25 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t
 	retval = 0;
 	if (!count)
 		goto out;
-	iov.iov_len = count,
 
 	retval = -EFAULT;
-	if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
+	if (!access_ok(VERIFY_READ, buf, count))
 		goto out;
 
 	retval = nfs_sync_mapping(mapping);
 	if (retval)
 		goto out;
 
-	retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
+	page_count = nfs_get_user_pages(WRITE, (unsigned long) buf,
+						count, &pages);
+	if (page_count < 0) {
+		nfs_free_user_pages(pages, 0, 0);
+		retval = page_count;
+		goto out;
+	}
+
+	retval = nfs_direct_write(inode, ctx, (unsigned long) buf, count,
+					pos, pages, page_count);
 	if (mapping->nrpages)
 		invalidate_inode_pages2(mapping);
 	if (retval > 0)
-- 
cgit v1.2.3


From c89f2ee5f9223b864725f7344f24a037dfa76568 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:33 -0500
Subject: NFS: make iocb available everywhere in direct write path

Pass the iocb argument all the way down to the direct write request
scheduler, and make it available in nfs_direct_write_result.

Test plan:
Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.
Millions of fsx-odirect ops.  OraSim.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 46 ++++++++++++++--------------------------------
 1 file changed, 14 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 9a7d4590705..9d57a299824 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -424,29 +424,6 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize
 	return dreq;
 }
 
-/*
- * Collects and returns the final error value/byte-count.
- */
-static ssize_t nfs_direct_write_wait(struct nfs_direct_req *dreq, int intr)
-{
-	int result = 0;
-
-	if (intr) {
-		result = wait_event_interruptible(dreq->wait,
-					(atomic_read(&dreq->complete) == 0));
-	} else {
-		wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
-	}
-
-	if (!result)
-		result = atomic_read(&dreq->error);
-	if (!result)
-		result = atomic_read(&dreq->count);
-
-	kref_put(&dreq->kref, nfs_direct_req_release);
-	return (ssize_t) result;
-}
-
 static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
@@ -480,8 +457,12 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
  * XXX: For now, support only FILE_SYNC writes.  Later we may add
  *      support for UNSTABLE + COMMIT.
  */
-static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset)
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t file_offset)
 {
+	struct file *file = dreq->filp;
+	struct inode *inode = file->f_mapping->host;
+	struct nfs_open_context *ctx = (struct nfs_open_context *)
+							file->private_data;
 	struct list_head *list = &dreq->list;
 	struct page **pages = dreq->pages;
 	size_t wsize = NFS_SERVER(inode)->wsize;
@@ -539,10 +520,11 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode
 	} while (count != 0);
 }
 
-static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages)
+static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages)
 {
 	ssize_t result;
 	sigset_t oldset;
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
 
@@ -552,15 +534,18 @@ static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ct
 
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
+	dreq->inode = inode;
+	dreq->filp = iocb->ki_filp;
+	if (!is_sync_kiocb(iocb))
+		dreq->iocb = iocb;
 
 	nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
 
 	nfs_begin_data_update(inode);
 
 	rpc_clnt_sigmask(clnt, &oldset);
-	nfs_direct_write_schedule(dreq, inode, ctx, user_addr, count,
-				  file_offset);
-	result = nfs_direct_write_wait(dreq, clnt->cl_intr);
+	nfs_direct_write_schedule(dreq, user_addr, count, file_offset);
+	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
 	nfs_end_data_update(inode);
@@ -663,10 +648,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t
 	int page_count;
 	struct page **pages;
 	struct file *file = iocb->ki_filp;
-	struct nfs_open_context *ctx =
-			(struct nfs_open_context *) file->private_data;
 	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
 
 	dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
@@ -704,7 +686,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t
 		goto out;
 	}
 
-	retval = nfs_direct_write(inode, ctx, (unsigned long) buf, count,
+	retval = nfs_direct_write(iocb, (unsigned long) buf, count,
 					pos, pages, page_count);
 	if (mapping->nrpages)
 		invalidate_inode_pages2(mapping);
-- 
cgit v1.2.3


From 9eafa8cc521b489f205bf7b0634c99e34e046606 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:33 -0500
Subject: NFS: support EIOCBQUEUED return in direct write path

For async iocb's, the NFS direct write path now returns EIOCBQUEUED,
and calls aio_complete when all the requested writes are finished.  The
synchronous part of the NFS direct write path behaves exactly as it
was before.

Shared mapped NFS files will have some coherency difficulties when
accessed concurrently with aio+dio.  Will need to explore how this
is handled in the local file system case.

Test plan:
aio-stress with "-O". OraSim.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 9d57a299824..df86e526702 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -441,8 +441,10 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 	else
 		atomic_set(&dreq->error, status);
 
-	if (unlikely(atomic_dec_and_test(&dreq->complete)))
+	if (unlikely(atomic_dec_and_test(&dreq->complete))) {
+		nfs_end_data_update(data->inode);
 		nfs_direct_complete(dreq);
+	}
 }
 
 static const struct rpc_call_ops nfs_write_direct_ops = {
@@ -548,8 +550,6 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
-	nfs_end_data_update(inode);
-
 	return result;
 }
 
@@ -655,10 +655,6 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t
 		file->f_dentry->d_name.name,
 		(unsigned long) count, (long long) pos);
 
-	retval = -EINVAL;
-	if (!is_sync_kiocb(iocb))
-		goto out;
-
 	retval = generic_write_checks(file, &pos, &count, 0);
 	if (retval)
 		goto out;
@@ -688,8 +684,18 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t
 
 	retval = nfs_direct_write(iocb, (unsigned long) buf, count,
 					pos, pages, page_count);
+
+	/*
+	 * XXX: nfs_end_data_update() already ensures this file's
+	 *      cached data is subsequently invalidated.  Do we really
+	 *      need to call invalidate_inode_pages2() again here?
+	 *
+	 *      For aio writes, this invalidation will almost certainly
+	 *      occur before the writes complete.  Kind of racey.
+	 */
 	if (mapping->nrpages)
 		invalidate_inode_pages2(mapping);
+
 	if (retval > 0)
 		iocb->ki_pos = pos + retval;
 
-- 
cgit v1.2.3


From 88467055f7654302c12df74e5fe4d12516656a39 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:34 -0500
Subject: NFS: clean up comments and tab damage in direct.c

Clean up tab damage and comments.  Replace "file_offset" with more commonly
used "pos".

Test plan:
Compile with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 54 +++++++++++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index df86e526702..bcbc213b403 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -7,11 +7,11 @@
  *
  * There are important applications whose performance or correctness
  * depends on uncached access to file data.  Database clusters
- * (multiple copies of the same instance running on separate hosts) 
+ * (multiple copies of the same instance running on separate hosts)
  * implement their own cache coherency protocol that subsumes file
- * system cache protocols.  Applications that process datasets 
- * considerably larger than the client's memory do not always benefit 
- * from a local cache.  A streaming video server, for instance, has no 
+ * system cache protocols.  Applications that process datasets
+ * considerably larger than the client's memory do not always benefit
+ * from a local cache.  A streaming video server, for instance, has no
  * need to cache the contents of a file.
  *
  * When an application requests uncached I/O, all read and write requests
@@ -34,6 +34,7 @@
  * 08 Jun 2003	Port to 2.5 APIs  --cel
  * 31 Mar 2004	Handle direct I/O without VFS support  --cel
  * 15 Sep 2004	Parallel async reads  --cel
+ * 04 May 2005	support O_DIRECT with aio  --cel
  *
  */
 
@@ -67,11 +68,11 @@ static kmem_cache_t *nfs_direct_cachep;
  */
 struct nfs_direct_req {
 	struct kref		kref;		/* release manager */
-	struct list_head	list;		/* nfs_read_data structs */
+	struct list_head	list;		/* nfs_read/write_data structs */
 	struct file *		filp;		/* file descriptor */
 	struct kiocb *		iocb;		/* controlling i/o request */
 	wait_queue_head_t	wait;		/* wait for i/o completion */
-	struct inode *		inode;		/* target file of I/O */
+	struct inode *		inode;		/* target file of i/o */
 	struct page **		pages;		/* pages in our buffer */
 	unsigned int		npages;		/* count of pages */
 	atomic_t		complete,	/* i/os we're waiting for */
@@ -110,7 +111,6 @@ static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t siz
 	size_t array_size;
 
 	/* set an arbitrary limit to prevent type overflow */
-	/* XXX: this can probably be as large as INT_MAX */
 	if (size > MAX_DIRECTIO_SIZE) {
 		*pages = NULL;
 		return -EFBIG;
@@ -294,7 +294,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
  * For each nfs_read_data struct that was allocated on the list, dispatch
  * an NFS READ operation
  */
-static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t file_offset)
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
 {
 	struct file *file = dreq->filp;
 	struct inode *inode = file->f_mapping->host;
@@ -322,7 +322,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long
 		data->cred = ctx->cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
-		data->args.offset = file_offset;
+		data->args.offset = pos;
 		data->args.pgbase = pgbase;
 		data->args.pages = &pages[curpage];
 		data->args.count = bytes;
@@ -347,7 +347,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long
 				bytes,
 				(unsigned long long)data->args.offset);
 
-		file_offset += bytes;
+		pos += bytes;
 		pgbase += bytes;
 		curpage += pgbase >> PAGE_SHIFT;
 		pgbase &= ~PAGE_MASK;
@@ -356,7 +356,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long
 	} while (count != 0);
 }
 
-static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages)
+static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
 {
 	ssize_t result;
 	sigset_t oldset;
@@ -377,7 +377,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 
 	nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
 	rpc_clnt_sigmask(clnt, &oldset);
-	nfs_direct_read_schedule(dreq, user_addr, count, file_offset);
+	nfs_direct_read_schedule(dreq, user_addr, count, pos);
 	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
@@ -459,7 +459,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
  * XXX: For now, support only FILE_SYNC writes.  Later we may add
  *      support for UNSTABLE + COMMIT.
  */
-static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t file_offset)
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
 {
 	struct file *file = dreq->filp;
 	struct inode *inode = file->f_mapping->host;
@@ -487,7 +487,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long
 		data->cred = ctx->cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
-		data->args.offset = file_offset;
+		data->args.offset = pos;
 		data->args.pgbase = pgbase;
 		data->args.pages = &pages[curpage];
 		data->args.count = bytes;
@@ -513,7 +513,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long
 				bytes,
 				(unsigned long long)data->args.offset);
 
-		file_offset += bytes;
+		pos += bytes;
 		pgbase += bytes;
 		curpage += pgbase >> PAGE_SHIFT;
 		pgbase &= ~PAGE_MASK;
@@ -522,7 +522,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long
 	} while (count != 0);
 }
 
-static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages)
+static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
 {
 	ssize_t result;
 	sigset_t oldset;
@@ -546,7 +546,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	nfs_begin_data_update(inode);
 
 	rpc_clnt_sigmask(clnt, &oldset);
-	nfs_direct_write_schedule(dreq, user_addr, count, file_offset);
+	nfs_direct_write_schedule(dreq, user_addr, count, pos);
 	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
@@ -557,18 +557,18 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
  * nfs_file_direct_read - file direct read operation for NFS files
  * @iocb: target I/O control block
  * @buf: user's buffer into which to read data
- * count: number of bytes to read
- * pos: byte offset in file where reading starts
+ * @count: number of bytes to read
+ * @pos: byte offset in file where reading starts
  *
  * We use this function for direct reads instead of calling
  * generic_file_aio_read() in order to avoid gfar's check to see if
  * the request starts before the end of the file.  For that check
  * to work, we must generate a GETATTR before each direct read, and
  * even then there is a window between the GETATTR and the subsequent
- * READ where the file size could change.  So our preference is simply
+ * READ where the file size could change.  Our preference is simply
  * to do all reads the application wants, and the server will take
  * care of managing the end of file boundary.
- * 
+ *
  * This function also eliminates unnecessarily updating the file's
  * atime locally, as the NFS server sets the file's atime, and this
  * client must read the updated atime from the server back into its
@@ -621,8 +621,8 @@ out:
  * nfs_file_direct_write - file direct write operation for NFS files
  * @iocb: target I/O control block
  * @buf: user's buffer from which to write data
- * count: number of bytes to write
- * pos: byte offset in file where writing starts
+ * @count: number of bytes to write
+ * @pos: byte offset in file where writing starts
  *
  * We use this function for direct writes instead of calling
  * generic_file_aio_write() in order to avoid taking the inode
@@ -703,6 +703,10 @@ out:
 	return retval;
 }
 
+/**
+ * nfs_init_directcache - create a slab cache for nfs_direct_req structures
+ *
+ */
 int nfs_init_directcache(void)
 {
 	nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
@@ -715,6 +719,10 @@ int nfs_init_directcache(void)
 	return 0;
 }
 
+/**
+ * nfs_init_directcache - destroy the slab cache for nfs_direct_req structures
+ *
+ */
 void nfs_destroy_directcache(void)
 {
 	if (kmem_cache_destroy(nfs_direct_cachep))
-- 
cgit v1.2.3


From 15ce4a0c1ce0d5e288398cb9e5493fd4e55e2025 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:34 -0500
Subject: NFS: Replace atomic_t variables in nfs_direct_req with a single spin
 lock

Three atomic_t variables cause a lot of bus locking.  Because they are all
used in the same places in the code, just use a single spin lock.

Now that the atomic_t variables are gone, we can remove the request size
limitation since the code no longer depends on the limited width of atomic_t
on some platforms.

Test plan:
Compile with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.  Millions of fsx
operations, iozone, OraSim.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 81 +++++++++++++++++++++++++++++++++------------------------
 1 file changed, 47 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bcbc213b403..3de7c4b0796 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -58,7 +58,6 @@
 #include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
-#define MAX_DIRECTIO_SIZE	(4096UL << PAGE_SHIFT)
 
 static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty);
 static kmem_cache_t *nfs_direct_cachep;
@@ -68,6 +67,8 @@ static kmem_cache_t *nfs_direct_cachep;
  */
 struct nfs_direct_req {
 	struct kref		kref;		/* release manager */
+
+	/* I/O parameters */
 	struct list_head	list;		/* nfs_read/write_data structs */
 	struct file *		filp;		/* file descriptor */
 	struct kiocb *		iocb;		/* controlling i/o request */
@@ -75,12 +76,14 @@ struct nfs_direct_req {
 	struct inode *		inode;		/* target file of i/o */
 	struct page **		pages;		/* pages in our buffer */
 	unsigned int		npages;		/* count of pages */
-	atomic_t		complete,	/* i/os we're waiting for */
-				count,		/* bytes actually processed */
+
+	/* completion state */
+	spinlock_t		lock;		/* protect completion state */
+	int			outstanding;	/* i/os we're waiting for */
+	ssize_t			count,		/* bytes actually processed */
 				error;		/* any reported error */
 };
 
-
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
  * @rw: direction (read or write)
@@ -110,12 +113,6 @@ static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t siz
 	unsigned long page_count;
 	size_t array_size;
 
-	/* set an arbitrary limit to prevent type overflow */
-	if (size > MAX_DIRECTIO_SIZE) {
-		*pages = NULL;
-		return -EFBIG;
-	}
-
 	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	page_count -= user_addr >> PAGE_SHIFT;
 
@@ -164,8 +161,10 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 	init_waitqueue_head(&dreq->wait);
 	INIT_LIST_HEAD(&dreq->list);
 	dreq->iocb = NULL;
-	atomic_set(&dreq->count, 0);
-	atomic_set(&dreq->error, 0);
+	spin_lock_init(&dreq->lock);
+	dreq->outstanding = 0;
+	dreq->count = 0;
+	dreq->error = 0;
 
 	return dreq;
 }
@@ -181,19 +180,18 @@ static void nfs_direct_req_release(struct kref *kref)
  */
 static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
 {
-	int result = -EIOCBQUEUED;
+	ssize_t result = -EIOCBQUEUED;
 
 	/* Async requests don't wait here */
 	if (dreq->iocb)
 		goto out;
 
-	result = wait_event_interruptible(dreq->wait,
-					(atomic_read(&dreq->complete) == 0));
+	result = wait_event_interruptible(dreq->wait, (dreq->outstanding == 0));
 
 	if (!result)
-		result = atomic_read(&dreq->error);
+		result = dreq->error;
 	if (!result)
-		result = atomic_read(&dreq->count);
+		result = dreq->count;
 
 out:
 	kref_put(&dreq->kref, nfs_direct_req_release);
@@ -214,9 +212,9 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 	nfs_free_user_pages(dreq->pages, dreq->npages, 1);
 
 	if (dreq->iocb) {
-		long res = atomic_read(&dreq->error);
+		long res = (long) dreq->error;
 		if (!res)
-			res = atomic_read(&dreq->count);
+			res = (long) dreq->count;
 		aio_complete(dreq->iocb, res, 0);
 	} else
 		wake_up(&dreq->wait);
@@ -233,7 +231,6 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
 {
 	struct list_head *list;
 	struct nfs_direct_req *dreq;
-	unsigned int reads = 0;
 	unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
 	dreq = nfs_direct_req_alloc();
@@ -259,13 +256,12 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
 		list_add(&data->pages, list);
 
 		data->req = (struct nfs_page *) dreq;
-		reads++;
+		dreq->outstanding++;
 		if (nbytes <= rsize)
 			break;
 		nbytes -= rsize;
 	}
 	kref_get(&dreq->kref);
-	atomic_set(&dreq->complete, reads);
 	return dreq;
 }
 
@@ -276,13 +272,21 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 
 	if (nfs_readpage_result(task, data) != 0)
 		return;
+
+	spin_lock(&dreq->lock);
+
 	if (likely(task->tk_status >= 0))
-		atomic_add(data->res.count, &dreq->count);
+		dreq->count += data->res.count;
 	else
-		atomic_set(&dreq->error, task->tk_status);
+		dreq->error = task->tk_status;
+
+	if (--dreq->outstanding) {
+		spin_unlock(&dreq->lock);
+		return;
+	}
 
-	if (unlikely(atomic_dec_and_test(&dreq->complete)))
-		nfs_direct_complete(dreq);
+	spin_unlock(&dreq->lock);
+	nfs_direct_complete(dreq);
 }
 
 static const struct rpc_call_ops nfs_read_direct_ops = {
@@ -388,7 +392,6 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize
 {
 	struct list_head *list;
 	struct nfs_direct_req *dreq;
-	unsigned int writes = 0;
 	unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
 	dreq = nfs_direct_req_alloc();
@@ -414,16 +417,19 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize
 		list_add(&data->pages, list);
 
 		data->req = (struct nfs_page *) dreq;
-		writes++;
+		dreq->outstanding++;
 		if (nbytes <= wsize)
 			break;
 		nbytes -= wsize;
 	}
 	kref_get(&dreq->kref);
-	atomic_set(&dreq->complete, writes);
 	return dreq;
 }
 
+/*
+ * NB: Return the value of the first error return code.  Subsequent
+ *     errors after the first one are ignored.
+ */
 static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
@@ -436,15 +442,22 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 	if (unlikely(data->res.verf->committed != NFS_FILE_SYNC))
 		status = -EIO;
 
+	spin_lock(&dreq->lock);
+
 	if (likely(status >= 0))
-		atomic_add(data->res.count, &dreq->count);
+		dreq->count += data->res.count;
 	else
-		atomic_set(&dreq->error, status);
+		dreq->error = status;
 
-	if (unlikely(atomic_dec_and_test(&dreq->complete))) {
-		nfs_end_data_update(data->inode);
-		nfs_direct_complete(dreq);
+	if (--dreq->outstanding) {
+		spin_unlock(&dreq->lock);
+		return;
 	}
+
+	spin_unlock(&dreq->lock);
+
+	nfs_end_data_update(data->inode);
+	nfs_direct_complete(dreq);
 }
 
 static const struct rpc_call_ops nfs_write_direct_ops = {
-- 
cgit v1.2.3


From a37ec012d7fd352648c8455d3396ea24001efcd3 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:35 -0500
Subject: NFS: fix data_update accounting in NFS direct I/O path

^C against "iozone -I" is hitting the assertion in nfs_clear_inode().

Test plan:
"iozone -i0 -I -a -c" against a slow server, then control C.  This should
not cause an oops.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 3de7c4b0796..737990dd4df 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -219,6 +219,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 	} else
 		wake_up(&dreq->wait);
 
+	iput(dreq->inode);
 	kref_put(&dreq->kref, nfs_direct_req_release);
 }
 
@@ -374,6 +375,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
+	igrab(inode);
 	dreq->inode = inode;
 	dreq->filp = iocb->ki_filp;
 	if (!is_sync_kiocb(iocb))
@@ -549,6 +551,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
+	igrab(inode);
 	dreq->inode = inode;
 	dreq->filp = iocb->ki_filp;
 	if (!is_sync_kiocb(iocb))
-- 
cgit v1.2.3


From e17b1fc4b35399935f00a635206e183d9292fe4f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:35 -0500
Subject: NFS: Make nfs_commit_alloc() extern

We need to use nfs_commit_alloc() in fs/nfs/direct.c.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 875f5b06053..f7c8be0becc 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -91,7 +91,7 @@ static mempool_t *nfs_commit_mempool;
 
 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
 
-static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
 {
 	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
 
@@ -112,7 +112,7 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
 	return p;
 }
 
-static inline void nfs_commit_free(struct nfs_write_data *p)
+void nfs_commit_free(struct nfs_write_data *p)
 {
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
-- 
cgit v1.2.3


From fad61490419b3e494f300e9b2579810ef3bcda31 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:36 -0500
Subject: nfs: Use UNSTABLE + COMMIT for NFS O_DIRECT writes

Currently NFS O_DIRECT writes use FILE_SYNC so that a COMMIT is not
necessary.  This simplifies the internal logic, but this could be a
difficult workload for some servers.

Instead, let's send UNSTABLE writes, and after they all complete, send a
COMMIT for the dirty range.  After the COMMIT returns successfully, then do
the wake_up or fire off aio_complete().

Test plan:
Async direct I/O tests against Solaris (or any server that requires
committed unstable writes).  Reboot server during test.

Based on an earlier patch by Chuck Lever <cel@netapp.com>

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 224 +++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 199 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 737990dd4df..f0f2053c7a6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -69,11 +69,15 @@ struct nfs_direct_req {
 	struct kref		kref;		/* release manager */
 
 	/* I/O parameters */
-	struct list_head	list;		/* nfs_read/write_data structs */
+	struct list_head	list,		/* nfs_read/write_data structs */
+				rewrite_list;	/* saved nfs_write_data structs */
 	struct file *		filp;		/* file descriptor */
 	struct kiocb *		iocb;		/* controlling i/o request */
 	wait_queue_head_t	wait;		/* wait for i/o completion */
 	struct inode *		inode;		/* target file of i/o */
+	unsigned long		user_addr;	/* location of user's buffer */
+	size_t			user_count;	/* total bytes to move */
+	loff_t			pos;		/* starting offset in file */
 	struct page **		pages;		/* pages in our buffer */
 	unsigned int		npages;		/* count of pages */
 
@@ -82,8 +86,18 @@ struct nfs_direct_req {
 	int			outstanding;	/* i/os we're waiting for */
 	ssize_t			count,		/* bytes actually processed */
 				error;		/* any reported error */
+
+	/* commit state */
+	struct nfs_write_data *	commit_data;	/* special write_data for commits */
+	int			flags;
+#define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
+#define NFS_ODIRECT_RESCHED_WRITES	(2)	/* write verification failed */
+	struct nfs_writeverf	verf;		/* unstable write verifier */
 };
 
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync);
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
+
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
  * @rw: direction (read or write)
@@ -160,11 +174,13 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 	kref_init(&dreq->kref);
 	init_waitqueue_head(&dreq->wait);
 	INIT_LIST_HEAD(&dreq->list);
+	INIT_LIST_HEAD(&dreq->rewrite_list);
 	dreq->iocb = NULL;
 	spin_lock_init(&dreq->lock);
 	dreq->outstanding = 0;
 	dreq->count = 0;
 	dreq->error = 0;
+	dreq->flags = 0;
 
 	return dreq;
 }
@@ -299,7 +315,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
  * For each nfs_read_data struct that was allocated on the list, dispatch
  * an NFS READ operation
  */
-static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
 {
 	struct file *file = dreq->filp;
 	struct inode *inode = file->f_mapping->host;
@@ -307,11 +323,13 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long
 							file->private_data;
 	struct list_head *list = &dreq->list;
 	struct page **pages = dreq->pages;
+	size_t count = dreq->user_count;
+	loff_t pos = dreq->pos;
 	size_t rsize = NFS_SERVER(inode)->rsize;
 	unsigned int curpage, pgbase;
 
 	curpage = 0;
-	pgbase = user_addr & ~PAGE_MASK;
+	pgbase = dreq->user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_read_data *data;
 		size_t bytes;
@@ -373,6 +391,9 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 	if (!dreq)
 		return -ENOMEM;
 
+	dreq->user_addr = user_addr;
+	dreq->user_count = count;
+	dreq->pos = pos;
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
 	igrab(inode);
@@ -383,13 +404,137 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 
 	nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
 	rpc_clnt_sigmask(clnt, &oldset);
-	nfs_direct_read_schedule(dreq, user_addr, count, pos);
+	nfs_direct_read_schedule(dreq);
 	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
 	return result;
 }
 
+static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
+{
+	list_splice_init(&dreq->rewrite_list, &dreq->list);
+	while (!list_empty(&dreq->list)) {
+		struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
+		list_del(&data->pages);
+		nfs_writedata_release(data);
+	}
+}
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+{
+	struct list_head *pos;
+
+	list_splice_init(&dreq->rewrite_list, &dreq->list);
+	list_for_each(pos, &dreq->list)
+		dreq->outstanding++;
+	dreq->count = 0;
+
+	nfs_direct_write_schedule(dreq, FLUSH_STABLE);
+}
+
+static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
+{
+	struct nfs_write_data *data = calldata;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+	/* Call the NFS version-specific code */
+	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+		return;
+	if (unlikely(task->tk_status < 0)) {
+		dreq->error = task->tk_status;
+		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+	}
+	if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+		dprintk("NFS: %5u commit verify failed\n", task->tk_pid);
+		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+	}
+
+	dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status);
+	nfs_direct_write_complete(dreq, data->inode);
+}
+
+static const struct rpc_call_ops nfs_commit_direct_ops = {
+	.rpc_call_done = nfs_direct_commit_result,
+	.rpc_release = nfs_commit_release,
+};
+
+static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
+{
+	struct file *file = dreq->filp;
+	struct nfs_open_context *ctx = (struct nfs_open_context *)
+							file->private_data;
+	struct nfs_write_data *data = dreq->commit_data;
+	struct rpc_task *task = &data->task;
+
+	data->inode = dreq->inode;
+	data->cred = ctx->cred;
+
+	data->args.fh = NFS_FH(data->inode);
+	data->args.offset = dreq->pos;
+	data->args.count = dreq->user_count;
+	data->res.count = 0;
+	data->res.fattr = &data->fattr;
+	data->res.verf = &data->verf;
+
+	rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC,
+				&nfs_commit_direct_ops, data);
+	NFS_PROTO(data->inode)->commit_setup(data, 0);
+
+	data->task.tk_priority = RPC_PRIORITY_NORMAL;
+	data->task.tk_cookie = (unsigned long)data->inode;
+	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+	dreq->commit_data = NULL;
+
+	dprintk("NFS: %5u initiated commit call\n", task->tk_pid);
+
+	lock_kernel();
+	rpc_execute(&data->task);
+	unlock_kernel();
+}
+
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+	int flags = dreq->flags;
+
+	dreq->flags = 0;
+	switch (flags) {
+		case NFS_ODIRECT_DO_COMMIT:
+			nfs_direct_commit_schedule(dreq);
+			break;
+		case NFS_ODIRECT_RESCHED_WRITES:
+			nfs_direct_write_reschedule(dreq);
+			break;
+		default:
+			nfs_end_data_update(inode);
+			if (dreq->commit_data != NULL)
+				nfs_commit_free(dreq->commit_data);
+			nfs_direct_free_writedata(dreq);
+			nfs_direct_complete(dreq);
+	}
+}
+
+static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+	dreq->commit_data = nfs_commit_alloc(0);
+	if (dreq->commit_data != NULL)
+		dreq->commit_data->req = (struct nfs_page *) dreq;
+}
+#else
+static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+	dreq->commit_data = NULL;
+}
+
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+	nfs_end_data_update(inode);
+	nfs_direct_free_writedata(dreq);
+	nfs_direct_complete(dreq);
+}
+#endif
+
 static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
 {
 	struct list_head *list;
@@ -424,14 +569,13 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize
 			break;
 		nbytes -= wsize;
 	}
+
+	nfs_alloc_commit_data(dreq);
+
 	kref_get(&dreq->kref);
 	return dreq;
 }
 
-/*
- * NB: Return the value of the first error return code.  Subsequent
- *     errors after the first one are ignored.
- */
 static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
@@ -440,41 +584,62 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 
 	if (nfs_writeback_done(task, data) != 0)
 		return;
-	/* If the server fell back to an UNSTABLE write, it's an error. */
-	if (unlikely(data->res.verf->committed != NFS_FILE_SYNC))
-		status = -EIO;
 
 	spin_lock(&dreq->lock);
 
 	if (likely(status >= 0))
 		dreq->count += data->res.count;
 	else
-		dreq->error = status;
+		dreq->error = task->tk_status;
 
+	if (data->res.verf->committed != NFS_FILE_SYNC) {
+		switch (dreq->flags) {
+			case 0:
+				memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
+				dreq->flags = NFS_ODIRECT_DO_COMMIT;
+				break;
+			case NFS_ODIRECT_DO_COMMIT:
+				if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
+					dprintk("NFS: %5u write verify failed\n", task->tk_pid);
+					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+				}
+		}
+	}
+	/* In case we have to resend */
+	data->args.stable = NFS_FILE_SYNC;
+
+	spin_unlock(&dreq->lock);
+}
+
+/*
+ * NB: Return the value of the first error return code.  Subsequent
+ *     errors after the first one are ignored.
+ */
+static void nfs_direct_write_release(void *calldata)
+{
+	struct nfs_write_data *data = calldata;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+	spin_lock(&dreq->lock);
 	if (--dreq->outstanding) {
 		spin_unlock(&dreq->lock);
 		return;
 	}
-
 	spin_unlock(&dreq->lock);
 
-	nfs_end_data_update(data->inode);
-	nfs_direct_complete(dreq);
+	nfs_direct_write_complete(dreq, data->inode);
 }
 
 static const struct rpc_call_ops nfs_write_direct_ops = {
 	.rpc_call_done = nfs_direct_write_result,
-	.rpc_release = nfs_writedata_release,
+	.rpc_release = nfs_direct_write_release,
 };
 
 /*
  * For each nfs_write_data struct that was allocated on the list, dispatch
  * an NFS WRITE operation
- *
- * XXX: For now, support only FILE_SYNC writes.  Later we may add
- *      support for UNSTABLE + COMMIT.
  */
-static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
 {
 	struct file *file = dreq->filp;
 	struct inode *inode = file->f_mapping->host;
@@ -482,11 +647,13 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long
 							file->private_data;
 	struct list_head *list = &dreq->list;
 	struct page **pages = dreq->pages;
+	size_t count = dreq->user_count;
+	loff_t pos = dreq->pos;
 	size_t wsize = NFS_SERVER(inode)->wsize;
 	unsigned int curpage, pgbase;
 
 	curpage = 0;
-	pgbase = user_addr & ~PAGE_MASK;
+	pgbase = dreq->user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_write_data *data;
 		size_t bytes;
@@ -496,7 +663,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long
 			bytes = count;
 
 		data = list_entry(list->next, struct nfs_write_data, pages);
-		list_del_init(&data->pages);
+		list_move_tail(&data->pages, &dreq->rewrite_list);
 
 		data->inode = inode;
 		data->cred = ctx->cred;
@@ -512,7 +679,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long
 
 		rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
 				&nfs_write_direct_ops, data);
-		NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE);
+		NFS_PROTO(inode)->write_setup(data, sync);
 
 		data->task.tk_priority = RPC_PRIORITY_NORMAL;
 		data->task.tk_cookie = (unsigned long) inode;
@@ -544,11 +711,18 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
+	size_t wsize = NFS_SERVER(inode)->wsize;
+	int sync = 0;
 
-	dreq = nfs_direct_write_alloc(count, NFS_SERVER(inode)->wsize);
+	dreq = nfs_direct_write_alloc(count, wsize);
 	if (!dreq)
 		return -ENOMEM;
+	if (dreq->commit_data == NULL || count < wsize)
+		sync = FLUSH_STABLE;
 
+	dreq->user_addr = user_addr;
+	dreq->user_count = count;
+	dreq->pos = pos;
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
 	igrab(inode);
@@ -562,7 +736,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	nfs_begin_data_update(inode);
 
 	rpc_clnt_sigmask(clnt, &oldset);
-	nfs_direct_write_schedule(dreq, user_addr, count, pos);
+	nfs_direct_write_schedule(dreq, sync);
 	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
-- 
cgit v1.2.3


From a8881f5a5c723f82da84b786d3ca83a0df9e0c33 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:36 -0500
Subject: NFS: O_DIRECT async IO may lose context

The struct nfs_direct_req currently keeps a pointer to the file descriptor
without referencing it. This may cause problems if the parent process is
killed.

The nfs_open_context should normally have all the information that we're
currently using the filp for, and unlike fput(), is safe to release from
an rpciod process context.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 30 ++++++++++++------------------
 1 file changed, 12 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f0f2053c7a6..e6585b9ff45 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -71,7 +71,7 @@ struct nfs_direct_req {
 	/* I/O parameters */
 	struct list_head	list,		/* nfs_read/write_data structs */
 				rewrite_list;	/* saved nfs_write_data structs */
-	struct file *		filp;		/* file descriptor */
+	struct nfs_open_context	*ctx;		/* file open context info */
 	struct kiocb *		iocb;		/* controlling i/o request */
 	wait_queue_head_t	wait;		/* wait for i/o completion */
 	struct inode *		inode;		/* target file of i/o */
@@ -176,6 +176,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 	INIT_LIST_HEAD(&dreq->list);
 	INIT_LIST_HEAD(&dreq->rewrite_list);
 	dreq->iocb = NULL;
+	dreq->ctx = NULL;
 	spin_lock_init(&dreq->lock);
 	dreq->outstanding = 0;
 	dreq->count = 0;
@@ -188,6 +189,9 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 static void nfs_direct_req_release(struct kref *kref)
 {
 	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+
+	if (dreq->ctx != NULL)
+		put_nfs_open_context(dreq->ctx);
 	kmem_cache_free(nfs_direct_cachep, dreq);
 }
 
@@ -235,7 +239,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 	} else
 		wake_up(&dreq->wait);
 
-	iput(dreq->inode);
 	kref_put(&dreq->kref, nfs_direct_req_release);
 }
 
@@ -317,10 +320,8 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
  */
 static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
 {
-	struct file *file = dreq->filp;
-	struct inode *inode = file->f_mapping->host;
-	struct nfs_open_context *ctx = (struct nfs_open_context *)
-							file->private_data;
+	struct nfs_open_context *ctx = dreq->ctx;
+	struct inode *inode = ctx->dentry->d_inode;
 	struct list_head *list = &dreq->list;
 	struct page **pages = dreq->pages;
 	size_t count = dreq->user_count;
@@ -396,9 +397,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 	dreq->pos = pos;
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
-	igrab(inode);
 	dreq->inode = inode;
-	dreq->filp = iocb->ki_filp;
+	dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
@@ -462,14 +462,11 @@ static const struct rpc_call_ops nfs_commit_direct_ops = {
 
 static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 {
-	struct file *file = dreq->filp;
-	struct nfs_open_context *ctx = (struct nfs_open_context *)
-							file->private_data;
 	struct nfs_write_data *data = dreq->commit_data;
 	struct rpc_task *task = &data->task;
 
 	data->inode = dreq->inode;
-	data->cred = ctx->cred;
+	data->cred = dreq->ctx->cred;
 
 	data->args.fh = NFS_FH(data->inode);
 	data->args.offset = dreq->pos;
@@ -641,10 +638,8 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
  */
 static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
 {
-	struct file *file = dreq->filp;
-	struct inode *inode = file->f_mapping->host;
-	struct nfs_open_context *ctx = (struct nfs_open_context *)
-							file->private_data;
+	struct nfs_open_context *ctx = dreq->ctx;
+	struct inode *inode = ctx->dentry->d_inode;
 	struct list_head *list = &dreq->list;
 	struct page **pages = dreq->pages;
 	size_t count = dreq->user_count;
@@ -725,9 +720,8 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	dreq->pos = pos;
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
-	igrab(inode);
 	dreq->inode = inode;
-	dreq->filp = iocb->ki_filp;
+	dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-- 
cgit v1.2.3


From 5db3a7b2cabe8f0957683f798c4f8fa8605f9ebb Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:37 -0500
Subject: NFS: Debugging code for nfs_direct_(read|write)_schedule()

Make sure that we're doing our list accounting correctly.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e6585b9ff45..1e3725e51df 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -339,6 +339,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
 		if (count < rsize)
 			bytes = count;
 
+		BUG_ON(list_empty(list));
 		data = list_entry(list->next, struct nfs_read_data, pages);
 		list_del_init(&data->pages);
 
@@ -378,6 +379,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
 
 		count -= bytes;
 	} while (count != 0);
+	BUG_ON(!list_empty(list));
 }
 
 static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
@@ -657,6 +659,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
 		if (count < wsize)
 			bytes = count;
 
+		BUG_ON(list_empty(list));
 		data = list_entry(list->next, struct nfs_write_data, pages);
 		list_move_tail(&data->pages, &dreq->rewrite_list);
 
@@ -697,6 +700,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
 
 		count -= bytes;
 	} while (count != 0);
+	BUG_ON(!list_empty(list));
 }
 
 static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
-- 
cgit v1.2.3


From 3feb2d49394b7874348a6e43c076b780c1d222c5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:37 -0500
Subject: NFS: Uninline nfs_writedata_(alloc|free) and
 nfs_readdata_(alloc|free)

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/read.c  | 32 +++++++++++++++++++++++++++++++-
 fs/nfs/write.c | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 62 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 2da255f0247..3961524fd4a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -40,10 +40,40 @@ static const struct rpc_call_ops nfs_read_partial_ops;
 static const struct rpc_call_ops nfs_read_full_ops;
 
 static kmem_cache_t *nfs_rdata_cachep;
-mempool_t *nfs_rdata_mempool;
+static mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ	(32)
 
+struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+{
+	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		INIT_LIST_HEAD(&p->pages);
+		if (pagecount < NFS_PAGEVEC_SIZE)
+			p->pagevec = &p->page_array[0];
+		else {
+			size_t size = ++pagecount * sizeof(struct page *);
+			p->pagevec = kmalloc(size, GFP_NOFS);
+			if (p->pagevec) {
+				memset(p->pagevec, 0, size);
+			} else {
+				mempool_free(p, nfs_rdata_mempool);
+				p = NULL;
+			}
+		}
+	}
+	return p;
+}
+
+void nfs_readdata_free(struct nfs_read_data *p)
+{
+	if (p && (p->pagevec != &p->page_array[0]))
+		kfree(p->pagevec);
+	mempool_free(p, nfs_rdata_mempool);
+}
+
 void nfs_readdata_release(void *data)
 {
         nfs_readdata_free(data);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f7c8be0becc..647e3217c2e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -86,7 +86,7 @@ static const struct rpc_call_ops nfs_write_full_ops;
 static const struct rpc_call_ops nfs_commit_ops;
 
 static kmem_cache_t *nfs_wdata_cachep;
-mempool_t *nfs_wdata_mempool;
+static mempool_t *nfs_wdata_mempool;
 static mempool_t *nfs_commit_mempool;
 
 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
@@ -119,6 +119,36 @@ void nfs_commit_free(struct nfs_write_data *p)
 	mempool_free(p, nfs_commit_mempool);
 }
 
+struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+{
+	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
+
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		INIT_LIST_HEAD(&p->pages);
+		if (pagecount < NFS_PAGEVEC_SIZE)
+			p->pagevec = &p->page_array[0];
+		else {
+			size_t size = ++pagecount * sizeof(struct page *);
+			p->pagevec = kmalloc(size, GFP_NOFS);
+			if (p->pagevec) {
+				memset(p->pagevec, 0, size);
+			} else {
+				mempool_free(p, nfs_wdata_mempool);
+				p = NULL;
+			}
+		}
+	}
+	return p;
+}
+
+void nfs_writedata_free(struct nfs_write_data *p)
+{
+	if (p && (p->pagevec != &p->page_array[0]))
+		kfree(p->pagevec);
+	mempool_free(p, nfs_wdata_mempool);
+}
+
 void nfs_writedata_release(void *wdata)
 {
 	nfs_writedata_free(wdata);
-- 
cgit v1.2.3


From 0996905f9301c2ff4c021982c42a15b35e74bf1c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:38 -0500
Subject: lockd: posix_test_lock() should not call locks_copy_lock()

The caller of posix_test_lock() should never need to look at the lock
private data, so do not copy that information. This also means that there
is no need to call the fl_release_private methods.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/locks.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/locks.c b/fs/locks.c
index 231b23c12c3..c83b5dbe0ed 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -239,17 +239,25 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
 /*
  * Initialize a new lock from an existing file_lock structure.
  */
-void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+static void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
 {
-	locks_release_private(new);
-
 	new->fl_owner = fl->fl_owner;
 	new->fl_pid = fl->fl_pid;
-	new->fl_file = fl->fl_file;
+	new->fl_file = NULL;
 	new->fl_flags = fl->fl_flags;
 	new->fl_type = fl->fl_type;
 	new->fl_start = fl->fl_start;
 	new->fl_end = fl->fl_end;
+	new->fl_ops = NULL;
+	new->fl_lmops = NULL;
+}
+
+void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+	locks_release_private(new);
+
+	__locks_copy_lock(new, fl);
+	new->fl_file = fl->fl_file;
 	new->fl_ops = fl->fl_ops;
 	new->fl_lmops = fl->fl_lmops;
 
@@ -686,7 +694,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl,
 			break;
 	}
 	if (cfl) {
-		locks_copy_lock(conflock, cfl);
+		__locks_copy_lock(conflock, cfl);
 		unlock_kernel();
 		return 1;
 	}
-- 
cgit v1.2.3


From 09c7938c5640a6f22bef074ca6b803dccfdb93e3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:38 -0500
Subject: lockd: Fix server-side lock blocking code

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svclock.c | 73 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 40 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index d50946dcddd..1d3a74df93f 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -193,6 +193,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
 		goto failed_free;
 
 	/* Set notifier function for VFS, and init args */
+	block->b_call.a_args.lock.fl.fl_flags |= FL_SLEEP;
 	block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
 	block->b_call.a_args.cookie = *cookie;	/* see above */
 
@@ -228,19 +229,18 @@ failed:
  * can be closed hereafter.
  */
 static int
-nlmsvc_delete_block(struct nlm_block *block, int unlock)
+nlmsvc_delete_block(struct nlm_block *block)
 {
 	struct file_lock	*fl = &block->b_call.a_args.lock.fl;
 	struct nlm_file		*file = block->b_file;
 	struct nlm_block	**bp;
-	int status = 0;
+	int status;
 
 	dprintk("lockd: deleting block %p...\n", block);
 
 	/* Remove block from list */
 	nlmsvc_remove_block(block);
-	if (unlock)
-		status = posix_unblock_lock(file->f_file, fl);
+	status = posix_unblock_lock(file->f_file, fl);
 
 	/* If the block is in the middle of a GRANT callback,
 	 * don't kill it yet. */
@@ -282,7 +282,7 @@ nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
 			block->b_host->h_inuse = 1;
 		else if (action == NLM_ACT_UNLOCK) {
 			if (host == NULL || host == block->b_host)
-				nlmsvc_delete_block(block, 1);
+				nlmsvc_delete_block(block);
 		}
 	}
 	up(&file->f_sema);
@@ -297,7 +297,7 @@ u32
 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
 {
-	struct nlm_block	*block;
+	struct nlm_block	*block, *newblock = NULL;
 	int			error;
 	u32			ret;
 
@@ -310,59 +310,65 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 				wait);
 
 
-	/* Get existing block (in case client is busy-waiting) */
-	block = nlmsvc_lookup_block(file, lock, 0);
-
+	lock->fl.fl_flags &= ~FL_SLEEP;
 again:
 	/* Lock file against concurrent access */
 	down(&file->f_sema);
+	/* Get existing block (in case client is busy-waiting) */
+	block = nlmsvc_lookup_block(file, lock, 0);
+	if (block == NULL) {
+		if (newblock != NULL)
+			lock = &newblock->b_call.a_args.lock.fl;
+	} else
+		lock = &block->b_call.a_args.lock.fl;
 
 	error = posix_lock_file(file->f_file, &lock->fl);
+	lock->fl.fl_flags &= ~FL_SLEEP;
 
 	dprintk("lockd: posix_lock_file returned %d\n", error);
 
-	if (error != -EAGAIN) {
-		if (block)
-			nlmsvc_delete_block(block, 0);
-		up(&file->f_sema);
-
-		switch(-error) {
+	switch(error) {
 		case 0:
 			ret = nlm_granted;
 			goto out;
-		case EDEADLK:
+		case -EAGAIN:
+			break;
+		case -EDEADLK:
 			ret = nlm_deadlock;
 			goto out;
 		default:			/* includes ENOLCK */
 			ret = nlm_lck_denied_nolocks;
 			goto out;
-		}
 	}
 
-	if (!wait) {
-		ret = nlm_lck_denied;
-		goto out_unlock;
-	}
+	ret = nlm_lck_denied;
+	if (!wait)
+		goto out;
+
+	ret = nlm_lck_blocked;
+	if (block != NULL)
+		goto out;
 
 	/* If we don't have a block, create and initialize it. Then
 	 * retry because we may have slept in kmalloc. */
 	/* We have to release f_sema as nlmsvc_create_block may try to
 	 * to claim it while doing host garbage collection */
-	if (block == NULL) {
+	if (newblock == NULL) {
 		up(&file->f_sema);
 		dprintk("lockd: blocking on this lock (allocating).\n");
-		if (!(block = nlmsvc_create_block(rqstp, file, lock, cookie)))
+		if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
 			return nlm_lck_denied_nolocks;
 		goto again;
 	}
 
 	/* Append to list of blocked */
-	nlmsvc_insert_block(block, NLM_NEVER);
+	nlmsvc_insert_block(newblock, NLM_NEVER);
+	newblock = NULL;
 
-	ret = nlm_lck_blocked;
-out_unlock:
-	up(&file->f_sema);
 out:
+	up(&file->f_sema);
+	if (newblock != NULL)
+		nlmsvc_delete_block(newblock);
 	dprintk("lockd: nlmsvc_lock returned %u\n", ret);
 	return ret;
 }
@@ -445,7 +451,7 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
 
 	down(&file->f_sema);
 	if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL)
-		status = nlmsvc_delete_block(block, 1);
+		status = nlmsvc_delete_block(block);
 	up(&file->f_sema);
 	return status ? nlm_lck_denied : nlm_granted;
 }
@@ -519,7 +525,11 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 	}
 
 	/* Try the lock operation again */
+	posix_unblock_lock(file->f_file, &lock->fl);
+	lock->fl.fl_flags |= FL_SLEEP;
 	error = posix_lock_file(file->f_file, &lock->fl);
+	lock->fl.fl_flags &= ~FL_SLEEP;
+
 	switch (error) {
 	case 0:
 		break;
@@ -630,11 +640,8 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
 		} else {
 			/* Lock is now held by client, or has been rejected.
 			 * In both cases, the block should be removed. */
+			nlmsvc_delete_block(block);
 			up(&file->f_sema);
-			if (status == NLM_LCK_GRANTED)
-				nlmsvc_delete_block(block, 0);
-			else
-				nlmsvc_delete_block(block, 1);
 		}
 	}
 	nlm_release_file(file);
@@ -661,7 +668,7 @@ nlmsvc_retry_blocked(void)
 		dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n",
 			block, block->b_when, block->b_done);
 		if (block->b_done)
-			nlmsvc_delete_block(block, 0);
+			nlmsvc_delete_block(block);
 		else
 			nlmsvc_grant_blocked(block);
 	}
-- 
cgit v1.2.3


From 6849c0cab69f5d1a0fc7b05fa5bfb3dec53f86df Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:39 -0500
Subject: lockd: Add refcounting to struct nlm_block

Otherwise, the block may disappear from underneath us when in
nlmsvc_retry_blocked.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svclock.c | 94 ++++++++++++++++++++++++++----------------------------
 1 file changed, 45 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 1d3a74df93f..20caeceffb1 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -39,6 +39,7 @@
 #define nlm_deadlock	nlm_lck_denied
 #endif
 
+static void nlmsvc_release_block(struct nlm_block *block);
 static void	nlmsvc_insert_block(struct nlm_block *block, unsigned long);
 static int	nlmsvc_remove_block(struct nlm_block *block);
 
@@ -58,6 +59,7 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
 	struct nlm_block **bp, *b;
 
 	dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when);
+	kref_get(&block->b_count);
 	if (block->b_queued)
 		nlmsvc_remove_block(block);
 	bp = &nlm_blocked;
@@ -90,6 +92,7 @@ nlmsvc_remove_block(struct nlm_block *block)
 		if (b == block) {
 			*bp = block->b_next;
 			block->b_queued = 0;
+			nlmsvc_release_block(block);
 			return 1;
 		}
 	}
@@ -123,6 +126,7 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
 				*head = block->b_next;
 				block->b_queued = 0;
 			}
+			kref_get(&block->b_count);
 			return block;
 		}
 	}
@@ -155,6 +159,8 @@ nlmsvc_find_block(struct nlm_cookie *cookie,  struct sockaddr_in *sin)
 			break;
 	}
 
+	if (block != NULL)
+		kref_get(&block->b_count);
 	return block;
 }
 
@@ -188,6 +194,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
 	memset(block, 0, sizeof(*block));
 	locks_init_lock(&block->b_call.a_args.lock.fl);
 	locks_init_lock(&block->b_call.a_res.lock.fl);
+	kref_init(&block->b_count);
 
 	if (!nlmclnt_setgrantargs(&block->b_call, lock))
 		goto failed_free;
@@ -228,27 +235,24 @@ failed:
  * It is the caller's responsibility to check whether the file
  * can be closed hereafter.
  */
-static int
-nlmsvc_delete_block(struct nlm_block *block)
+static int nlmsvc_unlink_block(struct nlm_block *block)
 {
-	struct file_lock	*fl = &block->b_call.a_args.lock.fl;
-	struct nlm_file		*file = block->b_file;
-	struct nlm_block	**bp;
 	int status;
-
-	dprintk("lockd: deleting block %p...\n", block);
+	dprintk("lockd: unlinking block %p...\n", block);
 
 	/* Remove block from list */
+	status = posix_unblock_lock(block->b_file->f_file, &block->b_call.a_args.lock.fl);
 	nlmsvc_remove_block(block);
-	status = posix_unblock_lock(file->f_file, fl);
+	return status;
+}
 
-	/* If the block is in the middle of a GRANT callback,
-	 * don't kill it yet. */
-	if (block->b_incall) {
-		nlmsvc_insert_block(block, NLM_NEVER);
-		block->b_done = 1;
-		return status;
-	}
+static void nlmsvc_free_block(struct kref *kref)
+{
+	struct nlm_block *block = container_of(kref, struct nlm_block, b_count);
+	struct nlm_file		*file = block->b_file;
+	struct nlm_block	**bp;
+
+	dprintk("lockd: freeing block %p...\n", block);
 
 	/* Remove block from file's list of blocks */
 	for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) {
@@ -262,7 +266,12 @@ nlmsvc_delete_block(struct nlm_block *block)
 		nlm_release_host(block->b_host);
 	nlmclnt_freegrantargs(&block->b_call);
 	kfree(block);
-	return status;
+}
+
+static void nlmsvc_release_block(struct nlm_block *block)
+{
+	if (block != NULL)
+		kref_put(&block->b_count, nlmsvc_free_block);
 }
 
 /*
@@ -282,7 +291,7 @@ nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
 			block->b_host->h_inuse = 1;
 		else if (action == NLM_ACT_UNLOCK) {
 			if (host == NULL || host == block->b_host)
-				nlmsvc_delete_block(block);
+				nlmsvc_unlink_block(block);
 		}
 	}
 	up(&file->f_sema);
@@ -318,9 +327,9 @@ again:
 	block = nlmsvc_lookup_block(file, lock, 0);
 	if (block == NULL) {
 		if (newblock != NULL)
-			lock = &newblock->b_call.a_args.lock.fl;
+			lock = &newblock->b_call.a_args.lock;
 	} else
-		lock = &block->b_call.a_args.lock.fl;
+		lock = &block->b_call.a_args.lock;
 
 	error = posix_lock_file(file->f_file, &lock->fl);
 	lock->fl.fl_flags &= ~FL_SLEEP;
@@ -363,12 +372,10 @@ again:
 
 	/* Append to list of blocked */
 	nlmsvc_insert_block(newblock, NLM_NEVER);
-	newblock = NULL;
-
 out:
 	up(&file->f_sema);
-	if (newblock != NULL)
-		nlmsvc_delete_block(newblock);
+	nlmsvc_release_block(newblock);
+	nlmsvc_release_block(block);
 	dprintk("lockd: nlmsvc_lock returned %u\n", ret);
 	return ret;
 }
@@ -450,8 +457,10 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
 				(long long)lock->fl.fl_end);
 
 	down(&file->f_sema);
-	if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL)
-		status = nlmsvc_delete_block(block);
+	if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) {
+		status = nlmsvc_unlink_block(block);
+		nlmsvc_release_block(block);
+	}
 	up(&file->f_sema);
 	return status ? nlm_lck_denied : nlm_granted;
 }
@@ -514,7 +523,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 	down(&file->f_sema);
 
 	/* Unlink block request from list */
-	nlmsvc_remove_block(block);
+	nlmsvc_unlink_block(block);
 
 	/* If b_granted is true this means we've been here before.
 	 * Just retry the grant callback, possibly refreshing the RPC
@@ -525,7 +534,6 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 	}
 
 	/* Try the lock operation again */
-	posix_unblock_lock(file->f_file, &lock->fl);
 	lock->fl.fl_flags |= FL_SLEEP;
 	error = posix_lock_file(file->f_file, &lock->fl);
 	lock->fl.fl_flags &= ~FL_SLEEP;
@@ -548,16 +556,15 @@ callback:
 	/* Lock was granted by VFS. */
 	dprintk("lockd: GRANTing blocked lock.\n");
 	block->b_granted = 1;
-	block->b_incall  = 1;
 
 	/* Schedule next grant callback in 30 seconds */
 	nlmsvc_insert_block(block, 30 * HZ);
 
 	/* Call the client */
-	nlm_get_host(block->b_call.a_host);
+	kref_get(&block->b_count);
 	if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
 						&nlmsvc_grant_ops) < 0)
-		nlm_release_host(block->b_call.a_host);
+		nlmsvc_release_block(block);
 out_unlock:
 	up(&file->f_sema);
 }
@@ -573,20 +580,10 @@ out_unlock:
 static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 {
 	struct nlm_rqst		*call = data;
-	struct nlm_block	*block;
+	struct nlm_block	*block = container_of(call, struct nlm_block, b_call);
 	unsigned long		timeout;
-	struct sockaddr_in	*peer_addr = RPC_PEERADDR(task->tk_client);
 
 	dprintk("lockd: GRANT_MSG RPC callback\n");
-	dprintk("callback: looking for cookie %s, host (%u.%u.%u.%u)\n",
-		nlmdbg_cookie2a(&call->a_args.cookie),
-		NIPQUAD(peer_addr->sin_addr.s_addr));
-	if (!(block = nlmsvc_find_block(&call->a_args.cookie, peer_addr))) {
-		dprintk("lockd: no block for cookie %s, host (%u.%u.%u.%u)\n",
-			nlmdbg_cookie2a(&call->a_args.cookie),
-			NIPQUAD(peer_addr->sin_addr.s_addr));
-		return;
-	}
 
 	/* Technically, we should down the file semaphore here. Since we
 	 * move the block towards the head of the queue only, no harm
@@ -603,9 +600,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 	}
 	nlmsvc_insert_block(block, timeout);
 	svc_wake_up(block->b_daemon);
-	block->b_incall = 0;
-
-	nlm_release_host(call->a_host);
+	nlmsvc_release_block(block);
 }
 
 static const struct rpc_call_ops nlmsvc_grant_ops = {
@@ -631,20 +626,19 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
 
 	file->f_count++;
 	down(&file->f_sema);
-	block = nlmsvc_find_block(cookie, &rqstp->rq_addr);
 	if (block) {
 		if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
 			/* Try again in a couple of seconds */
 			nlmsvc_insert_block(block, 10 * HZ);
-			up(&file->f_sema);
 		} else {
 			/* Lock is now held by client, or has been rejected.
 			 * In both cases, the block should be removed. */
-			nlmsvc_delete_block(block);
-			up(&file->f_sema);
+			nlmsvc_unlink_block(block);
 		}
 	}
+	up(&file->f_sema);
 	nlm_release_file(file);
+	nlmsvc_release_block(block);
 }
 
 /*
@@ -667,10 +661,12 @@ nlmsvc_retry_blocked(void)
 			break;
 		dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n",
 			block, block->b_when, block->b_done);
+		kref_get(&block->b_count);
 		if (block->b_done)
-			nlmsvc_delete_block(block);
+			nlmsvc_unlink_block(block);
 		else
 			nlmsvc_grant_blocked(block);
+		nlmsvc_release_block(block);
 	}
 
 	if ((block = nlm_blocked) && block->b_when != NLM_NEVER)
-- 
cgit v1.2.3


From 5e1abf8cb713a0b94f5a400c7b9b797990cd9dec Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:39 -0500
Subject: lockd: Clean up of the server-side GRANTED code

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 43 ----------------------------------------
 fs/lockd/svclock.c  | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 53 insertions(+), 46 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index acc3eb13a02..80ae3127699 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -147,49 +147,6 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
 		fl->fl_ops->fl_release_private(fl);
 }
 
-/*
- * Initialize arguments for GRANTED call. The nlm_rqst structure
- * has been cleared already.
- */
-int
-nlmclnt_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
-{
-	locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
-	memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
-	call->a_args.lock.caller = system_utsname.nodename;
-	call->a_args.lock.oh.len = lock->oh.len;
-
-	/* set default data area */
-	call->a_args.lock.oh.data = call->a_owner;
-	call->a_args.lock.svid = lock->fl.fl_pid;
-
-	if (lock->oh.len > NLMCLNT_OHSIZE) {
-		void *data = kmalloc(lock->oh.len, GFP_KERNEL);
-		if (!data) {
-			nlmclnt_freegrantargs(call);
-			return 0;
-		}
-		call->a_args.lock.oh.data = (u8 *) data;
-	}
-
-	memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
-	return 1;
-}
-
-void
-nlmclnt_freegrantargs(struct nlm_rqst *call)
-{
-	struct file_lock *fl = &call->a_args.lock.fl;
-	/*
-	 * Check whether we allocated memory for the owner.
-	 */
-	if (call->a_args.lock.oh.data != (u8 *) call->a_owner) {
-		kfree(call->a_args.lock.oh.data);
-	}
-	if (fl->fl_ops && fl->fl_ops->fl_release_private)
-		fl->fl_ops->fl_release_private(fl);
-}
-
 /*
  * This is the main entry point for the NLM client.
  */
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 20caeceffb1..3c7dd956d9c 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -43,6 +43,8 @@ static void nlmsvc_release_block(struct nlm_block *block);
 static void	nlmsvc_insert_block(struct nlm_block *block, unsigned long);
 static int	nlmsvc_remove_block(struct nlm_block *block);
 
+static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
+static void nlmsvc_freegrantargs(struct nlm_rqst *call);
 static const struct rpc_call_ops nlmsvc_grant_ops;
 
 /*
@@ -196,7 +198,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
 	locks_init_lock(&block->b_call.a_res.lock.fl);
 	kref_init(&block->b_count);
 
-	if (!nlmclnt_setgrantargs(&block->b_call, lock))
+	if (!nlmsvc_setgrantargs(&block->b_call, lock))
 		goto failed_free;
 
 	/* Set notifier function for VFS, and init args */
@@ -264,7 +266,7 @@ static void nlmsvc_free_block(struct kref *kref)
 
 	if (block->b_host)
 		nlm_release_host(block->b_host);
-	nlmclnt_freegrantargs(&block->b_call);
+	nlmsvc_freegrantargs(&block->b_call);
 	kfree(block);
 }
 
@@ -298,6 +300,49 @@ nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
 	return 0;
 }
 
+/*
+ * Initialize arguments for GRANTED call. The nlm_rqst structure
+ * has been cleared already.
+ */
+static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
+{
+	locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
+	memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
+	call->a_args.lock.caller = system_utsname.nodename;
+	call->a_args.lock.oh.len = lock->oh.len;
+
+	/* set default data area */
+	call->a_args.lock.oh.data = call->a_owner;
+	call->a_args.lock.svid = lock->fl.fl_pid;
+
+	if (lock->oh.len > NLMCLNT_OHSIZE) {
+		void *data = kmalloc(lock->oh.len, GFP_KERNEL);
+		if (!data) {
+			nlmsvc_freegrantargs(call);
+			return 0;
+		}
+		call->a_args.lock.oh.data = (u8 *) data;
+	}
+
+	memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
+	return 1;
+}
+
+static void nlmsvc_freegrantargs(struct nlm_rqst *call)
+{
+	struct file_lock *fl = &call->a_args.lock.fl;
+	/*
+	 * Check whether we allocated memory for the owner.
+	 */
+	if (call->a_args.lock.oh.data != (u8 *) call->a_owner) {
+		kfree(call->a_args.lock.oh.data);
+	}
+	if (fl->fl_ops && fl->fl_ops->fl_release_private)
+		fl->fl_ops->fl_release_private(fl);
+	if (fl->fl_lmops && fl->fl_lmops->fl_release_private)
+		fl->fl_lmops->fl_release_private(fl);
+}
+
 /*
  * Attempt to establish a lock, and if it can't be granted, block it
  * if required.
@@ -600,11 +645,16 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 	}
 	nlmsvc_insert_block(block, timeout);
 	svc_wake_up(block->b_daemon);
-	nlmsvc_release_block(block);
+}
+
+void nlmsvc_grant_release(void *data)
+{
+	nlmsvc_release_block(data);
 }
 
 static const struct rpc_call_ops nlmsvc_grant_ops = {
 	.rpc_call_done = nlmsvc_grant_callback,
+	.rpc_release = nlmsvc_grant_release,
 };
 
 /*
-- 
cgit v1.2.3


From 686517f1ad1630c11964d668b556aab79b8c942e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:39 -0500
Subject: lockd: Make nlmsvc_create_block() use nlmsvc_lookup_host()

Currently it uses nlmclnt_lookup_host(), which puts the resulting host
structure on a different list.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svc4proc.c | 3 +--
 fs/lockd/svclock.c  | 3 +--
 fs/lockd/svcproc.c  | 3 +--
 3 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index b10f913aa06..ac4a700af01 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -483,8 +483,7 @@ nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
 	if (!(call = nlmclnt_alloc_call()))
 		return rpc_system_err;
 
-	host = nlmclnt_lookup_host(&rqstp->rq_addr,
-				rqstp->rq_prot, rqstp->rq_vers);
+	host = nlmsvc_lookup_host(rqstp);
 	if (!host) {
 		kfree(call);
 		return rpc_system_err;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 3c7dd956d9c..a95d260b713 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -185,8 +185,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
 	struct nlm_rqst		*call;
 
 	/* Create host handle for callback */
-	host = nlmclnt_lookup_host(&rqstp->rq_addr,
-				rqstp->rq_prot, rqstp->rq_vers);
+	host = nlmsvc_lookup_host(rqstp);
 	if (host == NULL)
 		return NULL;
 
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 35681d9cf1f..4986fbe4454 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -508,8 +508,7 @@ nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
 	if (!(call = nlmclnt_alloc_call()))
 		return rpc_system_err;
 
-	host = nlmclnt_lookup_host(&rqstp->rq_addr,
-				rqstp->rq_prot, rqstp->rq_vers);
+	host = nlmsvc_lookup_host(rqstp);
 	if (!host) {
 		kfree(call);
 		return rpc_system_err;
-- 
cgit v1.2.3


From 04266473ecf5cdca242201d9f1ed890afe070fb6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:40 -0500
Subject: lockd: Make lockd use rpc_new_client() instead of rpc_create_client

When doing NLM_GRANTED requests, lockd may end up blocking if we use
rpc_create_client() due to the synchronous call to rpc_ping(). Instead, use
rpc_new_client().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/host.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 82f7a0b1d8a..100e7822970 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -191,11 +191,12 @@ nlm_bind_host(struct nlm_host *host)
 		xprt->resvport = 1;	/* NLM requires a reserved port */
 
 		/* Existing NLM servers accept AUTH_UNIX only */
-		clnt = rpc_create_client(xprt, host->h_name, &nlm_program,
+		clnt = rpc_new_client(xprt, host->h_name, &nlm_program,
 					host->h_version, RPC_AUTH_UNIX);
 		if (IS_ERR(clnt))
 			goto forgetit;
 		clnt->cl_autobind = 1;	/* turn on pmap queries */
+		clnt->cl_softrtry = 1; /* All queries are soft */
 
 		host->h_rpcclnt = clnt;
 	}
-- 
cgit v1.2.3


From 26bcbf965f857c710adafd16cf424f043006b5dd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Mar 2006 13:44:40 -0500
Subject: lockd: stop abusing file_lock_list

Currently lockd directly access the file_lock_list from fs/locks.c.
It does so to mark locks granted or reclaimable.  This is very
suboptimal, because a) lockd needs to poke into locks.c internals, and
b) it needs to iterate over all locks in the system for marking locks
granted or reclaimable.

This patch adds lists for granted and reclaimable locks to the nlm_host
structure instead, and adds locks to those.

nlmclnt_lock:
	now adds the lock to h_granted instead of setting the
	NFS_LCK_GRANTED, still O(1)

nlmclnt_mark_reclaim:
	goes away completely, replaced by a list_splice_init.
	Complexity reduced from O(locks in the system) to O(1)

reclaimer:
	iterates over h_reclaim now, complexity reduced from
	O(locks in the system) to O(locks per nlm_host)

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntlock.c | 54 +++++++++--------------------------------------------
 fs/lockd/clntproc.c | 11 ++++++-----
 fs/lockd/host.c     |  2 ++
 fs/locks.c          |  5 +----
 4 files changed, 18 insertions(+), 54 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 8ae79ae4b99..0fc0ee267b0 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -154,34 +154,6 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
  * server crash.
  */
 
-/*
- * Mark the locks for reclaiming.
- * FIXME: In 2.5 we don't want to iterate through any global file_lock_list.
- *        Maintain NLM lock reclaiming lists in the nlm_host instead.
- */
-static
-void nlmclnt_mark_reclaim(struct nlm_host *host)
-{
-	struct file_lock *fl;
-	struct inode *inode;
-	struct list_head *tmp;
-
-	list_for_each(tmp, &file_lock_list) {
-		fl = list_entry(tmp, struct file_lock, fl_link);
-
-		inode = fl->fl_file->f_dentry->d_inode;
-		if (inode->i_sb->s_magic != NFS_SUPER_MAGIC)
-			continue;
-		if (fl->fl_u.nfs_fl.owner == NULL)
-			continue;
-		if (fl->fl_u.nfs_fl.owner->host != host)
-			continue;
-		if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED))
-			continue;
-		fl->fl_u.nfs_fl.flags |= NFS_LCK_RECLAIM;
-	}
-}
-
 /*
  * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number,
  * that we mark locks for reclaiming, and that we bump the pseudo NSM state.
@@ -194,7 +166,12 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
 	host->h_state++;
 	host->h_nextrebind = 0;
 	nlm_rebind_host(host);
-	nlmclnt_mark_reclaim(host);
+
+	/*
+	 * Mark the locks for reclaiming.
+	 */
+	list_splice_init(&host->h_granted, &host->h_reclaim);
+
 	dprintk("NLM: reclaiming locks for host %s", host->h_name);
 }
 
@@ -223,9 +200,7 @@ reclaimer(void *ptr)
 {
 	struct nlm_host	  *host = (struct nlm_host *) ptr;
 	struct nlm_wait	  *block;
-	struct list_head *tmp;
-	struct file_lock *fl;
-	struct inode *inode;
+	struct file_lock *fl, *next;
 
 	daemonize("%s-reclaim", host->h_name);
 	allow_signal(SIGKILL);
@@ -237,20 +212,9 @@ reclaimer(void *ptr)
 
 	/* First, reclaim all locks that have been marked. */
 restart:
-	list_for_each(tmp, &file_lock_list) {
-		fl = list_entry(tmp, struct file_lock, fl_link);
-
-		inode = fl->fl_file->f_dentry->d_inode;
-		if (inode->i_sb->s_magic != NFS_SUPER_MAGIC)
-			continue;
-		if (fl->fl_u.nfs_fl.owner == NULL)
-			continue;
-		if (fl->fl_u.nfs_fl.owner->host != host)
-			continue;
-		if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_RECLAIM))
-			continue;
+	list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
+		list_del(&fl->fl_u.nfs_fl.list);
 
-		fl->fl_u.nfs_fl.flags &= ~NFS_LCK_RECLAIM;
 		nlmclnt_reclaim(host, fl);
 		if (signalled())
 			break;
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 80ae3127699..cb469431bd1 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -465,7 +465,6 @@ static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *ho
 {
 	BUG_ON(fl->fl_ops != NULL);
 	fl->fl_u.nfs_fl.state = 0;
-	fl->fl_u.nfs_fl.flags = 0;
 	fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner);
 	fl->fl_ops = &nlmclnt_lock_ops;
 }
@@ -552,8 +551,8 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 
 	if (resp->status == NLM_LCK_GRANTED) {
 		fl->fl_u.nfs_fl.state = host->h_state;
-		fl->fl_u.nfs_fl.flags |= NFS_LCK_GRANTED;
 		fl->fl_flags |= FL_SLEEP;
+		list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
 		do_vfs_lock(fl);
 	}
 	status = nlm_stat_to_errno(resp->status);
@@ -619,9 +618,11 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	struct nlm_res	*resp = &req->a_res;
 	int		status;
 
-	/* Clean the GRANTED flag now so the lock doesn't get
-	 * reclaimed while we're stuck in the unlock call. */
-	fl->fl_u.nfs_fl.flags &= ~NFS_LCK_GRANTED;
+	/*
+	 * Remove from the granted list now so the lock doesn't get
+	 * reclaimed while we're stuck in the unlock call.
+	 */
+	list_del(&fl->fl_u.nfs_fl.list);
 
 	/*
 	 * Note: the server is supposed to either grant us the unlock
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 100e7822970..f456f8ed9ac 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -123,6 +123,8 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
 	nlm_hosts[hash]    = host;
 	INIT_LIST_HEAD(&host->h_lockowners);
 	spin_lock_init(&host->h_lock);
+	INIT_LIST_HEAD(&host->h_granted);
+	INIT_LIST_HEAD(&host->h_reclaim);
 
 	if (++nrhosts > NLM_HOST_MAX)
 		next_gc = 0;
diff --git a/fs/locks.c b/fs/locks.c
index c83b5dbe0ed..56f996e98bb 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,10 +139,7 @@ int lease_break_time = 45;
 #define for_each_lock(inode, lockp) \
 	for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
 
-LIST_HEAD(file_lock_list);
-
-EXPORT_SYMBOL(file_lock_list);
-
+static LIST_HEAD(file_lock_list);
 static LIST_HEAD(blocked_list);
 
 static kmem_cache_t *filelock_cache;
-- 
cgit v1.2.3


From 4c060b531006e0711db32a132d6ac7661594b280 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:41 -0500
Subject: lockd: Fix Oopses due to list manipulation errors.

The patch "stop abusing file_lock_list introduces a couple of bugs since
the locks may be copied and need to be removed from the lists when they are
destroyed.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntlock.c |  7 ++++---
 fs/lockd/clntproc.c | 15 ++++++---------
 fs/lockd/host.c     |  7 +++++--
 3 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 0fc0ee267b0..7cf41c1e1a8 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -213,11 +213,12 @@ reclaimer(void *ptr)
 	/* First, reclaim all locks that have been marked. */
 restart:
 	list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
-		list_del(&fl->fl_u.nfs_fl.list);
+		list_del_init(&fl->fl_u.nfs_fl.list);
 
-		nlmclnt_reclaim(host, fl);
 		if (signalled())
-			break;
+			continue;
+		if (nlmclnt_reclaim(host, fl) == 0)
+			list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
 		goto restart;
 	}
 
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index cb469431bd1..3e90356b488 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -446,12 +446,14 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
 
 static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
 {
-	memcpy(&new->fl_u.nfs_fl, &fl->fl_u.nfs_fl, sizeof(new->fl_u.nfs_fl));
-	nlm_get_lockowner(new->fl_u.nfs_fl.owner);
+	new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state;
+	new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner);
+	list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted);
 }
 
 static void nlmclnt_locks_release_private(struct file_lock *fl)
 {
+	list_del(&fl->fl_u.nfs_fl.list);
 	nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
 	fl->fl_ops = NULL;
 }
@@ -466,6 +468,7 @@ static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *ho
 	BUG_ON(fl->fl_ops != NULL);
 	fl->fl_u.nfs_fl.state = 0;
 	fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner);
+	INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list);
 	fl->fl_ops = &nlmclnt_lock_ops;
 }
 
@@ -552,7 +555,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 	if (resp->status == NLM_LCK_GRANTED) {
 		fl->fl_u.nfs_fl.state = host->h_state;
 		fl->fl_flags |= FL_SLEEP;
-		list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
+		/* Ensure the resulting lock will get added to granted list */
 		do_vfs_lock(fl);
 	}
 	status = nlm_stat_to_errno(resp->status);
@@ -618,12 +621,6 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	struct nlm_res	*resp = &req->a_res;
 	int		status;
 
-	/*
-	 * Remove from the granted list now so the lock doesn't get
-	 * reclaimed while we're stuck in the unlock call.
-	 */
-	list_del(&fl->fl_u.nfs_fl.list);
-
 	/*
 	 * Note: the server is supposed to either grant us the unlock
 	 * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index f456f8ed9ac..112ebf8b8df 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -245,8 +245,12 @@ void nlm_release_host(struct nlm_host *host)
 {
 	if (host != NULL) {
 		dprintk("lockd: release host %s\n", host->h_name);
-		atomic_dec(&host->h_count);
 		BUG_ON(atomic_read(&host->h_count) < 0);
+		if (atomic_dec_and_test(&host->h_count)) {
+			BUG_ON(!list_empty(&host->h_lockowners));
+			BUG_ON(!list_empty(&host->h_granted));
+			BUG_ON(!list_empty(&host->h_reclaim));
+		}
 	}
 }
 
@@ -334,7 +338,6 @@ nlm_gc_hosts(void)
 					rpc_destroy_client(host->h_rpcclnt);
 				}
 			}
-			BUG_ON(!list_empty(&host->h_lockowners));
 			kfree(host);
 			nrhosts--;
 		}
-- 
cgit v1.2.3


From 35576cba57f1c042b87d6586b3229d13067264c6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:41 -0500
Subject: NLM: nlmclnt_cancel_callback should accept NLM_LCK_DENIED errors

NLM_LCK_DENIED is a valid error return for an NLM_CANCEL call by the
client.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 3e90356b488..c25044f3b66 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -750,6 +750,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
 	switch (req->a_res.status) {
 	case NLM_LCK_GRANTED:
 	case NLM_LCK_DENIED_GRACE_PERIOD:
+	case NLM_LCK_DENIED:
 		/* Everything's good */
 		break;
 	case NLM_LCK_DENIED_NOLOCKS:
-- 
cgit v1.2.3


From 606bbba06b11ebcbdf3a4fcd8cce4507c5bd7a4b Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Mon, 20 Mar 2006 13:44:42 -0500
Subject: NFS: fix compiler warnings on 64-bit platforms

Introduced by NFS aio+dio patches.

Test plan:
Compile kernel with CONFIG_NFS enabled on 64-bit hardware.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1e3725e51df..b4bbf6d7592 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -365,7 +365,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
 		rpc_execute(&data->task);
 		unlock_kernel();
 
-		dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+		dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
 				data->task.tk_pid,
 				inode->i_sb->s_id,
 				(long long)NFS_FILEID(inode),
@@ -686,7 +686,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
 		rpc_execute(&data->task);
 		unlock_kernel();
 
-		dfprintk(VFS, "NFS: %4d initiated direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+		dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
 				data->task.tk_pid,
 				inode->i_sb->s_id,
 				(long long)NFS_FILEID(inode),
-- 
cgit v1.2.3


From 6b45d858ed6821dd687efd3b68929de2e4954fec Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:43 -0500
Subject: NFS: Clean up nfs_get_user_pages

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 59 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 29 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b4bbf6d7592..58830429e42 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -59,7 +59,6 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
-static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty);
 static kmem_cache_t *nfs_direct_cachep;
 
 /*
@@ -121,6 +120,18 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
 	return -EINVAL;
 }
 
+static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+{
+	int i;
+	for (i = 0; i < npages; i++) {
+		struct page *page = pages[i];
+		if (do_dirty && !PageCompound(page))
+			set_page_dirty_lock(page);
+		page_cache_release(page);
+	}
+	kfree(pages);
+}
+
 static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
 {
 	int result = -ENOMEM;
@@ -138,31 +149,23 @@ static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t siz
 					page_count, (rw == READ), 0,
 					*pages, NULL);
 		up_read(&current->mm->mmap_sem);
-		/*
-		 * If we got fewer pages than expected from get_user_pages(),
-		 * the user buffer runs off the end of a mapping; return EFAULT.
-		 */
-		if (result >= 0 && result < page_count) {
-			nfs_free_user_pages(*pages, result, 0);
+		if (result != page_count) {
+			/*
+			 * If we got fewer pages than expected from
+			 * get_user_pages(), the user buffer runs off the
+			 * end of a mapping; return EFAULT.
+			 */
+			if (result >= 0) {
+				nfs_free_user_pages(*pages, result, 0);
+				result = -EFAULT;
+			} else
+				kfree(*pages);
 			*pages = NULL;
-			result = -EFAULT;
 		}
 	}
 	return result;
 }
 
-static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
-{
-	int i;
-	for (i = 0; i < npages; i++) {
-		struct page *page = pages[i];
-		if (do_dirty && !PageCompound(page))
-			set_page_dirty_lock(page);
-		page_cache_release(page);
-	}
-	kfree(pages);
-}
-
 static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 {
 	struct nfs_direct_req *dreq;
@@ -788,13 +791,11 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count,
 	if (retval)
 		goto out;
 
-	page_count = nfs_get_user_pages(READ, (unsigned long) buf,
+	retval = nfs_get_user_pages(READ, (unsigned long) buf,
 						count, &pages);
-	if (page_count < 0) {
-		nfs_free_user_pages(pages, 0, 0);
-		retval = page_count;
+	if (retval < 0)
 		goto out;
-	}
+	page_count = retval;
 
 	retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
 						pages, page_count);
@@ -862,13 +863,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t
 	if (retval)
 		goto out;
 
-	page_count = nfs_get_user_pages(WRITE, (unsigned long) buf,
+	retval = nfs_get_user_pages(WRITE, (unsigned long) buf,
 						count, &pages);
-	if (page_count < 0) {
-		nfs_free_user_pages(pages, 0, 0);
-		retval = page_count;
+	if (retval < 0)
 		goto out;
-	}
+	page_count = retval;
 
 	retval = nfs_direct_write(iocb, (unsigned long) buf, count,
 					pos, pages, page_count);
-- 
cgit v1.2.3


From d72b7a6b26b9009b7a05117fe2e04b3a73ae4a5c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:43 -0500
Subject: NFS: O_DIRECT needs to use a completion

Now that we have aio writes, it is possible for dreq->outstanding to be
zero, but for the I/O not to have completed. Convert struct nfs_direct_req
to use a completion to signal when the I/O is done.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 58830429e42..cbef57a16ff 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -72,7 +72,6 @@ struct nfs_direct_req {
 				rewrite_list;	/* saved nfs_write_data structs */
 	struct nfs_open_context	*ctx;		/* file open context info */
 	struct kiocb *		iocb;		/* controlling i/o request */
-	wait_queue_head_t	wait;		/* wait for i/o completion */
 	struct inode *		inode;		/* target file of i/o */
 	unsigned long		user_addr;	/* location of user's buffer */
 	size_t			user_count;	/* total bytes to move */
@@ -85,6 +84,7 @@ struct nfs_direct_req {
 	int			outstanding;	/* i/os we're waiting for */
 	ssize_t			count,		/* bytes actually processed */
 				error;		/* any reported error */
+	struct completion	completion;	/* wait for i/o completion */
 
 	/* commit state */
 	struct nfs_write_data *	commit_data;	/* special write_data for commits */
@@ -175,7 +175,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 		return NULL;
 
 	kref_init(&dreq->kref);
-	init_waitqueue_head(&dreq->wait);
+	init_completion(&dreq->completion);
 	INIT_LIST_HEAD(&dreq->list);
 	INIT_LIST_HEAD(&dreq->rewrite_list);
 	dreq->iocb = NULL;
@@ -209,7 +209,7 @@ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
 	if (dreq->iocb)
 		goto out;
 
-	result = wait_event_interruptible(dreq->wait, (dreq->outstanding == 0));
+	result = wait_for_completion_interruptible(&dreq->completion);
 
 	if (!result)
 		result = dreq->error;
@@ -239,8 +239,8 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 		if (!res)
 			res = (long) dreq->count;
 		aio_complete(dreq->iocb, res, 0);
-	} else
-		wake_up(&dreq->wait);
+	}
+	complete_all(&dreq->completion);
 
 	kref_put(&dreq->kref, nfs_direct_req_release);
 }
-- 
cgit v1.2.3


From 3a649b884637c4fdff50a6beebc3dc0e6082e048 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:44 -0500
Subject: NLM: Simplify client locks

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntlock.c | 43 ++++++++++++++++---------------------------
 fs/lockd/clntproc.c | 39 ++++++++++++++++-----------------------
 2 files changed, 32 insertions(+), 50 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 7cf41c1e1a8..bce74446870 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -44,32 +44,25 @@ static LIST_HEAD(nlm_blocked);
 /*
  * Queue up a lock for blocking so that the GRANTED request can see it
  */
-int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl)
+struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl)
 {
 	struct nlm_wait *block;
 
-	BUG_ON(req->a_block != NULL);
 	block = kmalloc(sizeof(*block), GFP_KERNEL);
-	if (block == NULL)
-		return -ENOMEM;
-	block->b_host = host;
-	block->b_lock = fl;
-	init_waitqueue_head(&block->b_wait);
-	block->b_status = NLM_LCK_BLOCKED;
-
-	list_add(&block->b_list, &nlm_blocked);
-	req->a_block = block;
-
-	return 0;
+	if (block != NULL) {
+		block->b_host = host;
+		block->b_lock = fl;
+		init_waitqueue_head(&block->b_wait);
+		block->b_status = NLM_LCK_BLOCKED;
+		list_add(&block->b_list, &nlm_blocked);
+	}
+	return block;
 }
 
-void nlmclnt_finish_block(struct nlm_rqst *req)
+void nlmclnt_finish_block(struct nlm_wait *block)
 {
-	struct nlm_wait *block = req->a_block;
-
 	if (block == NULL)
 		return;
-	req->a_block = NULL;
 	list_del(&block->b_list);
 	kfree(block);
 }
@@ -77,15 +70,14 @@ void nlmclnt_finish_block(struct nlm_rqst *req)
 /*
  * Block on a lock
  */
-long nlmclnt_block(struct nlm_rqst *req, long timeout)
+int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
 {
-	struct nlm_wait	*block = req->a_block;
 	long ret;
 
 	/* A borken server might ask us to block even if we didn't
 	 * request it. Just say no!
 	 */
-	if (!req->a_args.block)
+	if (block == NULL)
 		return -EAGAIN;
 
 	/* Go to sleep waiting for GRANT callback. Some servers seem
@@ -99,13 +91,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout)
 	ret = wait_event_interruptible_timeout(block->b_wait,
 			block->b_status != NLM_LCK_BLOCKED,
 			timeout);
-
-	if (block->b_status != NLM_LCK_BLOCKED) {
-		req->a_res.status = block->b_status;
-		block->b_status = NLM_LCK_BLOCKED;
-	}
-
-	return ret;
+	if (ret < 0)
+		return -ERESTARTSYS;
+	req->a_res.status = block->b_status;
+	return 0;
 }
 
 /*
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index c25044f3b66..8af01710585 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -136,15 +136,14 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
 				(unsigned int)fl->fl_u.nfs_fl.owner->pid,
 				system_utsname.nodename);
 	lock->svid = fl->fl_u.nfs_fl.owner->pid;
-	locks_copy_lock(&lock->fl, fl);
+	lock->fl.fl_start = fl->fl_start;
+	lock->fl.fl_end = fl->fl_end;
+	lock->fl.fl_type = fl->fl_type;
 }
 
 static void nlmclnt_release_lockargs(struct nlm_rqst *req)
 {
-	struct file_lock *fl = &req->a_args.lock.fl;
-
-	if (fl->fl_ops && fl->fl_ops->fl_release_private)
-		fl->fl_ops->fl_release_private(fl);
+	BUG_ON(req->a_args.lock.fl.fl_ops != NULL);
 }
 
 /*
@@ -455,7 +454,6 @@ static void nlmclnt_locks_release_private(struct file_lock *fl)
 {
 	list_del(&fl->fl_u.nfs_fl.list);
 	nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
-	fl->fl_ops = NULL;
 }
 
 static struct file_lock_operations nlmclnt_lock_ops = {
@@ -515,41 +513,36 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 {
 	struct nlm_host	*host = req->a_host;
 	struct nlm_res	*resp = &req->a_res;
-	long timeout;
-	int status;
+	struct nlm_wait *block = NULL;
+	int status = -ENOLCK;
 
 	if (!host->h_monitored && nsm_monitor(host) < 0) {
 		printk(KERN_NOTICE "lockd: failed to monitor %s\n",
 					host->h_name);
-		status = -ENOLCK;
 		goto out;
 	}
 
-	if (req->a_args.block) {
-		status = nlmclnt_prepare_block(req, host, fl);
-		if (status < 0)
-			goto out;
-	}
+	block = nlmclnt_prepare_block(host, fl);
 	for(;;) {
 		status = nlmclnt_call(req, NLMPROC_LOCK);
 		if (status < 0)
 			goto out_unblock;
-		if (resp->status != NLM_LCK_BLOCKED)
+		if (!req->a_args.block)
 			break;
-		/* Wait on an NLM blocking lock */
-		timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT);
 		/* Did a reclaimer thread notify us of a server reboot? */
 		if (resp->status ==  NLM_LCK_DENIED_GRACE_PERIOD)
 			continue;
 		if (resp->status != NLM_LCK_BLOCKED)
 			break;
-		if (timeout >= 0)
-			continue;
-		/* We were interrupted. Send a CANCEL request to the server
+		/* Wait on an NLM blocking lock */
+		status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
+		/* if we were interrupted. Send a CANCEL request to the server
 		 * and exit
 		 */
-		status = (int)timeout;
-		goto out_unblock;
+		if (status < 0)
+			goto out_unblock;
+		if (resp->status != NLM_LCK_BLOCKED)
+			break;
 	}
 
 	if (resp->status == NLM_LCK_GRANTED) {
@@ -560,7 +553,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 	}
 	status = nlm_stat_to_errno(resp->status);
 out_unblock:
-	nlmclnt_finish_block(req);
+	nlmclnt_finish_block(block);
 	/* Cancel the blocked request if it is still pending */
 	if (resp->status == NLM_LCK_BLOCKED)
 		nlmclnt_cancel(host, req->a_args.block, fl);
-- 
cgit v1.2.3


From e4cd038a45a46ffbe06a1a72f3f15246e5b041ca Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:44 -0500
Subject: NLM: Fix nlmclnt_test to not copy private part of locks

The struct file_lock does not carry a properly initialised lock,
so don't copy it as if it were.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 4 +++-
 fs/nfs/file.c       | 5 ++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 8af01710585..7a239864b8b 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -434,7 +434,9 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
 		/*
 		 * Report the conflicting lock back to the application.
 		 */
-		locks_copy_lock(fl, &req->a_res.lock.fl);
+		fl->fl_start = req->a_res.lock.fl.fl_start;
+		fl->fl_end = req->a_res.lock.fl.fl_start;
+		fl->fl_type = req->a_res.lock.fl.fl_type;
 		fl->fl_pid = 0;
 	} else {
 		return nlm_stat_to_errno(req->a_res.status);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ee140c53dba..6bcbc4d676c 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -399,7 +399,10 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
 	lock_kernel();
 	/* Try local locking first */
 	if (posix_test_lock(filp, fl, &cfl)) {
-		locks_copy_lock(fl, &cfl);
+		fl->fl_start = cfl.fl_start;
+		fl->fl_end = cfl.fl_end;
+		fl->fl_type = cfl.fl_type;
+		fl->fl_pid = cfl.fl_pid;
 		goto out;
 	}
 
-- 
cgit v1.2.3


From 92737230dd3f1478033819d4bc20339f8da852da Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:45 -0500
Subject: NLM: Add nlmclnt_release_call

Add a helper function to simplify the freeing of NLM client requests.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 195 ++++++++++++++++++++--------------------------------
 fs/lockd/svc4proc.c |  34 ++++-----
 fs/lockd/svclock.c  |  66 ++++++++----------
 fs/lockd/svcproc.c  |  33 ++++-----
 4 files changed, 129 insertions(+), 199 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 7a239864b8b..3f8ad7c54ef 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -152,9 +152,8 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
 int
 nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 {
-	struct nfs_server	*nfssrv = NFS_SERVER(inode);
 	struct nlm_host		*host;
-	struct nlm_rqst		reqst, *call = &reqst;
+	struct nlm_rqst		*call;
 	sigset_t		oldset;
 	unsigned long		flags;
 	int			status, proto, vers;
@@ -168,23 +167,17 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 	/* Retrieve transport protocol from NFS client */
 	proto = NFS_CLIENT(inode)->cl_xprt->prot;
 
-	if (!(host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers)))
+	host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
+	if (host == NULL)
 		return -ENOLCK;
 
-	/* Create RPC client handle if not there, and copy soft
-	 * and intr flags from NFS client. */
-	if (host->h_rpcclnt == NULL) {
-		struct rpc_clnt	*clnt;
+	call = nlm_alloc_call(host);
+	if (call == NULL)
+		return -ENOMEM;
 
-		/* Bind an rpc client to this host handle (does not
-		 * perform a portmapper lookup) */
-		if (!(clnt = nlm_bind_host(host))) {
-			status = -ENOLCK;
-			goto done;
-		}
-		clnt->cl_softrtry = nfssrv->client->cl_softrtry;
-		clnt->cl_intr = nfssrv->client->cl_intr;
-	}
+	nlmclnt_locks_init_private(fl, host);
+	/* Set up the argument struct */
+	nlmclnt_setlockargs(call, fl);
 
 	/* Keep the old signal mask */
 	spin_lock_irqsave(&current->sighand->siglock, flags);
@@ -197,26 +190,10 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 	    && (current->flags & PF_EXITING)) {
 		sigfillset(&current->blocked);	/* Mask all signals */
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
-		call = nlmclnt_alloc_call();
-		if (!call) {
-			status = -ENOMEM;
-			goto out_restore;
-		}
 		call->a_flags = RPC_TASK_ASYNC;
-	} else {
-		spin_unlock_irqrestore(&current->sighand->siglock, flags);
-		memset(call, 0, sizeof(*call));
-		locks_init_lock(&call->a_args.lock.fl);
-		locks_init_lock(&call->a_res.lock.fl);
 	}
-	call->a_host = host;
-
-	nlmclnt_locks_init_private(fl, host);
-
-	/* Set up the argument struct */
-	nlmclnt_setlockargs(call, fl);
+	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
 	if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
 		if (fl->fl_type != F_UNLCK) {
@@ -229,24 +206,26 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 	else
 		status = -EINVAL;
 
- out_restore:
+	fl->fl_ops->fl_release_private(fl);
+	fl->fl_ops = NULL;
+
 	spin_lock_irqsave(&current->sighand->siglock, flags);
 	current->blocked = oldset;
 	recalc_sigpending();
 	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
-done:
 	dprintk("lockd: clnt proc returns %d\n", status);
-	nlm_release_host(host);
 	return status;
 }
 EXPORT_SYMBOL(nlmclnt_proc);
 
 /*
  * Allocate an NLM RPC call struct
+ *
+ * Note: the caller must hold a reference to host. In case of failure,
+ * this reference will be released.
  */
-struct nlm_rqst *
-nlmclnt_alloc_call(void)
+struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
 {
 	struct nlm_rqst	*call;
 
@@ -255,16 +234,30 @@ nlmclnt_alloc_call(void)
 		if (call != NULL) {
 			locks_init_lock(&call->a_args.lock.fl);
 			locks_init_lock(&call->a_res.lock.fl);
+			call->a_host = host;
 			return call;
 		}
 		if (signalled())
 			break;
-		printk("nlmclnt_alloc_call: failed, waiting for memory\n");
+		printk("nlm_alloc_call: failed, waiting for memory\n");
 		schedule_timeout_interruptible(5*HZ);
 	}
+	nlm_release_host(host);
 	return NULL;
 }
 
+void nlm_release_call(struct nlm_rqst *call)
+{
+	nlm_release_host(call->a_host);
+	nlmclnt_release_lockargs(call);
+	kfree(call);
+}
+
+static void nlmclnt_rpc_release(void *data)
+{
+	return nlm_release_call(data);
+}
+
 static int nlm_wait_on_grace(wait_queue_head_t *queue)
 {
 	DEFINE_WAIT(wait);
@@ -361,7 +354,7 @@ in_grace_period:
 /*
  * Generic NLM call, async version.
  */
-int nlmsvc_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
 {
 	struct nlm_host	*host = req->a_host;
 	struct rpc_clnt	*clnt;
@@ -369,48 +362,23 @@ int nlmsvc_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops
 		.rpc_argp	= &req->a_args,
 		.rpc_resp	= &req->a_res,
 	};
-	int		status;
-
-	dprintk("lockd: call procedure %d on %s (async)\n",
-			(int)proc, host->h_name);
-
-	/* If we have no RPC client yet, create one. */
-	if ((clnt = nlm_bind_host(host)) == NULL)
-		return -ENOLCK;
-	msg.rpc_proc = &clnt->cl_procinfo[proc];
-
-        /* bootstrap and kick off the async RPC call */
-        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
-
-	return status;
-}
-
-static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
-{
-	struct nlm_host	*host = req->a_host;
-	struct rpc_clnt	*clnt;
-	struct nlm_args	*argp = &req->a_args;
-	struct nlm_res	*resp = &req->a_res;
-	struct rpc_message msg = {
-		.rpc_argp	= argp,
-		.rpc_resp	= resp,
-	};
-	int		status;
+	int status = -ENOLCK;
 
 	dprintk("lockd: call procedure %d on %s (async)\n",
 			(int)proc, host->h_name);
 
 	/* If we have no RPC client yet, create one. */
-	if ((clnt = nlm_bind_host(host)) == NULL)
-		return -ENOLCK;
+	clnt = nlm_bind_host(host);
+	if (clnt == NULL)
+		goto out_err;
 	msg.rpc_proc = &clnt->cl_procinfo[proc];
 
-	/* Increment host refcount */
-	nlm_get_host(host);
         /* bootstrap and kick off the async RPC call */
         status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
-	if (status < 0)
-		nlm_release_host(host);
+	if (status == 0)
+		return 0;
+out_err:
+	nlm_release_call(req);
 	return status;
 }
 
@@ -423,26 +391,28 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
 	int	status;
 
 	status = nlmclnt_call(req, NLMPROC_TEST);
-	nlmclnt_release_lockargs(req);
 	if (status < 0)
-		return status;
+		goto out;
 
-	status = req->a_res.status;
-	if (status == NLM_LCK_GRANTED) {
-		fl->fl_type = F_UNLCK;
-	} if (status == NLM_LCK_DENIED) {
-		/*
-		 * Report the conflicting lock back to the application.
-		 */
-		fl->fl_start = req->a_res.lock.fl.fl_start;
-		fl->fl_end = req->a_res.lock.fl.fl_start;
-		fl->fl_type = req->a_res.lock.fl.fl_type;
-		fl->fl_pid = 0;
-	} else {
-		return nlm_stat_to_errno(req->a_res.status);
+	switch (req->a_res.status) {
+		case NLM_LCK_GRANTED:
+			fl->fl_type = F_UNLCK;
+			break;
+		case NLM_LCK_DENIED:
+			/*
+			 * Report the conflicting lock back to the application.
+			 */
+			fl->fl_start = req->a_res.lock.fl.fl_start;
+			fl->fl_end = req->a_res.lock.fl.fl_start;
+			fl->fl_type = req->a_res.lock.fl.fl_type;
+			fl->fl_pid = 0;
+			break;
+		default:
+			status = nlm_stat_to_errno(req->a_res.status);
 	}
-
-	return 0;
+out:
+	nlm_release_call(req);
+	return status;
 }
 
 static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
@@ -560,7 +530,7 @@ out_unblock:
 	if (resp->status == NLM_LCK_BLOCKED)
 		nlmclnt_cancel(host, req->a_args.block, fl);
 out:
-	nlmclnt_release_lockargs(req);
+	nlm_release_call(req);
 	return status;
 }
 
@@ -623,32 +593,24 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	 */
 	do_vfs_lock(fl);
 
-	if (req->a_flags & RPC_TASK_ASYNC) {
-		status = nlmclnt_async_call(req, NLMPROC_UNLOCK,
-					&nlmclnt_unlock_ops);
-		/* Hrmf... Do the unlock early since locks_remove_posix()
-		 * really expects us to free the lock synchronously */
-		if (status < 0) {
-			nlmclnt_release_lockargs(req);
-			kfree(req);
-		}
-		return status;
-	}
+	if (req->a_flags & RPC_TASK_ASYNC)
+		return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
 
 	status = nlmclnt_call(req, NLMPROC_UNLOCK);
-	nlmclnt_release_lockargs(req);
 	if (status < 0)
-		return status;
+		goto out;
 
+	status = 0;
 	if (resp->status == NLM_LCK_GRANTED)
-		return 0;
+		goto out;
 
 	if (resp->status != NLM_LCK_DENIED_NOLOCKS)
 		printk("lockd: unexpected unlock status: %d\n", resp->status);
-
 	/* What to do now? I'm out of my depth... */
-
-	return -ENOLCK;
+	status = -ENOLCK;
+out:
+	nlm_release_call(req);
+	return status;
 }
 
 static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
@@ -670,9 +632,6 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
 	if (status != NLM_LCK_GRANTED)
 		printk(KERN_WARNING "lockd: unexpected unlock status: %d\n", status);
 die:
-	nlm_release_host(req->a_host);
-	nlmclnt_release_lockargs(req);
-	kfree(req);
 	return;
  retry_rebind:
 	nlm_rebind_host(req->a_host);
@@ -682,6 +641,7 @@ die:
 
 static const struct rpc_call_ops nlmclnt_unlock_ops = {
 	.rpc_call_done = nlmclnt_unlock_callback,
+	.rpc_release = nlmclnt_rpc_release,
 };
 
 /*
@@ -703,20 +663,15 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
 	recalc_sigpending();
 	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
-	req = nlmclnt_alloc_call();
+	req = nlm_alloc_call(nlm_get_host(host));
 	if (!req)
 		return -ENOMEM;
-	req->a_host  = host;
 	req->a_flags = RPC_TASK_ASYNC;
 
 	nlmclnt_setlockargs(req, fl);
 	req->a_args.block = block;
 
-	status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
-	if (status < 0) {
-		nlmclnt_release_lockargs(req);
-		kfree(req);
-	}
+	status = nlm_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
 
 	spin_lock_irqsave(&current->sighand->siglock, flags);
 	current->blocked = oldset;
@@ -757,9 +712,6 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
 	}
 
 die:
-	nlm_release_host(req->a_host);
-	nlmclnt_release_lockargs(req);
-	kfree(req);
 	return;
 
 retry_cancel:
@@ -773,6 +725,7 @@ retry_cancel:
 
 static const struct rpc_call_ops nlmclnt_cancel_ops = {
 	.rpc_call_done = nlmclnt_cancel_callback,
+	.rpc_release = nlmclnt_rpc_release,
 };
 
 /*
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index ac4a700af01..cb51c702582 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -480,43 +480,37 @@ nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
 	struct nlm_host	*host;
 	struct nlm_rqst	*call;
 
-	if (!(call = nlmclnt_alloc_call()))
+	host = nlmsvc_lookup_host(rqstp);
+	if (host == NULL)
 		return rpc_system_err;
 
-	host = nlmsvc_lookup_host(rqstp);
-	if (!host) {
-		kfree(call);
+	call = nlm_alloc_call(host);
+	if (call == NULL)
 		return rpc_system_err;
-	}
+
 
 	call->a_flags = RPC_TASK_ASYNC;
-	call->a_host  = host;
 	memcpy(&call->a_args, resp, sizeof(*resp));
 
-	if (nlmsvc_async_call(call, proc, &nlm4svc_callback_ops) < 0)
-		goto error;
-
+	if (nlm_async_call(call, proc, &nlm4svc_callback_ops) < 0)
+		return rpc_system_err;
 	return rpc_success;
- error:
-	kfree(call);
-	nlm_release_host(host);
-	return rpc_system_err;
 }
 
 static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
 {
-	struct nlm_rqst	*call = data;
+	dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+			-task->tk_status);
+}
 
-	if (task->tk_status < 0) {
-		dprintk("lockd: %4d callback failed (errno = %d)\n",
-					task->tk_pid, -task->tk_status);
-	}
-	nlm_release_host(call->a_host);
-	kfree(call);
+static void nlm4svc_callback_release(void *data)
+{
+	nlm_release_call(data);
 }
 
 static const struct rpc_call_ops nlm4svc_callback_ops = {
 	.rpc_call_done = nlm4svc_callback_exit,
+	.rpc_release = nlm4svc_callback_release,
 };
 
 /*
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index a95d260b713..185bf7ea1c0 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -117,12 +117,12 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end, lock->fl.fl_type);
 	for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) {
-		fl = &block->b_call.a_args.lock.fl;
+		fl = &block->b_call->a_args.lock.fl;
 		dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
 				block->b_file, fl->fl_pid,
 				(long long)fl->fl_start,
 				(long long)fl->fl_end, fl->fl_type,
-				nlmdbg_cookie2a(&block->b_call.a_args.cookie));
+				nlmdbg_cookie2a(&block->b_call->a_args.cookie));
 		if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
 			if (remove) {
 				*head = block->b_next;
@@ -156,7 +156,7 @@ nlmsvc_find_block(struct nlm_cookie *cookie,  struct sockaddr_in *sin)
 	for (block = nlm_blocked; block; block = block->b_next) {
 		dprintk("cookie: head of blocked queue %p, block %p\n", 
 			nlm_blocked, block);
-		if (nlm_cookie_match(&block->b_call.a_args.cookie,cookie)
+		if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie)
 				&& nlm_cmp_addr(sin, &block->b_host->h_addr))
 			break;
 	}
@@ -182,28 +182,30 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
 {
 	struct nlm_block	*block;
 	struct nlm_host		*host;
-	struct nlm_rqst		*call;
+	struct nlm_rqst		*call = NULL;
 
 	/* Create host handle for callback */
 	host = nlmsvc_lookup_host(rqstp);
 	if (host == NULL)
 		return NULL;
 
+	call = nlm_alloc_call(host);
+	if (call == NULL)
+		return NULL;
+
 	/* Allocate memory for block, and initialize arguments */
-	if (!(block = (struct nlm_block *) kmalloc(sizeof(*block), GFP_KERNEL)))
+	block = kzalloc(sizeof(*block), GFP_KERNEL);
+	if (block == NULL)
 		goto failed;
-	memset(block, 0, sizeof(*block));
-	locks_init_lock(&block->b_call.a_args.lock.fl);
-	locks_init_lock(&block->b_call.a_res.lock.fl);
 	kref_init(&block->b_count);
 
-	if (!nlmsvc_setgrantargs(&block->b_call, lock))
+	if (!nlmsvc_setgrantargs(call, lock))
 		goto failed_free;
 
 	/* Set notifier function for VFS, and init args */
-	block->b_call.a_args.lock.fl.fl_flags |= FL_SLEEP;
-	block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
-	block->b_call.a_args.cookie = *cookie;	/* see above */
+	call->a_args.lock.fl.fl_flags |= FL_SLEEP;
+	call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
+	call->a_args.cookie = *cookie;	/* see above */
 
 	dprintk("lockd: created block %p...\n", block);
 
@@ -217,16 +219,16 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
 	file->f_blocks  = block;
 
 	/* Set up RPC arguments for callback */
-	call = &block->b_call;
-	call->a_host    = host;
+	block->b_call = call;
 	call->a_flags   = RPC_TASK_ASYNC;
+	call->a_block = block;
 
 	return block;
 
 failed_free:
 	kfree(block);
 failed:
-	nlm_release_host(host);
+	nlm_release_call(call);
 	return NULL;
 }
 
@@ -242,7 +244,7 @@ static int nlmsvc_unlink_block(struct nlm_block *block)
 	dprintk("lockd: unlinking block %p...\n", block);
 
 	/* Remove block from list */
-	status = posix_unblock_lock(block->b_file->f_file, &block->b_call.a_args.lock.fl);
+	status = posix_unblock_lock(block->b_file->f_file, &block->b_call->a_args.lock.fl);
 	nlmsvc_remove_block(block);
 	return status;
 }
@@ -263,9 +265,8 @@ static void nlmsvc_free_block(struct kref *kref)
 		}
 	}
 
-	if (block->b_host)
-		nlm_release_host(block->b_host);
-	nlmsvc_freegrantargs(&block->b_call);
+	nlmsvc_freegrantargs(block->b_call);
+	nlm_release_call(block->b_call);
 	kfree(block);
 }
 
@@ -316,10 +317,8 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
 
 	if (lock->oh.len > NLMCLNT_OHSIZE) {
 		void *data = kmalloc(lock->oh.len, GFP_KERNEL);
-		if (!data) {
-			nlmsvc_freegrantargs(call);
+		if (!data)
 			return 0;
-		}
 		call->a_args.lock.oh.data = (u8 *) data;
 	}
 
@@ -329,17 +328,8 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
 
 static void nlmsvc_freegrantargs(struct nlm_rqst *call)
 {
-	struct file_lock *fl = &call->a_args.lock.fl;
-	/*
-	 * Check whether we allocated memory for the owner.
-	 */
-	if (call->a_args.lock.oh.data != (u8 *) call->a_owner) {
+	if (call->a_args.lock.oh.data != call->a_owner)
 		kfree(call->a_args.lock.oh.data);
-	}
-	if (fl->fl_ops && fl->fl_ops->fl_release_private)
-		fl->fl_ops->fl_release_private(fl);
-	if (fl->fl_lmops && fl->fl_lmops->fl_release_private)
-		fl->fl_lmops->fl_release_private(fl);
 }
 
 /*
@@ -371,9 +361,9 @@ again:
 	block = nlmsvc_lookup_block(file, lock, 0);
 	if (block == NULL) {
 		if (newblock != NULL)
-			lock = &newblock->b_call.a_args.lock;
+			lock = &newblock->b_call->a_args.lock;
 	} else
-		lock = &block->b_call.a_args.lock;
+		lock = &block->b_call->a_args.lock;
 
 	error = posix_lock_file(file->f_file, &lock->fl);
 	lock->fl.fl_flags &= ~FL_SLEEP;
@@ -523,7 +513,7 @@ nlmsvc_notify_blocked(struct file_lock *fl)
 
 	dprintk("lockd: VFS unblock notification for block %p\n", fl);
 	for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) {
-		if (nlm_compare_locks(&block->b_call.a_args.lock.fl, fl)) {
+		if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
 			nlmsvc_insert_block(block, 0);
 			svc_wake_up(block->b_daemon);
 			return;
@@ -558,7 +548,7 @@ static void
 nlmsvc_grant_blocked(struct nlm_block *block)
 {
 	struct nlm_file		*file = block->b_file;
-	struct nlm_lock		*lock = &block->b_call.a_args.lock;
+	struct nlm_lock		*lock = &block->b_call->a_args.lock;
 	int			error;
 
 	dprintk("lockd: grant blocked lock %p\n", block);
@@ -606,7 +596,7 @@ callback:
 
 	/* Call the client */
 	kref_get(&block->b_count);
-	if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
+	if (nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG,
 						&nlmsvc_grant_ops) < 0)
 		nlmsvc_release_block(block);
 out_unlock:
@@ -624,7 +614,7 @@ out_unlock:
 static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 {
 	struct nlm_rqst		*call = data;
-	struct nlm_block	*block = container_of(call, struct nlm_block, b_call);
+	struct nlm_block	*block = call->a_block;
 	unsigned long		timeout;
 
 	dprintk("lockd: GRANT_MSG RPC callback\n");
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 4986fbe4454..956d1d71e2a 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -505,43 +505,36 @@ nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
 	struct nlm_host	*host;
 	struct nlm_rqst	*call;
 
-	if (!(call = nlmclnt_alloc_call()))
+	host = nlmsvc_lookup_host(rqstp);
+	if (host == NULL)
 		return rpc_system_err;
 
-	host = nlmsvc_lookup_host(rqstp);
-	if (!host) {
-		kfree(call);
+	call = nlm_alloc_call(host);
+	if (call == NULL)
 		return rpc_system_err;
-	}
 
 	call->a_flags = RPC_TASK_ASYNC;
-	call->a_host  = host;
 	memcpy(&call->a_args, resp, sizeof(*resp));
 
-	if (nlmsvc_async_call(call, proc, &nlmsvc_callback_ops) < 0)
-		goto error;
-
+	if (nlm_async_call(call, proc, &nlmsvc_callback_ops) < 0)
+		return rpc_system_err;
 	return rpc_success;
- error:
-	nlm_release_host(host);
-	kfree(call);
-	return rpc_system_err;
 }
 
 static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
 {
-	struct nlm_rqst	*call = data;
+	dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+			-task->tk_status);
+}
 
-	if (task->tk_status < 0) {
-		dprintk("lockd: %4d callback failed (errno = %d)\n",
-					task->tk_pid, -task->tk_status);
-	}
-	nlm_release_host(call->a_host);
-	kfree(call);
+static void nlmsvc_callback_release(void *data)
+{
+	nlm_release_call(data);
 }
 
 static const struct rpc_call_ops nlmsvc_callback_ops = {
 	.rpc_call_done = nlmsvc_callback_exit,
+	.rpc_release = nlmsvc_callback_release,
 };
 
 /*
-- 
cgit v1.2.3


From d47166244860eb5dfdb12ee4703968beef8a0db2 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:45 -0500
Subject: lockd: Add helper for *_RES callbacks

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c |  27 +++++++---
 fs/lockd/svc4proc.c | 150 +++++++++++++++++++---------------------------------
 fs/lockd/svcproc.c  | 143 +++++++++++++++++++------------------------------
 3 files changed, 128 insertions(+), 192 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 3f8ad7c54ef..f96e38155b5 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -354,14 +354,10 @@ in_grace_period:
 /*
  * Generic NLM call, async version.
  */
-int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
 {
 	struct nlm_host	*host = req->a_host;
 	struct rpc_clnt	*clnt;
-	struct rpc_message msg = {
-		.rpc_argp	= &req->a_args,
-		.rpc_resp	= &req->a_res,
-	};
 	int status = -ENOLCK;
 
 	dprintk("lockd: call procedure %d on %s (async)\n",
@@ -371,10 +367,10 @@ int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk
 	clnt = nlm_bind_host(host);
 	if (clnt == NULL)
 		goto out_err;
-	msg.rpc_proc = &clnt->cl_procinfo[proc];
+	msg->rpc_proc = &clnt->cl_procinfo[proc];
 
         /* bootstrap and kick off the async RPC call */
-        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
+        status = rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req);
 	if (status == 0)
 		return 0;
 out_err:
@@ -382,6 +378,23 @@ out_err:
 	return status;
 }
 
+int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+{
+	struct rpc_message msg = {
+		.rpc_argp	= &req->a_args,
+		.rpc_resp	= &req->a_res,
+	};
+	return __nlm_async_call(req, proc, &msg, tk_ops);
+}
+
+int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+{
+	struct rpc_message msg = {
+		.rpc_argp	= &req->a_res,
+	};
+	return __nlm_async_call(req, proc, &msg, tk_ops);
+}
+
 /*
  * TEST for the presence of a conflicting lock
  */
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index cb51c702582..a2dd9ccb9b3 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -21,10 +21,6 @@
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
-static u32	nlm4svc_callback(struct svc_rqst *, u32, struct nlm_res *);
-
-static const struct rpc_call_ops nlm4svc_callback_ops;
-
 /*
  * Obtain client and file from arguments
  */
@@ -233,84 +229,90 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+/*
+ * This is the generic lockd callback for async RPC calls
+ */
+static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
+{
+	dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+			-task->tk_status);
+}
+
+static void nlm4svc_callback_release(void *data)
+{
+	nlm_release_call(data);
+}
+
+static const struct rpc_call_ops nlm4svc_callback_ops = {
+	.rpc_call_done = nlm4svc_callback_exit,
+	.rpc_release = nlm4svc_callback_release,
+};
+
 /*
  * `Async' versions of the above service routines. They aren't really,
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static int
-nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+		int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
 {
-	struct nlm_res	res;
-	u32		stat;
+	struct nlm_host	*host;
+	struct nlm_rqst	*call;
+	int stat;
 
-	dprintk("lockd: TEST_MSG      called\n");
-	memset(&res, 0, sizeof(res));
+	host = nlmsvc_lookup_host(rqstp);
+	if (host == NULL)
+		return rpc_system_err;
+
+	call = nlm_alloc_call(host);
+	if (call == NULL)
+		return rpc_system_err;
+
+	stat = func(rqstp, argp, &call->a_res);
+	if (stat != 0) {
+		nlm_release_call(call);
+		return stat;
+	}
 
-	if ((stat = nlm4svc_proc_test(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_TEST_RES, &res);
-	return stat;
+	call->a_flags = RPC_TASK_ASYNC;
+	if (nlm_async_reply(call, proc, &nlm4svc_callback_ops) < 0)
+		return rpc_system_err;
+	return rpc_success;
 }
 
-static int
-nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					     void	     *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
+	dprintk("lockd: TEST_MSG      called\n");
+	return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test);
+}
 
+static int nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+					     void	     *resp)
+{
 	dprintk("lockd: LOCK_MSG      called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlm4svc_proc_lock(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, &res);
-	return stat;
+	return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock);
 }
 
-static int
-nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					       void	       *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: CANCEL_MSG    called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlm4svc_proc_cancel(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
-	return stat;
+	return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel);
 }
 
-static int
-nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                void            *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlm4svc_proc_unlock(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
-	return stat;
+	return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock);
 }
 
-static int
-nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                 void            *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: GRANTED_MSG   called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlm4svc_proc_granted(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
-	return stat;
+	return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlm4svc_proc_granted);
 }
 
 /*
@@ -471,48 +473,6 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 }
 
 
-/*
- * This is the generic lockd callback for async RPC calls
- */
-static u32
-nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
-{
-	struct nlm_host	*host;
-	struct nlm_rqst	*call;
-
-	host = nlmsvc_lookup_host(rqstp);
-	if (host == NULL)
-		return rpc_system_err;
-
-	call = nlm_alloc_call(host);
-	if (call == NULL)
-		return rpc_system_err;
-
-
-	call->a_flags = RPC_TASK_ASYNC;
-	memcpy(&call->a_args, resp, sizeof(*resp));
-
-	if (nlm_async_call(call, proc, &nlm4svc_callback_ops) < 0)
-		return rpc_system_err;
-	return rpc_success;
-}
-
-static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
-{
-	dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
-			-task->tk_status);
-}
-
-static void nlm4svc_callback_release(void *data)
-{
-	nlm_release_call(data);
-}
-
-static const struct rpc_call_ops nlm4svc_callback_ops = {
-	.rpc_call_done = nlm4svc_callback_exit,
-	.rpc_release = nlm4svc_callback_release,
-};
-
 /*
  * NLM Server procedures.
  */
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 956d1d71e2a..d210cf304e9 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -22,10 +22,6 @@
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
-static u32	nlmsvc_callback(struct svc_rqst *, u32, struct nlm_res *);
-
-static const struct rpc_call_ops nlmsvc_callback_ops;
-
 #ifdef CONFIG_LOCKD_V4
 static u32
 cast_to_nlm(u32 status, u32 vers)
@@ -261,84 +257,92 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+/*
+ * This is the generic lockd callback for async RPC calls
+ */
+static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
+{
+	dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+			-task->tk_status);
+}
+
+static void nlmsvc_callback_release(void *data)
+{
+	nlm_release_call(data);
+}
+
+static const struct rpc_call_ops nlmsvc_callback_ops = {
+	.rpc_call_done = nlmsvc_callback_exit,
+	.rpc_release = nlmsvc_callback_release,
+};
+
 /*
  * `Async' versions of the above service routines. They aren't really,
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static int
-nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+		int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
 {
-	struct nlm_res	res;
-	u32		stat;
+	struct nlm_host	*host;
+	struct nlm_rqst	*call;
+	int stat;
 
-	dprintk("lockd: TEST_MSG      called\n");
-	memset(&res, 0, sizeof(res));
+	host = nlmsvc_lookup_host(rqstp);
+	if (host == NULL)
+		return rpc_system_err;
+
+	call = nlm_alloc_call(host);
+	if (call == NULL)
+		return rpc_system_err;
+
+	stat = func(rqstp, argp, &call->a_res);
+	if (stat != 0) {
+		nlm_release_call(call);
+		return stat;
+	}
 
-	if ((stat = nlmsvc_proc_test(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_TEST_RES, &res);
-	return stat;
+	call->a_flags = RPC_TASK_ASYNC;
+	if (nlm_async_reply(call, proc, &nlmsvc_callback_ops) < 0)
+		return rpc_system_err;
+	return rpc_success;
 }
 
-static int
-nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					     void	     *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
+	dprintk("lockd: TEST_MSG      called\n");
+	return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test);
+}
 
+static int nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+					     void	     *resp)
+{
 	dprintk("lockd: LOCK_MSG      called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlmsvc_proc_lock(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, &res);
-	return stat;
+	return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock);
 }
 
-static int
-nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					       void	       *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: CANCEL_MSG    called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlmsvc_proc_cancel(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
-	return stat;
+	return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel);
 }
 
 static int
 nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                void            *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlmsvc_proc_unlock(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
-	return stat;
+	return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock);
 }
 
 static int
 nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                 void            *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: GRANTED_MSG   called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlmsvc_proc_granted(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
-	return stat;
+	return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlmsvc_proc_granted);
 }
 
 /*
@@ -496,47 +500,6 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 	return rpc_success;
 }
 
-/*
- * This is the generic lockd callback for async RPC calls
- */
-static u32
-nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
-{
-	struct nlm_host	*host;
-	struct nlm_rqst	*call;
-
-	host = nlmsvc_lookup_host(rqstp);
-	if (host == NULL)
-		return rpc_system_err;
-
-	call = nlm_alloc_call(host);
-	if (call == NULL)
-		return rpc_system_err;
-
-	call->a_flags = RPC_TASK_ASYNC;
-	memcpy(&call->a_args, resp, sizeof(*resp));
-
-	if (nlm_async_call(call, proc, &nlmsvc_callback_ops) < 0)
-		return rpc_system_err;
-	return rpc_success;
-}
-
-static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
-{
-	dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
-			-task->tk_status);
-}
-
-static void nlmsvc_callback_release(void *data)
-{
-	nlm_release_call(data);
-}
-
-static const struct rpc_call_ops nlmsvc_callback_ops = {
-	.rpc_call_done = nlmsvc_callback_exit,
-	.rpc_release = nlmsvc_callback_release,
-};
-
 /*
  * NLM Server procedures.
  */
-- 
cgit v1.2.3


From 6041b79192bdf0e7ab18ea6859effa5d8311391b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:45 -0500
Subject: lockd: Fix a typo in nlmsvc_grant_release()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svclock.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 185bf7ea1c0..c16c94f5a85 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -638,7 +638,9 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 
 void nlmsvc_grant_release(void *data)
 {
-	nlmsvc_release_block(data);
+	struct nlm_rqst		*call = data;
+
+	nlmsvc_release_block(call->a_block);
 }
 
 static const struct rpc_call_ops nlmsvc_grant_ops = {
-- 
cgit v1.2.3


From f25bc34967d76610d17bc70769d7c220976eeeb1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:46 -0500
Subject: NFSv4: Ensure nfs_callback_down() calls svc_destroy()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index fcd97406a77..2c042f8d70b 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -73,6 +73,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
 		svc_process(serv, rqstp);
 	}
 
+	svc_exit_thread(rqstp);
 	nfs_callback_info.pid = 0;
 	complete(&nfs_callback_info.stopped);
 	unlock_kernel();
-- 
cgit v1.2.3


From 3e4f6290ca4df7464ee066123f2bca4298c2dab4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:46 -0500
Subject: NFSv4: Send the delegation stateid for SETATTR calls

In the case where we hold a delegation stateid, use that in for inside
SETATTR calls.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/delegation.c | 19 +++++++++++++++++++
 fs/nfs/delegation.h |  1 +
 fs/nfs/nfs4proc.c   | 27 +++++++++++++--------------
 3 files changed, 33 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index c6f07c1c71e..d3be923d4e4 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -421,3 +421,22 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
 		nfs_free_delegation(delegation);
 	}
 }
+
+int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
+{
+	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+	int res = 0;
+
+	if (nfsi->delegation_state == 0)
+		return 0;
+	spin_lock(&clp->cl_lock);
+	delegation = nfsi->delegation;
+	if (delegation != NULL) {
+		memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
+		res = 1;
+	}
+	spin_unlock(&clp->cl_lock);
+	return res;
+}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 7a0b2bfce77..3858694652f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -41,6 +41,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
 int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
 int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
+int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
 
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index bad1eae5608..62aed077fc2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1018,12 +1018,12 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
 	return res;
 }
 
-static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
-                struct nfs_fh *fhandle, struct iattr *sattr,
-                struct nfs4_state *state)
+static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+                struct iattr *sattr, struct nfs4_state *state)
 {
+	struct nfs_server *server = NFS_SERVER(inode);
         struct nfs_setattrargs  arg = {
-                .fh             = fhandle,
+                .fh             = NFS_FH(inode),
                 .iap            = sattr,
 		.server		= server,
 		.bitmask = server->attr_bitmask,
@@ -1042,7 +1042,9 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
 
 	nfs_fattr_init(fattr);
 
-	if (state != NULL) {
+	if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+		/* Use that stateid */
+	} else if (state != NULL) {
 		msg.rpc_cred = state->owner->so_cred;
 		nfs4_copy_stateid(&arg.stateid, state, current->files);
 	} else
@@ -1054,16 +1056,15 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
 	return status;
 }
 
-static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
-                struct nfs_fh *fhandle, struct iattr *sattr,
-                struct nfs4_state *state)
+static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+                struct iattr *sattr, struct nfs4_state *state)
 {
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_exception exception = { };
 	int err;
 	do {
 		err = nfs4_handle_exception(server,
-				_nfs4_do_setattr(server, fattr, fhandle, sattr,
-					state),
+				_nfs4_do_setattr(inode, fattr, sattr, state),
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -1504,8 +1505,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	if (ctx != NULL)
 		state = ctx->state;
 
-	status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
-			NFS_FH(inode), sattr, state);
+	status = nfs4_do_setattr(inode, fattr, sattr, state);
 	if (status == 0)
 		nfs_setattr_update_inode(inode, sattr);
 	if (ctx != NULL)
@@ -1824,8 +1824,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	d_instantiate(dentry, igrab(state->inode));
 	if (flags & O_EXCL) {
 		struct nfs_fattr fattr;
-		status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
-		                     NFS_FH(state->inode), sattr, state);
+		status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
 		if (status == 0)
 			nfs_setattr_update_inode(state->inode, sattr);
 	}
-- 
cgit v1.2.3


From 51581f3bf922512880f52a7777923fd6dcfc792b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:47 -0500
Subject: NFSv4: SETCLIENTID_CONFIRM should handle
 NFS4ERR_DELAY/NFS4ERR_RESOURCE

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c  | 21 +++++++++++++++++++--
 fs/nfs/nfs4state.c |  1 +
 2 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 62aed077fc2..31000326aba 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2849,8 +2849,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
 	return status;
 }
 
-int
-nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
 {
 	struct nfs_fsinfo fsinfo;
 	struct rpc_message msg = {
@@ -2874,6 +2873,24 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
 	return status;
 }
 
+int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+{
+	long timeout;
+	int err;
+	do {
+		err = _nfs4_proc_setclientid_confirm(clp, cred);
+		switch (err) {
+			case 0:
+				return err;
+			case -NFS4ERR_RESOURCE:
+				/* The IBM lawyers misread another document! */
+			case -NFS4ERR_DELAY:
+				err = nfs4_delay(clp->cl_rpcclient, &timeout);
+		}
+	} while (err == 0);
+	return err;
+}
+
 struct nfs4_delegreturndata {
 	struct nfs4_delegreturnargs args;
 	struct nfs4_delegreturnres res;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index afad0255e7d..96e5b82c153 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -977,6 +977,7 @@ out:
 out_error:
 	printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
 				NIPQUAD(clp->cl_addr.s_addr), -status);
+	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 	goto out;
 }
 
-- 
cgit v1.2.3


From d9f6eb75d4900782a095b98470decfe98971f920 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:47 -0500
Subject: lockd: blocks should hold a reference to the nlm_file

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svclock.c | 81 +++++++++++++++++++++++++++++++-----------------------
 1 file changed, 46 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c16c94f5a85..ce754efe284 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -103,11 +103,10 @@ nlmsvc_remove_block(struct nlm_block *block)
 }
 
 /*
- * Find a block for a given lock and optionally remove it from
- * the list.
+ * Find a block for a given lock
  */
 static struct nlm_block *
-nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
+nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
 {
 	struct nlm_block	**head, *block;
 	struct file_lock	*fl;
@@ -124,10 +123,6 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
 				(long long)fl->fl_end, fl->fl_type,
 				nlmdbg_cookie2a(&block->b_call->a_args.cookie));
 		if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
-			if (remove) {
-				*head = block->b_next;
-				block->b_queued = 0;
-			}
 			kref_get(&block->b_count);
 			return block;
 		}
@@ -213,6 +208,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
 	block->b_daemon = rqstp->rq_server;
 	block->b_host   = host;
 	block->b_file   = file;
+	file->f_count++;
 
 	/* Add to file's list of blocks */
 	block->b_fnext  = file->f_blocks;
@@ -257,6 +253,7 @@ static void nlmsvc_free_block(struct kref *kref)
 
 	dprintk("lockd: freeing block %p...\n", block);
 
+	down(&file->f_sema);
 	/* Remove block from file's list of blocks */
 	for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) {
 		if (*bp == block) {
@@ -264,9 +261,11 @@ static void nlmsvc_free_block(struct kref *kref)
 			break;
 		}
 	}
+	up(&file->f_sema);
 
 	nlmsvc_freegrantargs(block->b_call);
 	nlm_release_call(block->b_call);
+	nlm_release_file(block->b_file);
 	kfree(block);
 }
 
@@ -276,6 +275,36 @@ static void nlmsvc_release_block(struct nlm_block *block)
 		kref_put(&block->b_count, nlmsvc_free_block);
 }
 
+static void nlmsvc_act_mark(struct nlm_host *host, struct nlm_file *file)
+{
+	struct nlm_block *block;
+
+	down(&file->f_sema);
+	for (block = file->f_blocks; block != NULL; block = block->b_fnext)
+		block->b_host->h_inuse = 1;
+	up(&file->f_sema);
+}
+
+static void nlmsvc_act_unlock(struct nlm_host *host, struct nlm_file *file)
+{
+	struct nlm_block *block;
+
+restart:
+	down(&file->f_sema);
+	for (block = file->f_blocks; block != NULL; block = block->b_fnext) {
+		if (host != NULL && host != block->b_host)
+			continue;
+		if (!block->b_queued)
+			continue;
+		kref_get(&block->b_count);
+		up(&file->f_sema);
+		nlmsvc_unlink_block(block);
+		nlmsvc_release_block(block);
+		goto restart;
+	}
+	up(&file->f_sema);
+}
+
 /*
  * Loop over all blocks and perform the action specified.
  * (NLM_ACT_CHECK handled by nlmsvc_inspect_file).
@@ -283,20 +312,10 @@ static void nlmsvc_release_block(struct nlm_block *block)
 int
 nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
 {
-	struct nlm_block	*block, *next;
-	/* XXX: Will everything get cleaned up if we don't unlock here? */
-
-	down(&file->f_sema);
-	for (block = file->f_blocks; block; block = next) {
-		next = block->b_fnext;
-		if (action == NLM_ACT_MARK)
-			block->b_host->h_inuse = 1;
-		else if (action == NLM_ACT_UNLOCK) {
-			if (host == NULL || host == block->b_host)
-				nlmsvc_unlink_block(block);
-		}
-	}
-	up(&file->f_sema);
+	if (action == NLM_ACT_MARK)
+		nlmsvc_act_mark(host, file);
+	else
+		nlmsvc_act_unlock(host, file);
 	return 0;
 }
 
@@ -358,7 +377,7 @@ again:
 	/* Lock file against concurrent access */
 	down(&file->f_sema);
 	/* Get existing block (in case client is busy-waiting) */
-	block = nlmsvc_lookup_block(file, lock, 0);
+	block = nlmsvc_lookup_block(file, lock);
 	if (block == NULL) {
 		if (newblock != NULL)
 			lock = &newblock->b_call->a_args.lock;
@@ -491,11 +510,12 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
 				(long long)lock->fl.fl_end);
 
 	down(&file->f_sema);
-	if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) {
+	block = nlmsvc_lookup_block(file, lock);
+	up(&file->f_sema);
+	if (block != NULL) {
 		status = nlmsvc_unlink_block(block);
 		nlmsvc_release_block(block);
 	}
-	up(&file->f_sema);
 	return status ? nlm_lck_denied : nlm_granted;
 }
 
@@ -553,9 +573,6 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 
 	dprintk("lockd: grant blocked lock %p\n", block);
 
-	/* First thing is lock the file */
-	down(&file->f_sema);
-
 	/* Unlink block request from list */
 	nlmsvc_unlink_block(block);
 
@@ -578,12 +595,12 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 	case -EAGAIN:
 		dprintk("lockd: lock still blocked\n");
 		nlmsvc_insert_block(block, NLM_NEVER);
-		goto out_unlock;
+		return;
 	default:
 		printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
 				-error, __FUNCTION__);
 		nlmsvc_insert_block(block, 10 * HZ);
-		goto out_unlock;
+		return;
 	}
 
 callback:
@@ -599,8 +616,6 @@ callback:
 	if (nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG,
 						&nlmsvc_grant_ops) < 0)
 		nlmsvc_release_block(block);
-out_unlock:
-	up(&file->f_sema);
 }
 
 /*
@@ -665,8 +680,6 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
 		return;
 	file = block->b_file;
 
-	file->f_count++;
-	down(&file->f_sema);
 	if (block) {
 		if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
 			/* Try again in a couple of seconds */
@@ -677,8 +690,6 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
 			nlmsvc_unlink_block(block);
 		}
 	}
-	up(&file->f_sema);
-	nlm_release_file(file);
 	nlmsvc_release_block(block);
 }
 
-- 
cgit v1.2.3


From 01d0ae8beaee75d954900109619b700fe68707d9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:48 -0500
Subject: NFSv4: Fix an oops in nfs4_fill_super

The mount statistics patches introduced a call to nfs_free_iostats that is
not only redundant, but actually causes an oops.

Also fix a memory leak due to the lack of a call to nfs_free_iostats on
unmount.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c  | 13 ++++++-------
 fs/nfs/iostat.h |  3 ++-
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b9f35ca266c..17654bffc3c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -281,6 +281,10 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
 
 	sb->s_magic      = NFS_SUPER_MAGIC;
 
+	server->io_stats = nfs_alloc_iostats();
+	if (server->io_stats == NULL)
+		return -ENOMEM;
+
 	root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
 	/* Did getting the root inode fail? */
 	if (IS_ERR(root_inode)) {
@@ -294,12 +298,6 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
 	}
 	sb->s_root->d_op = server->rpc_ops->dentry_ops;
 
-	server->io_stats = nfs_alloc_iostats();
-	if (!server->io_stats) {
-		no_root_error = -ENOMEM;
-		goto out_no_root;
-	}
-
 	/* mount time stamp, in seconds */
 	server->mount_time = jiffies;
 
@@ -1822,6 +1820,7 @@ static void nfs_kill_super(struct super_block *s)
 
 	rpciod_down();		/* release rpciod */
 
+	nfs_free_iostats(server->io_stats);
 	kfree(server->hostname);
 	kfree(server);
 }
@@ -2122,7 +2121,6 @@ out_err:
 out_free:
 	kfree(server->mnt_path);
 	kfree(server->hostname);
-	nfs_free_iostats(server->io_stats);
 	kfree(server);
 	return s;
 }
@@ -2143,6 +2141,7 @@ static void nfs4_kill_super(struct super_block *sb)
 
 	rpciod_down();
 
+	nfs_free_iostats(server->io_stats);
 	kfree(server->hostname);
 	kfree(server);
 }
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index 7a749515331..6350ecbde58 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -156,7 +156,8 @@ static inline struct nfs_iostats *nfs_alloc_iostats(void)
 
 static inline void nfs_free_iostats(struct nfs_iostats *stats)
 {
-	free_percpu(stats);
+	if (stats != NULL)
+		free_percpu(stats);
 }
 
 #endif
-- 
cgit v1.2.3


From 03f28e3a2059fc466761d872122f30acb7be61ae Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:48 -0500
Subject: NFS: Make nfs_fhget() return appropriate error values

Currently it returns NULL, which usually gets interpreted as ENOMEM. In
fact it can mean a host of issues.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c      | 10 +++++-----
 fs/nfs/inode.c    | 15 +++++++--------
 fs/nfs/nfs4proc.c |  2 +-
 3 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 609185a15c9..06c48b385c9 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -901,9 +901,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 		res = ERR_PTR(error);
 		goto out_unlock;
 	}
-	res = ERR_PTR(-EACCES);
 	inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
-	if (!inode)
+	res = (struct dentry *)inode;
+	if (IS_ERR(res))
 		goto out_unlock;
 no_entry:
 	res = d_add_unique(dentry, inode);
@@ -1096,7 +1096,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
 		return NULL;
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
 	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
-	if (!inode) {
+	if (IS_ERR(inode)) {
 		dput(dentry);
 		return NULL;
 	}
@@ -1134,9 +1134,9 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 		if (error < 0)
 			goto out_err;
 	}
-	error = -ENOMEM;
 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
-	if (inode == NULL)
+	error = PTR_ERR(inode);
+	if (IS_ERR(inode))
 		goto out_err;
 	d_instantiate(dentry, inode);
 	return 0;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 17654bffc3c..a0cda53461b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -241,7 +241,6 @@ static struct inode *
 nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
 {
 	struct nfs_server	*server = NFS_SB(sb);
-	struct inode *rooti;
 	int			error;
 
 	error = server->rpc_ops->getroot(server, rootfh, fsinfo);
@@ -250,10 +249,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f
 		return ERR_PTR(error);
 	}
 
-	rooti = nfs_fhget(sb, rootfh, fsinfo->fattr);
-	if (!rooti)
-		return ERR_PTR(-ENOMEM);
-	return rooti;
+	return nfs_fhget(sb, rootfh, fsinfo->fattr);
 }
 
 /*
@@ -853,7 +849,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		.fh	= fh,
 		.fattr	= fattr
 	};
-	struct inode *inode = NULL;
+	struct inode *inode = ERR_PTR(-ENOENT);
 	unsigned long hash;
 
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
@@ -866,8 +862,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 
 	hash = nfs_fattr_to_ino_t(fattr);
 
-	if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc)))
+	inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc);
+	if (inode == NULL) {
+		inode = ERR_PTR(-ENOMEM);
 		goto out_no_inode;
+	}
 
 	if (inode->i_state & I_NEW) {
 		struct nfs_inode *nfsi = NFS_I(inode);
@@ -936,7 +935,7 @@ out:
 	return inode;
 
 out_no_inode:
-	printk("nfs_fhget: iget failed\n");
+	dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode));
 	goto out;
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 31000326aba..02c7d8c04c5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -336,7 +336,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
 	if (!(data->f_attr.valid & NFS_ATTR_FATTR))
 		goto out;
 	inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
-	if (inode == NULL)
+	if (IS_ERR(inode))
 		goto out;
 	state = nfs4_get_open_state(inode, data->owner);
 	if (state == NULL)
-- 
cgit v1.2.3


From a9a801787a761616589a6526d7a29c13f4deb3d8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:48 -0500
Subject: NFS, NLM: Allow blocking locks to respect signals

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 6bcbc4d676c..5263b2864a4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -443,10 +443,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
 static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = filp->f_mapping->host;
-	sigset_t oldset;
 	int status;
 
-	rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
 	/*
 	 * Flush all pending writes before doing anything
 	 * with locks..
@@ -464,17 +462,14 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
 	else
 		status = do_vfs_lock(filp, fl);
 	unlock_kernel();
-	rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
 	return status;
 }
 
 static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = filp->f_mapping->host;
-	sigset_t oldset;
 	int status;
 
-	rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
 	/*
 	 * Flush all pending writes before doing anything
 	 * with locks..
@@ -507,7 +502,6 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
 	nfs_sync_mapping(filp->f_mapping);
 	nfs_zap_caches(inode);
 out:
-	rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
 	return status;
 }
 
-- 
cgit v1.2.3


From 1dd761e9070aa2e543df3db41bd75ed4b8f2fab9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:49 -0500
Subject: NFSv4: Ensure the callback daemon flushes signals

If the callback daemon is signalled, but is unable to exit because it still
has users, then we need to flush signals. If not, then svc_recv() can
never sleep, and so we hang.
If we flush signals, then we also have to be prepared to resend them when
we want the thread to exit.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 2c042f8d70b..99d2cfbce86 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -55,7 +55,12 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
 
 	complete(&nfs_callback_info.started);
 
-	while (nfs_callback_info.users != 0 || !signalled()) {
+	for(;;) {
+		if (signalled()) {
+			if (nfs_callback_info.users == 0)
+				break;
+			flush_signals(current);
+		}
 		/*
 		 * Listen for a request on the socket
 		 */
@@ -135,11 +140,13 @@ int nfs_callback_down(void)
 
 	lock_kernel();
 	down(&nfs_callback_sema);
-	if (--nfs_callback_info.users || nfs_callback_info.pid == 0)
-		goto out;
-	kill_proc(nfs_callback_info.pid, SIGKILL, 1);
-	wait_for_completion(&nfs_callback_info.stopped);
-out:
+	nfs_callback_info.users--;
+	do {
+		if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0)
+			break;
+		if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0)
+			break;
+	} while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
 	up(&nfs_callback_sema);
 	unlock_kernel();
 	return ret;
-- 
cgit v1.2.3


From deb7d638262019cbac5d15ab74ffd1c29242c7cb Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:50 -0500
Subject: NFS: Fix a race with PG_private and nfs_release_page()

We don't need to set PG_private for readahead pages, since they never get
unlocked while I/O is in progress. However there is a small race in
nfs_readpage_release() whereby the page may be unlocked, and have
PG_private set.

Fix is to have PG_private set only for the case of writes...

Also fix a bug in nfs_clear_page_writeback(): Don't attempt to clear the
radix_tree tag if we've already deleted the radix tree entry.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/pagelist.c | 10 +++++-----
 fs/nfs/write.c    |  2 ++
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d6e076c9dbe..106aca388eb 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -88,7 +88,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 	BUG_ON(PagePrivate(page));
 	BUG_ON(!PageLocked(page));
 	BUG_ON(page->mapping->host != inode);
-	SetPagePrivate(page);
 	req->wb_offset  = offset;
 	req->wb_pgbase	= offset;
 	req->wb_bytes   = count;
@@ -136,9 +135,11 @@ void nfs_clear_page_writeback(struct nfs_page *req)
 {
 	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
 
-	spin_lock(&nfsi->req_lock);
-	radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
-	spin_unlock(&nfsi->req_lock);
+	if (req->wb_page != NULL) {
+		spin_lock(&nfsi->req_lock);
+		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+		spin_unlock(&nfsi->req_lock);
+	}
 	nfs_unlock_request(req);
 }
 
@@ -153,7 +154,6 @@ void nfs_clear_request(struct nfs_page *req)
 {
 	struct page *page = req->wb_page;
 	if (page != NULL) {
-		ClearPagePrivate(page);
 		page_cache_release(page);
 		req->wb_page = NULL;
 	}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 647e3217c2e..d9e5ee59457 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -429,6 +429,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 		if (nfs_have_delegation(inode, FMODE_WRITE))
 			nfsi->change_attr++;
 	}
+	SetPagePrivate(req->wb_page);
 	nfsi->npages++;
 	atomic_inc(&req->wb_count);
 	return 0;
@@ -445,6 +446,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 	BUG_ON (!NFS_WBACK_BUSY(req));
 
 	spin_lock(&nfsi->req_lock);
+	ClearPagePrivate(req->wb_page);
 	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
 	nfsi->npages--;
 	if (!nfsi->npages) {
-- 
cgit v1.2.3


From 7d46a49f512e8d10b23353781a8ba85edd4fa640 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:50 -0500
Subject: NFS: Clean up nfs_flush_list()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 62 +++++++++++++++++++++++++++-------------------------------
 1 file changed, 29 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d9e5ee59457..2beb1268bb1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -963,7 +963,7 @@ static void nfs_execute_write(struct nfs_write_data *data)
  * Generate multiple small requests to write out a single
  * contiguous dirty area on one page.
  */
-static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
+static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
 {
 	struct nfs_page *req = nfs_list_entry(head->next);
 	struct page *page = req->wb_page;
@@ -1032,16 +1032,13 @@ out_bad:
  * This is the case if nfs_updatepage detects a conflicting request
  * that has been written but not committed.
  */
-static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
+static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
 {
 	struct nfs_page		*req;
 	struct page		**pages;
 	struct nfs_write_data	*data;
 	unsigned int		count;
 
-	if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
-		return nfs_flush_multi(head, inode, how);
-
 	data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
 	if (!data)
 		goto out_bad;
@@ -1074,24 +1071,32 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
 	return -ENOMEM;
 }
 
-static int
-nfs_flush_list(struct list_head *head, int wpages, int how)
+static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how)
 {
 	LIST_HEAD(one_request);
-	struct nfs_page		*req;
-	int			error = 0;
-	unsigned int		pages = 0;
+	int (*flush_one)(struct inode *, struct list_head *, int);
+	struct nfs_page	*req;
+	int wpages = NFS_SERVER(inode)->wpages;
+	int wsize = NFS_SERVER(inode)->wsize;
+	int error;
 
-	while (!list_empty(head)) {
-		pages += nfs_coalesce_requests(head, &one_request, wpages);
+	flush_one = nfs_flush_one;
+	if (wsize < PAGE_CACHE_SIZE)
+		flush_one = nfs_flush_multi;
+	/* For single writes, FLUSH_STABLE is more efficient */
+	if (npages <= wpages && npages == NFS_I(inode)->npages
+			&& nfs_list_entry(head->next)->wb_bytes <= wsize)
+		how |= FLUSH_STABLE;
+
+	do {
+		nfs_coalesce_requests(head, &one_request, wpages);
 		req = nfs_list_entry(one_request.next);
-		error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how);
+		error = flush_one(inode, &one_request, how);
 		if (error < 0)
-			break;
-	}
-	if (error >= 0)
-		return pages;
-
+			goto out_err;
+	} while (!list_empty(head));
+	return 0;
+out_err:
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
@@ -1423,24 +1428,16 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	LIST_HEAD(head);
-	int			res,
-				error = 0;
+	int res;
 
 	spin_lock(&nfsi->req_lock);
 	res = nfs_scan_dirty(inode, &head, idx_start, npages);
 	spin_unlock(&nfsi->req_lock);
 	if (res) {
-		struct nfs_server *server = NFS_SERVER(inode);
-
-		/* For single writes, FLUSH_STABLE is more efficient */
-		if (res == nfsi->npages && nfsi->npages <= server->wpages) {
-			if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize)
-				how |= FLUSH_STABLE;
-		}
-		error = nfs_flush_list(&head, server->wpages, how);
+		int error = nfs_flush_list(inode, &head, res, how);
+		if (error < 0)
+			return error;
 	}
-	if (error < 0)
-		return error;
 	return res;
 }
 
@@ -1449,14 +1446,13 @@ int nfs_commit_inode(struct inode *inode, int how)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	LIST_HEAD(head);
-	int			res,
-				error = 0;
+	int res;
 
 	spin_lock(&nfsi->req_lock);
 	res = nfs_scan_commit(inode, &head, 0, 0);
 	spin_unlock(&nfsi->req_lock);
 	if (res) {
-		error = nfs_commit_list(inode, &head, how);
+		int error = nfs_commit_list(inode, &head, how);
 		if (error < 0)
 			return error;
 	}
-- 
cgit v1.2.3


From c42de9dd67250fe984e0e31c9b542d721af6454b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 13:44:51 -0500
Subject: NFS: Fix a race in nfs_sync_inode()

Kudos to Neil Brown for spotting the problem:

"in nfs_sync_inode, there is effectively the sequence:

   nfs_wait_on_requests
   nfs_flush_inode
   nfs_commit_inode

 This seems a bit racy to me as if the only requests are on the
 ->commit list, and nfs_commit_inode is called separately after
 nfs_wait_on_requests completes, and before nfs_commit_inode start
 (say: by nfs_write_inode) then none of these function will return
 >0, yet there will be some pending request that aren't waited for."

The solution is to search for requests to wait upon, search for dirty
requests, and search for uncommitted requests while holding the
nfsi->req_lock

The patch also cleans up nfs_sync_inode(), getting rid of the redundant
FLUSH_WAIT flag. It turns out that we were always setting it.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c |  4 ++--
 fs/nfs/write.c | 72 ++++++++++++++++++++++++++++++++++++++++------------------
 2 files changed, 52 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a0cda53461b..60aac58270a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -137,7 +137,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
 static int
 nfs_write_inode(struct inode *inode, int sync)
 {
-	int flags = sync ? FLUSH_WAIT : 0;
+	int flags = sync ? FLUSH_SYNC : 0;
 	int ret;
 
 	ret = nfs_commit_inode(inode, flags);
@@ -1051,7 +1051,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	int err;
 
 	/* Flush out writes to the server in order to update c/mtime */
-	nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT);
+	nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT);
 
 	/*
 	 * We may force a getattr if the user cares about atime.
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2beb1268bb1..3f5225404c9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -539,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req)
  *
  * Interruptible by signals only if mounted with intr flag.
  */
-static int
-nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_page *req;
@@ -553,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
 	else
 		idx_end = idx_start + npages - 1;
 
-	spin_lock(&nfsi->req_lock);
 	next = idx_start;
 	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
 		if (req->wb_index > idx_end)
@@ -566,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
 		spin_unlock(&nfsi->req_lock);
 		error = nfs_wait_on_request(req);
 		nfs_release_request(req);
+		spin_lock(&nfsi->req_lock);
 		if (error < 0)
 			return error;
-		spin_lock(&nfsi->req_lock);
 		res++;
 	}
-	spin_unlock(&nfsi->req_lock);
 	return res;
 }
 
+static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int ret;
+
+	spin_lock(&nfsi->req_lock);
+	ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+	spin_unlock(&nfsi->req_lock);
+	return ret;
+}
+
 /*
  * nfs_scan_dirty - Scan an inode for dirty requests
  * @inode: NFS inode to scan
@@ -626,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st
 	}
 	return res;
 }
+#else
+static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
+{
+	return 0;
+}
 #endif
 
 static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
@@ -1421,6 +1434,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
 	.rpc_call_done = nfs_commit_done,
 	.rpc_release = nfs_commit_release,
 };
+#else
+static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+{
+	return 0;
+}
 #endif
 
 static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
@@ -1460,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how)
 }
 #endif
 
-int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
-		  unsigned int npages, int how)
+int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
+		unsigned int npages, int how)
 {
+	struct nfs_inode *nfsi = NFS_I(inode);
+	LIST_HEAD(head);
 	int nocommit = how & FLUSH_NOCOMMIT;
-	int wait = how & FLUSH_WAIT;
-	int error;
-
-	how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT);
+	int pages, ret;
 
+	how &= ~FLUSH_NOCOMMIT;
+	spin_lock(&nfsi->req_lock);
 	do {
-		if (wait) {
-			error = nfs_wait_on_requests(inode, idx_start, npages);
-			if (error != 0)
-				continue;
-		}
-		error = nfs_flush_inode(inode, idx_start, npages, how);
-		if (error != 0)
+		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+		if (ret != 0)
 			continue;
-		if (!nocommit)
-			error = nfs_commit_inode(inode, how);
-	} while (error > 0);
-	return error;
+		pages = nfs_scan_dirty(inode, &head, idx_start, npages);
+		if (pages != 0) {
+			spin_unlock(&nfsi->req_lock);
+			ret = nfs_flush_list(inode, &head, pages, how);
+			spin_lock(&nfsi->req_lock);
+			continue;
+		}
+		if (nocommit)
+			break;
+		pages = nfs_scan_commit(inode, &head, 0, 0);
+		if (pages == 0)
+			break;
+		spin_unlock(&nfsi->req_lock);
+		ret = nfs_commit_list(inode, &head, how);
+		spin_lock(&nfsi->req_lock);
+	} while (ret >= 0);
+	spin_unlock(&nfsi->req_lock);
+	return ret;
 }
 
 int nfs_init_writepagecache(void)
-- 
cgit v1.2.3


From f38aa94224c5517a40ba56d453779f70d3229803 Mon Sep 17 00:00:00 2001
From: Amy Griffis <amy.griffis@hp.com>
Date: Thu, 3 Nov 2005 15:57:06 +0000
Subject: [PATCH] Pass dentry, not just name, in fsnotify creation hooks.

The audit hooks (to be added shortly) will want to see dentry->d_inode
too, not just the name.

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 fs/namei.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 8dc2b038d5d..f6619af9e95 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1472,7 +1472,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	DQUOT_INIT(dir);
 	error = dir->i_op->create(dir, dentry, mode, nd);
 	if (!error)
-		fsnotify_create(dir, dentry->d_name.name);
+		fsnotify_create(dir, dentry);
 	return error;
 }
 
@@ -1793,7 +1793,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	DQUOT_INIT(dir);
 	error = dir->i_op->mknod(dir, dentry, mode, dev);
 	if (!error)
-		fsnotify_create(dir, dentry->d_name.name);
+		fsnotify_create(dir, dentry);
 	return error;
 }
 
@@ -1870,7 +1870,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	DQUOT_INIT(dir);
 	error = dir->i_op->mkdir(dir, dentry, mode);
 	if (!error)
-		fsnotify_mkdir(dir, dentry->d_name.name);
+		fsnotify_mkdir(dir, dentry);
 	return error;
 }
 
@@ -2133,7 +2133,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
 	DQUOT_INIT(dir);
 	error = dir->i_op->symlink(dir, dentry, oldname);
 	if (!error)
-		fsnotify_create(dir, dentry->d_name.name);
+		fsnotify_create(dir, dentry);
 	return error;
 }
 
@@ -2210,7 +2210,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
 	error = dir->i_op->link(old_dentry, dir, new_dentry);
 	mutex_unlock(&old_dentry->d_inode->i_mutex);
 	if (!error)
-		fsnotify_create(dir, new_dentry->d_name.name);
+		fsnotify_create(dir, new_dentry);
 	return error;
 }
 
-- 
cgit v1.2.3


From 73241ccca0f7786933f1d31b3d86f2456549953a Mon Sep 17 00:00:00 2001
From: Amy Griffis <amy.griffis@hp.com>
Date: Thu, 3 Nov 2005 16:00:25 +0000
Subject: [PATCH] Collect more inode information during syscall processing.

This patch augments the collection of inode info during syscall
processing. It represents part of the functionality that was provided
by the auditfs patch included in RHEL4.

Specifically, it:

- Collects information for target inodes created or removed during
  syscalls.  Previous code only collects information for the target
  inode's parent.

- Adds the audit_inode() hook to syscalls that operate on a file
  descriptor (e.g. fchown), enabling audit to do inode filtering for
  these calls.

- Modifies filtering code to check audit context for either an inode #
  or a parent inode # matching a given rule.

- Modifies logging to provide inode # for both parent and child.

- Protect debug info from NULL audit_names.name.

[AV: folded a later typo fix from the same author]

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c |  1 +
 fs/open.c  |  8 +++++++-
 fs/xattr.c | 11 +++++++++--
 3 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index f6619af9e95..51cfc9c3ed0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1353,6 +1353,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
 		return -ENOENT;
 
 	BUG_ON(victim->d_parent->d_inode != dir);
+	audit_inode_child(victim->d_name.name, victim->d_inode, dir->i_ino);
 
 	error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
 	if (error)
diff --git a/fs/open.c b/fs/open.c
index 70e0230d8e7..70510004d06 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -27,6 +27,7 @@
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
 #include <linux/rcupdate.h>
+#include <linux/audit.h>
 
 #include <asm/unistd.h>
 
@@ -626,6 +627,8 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
 	dentry = file->f_dentry;
 	inode = dentry->d_inode;
 
+	audit_inode(NULL, inode, 0);
+
 	err = -EROFS;
 	if (IS_RDONLY(inode))
 		goto out_putf;
@@ -775,7 +778,10 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
 
 	file = fget(fd);
 	if (file) {
-		error = chown_common(file->f_dentry, user, group);
+		struct dentry * dentry;
+		dentry = file->f_dentry;
+		audit_inode(NULL, dentry->d_inode, 0);
+		error = chown_common(dentry, user, group);
 		fput(file);
 	}
 	return error;
diff --git a/fs/xattr.c b/fs/xattr.c
index 80eca7d3d69..e416190f5e9 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -17,6 +17,7 @@
 #include <linux/syscalls.h>
 #include <linux/module.h>
 #include <linux/fsnotify.h>
+#include <linux/audit.h>
 #include <asm/uaccess.h>
 
 
@@ -234,12 +235,15 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
 	      size_t size, int flags)
 {
 	struct file *f;
+	struct dentry *dentry;
 	int error = -EBADF;
 
 	f = fget(fd);
 	if (!f)
 		return error;
-	error = setxattr(f->f_dentry, name, value, size, flags);
+	dentry = f->f_dentry;
+	audit_inode(NULL, dentry->d_inode, 0);
+	error = setxattr(dentry, name, value, size, flags);
 	fput(f);
 	return error;
 }
@@ -458,12 +462,15 @@ asmlinkage long
 sys_fremovexattr(int fd, char __user *name)
 {
 	struct file *f;
+	struct dentry *dentry;
 	int error = -EBADF;
 
 	f = fget(fd);
 	if (!f)
 		return error;
-	error = removexattr(f->f_dentry, name);
+	dentry = f->f_dentry;
+	audit_inode(NULL, dentry->d_inode, 0);
+	error = removexattr(dentry, name);
 	fput(f);
 	return error;
 }
-- 
cgit v1.2.3


From 641e6f30a095f3752ed84fd9d279382f5d3ef4c1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 16 Mar 2006 15:44:26 -0800
Subject: [PATCH] sysfs: sysfs_remove_dir() needs to invalidate the dentry

When calling sysfs_remove_dir() don't allow any further sysfs functions
to work for this kobject anymore.  This fixes a nasty USB cdc-acm oops
on disconnect.

Many thanks to Bob Copeland and Paul Fulghum for taking the time to
track this down.

Cc: Bob Copeland <email@bobcopeland.com>
Cc: Paul Fulghum <paulkf@microgate.com>
Cc: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/dir.c   | 1 +
 fs/sysfs/inode.c | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 49bd219275d..cfd290d3d6b 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -302,6 +302,7 @@ void sysfs_remove_dir(struct kobject * kobj)
 	 * Drop reference from dget() on entrance.
 	 */
 	dput(dentry);
+	kobj->dentry = NULL;
 }
 
 int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 689f7bcfaf3..6beee6f6a67 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -227,12 +227,16 @@ void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
 void sysfs_hash_and_remove(struct dentry * dir, const char * name)
 {
 	struct sysfs_dirent * sd;
-	struct sysfs_dirent * parent_sd = dir->d_fsdata;
+	struct sysfs_dirent * parent_sd;
+
+	if (!dir)
+		return;
 
 	if (dir->d_inode == NULL)
 		/* no inode means this hasn't been made visible yet */
 		return;
 
+	parent_sd = dir->d_fsdata;
 	mutex_lock(&dir->d_inode->i_mutex);
 	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
 		if (!sd->s_element)
-- 
cgit v1.2.3


From 58383af629efb07e5a0694e445eda0c65b16e1de Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Mon, 6 Feb 2006 14:12:43 -0800
Subject: [PATCH] kobj_map semaphore to mutex conversion

Convert the kobj_map code to use a mutex instead of a semaphore.  It
converts the single two users as well, genhd.c and char_dev.c.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/char_dev.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/char_dev.c b/fs/char_dev.c
index 21195c48163..5c36345c9bf 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -19,6 +19,7 @@
 #include <linux/kobject.h>
 #include <linux/kobj_map.h>
 #include <linux/cdev.h>
+#include <linux/mutex.h>
 
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
@@ -28,7 +29,7 @@ static struct kobj_map *cdev_map;
 
 #define MAX_PROBE_HASH 255	/* random */
 
-static DECLARE_MUTEX(chrdevs_lock);
+static DEFINE_MUTEX(chrdevs_lock);
 
 static struct char_device_struct {
 	struct char_device_struct *next;
@@ -88,13 +89,13 @@ out:
 
 void *acquire_chrdev_list(void)
 {
-	down(&chrdevs_lock);
+	mutex_lock(&chrdevs_lock);
 	return get_next_chrdev(NULL);
 }
 
 void release_chrdev_list(void *dev)
 {
-	up(&chrdevs_lock);
+	mutex_unlock(&chrdevs_lock);
 	kfree(dev);
 }
 
@@ -151,7 +152,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
 
 	memset(cd, 0, sizeof(struct char_device_struct));
 
-	down(&chrdevs_lock);
+	mutex_lock(&chrdevs_lock);
 
 	/* temporary */
 	if (major == 0) {
@@ -186,10 +187,10 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
 	}
 	cd->next = *cp;
 	*cp = cd;
-	up(&chrdevs_lock);
+	mutex_unlock(&chrdevs_lock);
 	return cd;
 out:
-	up(&chrdevs_lock);
+	mutex_unlock(&chrdevs_lock);
 	kfree(cd);
 	return ERR_PTR(ret);
 }
@@ -200,7 +201,7 @@ __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct)
 	struct char_device_struct *cd = NULL, **cp;
 	int i = major_to_index(major);
 
-	down(&chrdevs_lock);
+	mutex_lock(&chrdevs_lock);
 	for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next)
 		if ((*cp)->major == major &&
 		    (*cp)->baseminor == baseminor &&
@@ -210,7 +211,7 @@ __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct)
 		cd = *cp;
 		*cp = cd->next;
 	}
-	up(&chrdevs_lock);
+	mutex_unlock(&chrdevs_lock);
 	return cd;
 }
 
-- 
cgit v1.2.3


From 58d49283b87751f7af75e021a629dcddb027e8eb Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Wed, 22 Feb 2006 11:18:15 +0100
Subject: [PATCH] sysfs: kzalloc conversion

this converts fs/sysfs to kzalloc() usage.
compile tested with make allyesconfig

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/file.c  | 3 +--
 fs/sysfs/inode.c | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index d0e3d849516..e21f4022feb 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -301,9 +301,8 @@ static int check_perm(struct inode * inode, struct file * file)
 	/* No error? Great, allocate a buffer for the file, and store it
 	 * it in file->private_data for easy access.
 	 */
-	buffer = kmalloc(sizeof(struct sysfs_buffer),GFP_KERNEL);
+	buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
 	if (buffer) {
-		memset(buffer,0,sizeof(struct sysfs_buffer));
 		init_MUTEX(&buffer->sem);
 		buffer->needs_read_fill = 1;
 		buffer->ops = ops;
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 6beee6f6a67..4c29ac41ac3 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -54,11 +54,10 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
 
 	if (!sd_iattr) {
 		/* setting attributes for the first time, allocate now */
-		sd_iattr = kmalloc(sizeof(struct iattr), GFP_KERNEL);
+		sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL);
 		if (!sd_iattr)
 			return -ENOMEM;
 		/* assign default attributes */
-		memset(sd_iattr, 0, sizeof(struct iattr));
 		sd_iattr->ia_mode = sd->s_mode;
 		sd_iattr->ia_uid = 0;
 		sd_iattr->ia_gid = 0;
-- 
cgit v1.2.3


From c516865cfbac0d862d4888df91793ad1e74ffd58 Mon Sep 17 00:00:00 2001
From: Maneesh Soni <maneesh@in.ibm.com>
Date: Thu, 9 Mar 2006 19:40:14 +0530
Subject: [PATCH] sysfs: fix problem with duplicate sysfs directories and files

The following patch checks for existing sysfs_dirent before
preparing new one while creating sysfs directories and files.

Signed-off-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/dir.c     | 32 +++++++++++++++++++++++++++++++-
 fs/sysfs/file.c    |  6 ++++--
 fs/sysfs/symlink.c |  5 +++--
 fs/sysfs/sysfs.h   |  1 +
 4 files changed, 39 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index cfd290d3d6b..bea1f4c02b9 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -50,6 +50,32 @@ static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent * parent_sd,
 	return sd;
 }
 
+/**
+ *
+ * Return -EEXIST if there is already a sysfs element with the same name for
+ * the same parent.
+ *
+ * called with parent inode's i_mutex held
+ */
+int sysfs_dirent_exist(struct sysfs_dirent *parent_sd,
+			  const unsigned char *new)
+{
+	struct sysfs_dirent * sd;
+
+	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+		if (sd->s_element) {
+			const unsigned char *existing = sysfs_get_name(sd);
+			if (strcmp(existing, new))
+				continue;
+			else
+				return -EEXIST;
+		}
+	}
+
+	return 0;
+}
+
+
 int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry,
 			void * element, umode_t mode, int type)
 {
@@ -102,7 +128,11 @@ static int create_dir(struct kobject * k, struct dentry * p,
 	mutex_lock(&p->d_inode->i_mutex);
 	*d = lookup_one_len(n, p, strlen(n));
 	if (!IS_ERR(*d)) {
-		error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR);
+ 		if (sysfs_dirent_exist(p->d_fsdata, n))
+  			error = -EEXIST;
+  		else
+			error = sysfs_make_dirent(p->d_fsdata, *d, k, mode,
+								SYSFS_DIR);
 		if (!error) {
 			error = sysfs_create(*d, mode, init_dir);
 			if (!error) {
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index e21f4022feb..5e83e724678 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -361,10 +361,12 @@ int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
 {
 	struct sysfs_dirent * parent_sd = dir->d_fsdata;
 	umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG;
-	int error = 0;
+	int error = -EEXIST;
 
 	mutex_lock(&dir->d_inode->i_mutex);
-	error = sysfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type);
+	if (!sysfs_dirent_exist(parent_sd, attr->name))
+		error = sysfs_make_dirent(parent_sd, NULL, (void *)attr,
+					  mode, type);
 	mutex_unlock(&dir->d_inode->i_mutex);
 
 	return error;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index e38d6338a20..fe23f47f6e4 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -82,12 +82,13 @@ exit1:
 int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
 {
 	struct dentry * dentry = kobj->dentry;
-	int error = 0;
+	int error = -EEXIST;
 
 	BUG_ON(!kobj || !kobj->dentry || !name);
 
 	mutex_lock(&dentry->d_inode->i_mutex);
-	error = sysfs_add_link(dentry, name, target);
+	if (!sysfs_dirent_exist(dentry->d_fsdata, name))
+		error = sysfs_add_link(dentry, name, target);
 	mutex_unlock(&dentry->d_inode->i_mutex);
 	return error;
 }
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 3f8953e0e5d..cf11d5b789d 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -5,6 +5,7 @@ extern kmem_cache_t *sysfs_dir_cachep;
 extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
 extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
 
+extern int sysfs_dirent_exist(struct sysfs_dirent *, const unsigned char *);
 extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
 				umode_t, int);
 
-- 
cgit v1.2.3


From dd308bc355a1aa4f202fe9a3133b6c676cb9606c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Tue, 7 Mar 2006 21:41:59 +1100
Subject: [PATCH] debugfs: Add debugfs_create_blob() helper for exporting
 binary data

I wanted to export a binary blob via debugfs, and although it was pretty easy
it seems like it'd be easier if there was a helper for it. It's a pity we need
the wrapper struct but I can't see a cleaner way to do it.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/debugfs/file.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

(limited to 'fs')

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index d575452cd9f..40c4fc973fa 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -251,3 +251,49 @@ struct dentry *debugfs_create_bool(const char *name, mode_t mode,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_bool);
 
+static ssize_t read_file_blob(struct file *file, char __user *user_buf,
+			      size_t count, loff_t *ppos)
+{
+	struct debugfs_blob_wrapper *blob = file->private_data;
+	return simple_read_from_buffer(user_buf, count, ppos, blob->data,
+			blob->size);
+}
+
+static struct file_operations fops_blob = {
+	.read =		read_file_blob,
+	.open =		default_open,
+};
+
+/**
+ * debugfs_create_blob - create a file in the debugfs filesystem that is
+ * used to read and write a binary blob.
+ *
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have
+ * @parent: a pointer to the parent dentry for this file.  This should be a
+ *          directory dentry if set.  If this paramater is NULL, then the
+ *          file will be created in the root of the debugfs filesystem.
+ * @blob: a pointer to a struct debugfs_blob_wrapper which contains a pointer
+ *        to the blob data and the size of the data.
+ *
+ * This function creates a file in debugfs with the given name that exports
+ * @blob->data as a binary blob. If the @mode variable is so set it can be
+ * read from. Writing is not supported.
+ *
+ * This function will return a pointer to a dentry if it succeeds.  This
+ * pointer must be passed to the debugfs_remove() function when the file is
+ * to be removed (no automatic cleanup happens if your module is unloaded,
+ * you are responsible here.)  If an error occurs, NULL will be returned.
+ *
+ * If debugfs is not enabled in the kernel, the value -ENODEV will be
+ * returned.  It is not wise to check for this value, but rather, check for
+ * NULL or !NULL instead as to eliminate the need for #ifdef in the calling
+ * code.
+ */
+struct dentry *debugfs_create_blob(const char *name, mode_t mode,
+				   struct dentry *parent,
+				   struct debugfs_blob_wrapper *blob)
+{
+	return debugfs_create_file(name, mode, parent, blob, &fops_blob);
+}
+EXPORT_SYMBOL_GPL(debugfs_create_blob);
-- 
cgit v1.2.3


From 832c57e9afa7a263bb2f8ee6d04d527ef6709aae Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 16 Mar 2006 11:23:21 -0700
Subject: [PATCH] sysfs: don't export dir symbols

These functions should only be used by the kobject core, and if any
driver tries to use them, bad things happen.  Unexport them to try to
prevent this from happening.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/dir.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index bea1f4c02b9..9ee95686444 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -510,7 +510,3 @@ struct file_operations sysfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= sysfs_readdir,
 };
-
-EXPORT_SYMBOL_GPL(sysfs_create_dir);
-EXPORT_SYMBOL_GPL(sysfs_remove_dir);
-EXPORT_SYMBOL_GPL(sysfs_rename_dir);
-- 
cgit v1.2.3


From b3229087c5e08589cea4f5040dab56f7dc11332a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 16 Mar 2006 15:44:26 -0800
Subject: [PATCH] sysfs: fix a kobject leak in sysfs_add_link on the error path

As pointed out by Oliver Neukum.

Cc: Maneesh Soni <maneesh@in.ibm.com>
Cc: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/symlink.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index fe23f47f6e4..d2eac3ceed5 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -66,6 +66,7 @@ static int sysfs_add_link(struct dentry * parent, const char * name, struct kobj
 	if (!error)
 		return 0;
 
+	kobject_put(target);
 	kfree(sl->link_name);
 exit2:
 	kfree(sl);
-- 
cgit v1.2.3


From 7a1218a277c45cba1fb8d7089407a1769c645c43 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Mar 2006 18:11:10 -0500
Subject: SUNRPC: Ensure rpc_call_async() always calls tk_ops->rpc_release()

Currently this will not happen if we exit before rpc_new_task() was called.
Also fix up rpc_run_task() to do the same (for consistency).

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 02c7d8c04c5..4aba15ad1d2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -605,11 +605,14 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 	int status;
 
 	atomic_inc(&data->count);
+	/*
+	 * If rpc_run_task() ends up calling ->rpc_release(), we
+	 * want to ensure that it takes the 'error' code path.
+	 */
+	data->rpc_status = -ENOMEM;
 	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
-	if (IS_ERR(task)) {
-		nfs4_opendata_free(data);
+	if (IS_ERR(task))
 		return PTR_ERR(task);
-	}
 	status = nfs4_wait_for_completion_rpc_task(task);
 	if (status != 0) {
 		data->cancelled = 1;
@@ -708,11 +711,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	int status;
 
 	atomic_inc(&data->count);
+	/*
+	 * If rpc_run_task() ends up calling ->rpc_release(), we
+	 * want to ensure that it takes the 'error' code path.
+	 */
+	data->rpc_status = -ENOMEM;
 	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
-	if (IS_ERR(task)) {
-		nfs4_opendata_free(data);
+	if (IS_ERR(task))
 		return PTR_ERR(task);
-	}
 	status = nfs4_wait_for_completion_rpc_task(task);
 	if (status != 0) {
 		data->cancelled = 1;
@@ -2959,10 +2965,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	data->rpc_status = 0;
 
 	task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data);
-	if (IS_ERR(task)) {
-		nfs4_delegreturn_release(data);
+	if (IS_ERR(task))
 		return PTR_ERR(task);
-	}
 	status = nfs4_wait_for_completion_rpc_task(task);
 	if (status == 0) {
 		status = data->rpc_status;
@@ -3182,7 +3186,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 		struct nfs_seqid *seqid)
 {
 	struct nfs4_unlockdata *data;
-	struct rpc_task *task;
 
 	data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
 	if (data == NULL) {
@@ -3192,10 +3195,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 
 	/* Unlock _before_ we do the RPC call */
 	do_vfs_lock(fl->fl_file, fl);
-	task = rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
-	if (IS_ERR(task))
-		nfs4_locku_release_calldata(data);
-	return task;
+	return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
 }
 
 static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
@@ -3376,10 +3376,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 		data->arg.reclaim = 1;
 	task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC,
 			&nfs4_lock_ops, data);
-	if (IS_ERR(task)) {
-		nfs4_lock_release(data);
+	if (IS_ERR(task))
 		return PTR_ERR(task);
-	}
 	ret = nfs4_wait_for_completion_rpc_task(task);
 	if (ret == 0) {
 		ret = data->rpc_status;
-- 
cgit v1.2.3


From 096455a22acac06fb6d0d75f276170ab72d55ba6 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Mon, 20 Mar 2006 23:23:42 -0500
Subject: NFSv4: Dont list system.nfs4_acl for filesystems that don't support
 it.

Thanks to Frank Filz for pointing out that we list system.nfs4_acl extended
attribute even on filesystems where we don't actually support nfs4_acl.
This is inconsistent with the e.g. ext3 POSIX ACL behaviour, and seems to
annoy cp.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4aba15ad1d2..47ece1dd3c6 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3559,6 +3559,8 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
 {
 	size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
 
+	if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
+		return 0;
 	if (buf && buflen < len)
 		return -ERANGE;
 	if (buf)
-- 
cgit v1.2.3


From f3ee439f43381e45b191cf721b4a51d41f33301f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Mon, 20 Mar 2006 23:24:13 -0500
Subject: LOCKD: nlmsvc_traverse_blocks return is unused

Note that we never return non-zero.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svclock.c | 3 +--
 fs/lockd/svcsubs.c | 5 +++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index ce754efe284..d2b66bad7d5 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -309,14 +309,13 @@ restart:
  * Loop over all blocks and perform the action specified.
  * (NLM_ACT_CHECK handled by nlmsvc_inspect_file).
  */
-int
+void
 nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
 {
 	if (action == NLM_ACT_MARK)
 		nlmsvc_act_mark(host, file);
 	else
 		nlmsvc_act_unlock(host, file);
-	return 0;
 }
 
 /*
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 601e5b3dfe2..2043011a1a2 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -224,8 +224,9 @@ nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action)
 		if (file->f_count || file->f_blocks || file->f_shares)
 			return 1;
 	} else {
-		if (nlmsvc_traverse_blocks(host, file, action)
-		 || nlmsvc_traverse_shares(host, file, action))
+		nlmsvc_traverse_blocks(host, file, action);
+
+		if (nlmsvc_traverse_shares(host, file, action))
 			return 1;
 	}
 	return nlm_traverse_locks(host, file, action);
-- 
cgit v1.2.3


From 5f12191bc000ea31970339a5f54c11087506711c Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Mon, 20 Mar 2006 23:24:25 -0500
Subject: LOCKD: Make nlmsvc_traverse_shares return void

The nlmsvc_traverse_shares return value is always zero, hence useless.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svcshare.c | 4 +---
 fs/lockd/svcsubs.c  | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 4943fb7836c..27288c83da9 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -88,7 +88,7 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
  * Traverse all shares for a given file (and host).
  * NLM_ACT_CHECK is handled by nlmsvc_inspect_file.
  */
-int
+void
 nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
 {
 	struct nlm_share	*share, **shpp;
@@ -106,6 +106,4 @@ nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
 		}
 		shpp = &share->s_next;
 	}
-
-	return 0;
 }
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 2043011a1a2..c7a6e3ae44d 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -225,9 +225,7 @@ nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action)
 			return 1;
 	} else {
 		nlmsvc_traverse_blocks(host, file, action);
-
-		if (nlmsvc_traverse_shares(host, file, action))
-			return 1;
+		nlmsvc_traverse_shares(host, file, action);
 	}
 	return nlm_traverse_locks(host, file, action);
 }
-- 
cgit v1.2.3


From df6db302cb236ac3a683d535a3e2073d9f4b2833 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Mon, 20 Mar 2006 23:25:10 -0500
Subject: SUNRPC,RPCSEC_GSS: spkm3--fix config dependencies

Add default selection of CRYPTO_CAST5 when selecting RPCSEC_GSS_SPKM3.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index e9749b0eecd..9a4158f2a3a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1567,6 +1567,7 @@ config RPCSEC_GSS_SPKM3
 	select CRYPTO
 	select CRYPTO_MD5
 	select CRYPTO_DES
+	select CRYPTO_CAST5
 	help
 	  Provides for secure RPC calls by means of a gss-api
 	  mechanism based on the SPKM3 public-key mechanism.
-- 
cgit v1.2.3


From 4de151d8cd2553e7e89044ab5d72fcad4eb04afb Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 22 Mar 2006 00:13:35 +0100
Subject: It's UTF-8

Fix some comments to "UTF-8".

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/befs/linuxvfs.c | 2 +-
 fs/cifs/CHANGES    | 2 +-
 fs/fat/dir.c       | 2 +-
 fs/fat/inode.c     | 2 +-
 fs/isofs/joliet.c  | 2 +-
 fs/nls/Kconfig     | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 2d365cb8eec..dd6048ce053 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -561,7 +561,7 @@ befs_utf2nls(struct super_block *sb, const char *in,
  * @sb: Superblock
  * @src: Input string buffer in NLS format
  * @srclen: Length of input string in bytes
- * @dest: The output string in UTF8 format
+ * @dest: The output string in UTF-8 format
  * @destlen: Length of the output buffer
  * 
  * Converts input string @src, which is in the format of the loaded NLS map,
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index d335015473a..cb68efba35d 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -160,7 +160,7 @@ improperly zeroed buffer in CIFS Unix extensions set times call.
 Version 1.25
 ------------
 Fix internationalization problem in cifs readdir with filenames that map to 
-longer UTF8 strings than the string on the wire was in Unicode.  Add workaround
+longer UTF-8 strings than the string on the wire was in Unicode.  Add workaround
 for readdir to netapp servers. Fix search rewind (seek into readdir to return 
 non-consecutive entries).  Do not do readdir when server negotiates 
 buffer size to small to fit filename. Add support for reading POSIX ACLs from
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index db0de5c621c..4095bc149eb 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -114,7 +114,7 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
 }
 
 /*
- * Convert Unicode 16 to UTF8, translated Unicode, or ASCII.
+ * Convert Unicode 16 to UTF-8, translated Unicode, or ASCII.
  * If uni_xlate is enabled and we can't get a 1:1 conversion, use a
  * colon as an escape character since it is normally invalid on the vfat
  * filesystem. The following four characters are the hexadecimal digits
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index e7f4aa7fc68..e78d7b4842c 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1101,7 +1101,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
 			return -EINVAL;
 		}
 	}
-	/* UTF8 doesn't provide FAT semantics */
+	/* UTF-8 doesn't provide FAT semantics */
 	if (!strcmp(opts->iocharset, "utf8")) {
 		printk(KERN_ERR "FAT: utf8 is not a recommended IO charset"
 		       " for FAT filesystems, filesystem will be case sensitive!\n");
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index 2931de7f1a6..81a90e170ac 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c
@@ -11,7 +11,7 @@
 #include "isofs.h"
 
 /*
- * Convert Unicode 16 to UTF8 or ASCII.
+ * Convert Unicode 16 to UTF-8 or ASCII.
  */
 static int
 uni16_to_x8(unsigned char *ascii, u16 *uni, int len, struct nls_table *nls)
diff --git a/fs/nls/Kconfig b/fs/nls/Kconfig
index 0ab8f00bdbb..976ecccd6f5 100644
--- a/fs/nls/Kconfig
+++ b/fs/nls/Kconfig
@@ -491,7 +491,7 @@ config NLS_KOI8_U
 	  (koi8-u) and Belarusian (koi8-ru) character sets.
 
 config NLS_UTF8
-	tristate "NLS UTF8"
+	tristate "NLS UTF-8"
 	depends on NLS
 	help
 	  If you want to display filenames with native language characters
-- 
cgit v1.2.3


From f1fdc848aab7fb95b32e058b7f06cc07912b3734 Mon Sep 17 00:00:00 2001
From: Yingping Lu <yingping@sgi.com>
Date: Wed, 22 Mar 2006 12:44:15 +1100
Subject: [XFS] Fixing KDB's xrwtrc command, also added the current process id
 into the trace.

SGI-PV: 948300
SGI-Modid: xfs-linux-melb:xfs-kern:208069a

Signed-off-by: Yingping Lu <yingping@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 2 +-
 fs/xfs/linux-2.6/xfs_lrw.c  | 4 ++--
 fs/xfs/xfs_inode.c          | 8 ++++----
 fs/xfs/xfs_iomap.c          | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d9bf130c63b..965757cf15e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -103,7 +103,7 @@ xfs_page_trace(
 		(void *)((unsigned long)delalloc),
 		(void *)((unsigned long)unmapped),
 		(void *)((unsigned long)unwritten),
-		(void *)NULL,
+		(void *)((unsigned long)current_pid()),
 		(void *)NULL);
 }
 #else
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 2a936bd38ce..0169360475c 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -83,7 +83,7 @@ xfs_rw_enter_trace(
 		(void *)((unsigned long)ioflags),
 		(void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
 		(void *)((unsigned long)(io->io_new_size & 0xffffffff)),
-		(void *)NULL,
+		(void *)((unsigned long)current_pid()),
 		(void *)NULL,
 		(void *)NULL,
 		(void *)NULL,
@@ -113,7 +113,7 @@ xfs_inval_cached_trace(
 		(void *)((unsigned long)(first & 0xffffffff)),
 		(void *)((unsigned long)((last >> 32) & 0xffffffff)),
 		(void *)((unsigned long)(last & 0xffffffff)),
-		(void *)NULL,
+		(void *)((unsigned long)current_pid()),
 		(void *)NULL,
 		(void *)NULL,
 		(void *)NULL,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7d0ded05a46..2424a477794 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1372,10 +1372,10 @@ xfs_itrunc_trace(
 		     (void*)(unsigned long)((toss_finish >> 32) & 0xffffffff),
 		     (void*)(unsigned long)(toss_finish & 0xffffffff),
 		     (void*)(unsigned long)current_cpu(),
-		     (void*)0,
-		     (void*)0,
-		     (void*)0,
-		     (void*)0);
+		     (void*)(unsigned long)current_pid(),
+		     (void*)NULL,
+		     (void*)NULL,
+		     (void*)NULL);
 }
 #else
 #define	xfs_itrunc_trace(tag, ip, flag, new_size, toss_start, toss_finish)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 788917f355c..d5dfedcb892 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -76,7 +76,7 @@ xfs_iomap_enter_trace(
 		(void *)((unsigned long)count),
 		(void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
 		(void *)((unsigned long)(io->io_new_size & 0xffffffff)),
-		(void *)NULL,
+		(void *)((unsigned long)current_pid()),
 		(void *)NULL,
 		(void *)NULL,
 		(void *)NULL,
-- 
cgit v1.2.3


From 9fa8046f50bcb88ab9183ee1f22de5adc42bf92a Mon Sep 17 00:00:00 2001
From: Yingping Lu <yingping@sgi.com>
Date: Wed, 22 Mar 2006 12:44:35 +1100
Subject: [XFS] Fixing the error caused by the conflict between DIO Write's
 conversion and concurrent truncate operations. Use vn_iowait to wait for the
 completion of any pending DIOs. Since the truncate requires exclusive IOLOCK,
 so this blocks any further DIO operations since DIO write also needs
 exclusive IOBLOCK. This serves as a barrier and prevent any potential
 starvation.

SGI-PV: 947420
SGI-Modid: xfs-linux-melb:xfs-kern:208088a

Signed-off-by: Yingping Lu <yingping@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_inode.c    | 3 +++
 fs/xfs/xfs_vnodeops.c | 5 ++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2424a477794..8d2b36879f0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1420,6 +1420,9 @@ xfs_itruncate_start(
 
 	mp = ip->i_mount;
 	vp = XFS_ITOV(ip);
+
+	vn_iowait(vp);  /* wait for the completion of any pending DIOs */
+	
 	/*
 	 * Call VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES() to get rid of pages and buffers
 	 * overlapping the region being removed.  We have to use
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 8b5a44fe286..697bf22a84f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -615,6 +615,7 @@ xfs_setattr(
 			code = xfs_igrow_start(ip, vap->va_size, credp);
 		}
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		vn_iowait(vp); /* wait for the completion of any pending DIOs */
 		if (!code)
 			code = xfs_itruncate_data(ip, vap->va_size);
 		if (code) {
@@ -4310,8 +4311,10 @@ xfs_free_file_space(
 	ASSERT(attr_flags & ATTR_NOLOCK ? attr_flags & ATTR_DMI : 1);
 	if (attr_flags & ATTR_NOLOCK)
 		need_iolock = 0;
-	if (need_iolock)
+	if (need_iolock) {
 		xfs_ilock(ip, XFS_IOLOCK_EXCL);
+		vn_iowait(vp);	/* wait for the completion of any pending DIOs */
+	}
 
 	rounding = MAX((__uint8_t)(1 << mp->m_sb.sb_blocklog),
 			(__uint8_t)NBPP);
-- 
cgit v1.2.3


From 38e2299a641d93d029eb559e096648ab75a22be2 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Wed, 22 Mar 2006 12:47:15 +1100
Subject: [XFS] Explain the race closed by the addition of vn_iowait() to the
 start of xfs_itruncate_start().

SGI-PV: 947420
SGI-Modid: xfs-linux-melb:xfs-kern:25527a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_inode.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8d2b36879f0..88a517fad07 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1393,6 +1393,16 @@ xfs_itrunc_trace(
  * calling into the buffer/page cache code and we can't hold the
  * inode lock when we do so.
  *
+ * We need to wait for any direct I/Os in flight to complete before we
+ * proceed with the truncate. This is needed to prevent the extents
+ * being read or written by the direct I/Os from being removed while the
+ * I/O is in flight as there is no other method of synchronising
+ * direct I/O with the truncate operation.  Also, because we hold
+ * the IOLOCK in exclusive mode, we prevent new direct I/Os from being
+ * started until the truncate completes and drops the lock. Essentially,
+ * the vn_iowait() call forms an I/O barrier that provides strict ordering
+ * between direct I/Os and the truncate operation.
+ *
  * The flags parameter can have either the value XFS_ITRUNC_DEFINITE
  * or XFS_ITRUNC_MAYBE.  The XFS_ITRUNC_MAYBE value should be used
  * in the case that the caller is locking things out of order and
-- 
cgit v1.2.3


From 3758dee9f64fa9692063e7167128f9cec3f5fd33 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Wed, 22 Mar 2006 12:47:28 +1100
Subject: [XFS] Fixup naming inconsistencies found by Pekka Enberg and one from
 Jan Engelhardt.

SGI-PV: 947038
SGI-Modid: xfs-linux-melb:xfs-kern:25529a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/quota/xfs_qm_bhv.c |  4 ++--
 fs/xfs/support/ktrace.c   |  4 ++--
 fs/xfs/xfs_acl.h          |  4 ++--
 fs/xfs/xfs_vfsops.c       | 28 ++++++++++++++--------------
 4 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 90402a1c398..6838b36d95a 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -374,7 +374,7 @@ xfs_qm_exit(void)
 	vfs_bhv_clr_custom(&xfs_qmops);
 	xfs_qm_cleanup_procfs();
 	if (qm_dqzone)
-		kmem_cache_destroy(qm_dqzone);
+		kmem_zone_destroy(qm_dqzone);
 	if (qm_dqtrxzone)
-		kmem_cache_destroy(qm_dqtrxzone);
+		kmem_zone_destroy(qm_dqtrxzone);
 }
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 841aa4c15b8..addf5a7ea06 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -39,8 +39,8 @@ ktrace_init(int zentries)
 void
 ktrace_uninit(void)
 {
-	kmem_cache_destroy(ktrace_hdr_zone);
-	kmem_cache_destroy(ktrace_ent_zone);
+	kmem_zone_destroy(ktrace_hdr_zone);
+	kmem_zone_destroy(ktrace_ent_zone);
 }
 
 /*
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index f9315bc960c..538d0d65b04 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -55,8 +55,8 @@ struct xfs_inode;
 
 extern struct kmem_zone *xfs_acl_zone;
 #define xfs_acl_zone_init(zone, name)	\
-		(zone) = kmem_zone_init(sizeof(xfs_acl_t), name)
-#define xfs_acl_zone_destroy(zone)	kmem_cache_destroy(zone)
+		(zone) = kmem_zone_init(sizeof(xfs_acl_t), (name))
+#define xfs_acl_zone_destroy(zone)	kmem_zone_destroy(zone)
 
 extern int xfs_acl_inherit(struct vnode *, struct vattr *, xfs_acl_t *);
 extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 811a4261fa2..c40e5883db4 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -190,18 +190,18 @@ xfs_cleanup(void)
 	ktrace_free(xfs_alloc_trace_buf);
 #endif
 
-	kmem_cache_destroy(xfs_bmap_free_item_zone);
-	kmem_cache_destroy(xfs_btree_cur_zone);
-	kmem_cache_destroy(xfs_inode_zone);
-	kmem_cache_destroy(xfs_trans_zone);
-	kmem_cache_destroy(xfs_da_state_zone);
-	kmem_cache_destroy(xfs_dabuf_zone);
-	kmem_cache_destroy(xfs_buf_item_zone);
-	kmem_cache_destroy(xfs_efd_zone);
-	kmem_cache_destroy(xfs_efi_zone);
-	kmem_cache_destroy(xfs_ifork_zone);
-	kmem_cache_destroy(xfs_ili_zone);
-	kmem_cache_destroy(xfs_chashlist_zone);
+	kmem_zone_destroy(xfs_bmap_free_item_zone);
+	kmem_zone_destroy(xfs_btree_cur_zone);
+	kmem_zone_destroy(xfs_inode_zone);
+	kmem_zone_destroy(xfs_trans_zone);
+	kmem_zone_destroy(xfs_da_state_zone);
+	kmem_zone_destroy(xfs_dabuf_zone);
+	kmem_zone_destroy(xfs_buf_item_zone);
+	kmem_zone_destroy(xfs_efd_zone);
+	kmem_zone_destroy(xfs_efi_zone);
+	kmem_zone_destroy(xfs_ifork_zone);
+	kmem_zone_destroy(xfs_ili_zone);
+	kmem_zone_destroy(xfs_chashlist_zone);
 }
 
 /*
@@ -632,7 +632,7 @@ xfs_quiesce_fs(
 	xfs_mount_t		*mp)
 {
 	int			count = 0, pincount;
-		
+
 	xfs_refcache_purge_mp(mp);
 	xfs_flush_buftarg(mp->m_ddev_targp, 0);
 	xfs_finish_reclaim_all(mp, 0);
@@ -643,7 +643,7 @@ xfs_quiesce_fs(
 	 * meta data (typically directory updates).
 	 * Which then must be flushed and logged before
 	 * we can write the unmount record.
-	 */ 
+	 */
 	do {
 		xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, 0, NULL);
 		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
-- 
cgit v1.2.3


From 2ddee844eef48bf9240ebdfd6c5ffc4333c7d639 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Wed, 22 Mar 2006 12:47:40 +1100
Subject: [XFS] Check that a page has dirty buffers before finding it
 acceptable for rewrite clustering. This prevents writing excessive amounts of
 clean data when doing random rewrites of a cached file.

SGI-PV: 951193
SGI-Modid: xfs-linux-melb:xfs-kern:25531a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 965757cf15e..97fc056130e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -647,7 +647,7 @@ xfs_is_delayed_page(
 				acceptable = (type == IOMAP_UNWRITTEN);
 			else if (buffer_delay(bh))
 				acceptable = (type == IOMAP_DELAY);
-			else if (buffer_mapped(bh))
+			else if (buffer_dirty(bh) && buffer_mapped(bh))
 				acceptable = (type == 0);
 			else
 				break;
-- 
cgit v1.2.3


From e15f195cfb2fb1f2af0fdfc21277643deb26c0df Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Wed, 22 Mar 2006 12:47:52 +1100
Subject: [XFS] Reenable the noikeep (delete inode cluster space) option by
 default.

SGI-PV: 951200
SGI-Modid: xfs-linux-melb:xfs-kern:25535a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_vfsops.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index c40e5883db4..d4ec4dfaf19 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -1698,10 +1698,7 @@ xfs_parseargs(
 	int			iosize;
 
 	args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
-
-#if 0	/* XXX: off by default, until some remaining issues ironed out */
-	args->flags |= XFSMNT_IDELETE; /* default to on */
-#endif
+	args->flags |= XFSMNT_IDELETE;
 
 	if (!options)
 		goto done;
@@ -1911,7 +1908,6 @@ xfs_showargs(
 		{ XFS_MOUNT_NOUUID,		"," MNTOPT_NOUUID },
 		{ XFS_MOUNT_NORECOVERY,		"," MNTOPT_NORECOVERY },
 		{ XFS_MOUNT_OSYNCISOSYNC,	"," MNTOPT_OSYNCISOSYNC },
-		{ XFS_MOUNT_IDELETE,		"," MNTOPT_NOIKEEP },
 		{ 0, NULL }
 	};
 	struct proc_xfs_info	*xfs_infop;
@@ -1947,6 +1943,8 @@ xfs_showargs(
 		seq_printf(m, "," MNTOPT_SWIDTH "=%d",
 				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
 
+	if (!(mp->m_flags & XFS_MOUNT_IDELETE))
+		seq_printf(m, "," MNTOPT_IKEEP);
 	if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE))
 		seq_printf(m, "," MNTOPT_LARGEIO);
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
-- 
cgit v1.2.3


From bb19fba1937cb6ab2bb98ac893365f6ebf88ef1b Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@bruce>
Date: Wed, 22 Mar 2006 14:12:12 +1100
Subject: [XFS] Sync up one/two other minor changes missed in previous merges.

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/Makefile-linux-2.6    | 40 +++++++++++++---------------------------
 fs/xfs/linux-2.6/xfs_super.c |  4 +---
 fs/xfs/xfs_dmapi.h           | 10 ----------
 fs/xfs/xfs_rw.h              |  1 +
 fs/xfs/xfs_vnodeops.c        |  6 +++---
 5 files changed, 18 insertions(+), 43 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
index 97bd4743b46..5d73eaa1971 100644
--- a/fs/xfs/Makefile-linux-2.6
+++ b/fs/xfs/Makefile-linux-2.6
@@ -1,33 +1,19 @@
 #
-# Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
+# Copyright (c) 2000-2005 Silicon Graphics, Inc.
+# All Rights Reserved.
 #
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of version 2 of the GNU General Public License as
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
 # published by the Free Software Foundation.
 #
-# This program is distributed in the hope that it would be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
 #
-# Further, this software is distributed without any warranty that it is
-# free of the rightful claim of any third person regarding infringement
-# or the like.  Any license provided herein, whether implied or
-# otherwise, applies only to this software file.  Patent licenses, if
-# any, provided herein do not apply to combinations of this program with
-# other software, or any other product whatsoever.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write the Free Software Foundation, Inc., 59
-# Temple Place - Suite 330, Boston MA 02111-1307, USA.
-#
-# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
-# Mountain View, CA  94043, or:
-#
-# http://www.sgi.com
-#
-# For further information regarding this notice, see:
-#
-# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #
 
 EXTRA_CFLAGS +=	 -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char
@@ -36,7 +22,7 @@ XFS_LINUX := linux-2.6
 
 ifeq ($(CONFIG_XFS_DEBUG),y)
 	EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG
-	EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING
+	EXTRA_CFLAGS += -DXFS_BUF_LOCK_TRACKING
 endif
 ifeq ($(CONFIG_XFS_TRACE),y)
 	EXTRA_CFLAGS += -DXFS_ALLOC_TRACE
@@ -50,7 +36,7 @@ ifeq ($(CONFIG_XFS_TRACE),y)
 	EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
 	EXTRA_CFLAGS += -DXFS_LOG_TRACE
 	EXTRA_CFLAGS += -DXFS_RW_TRACE
-	EXTRA_CFLAGS += -DPAGEBUF_TRACE
+	EXTRA_CFLAGS += -DXFS_BUF_TRACE
 	EXTRA_CFLAGS += -DXFS_VNODE_TRACE
 endif
 
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index d9d28a965ba..8355faf8ffd 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -545,7 +545,7 @@ xfs_flush_device(
 	xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
 }
 
-#define SYNCD_FLAGS	(SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR)
+#define SYNCD_FLAGS	(SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR|SYNC_REFCACHE)
 STATIC void
 vfs_sync_worker(
 	vfs_t		*vfsp,
@@ -972,7 +972,6 @@ init_xfs_fs( void )
 	error = register_filesystem(&xfs_fs_type);
 	if (error)
 		goto undo_register;
-	XFS_DM_INIT(&xfs_fs_type);
 	return 0;
 
 undo_register:
@@ -989,7 +988,6 @@ STATIC void __exit
 exit_xfs_fs( void )
 {
 	vfs_exitquota();
-	XFS_DM_EXIT(&xfs_fs_type);
 	unregister_filesystem(&xfs_fs_type);
 	xfs_cleanup();
 	xfs_buf_terminate();
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index b4c7f2bc55a..00b1540f810 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -191,14 +191,4 @@ typedef enum {
 
 extern struct bhv_vfsops xfs_dmops;
 
-#ifdef CONFIG_XFS_DMAPI
-void xfs_dm_init(struct file_system_type *);
-void xfs_dm_exit(struct file_system_type *);
-#define XFS_DM_INIT(fstype)	xfs_dm_init(fstype)
-#define XFS_DM_EXIT(fstype)	xfs_dm_exit(fstype)
-#else
-#define XFS_DM_INIT(fstype)
-#define XFS_DM_EXIT(fstype)
-#endif
-
 #endif  /* __XFS_DMAPI_H__ */
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index de85eefb796..e6379564447 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -89,6 +89,7 @@ extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
  */
 extern int xfs_rwlock(bhv_desc_t *bdp, vrwlock_t write_lock);
 extern void xfs_rwunlock(bhv_desc_t *bdp, vrwlock_t write_lock);
+extern int xfs_setattr(bhv_desc_t *bdp, vattr_t *vap, int flags, cred_t *credp);
 extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf,
 				 xfs_off_t offset, cred_t *credp, int flags);
 extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state,
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 697bf22a84f..a478f42e63f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -3187,7 +3187,7 @@ xfs_rmdir(
 
 	/* Fall through to std_return with error = 0 or the errno
 	 * from xfs_trans_commit. */
-std_return:
+ std_return:
 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_POSTREMOVE)) {
 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
 					dir_vp, DM_RIGHT_NULL,
@@ -3197,12 +3197,12 @@ std_return:
 	}
 	return error;
 
-error1:
+ error1:
 	xfs_bmap_cancel(&free_list);
 	cancel_flags |= XFS_TRANS_ABORT;
 	/* FALLTHROUGH */
 
-error_return:
+ error_return:
 	xfs_trans_cancel(tp, cancel_flags);
 	goto std_return;
 }
-- 
cgit v1.2.3


From 5e7a99ac452d7a4ce43b8bacb3495475e1f9fd71 Mon Sep 17 00:00:00 2001
From: Latchesar Ionkov <lucho@ionkov.net>
Date: Wed, 22 Mar 2006 00:07:37 -0800
Subject: [PATCH] v9fs: assign dentry ops to negative dentries

If a file is not found in v9fs_vfs_lookup, the function creates negative
dentry, but doesn't assign any dentry ops.  This leaves the negative entry
in the cache (there is no d_delete to mark it for removal).  If the file is
created outside of the mounted v9fs filesystem, the file shows up in the
directory with weird permissions.

This patch assigns the default v9fs dentry ops to the negative dentry.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/vfs_inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 3ad8455f857..651a9e14d9a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -614,6 +614,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 
 	sb = dir->i_sb;
 	v9ses = v9fs_inode2v9ses(dir);
+	dentry->d_op = &v9fs_dentry_operations;
 	dirfid = v9fs_fid_lookup(dentry->d_parent);
 
 	if (!dirfid) {
@@ -681,8 +682,6 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 		goto FreeFcall;
 
 	fid->qid = fcall->params.rstat.stat.qid;
-
-	dentry->d_op = &v9fs_dentry_operations;
 	v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb);
 
 	d_add(dentry, inode);
-- 
cgit v1.2.3


From ac2b898ca6fb06196a26869c23b66afe7944e52e Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Wed, 22 Mar 2006 00:08:15 -0800
Subject: [PATCH] slab: Remove SLAB_NO_REAP option

SLAB_NO_REAP is documented as an option that will cause this slab not to be
reaped under memory pressure.  However, that is not what happens.  The only
thing that SLAB_NO_REAP controls at the moment is the reclaim of the unused
slab elements that were allocated in batch in cache_reap().  Cache_reap()
is run every few seconds independently of memory pressure.

Could we remove the whole thing?  Its only used by three slabs anyways and
I cannot find a reason for having this option.

There is an additional problem with SLAB_NO_REAP.  If set then the recovery
of objects from alien caches is switched off.  Objects not freed on the
same node where they were initially allocated will only be reused if a
certain amount of objects accumulates from one alien node (not very likely)
or if the cache is explicitly shrunk.  (Strangely __cache_shrink does not
check for SLAB_NO_REAP)

Getting rid of SLAB_NO_REAP fixes the problems with alien cache freeing.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ocfs2/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 8dd3aafec49..09e1c57a86a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -959,7 +959,7 @@ static int ocfs2_initialize_mem_caches(void)
 	ocfs2_lock_cache = kmem_cache_create("ocfs2_lock",
 					     sizeof(struct ocfs2_journal_lock),
 					     0,
-					     SLAB_NO_REAP|SLAB_HWCACHE_ALIGN,
+					     SLAB_HWCACHE_ALIGN,
 					     NULL, NULL);
 	if (!ocfs2_lock_cache)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 84097518d1ecd2330f9488e4c2d09953a3340e74 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 22 Mar 2006 00:08:34 -0800
Subject: [PATCH] mm: nommu use compound pages

Now that compound page handling is properly fixed in the VM, move nommu
over to using compound pages rather than rolling their own refcounting.

nommu vm page refcounting is broken anyway, but there is no need to have
divergent code in the core VM now, nor when it gets fixed.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: David Howells <dhowells@redhat.com>

(Needs testing, please).
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ramfs/file-nommu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 3f810acd0bf..b1ca234068f 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -87,8 +87,7 @@ static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
 	xpages = 1UL << order;
 	npages = (newsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
-	for (loop = 0; loop < npages; loop++)
-		set_page_count(pages + loop, 1);
+	split_page(pages, order);
 
 	/* trim off any pages we don't actually require */
 	for (loop = npages; loop < xpages; loop++)
-- 
cgit v1.2.3


From b45b5bd65f668a665db40d093e4e1fe563533608 Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Wed, 22 Mar 2006 00:08:55 -0800
Subject: [PATCH] hugepage: Strict page reservation for hugepage inodes

These days, hugepages are demand-allocated at first fault time.  There's a
somewhat dubious (and racy) heuristic when making a new mmap() to check if
there are enough available hugepages to fully satisfy that mapping.

A particularly obvious case where the heuristic breaks down is where a
process maps its hugepages not as a single chunk, but as a bunch of
individually mmap()ed (or shmat()ed) blocks without touching and
instantiating the pages in between allocations.  In this case the size of
each block is compared against the total number of available hugepages.
It's thus easy for the process to become overcommitted, because each block
mapping will succeed, although the total number of hugepages required by
all blocks exceeds the number available.  In particular, this defeats such
a program which will detect a mapping failure and adjust its hugepage usage
downward accordingly.

The patch below addresses this problem, by strictly reserving a number of
physical hugepages for hugepage inodes which have been mapped, but not
instatiated.  MAP_SHARED mappings are thus "safe" - they will fail on
mmap(), not later with an OOM SIGKILL.  MAP_PRIVATE mappings can still
trigger an OOM.  (Actually SHARED mappings can technically still OOM, but
only if the sysadmin explicitly reduces the hugepage pool between mapping
and instantiation)

This patch appears to address the problem at hand - it allows DB2 to start
correctly, for instance, which previously suffered the failure described
above.

This patch causes no regressions on the libhugetblfs testsuite, and makes a
test (designed to catch this problem) pass which previously failed (ppc64,
POWER5).

Signed-off-by: David Gibson <dwg@au1.ibm.com>
Cc: William Lee Irwin III <wli@holomorphy.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hugetlbfs/inode.c | 74 ++++++++++++++++------------------------------------
 1 file changed, 22 insertions(+), 52 deletions(-)

(limited to 'fs')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b3519528994..1a1c2fcb782 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -56,48 +56,10 @@ static void huge_pagevec_release(struct pagevec *pvec)
 	pagevec_reinit(pvec);
 }
 
-/*
- * huge_pages_needed tries to determine the number of new huge pages that
- * will be required to fully populate this VMA.  This will be equal to
- * the size of the VMA in huge pages minus the number of huge pages
- * (covered by this VMA) that are found in the page cache.
- *
- * Result is in bytes to be compatible with is_hugepage_mem_enough()
- */
-static unsigned long
-huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma)
-{
-	int i;
-	struct pagevec pvec;
-	unsigned long start = vma->vm_start;
-	unsigned long end = vma->vm_end;
-	unsigned long hugepages = (end - start) >> HPAGE_SHIFT;
-	pgoff_t next = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
-	pgoff_t endpg = next + hugepages;
-
-	pagevec_init(&pvec, 0);
-	while (next < endpg) {
-		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE))
-			break;
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
-			if (page->index > next)
-				next = page->index;
-			if (page->index >= endpg)
-				break;
-			next++;
-			hugepages--;
-		}
-		huge_pagevec_release(&pvec);
-	}
-	return hugepages << HPAGE_SHIFT;
-}
-
 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *inode = file->f_dentry->d_inode;
-	struct address_space *mapping = inode->i_mapping;
-	unsigned long bytes;
+	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
 	loff_t len, vma_len;
 	int ret;
 
@@ -113,10 +75,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if (vma->vm_end - vma->vm_start < HPAGE_SIZE)
 		return -EINVAL;
 
-	bytes = huge_pages_needed(mapping, vma);
-	if (!is_hugepage_mem_enough(bytes))
-		return -ENOMEM;
-
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
 
 	mutex_lock(&inode->i_mutex);
@@ -129,6 +87,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size)
 		goto out;
 
+	if (vma->vm_flags & VM_MAYSHARE)
+		if (hugetlb_extend_reservation(info, len >> HPAGE_SHIFT) != 0)
+			goto out;
+
 	ret = 0;
 	hugetlb_prefault_arch_hook(vma->vm_mm);
 	if (inode->i_size < len)
@@ -227,13 +189,18 @@ static void truncate_huge_page(struct page *page)
 	put_page(page);
 }
 
-static void truncate_hugepages(struct address_space *mapping, loff_t lstart)
+static void truncate_hugepages(struct inode *inode, loff_t lstart)
 {
+	struct address_space *mapping = &inode->i_data;
 	const pgoff_t start = lstart >> HPAGE_SHIFT;
 	struct pagevec pvec;
 	pgoff_t next;
 	int i;
 
+	hugetlb_truncate_reservation(HUGETLBFS_I(inode),
+				     lstart >> HPAGE_SHIFT);
+	if (!mapping->nrpages)
+		return;
 	pagevec_init(&pvec, 0);
 	next = start;
 	while (1) {
@@ -262,8 +229,7 @@ static void truncate_hugepages(struct address_space *mapping, loff_t lstart)
 
 static void hugetlbfs_delete_inode(struct inode *inode)
 {
-	if (inode->i_data.nrpages)
-		truncate_hugepages(&inode->i_data, 0);
+	truncate_hugepages(inode, 0);
 	clear_inode(inode);
 }
 
@@ -296,8 +262,7 @@ static void hugetlbfs_forget_inode(struct inode *inode)
 	inode->i_state |= I_FREEING;
 	inodes_stat.nr_inodes--;
 	spin_unlock(&inode_lock);
-	if (inode->i_data.nrpages)
-		truncate_hugepages(&inode->i_data, 0);
+	truncate_hugepages(inode, 0);
 	clear_inode(inode);
 	destroy_inode(inode);
 }
@@ -356,7 +321,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 	if (!prio_tree_empty(&mapping->i_mmap))
 		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
 	spin_unlock(&mapping->i_mmap_lock);
-	truncate_hugepages(mapping, offset);
+	truncate_hugepages(inode, offset);
 	return 0;
 }
 
@@ -573,6 +538,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 		hugetlbfs_inc_free_inodes(sbinfo);
 		return NULL;
 	}
+	p->prereserved_hpages = 0;
 	return &p->vfs_inode;
 }
 
@@ -805,9 +771,6 @@ struct file *hugetlb_zero_setup(size_t size)
 	if (!can_do_hugetlb_shm())
 		return ERR_PTR(-EPERM);
 
-	if (!is_hugepage_mem_enough(size))
-		return ERR_PTR(-ENOMEM);
-
 	if (!user_shm_lock(size, current->user))
 		return ERR_PTR(-ENOMEM);
 
@@ -831,6 +794,11 @@ struct file *hugetlb_zero_setup(size_t size)
 	if (!inode)
 		goto out_file;
 
+	error = -ENOMEM;
+	if (hugetlb_extend_reservation(HUGETLBFS_I(inode),
+				       size >> HPAGE_SHIFT) != 0)
+		goto out_inode;
+
 	d_instantiate(dentry, inode);
 	inode->i_size = size;
 	inode->i_nlink = 0;
@@ -841,6 +809,8 @@ struct file *hugetlb_zero_setup(size_t size)
 	file->f_mode = FMODE_WRITE | FMODE_READ;
 	return file;
 
+out_inode:
+	iput(inode);
 out_file:
 	put_filp(file);
 out_dentry:
-- 
cgit v1.2.3


From bba1e9b2111b14625f670bd07e57fd7ed57ce804 Mon Sep 17 00:00:00 2001
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Date: Wed, 22 Mar 2006 00:09:02 -0800
Subject: [PATCH] convert hugetlbfs_counter to atomic

Implementation of hugetlbfs_counter() is functionally equivalent to
atomic_inc_return().  Use the simpler atomic form.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hugetlbfs/inode.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 1a1c2fcb782..25fa8bba8cb 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -737,21 +737,6 @@ static struct file_system_type hugetlbfs_fs_type = {
 
 static struct vfsmount *hugetlbfs_vfsmount;
 
-/*
- * Return the next identifier for a shm file
- */
-static unsigned long hugetlbfs_counter(void)
-{
-	static DEFINE_SPINLOCK(lock);
-	static unsigned long counter;
-	unsigned long ret;
-
-	spin_lock(&lock);
-	ret = ++counter;
-	spin_unlock(&lock);
-	return ret;
-}
-
 static int can_do_hugetlb_shm(void)
 {
 	return likely(capable(CAP_IPC_LOCK) ||
@@ -767,6 +752,7 @@ struct file *hugetlb_zero_setup(size_t size)
 	struct dentry *dentry, *root;
 	struct qstr quick_string;
 	char buf[16];
+	static atomic_t counter;
 
 	if (!can_do_hugetlb_shm())
 		return ERR_PTR(-EPERM);
@@ -775,7 +761,7 @@ struct file *hugetlb_zero_setup(size_t size)
 		return ERR_PTR(-ENOMEM);
 
 	root = hugetlbfs_vfsmount->mnt_root;
-	snprintf(buf, 16, "%lu", hugetlbfs_counter());
+	snprintf(buf, 16, "%u", atomic_inc_return(&counter));
 	quick_string.name = buf;
 	quick_string.len = strlen(quick_string.name);
 	quick_string.hash = 0;
-- 
cgit v1.2.3


From b20a35035f983f4ac7e29c4a68f30e43510007e0 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 22 Mar 2006 00:09:12 -0800
Subject: [PATCH] page migration reorg

Centralize the page migration functions in anticipation of additional
tinkering.  Creates a new file mm/migrate.c

1. Extract buffer_migrate_page() from fs/buffer.c

2. Extract central migration code from vmscan.c

3. Extract some components from mempolicy.c

4. Export pageout() and remove_from_swap() from vmscan.c

5. Make it possible to configure NUMA systems without page migration
   and non-NUMA systems with page migration.

I had to so some #ifdeffing in mempolicy.c that may need a cleanup.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c                | 62 ----------------------------------------------
 fs/xfs/linux-2.6/xfs_buf.c |  1 +
 2 files changed, 1 insertion(+), 62 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index a9b39940200..1d3683d496f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3050,68 +3050,6 @@ asmlinkage long sys_bdflush(int func, long data)
 	return 0;
 }
 
-/*
- * Migration function for pages with buffers. This function can only be used
- * if the underlying filesystem guarantees that no other references to "page"
- * exist.
- */
-#ifdef CONFIG_MIGRATION
-int buffer_migrate_page(struct page *newpage, struct page *page)
-{
-	struct address_space *mapping = page->mapping;
-	struct buffer_head *bh, *head;
-	int rc;
-
-	if (!mapping)
-		return -EAGAIN;
-
-	if (!page_has_buffers(page))
-		return migrate_page(newpage, page);
-
-	head = page_buffers(page);
-
-	rc = migrate_page_remove_references(newpage, page, 3);
-	if (rc)
-		return rc;
-
-	bh = head;
-	do {
-		get_bh(bh);
-		lock_buffer(bh);
-		bh = bh->b_this_page;
-
-	} while (bh != head);
-
-	ClearPagePrivate(page);
-	set_page_private(newpage, page_private(page));
-	set_page_private(page, 0);
-	put_page(page);
-	get_page(newpage);
-
-	bh = head;
-	do {
-		set_bh_page(bh, newpage, bh_offset(bh));
-		bh = bh->b_this_page;
-
-	} while (bh != head);
-
-	SetPagePrivate(newpage);
-
-	migrate_page_copy(newpage, page);
-
-	bh = head;
-	do {
-		unlock_buffer(bh);
- 		put_bh(bh);
-		bh = bh->b_this_page;
-
-	} while (bh != head);
-
-	return 0;
-}
-EXPORT_SYMBOL(buffer_migrate_page);
-#endif
-
 /*
  * Buffer-head allocation
  */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index bfb4f2917bb..8cdfa415165 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -29,6 +29,7 @@
 #include <linux/blkdev.h>
 #include <linux/hash.h>
 #include <linux/kthread.h>
+#include <linux/migrate.h>
 #include "xfs_linux.h"
 
 STATIC kmem_zone_t *xfs_buf_zone;
-- 
cgit v1.2.3


From 67b1dfe77a2eb2a88b37cd77b8979cbdb7695bd6 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 23 Mar 2006 14:57:43 +0000
Subject: NTFS: Fix an (innocent) off-by-one error in the runlist code.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  6 ++++++
 fs/ntfs/Makefile  |  2 +-
 fs/ntfs/namei.c   |  2 +-
 fs/ntfs/runlist.c | 12 ++++++++----
 fs/ntfs/super.c   |  2 +-
 5 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 9d8ffa89e2c..3d8d60be48d 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -19,6 +19,12 @@ ToDo/Notes:
 	- Enable the code for setting the NT4 compatibility flag when we start
 	  making NTFS 1.2 specific modifications.
 
+2.1.27 - Various bug fixes.
+
+	- Fix two compiler warnings on Alpha.  Thanks to Andrew Morton for
+	  reporting them.
+	- Fix an (innocent) off-by-one error in the runlist code.
+
 2.1.26 - Minor bug fixes and updates.
 
 	- Fix a potential overflow in file.c where a cast to s64 was missing in
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index d95fac7fdeb..e27b4eacffb 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
 	     index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
 	     unistr.o upcase.o
 
-EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.26\"
+EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.27\"
 
 ifeq ($(CONFIG_NTFS_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 5ea9eb93af6..78e0cf738e2 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -2,7 +2,7 @@
  * namei.c - NTFS kernel directory inode operations. Part of the Linux-NTFS
  *	     project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2006 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 061b5ff6b73..eb52b801512 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -381,6 +381,7 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
 static inline runlist_element *ntfs_rl_replace(runlist_element *dst,
 		int dsize, runlist_element *src, int ssize, int loc)
 {
+	signed delta;
 	BOOL left = FALSE;	/* Left end of @src needs merging. */
 	BOOL right = FALSE;	/* Right end of @src needs merging. */
 	int tail;		/* Start of tail of @dst. */
@@ -396,11 +397,14 @@ static inline runlist_element *ntfs_rl_replace(runlist_element *dst,
 		left = ntfs_are_rl_mergeable(dst + loc - 1, src);
 	/*
 	 * Allocate some space.  We will need less if the left, right, or both
-	 * ends get merged.
+	 * ends get merged.  The -1 accounts for the run being replaced.
 	 */
-	dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left - right);
-	if (IS_ERR(dst))
-		return dst;
+	delta = ssize - 1 - left - right;
+	if (delta > 0) {
+		dst = ntfs_rl_realloc(dst, dsize, dsize + delta);
+		if (IS_ERR(dst))
+			return dst;
+	}
 	/*
 	 * We are guaranteed to succeed from here so can start modifying the
 	 * original runlists.
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 368a8ec1066..71c58eca580 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3234,7 +3234,7 @@ static void __exit exit_ntfs_fs(void)
 }
 
 MODULE_AUTHOR("Anton Altaparmakov <aia21@cantab.net>");
-MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2005 Anton Altaparmakov");
+MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2006 Anton Altaparmakov");
 MODULE_VERSION(NTFS_VERSION);
 MODULE_LICENSE("GPL");
 #ifdef DEBUG
-- 
cgit v1.2.3


From 3ccc7384db3d762e834dfdae13c1d6434b2fdeab Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 23 Mar 2006 15:03:11 +0000
Subject: NTFS: Fix a buggette in an "should be impossible" case handling where
 we       continued the attribute lookup loop instead of aborting it.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 2 ++
 fs/ntfs/attrib.c  | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 3d8d60be48d..d35a5c8e3da 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -24,6 +24,8 @@ ToDo/Notes:
 	- Fix two compiler warnings on Alpha.  Thanks to Andrew Morton for
 	  reporting them.
 	- Fix an (innocent) off-by-one error in the runlist code.
+	- Fix a buggette in an "should be impossible" case handling where we
+	  continued the attribute lookup loop instead of aborting it.
 
 2.1.26 - Minor bug fixes and updates.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 9480a0526cd..a92b9e9db91 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1,7 +1,7 @@
 /**
  * attrib.c - NTFS attribute operations.  Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2005 Anton Altaparmakov
+ * Copyright (c) 2001-2006 Anton Altaparmakov
  * Copyright (c) 2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -1048,7 +1048,7 @@ do_next_attr_loop:
 				le32_to_cpu(ctx->mrec->bytes_allocated))
 			break;
 		if (a->type == AT_END)
-			continue;
+			break;
 		if (!a->length)
 			break;
 		if (al_entry->instance != a->instance)
-- 
cgit v1.2.3


From 78264bd9c239237fe356c32d08abf8e52a2d8737 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 23 Mar 2006 15:06:18 +0000
Subject: NTFS: Use buffer_migrate_page() for the ->migratepage function of all
 ntfs        address space operations.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 4 +++-
 fs/ntfs/aops.c    | 7 +++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index d35a5c8e3da..8df10700a93 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -19,13 +19,15 @@ ToDo/Notes:
 	- Enable the code for setting the NT4 compatibility flag when we start
 	  making NTFS 1.2 specific modifications.
 
-2.1.27 - Various bug fixes.
+2.1.27 - Various bug fixes and cleanups.
 
 	- Fix two compiler warnings on Alpha.  Thanks to Andrew Morton for
 	  reporting them.
 	- Fix an (innocent) off-by-one error in the runlist code.
 	- Fix a buggette in an "should be impossible" case handling where we
 	  continued the attribute lookup loop instead of aborting it.
+	- Use buffer_migrate_page() for the ->migratepage function of all ntfs
+	  address space operations.
 
 2.1.26 - Minor bug fixes and updates.
 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 7e361da770b..7c7e313620f 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -22,6 +22,7 @@
  */
 
 #include <linux/errno.h>
+#include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
@@ -1551,6 +1552,9 @@ struct address_space_operations ntfs_aops = {
 #ifdef NTFS_RW
 	.writepage	= ntfs_writepage,	/* Write dirty page to disk. */
 #endif /* NTFS_RW */
+	.migratepage	= buffer_migrate_page,	/* Move a page cache page from
+						   one physical page to an
+						   other. */
 };
 
 /**
@@ -1567,6 +1571,9 @@ struct address_space_operations ntfs_mst_aops = {
 						   without touching the buffers
 						   belonging to the page. */
 #endif /* NTFS_RW */
+	.migratepage	= buffer_migrate_page,	/* Move a page cache page from
+						   one physical page to an
+						   other. */
 };
 
 #ifdef NTFS_RW
-- 
cgit v1.2.3


From 949763b2b8822c6dc6da0d0e1d4af092152546c2 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 23 Mar 2006 15:34:13 +0000
Subject: NTFS: Fix comparison of $MFT and $MFTMirr to not bail out when there
 are       unused, invalid mft records which are the same in both $MFT and    
   $MFTMirr.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  3 +++
 fs/ntfs/super.c   | 38 +++++++++++++++++++++++++-------------
 2 files changed, 28 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 8df10700a93..548d9059a69 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -28,6 +28,9 @@ ToDo/Notes:
 	  continued the attribute lookup loop instead of aborting it.
 	- Use buffer_migrate_page() for the ->migratepage function of all ntfs
 	  address space operations.
+	- Fix comparison of $MFT and $MFTMirr to not bail out when there are
+	  unused, invalid mft records which are the same in both $MFT and
+	  $MFTMirr.
 
 2.1.26 - Minor bug fixes and updates.
 
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 71c58eca580..fd4aecc5548 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1099,26 +1099,38 @@ static BOOL check_mft_mirror(ntfs_volume *vol)
 			kmirr = page_address(mirr_page);
 			++index;
 		}
-		/* Make sure the record is ok. */
-		if (ntfs_is_baad_recordp((le32*)kmft)) {
-			ntfs_error(sb, "Incomplete multi sector transfer "
-					"detected in mft record %i.", i);
+		/* Do not check the record if it is not in use. */
+		if (((MFT_RECORD*)kmft)->flags & MFT_RECORD_IN_USE) {
+			/* Make sure the record is ok. */
+			if (ntfs_is_baad_recordp((le32*)kmft)) {
+				ntfs_error(sb, "Incomplete multi sector "
+						"transfer detected in mft "
+						"record %i.", i);
 mm_unmap_out:
-			ntfs_unmap_page(mirr_page);
+				ntfs_unmap_page(mirr_page);
 mft_unmap_out:
-			ntfs_unmap_page(mft_page);
-			return FALSE;
+				ntfs_unmap_page(mft_page);
+				return FALSE;
+			}
 		}
-		if (ntfs_is_baad_recordp((le32*)kmirr)) {
-			ntfs_error(sb, "Incomplete multi sector transfer "
-					"detected in mft mirror record %i.", i);
-			goto mm_unmap_out;
+		/* Do not check the mirror record if it is not in use. */
+		if (((MFT_RECORD*)kmirr)->flags & MFT_RECORD_IN_USE) {
+			if (ntfs_is_baad_recordp((le32*)kmirr)) {
+				ntfs_error(sb, "Incomplete multi sector "
+						"transfer detected in mft "
+						"mirror record %i.", i);
+				goto mm_unmap_out;
+			}
 		}
 		/* Get the amount of data in the current record. */
 		bytes = le32_to_cpu(((MFT_RECORD*)kmft)->bytes_in_use);
-		if (!bytes || bytes > vol->mft_record_size) {
+		if (bytes < sizeof(MFT_RECORD_OLD) ||
+				bytes > vol->mft_record_size ||
+				ntfs_is_baad_recordp((le32*)kmft)) {
 			bytes = le32_to_cpu(((MFT_RECORD*)kmirr)->bytes_in_use);
-			if (!bytes || bytes > vol->mft_record_size)
+			if (bytes < sizeof(MFT_RECORD_OLD) ||
+					bytes > vol->mft_record_size ||
+					ntfs_is_baad_recordp((le32*)kmirr))
 				bytes = vol->mft_record_size;
 		}
 		/* Compare the two records. */
-- 
cgit v1.2.3


From 5be0e9511990dc307670dc66a42073db96b20f26 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Thu, 23 Mar 2006 02:59:19 -0800
Subject: [PATCH] proc: fix duplicate line in /proc/devices

Fix a duplicate block device line printed after the "Block device" header
in /proc/devices.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/proc_misc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 1d24fead51a..826c131994c 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -312,7 +312,7 @@ static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos)
 		case BLK_HDR:
 			info->state = BLK_LIST;
 			(*pos)++;
-			break;
+			/*fallthrough*/
 		case BLK_LIST:
 			if (get_blkdev_info(info->blkdev,&idummy,&ndummy)) {
 				/*
-- 
cgit v1.2.3


From d8733c2956968a01394a4d2a9e97a8b431a78776 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 23 Mar 2006 03:00:11 -0800
Subject: [PATCH] ext3_readdir: use generic readahead

Linus points out that ext3_readdir's readahead only cuts in when
ext3_readdir() is operating at the very start of the directory.  So for large
directories we end up performing no readahead at all and we suck.

So take it all out and use the core VM's page_cache_readahead().  This means
that ext3 directory reads will use all of readahead's dynamic sizing goop.

Note that we're using the directory's filp->f_ra to hold the readahead state,
but readahead is actually being performed against the underlying blockdev's
address_space.  Fortunately the readahead code is all set up to handle this.

Tested with printk.  It works.  I was struggling to find a real workload which
actually cared.

(The patch also exports page_cache_readahead() to GPL modules)

Cc: "Stephen C. Tweedie" <sct@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/dir.c   | 52 ++++++++++++++++++++++++----------------------------
 fs/ext3/inode.c |  2 +-
 2 files changed, 25 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 832867aef3d..773459164bb 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -95,11 +95,10 @@ static int ext3_readdir(struct file * filp,
 			 void * dirent, filldir_t filldir)
 {
 	int error = 0;
-	unsigned long offset, blk;
-	int i, num, stored;
-	struct buffer_head * bh, * tmp, * bha[16];
-	struct ext3_dir_entry_2 * de;
-	struct super_block * sb;
+	unsigned long offset;
+	int i, stored;
+	struct ext3_dir_entry_2 *de;
+	struct super_block *sb;
 	int err;
 	struct inode *inode = filp->f_dentry->d_inode;
 	int ret = 0;
@@ -124,12 +123,29 @@ static int ext3_readdir(struct file * filp,
 	}
 #endif
 	stored = 0;
-	bh = NULL;
 	offset = filp->f_pos & (sb->s_blocksize - 1);
 
 	while (!error && !stored && filp->f_pos < inode->i_size) {
-		blk = (filp->f_pos) >> EXT3_BLOCK_SIZE_BITS(sb);
-		bh = ext3_bread(NULL, inode, blk, 0, &err);
+		unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb);
+		struct buffer_head map_bh;
+		struct buffer_head *bh = NULL;
+
+		map_bh.b_state = 0;
+		err = ext3_get_block_handle(NULL, inode, blk, &map_bh, 0, 0);
+		if (!err) {
+			page_cache_readahead(sb->s_bdev->bd_inode->i_mapping,
+				&filp->f_ra,
+				filp,
+				map_bh.b_blocknr >>
+					(PAGE_CACHE_SHIFT - inode->i_blkbits),
+				1);
+			bh = ext3_bread(NULL, inode, blk, 0, &err);
+		}
+
+		/*
+		 * We ignore I/O errors on directories so users have a chance
+		 * of recovering data when there's a bad sector
+		 */
 		if (!bh) {
 			ext3_error (sb, "ext3_readdir",
 				"directory #%lu contains a hole at offset %lu",
@@ -138,26 +154,6 @@ static int ext3_readdir(struct file * filp,
 			continue;
 		}
 
-		/*
-		 * Do the readahead
-		 */
-		if (!offset) {
-			for (i = 16 >> (EXT3_BLOCK_SIZE_BITS(sb) - 9), num = 0;
-			     i > 0; i--) {
-				tmp = ext3_getblk (NULL, inode, ++blk, 0, &err);
-				if (tmp && !buffer_uptodate(tmp) &&
-						!buffer_locked(tmp))
-					bha[num++] = tmp;
-				else
-					brelse (tmp);
-			}
-			if (num) {
-				ll_rw_block (READA, num, bha);
-				for (i = 0; i < num; i++)
-					brelse (bha[i]);
-			}
-		}
-
 revalidate:
 		/* If the dir block has changed since the last call to
 		 * readdir(2), then we might be pointing to an invalid
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 0384e539b88..d59d5a667b0 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -671,7 +671,7 @@ err_out:
  * The BKL may not be held on entry here.  Be sure to take it early.
  */
 
-static int
+int
 ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 		struct buffer_head *bh_result, int create, int extend_disksize)
 {
-- 
cgit v1.2.3


From 0c9e63fd38a2fb2181668a0cdd622a3c23cfd567 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 23 Mar 2006 03:00:12 -0800
Subject: [PATCH] Shrinks sizeof(files_struct) and better layout

1) Reduce the size of (struct fdtable) to exactly 64 bytes on 32bits
   platforms, lowering kmalloc() allocated space by 50%.

2) Reduce the size of (files_struct), using a special 32 bits (or
   64bits) embedded_fd_set, instead of a 1024 bits fd_set for the
   close_on_exec_init and open_fds_init fields.  This save some ram (248
   bytes per task) as most tasks dont open more than 32 files.  D-Cache
   footprint for such tasks is also reduced to the minimum.

3) Reduce size of allocated fdset.  Currently two full pages are
   allocated, that is 32768 bits on x86 for example, and way too much.  The
   minimum is now L1_CACHE_BYTES.

UP and SMP should benefit from this patch, because most tasks will touch
only one cache line when open()/close() stdin/stdout/stderr (0/1/2),
(next_fd, close_on_exec_init, open_fds_init, fd_array[0 ..  2] being in the
same cache line)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fcntl.c |  9 ++++-----
 fs/file.c  | 34 ++++++++++++++--------------------
 fs/open.c  |  8 ++++----
 3 files changed, 22 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index dc4a7007f4e..03c789560fb 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -73,8 +73,8 @@ repeat:
 	 * orig_start..fdt->next_fd
 	 */
 	start = orig_start;
-	if (start < fdt->next_fd)
-		start = fdt->next_fd;
+	if (start < files->next_fd)
+		start = files->next_fd;
 
 	newfd = start;
 	if (start < fdt->max_fdset) {
@@ -102,9 +102,8 @@ repeat:
 	 * we reacquire the fdtable pointer and use it while holding
 	 * the lock, no one can free it during that time.
 	 */
-	fdt = files_fdtable(files);
-	if (start <= fdt->next_fd)
-		fdt->next_fd = newfd + 1;
+	if (start <= files->next_fd)
+		files->next_fd = newfd + 1;
 
 	error = newfd;
 	
diff --git a/fs/file.c b/fs/file.c
index cea7cbea11d..bbc74331473 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -125,7 +125,8 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 		kmem_cache_free(files_cachep, fdt->free_files);
 		return;
 	}
-	if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) {
+	if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE &&
+		fdt->max_fds <= NR_OPEN_DEFAULT) {
 		/*
 		 * The fdtable was embedded
 		 */
@@ -155,8 +156,9 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 
 void free_fdtable(struct fdtable *fdt)
 {
-	if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE ||
-					fdt->max_fds > NR_OPEN_DEFAULT)
+	if (fdt->free_files ||
+		fdt->max_fdset > EMBEDDED_FD_SET_SIZE ||
+		fdt->max_fds > NR_OPEN_DEFAULT)
 		call_rcu(&fdt->rcu, free_fdtable_rcu);
 }
 
@@ -199,7 +201,6 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
 		       (nfdt->max_fds - fdt->max_fds) *
 					sizeof(struct file *));
 	}
-	nfdt->next_fd = fdt->next_fd;
 }
 
 /*
@@ -220,11 +221,9 @@ fd_set * alloc_fdset(int num)
 
 void free_fdset(fd_set *array, int num)
 {
-	int size = num / 8;
-
-	if (num <= __FD_SETSIZE) /* Don't free an embedded fdset */
+	if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */
 		return;
-	else if (size <= PAGE_SIZE)
+	else if (num <= 8 * PAGE_SIZE)
 		kfree(array);
 	else
 		vfree(array);
@@ -237,22 +236,17 @@ static struct fdtable *alloc_fdtable(int nr)
   	fd_set *new_openset = NULL, *new_execset = NULL;
 	struct file **new_fds;
 
-	fdt = kmalloc(sizeof(*fdt), GFP_KERNEL);
+	fdt = kzalloc(sizeof(*fdt), GFP_KERNEL);
 	if (!fdt)
   		goto out;
-	memset(fdt, 0, sizeof(*fdt));
 
-	nfds = __FD_SETSIZE;
+	nfds = 8 * L1_CACHE_BYTES;
   	/* Expand to the max in easy steps */
-  	do {
-		if (nfds < (PAGE_SIZE * 8))
-			nfds = PAGE_SIZE * 8;
-		else {
-			nfds = nfds * 2;
-			if (nfds > NR_OPEN)
-				nfds = NR_OPEN;
-		}
-	} while (nfds <= nr);
+  	while (nfds <= nr) {
+		nfds = nfds * 2;
+		if (nfds > NR_OPEN)
+			nfds = NR_OPEN;
+	}
 
   	new_openset = alloc_fdset(nfds);
   	new_execset = alloc_fdset(nfds);
diff --git a/fs/open.c b/fs/open.c
index 70e0230d8e7..1091dadd6c3 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -973,7 +973,7 @@ repeat:
 	fdt = files_fdtable(files);
  	fd = find_next_zero_bit(fdt->open_fds->fds_bits,
 				fdt->max_fdset,
-				fdt->next_fd);
+				files->next_fd);
 
 	/*
 	 * N.B. For clone tasks sharing a files structure, this test
@@ -998,7 +998,7 @@ repeat:
 
 	FD_SET(fd, fdt->open_fds);
 	FD_CLR(fd, fdt->close_on_exec);
-	fdt->next_fd = fd + 1;
+	files->next_fd = fd + 1;
 #if 1
 	/* Sanity check */
 	if (fdt->fd[fd] != NULL) {
@@ -1019,8 +1019,8 @@ static void __put_unused_fd(struct files_struct *files, unsigned int fd)
 {
 	struct fdtable *fdt = files_fdtable(files);
 	__FD_CLR(fd, fdt->open_fds);
-	if (fd < fdt->next_fd)
-		fdt->next_fd = fd;
+	if (fd < files->next_fd)
+		files->next_fd = fd;
 }
 
 void fastcall put_unused_fd(unsigned int fd)
-- 
cgit v1.2.3


From 6362e4d4eda61efb04ac1cdae32e48ac6d90b701 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Mar 2006 03:00:17 -0800
Subject: [PATCH] Fix oops in invalidate_dquots()

When quota is being turned off we assumed that all the references to dquots
were already dropped.  That need not be true as inodes being deleted are
not on superblock's inodes list and hence we need not reach it when
removing quota references from inodes.  So invalidate_dquots() has to wait
for all the users of dquots (as quota is already marked as turned off, no
new references can be acquired and so this is bound to happen rather
early).  When we do this, we can also remove the iprune_sem locking as it
was protecting us against exactly the same problem when freeing inodes
icache memory.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dquot.c | 52 +++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/dquot.c b/fs/dquot.c
index 1966c890b48..9376a437898 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -118,8 +118,7 @@
  * spinlock to internal buffers before writing.
  *
  * Lock ordering (including related VFS locks) is the following:
- *   i_mutex > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem >
- *   > dquot->dq_lock > dqio_sem
+ *  i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock > dqio_sem
  * i_mutex on quota files is special (it's below dqio_sem)
  */
 
@@ -407,23 +406,49 @@ out_dqlock:
 
 /* Invalidate all dquots on the list. Note that this function is called after
  * quota is disabled and pointers from inodes removed so there cannot be new
- * quota users. Also because we hold dqonoff_sem there can be no quota users
- * for this sb+type at all. */
+ * quota users. There can still be some users of quotas due to inodes being
+ * just deleted or pruned by prune_icache() (those are not attached to any
+ * list). We have to wait for such users.
+ */
 static void invalidate_dquots(struct super_block *sb, int type)
 {
 	struct dquot *dquot, *tmp;
 
+restart:
 	spin_lock(&dq_list_lock);
 	list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) {
 		if (dquot->dq_sb != sb)
 			continue;
 		if (dquot->dq_type != type)
 			continue;
-#ifdef __DQUOT_PARANOIA
-		if (atomic_read(&dquot->dq_count))
-			BUG();
-#endif
-		/* Quota now has no users and it has been written on last dqput() */
+		/* Wait for dquot users */
+		if (atomic_read(&dquot->dq_count)) {
+			DEFINE_WAIT(wait);
+
+			atomic_inc(&dquot->dq_count);
+			prepare_to_wait(&dquot->dq_wait_unused, &wait,
+					TASK_UNINTERRUPTIBLE);
+			spin_unlock(&dq_list_lock);
+			/* Once dqput() wakes us up, we know it's time to free
+			 * the dquot.
+			 * IMPORTANT: we rely on the fact that there is always
+			 * at most one process waiting for dquot to free.
+			 * Otherwise dq_count would be > 1 and we would never
+			 * wake up.
+			 */
+			if (atomic_read(&dquot->dq_count) > 1)
+				schedule();
+			finish_wait(&dquot->dq_wait_unused, &wait);
+			dqput(dquot);
+			/* At this moment dquot() need not exist (it could be
+			 * reclaimed by prune_dqcache(). Hence we must
+			 * restart. */
+			goto restart;
+		}
+		/*
+		 * Quota now has no users and it has been written on last
+		 * dqput()
+		 */
 		remove_dquot_hash(dquot);
 		remove_free_dquot(dquot);
 		remove_inuse(dquot);
@@ -540,6 +565,10 @@ we_slept:
 	if (atomic_read(&dquot->dq_count) > 1) {
 		/* We have more than one user... nothing to do */
 		atomic_dec(&dquot->dq_count);
+		/* Releasing dquot during quotaoff phase? */
+		if (!sb_has_quota_enabled(dquot->dq_sb, dquot->dq_type) &&
+		    atomic_read(&dquot->dq_count) == 1)
+			wake_up(&dquot->dq_wait_unused);
 		spin_unlock(&dq_list_lock);
 		return;
 	}
@@ -581,6 +610,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 	INIT_LIST_HEAD(&dquot->dq_inuse);
 	INIT_HLIST_NODE(&dquot->dq_hash);
 	INIT_LIST_HEAD(&dquot->dq_dirty);
+	init_waitqueue_head(&dquot->dq_wait_unused);
 	dquot->dq_sb = sb;
 	dquot->dq_type = type;
 	atomic_set(&dquot->dq_count, 1);
@@ -732,13 +762,9 @@ static void drop_dquot_ref(struct super_block *sb, int type)
 {
 	LIST_HEAD(tofree_head);
 
-	/* We need to be guarded against prune_icache to reach all the
-	 * inodes - otherwise some can be on the local list of prune_icache */
-	down(&iprune_sem);
 	down_write(&sb_dqopt(sb)->dqptr_sem);
 	remove_dquot_ref(sb, type, &tofree_head);
 	up_write(&sb_dqopt(sb)->dqptr_sem);
-	up(&iprune_sem);
 	put_dquot_list(&tofree_head);
 }
 
-- 
cgit v1.2.3


From 4f7a07b88726781e37c4c5f63db3a6ee3f8500d3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 23 Mar 2006 03:00:19 -0800
Subject: [PATCH] convert fs/9p/ to mutexes, fix locking bugs

Convert fs/9p/mux.c from semaphore to mutex.

NOTE: fixed locking bugs in the process - the code was using semaphores
the other way around.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Eric Van Hensbergen <ericvh@ericvh.myip.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/mux.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index ea1134eb47c..8e8356c1c22 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -31,6 +31,7 @@
 #include <linux/poll.h>
 #include <linux/kthread.h>
 #include <linux/idr.h>
+#include <linux/mutex.h>
 
 #include "debug.h"
 #include "v9fs.h"
@@ -110,7 +111,7 @@ static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
 static u16 v9fs_mux_get_tag(struct v9fs_mux_data *);
 static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16);
 
-static DECLARE_MUTEX(v9fs_mux_task_lock);
+static DEFINE_MUTEX(v9fs_mux_task_lock);
 static struct workqueue_struct *v9fs_mux_wq;
 
 static int v9fs_mux_num;
@@ -166,7 +167,7 @@ static int v9fs_mux_poll_start(struct v9fs_mux_data *m)
 
 	dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num,
 		v9fs_mux_poll_task_num);
-	up(&v9fs_mux_task_lock);
+	mutex_lock(&v9fs_mux_task_lock);
 
 	n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1);
 	if (n > v9fs_mux_poll_task_num) {
@@ -225,7 +226,7 @@ static int v9fs_mux_poll_start(struct v9fs_mux_data *m)
 	}
 
 	v9fs_mux_num++;
-	down(&v9fs_mux_task_lock);
+	mutex_unlock(&v9fs_mux_task_lock);
 
 	return 0;
 }
@@ -235,7 +236,7 @@ static void v9fs_mux_poll_stop(struct v9fs_mux_data *m)
 	int i;
 	struct v9fs_mux_poll_task *vpt;
 
-	up(&v9fs_mux_task_lock);
+	mutex_lock(&v9fs_mux_task_lock);
 	vpt = m->poll_task;
 	list_del(&m->mux_list);
 	for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
@@ -252,7 +253,7 @@ static void v9fs_mux_poll_stop(struct v9fs_mux_data *m)
 		v9fs_mux_poll_task_num--;
 	}
 	v9fs_mux_num--;
-	down(&v9fs_mux_task_lock);
+	mutex_unlock(&v9fs_mux_task_lock);
 }
 
 /**
-- 
cgit v1.2.3


From c039e3134ae62863bbc8e8429b29e3c43cf21b2a Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Thu, 23 Mar 2006 03:00:28 -0800
Subject: [PATCH] sem2mutex: blockdev #2

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 28 ++++++++++++++--------------
 fs/buffer.c    |  6 +++---
 fs/super.c     |  4 ++--
 3 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6e50346fb1e..44d05e6e34d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -265,8 +265,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 	    SLAB_CTOR_CONSTRUCTOR)
 	{
 		memset(bdev, 0, sizeof(*bdev));
-		sema_init(&bdev->bd_sem, 1);
-		sema_init(&bdev->bd_mount_sem, 1);
+		mutex_init(&bdev->bd_mutex);
+		mutex_init(&bdev->bd_mount_mutex);
 		INIT_LIST_HEAD(&bdev->bd_inodes);
 		INIT_LIST_HEAD(&bdev->bd_list);
 		inode_init_once(&ei->vfs_inode);
@@ -574,7 +574,7 @@ static int do_open(struct block_device *bdev, struct file *file)
 	}
 	owner = disk->fops->owner;
 
-	down(&bdev->bd_sem);
+	mutex_lock(&bdev->bd_mutex);
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
 		bdev->bd_contains = bdev;
@@ -605,21 +605,21 @@ static int do_open(struct block_device *bdev, struct file *file)
 			if (ret)
 				goto out_first;
 			bdev->bd_contains = whole;
-			down(&whole->bd_sem);
+			mutex_lock(&whole->bd_mutex);
 			whole->bd_part_count++;
 			p = disk->part[part - 1];
 			bdev->bd_inode->i_data.backing_dev_info =
 			   whole->bd_inode->i_data.backing_dev_info;
 			if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) {
 				whole->bd_part_count--;
-				up(&whole->bd_sem);
+				mutex_unlock(&whole->bd_mutex);
 				ret = -ENXIO;
 				goto out_first;
 			}
 			kobject_get(&p->kobj);
 			bdev->bd_part = p;
 			bd_set_size(bdev, (loff_t) p->nr_sects << 9);
-			up(&whole->bd_sem);
+			mutex_unlock(&whole->bd_mutex);
 		}
 	} else {
 		put_disk(disk);
@@ -633,13 +633,13 @@ static int do_open(struct block_device *bdev, struct file *file)
 			if (bdev->bd_invalidated)
 				rescan_partitions(bdev->bd_disk, bdev);
 		} else {
-			down(&bdev->bd_contains->bd_sem);
+			mutex_lock(&bdev->bd_contains->bd_mutex);
 			bdev->bd_contains->bd_part_count++;
-			up(&bdev->bd_contains->bd_sem);
+			mutex_unlock(&bdev->bd_contains->bd_mutex);
 		}
 	}
 	bdev->bd_openers++;
-	up(&bdev->bd_sem);
+	mutex_unlock(&bdev->bd_mutex);
 	unlock_kernel();
 	return 0;
 
@@ -652,7 +652,7 @@ out_first:
 	put_disk(disk);
 	module_put(owner);
 out:
-	up(&bdev->bd_sem);
+	mutex_unlock(&bdev->bd_mutex);
 	unlock_kernel();
 	if (ret)
 		bdput(bdev);
@@ -714,7 +714,7 @@ int blkdev_put(struct block_device *bdev)
 	struct inode *bd_inode = bdev->bd_inode;
 	struct gendisk *disk = bdev->bd_disk;
 
-	down(&bdev->bd_sem);
+	mutex_lock(&bdev->bd_mutex);
 	lock_kernel();
 	if (!--bdev->bd_openers) {
 		sync_blockdev(bdev);
@@ -724,9 +724,9 @@ int blkdev_put(struct block_device *bdev)
 		if (disk->fops->release)
 			ret = disk->fops->release(bd_inode, NULL);
 	} else {
-		down(&bdev->bd_contains->bd_sem);
+		mutex_lock(&bdev->bd_contains->bd_mutex);
 		bdev->bd_contains->bd_part_count--;
-		up(&bdev->bd_contains->bd_sem);
+		mutex_unlock(&bdev->bd_contains->bd_mutex);
 	}
 	if (!bdev->bd_openers) {
 		struct module *owner = disk->fops->owner;
@@ -746,7 +746,7 @@ int blkdev_put(struct block_device *bdev)
 		bdev->bd_contains = NULL;
 	}
 	unlock_kernel();
-	up(&bdev->bd_sem);
+	mutex_unlock(&bdev->bd_mutex);
 	bdput(bdev);
 	return ret;
 }
diff --git a/fs/buffer.c b/fs/buffer.c
index 1d3683d496f..0d6ca7bac6c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -201,7 +201,7 @@ int fsync_bdev(struct block_device *bdev)
  * freeze_bdev  --  lock a filesystem and force it into a consistent state
  * @bdev:	blockdevice to lock
  *
- * This takes the block device bd_mount_sem to make sure no new mounts
+ * This takes the block device bd_mount_mutex to make sure no new mounts
  * happen on bdev until thaw_bdev() is called.
  * If a superblock is found on this device, we take the s_umount semaphore
  * on it to make sure nobody unmounts until the snapshot creation is done.
@@ -210,7 +210,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 {
 	struct super_block *sb;
 
-	down(&bdev->bd_mount_sem);
+	mutex_lock(&bdev->bd_mount_mutex);
 	sb = get_super(bdev);
 	if (sb && !(sb->s_flags & MS_RDONLY)) {
 		sb->s_frozen = SB_FREEZE_WRITE;
@@ -264,7 +264,7 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb)
 		drop_super(sb);
 	}
 
-	up(&bdev->bd_mount_sem);
+	mutex_unlock(&bdev->bd_mount_mutex);
 }
 EXPORT_SYMBOL(thaw_bdev);
 
diff --git a/fs/super.c b/fs/super.c
index e20b5580afd..8f9c9b3af70 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -693,9 +693,9 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 	 * will protect the lockfs code from trying to start a snapshot
 	 * while we are mounting
 	 */
-	down(&bdev->bd_mount_sem);
+	mutex_lock(&bdev->bd_mount_mutex);
 	s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
-	up(&bdev->bd_mount_sem);
+	mutex_unlock(&bdev->bd_mount_mutex);
 	if (IS_ERR(s))
 		goto out;
 
-- 
cgit v1.2.3


From d3be915fc5e7d19a2283ad9b0fe0782a74675d0a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 23 Mar 2006 03:00:29 -0800
Subject: [PATCH] sem2mutex: quota

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Jan Kara <jack@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dquot.c      | 117 ++++++++++++++++++++++++++++----------------------------
 fs/ext3/super.c |   4 +-
 fs/quota.c      |   6 +--
 fs/quota_v2.c   |   2 +-
 fs/super.c      |   4 +-
 5 files changed, 67 insertions(+), 66 deletions(-)

(limited to 'fs')

diff --git a/fs/dquot.c b/fs/dquot.c
index 9376a437898..acf07e581f8 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -103,12 +103,12 @@
  * (these locking rules also apply for S_NOQUOTA flag in the inode - note that
  * for altering the flag i_mutex is also needed).  If operation is holding
  * reference to dquot in other way (e.g. quotactl ops) it must be guarded by
- * dqonoff_sem.
+ * dqonoff_mutex.
  * This locking assures that:
  *   a) update/access to dquot pointers in inode is serialized
  *   b) everyone is guarded against invalidate_dquots()
  *
- * Each dquot has its dq_lock semaphore. Locked dquots might not be referenced
+ * Each dquot has its dq_lock mutex. Locked dquots might not be referenced
  * from inodes (dquot_alloc_space() and such don't check the dq_lock).
  * Currently dquot is locked only when it is being read to memory (or space for
  * it is being allocated) on the first dqget() and when it is being released on
@@ -118,8 +118,9 @@
  * spinlock to internal buffers before writing.
  *
  * Lock ordering (including related VFS locks) is the following:
- *  i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock > dqio_sem
- * i_mutex on quota files is special (it's below dqio_sem)
+ *   i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock >
+ *   dqio_mutex
+ * i_mutex on quota files is special (it's below dqio_mutex)
  */
 
 static DEFINE_SPINLOCK(dq_list_lock);
@@ -280,8 +281,8 @@ static inline void remove_inuse(struct dquot *dquot)
 
 static void wait_on_dquot(struct dquot *dquot)
 {
-	down(&dquot->dq_lock);
-	up(&dquot->dq_lock);
+	mutex_lock(&dquot->dq_lock);
+	mutex_unlock(&dquot->dq_lock);
 }
 
 #define mark_dquot_dirty(dquot) ((dquot)->dq_sb->dq_op->mark_dirty(dquot))
@@ -320,8 +321,8 @@ int dquot_acquire(struct dquot *dquot)
 	int ret = 0, ret2 = 0;
 	struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
 
-	down(&dquot->dq_lock);
-	down(&dqopt->dqio_sem);
+	mutex_lock(&dquot->dq_lock);
+	mutex_lock(&dqopt->dqio_mutex);
 	if (!test_bit(DQ_READ_B, &dquot->dq_flags))
 		ret = dqopt->ops[dquot->dq_type]->read_dqblk(dquot);
 	if (ret < 0)
@@ -342,8 +343,8 @@ int dquot_acquire(struct dquot *dquot)
 	}
 	set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
 out_iolock:
-	up(&dqopt->dqio_sem);
-	up(&dquot->dq_lock);
+	mutex_unlock(&dqopt->dqio_mutex);
+	mutex_unlock(&dquot->dq_lock);
 	return ret;
 }
 
@@ -355,7 +356,7 @@ int dquot_commit(struct dquot *dquot)
 	int ret = 0, ret2 = 0;
 	struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
 
-	down(&dqopt->dqio_sem);
+	mutex_lock(&dqopt->dqio_mutex);
 	spin_lock(&dq_list_lock);
 	if (!clear_dquot_dirty(dquot)) {
 		spin_unlock(&dq_list_lock);
@@ -372,7 +373,7 @@ int dquot_commit(struct dquot *dquot)
 			ret = ret2;
 	}
 out_sem:
-	up(&dqopt->dqio_sem);
+	mutex_unlock(&dqopt->dqio_mutex);
 	return ret;
 }
 
@@ -384,11 +385,11 @@ int dquot_release(struct dquot *dquot)
 	int ret = 0, ret2 = 0;
 	struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
 
-	down(&dquot->dq_lock);
+	mutex_lock(&dquot->dq_lock);
 	/* Check whether we are not racing with some other dqget() */
 	if (atomic_read(&dquot->dq_count) > 1)
 		goto out_dqlock;
-	down(&dqopt->dqio_sem);
+	mutex_lock(&dqopt->dqio_mutex);
 	if (dqopt->ops[dquot->dq_type]->release_dqblk) {
 		ret = dqopt->ops[dquot->dq_type]->release_dqblk(dquot);
 		/* Write the info */
@@ -398,9 +399,9 @@ int dquot_release(struct dquot *dquot)
 			ret = ret2;
 	}
 	clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
-	up(&dqopt->dqio_sem);
+	mutex_unlock(&dqopt->dqio_mutex);
 out_dqlock:
-	up(&dquot->dq_lock);
+	mutex_unlock(&dquot->dq_lock);
 	return ret;
 }
 
@@ -464,7 +465,7 @@ int vfs_quota_sync(struct super_block *sb, int type)
 	struct quota_info *dqopt = sb_dqopt(sb);
 	int cnt;
 
-	down(&dqopt->dqonoff_sem);
+	mutex_lock(&dqopt->dqonoff_mutex);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (type != -1 && cnt != type)
 			continue;
@@ -499,7 +500,7 @@ int vfs_quota_sync(struct super_block *sb, int type)
 	spin_lock(&dq_list_lock);
 	dqstats.syncs++;
 	spin_unlock(&dq_list_lock);
-	up(&dqopt->dqonoff_sem);
+	mutex_unlock(&dqopt->dqonoff_mutex);
 
 	return 0;
 }
@@ -540,7 +541,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
 /*
  * Put reference to dquot
  * NOTE: If you change this function please check whether dqput_blocks() works right...
- * MUST be called with either dqptr_sem or dqonoff_sem held
+ * MUST be called with either dqptr_sem or dqonoff_mutex held
  */
 static void dqput(struct dquot *dquot)
 {
@@ -605,7 +606,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 		return NODQUOT;
 
 	memset((caddr_t)dquot, 0, sizeof(struct dquot));
-	sema_init(&dquot->dq_lock, 1);
+	mutex_init(&dquot->dq_lock);
 	INIT_LIST_HEAD(&dquot->dq_free);
 	INIT_LIST_HEAD(&dquot->dq_inuse);
 	INIT_HLIST_NODE(&dquot->dq_hash);
@@ -620,7 +621,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 
 /*
  * Get reference to dquot
- * MUST be called with either dqptr_sem or dqonoff_sem held
+ * MUST be called with either dqptr_sem or dqonoff_mutex held
  */
 static struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
 {
@@ -686,7 +687,7 @@ static int dqinit_needed(struct inode *inode, int type)
 	return 0;
 }
 
-/* This routine is guarded by dqonoff_sem semaphore */
+/* This routine is guarded by dqonoff_mutex mutex */
 static void add_dquot_ref(struct super_block *sb, int type)
 {
 	struct list_head *p;
@@ -964,8 +965,8 @@ int dquot_initialize(struct inode *inode, int type)
 	unsigned int id = 0;
 	int cnt, ret = 0;
 
-	/* First test before acquiring semaphore - solves deadlocks when we
-         * re-enter the quota code and are already holding the semaphore */
+	/* First test before acquiring mutex - solves deadlocks when we
+         * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode))
 		return 0;
 	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1028,8 +1029,8 @@ int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
 	int cnt, ret = NO_QUOTA;
 	char warntype[MAXQUOTAS];
 
-	/* First test before acquiring semaphore - solves deadlocks when we
-         * re-enter the quota code and are already holding the semaphore */
+	/* First test before acquiring mutex - solves deadlocks when we
+         * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode)) {
 out_add:
 		inode_add_bytes(inode, number);
@@ -1077,8 +1078,8 @@ int dquot_alloc_inode(const struct inode *inode, unsigned long number)
 	int cnt, ret = NO_QUOTA;
 	char warntype[MAXQUOTAS];
 
-	/* First test before acquiring semaphore - solves deadlocks when we
-         * re-enter the quota code and are already holding the semaphore */
+	/* First test before acquiring mutex - solves deadlocks when we
+         * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode))
 		return QUOTA_OK;
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
@@ -1121,8 +1122,8 @@ int dquot_free_space(struct inode *inode, qsize_t number)
 {
 	unsigned int cnt;
 
-	/* First test before acquiring semaphore - solves deadlocks when we
-         * re-enter the quota code and are already holding the semaphore */
+	/* First test before acquiring mutex - solves deadlocks when we
+         * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode)) {
 out_sub:
 		inode_sub_bytes(inode, number);
@@ -1157,8 +1158,8 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
 {
 	unsigned int cnt;
 
-	/* First test before acquiring semaphore - solves deadlocks when we
-         * re-enter the quota code and are already holding the semaphore */
+	/* First test before acquiring mutex - solves deadlocks when we
+         * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode))
 		return QUOTA_OK;
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1197,8 +1198,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 	    chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
 	char warntype[MAXQUOTAS];
 
-	/* First test before acquiring semaphore - solves deadlocks when we
-         * re-enter the quota code and are already holding the semaphore */
+	/* First test before acquiring mutex - solves deadlocks when we
+         * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode))
 		return QUOTA_OK;
 	/* Clear the arrays */
@@ -1292,9 +1293,9 @@ int dquot_commit_info(struct super_block *sb, int type)
 	int ret;
 	struct quota_info *dqopt = sb_dqopt(sb);
 
-	down(&dqopt->dqio_sem);
+	mutex_lock(&dqopt->dqio_mutex);
 	ret = dqopt->ops[type]->write_file_info(sb, type);
-	up(&dqopt->dqio_sem);
+	mutex_unlock(&dqopt->dqio_mutex);
 	return ret;
 }
 
@@ -1350,7 +1351,7 @@ int vfs_quota_off(struct super_block *sb, int type)
 	struct inode *toputinode[MAXQUOTAS];
 
 	/* We need to serialize quota_off() for device */
-	down(&dqopt->dqonoff_sem);
+	mutex_lock(&dqopt->dqonoff_mutex);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		toputinode[cnt] = NULL;
 		if (type != -1 && cnt != type)
@@ -1379,7 +1380,7 @@ int vfs_quota_off(struct super_block *sb, int type)
 		dqopt->info[cnt].dqi_bgrace = 0;
 		dqopt->ops[cnt] = NULL;
 	}
-	up(&dqopt->dqonoff_sem);
+	mutex_unlock(&dqopt->dqonoff_mutex);
 	/* Sync the superblock so that buffers with quota data are written to
 	 * disk (and so userspace sees correct data afterwards). */
 	if (sb->s_op->sync_fs)
@@ -1392,7 +1393,7 @@ int vfs_quota_off(struct super_block *sb, int type)
 	 * changes done by userspace on the next quotaon() */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		if (toputinode[cnt]) {
-			down(&dqopt->dqonoff_sem);
+			mutex_lock(&dqopt->dqonoff_mutex);
 			/* If quota was reenabled in the meantime, we have
 			 * nothing to do */
 			if (!sb_has_quota_enabled(sb, cnt)) {
@@ -1404,7 +1405,7 @@ int vfs_quota_off(struct super_block *sb, int type)
 				mark_inode_dirty(toputinode[cnt]);
 				iput(toputinode[cnt]);
 			}
-			up(&dqopt->dqonoff_sem);
+			mutex_unlock(&dqopt->dqonoff_mutex);
 		}
 	if (sb->s_bdev)
 		invalidate_bdev(sb->s_bdev, 0);
@@ -1445,7 +1446,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
 	/* And now flush the block cache so that kernel sees the changes */
 	invalidate_bdev(sb->s_bdev, 0);
 	mutex_lock(&inode->i_mutex);
-	down(&dqopt->dqonoff_sem);
+	mutex_lock(&dqopt->dqonoff_mutex);
 	if (sb_has_quota_enabled(sb, type)) {
 		error = -EBUSY;
 		goto out_lock;
@@ -1470,17 +1471,17 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
 	dqopt->ops[type] = fmt->qf_ops;
 	dqopt->info[type].dqi_format = fmt;
 	INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list);
-	down(&dqopt->dqio_sem);
+	mutex_lock(&dqopt->dqio_mutex);
 	if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0) {
-		up(&dqopt->dqio_sem);
+		mutex_unlock(&dqopt->dqio_mutex);
 		goto out_file_init;
 	}
-	up(&dqopt->dqio_sem);
+	mutex_unlock(&dqopt->dqio_mutex);
 	mutex_unlock(&inode->i_mutex);
 	set_enable_flags(dqopt, type);
 
 	add_dquot_ref(sb, type);
-	up(&dqopt->dqonoff_sem);
+	mutex_unlock(&dqopt->dqonoff_mutex);
 
 	return 0;
 
@@ -1488,7 +1489,7 @@ out_file_init:
 	dqopt->files[type] = NULL;
 	iput(inode);
 out_lock:
-	up(&dqopt->dqonoff_sem);
+	mutex_unlock(&dqopt->dqonoff_mutex);
 	if (oldflags != -1) {
 		down_write(&dqopt->dqptr_sem);
 		/* Set the flags back (in the case of accidental quotaon()
@@ -1576,14 +1577,14 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
 {
 	struct dquot *dquot;
 
-	down(&sb_dqopt(sb)->dqonoff_sem);
+	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
 	if (!(dquot = dqget(sb, id, type))) {
-		up(&sb_dqopt(sb)->dqonoff_sem);
+		mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 		return -ESRCH;
 	}
 	do_get_dqblk(dquot, di);
 	dqput(dquot);
-	up(&sb_dqopt(sb)->dqonoff_sem);
+	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	return 0;
 }
 
@@ -1645,14 +1646,14 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
 {
 	struct dquot *dquot;
 
-	down(&sb_dqopt(sb)->dqonoff_sem);
+	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
 	if (!(dquot = dqget(sb, id, type))) {
-		up(&sb_dqopt(sb)->dqonoff_sem);
+		mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 		return -ESRCH;
 	}
 	do_set_dqblk(dquot, di);
 	dqput(dquot);
-	up(&sb_dqopt(sb)->dqonoff_sem);
+	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	return 0;
 }
 
@@ -1661,9 +1662,9 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 {
 	struct mem_dqinfo *mi;
   
-	down(&sb_dqopt(sb)->dqonoff_sem);
+	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
 	if (!sb_has_quota_enabled(sb, type)) {
-		up(&sb_dqopt(sb)->dqonoff_sem);
+		mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 		return -ESRCH;
 	}
 	mi = sb_dqopt(sb)->info + type;
@@ -1673,7 +1674,7 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 	ii->dqi_flags = mi->dqi_flags & DQF_MASK;
 	ii->dqi_valid = IIF_ALL;
 	spin_unlock(&dq_data_lock);
-	up(&sb_dqopt(sb)->dqonoff_sem);
+	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	return 0;
 }
 
@@ -1682,9 +1683,9 @@ int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 {
 	struct mem_dqinfo *mi;
 
-	down(&sb_dqopt(sb)->dqonoff_sem);
+	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
 	if (!sb_has_quota_enabled(sb, type)) {
-		up(&sb_dqopt(sb)->dqonoff_sem);
+		mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 		return -ESRCH;
 	}
 	mi = sb_dqopt(sb)->info + type;
@@ -1699,7 +1700,7 @@ int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 	mark_info_dirty(sb, type);
 	/* Force write to disk */
 	sb->dq_op->write_info(sb, type);
-	up(&sb_dqopt(sb)->dqonoff_sem);
+	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	return 0;
 }
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 56bf7658601..efa83205914 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2382,8 +2382,8 @@ static int ext3_statfs (struct super_block * sb, struct kstatfs * buf)
  * Process 1                         Process 2
  * ext3_create()                     quota_sync()
  *   journal_start()                   write_dquot()
- *   DQUOT_INIT()                        down(dqio_sem)
- *     down(dqio_sem)                    journal_start()
+ *   DQUOT_INIT()                        down(dqio_mutex)
+ *     down(dqio_mutex)                    journal_start()
  *
  */
 
diff --git a/fs/quota.c b/fs/quota.c
index ba9e0bf32f6..d6a2be826e2 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -170,10 +170,10 @@ static void quota_sync_sb(struct super_block *sb, int type)
 
 	/* Now when everything is written we can discard the pagecache so
 	 * that userspace sees the changes. We need i_mutex and so we could
-	 * not do it inside dqonoff_sem. Moreover we need to be carefull
+	 * not do it inside dqonoff_mutex. Moreover we need to be carefull
 	 * about races with quotaoff() (that is the reason why we have own
 	 * reference to inode). */
-	down(&sb_dqopt(sb)->dqonoff_sem);
+	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		discard[cnt] = NULL;
 		if (type != -1 && cnt != type)
@@ -182,7 +182,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
 			continue;
 		discard[cnt] = igrab(sb_dqopt(sb)->files[cnt]);
 	}
-	up(&sb_dqopt(sb)->dqonoff_sem);
+	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (discard[cnt]) {
 			mutex_lock(&discard[cnt]->i_mutex);
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index b4199ec3ece..c519a583e68 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -394,7 +394,7 @@ static int v2_write_dquot(struct dquot *dquot)
 	ssize_t ret;
 	struct v2_disk_dqblk ddquot, empty;
 
-	/* dq_off is guarded by dqio_sem */
+	/* dq_off is guarded by dqio_mutex */
 	if (!dquot->dq_off)
 		if ((ret = dq_insert_tree(dquot)) < 0) {
 			printk(KERN_ERR "VFS: Error %zd occurred while creating quota.\n", ret);
diff --git a/fs/super.c b/fs/super.c
index 8f9c9b3af70..9cc6545dfa4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -77,8 +77,8 @@ static struct super_block *alloc_super(void)
 		s->s_count = S_BIAS;
 		atomic_set(&s->s_active, 1);
 		sema_init(&s->s_vfs_rename_sem,1);
-		sema_init(&s->s_dquot.dqio_sem, 1);
-		sema_init(&s->s_dquot.dqonoff_sem, 1);
+		mutex_init(&s->s_dquot.dqio_mutex);
+		mutex_init(&s->s_dquot.dqonoff_mutex);
 		init_rwsem(&s->s_dquot.dqptr_sem);
 		init_waitqueue_head(&s->s_wait_unfrozen);
 		s->s_maxbytes = MAX_NON_LFS;
-- 
cgit v1.2.3


From d4f9af9dac4ecb75818f909168f87b441cc95653 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 23 Mar 2006 03:00:30 -0800
Subject: [PATCH] sem2mutex: inotify

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: John McCutchan <ttb@tentacle.dhs.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Acked-by: Robert Love <rml@novell.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inode.c   |   2 +-
 fs/inotify.c | 110 +++++++++++++++++++++++++++++------------------------------
 2 files changed, 56 insertions(+), 56 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index d0be6159eb7..603e93ef0c6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -206,7 +206,7 @@ void inode_init_once(struct inode *inode)
 	i_size_ordered_init(inode);
 #ifdef CONFIG_INOTIFY
 	INIT_LIST_HEAD(&inode->inotify_watches);
-	sema_init(&inode->inotify_sem, 1);
+	mutex_init(&inode->inotify_mutex);
 #endif
 }
 
diff --git a/fs/inotify.c b/fs/inotify.c
index 3041503bde0..60d9653d55b 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -56,8 +56,8 @@ int inotify_max_queued_events;
  * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
  * iprune_sem (synchronize shrink_icache_memory())
  * 	inode_lock (protects the super_block->s_inodes list)
- * 	inode->inotify_sem (protects inode->inotify_watches and watches->i_list)
- * 		inotify_dev->sem (protects inotify_device and watches->d_list)
+ * 	inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
+ * 		inotify_dev->mutex (protects inotify_device and watches->d_list)
  */
 
 /*
@@ -79,12 +79,12 @@ int inotify_max_queued_events;
 /*
  * struct inotify_device - represents an inotify instance
  *
- * This structure is protected by the semaphore 'sem'.
+ * This structure is protected by the mutex 'mutex'.
  */
 struct inotify_device {
 	wait_queue_head_t 	wq;		/* wait queue for i/o */
 	struct idr		idr;		/* idr mapping wd -> watch */
-	struct semaphore	sem;		/* protects this bad boy */
+	struct mutex		mutex;		/* protects this bad boy */
 	struct list_head 	events;		/* list of queued events */
 	struct list_head	watches;	/* list of watches */
 	atomic_t		count;		/* reference count */
@@ -101,7 +101,7 @@ struct inotify_device {
  * device.  In read(), this list is walked and all events that can fit in the
  * buffer are returned.
  *
- * Protected by dev->sem of the device in which we are queued.
+ * Protected by dev->mutex of the device in which we are queued.
  */
 struct inotify_kernel_event {
 	struct inotify_event	event;	/* the user-space event */
@@ -112,8 +112,8 @@ struct inotify_kernel_event {
 /*
  * struct inotify_watch - represents a watch request on a specific inode
  *
- * d_list is protected by dev->sem of the associated watch->dev.
- * i_list and mask are protected by inode->inotify_sem of the associated inode.
+ * d_list is protected by dev->mutex of the associated watch->dev.
+ * i_list and mask are protected by inode->inotify_mutex of the associated inode.
  * dev, inode, and wd are never written to once the watch is created.
  */
 struct inotify_watch {
@@ -261,7 +261,7 @@ static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
 /*
  * inotify_dev_get_event - return the next event in the given dev's queue
  *
- * Caller must hold dev->sem.
+ * Caller must hold dev->mutex.
  */
 static inline struct inotify_kernel_event *
 inotify_dev_get_event(struct inotify_device *dev)
@@ -272,7 +272,7 @@ inotify_dev_get_event(struct inotify_device *dev)
 /*
  * inotify_dev_queue_event - add a new event to the given device
  *
- * Caller must hold dev->sem.  Can sleep (calls kernel_event()).
+ * Caller must hold dev->mutex.  Can sleep (calls kernel_event()).
  */
 static void inotify_dev_queue_event(struct inotify_device *dev,
 				    struct inotify_watch *watch, u32 mask,
@@ -315,7 +315,7 @@ static void inotify_dev_queue_event(struct inotify_device *dev,
 /*
  * remove_kevent - cleans up and ultimately frees the given kevent
  *
- * Caller must hold dev->sem.
+ * Caller must hold dev->mutex.
  */
 static void remove_kevent(struct inotify_device *dev,
 			  struct inotify_kernel_event *kevent)
@@ -332,7 +332,7 @@ static void remove_kevent(struct inotify_device *dev,
 /*
  * inotify_dev_event_dequeue - destroy an event on the given device
  *
- * Caller must hold dev->sem.
+ * Caller must hold dev->mutex.
  */
 static void inotify_dev_event_dequeue(struct inotify_device *dev)
 {
@@ -346,7 +346,7 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev)
 /*
  * inotify_dev_get_wd - returns the next WD for use by the given dev
  *
- * Callers must hold dev->sem.  This function can sleep.
+ * Callers must hold dev->mutex.  This function can sleep.
  */
 static int inotify_dev_get_wd(struct inotify_device *dev,
 			      struct inotify_watch *watch)
@@ -383,7 +383,7 @@ static int find_inode(const char __user *dirname, struct nameidata *nd,
 /*
  * create_watch - creates a watch on the given device.
  *
- * Callers must hold dev->sem.  Calls inotify_dev_get_wd() so may sleep.
+ * Callers must hold dev->mutex.  Calls inotify_dev_get_wd() so may sleep.
  * Both 'dev' and 'inode' (by way of nameidata) need to be pinned.
  */
 static struct inotify_watch *create_watch(struct inotify_device *dev,
@@ -434,7 +434,7 @@ static struct inotify_watch *create_watch(struct inotify_device *dev,
 /*
  * inotify_find_dev - find the watch associated with the given inode and dev
  *
- * Callers must hold inode->inotify_sem.
+ * Callers must hold inode->inotify_mutex.
  */
 static struct inotify_watch *inode_find_dev(struct inode *inode,
 					    struct inotify_device *dev)
@@ -469,7 +469,7 @@ static void remove_watch_no_event(struct inotify_watch *watch,
  * the IN_IGNORED event to the given device signifying that the inode is no
  * longer watched.
  *
- * Callers must hold both inode->inotify_sem and dev->sem.  We drop a
+ * Callers must hold both inode->inotify_mutex and dev->mutex.  We drop a
  * reference to the inode before returning.
  *
  * The inode is not iput() so as to remain atomic.  If the inode needs to be
@@ -507,21 +507,21 @@ void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
 	if (!inotify_inode_watched(inode))
 		return;
 
-	down(&inode->inotify_sem);
+	mutex_lock(&inode->inotify_mutex);
 	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
 		u32 watch_mask = watch->mask;
 		if (watch_mask & mask) {
 			struct inotify_device *dev = watch->dev;
 			get_inotify_watch(watch);
-			down(&dev->sem);
+			mutex_lock(&dev->mutex);
 			inotify_dev_queue_event(dev, watch, mask, cookie, name);
 			if (watch_mask & IN_ONESHOT)
 				remove_watch_no_event(watch, dev);
-			up(&dev->sem);
+			mutex_unlock(&dev->mutex);
 			put_inotify_watch(watch);
 		}
 	}
-	up(&inode->inotify_sem);
+	mutex_unlock(&inode->inotify_mutex);
 }
 EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
 
@@ -626,16 +626,16 @@ void inotify_unmount_inodes(struct list_head *list)
 			iput(need_iput_tmp);
 
 		/* for each watch, send IN_UNMOUNT and then remove it */
-		down(&inode->inotify_sem);
+		mutex_lock(&inode->inotify_mutex);
 		watches = &inode->inotify_watches;
 		list_for_each_entry_safe(watch, next_w, watches, i_list) {
 			struct inotify_device *dev = watch->dev;
-			down(&dev->sem);
+			mutex_lock(&dev->mutex);
 			inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL);
 			remove_watch(watch, dev);
-			up(&dev->sem);
+			mutex_unlock(&dev->mutex);
 		}
-		up(&inode->inotify_sem);
+		mutex_unlock(&inode->inotify_mutex);
 		iput(inode);		
 
 		spin_lock(&inode_lock);
@@ -651,14 +651,14 @@ void inotify_inode_is_dead(struct inode *inode)
 {
 	struct inotify_watch *watch, *next;
 
-	down(&inode->inotify_sem);
+	mutex_lock(&inode->inotify_mutex);
 	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
 		struct inotify_device *dev = watch->dev;
-		down(&dev->sem);
+		mutex_lock(&dev->mutex);
 		remove_watch(watch, dev);
-		up(&dev->sem);
+		mutex_unlock(&dev->mutex);
 	}
-	up(&inode->inotify_sem);
+	mutex_unlock(&inode->inotify_mutex);
 }
 EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
 
@@ -670,10 +670,10 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait)
 	int ret = 0;
 
 	poll_wait(file, &dev->wq, wait);
-	down(&dev->sem);
+	mutex_lock(&dev->mutex);
 	if (!list_empty(&dev->events))
 		ret = POLLIN | POLLRDNORM;
-	up(&dev->sem);
+	mutex_unlock(&dev->mutex);
 
 	return ret;
 }
@@ -695,9 +695,9 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 
 		prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
 
-		down(&dev->sem);
+		mutex_lock(&dev->mutex);
 		events = !list_empty(&dev->events);
-		up(&dev->sem);
+		mutex_unlock(&dev->mutex);
 		if (events) {
 			ret = 0;
 			break;
@@ -720,7 +720,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 	if (ret)
 		return ret;
 
-	down(&dev->sem);
+	mutex_lock(&dev->mutex);
 	while (1) {
 		struct inotify_kernel_event *kevent;
 
@@ -750,7 +750,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 
 		remove_kevent(dev, kevent);
 	}
-	up(&dev->sem);
+	mutex_unlock(&dev->mutex);
 
 	return ret;
 }
@@ -763,37 +763,37 @@ static int inotify_release(struct inode *ignored, struct file *file)
 	 * Destroy all of the watches on this device.  Unfortunately, not very
 	 * pretty.  We cannot do a simple iteration over the list, because we
 	 * do not know the inode until we iterate to the watch.  But we need to
-	 * hold inode->inotify_sem before dev->sem.  The following works.
+	 * hold inode->inotify_mutex before dev->mutex.  The following works.
 	 */
 	while (1) {
 		struct inotify_watch *watch;
 		struct list_head *watches;
 		struct inode *inode;
 
-		down(&dev->sem);
+		mutex_lock(&dev->mutex);
 		watches = &dev->watches;
 		if (list_empty(watches)) {
-			up(&dev->sem);
+			mutex_unlock(&dev->mutex);
 			break;
 		}
 		watch = list_entry(watches->next, struct inotify_watch, d_list);
 		get_inotify_watch(watch);
-		up(&dev->sem);
+		mutex_unlock(&dev->mutex);
 
 		inode = watch->inode;
-		down(&inode->inotify_sem);
-		down(&dev->sem);
+		mutex_lock(&inode->inotify_mutex);
+		mutex_lock(&dev->mutex);
 		remove_watch_no_event(watch, dev);
-		up(&dev->sem);
-		up(&inode->inotify_sem);
+		mutex_unlock(&dev->mutex);
+		mutex_unlock(&inode->inotify_mutex);
 		put_inotify_watch(watch);
 	}
 
 	/* destroy all of the events on this device */
-	down(&dev->sem);
+	mutex_lock(&dev->mutex);
 	while (!list_empty(&dev->events))
 		inotify_dev_event_dequeue(dev);
-	up(&dev->sem);
+	mutex_unlock(&dev->mutex);
 
 	/* free this device: the put matching the get in inotify_init() */
 	put_inotify_dev(dev);
@@ -811,26 +811,26 @@ static int inotify_ignore(struct inotify_device *dev, s32 wd)
 	struct inotify_watch *watch;
 	struct inode *inode;
 
-	down(&dev->sem);
+	mutex_lock(&dev->mutex);
 	watch = idr_find(&dev->idr, wd);
 	if (unlikely(!watch)) {
-		up(&dev->sem);
+		mutex_unlock(&dev->mutex);
 		return -EINVAL;
 	}
 	get_inotify_watch(watch);
 	inode = watch->inode;
-	up(&dev->sem);
+	mutex_unlock(&dev->mutex);
 
-	down(&inode->inotify_sem);
-	down(&dev->sem);
+	mutex_lock(&inode->inotify_mutex);
+	mutex_lock(&dev->mutex);
 
 	/* make sure that we did not race */
 	watch = idr_find(&dev->idr, wd);
 	if (likely(watch))
 		remove_watch(watch, dev);
 
-	up(&dev->sem);
-	up(&inode->inotify_sem);
+	mutex_unlock(&dev->mutex);
+	mutex_unlock(&inode->inotify_mutex);
 	put_inotify_watch(watch);
 
 	return 0;
@@ -905,7 +905,7 @@ asmlinkage long sys_inotify_init(void)
 	INIT_LIST_HEAD(&dev->events);
 	INIT_LIST_HEAD(&dev->watches);
 	init_waitqueue_head(&dev->wq);
-	sema_init(&dev->sem, 1);
+	mutex_init(&dev->mutex);
 	dev->event_count = 0;
 	dev->queue_size = 0;
 	dev->max_events = inotify_max_queued_events;
@@ -960,8 +960,8 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 	inode = nd.dentry->d_inode;
 	dev = filp->private_data;
 
-	down(&inode->inotify_sem);
-	down(&dev->sem);
+	mutex_lock(&inode->inotify_mutex);
+	mutex_lock(&dev->mutex);
 
 	if (mask & IN_MASK_ADD)
 		mask_add = 1;
@@ -998,8 +998,8 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 	list_add(&watch->i_list, &inode->inotify_watches);
 	ret = watch->wd;
 out:
-	up(&dev->sem);
-	up(&inode->inotify_sem);
+	mutex_unlock(&dev->mutex);
+	mutex_unlock(&inode->inotify_mutex);
 	path_release(&nd);
 fput_and_out:
 	fput_light(filp, fput_needed);
-- 
cgit v1.2.3


From 144efe3e3e5ad57af549bf800fa4560d7c74e9fe Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Thu, 23 Mar 2006 03:00:32 -0800
Subject: [PATCH] sem2mutex: eventpoll

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/eventpoll.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 4284cd31eba..f5d69f46ba9 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -34,6 +34,7 @@
 #include <linux/eventpoll.h>
 #include <linux/mount.h>
 #include <linux/bitops.h>
+#include <linux/mutex.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/io.h>
@@ -46,7 +47,7 @@
  * LOCKING:
  * There are three level of locking required by epoll :
  *
- * 1) epsem (semaphore)
+ * 1) epmutex (mutex)
  * 2) ep->sem (rw_semaphore)
  * 3) ep->lock (rw_lock)
  *
@@ -67,9 +68,9 @@
  * if a file has been pushed inside an epoll set and it is then
  * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
  * It is possible to drop the "ep->sem" and to use the global
- * semaphore "epsem" (together with "ep->lock") to have it working,
+ * semaphore "epmutex" (together with "ep->lock") to have it working,
  * but having "ep->sem" will make the interface more scalable.
- * Events that require holding "epsem" are very rare, while for
+ * Events that require holding "epmutex" are very rare, while for
  * normal operations the epoll private "ep->sem" will guarantee
  * a greater scalability.
  */
@@ -274,7 +275,7 @@ static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type,
 /*
  * This semaphore is used to serialize ep_free() and eventpoll_release_file().
  */
-static struct semaphore epsem;
+static struct mutex epmutex;
 
 /* Safe wake up implementation */
 static struct poll_safewake psw;
@@ -477,10 +478,10 @@ void eventpoll_release_file(struct file *file)
 	 * cleanup path, and this means that noone is using this file anymore.
 	 * The only hit might come from ep_free() but by holding the semaphore
 	 * will correctly serialize the operation. We do need to acquire
-	 * "ep->sem" after "epsem" because ep_remove() requires it when called
+	 * "ep->sem" after "epmutex" because ep_remove() requires it when called
 	 * from anywhere but ep_free().
 	 */
-	down(&epsem);
+	mutex_lock(&epmutex);
 
 	while (!list_empty(lsthead)) {
 		epi = list_entry(lsthead->next, struct epitem, fllink);
@@ -492,7 +493,7 @@ void eventpoll_release_file(struct file *file)
 		up_write(&ep->sem);
 	}
 
-	up(&epsem);
+	mutex_unlock(&epmutex);
 }
 
 
@@ -819,9 +820,9 @@ static void ep_free(struct eventpoll *ep)
 	 * We do not need to hold "ep->sem" here because the epoll file
 	 * is on the way to be removed and no one has references to it
 	 * anymore. The only hit might come from eventpoll_release_file() but
-	 * holding "epsem" is sufficent here.
+	 * holding "epmutex" is sufficent here.
 	 */
-	down(&epsem);
+	mutex_lock(&epmutex);
 
 	/*
 	 * Walks through the whole tree by unregistering poll callbacks.
@@ -843,7 +844,7 @@ static void ep_free(struct eventpoll *ep)
 		ep_remove(ep, epi);
 	}
 
-	up(&epsem);
+	mutex_unlock(&epmutex);
 }
 
 
@@ -1615,7 +1616,7 @@ static int __init eventpoll_init(void)
 {
 	int error;
 
-	init_MUTEX(&epsem);
+	mutex_init(&epmutex);
 
 	/* Initialize the structure used to perform safe poll wait head wake ups */
 	ep_poll_safewake_init(&psw);
-- 
cgit v1.2.3


From a11f3a0574a5734db3e5de38922430d005d35118 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Thu, 23 Mar 2006 03:00:33 -0800
Subject: [PATCH] sem2mutex: vfs_rename_mutex

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/dir.c     |  8 ++++----
 fs/cifs/fcntl.c   |  4 ++--
 fs/cifs/file.c    |  4 ++--
 fs/cifs/inode.c   | 16 ++++++++--------
 fs/cifs/link.c    | 16 ++++++++--------
 fs/cifs/readdir.c |  4 ++--
 fs/cifs/xattr.c   | 16 ++++++++--------
 fs/namei.c        | 12 ++++++------
 fs/super.c        |  2 +-
 9 files changed, 41 insertions(+), 41 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index fed55e3c53d..632561dd9c5 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -138,9 +138,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	down(&direntry->d_sb->s_vfs_rename_sem);
+	mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	up(&direntry->d_sb->s_vfs_rename_sem);
+	mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -317,9 +317,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	down(&direntry->d_sb->s_vfs_rename_sem);
+	mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	up(&direntry->d_sb->s_vfs_rename_sem);
+	mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
 	if(full_path == NULL)
 		rc = -ENOMEM;
 	else if (pTcon->ses->capabilities & CAP_UNIX) {
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index a7a47bb36bf..ec4dfe9bf5e 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -86,9 +86,9 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
 	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
 	pTcon = cifs_sb->tcon;
 
-	down(&file->f_dentry->d_sb->s_vfs_rename_sem);
+	mutex_lock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(file->f_dentry);
-	up(&file->f_dentry->d_sb->s_vfs_rename_sem);
+	mutex_unlock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
 
 	if(full_path == NULL) {
 		rc = -ENOMEM;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 675bd256829..165d6742638 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -203,9 +203,9 @@ int cifs_open(struct inode *inode, struct file *file)
 		}
 	}
 
-	down(&inode->i_sb->s_vfs_rename_sem);
+	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(file->f_dentry);
-	up(&inode->i_sb->s_vfs_rename_sem);
+	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 59359911f48..ff93a9f81d1 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -574,9 +574,9 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry)
 
 	/* Unlink can be called from rename so we can not grab the sem here
 	   since we deadlock otherwise */
-/*	down(&direntry->d_sb->s_vfs_rename_sem);*/
+/*	mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);*/
 	full_path = build_path_from_dentry(direntry);
-/*	up(&direntry->d_sb->s_vfs_rename_sem);*/
+/*	mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);*/
 	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -718,9 +718,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	down(&inode->i_sb->s_vfs_rename_sem);
+	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	up(&inode->i_sb->s_vfs_rename_sem);
+	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -803,9 +803,9 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	down(&inode->i_sb->s_vfs_rename_sem);
+	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	up(&inode->i_sb->s_vfs_rename_sem);
+	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -1137,9 +1137,9 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 			rc = 0;
 	}
 		
-	down(&direntry->d_sb->s_vfs_rename_sem);
+	mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	up(&direntry->d_sb->s_vfs_rename_sem);
+	mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
 	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 0f99aae3316..8d0da7c87c7 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -48,10 +48,10 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
 /* No need to check for cross device links since server will do that
    BB note DFS case in future though (when we may have to check) */
 
-	down(&inode->i_sb->s_vfs_rename_sem);
+	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	fromName = build_path_from_dentry(old_file);
 	toName = build_path_from_dentry(direntry);
-	up(&inode->i_sb->s_vfs_rename_sem);
+	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 	if((fromName == NULL) || (toName == NULL)) {
 		rc = -ENOMEM;
 		goto cifs_hl_exit;
@@ -103,9 +103,9 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
 
 	xid = GetXid();
 
-	down(&direntry->d_sb->s_vfs_rename_sem);
+	mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	up(&direntry->d_sb->s_vfs_rename_sem);
+	mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
 
 	if (!full_path)
 		goto out_no_free;
@@ -164,9 +164,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	down(&inode->i_sb->s_vfs_rename_sem);
+	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	up(&inode->i_sb->s_vfs_rename_sem);
+	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 
 	if(full_path == NULL) {
 		FreeXid(xid);
@@ -232,9 +232,9 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
 
 /* BB would it be safe against deadlock to grab this sem 
       even though rename itself grabs the sem and calls lookup? */
-/*       down(&inode->i_sb->s_vfs_rename_sem);*/
+/*       mutex_lock(&inode->i_sb->s_vfs_rename_mutex);*/
 	full_path = build_path_from_dentry(direntry);
-/*       up(&inode->i_sb->s_vfs_rename_sem);*/
+/*       mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);*/
 
 	if(full_path == NULL) {
 		FreeXid(xid);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 288cc048d37..edb3b6eb34b 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -404,9 +404,9 @@ static int initiate_cifs_search(const int xid, struct file *file)
 	if(pTcon == NULL)
 		return -EINVAL;
 
-	down(&file->f_dentry->d_sb->s_vfs_rename_sem);
+	mutex_lock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(file->f_dentry);
-	up(&file->f_dentry->d_sb->s_vfs_rename_sem);
+	mutex_unlock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
 
 	if(full_path == NULL) {
 		return -ENOMEM;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 777e3363c2a..3938444d87b 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -62,9 +62,9 @@ int cifs_removexattr(struct dentry * direntry, const char * ea_name)
 	cifs_sb = CIFS_SB(sb);
 	pTcon = cifs_sb->tcon;
                                                                                      
-	down(&sb->s_vfs_rename_sem);
+	mutex_lock(&sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	up(&sb->s_vfs_rename_sem);
+	mutex_unlock(&sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -116,9 +116,9 @@ int cifs_setxattr(struct dentry * direntry, const char * ea_name,
 	cifs_sb = CIFS_SB(sb);
 	pTcon = cifs_sb->tcon;
 
-	down(&sb->s_vfs_rename_sem);
+	mutex_lock(&sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	up(&sb->s_vfs_rename_sem);
+	mutex_unlock(&sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -223,9 +223,9 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name,
 	cifs_sb = CIFS_SB(sb);
 	pTcon = cifs_sb->tcon;
 
-	down(&sb->s_vfs_rename_sem);
+	mutex_lock(&sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	up(&sb->s_vfs_rename_sem);
+	mutex_unlock(&sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -341,9 +341,9 @@ ssize_t cifs_listxattr(struct dentry * direntry, char * data, size_t buf_size)
 	cifs_sb = CIFS_SB(sb);
 	pTcon = cifs_sb->tcon;
 
-	down(&sb->s_vfs_rename_sem);
+	mutex_lock(&sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	up(&sb->s_vfs_rename_sem);
+	mutex_unlock(&sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
diff --git a/fs/namei.c b/fs/namei.c
index 8dc2b038d5d..c72b940797f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -104,7 +104,7 @@
  */
 /*
  * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
- * implemented.  Let's see if raised priority of ->s_vfs_rename_sem gives
+ * implemented.  Let's see if raised priority of ->s_vfs_rename_mutex gives
  * any extra contention...
  */
 
@@ -1422,7 +1422,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 		return NULL;
 	}
 
-	down(&p1->d_inode->i_sb->s_vfs_rename_sem);
+	mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
 
 	for (p = p1; p->d_parent != p; p = p->d_parent) {
 		if (p->d_parent == p2) {
@@ -1450,7 +1450,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
 	mutex_unlock(&p1->d_inode->i_mutex);
 	if (p1 != p2) {
 		mutex_unlock(&p2->d_inode->i_mutex);
-		up(&p1->d_inode->i_sb->s_vfs_rename_sem);
+		mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
 	}
 }
 
@@ -2277,17 +2277,17 @@ asmlinkage long sys_link(const char __user *oldname, const char __user *newname)
  *	a) we can get into loop creation. Check is done in is_subdir().
  *	b) race potential - two innocent renames can create a loop together.
  *	   That's where 4.4 screws up. Current fix: serialization on
- *	   sb->s_vfs_rename_sem. We might be more accurate, but that's another
+ *	   sb->s_vfs_rename_mutex. We might be more accurate, but that's another
  *	   story.
  *	c) we have to lock _three_ objects - parents and victim (if it exists).
  *	   And that - after we got ->i_mutex on parents (until then we don't know
  *	   whether the target exists).  Solution: try to be smart with locking
  *	   order for inodes.  We rely on the fact that tree topology may change
- *	   only under ->s_vfs_rename_sem _and_ that parent of the object we
+ *	   only under ->s_vfs_rename_mutex _and_ that parent of the object we
  *	   move will be locked.  Thus we can rank directories by the tree
  *	   (ancestors first) and rank all non-directories after them.
  *	   That works since everybody except rename does "lock parent, lookup,
- *	   lock child" and rename is under ->s_vfs_rename_sem.
+ *	   lock child" and rename is under ->s_vfs_rename_mutex.
  *	   HOWEVER, it relies on the assumption that any object with ->lookup()
  *	   has no more than 1 dentry.  If "hybrid" objects will ever appear,
  *	   we'd better make sure that there's no link(2) for them.
diff --git a/fs/super.c b/fs/super.c
index 9cc6545dfa4..425861cb1ca 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -76,7 +76,7 @@ static struct super_block *alloc_super(void)
 		down_write(&s->s_umount);
 		s->s_count = S_BIAS;
 		atomic_set(&s->s_active, 1);
-		sema_init(&s->s_vfs_rename_sem,1);
+		mutex_init(&s->s_vfs_rename_mutex);
 		mutex_init(&s->s_dquot.dqio_mutex);
 		mutex_init(&s->s_dquot.dqonoff_mutex);
 		init_rwsem(&s->s_dquot.dqptr_sem);
-- 
cgit v1.2.3


From f24075bd0c1cd1cc2cf86d394f960aa0401de573 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 23 Mar 2006 03:00:34 -0800
Subject: [PATCH] sem2mutex: iprune

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inode.c   | 16 ++++++++--------
 fs/inotify.c |  6 +++---
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 603e93ef0c6..25967b67903 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -84,14 +84,14 @@ static struct hlist_head *inode_hashtable;
 DEFINE_SPINLOCK(inode_lock);
 
 /*
- * iprune_sem provides exclusion between the kswapd or try_to_free_pages
+ * iprune_mutex provides exclusion between the kswapd or try_to_free_pages
  * icache shrinking path, and the umount path.  Without this exclusion,
  * by the time prune_icache calls iput for the inode whose pages it has
  * been invalidating, or by the time it calls clear_inode & destroy_inode
  * from its final dispose_list, the struct super_block they refer to
  * (for inode->i_sb->s_op) may already have been freed and reused.
  */
-DECLARE_MUTEX(iprune_sem);
+DEFINE_MUTEX(iprune_mutex);
 
 /*
  * Statistics gathering..
@@ -319,7 +319,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
 		/*
 		 * We can reschedule here without worrying about the list's
 		 * consistency because the per-sb list of inodes must not
-		 * change during umount anymore, and because iprune_sem keeps
+		 * change during umount anymore, and because iprune_mutex keeps
 		 * shrink_icache_memory() away.
 		 */
 		cond_resched_lock(&inode_lock);
@@ -355,14 +355,14 @@ int invalidate_inodes(struct super_block * sb)
 	int busy;
 	LIST_HEAD(throw_away);
 
-	down(&iprune_sem);
+	mutex_lock(&iprune_mutex);
 	spin_lock(&inode_lock);
 	inotify_unmount_inodes(&sb->s_inodes);
 	busy = invalidate_list(&sb->s_inodes, &throw_away);
 	spin_unlock(&inode_lock);
 
 	dispose_list(&throw_away);
-	up(&iprune_sem);
+	mutex_unlock(&iprune_mutex);
 
 	return busy;
 }
@@ -377,7 +377,7 @@ int __invalidate_device(struct block_device *bdev)
 	if (sb) {
 		/*
 		 * no need to lock the super, get_super holds the
-		 * read semaphore so the filesystem cannot go away
+		 * read mutex so the filesystem cannot go away
 		 * under us (->put_super runs with the write lock
 		 * hold).
 		 */
@@ -423,7 +423,7 @@ static void prune_icache(int nr_to_scan)
 	int nr_scanned;
 	unsigned long reap = 0;
 
-	down(&iprune_sem);
+	mutex_lock(&iprune_mutex);
 	spin_lock(&inode_lock);
 	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
 		struct inode *inode;
@@ -459,7 +459,7 @@ static void prune_icache(int nr_to_scan)
 	spin_unlock(&inode_lock);
 
 	dispose_list(&freeable);
-	up(&iprune_sem);
+	mutex_unlock(&iprune_mutex);
 
 	if (current_is_kswapd())
 		mod_page_state(kswapd_inodesteal, reap);
diff --git a/fs/inotify.c b/fs/inotify.c
index 60d9653d55b..0ee39ef591c 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -54,7 +54,7 @@ int inotify_max_queued_events;
  * Lock ordering:
  *
  * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
- * iprune_sem (synchronize shrink_icache_memory())
+ * iprune_mutex (synchronize shrink_icache_memory())
  * 	inode_lock (protects the super_block->s_inodes list)
  * 	inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
  * 		inotify_dev->mutex (protects inotify_device and watches->d_list)
@@ -569,7 +569,7 @@ EXPORT_SYMBOL_GPL(inotify_get_cookie);
  * @list: list of inodes being unmounted (sb->s_inodes)
  *
  * Called with inode_lock held, protecting the unmounting super block's list
- * of inodes, and with iprune_sem held, keeping shrink_icache_memory() at bay.
+ * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
  * We temporarily drop inode_lock, however, and CAN block.
  */
 void inotify_unmount_inodes(struct list_head *list)
@@ -618,7 +618,7 @@ void inotify_unmount_inodes(struct list_head *list)
 		 * We can safely drop inode_lock here because we hold
 		 * references on both inode and next_i.  Also no new inodes
 		 * will be added since the umount has begun.  Finally,
-		 * iprune_sem keeps shrink_icache_memory() away.
+		 * iprune_mutex keeps shrink_icache_memory() away.
 		 */
 		spin_unlock(&inode_lock);
 
-- 
cgit v1.2.3


From 2c68ee754c40099c59828e59618a54726f76126a Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Thu, 23 Mar 2006 03:00:35 -0800
Subject: [PATCH] sem2mutex: jbd, j_checkpoint_mutex

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/checkpoint.c  | 4 ++--
 fs/jbd/journal.c     | 4 ++--
 fs/jbd/transaction.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 543ed543d1e..3f5102b069d 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -85,7 +85,7 @@ void __log_wait_for_space(journal_t *journal)
 		if (journal->j_flags & JFS_ABORT)
 			return;
 		spin_unlock(&journal->j_state_lock);
-		down(&journal->j_checkpoint_sem);
+		mutex_lock(&journal->j_checkpoint_mutex);
 
 		/*
 		 * Test again, another process may have checkpointed while we
@@ -98,7 +98,7 @@ void __log_wait_for_space(journal_t *journal)
 			log_do_checkpoint(journal);
 			spin_lock(&journal->j_state_lock);
 		}
-		up(&journal->j_checkpoint_sem);
+		mutex_unlock(&journal->j_checkpoint_mutex);
 	}
 }
 
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index e4b516ac498..95a628d8cac 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -659,8 +659,8 @@ static journal_t * journal_init_common (void)
 	init_waitqueue_head(&journal->j_wait_checkpoint);
 	init_waitqueue_head(&journal->j_wait_commit);
 	init_waitqueue_head(&journal->j_wait_updates);
-	init_MUTEX(&journal->j_barrier);
-	init_MUTEX(&journal->j_checkpoint_sem);
+	mutex_init(&journal->j_barrier);
+	mutex_init(&journal->j_checkpoint_mutex);
 	spin_lock_init(&journal->j_revoke_lock);
 	spin_lock_init(&journal->j_list_lock);
 	spin_lock_init(&journal->j_state_lock);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index ca917973c2c..5fc40888f4c 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -455,7 +455,7 @@ void journal_lock_updates(journal_t *journal)
 	 * to make sure that we serialise special journal-locked operations
 	 * too.
 	 */
-	down(&journal->j_barrier);
+	mutex_lock(&journal->j_barrier);
 }
 
 /**
@@ -470,7 +470,7 @@ void journal_unlock_updates (journal_t *journal)
 {
 	J_ASSERT(journal->j_barrier_count != 0);
 
-	up(&journal->j_barrier);
+	mutex_unlock(&journal->j_barrier);
 	spin_lock(&journal->j_state_lock);
 	--journal->j_barrier_count;
 	spin_unlock(&journal->j_state_lock);
-- 
cgit v1.2.3


From 7cf34c761db8827818a27e23c50756f8b821a9b0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 23 Mar 2006 03:00:36 -0800
Subject: [PATCH] sem2mutex: fs/libfs.c

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/libfs.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/libfs.c b/fs/libfs.c
index 71fd08fa410..4fdeaceb892 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -7,6 +7,8 @@
 #include <linux/pagemap.h>
 #include <linux/mount.h>
 #include <linux/vfs.h>
+#include <linux/mutex.h>
+
 #include <asm/uaccess.h>
 
 int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -530,7 +532,7 @@ struct simple_attr {
 	char set_buf[24];
 	void *data;
 	const char *fmt;	/* format for read operation */
-	struct semaphore sem;	/* protects access to these buffers */
+	struct mutex mutex;	/* protects access to these buffers */
 };
 
 /* simple_attr_open is called by an actual attribute open file operation
@@ -549,7 +551,7 @@ int simple_attr_open(struct inode *inode, struct file *file,
 	attr->set = set;
 	attr->data = inode->u.generic_ip;
 	attr->fmt = fmt;
-	init_MUTEX(&attr->sem);
+	mutex_init(&attr->mutex);
 
 	file->private_data = attr;
 
@@ -575,7 +577,7 @@ ssize_t simple_attr_read(struct file *file, char __user *buf,
 	if (!attr->get)
 		return -EACCES;
 
-	down(&attr->sem);
+	mutex_lock(&attr->mutex);
 	if (*ppos) /* continued read */
 		size = strlen(attr->get_buf);
 	else	  /* first read */
@@ -584,7 +586,7 @@ ssize_t simple_attr_read(struct file *file, char __user *buf,
 				 (unsigned long long)attr->get(attr->data));
 
 	ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
-	up(&attr->sem);
+	mutex_unlock(&attr->mutex);
 	return ret;
 }
 
@@ -602,7 +604,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
 	if (!attr->set)
 		return -EACCES;
 
-	down(&attr->sem);
+	mutex_lock(&attr->mutex);
 	ret = -EFAULT;
 	size = min(sizeof(attr->set_buf) - 1, len);
 	if (copy_from_user(attr->set_buf, buf, size))
@@ -613,7 +615,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
 	val = simple_strtol(attr->set_buf, NULL, 0);
 	attr->set(attr->data, val);
 out:
-	up(&attr->sem);
+	mutex_unlock(&attr->mutex);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 0ac1759abc69fb62438c30a7e422f628a1120d67 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 23 Mar 2006 03:00:37 -0800
Subject: [PATCH] sem2mutex: fs/seq_file.c

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/seq_file.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 7c40570b71d..555b9ac04c2 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -37,7 +37,7 @@ int seq_open(struct file *file, struct seq_operations *op)
 		file->private_data = p;
 	}
 	memset(p, 0, sizeof(*p));
-	sema_init(&p->sem, 1);
+	mutex_init(&p->lock);
 	p->op = op;
 
 	/*
@@ -71,7 +71,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 	void *p;
 	int err = 0;
 
-	down(&m->sem);
+	mutex_lock(&m->lock);
 	/*
 	 * seq_file->op->..m_start/m_stop/m_next may do special actions
 	 * or optimisations based on the file->f_version, so we want to
@@ -164,7 +164,7 @@ Done:
 	else
 		*ppos += copied;
 	file->f_version = m->version;
-	up(&m->sem);
+	mutex_unlock(&m->lock);
 	return copied;
 Enomem:
 	err = -ENOMEM;
@@ -237,7 +237,7 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin)
 	struct seq_file *m = (struct seq_file *)file->private_data;
 	long long retval = -EINVAL;
 
-	down(&m->sem);
+	mutex_lock(&m->lock);
 	m->version = file->f_version;
 	switch (origin) {
 		case 1:
@@ -260,7 +260,7 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin)
 				}
 			}
 	}
-	up(&m->sem);
+	mutex_unlock(&m->lock);
 	file->f_version = m->version;
 	return retval;
 }
-- 
cgit v1.2.3


From 1eb0d67007e75697a7b87e6b611be935a991395c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 23 Mar 2006 03:00:40 -0800
Subject: [PATCH] sem2mutex: JFFS

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jffs/inode-v23.c | 86 ++++++++++++++++++++++++++---------------------------
 fs/jffs/intrep.c    |  6 ++--
 fs/jffs/jffs_fm.c   |  2 +-
 fs/jffs/jffs_fm.h   |  5 ++--
 4 files changed, 50 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index fc3855a1aef..890d7ff7456 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -42,7 +42,7 @@
 #include <linux/quotaops.h>
 #include <linux/highmem.h>
 #include <linux/vfs.h>
-#include <asm/semaphore.h>
+#include <linux/mutex.h>
 #include <asm/byteorder.h>
 #include <asm/uaccess.h>
 
@@ -203,7 +203,7 @@ jffs_setattr(struct dentry *dentry, struct iattr *iattr)
 	fmc = c->fmc;
 
 	D3(printk (KERN_NOTICE "notify_change(): down biglock\n"));
-	down(&fmc->biglock);
+	mutex_lock(&fmc->biglock);
 
 	f = jffs_find_file(c, inode->i_ino);
 
@@ -211,7 +211,7 @@ jffs_setattr(struct dentry *dentry, struct iattr *iattr)
 		printk("jffs_setattr(): Invalid inode number: %lu\n",
 		       inode->i_ino);
 		D3(printk (KERN_NOTICE "notify_change(): up biglock\n"));
-		up(&fmc->biglock);
+		mutex_unlock(&fmc->biglock);
 		res = -EINVAL;
 		goto out;
 	});
@@ -232,7 +232,7 @@ jffs_setattr(struct dentry *dentry, struct iattr *iattr)
 	if (!(new_node = jffs_alloc_node())) {
 		D(printk("jffs_setattr(): Allocation failed!\n"));
 		D3(printk (KERN_NOTICE "notify_change(): up biglock\n"));
-		up(&fmc->biglock);
+		mutex_unlock(&fmc->biglock);
 		res = -ENOMEM;
 		goto out;
 	}
@@ -319,7 +319,7 @@ jffs_setattr(struct dentry *dentry, struct iattr *iattr)
 		D(printk("jffs_notify_change(): The write failed!\n"));
 		jffs_free_node(new_node);
 		D3(printk (KERN_NOTICE "n_c(): up biglock\n"));
-		up(&c->fmc->biglock);
+		mutex_unlock(&c->fmc->biglock);
 		goto out;
 	}
 
@@ -327,7 +327,7 @@ jffs_setattr(struct dentry *dentry, struct iattr *iattr)
 
 	mark_inode_dirty(inode);
 	D3(printk (KERN_NOTICE "n_c(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 out:
 	unlock_kernel();
 	return res;
@@ -461,7 +461,7 @@ jffs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto jffs_rename_end;
 	}
 	D3(printk (KERN_NOTICE "rename(): down biglock\n"));
-	down(&c->fmc->biglock);
+	mutex_lock(&c->fmc->biglock);
 	/* Create a node and initialize as much as needed.  */
 	result = -ENOMEM;
 	if (!(node = jffs_alloc_node())) {
@@ -555,7 +555,7 @@ jffs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 jffs_rename_end:
 	D3(printk (KERN_NOTICE "rename(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 	unlock_kernel();
 	return result;
 } /* jffs_rename()  */
@@ -574,14 +574,14 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	int ddino;
 	lock_kernel();
 	D3(printk (KERN_NOTICE "readdir(): down biglock\n"));
-	down(&c->fmc->biglock);
+	mutex_lock(&c->fmc->biglock);
 
 	D2(printk("jffs_readdir(): inode: 0x%p, filp: 0x%p\n", inode, filp));
 	if (filp->f_pos == 0) {
 		D3(printk("jffs_readdir(): \".\" %lu\n", inode->i_ino));
 		if (filldir(dirent, ".", 1, filp->f_pos, inode->i_ino, DT_DIR) < 0) {
 			D3(printk (KERN_NOTICE "readdir(): up biglock\n"));
-			up(&c->fmc->biglock);
+			mutex_unlock(&c->fmc->biglock);
 			unlock_kernel();
 			return 0;
 		}
@@ -598,7 +598,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		D3(printk("jffs_readdir(): \"..\" %u\n", ddino));
 		if (filldir(dirent, "..", 2, filp->f_pos, ddino, DT_DIR) < 0) {
 			D3(printk (KERN_NOTICE "readdir(): up biglock\n"));
-			up(&c->fmc->biglock);
+			mutex_unlock(&c->fmc->biglock);
 			unlock_kernel();
 			return 0;
 		}
@@ -617,7 +617,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		if (filldir(dirent, f->name, f->nsize,
 			    filp->f_pos , f->ino, DT_UNKNOWN) < 0) {
 		        D3(printk (KERN_NOTICE "readdir(): up biglock\n"));
-			up(&c->fmc->biglock);
+			mutex_unlock(&c->fmc->biglock);
 			unlock_kernel();
 			return 0;
 		}
@@ -627,7 +627,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		} while(f && f->deleted);
 	}
 	D3(printk (KERN_NOTICE "readdir(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 	unlock_kernel();
 	return filp->f_pos;
 } /* jffs_readdir()  */
@@ -660,7 +660,7 @@ jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	});
 
 	D3(printk (KERN_NOTICE "lookup(): down biglock\n"));
-	down(&c->fmc->biglock);
+	mutex_lock(&c->fmc->biglock);
 
 	r = -ENAMETOOLONG;
 	if (len > JFFS_MAX_NAME_LEN) {
@@ -683,31 +683,31 @@ jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 
 	if ((len == 1) && (name[0] == '.')) {
 		D3(printk (KERN_NOTICE "lookup(): up biglock\n"));
-		up(&c->fmc->biglock);
+		mutex_unlock(&c->fmc->biglock);
 		if (!(inode = iget(dir->i_sb, d->ino))) {
 			D(printk("jffs_lookup(): . iget() ==> NULL\n"));
 			goto jffs_lookup_end_no_biglock;
 		}
 		D3(printk (KERN_NOTICE "lookup(): down biglock\n"));
-		down(&c->fmc->biglock);
+		mutex_lock(&c->fmc->biglock);
 	} else if ((len == 2) && (name[0] == '.') && (name[1] == '.')) {
 	        D3(printk (KERN_NOTICE "lookup(): up biglock\n"));
-		up(&c->fmc->biglock);
+		mutex_unlock(&c->fmc->biglock);
  		if (!(inode = iget(dir->i_sb, d->pino))) {
 			D(printk("jffs_lookup(): .. iget() ==> NULL\n"));
 			goto jffs_lookup_end_no_biglock;
 		}
 		D3(printk (KERN_NOTICE "lookup(): down biglock\n"));
-		down(&c->fmc->biglock);
+		mutex_lock(&c->fmc->biglock);
 	} else if ((f = jffs_find_child(d, name, len))) {
 	        D3(printk (KERN_NOTICE "lookup(): up biglock\n"));
-		up(&c->fmc->biglock);
+		mutex_unlock(&c->fmc->biglock);
 		if (!(inode = iget(dir->i_sb, f->ino))) {
 			D(printk("jffs_lookup(): iget() ==> NULL\n"));
 			goto jffs_lookup_end_no_biglock;
 		}
 		D3(printk (KERN_NOTICE "lookup(): down biglock\n"));
-		down(&c->fmc->biglock);
+		mutex_lock(&c->fmc->biglock);
 	} else {
 		D3(printk("jffs_lookup(): Couldn't find the file. "
 			  "f = 0x%p, name = \"%s\", d = 0x%p, d->ino = %u\n",
@@ -717,13 +717,13 @@ jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 
 	d_add(dentry, inode);
 	D3(printk (KERN_NOTICE "lookup(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 	unlock_kernel();
 	return NULL;
 
 jffs_lookup_end:
 	D3(printk (KERN_NOTICE "lookup(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 
 jffs_lookup_end_no_biglock:
 	unlock_kernel();
@@ -753,7 +753,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page)
 	ClearPageError(page);
 
 	D3(printk (KERN_NOTICE "readpage(): down biglock\n"));
-	down(&c->fmc->biglock);
+	mutex_lock(&c->fmc->biglock);
 
 	read_len = 0;
 	result = 0;
@@ -782,7 +782,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page)
 	kunmap(page);
 
 	D3(printk (KERN_NOTICE "readpage(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 
 	if (result) {
 	        SetPageError(page);
@@ -839,7 +839,7 @@ jffs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	c = dir_f->c;
 	D3(printk (KERN_NOTICE "mkdir(): down biglock\n"));
-	down(&c->fmc->biglock);
+	mutex_lock(&c->fmc->biglock);
 
 	dir_mode = S_IFDIR | (mode & (S_IRWXUGO|S_ISVTX)
 			      & ~current->fs->umask);
@@ -906,7 +906,7 @@ jffs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	result = 0;
 jffs_mkdir_end:
 	D3(printk (KERN_NOTICE "mkdir(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 	unlock_kernel();
 	return result;
 } /* jffs_mkdir()  */
@@ -921,10 +921,10 @@ jffs_rmdir(struct inode *dir, struct dentry *dentry)
 	D3(printk("***jffs_rmdir()\n"));
 	D3(printk (KERN_NOTICE "rmdir(): down biglock\n"));
 	lock_kernel();
-	down(&c->fmc->biglock);
+	mutex_lock(&c->fmc->biglock);
 	ret = jffs_remove(dir, dentry, S_IFDIR);
 	D3(printk (KERN_NOTICE "rmdir(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 	unlock_kernel();
 	return ret;
 }
@@ -940,10 +940,10 @@ jffs_unlink(struct inode *dir, struct dentry *dentry)
 	lock_kernel();
 	D3(printk("***jffs_unlink()\n"));
 	D3(printk (KERN_NOTICE "unlink(): down biglock\n"));
-	down(&c->fmc->biglock);
+	mutex_lock(&c->fmc->biglock);
 	ret = jffs_remove(dir, dentry, 0);
 	D3(printk (KERN_NOTICE "unlink(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 	unlock_kernel();
 	return ret;
 }
@@ -1086,7 +1086,7 @@ jffs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 	c = dir_f->c;
 
 	D3(printk (KERN_NOTICE "mknod(): down biglock\n"));
-	down(&c->fmc->biglock);
+	mutex_lock(&c->fmc->biglock);
 
 	/* Create and initialize a new node.  */
 	if (!(node = jffs_alloc_node())) {
@@ -1152,7 +1152,7 @@ jffs_mknod_err:
 
 jffs_mknod_end:
 	D3(printk (KERN_NOTICE "mknod(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 	unlock_kernel();
 	return result;
 } /* jffs_mknod()  */
@@ -1203,7 +1203,7 @@ jffs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 		return -ENOMEM;
 	}
 	D3(printk (KERN_NOTICE "symlink(): down biglock\n"));
-	down(&c->fmc->biglock);
+	mutex_lock(&c->fmc->biglock);
 
 	node->data_offset = 0;
 	node->removed_size = 0;
@@ -1253,7 +1253,7 @@ jffs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 	d_instantiate(dentry, inode);
  jffs_symlink_end:
 	D3(printk (KERN_NOTICE "symlink(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 	unlock_kernel();
 	return err;
 } /* jffs_symlink()  */
@@ -1306,7 +1306,7 @@ jffs_create(struct inode *dir, struct dentry *dentry, int mode,
 		return -ENOMEM;
 	}
 	D3(printk (KERN_NOTICE "create(): down biglock\n"));
-	down(&c->fmc->biglock);
+	mutex_lock(&c->fmc->biglock);
 
 	node->data_offset = 0;
 	node->removed_size = 0;
@@ -1359,7 +1359,7 @@ jffs_create(struct inode *dir, struct dentry *dentry, int mode,
 	d_instantiate(dentry, inode);
  jffs_create_end:
 	D3(printk (KERN_NOTICE "create(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 	unlock_kernel();
 	return err;
 } /* jffs_create()  */
@@ -1423,7 +1423,7 @@ jffs_file_write(struct file *filp, const char *buf, size_t count,
 	thiscount = min(c->fmc->max_chunk_size - sizeof(struct jffs_raw_inode), count);
 
 	D3(printk (KERN_NOTICE "file_write(): down biglock\n"));
-	down(&c->fmc->biglock);
+	mutex_lock(&c->fmc->biglock);
 
 	/* Urgh. POSIX says we can do short writes if we feel like it. 
 	 * In practice, we can't. Nothing will cope. So we loop until
@@ -1511,7 +1511,7 @@ jffs_file_write(struct file *filp, const char *buf, size_t count,
 	}
  out:
 	D3(printk (KERN_NOTICE "file_write(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 
 	/* Fix things in the real inode.  */
 	if (pos > inode->i_size) {
@@ -1567,7 +1567,7 @@ jffs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 		return -EIO;
 	}
 	D3(printk (KERN_NOTICE "ioctl(): down biglock\n"));
-	down(&c->fmc->biglock);
+	mutex_lock(&c->fmc->biglock);
 
 	switch (cmd) {
 	case JFFS_PRINT_HASH:
@@ -1609,7 +1609,7 @@ jffs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 		ret = -ENOTTY;
 	}
 	D3(printk (KERN_NOTICE "ioctl(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 	return ret;
 } /* jffs_ioctl()  */
 
@@ -1685,12 +1685,12 @@ jffs_read_inode(struct inode *inode)
 	}
 	c = (struct jffs_control *)inode->i_sb->s_fs_info;
 	D3(printk (KERN_NOTICE "read_inode(): down biglock\n"));
-	down(&c->fmc->biglock);
+	mutex_lock(&c->fmc->biglock);
 	if (!(f = jffs_find_file(c, inode->i_ino))) {
 		D(printk("jffs_read_inode(): No such inode (%lu).\n",
 			 inode->i_ino));
 		D3(printk (KERN_NOTICE "read_inode(): up biglock\n"));
-		up(&c->fmc->biglock);
+		mutex_unlock(&c->fmc->biglock);
 		return;
 	}
 	inode->u.generic_ip = (void *)f;
@@ -1732,7 +1732,7 @@ jffs_read_inode(struct inode *inode)
 	}
 
 	D3(printk (KERN_NOTICE "read_inode(): up biglock\n"));
-	up(&c->fmc->biglock);
+	mutex_unlock(&c->fmc->biglock);
 }
 
 
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index ce7b54b0b2b..0ef207dfaf6 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -62,7 +62,7 @@
 #include <linux/fs.h>
 #include <linux/stat.h>
 #include <linux/pagemap.h>
-#include <asm/semaphore.h>
+#include <linux/mutex.h>
 #include <asm/byteorder.h>
 #include <linux/smp_lock.h>
 #include <linux/time.h>
@@ -3416,7 +3416,7 @@ jffs_garbage_collect_thread(void *ptr)
 		D1(printk (KERN_NOTICE "jffs_garbage_collect_thread(): collecting.\n"));
 
 		D3(printk (KERN_NOTICE "g_c_thread(): down biglock\n"));
-		down(&fmc->biglock);
+		mutex_lock(&fmc->biglock);
 		
 		D1(printk("***jffs_garbage_collect_thread(): round #%u, "
 			  "fmc->dirty_size = %u\n", i++, fmc->dirty_size));
@@ -3447,6 +3447,6 @@ jffs_garbage_collect_thread(void *ptr)
 		
 	gc_end:
 		D3(printk (KERN_NOTICE "g_c_thread(): up biglock\n"));
-		up(&fmc->biglock);
+		mutex_unlock(&fmc->biglock);
 	} /* for (;;) */
 } /* jffs_garbage_collect_thread() */
diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c
index 6da13b309bd..7d8ca1aeace 100644
--- a/fs/jffs/jffs_fm.c
+++ b/fs/jffs/jffs_fm.c
@@ -139,7 +139,7 @@ jffs_build_begin(struct jffs_control *c, int unit)
 	fmc->tail = NULL;
 	fmc->head_extra = NULL;
 	fmc->tail_extra = NULL;
-	init_MUTEX(&fmc->biglock);
+	mutex_init(&fmc->biglock);
 	return fmc;
 }
 
diff --git a/fs/jffs/jffs_fm.h b/fs/jffs/jffs_fm.h
index f64151e7412..c794d923df2 100644
--- a/fs/jffs/jffs_fm.h
+++ b/fs/jffs/jffs_fm.h
@@ -20,10 +20,11 @@
 #ifndef __LINUX_JFFS_FM_H__
 #define __LINUX_JFFS_FM_H__
 
+#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/jffs.h>
 #include <linux/mtd/mtd.h>
-#include <linux/config.h>
+#include <linux/mutex.h>
 
 /* The alignment between two nodes in the flash memory.  */
 #define JFFS_ALIGN_SIZE 4
@@ -97,7 +98,7 @@ struct jffs_fmcontrol
 	struct jffs_fm *tail;
 	struct jffs_fm *head_extra;
 	struct jffs_fm *tail_extra;
-	struct semaphore biglock;
+	struct mutex biglock;
 };
 
 /* Notice the two members head_extra and tail_extra in the jffs_control
-- 
cgit v1.2.3


From 1d5599e397dcc7c2300d200e66dad326d7dbac38 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 23 Mar 2006 03:00:41 -0800
Subject: [PATCH] sem2mutex: autofs4 wq_sem

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h |  3 ++-
 fs/autofs4/inode.c    |  2 +-
 fs/autofs4/waitq.c    | 16 ++++++++--------
 3 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 385bed09b0d..f54c5b21f87 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -13,6 +13,7 @@
 /* Internal header file for autofs */
 
 #include <linux/auto_fs4.h>
+#include <linux/mutex.h>
 #include <linux/list.h>
 
 /* This is the range of ioctl() numbers we claim as ours */
@@ -102,7 +103,7 @@ struct autofs_sb_info {
 	int reghost_enabled;
 	int needs_reghost;
 	struct super_block *sb;
-	struct semaphore wq_sem;
+	struct mutex wq_mutex;
 	spinlock_t fs_lock;
 	struct autofs_wait_queue *queues; /* Wait queue pointer */
 };
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 2d3082854a2..1ad98d48e55 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -269,7 +269,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
-	init_MUTEX(&sbi->wq_sem);
+	mutex_init(&sbi->wq_mutex);
 	spin_lock_init(&sbi->fs_lock);
 	sbi->queues = NULL;
 	s->s_blocksize = 1024;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 394ff36ef8f..be78e9378c0 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -178,7 +178,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		return -ENOENT;
 	}
 
-	if (down_interruptible(&sbi->wq_sem)) {
+	if (mutex_lock_interruptible(&sbi->wq_mutex)) {
 		kfree(name);
 		return -EINTR;
 	}
@@ -194,7 +194,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		/* Can't wait for an expire if there's no mount */
 		if (notify == NFY_NONE && !d_mountpoint(dentry)) {
 			kfree(name);
-			up(&sbi->wq_sem);
+			mutex_unlock(&sbi->wq_mutex);
 			return -ENOENT;
 		}
 
@@ -202,7 +202,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
 		if ( !wq ) {
 			kfree(name);
-			up(&sbi->wq_sem);
+			mutex_unlock(&sbi->wq_mutex);
 			return -ENOMEM;
 		}
 
@@ -218,10 +218,10 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		wq->status = -EINTR; /* Status return if interrupted */
 		atomic_set(&wq->wait_ctr, 2);
 		atomic_set(&wq->notified, 1);
-		up(&sbi->wq_sem);
+		mutex_unlock(&sbi->wq_mutex);
 	} else {
 		atomic_inc(&wq->wait_ctr);
-		up(&sbi->wq_sem);
+		mutex_unlock(&sbi->wq_mutex);
 		kfree(name);
 		DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
 			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
@@ -282,19 +282,19 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
 {
 	struct autofs_wait_queue *wq, **wql;
 
-	down(&sbi->wq_sem);
+	mutex_lock(&sbi->wq_mutex);
 	for ( wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next ) {
 		if ( wq->wait_queue_token == wait_queue_token )
 			break;
 	}
 
 	if ( !wq ) {
-		up(&sbi->wq_sem);
+		mutex_unlock(&sbi->wq_mutex);
 		return -EINVAL;
 	}
 
 	*wql = wq->next;	/* Unlink from chain */
-	up(&sbi->wq_sem);
+	mutex_unlock(&sbi->wq_mutex);
 	kfree(wq->name);
 	wq->name = NULL;	/* Do not wait on this queue */
 
-- 
cgit v1.2.3


From 7bf6d78dd93ccc52cd2cac5066c4b84834e4f1f2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 23 Mar 2006 03:00:42 -0800
Subject: [PATCH] sem2mutex: HPFS

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hpfs/hpfs_fn.h |  5 +++--
 fs/hpfs/inode.c   | 10 +++++-----
 fs/hpfs/namei.c   | 60 +++++++++++++++++++++++++++----------------------------
 fs/hpfs/super.c   |  4 ++--
 4 files changed, 40 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 6628c3b352c..4c6473ab3b3 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -9,6 +9,7 @@
 //#define DBG
 //#define DEBUG_LOCKS
 
+#include <linux/mutex.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/hpfs_fs.h>
@@ -57,8 +58,8 @@ struct hpfs_inode_info {
 	unsigned i_ea_uid : 1;	/* file's uid is stored in ea */
 	unsigned i_ea_gid : 1;	/* file's gid is stored in ea */
 	unsigned i_dirty : 1;
-	struct semaphore i_sem;
-	struct semaphore i_parent;
+	struct mutex i_mutex;
+	struct mutex i_parent_mutex;
 	loff_t **i_rddir_off;
 	struct inode vfs_inode;
 };
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index e3d17e9ea6c..56f2c338c4d 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -186,9 +186,9 @@ void hpfs_write_inode(struct inode *i)
 		kfree(hpfs_inode->i_rddir_off);
 		hpfs_inode->i_rddir_off = NULL;
 	}
-	down(&hpfs_inode->i_parent);
+	mutex_lock(&hpfs_inode->i_parent_mutex);
 	if (!i->i_nlink) {
-		up(&hpfs_inode->i_parent);
+		mutex_unlock(&hpfs_inode->i_parent_mutex);
 		return;
 	}
 	parent = iget_locked(i->i_sb, hpfs_inode->i_parent_dir);
@@ -199,14 +199,14 @@ void hpfs_write_inode(struct inode *i)
 			hpfs_read_inode(parent);
 			unlock_new_inode(parent);
 		}
-		down(&hpfs_inode->i_sem);
+		mutex_lock(&hpfs_inode->i_mutex);
 		hpfs_write_inode_nolock(i);
-		up(&hpfs_inode->i_sem);
+		mutex_unlock(&hpfs_inode->i_mutex);
 		iput(parent);
 	} else {
 		mark_inode_dirty(i);
 	}
-	up(&hpfs_inode->i_parent);
+	mutex_unlock(&hpfs_inode->i_parent_mutex);
 }
 
 void hpfs_write_inode_nolock(struct inode *i)
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 8ff8fc433fc..a03abb12c61 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -60,7 +60,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	if (dee.read_only)
 		result->i_mode &= ~0222;
 
-	down(&hpfs_i(dir)->i_sem);
+	mutex_lock(&hpfs_i(dir)->i_mutex);
 	r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0);
 	if (r == 1)
 		goto bail3;
@@ -101,11 +101,11 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		hpfs_write_inode_nolock(result);
 	}
 	d_instantiate(dentry, result);
-	up(&hpfs_i(dir)->i_sem);
+	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	unlock_kernel();
 	return 0;
 bail3:
-	up(&hpfs_i(dir)->i_sem);
+	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	iput(result);
 bail2:
 	hpfs_brelse4(&qbh0);
@@ -168,7 +168,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
 	result->i_data.a_ops = &hpfs_aops;
 	hpfs_i(result)->mmu_private = 0;
 
-	down(&hpfs_i(dir)->i_sem);
+	mutex_lock(&hpfs_i(dir)->i_mutex);
 	r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0);
 	if (r == 1)
 		goto bail2;
@@ -193,12 +193,12 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
 		hpfs_write_inode_nolock(result);
 	}
 	d_instantiate(dentry, result);
-	up(&hpfs_i(dir)->i_sem);
+	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	unlock_kernel();
 	return 0;
 
 bail2:
-	up(&hpfs_i(dir)->i_sem);
+	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	iput(result);
 bail1:
 	brelse(bh);
@@ -254,7 +254,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
 	result->i_blocks = 1;
 	init_special_inode(result, mode, rdev);
 
-	down(&hpfs_i(dir)->i_sem);
+	mutex_lock(&hpfs_i(dir)->i_mutex);
 	r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0);
 	if (r == 1)
 		goto bail2;
@@ -271,12 +271,12 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
 
 	hpfs_write_inode_nolock(result);
 	d_instantiate(dentry, result);
-	up(&hpfs_i(dir)->i_sem);
+	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	brelse(bh);
 	unlock_kernel();
 	return 0;
 bail2:
-	up(&hpfs_i(dir)->i_sem);
+	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	iput(result);
 bail1:
 	brelse(bh);
@@ -333,7 +333,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
 	result->i_op = &page_symlink_inode_operations;
 	result->i_data.a_ops = &hpfs_symlink_aops;
 
-	down(&hpfs_i(dir)->i_sem);
+	mutex_lock(&hpfs_i(dir)->i_mutex);
 	r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0);
 	if (r == 1)
 		goto bail2;
@@ -352,11 +352,11 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
 
 	hpfs_write_inode_nolock(result);
 	d_instantiate(dentry, result);
-	up(&hpfs_i(dir)->i_sem);
+	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	unlock_kernel();
 	return 0;
 bail2:
-	up(&hpfs_i(dir)->i_sem);
+	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	iput(result);
 bail1:
 	brelse(bh);
@@ -382,8 +382,8 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
 	lock_kernel();
 	hpfs_adjust_length((char *)name, &len);
 again:
-	down(&hpfs_i(inode)->i_parent);
-	down(&hpfs_i(dir)->i_sem);
+	mutex_lock(&hpfs_i(inode)->i_parent_mutex);
+	mutex_lock(&hpfs_i(dir)->i_mutex);
 	err = -ENOENT;
 	de = map_dirent(dir, hpfs_i(dir)->i_dno, (char *)name, len, &dno, &qbh);
 	if (!de)
@@ -410,8 +410,8 @@ again:
 		if (rep++)
 			break;
 
-		up(&hpfs_i(dir)->i_sem);
-		up(&hpfs_i(inode)->i_parent);
+		mutex_unlock(&hpfs_i(dir)->i_mutex);
+		mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
 		d_drop(dentry);
 		spin_lock(&dentry->d_lock);
 		if (atomic_read(&dentry->d_count) > 1 ||
@@ -442,8 +442,8 @@ again:
 out1:
 	hpfs_brelse4(&qbh);
 out:
-	up(&hpfs_i(dir)->i_sem);
-	up(&hpfs_i(inode)->i_parent);
+	mutex_unlock(&hpfs_i(dir)->i_mutex);
+	mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
 	unlock_kernel();
 	return err;
 }
@@ -463,8 +463,8 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
 
 	hpfs_adjust_length((char *)name, &len);
 	lock_kernel();
-	down(&hpfs_i(inode)->i_parent);
-	down(&hpfs_i(dir)->i_sem);
+	mutex_lock(&hpfs_i(inode)->i_parent_mutex);
+	mutex_lock(&hpfs_i(dir)->i_mutex);
 	err = -ENOENT;
 	de = map_dirent(dir, hpfs_i(dir)->i_dno, (char *)name, len, &dno, &qbh);
 	if (!de)
@@ -502,8 +502,8 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
 out1:
 	hpfs_brelse4(&qbh);
 out:
-	up(&hpfs_i(dir)->i_sem);
-	up(&hpfs_i(inode)->i_parent);
+	mutex_unlock(&hpfs_i(dir)->i_mutex);
+	mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
 	unlock_kernel();
 	return err;
 }
@@ -565,12 +565,12 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	lock_kernel();
 	/* order doesn't matter, due to VFS exclusion */
-	down(&hpfs_i(i)->i_parent);
+	mutex_lock(&hpfs_i(i)->i_parent_mutex);
 	if (new_inode)
-		down(&hpfs_i(new_inode)->i_parent);
-	down(&hpfs_i(old_dir)->i_sem);
+		mutex_lock(&hpfs_i(new_inode)->i_parent_mutex);
+	mutex_lock(&hpfs_i(old_dir)->i_mutex);
 	if (new_dir != old_dir)
-		down(&hpfs_i(new_dir)->i_sem);
+		mutex_lock(&hpfs_i(new_dir)->i_mutex);
 	
 	/* Erm? Moving over the empty non-busy directory is perfectly legal */
 	if (new_inode && S_ISDIR(new_inode->i_mode)) {
@@ -650,11 +650,11 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	hpfs_decide_conv(i, (char *)new_name, new_len);
 end1:
 	if (old_dir != new_dir)
-		up(&hpfs_i(new_dir)->i_sem);
-	up(&hpfs_i(old_dir)->i_sem);
-	up(&hpfs_i(i)->i_parent);
+		mutex_unlock(&hpfs_i(new_dir)->i_mutex);
+	mutex_unlock(&hpfs_i(old_dir)->i_mutex);
+	mutex_unlock(&hpfs_i(i)->i_parent_mutex);
 	if (new_inode)
-		up(&hpfs_i(new_inode)->i_parent);
+		mutex_unlock(&hpfs_i(new_inode)->i_parent_mutex);
 	unlock_kernel();
 	return err;
 }
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 63e88d7e2c3..9488a794076 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -181,8 +181,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 	    SLAB_CTOR_CONSTRUCTOR) {
-		init_MUTEX(&ei->i_sem);
-		init_MUTEX(&ei->i_parent);
+		mutex_init(&ei->i_mutex);
+		mutex_init(&ei->i_parent_mutex);
 		inode_init_once(&ei->vfs_inode);
 	}
 }
-- 
cgit v1.2.3


From 97461518610fb1679f67333bb699bb81136e49fe Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Thu, 23 Mar 2006 03:00:42 -0800
Subject: [PATCH] convert ext3's truncate_sem to a mutex

ext3's truncate_sem is always released in the same function it's taken
and it otherwise is a mutex as well..

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/file.c  |  4 ++--
 fs/ext3/inode.c | 14 +++++++-------
 fs/ext3/ioctl.c |  4 ++--
 fs/ext3/super.c |  2 +-
 4 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 98e78345ead..59098ea5671 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -37,9 +37,9 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
 	if ((filp->f_mode & FMODE_WRITE) &&
 			(atomic_read(&inode->i_writecount) == 1))
 	{
-		down(&EXT3_I(inode)->truncate_sem);
+		mutex_lock(&EXT3_I(inode)->truncate_mutex);
 		ext3_discard_reservation(inode);
-		up(&EXT3_I(inode)->truncate_sem);
+		mutex_unlock(&EXT3_I(inode)->truncate_mutex);
 	}
 	if (is_dx(inode) && filp->private_data)
 		ext3_htree_free_dir_info(filp->private_data);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index d59d5a667b0..2c361377e0a 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -702,7 +702,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 	if (!create || err == -EIO)
 		goto cleanup;
 
-	down(&ei->truncate_sem);
+	mutex_lock(&ei->truncate_mutex);
 
 	/*
 	 * If the indirect block is missing while we are reading
@@ -723,7 +723,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 		}
 		partial = ext3_get_branch(inode, depth, offsets, chain, &err);
 		if (!partial) {
-			up(&ei->truncate_sem);
+			mutex_unlock(&ei->truncate_mutex);
 			if (err)
 				goto cleanup;
 			clear_buffer_new(bh_result);
@@ -759,13 +759,13 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 		err = ext3_splice_branch(handle, inode, iblock, chain,
 					 partial, left);
 	/*
-	 * i_disksize growing is protected by truncate_sem.  Don't forget to
+	 * i_disksize growing is protected by truncate_mutex.  Don't forget to
 	 * protect it if you're about to implement concurrent
 	 * ext3_get_block() -bzzz
 	*/
 	if (!err && extend_disksize && inode->i_size > ei->i_disksize)
 		ei->i_disksize = inode->i_size;
-	up(&ei->truncate_sem);
+	mutex_unlock(&ei->truncate_mutex);
 	if (err)
 		goto cleanup;
 
@@ -1227,7 +1227,7 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
  *	ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
  *
  * Same applies to ext3_get_block().  We will deadlock on various things like
- * lock_journal and i_truncate_sem.
+ * lock_journal and i_truncate_mutex.
  *
  * Setting PF_MEMALLOC here doesn't work - too many internal memory
  * allocations fail.
@@ -2161,7 +2161,7 @@ void ext3_truncate(struct inode * inode)
 	 * From here we block out all ext3_get_block() callers who want to
 	 * modify the block allocation tree.
 	 */
-	down(&ei->truncate_sem);
+	mutex_lock(&ei->truncate_mutex);
 
 	if (n == 1) {		/* direct blocks */
 		ext3_free_data(handle, inode, NULL, i_data+offsets[0],
@@ -2228,7 +2228,7 @@ do_indirects:
 
 	ext3_discard_reservation(inode);
 
-	up(&ei->truncate_sem);
+	mutex_unlock(&ei->truncate_mutex);
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
 	ext3_mark_inode_dirty(handle, inode);
 
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 556cd551007..aaf1da17b6d 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -182,7 +182,7 @@ flags_err:
 		 * need to allocate reservation structure for this inode
 		 * before set the window size
 		 */
-		down(&ei->truncate_sem);
+		mutex_lock(&ei->truncate_mutex);
 		if (!ei->i_block_alloc_info)
 			ext3_init_block_alloc_info(inode);
 
@@ -190,7 +190,7 @@ flags_err:
 			struct ext3_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
 			rsv->rsv_goal_size = rsv_window_size;
 		}
-		up(&ei->truncate_sem);
+		mutex_unlock(&ei->truncate_mutex);
 		return 0;
 	}
 	case EXT3_IOC_GROUP_EXTEND: {
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index efa83205914..efe5b20d7a5 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -472,7 +472,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 #ifdef CONFIG_EXT3_FS_XATTR
 		init_rwsem(&ei->xattr_sem);
 #endif
-		init_MUTEX(&ei->truncate_sem);
+		mutex_init(&ei->truncate_mutex);
 		inode_init_once(&ei->vfs_inode);
 	}
 }
-- 
cgit v1.2.3


From 8e3f90459b7052c31a9669417b837fb14aa6d313 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 23 Mar 2006 03:00:43 -0800
Subject: [PATCH] sem2mutex: NCPFS

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ncpfs/file.c          |  4 ++--
 fs/ncpfs/inode.c         |  6 +++---
 fs/ncpfs/ncplib_kernel.c |  4 ++--
 fs/ncpfs/sock.c          | 34 +++++++++++++++++-----------------
 4 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 973b444d691..ebdad8f6398 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -46,7 +46,7 @@ int ncp_make_open(struct inode *inode, int right)
 		NCP_FINFO(inode)->volNumber, 
 		NCP_FINFO(inode)->dirEntNum);
 	error = -EACCES;
-	down(&NCP_FINFO(inode)->open_sem);
+	mutex_lock(&NCP_FINFO(inode)->open_mutex);
 	if (!atomic_read(&NCP_FINFO(inode)->opened)) {
 		struct ncp_entry_info finfo;
 		int result;
@@ -93,7 +93,7 @@ int ncp_make_open(struct inode *inode, int right)
 	}
 
 out_unlock:
-	up(&NCP_FINFO(inode)->open_sem);
+	mutex_unlock(&NCP_FINFO(inode)->open_mutex);
 out:
 	return error;
 }
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index d277a58bd12..0b521d3d97c 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -63,7 +63,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 	    SLAB_CTOR_CONSTRUCTOR) {
-		init_MUTEX(&ei->open_sem);
+		mutex_init(&ei->open_mutex);
 		inode_init_once(&ei->vfs_inode);
 	}
 }
@@ -520,7 +520,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 	}
 
 /*	server->lock = 0;	*/
-	init_MUTEX(&server->sem);
+	mutex_init(&server->mutex);
 	server->packet = NULL;
 /*	server->buffer_size = 0;	*/
 /*	server->conn_status = 0;	*/
@@ -557,7 +557,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 	server->dentry_ttl = 0;	/* no caching */
 
 	INIT_LIST_HEAD(&server->tx.requests);
-	init_MUTEX(&server->rcv.creq_sem);
+	mutex_init(&server->rcv.creq_mutex);
 	server->tx.creq		= NULL;
 	server->rcv.creq	= NULL;
 	server->data_ready	= sock->sk->sk_data_ready;
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index c755e1848a4..d9ebf6439f5 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -291,7 +291,7 @@ ncp_make_closed(struct inode *inode)
 	int err;
 
 	err = 0;
-	down(&NCP_FINFO(inode)->open_sem);	
+	mutex_lock(&NCP_FINFO(inode)->open_mutex);
 	if (atomic_read(&NCP_FINFO(inode)->opened) == 1) {
 		atomic_set(&NCP_FINFO(inode)->opened, 0);
 		err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle);
@@ -301,7 +301,7 @@ ncp_make_closed(struct inode *inode)
 				NCP_FINFO(inode)->volNumber,
 				NCP_FINFO(inode)->dirEntNum, err);
 	}
-	up(&NCP_FINFO(inode)->open_sem);
+	mutex_unlock(&NCP_FINFO(inode)->open_mutex);
 	return err;
 }
 
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c
index 6593a5ca88b..8783eb7ec64 100644
--- a/fs/ncpfs/sock.c
+++ b/fs/ncpfs/sock.c
@@ -171,9 +171,9 @@ static inline void __ncp_abort_request(struct ncp_server *server, struct ncp_req
 
 static inline void ncp_abort_request(struct ncp_server *server, struct ncp_request_reply *req, int err)
 {
-	down(&server->rcv.creq_sem);
+	mutex_lock(&server->rcv.creq_mutex);
 	__ncp_abort_request(server, req, err);
-	up(&server->rcv.creq_sem);
+	mutex_unlock(&server->rcv.creq_mutex);
 }
 
 static inline void __ncptcp_abort(struct ncp_server *server)
@@ -303,20 +303,20 @@ static inline void __ncp_start_request(struct ncp_server *server, struct ncp_req
 
 static int ncp_add_request(struct ncp_server *server, struct ncp_request_reply *req)
 {
-	down(&server->rcv.creq_sem);
+	mutex_lock(&server->rcv.creq_mutex);
 	if (!ncp_conn_valid(server)) {
-		up(&server->rcv.creq_sem);
+		mutex_unlock(&server->rcv.creq_mutex);
 		printk(KERN_ERR "ncpfs: tcp: Server died\n");
 		return -EIO;
 	}
 	if (server->tx.creq || server->rcv.creq) {
 		req->status = RQ_QUEUED;
 		list_add_tail(&req->req, &server->tx.requests);
-		up(&server->rcv.creq_sem);
+		mutex_unlock(&server->rcv.creq_mutex);
 		return 0;
 	}
 	__ncp_start_request(server, req);
-	up(&server->rcv.creq_sem);
+	mutex_unlock(&server->rcv.creq_mutex);
 	return 0;
 }
 
@@ -400,7 +400,7 @@ void ncpdgram_rcv_proc(void *s)
 				info_server(server, 0, server->unexpected_packet.data, result);
 				continue;
 			}
-			down(&server->rcv.creq_sem);		
+			mutex_lock(&server->rcv.creq_mutex);
 			req = server->rcv.creq;
 			if (req && (req->tx_type == NCP_ALLOC_SLOT_REQUEST || (server->sequence == reply.sequence && 
 					server->connection == get_conn_number(&reply)))) {
@@ -430,11 +430,11 @@ void ncpdgram_rcv_proc(void *s)
 				     	server->rcv.creq = NULL;
 					ncp_finish_request(req, result);
 					__ncp_next_request(server);
-					up(&server->rcv.creq_sem);
+					mutex_unlock(&server->rcv.creq_mutex);
 					continue;
 				}
 			}
-			up(&server->rcv.creq_sem);
+			mutex_unlock(&server->rcv.creq_mutex);
 		}
 drop:;		
 		_recv(sock, &reply, sizeof(reply), MSG_DONTWAIT);
@@ -472,9 +472,9 @@ static void __ncpdgram_timeout_proc(struct ncp_server *server)
 void ncpdgram_timeout_proc(void *s)
 {
 	struct ncp_server *server = s;
-	down(&server->rcv.creq_sem);
+	mutex_lock(&server->rcv.creq_mutex);
 	__ncpdgram_timeout_proc(server);
-	up(&server->rcv.creq_sem);
+	mutex_unlock(&server->rcv.creq_mutex);
 }
 
 static inline void ncp_init_req(struct ncp_request_reply* req)
@@ -657,18 +657,18 @@ void ncp_tcp_rcv_proc(void *s)
 {
 	struct ncp_server *server = s;
 
-	down(&server->rcv.creq_sem);
+	mutex_lock(&server->rcv.creq_mutex);
 	__ncptcp_rcv_proc(server);
-	up(&server->rcv.creq_sem);
+	mutex_unlock(&server->rcv.creq_mutex);
 }
 
 void ncp_tcp_tx_proc(void *s)
 {
 	struct ncp_server *server = s;
 	
-	down(&server->rcv.creq_sem);
+	mutex_lock(&server->rcv.creq_mutex);
 	__ncptcp_try_send(server);
-	up(&server->rcv.creq_sem);
+	mutex_unlock(&server->rcv.creq_mutex);
 }
 
 static int do_ncp_rpc_call(struct ncp_server *server, int size,
@@ -833,7 +833,7 @@ int ncp_disconnect(struct ncp_server *server)
 
 void ncp_lock_server(struct ncp_server *server)
 {
-	down(&server->sem);
+	mutex_lock(&server->mutex);
 	if (server->lock)
 		printk(KERN_WARNING "ncp_lock_server: was locked!\n");
 	server->lock = 1;
@@ -846,5 +846,5 @@ void ncp_unlock_server(struct ncp_server *server)
 		return;
 	}
 	server->lock = 0;
-	up(&server->sem);
+	mutex_unlock(&server->mutex);
 }
-- 
cgit v1.2.3


From 1e7933defd0fce79b2d8ecdbc7ca37fed0c188ed Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 23 Mar 2006 03:00:44 -0800
Subject: [PATCH] sem2mutex: UDF

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/udf/balloc.c | 36 ++++++++++++++++++------------------
 fs/udf/ialloc.c |  8 ++++----
 fs/udf/super.c  |  2 +-
 3 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 201049ac8a9..ea521f846d9 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -152,7 +152,7 @@ static void udf_bitmap_free_blocks(struct super_block * sb,
 	int bitmap_nr;
 	unsigned long overflow;
 
-	down(&sbi->s_alloc_sem);
+	mutex_lock(&sbi->s_alloc_mutex);
 	if (bloc.logicalBlockNum < 0 ||
 		(bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum))
 	{
@@ -211,7 +211,7 @@ error_return:
 	sb->s_dirt = 1;
 	if (UDF_SB_LVIDBH(sb))
 		mark_buffer_dirty(UDF_SB_LVIDBH(sb));
-	up(&sbi->s_alloc_sem);
+	mutex_unlock(&sbi->s_alloc_mutex);
 	return;
 }
 
@@ -226,7 +226,7 @@ static int udf_bitmap_prealloc_blocks(struct super_block * sb,
 	int nr_groups, bitmap_nr;
 	struct buffer_head *bh;
 
-	down(&sbi->s_alloc_sem);
+	mutex_lock(&sbi->s_alloc_mutex);
 	if (first_block < 0 || first_block >= UDF_SB_PARTLEN(sb, partition))
 		goto out;
 
@@ -275,7 +275,7 @@ out:
 		mark_buffer_dirty(UDF_SB_LVIDBH(sb));
 	}
 	sb->s_dirt = 1;
-	up(&sbi->s_alloc_sem);
+	mutex_unlock(&sbi->s_alloc_mutex);
 	return alloc_count;
 }
 
@@ -291,7 +291,7 @@ static int udf_bitmap_new_block(struct super_block * sb,
 	int newblock = 0;
 
 	*err = -ENOSPC;
-	down(&sbi->s_alloc_sem);
+	mutex_lock(&sbi->s_alloc_mutex);
 
 repeat:
 	if (goal < 0 || goal >= UDF_SB_PARTLEN(sb, partition))
@@ -364,7 +364,7 @@ repeat:
 	}
 	if (i >= (nr_groups*2))
 	{
-		up(&sbi->s_alloc_sem);
+		mutex_unlock(&sbi->s_alloc_mutex);
 		return newblock;
 	}
 	if (bit < sb->s_blocksize << 3)
@@ -373,7 +373,7 @@ repeat:
 		bit = udf_find_next_one_bit(bh->b_data, sb->s_blocksize << 3, group_start << 3);
 	if (bit >= sb->s_blocksize << 3)
 	{
-		up(&sbi->s_alloc_sem);
+		mutex_unlock(&sbi->s_alloc_mutex);
 		return 0;
 	}
 
@@ -387,7 +387,7 @@ got_block:
 	 */
 	if (inode && DQUOT_ALLOC_BLOCK(inode, 1))
 	{
-		up(&sbi->s_alloc_sem);
+		mutex_unlock(&sbi->s_alloc_mutex);
 		*err = -EDQUOT;
 		return 0;
 	}
@@ -410,13 +410,13 @@ got_block:
 		mark_buffer_dirty(UDF_SB_LVIDBH(sb));
 	}
 	sb->s_dirt = 1;
-	up(&sbi->s_alloc_sem);
+	mutex_unlock(&sbi->s_alloc_mutex);
 	*err = 0;
 	return newblock;
 
 error_return:
 	*err = -EIO;
-	up(&sbi->s_alloc_sem);
+	mutex_unlock(&sbi->s_alloc_mutex);
 	return 0;
 }
 
@@ -433,7 +433,7 @@ static void udf_table_free_blocks(struct super_block * sb,
 	int8_t etype;
 	int i;
 
-	down(&sbi->s_alloc_sem);
+	mutex_lock(&sbi->s_alloc_mutex);
 	if (bloc.logicalBlockNum < 0 ||
 		(bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum))
 	{
@@ -666,7 +666,7 @@ static void udf_table_free_blocks(struct super_block * sb,
 
 error_return:
 	sb->s_dirt = 1;
-	up(&sbi->s_alloc_sem);
+	mutex_unlock(&sbi->s_alloc_mutex);
 	return;
 }
 
@@ -692,7 +692,7 @@ static int udf_table_prealloc_blocks(struct super_block * sb,
 	else
 		return 0;
 
-	down(&sbi->s_alloc_sem);
+	mutex_lock(&sbi->s_alloc_mutex);
 	extoffset = sizeof(struct unallocSpaceEntry);
 	bloc = UDF_I_LOCATION(table);
 
@@ -736,7 +736,7 @@ static int udf_table_prealloc_blocks(struct super_block * sb,
 		mark_buffer_dirty(UDF_SB_LVIDBH(sb));
 		sb->s_dirt = 1;
 	}
-	up(&sbi->s_alloc_sem);
+	mutex_unlock(&sbi->s_alloc_mutex);
 	return alloc_count;
 }
 
@@ -761,7 +761,7 @@ static int udf_table_new_block(struct super_block * sb,
 	else
 		return newblock;
 
-	down(&sbi->s_alloc_sem);
+	mutex_lock(&sbi->s_alloc_mutex);
 	if (goal < 0 || goal >= UDF_SB_PARTLEN(sb, partition))
 		goal = 0;
 
@@ -811,7 +811,7 @@ static int udf_table_new_block(struct super_block * sb,
 	if (spread == 0xFFFFFFFF)
 	{
 		udf_release_data(goal_bh);
-		up(&sbi->s_alloc_sem);
+		mutex_unlock(&sbi->s_alloc_mutex);
 		return 0;
 	}
 
@@ -827,7 +827,7 @@ static int udf_table_new_block(struct super_block * sb,
 	if (inode && DQUOT_ALLOC_BLOCK(inode, 1))
 	{
 		udf_release_data(goal_bh);
-		up(&sbi->s_alloc_sem);
+		mutex_unlock(&sbi->s_alloc_mutex);
 		*err = -EDQUOT;
 		return 0;
 	}
@@ -846,7 +846,7 @@ static int udf_table_new_block(struct super_block * sb,
 	}
 
 	sb->s_dirt = 1;
-	up(&sbi->s_alloc_sem);
+	mutex_unlock(&sbi->s_alloc_mutex);
 	*err = 0;
 	return newblock;
 }
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index c9b707b470c..3873c672cb4 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -42,7 +42,7 @@ void udf_free_inode(struct inode * inode)
 
 	clear_inode(inode);
 
-	down(&sbi->s_alloc_sem);
+	mutex_lock(&sbi->s_alloc_mutex);
 	if (sbi->s_lvidbh) {
 		if (S_ISDIR(inode->i_mode))
 			UDF_SB_LVIDIU(sb)->numDirs =
@@ -53,7 +53,7 @@ void udf_free_inode(struct inode * inode)
 		
 		mark_buffer_dirty(sbi->s_lvidbh);
 	}
-	up(&sbi->s_alloc_sem);
+	mutex_unlock(&sbi->s_alloc_mutex);
 
 	udf_free_blocks(sb, NULL, UDF_I_LOCATION(inode), 0, 1);
 }
@@ -83,7 +83,7 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err)
 		return NULL;
 	}
 
-	down(&sbi->s_alloc_sem);
+	mutex_lock(&sbi->s_alloc_mutex);
 	UDF_I_UNIQUE(inode) = 0;
 	UDF_I_LENEXTENTS(inode) = 0;
 	UDF_I_NEXT_ALLOC_BLOCK(inode) = 0;
@@ -148,7 +148,7 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err)
 		UDF_I_CRTIME(inode) = current_fs_time(inode->i_sb);
 	insert_inode_hash(inode);
 	mark_inode_dirty(inode);
-	up(&sbi->s_alloc_sem);
+	mutex_unlock(&sbi->s_alloc_mutex);
 
 	if (DQUOT_ALLOC_INODE(inode))
 	{
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 368d8f81fe5..9303c50c5d5 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1515,7 +1515,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	sb->s_fs_info = sbi;
 	memset(UDF_SB(sb), 0x00, sizeof(struct udf_sb_info));
 
-	init_MUTEX(&sbi->s_alloc_sem);
+	mutex_init(&sbi->s_alloc_mutex);
 
 	if (!udf_parse_options((char *)options, &uopt))
 		goto error_out;
-- 
cgit v1.2.3


From 6b9438e1323a2be10dcc039f6321e7ca18b9459e Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Thu, 23 Mar 2006 03:00:48 -0800
Subject: [PATCH] fat_lock is used as a mutex, convert it to using the new
 mutex primitive

The fat code uses the fat_lock always in a mutex way (taking and releasing
the lock in the same function), the patch below converts it into the new
mutex primitive.  Please consider this patch for the code.

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fat/fatent.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index a1a9e045121..ab171ea8e86 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -267,19 +267,19 @@ static struct fatent_operations fat32_ops = {
 
 static inline void lock_fat(struct msdos_sb_info *sbi)
 {
-	down(&sbi->fat_lock);
+	mutex_lock(&sbi->fat_lock);
 }
 
 static inline void unlock_fat(struct msdos_sb_info *sbi)
 {
-	up(&sbi->fat_lock);
+	mutex_unlock(&sbi->fat_lock);
 }
 
 void fat_ent_access_init(struct super_block *sb)
 {
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 
-	init_MUTEX(&sbi->fat_lock);
+	mutex_init(&sbi->fat_lock);
 
 	switch (sbi->fat_bits) {
 	case 32:
-- 
cgit v1.2.3


From a7ccf007189aa4401695e3105b00d9429f836b46 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 23 Mar 2006 03:00:50 -0800
Subject: [PATCH] fs/ufs/file.c: drop insane header dependencies

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ufs/file.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index ed69d7fe1b5..62ad481810e 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -23,18 +23,8 @@
  *  ext2 fs regular file handling primitives
  */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
-#include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/ufs_fs.h>
-#include <linux/fcntl.h>
-#include <linux/time.h>
-#include <linux/stat.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/smp_lock.h>
 
 /*
  * We have mostly NULL's here: the current defaults are ok for
-- 
cgit v1.2.3


From 78ec7b6917a938910d5ed6049c0e4ee6e6852e4e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 23 Mar 2006 03:00:51 -0800
Subject: [PATCH] minix: switch to inode_inc_link_count, inode_dec_link_count

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/minix/namei.c | 48 ++++++++++++++++++------------------------------
 1 file changed, 18 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index b25bca5bdb5..5b6a4540a05 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -6,18 +6,6 @@
 
 #include "minix.h"
 
-static inline void inc_count(struct inode *inode)
-{
-	inode->i_nlink++;
-	mark_inode_dirty(inode);
-}
-
-static inline void dec_count(struct inode *inode)
-{
-	inode->i_nlink--;
-	mark_inode_dirty(inode);
-}
-
 static int add_nondir(struct dentry *dentry, struct inode *inode)
 {
 	int err = minix_add_link(dentry, inode);
@@ -25,7 +13,7 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
 		d_instantiate(dentry, inode);
 		return 0;
 	}
-	dec_count(inode);
+	inode_dec_link_count(inode);
 	iput(inode);
 	return err;
 }
@@ -125,7 +113,7 @@ out:
 	return err;
 
 out_fail:
-	dec_count(inode);
+	inode_dec_link_count(inode);
 	iput(inode);
 	goto out;
 }
@@ -139,7 +127,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
 		return -EMLINK;
 
 	inode->i_ctime = CURRENT_TIME_SEC;
-	inc_count(inode);
+	inode_inc_link_count(inode);
 	atomic_inc(&inode->i_count);
 	return add_nondir(dentry, inode);
 }
@@ -152,7 +140,7 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode)
 	if (dir->i_nlink >= minix_sb(dir->i_sb)->s_link_max)
 		goto out;
 
-	inc_count(dir);
+	inode_inc_link_count(dir);
 
 	inode = minix_new_inode(dir, &err);
 	if (!inode)
@@ -163,7 +151,7 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode)
 		inode->i_mode |= S_ISGID;
 	minix_set_inode(inode, 0);
 
-	inc_count(inode);
+	inode_inc_link_count(inode);
 
 	err = minix_make_empty(inode, dir);
 	if (err)
@@ -178,11 +166,11 @@ out:
 	return err;
 
 out_fail:
-	dec_count(inode);
-	dec_count(inode);
+	inode_dec_link_count(inode);
+	inode_dec_link_count(inode);
 	iput(inode);
 out_dir:
-	dec_count(dir);
+	inode_dec_link_count(dir);
 	goto out;
 }
 
@@ -202,7 +190,7 @@ static int minix_unlink(struct inode * dir, struct dentry *dentry)
 		goto end_unlink;
 
 	inode->i_ctime = dir->i_ctime;
-	dec_count(inode);
+	inode_dec_link_count(inode);
 end_unlink:
 	return err;
 }
@@ -215,8 +203,8 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry)
 	if (minix_empty_dir(inode)) {
 		err = minix_unlink(dir, dentry);
 		if (!err) {
-			dec_count(dir);
-			dec_count(inode);
+			inode_dec_link_count(dir);
+			inode_dec_link_count(inode);
 		}
 	}
 	return err;
@@ -257,34 +245,34 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
 		new_de = minix_find_entry(new_dentry, &new_page);
 		if (!new_de)
 			goto out_dir;
-		inc_count(old_inode);
+		inode_inc_link_count(old_inode);
 		minix_set_link(new_de, new_page, old_inode);
 		new_inode->i_ctime = CURRENT_TIME_SEC;
 		if (dir_de)
 			new_inode->i_nlink--;
-		dec_count(new_inode);
+		inode_dec_link_count(new_inode);
 	} else {
 		if (dir_de) {
 			err = -EMLINK;
 			if (new_dir->i_nlink >= info->s_link_max)
 				goto out_dir;
 		}
-		inc_count(old_inode);
+		inode_inc_link_count(old_inode);
 		err = minix_add_link(new_dentry, old_inode);
 		if (err) {
-			dec_count(old_inode);
+			inode_dec_link_count(old_inode);
 			goto out_dir;
 		}
 		if (dir_de)
-			inc_count(new_dir);
+			inode_inc_link_count(new_dir);
 	}
 
 	minix_delete_entry(old_de, old_page);
-	dec_count(old_inode);
+	inode_dec_link_count(old_inode);
 
 	if (dir_de) {
 		minix_set_link(dir_de, dir_page, new_dir);
-		dec_count(old_dir);
+		inode_dec_link_count(old_dir);
 	}
 	return 0;
 
-- 
cgit v1.2.3


From 4e907c3d45d10dc5162d283d109be425c23aeb69 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 23 Mar 2006 03:00:52 -0800
Subject: [PATCH] sysv: switch to inode_inc_count, inode_dec_count

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/sysv/namei.c | 48 ++++++++++++++++++------------------------------
 1 file changed, 18 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 7f0e4b53085..b8a73f716fb 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -16,18 +16,6 @@
 #include <linux/smp_lock.h>
 #include "sysv.h"
 
-static inline void inc_count(struct inode *inode)
-{
-	inode->i_nlink++;
-	mark_inode_dirty(inode);
-}
-
-static inline void dec_count(struct inode *inode)
-{
-	inode->i_nlink--;
-	mark_inode_dirty(inode);
-}
-
 static int add_nondir(struct dentry *dentry, struct inode *inode)
 {
 	int err = sysv_add_link(dentry, inode);
@@ -35,7 +23,7 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
 		d_instantiate(dentry, inode);
 		return 0;
 	}
-	dec_count(inode);
+	inode_dec_link_count(inode);
 	iput(inode);
 	return err;
 }
@@ -124,7 +112,7 @@ out:
 	return err;
 
 out_fail:
-	dec_count(inode);
+	inode_dec_link_count(inode);
 	iput(inode);
 	goto out;
 }
@@ -138,7 +126,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
 		return -EMLINK;
 
 	inode->i_ctime = CURRENT_TIME_SEC;
-	inc_count(inode);
+	inode_inc_link_count(inode);
 	atomic_inc(&inode->i_count);
 
 	return add_nondir(dentry, inode);
@@ -151,7 +139,7 @@ static int sysv_mkdir(struct inode * dir, struct dentry *dentry, int mode)
 
 	if (dir->i_nlink >= SYSV_SB(dir->i_sb)->s_link_max) 
 		goto out;
-	inc_count(dir);
+	inode_inc_link_count(dir);
 
 	inode = sysv_new_inode(dir, S_IFDIR|mode);
 	err = PTR_ERR(inode);
@@ -160,7 +148,7 @@ static int sysv_mkdir(struct inode * dir, struct dentry *dentry, int mode)
 
 	sysv_set_inode(inode, 0);
 
-	inc_count(inode);
+	inode_inc_link_count(inode);
 
 	err = sysv_make_empty(inode, dir);
 	if (err)
@@ -175,11 +163,11 @@ out:
 	return err;
 
 out_fail:
-	dec_count(inode);
-	dec_count(inode);
+	inode_dec_link_count(inode);
+	inode_dec_link_count(inode);
 	iput(inode);
 out_dir:
-	dec_count(dir);
+	inode_dec_link_count(dir);
 	goto out;
 }
 
@@ -199,7 +187,7 @@ static int sysv_unlink(struct inode * dir, struct dentry * dentry)
 		goto out;
 
 	inode->i_ctime = dir->i_ctime;
-	dec_count(inode);
+	inode_dec_link_count(inode);
 out:
 	return err;
 }
@@ -213,8 +201,8 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry)
 		err = sysv_unlink(dir, dentry);
 		if (!err) {
 			inode->i_size = 0;
-			dec_count(inode);
-			dec_count(dir);
+			inode_dec_link_count(inode);
+			inode_dec_link_count(dir);
 		}
 	}
 	return err;
@@ -258,34 +246,34 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
 		new_de = sysv_find_entry(new_dentry, &new_page);
 		if (!new_de)
 			goto out_dir;
-		inc_count(old_inode);
+		inode_inc_link_count(old_inode);
 		sysv_set_link(new_de, new_page, old_inode);
 		new_inode->i_ctime = CURRENT_TIME_SEC;
 		if (dir_de)
 			new_inode->i_nlink--;
-		dec_count(new_inode);
+		inode_dec_link_count(new_inode);
 	} else {
 		if (dir_de) {
 			err = -EMLINK;
 			if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
 				goto out_dir;
 		}
-		inc_count(old_inode);
+		inode_inc_link_count(old_inode);
 		err = sysv_add_link(new_dentry, old_inode);
 		if (err) {
-			dec_count(old_inode);
+			inode_dec_link_count(old_inode);
 			goto out_dir;
 		}
 		if (dir_de)
-			inc_count(new_dir);
+			inode_inc_link_count(new_dir);
 	}
 
 	sysv_delete_entry(old_de, old_page);
-	dec_count(old_inode);
+	inode_dec_link_count(old_inode);
 
 	if (dir_de) {
 		sysv_set_link(dir_de, dir_page, new_dir);
-		dec_count(old_dir);
+		inode_dec_link_count(old_dir);
 	}
 	return 0;
 
-- 
cgit v1.2.3


From a513b035eadf80bb9ce0be4c5dd9f242cfb9eecc Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 23 Mar 2006 03:00:53 -0800
Subject: [PATCH] ext2: switch to inode_inc_count, inode_dec_count

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/namei.c | 54 +++++++++++++++++++-----------------------------------
 1 file changed, 19 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index ad1432a2a62..4ca82498532 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -36,22 +36,6 @@
 #include "acl.h"
 #include "xip.h"
 
-/*
- * Couple of helper functions - make the code slightly cleaner.
- */
-
-static inline void ext2_inc_count(struct inode *inode)
-{
-	inode->i_nlink++;
-	mark_inode_dirty(inode);
-}
-
-static inline void ext2_dec_count(struct inode *inode)
-{
-	inode->i_nlink--;
-	mark_inode_dirty(inode);
-}
-
 static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
 {
 	int err = ext2_add_link(dentry, inode);
@@ -59,7 +43,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
 		d_instantiate(dentry, inode);
 		return 0;
 	}
-	ext2_dec_count(inode);
+	inode_dec_link_count(inode);
 	iput(inode);
 	return err;
 }
@@ -201,7 +185,7 @@ out:
 	return err;
 
 out_fail:
-	ext2_dec_count(inode);
+	inode_dec_link_count(inode);
 	iput (inode);
 	goto out;
 }
@@ -215,7 +199,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
 		return -EMLINK;
 
 	inode->i_ctime = CURRENT_TIME_SEC;
-	ext2_inc_count(inode);
+	inode_inc_link_count(inode);
 	atomic_inc(&inode->i_count);
 
 	return ext2_add_nondir(dentry, inode);
@@ -229,7 +213,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 	if (dir->i_nlink >= EXT2_LINK_MAX)
 		goto out;
 
-	ext2_inc_count(dir);
+	inode_inc_link_count(dir);
 
 	inode = ext2_new_inode (dir, S_IFDIR | mode);
 	err = PTR_ERR(inode);
@@ -243,7 +227,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 	else
 		inode->i_mapping->a_ops = &ext2_aops;
 
-	ext2_inc_count(inode);
+	inode_inc_link_count(inode);
 
 	err = ext2_make_empty(inode, dir);
 	if (err)
@@ -258,11 +242,11 @@ out:
 	return err;
 
 out_fail:
-	ext2_dec_count(inode);
-	ext2_dec_count(inode);
+	inode_dec_link_count(inode);
+	inode_dec_link_count(inode);
 	iput(inode);
 out_dir:
-	ext2_dec_count(dir);
+	inode_dec_link_count(dir);
 	goto out;
 }
 
@@ -282,7 +266,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry)
 		goto out;
 
 	inode->i_ctime = dir->i_ctime;
-	ext2_dec_count(inode);
+	inode_dec_link_count(inode);
 	err = 0;
 out:
 	return err;
@@ -297,8 +281,8 @@ static int ext2_rmdir (struct inode * dir, struct dentry *dentry)
 		err = ext2_unlink(dir, dentry);
 		if (!err) {
 			inode->i_size = 0;
-			ext2_dec_count(inode);
-			ext2_dec_count(dir);
+			inode_dec_link_count(inode);
+			inode_dec_link_count(dir);
 		}
 	}
 	return err;
@@ -338,41 +322,41 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 		new_de = ext2_find_entry (new_dir, new_dentry, &new_page);
 		if (!new_de)
 			goto out_dir;
-		ext2_inc_count(old_inode);
+		inode_inc_link_count(old_inode);
 		ext2_set_link(new_dir, new_de, new_page, old_inode);
 		new_inode->i_ctime = CURRENT_TIME_SEC;
 		if (dir_de)
 			new_inode->i_nlink--;
-		ext2_dec_count(new_inode);
+		inode_dec_link_count(new_inode);
 	} else {
 		if (dir_de) {
 			err = -EMLINK;
 			if (new_dir->i_nlink >= EXT2_LINK_MAX)
 				goto out_dir;
 		}
-		ext2_inc_count(old_inode);
+		inode_inc_link_count(old_inode);
 		err = ext2_add_link(new_dentry, old_inode);
 		if (err) {
-			ext2_dec_count(old_inode);
+			inode_dec_link_count(old_inode);
 			goto out_dir;
 		}
 		if (dir_de)
-			ext2_inc_count(new_dir);
+			inode_inc_link_count(new_dir);
 	}
 
 	/*
 	 * Like most other Unix systems, set the ctime for inodes on a
  	 * rename.
-	 * ext2_dec_count() will mark the inode dirty.
+	 * inode_dec_link_count() will mark the inode dirty.
 	 */
 	old_inode->i_ctime = CURRENT_TIME_SEC;
 
 	ext2_delete_entry (old_de, old_page);
-	ext2_dec_count(old_inode);
+	inode_dec_link_count(old_inode);
 
 	if (dir_de) {
 		ext2_set_link(old_inode, dir_de, dir_page, new_dir);
-		ext2_dec_count(old_dir);
+		inode_dec_link_count(old_dir);
 	}
 	return 0;
 
-- 
cgit v1.2.3


From 3257545e40a769cbef98cf13eabe50f00712991e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 23 Mar 2006 03:00:53 -0800
Subject: [PATCH] ufs: switch to inode_inc_count, inode_dec_count

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ufs/namei.c | 48 ++++++++++++++++++------------------------------
 1 file changed, 18 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 2958cde7d3d..8d5f98a01c7 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -43,18 +43,6 @@
 #define UFSD(x)
 #endif
 
-static inline void ufs_inc_count(struct inode *inode)
-{
-	inode->i_nlink++;
-	mark_inode_dirty(inode);
-}
-
-static inline void ufs_dec_count(struct inode *inode)
-{
-	inode->i_nlink--;
-	mark_inode_dirty(inode);
-}
-
 static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
 {
 	int err = ufs_add_link(dentry, inode);
@@ -62,7 +50,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
 		d_instantiate(dentry, inode);
 		return 0;
 	}
-	ufs_dec_count(inode);
+	inode_dec_link_count(inode);
 	iput(inode);
 	return err;
 }
@@ -173,7 +161,7 @@ out:
 	return err;
 
 out_fail:
-	ufs_dec_count(inode);
+	inode_dec_link_count(inode);
 	iput(inode);
 	goto out;
 }
@@ -191,7 +179,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
 	}
 
 	inode->i_ctime = CURRENT_TIME_SEC;
-	ufs_inc_count(inode);
+	inode_inc_link_count(inode);
 	atomic_inc(&inode->i_count);
 
 	error = ufs_add_nondir(dentry, inode);
@@ -208,7 +196,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 		goto out;
 
 	lock_kernel();
-	ufs_inc_count(dir);
+	inode_inc_link_count(dir);
 
 	inode = ufs_new_inode(dir, S_IFDIR|mode);
 	err = PTR_ERR(inode);
@@ -218,7 +206,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 	inode->i_op = &ufs_dir_inode_operations;
 	inode->i_fop = &ufs_dir_operations;
 
-	ufs_inc_count(inode);
+	inode_inc_link_count(inode);
 
 	err = ufs_make_empty(inode, dir);
 	if (err)
@@ -234,11 +222,11 @@ out:
 	return err;
 
 out_fail:
-	ufs_dec_count(inode);
-	ufs_dec_count(inode);
+	inode_dec_link_count(inode);
+	inode_dec_link_count(inode);
 	iput (inode);
 out_dir:
-	ufs_dec_count(dir);
+	inode_dec_link_count(dir);
 	unlock_kernel();
 	goto out;
 }
@@ -260,7 +248,7 @@ static int ufs_unlink(struct inode * dir, struct dentry *dentry)
 		goto out;
 
 	inode->i_ctime = dir->i_ctime;
-	ufs_dec_count(inode);
+	inode_dec_link_count(inode);
 	err = 0;
 out:
 	unlock_kernel();
@@ -277,8 +265,8 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
 		err = ufs_unlink(dir, dentry);
 		if (!err) {
 			inode->i_size = 0;
-			ufs_dec_count(inode);
-			ufs_dec_count(dir);
+			inode_dec_link_count(inode);
+			inode_dec_link_count(dir);
 		}
 	}
 	unlock_kernel();
@@ -319,35 +307,35 @@ static int ufs_rename (struct inode * old_dir, struct dentry * old_dentry,
 		new_de = ufs_find_entry (new_dentry, &new_bh);
 		if (!new_de)
 			goto out_dir;
-		ufs_inc_count(old_inode);
+		inode_inc_link_count(old_inode);
 		ufs_set_link(new_dir, new_de, new_bh, old_inode);
 		new_inode->i_ctime = CURRENT_TIME_SEC;
 		if (dir_de)
 			new_inode->i_nlink--;
-		ufs_dec_count(new_inode);
+		inode_dec_link_count(new_inode);
 	} else {
 		if (dir_de) {
 			err = -EMLINK;
 			if (new_dir->i_nlink >= UFS_LINK_MAX)
 				goto out_dir;
 		}
-		ufs_inc_count(old_inode);
+		inode_inc_link_count(old_inode);
 		err = ufs_add_link(new_dentry, old_inode);
 		if (err) {
-			ufs_dec_count(old_inode);
+			inode_dec_link_count(old_inode);
 			goto out_dir;
 		}
 		if (dir_de)
-			ufs_inc_count(new_dir);
+			inode_inc_link_count(new_dir);
 	}
 
 	ufs_delete_entry (old_dir, old_de, old_bh);
 
-	ufs_dec_count(old_inode);
+	inode_dec_link_count(old_inode);
 
 	if (dir_de) {
 		ufs_set_link(old_inode, dir_de, dir_bh, new_dir);
-		ufs_dec_count(old_dir);
+		inode_dec_link_count(old_dir);
 	}
 	unlock_kernel();
 	return 0;
-- 
cgit v1.2.3


From 7a673c6b8fff4b2888ef1d47462e4be79936ac5a Mon Sep 17 00:00:00 2001
From: Domen Puncer <domen@coderock.org>
Date: Thu, 23 Mar 2006 03:00:59 -0800
Subject: [PATCH] devpts: use lib/parser.c for parsing mount options

Item from "2.6 should fix" list.

Signed-off-by: Domen Puncer <domen@coderock.org>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/devpts/inode.c | 76 +++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 49 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index bfb8a230bac..14c5620b5ca 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -18,6 +18,7 @@
 #include <linux/mount.h>
 #include <linux/tty.h>
 #include <linux/devpts_fs.h>
+#include <linux/parser.h>
 
 #define DEVPTS_SUPER_MAGIC 0x1cd1
 
@@ -32,39 +33,60 @@ static struct {
 	umode_t mode;
 } config = {.mode = 0600};
 
+enum {
+	Opt_uid, Opt_gid, Opt_mode,
+	Opt_err
+};
+
+static match_table_t tokens = {
+	{Opt_uid, "uid=%u"},
+	{Opt_gid, "gid=%u"},
+	{Opt_mode, "mode=%o"},
+	{Opt_err, NULL}
+};
+
 static int devpts_remount(struct super_block *sb, int *flags, char *data)
 {
-	int setuid = 0;
-	int setgid = 0;
-	uid_t uid = 0;
-	gid_t gid = 0;
-	umode_t mode = 0600;
-	char *this_char;
-
-	this_char = NULL;
-	while ((this_char = strsep(&data, ",")) != NULL) {
-		int n;
-		char dummy;
-		if (!*this_char)
+	char *p;
+
+	config.setuid  = 0;
+	config.setgid  = 0;
+	config.uid     = 0;
+	config.gid     = 0;
+	config.mode    = 0600;
+
+	while ((p = strsep(&data, ",")) != NULL) {
+		substring_t args[MAX_OPT_ARGS];
+		int token;
+		int option;
+
+		if (!*p)
 			continue;
-		if (sscanf(this_char, "uid=%i%c", &n, &dummy) == 1) {
-			setuid = 1;
-			uid = n;
-		} else if (sscanf(this_char, "gid=%i%c", &n, &dummy) == 1) {
-			setgid = 1;
-			gid = n;
-		} else if (sscanf(this_char, "mode=%o%c", &n, &dummy) == 1)
-			mode = n & ~S_IFMT;
-		else {
-			printk("devpts: called with bogus options\n");
+
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_uid:
+			if (match_int(&args[0], &option))
+				return -EINVAL;
+			config.uid = option;
+			config.setuid = 1;
+			break;
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+				return -EINVAL;
+			config.gid = option;
+			config.setgid = 1;
+			break;
+		case Opt_mode:
+			if (match_octal(&args[0], &option))
+				return -EINVAL;
+			config.mode = option & ~S_IFMT;
+			break;
+		default:
+			printk(KERN_ERR "devpts: called with bogus options\n");
 			return -EINVAL;
 		}
 	}
-	config.setuid  = setuid;
-	config.setgid  = setgid;
-	config.uid     = uid;
-	config.gid     = gid;
-	config.mode    = mode;
 
 	return 0;
 }
-- 
cgit v1.2.3


From 713729e8b993cb880225e2ced50a3f5ac05c2b3f Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 23 Mar 2006 03:01:01 -0800
Subject: [PATCH] fs/*/file.c: drop insane header dependencies

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/adfs/file.c      |  4 ----
 fs/qnx4/file.c      |  3 ---
 fs/ramfs/file-mmu.c | 11 -----------
 3 files changed, 18 deletions(-)

(limited to 'fs')

diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index afebbfde696..6af10885f9d 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -19,11 +19,7 @@
  *
  *  adfs regular file handling primitives           
  */
-#include <linux/errno.h>
 #include <linux/fs.h>
-#include <linux/fcntl.h>
-#include <linux/time.h>
-#include <linux/stat.h>
 #include <linux/buffer_head.h>			/* for file_fsync() */
 #include <linux/adfs_fs.h>
 
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index b471315e24e..c33963fded9 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -12,10 +12,7 @@
  * 27-06-1998 by Frank Denis : file overwriting.
  */
 
-#include <linux/config.h>
-#include <linux/types.h>
 #include <linux/fs.h>
-#include <linux/time.h>
 #include <linux/qnx4_fs.h>
 
 /*
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 2115383dcc8..6ada2095b9a 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -24,18 +24,7 @@
  * caches is sufficient.
  */
 
-#include <linux/module.h>
 #include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/highmem.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/smp_lock.h>
-#include <linux/backing-dev.h>
-#include <linux/ramfs.h>
-
-#include <asm/uaccess.h>
-#include "internal.h"
 
 struct address_space_operations ramfs_aops = {
 	.readpage	= simple_readpage,
-- 
cgit v1.2.3


From 5a6b7951bfcca7f45f44269ea87417c74558daf8 Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@linux.intel.com>
Date: Thu, 23 Mar 2006 03:01:03 -0800
Subject: [PATCH] get_empty_filp tweaks, inline epoll_init_file()

Eliminate a handful of cache references by keeping current in a register
instead of reloading (helps x86) and avoiding the overhead of a function
call.  Inlining eventpoll_init_file() saves 24 bytes.  Also reorder file
initialization to make writes occur more sequentially.

Signed-off-by: Benjamin LaHaise <bcrl@linux.intel.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/eventpoll.c  |  9 ---------
 fs/file_table.c | 10 ++++++----
 2 files changed, 6 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f5d69f46ba9..1c2b16fda13 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -452,15 +452,6 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)
 }
 
 
-/* Used to initialize the epoll bits inside the "struct file" */
-void eventpoll_init_file(struct file *file)
-{
-
-	INIT_LIST_HEAD(&file->f_ep_links);
-	spin_lock_init(&file->f_ep_lock);
-}
-
-
 /*
  * This is called from eventpoll_release() to unlink files from the eventpoll
  * interface. We need to have this facility to cleanup correctly files that are
diff --git a/fs/file_table.c b/fs/file_table.c
index 44fabeaa941..bcea1998b4d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -88,6 +88,7 @@ int proc_nr_files(ctl_table *table, int write, struct file *filp,
  */
 struct file *get_empty_filp(void)
 {
+	struct task_struct *tsk;
 	static int old_max;
 	struct file * f;
 
@@ -112,13 +113,14 @@ struct file *get_empty_filp(void)
 	if (security_file_alloc(f))
 		goto fail_sec;
 
-	eventpoll_init_file(f);
+	tsk = current;
+	INIT_LIST_HEAD(&f->f_u.fu_list);
 	atomic_set(&f->f_count, 1);
-	f->f_uid = current->fsuid;
-	f->f_gid = current->fsgid;
 	rwlock_init(&f->f_owner.lock);
+	f->f_uid = tsk->fsuid;
+	f->f_gid = tsk->fsgid;
+	eventpoll_init_file(f);
 	/* f->f_version: 0 */
-	INIT_LIST_HEAD(&f->f_u.fu_list);
 	return f;
 
 over:
-- 
cgit v1.2.3


From 394e3902c55e667945f6f1c2bdbc59842cce70f7 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 23 Mar 2006 03:01:05 -0800
Subject: [PATCH] more for_each_cpu() conversions

When we stop allocating percpu memory for not-possible CPUs we must not touch
the percpu data for not-possible CPUs at all.  The correct way of doing this
is to test cpu_possible() or to use for_each_cpu().

This patch is a kernel-wide sweep of all instances of NR_CPUS.  I found very
few instances of this bug, if any.  But the patch converts lots of open-coded
test to use the preferred helper macros.

Cc: Mikael Starvik <starvik@axis.com>
Cc: David Howells <dhowells@redhat.com>
Acked-by: Kyle McMartin <kyle@parisc-linux.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Christian Zankel <chris@zankel.net>
Cc: Philippe Elie <phil.el@wanadoo.fr>
Cc: Nathan Scott <nathans@sgi.com>
Cc: Jens Axboe <axboe@suse.de>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/xfs/linux-2.6/xfs_stats.c  | 7 ++-----
 fs/xfs/linux-2.6/xfs_sysctl.c | 3 +--
 2 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 8955720a2c6..713e6a7505d 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -62,18 +62,15 @@ xfs_read_xfsstats(
 		while (j < xstats[i].endpoint) {
 			val = 0;
 			/* sum over all cpus */
-			for (c = 0; c < NR_CPUS; c++) {
-				if (!cpu_possible(c)) continue;
+			for_each_cpu(c)
 				val += *(((__u32*)&per_cpu(xfsstats, c) + j));
-			}
 			len += sprintf(buffer + len, " %u", val);
 			j++;
 		}
 		buffer[len++] = '\n';
 	}
 	/* extra precision counters */
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_possible(i)) continue;
+	for_each_cpu(i) {
 		xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
 		xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
 		xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index a0256497242..7079cc83721 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -38,8 +38,7 @@ xfs_stats_clear_proc_handler(
 
 	if (!ret && write && *valp) {
 		printk("XFS Clearing xfsstats\n");
-		for (c = 0; c < NR_CPUS; c++) {
-			if (!cpu_possible(c)) continue;
+		for_each_cpu(c) {
 			preempt_disable();
 			/* save vn_active, it's a universal truth! */
 			vn_active = per_cpu(xfsstats, c).vn_active;
-- 
cgit v1.2.3


From b0e6e962992b76580f4900b166a337bad7c1e81b Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Thu, 23 Mar 2006 03:01:08 -0800
Subject: [PATCH] reduce size of bio mempools

The biovec default mempool limit of 256 entries results in over 3MB of RAM
being permanently pinned, even on systems with only 128MB of RAM.  Since
mempool tries to allocate from the system pool first, it makes sense to
reduce the size of the mempool fallbacks to a more reasonable limit of 1-5
entries -- enough for the system to be able to make progress even under
load.

Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
Acked-by: Jens Axboe <axboe@suse.de>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/bio.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index 1f3bb501c26..8f1d2e815c9 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1243,11 +1243,11 @@ static int __init init_bio(void)
 		scale = 4;
 
 	/*
-	 * scale number of entries
+	 * Limit number of entries reserved -- mempools are only used when
+	 * the system is completely unable to allocate memory, so we only
+	 * need enough to make progress.
 	 */
-	bvec_pool_entries = megabytes * 2;
-	if (bvec_pool_entries > 256)
-		bvec_pool_entries = 256;
+	bvec_pool_entries = 1 + scale;
 
 	fs_bio_set = bioset_create(BIO_POOL_SIZE, bvec_pool_entries, scale);
 	if (!fs_bio_set)
-- 
cgit v1.2.3


From a0646a1f04f1ec4c7514e5b00496b54e054a2c99 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 23 Mar 2006 15:53:03 +0000
Subject: NTFS: Add support for sparse files which have a compression unit of
 0.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  1 +
 fs/ntfs/attrib.c  | 25 +++++++++++++-------
 fs/ntfs/inode.c   | 68 ++++++++++++++++++++++++++++++++++++-------------------
 fs/ntfs/layout.h  | 19 ++++++++++------
 4 files changed, 75 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 548d9059a69..b5774233ef1 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -31,6 +31,7 @@ ToDo/Notes:
 	- Fix comparison of $MFT and $MFTMirr to not bail out when there are
 	  unused, invalid mft records which are the same in both $MFT and
 	  $MFTMirr.
+	- Add support for sparse files which have a compression unit of 0.
 
 2.1.26 - Minor bug fixes and updates.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index a92b9e9db91..7a568eb7d80 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1695,7 +1695,9 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
 			a->data.non_resident.initialized_size =
 			cpu_to_sle64(attr_size);
 	if (NInoSparse(ni) || NInoCompressed(ni)) {
-		a->data.non_resident.compression_unit = 4;
+		a->data.non_resident.compression_unit = 0;
+		if (NInoCompressed(ni) || vol->major_ver < 3)
+			a->data.non_resident.compression_unit = 4;
 		a->data.non_resident.compressed_size =
 				a->data.non_resident.allocated_size;
 	} else
@@ -1714,13 +1716,20 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
 	ni->allocated_size = new_size;
 	if (NInoSparse(ni) || NInoCompressed(ni)) {
 		ni->itype.compressed.size = ni->allocated_size;
-		ni->itype.compressed.block_size = 1U <<
-				(a->data.non_resident.compression_unit +
-				vol->cluster_size_bits);
-		ni->itype.compressed.block_size_bits =
-				ffs(ni->itype.compressed.block_size) - 1;
-		ni->itype.compressed.block_clusters = 1U <<
-				a->data.non_resident.compression_unit;
+		if (a->data.non_resident.compression_unit) {
+			ni->itype.compressed.block_size = 1U << (a->data.
+					non_resident.compression_unit +
+					vol->cluster_size_bits);
+			ni->itype.compressed.block_size_bits =
+					ffs(ni->itype.compressed.block_size) -
+					1;
+			ni->itype.compressed.block_clusters = 1U <<
+					a->data.non_resident.compression_unit;
+		} else {
+			ni->itype.compressed.block_size = 0;
+			ni->itype.compressed.block_size_bits = 0;
+			ni->itype.compressed.block_clusters = 0;
+		}
 		vi->i_blocks = ni->itype.compressed.size >> 9;
 	} else
 		vi->i_blocks = ni->allocated_size >> 9;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 55263b7de9c..ae341922f42 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1,7 +1,7 @@
 /**
  * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2005 Anton Altaparmakov
+ * Copyright (c) 2001-2006 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -24,8 +24,10 @@
 #include <linux/smp_lock.h>
 #include <linux/quotaops.h>
 #include <linux/mount.h>
+#include <linux/mutex.h>
 
 #include "aops.h"
+#include "attrib.h"
 #include "dir.h"
 #include "debug.h"
 #include "inode.h"
@@ -1064,10 +1066,10 @@ skip_large_dir_stuff:
 		if (a->non_resident) {
 			NInoSetNonResident(ni);
 			if (NInoCompressed(ni) || NInoSparse(ni)) {
-				if (a->data.non_resident.compression_unit !=
-						4) {
+				if (NInoCompressed(ni) && a->data.non_resident.
+						compression_unit != 4) {
 					ntfs_error(vi->i_sb, "Found "
-							"nonstandard "
+							"non-standard "
 							"compression unit (%u "
 							"instead of 4).  "
 							"Cannot handle this.",
@@ -1076,16 +1078,26 @@ skip_large_dir_stuff:
 					err = -EOPNOTSUPP;
 					goto unm_err_out;
 				}
-				ni->itype.compressed.block_clusters = 1U <<
-						a->data.non_resident.
-						compression_unit;
-				ni->itype.compressed.block_size = 1U << (
-						a->data.non_resident.
-						compression_unit +
-						vol->cluster_size_bits);
-				ni->itype.compressed.block_size_bits = ffs(
-						ni->itype.compressed.
-						block_size) - 1;
+				if (a->data.non_resident.compression_unit) {
+					ni->itype.compressed.block_size = 1U <<
+							(a->data.non_resident.
+							compression_unit +
+							vol->cluster_size_bits);
+					ni->itype.compressed.block_size_bits =
+							ffs(ni->itype.
+							compressed.
+							block_size) - 1;
+					ni->itype.compressed.block_clusters =
+							1U << a->data.
+							non_resident.
+							compression_unit;
+				} else {
+					ni->itype.compressed.block_size = 0;
+					ni->itype.compressed.block_size_bits =
+							0;
+					ni->itype.compressed.block_clusters =
+							0;
+				}
 				ni->itype.compressed.size = sle64_to_cpu(
 						a->data.non_resident.
 						compressed_size);
@@ -1338,8 +1350,9 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 			goto unm_err_out;
 		}
 		if (NInoCompressed(ni) || NInoSparse(ni)) {
-			if (a->data.non_resident.compression_unit != 4) {
-				ntfs_error(vi->i_sb, "Found nonstandard "
+			if (NInoCompressed(ni) && a->data.non_resident.
+					compression_unit != 4) {
+				ntfs_error(vi->i_sb, "Found non-standard "
 						"compression unit (%u instead "
 						"of 4).  Cannot handle this.",
 						a->data.non_resident.
@@ -1347,13 +1360,22 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 				err = -EOPNOTSUPP;
 				goto unm_err_out;
 			}
-			ni->itype.compressed.block_clusters = 1U <<
-					a->data.non_resident.compression_unit;
-			ni->itype.compressed.block_size = 1U << (
-					a->data.non_resident.compression_unit +
-					vol->cluster_size_bits);
-			ni->itype.compressed.block_size_bits = ffs(
-					ni->itype.compressed.block_size) - 1;
+			if (a->data.non_resident.compression_unit) {
+				ni->itype.compressed.block_size = 1U <<
+						(a->data.non_resident.
+						compression_unit +
+						vol->cluster_size_bits);
+				ni->itype.compressed.block_size_bits =
+						ffs(ni->itype.compressed.
+						block_size) - 1;
+				ni->itype.compressed.block_clusters = 1U <<
+						a->data.non_resident.
+						compression_unit;
+			} else {
+				ni->itype.compressed.block_size = 0;
+				ni->itype.compressed.block_size_bits = 0;
+				ni->itype.compressed.block_clusters = 0;
+			}
 			ni->itype.compressed.size = sle64_to_cpu(
 					a->data.non_resident.compressed_size);
 		}
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index bb408d4dcbb..f4283e12070 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -769,7 +769,7 @@ typedef struct {
 				compressed.  (This effectively limits the
 				compression unit size to be a power of two
 				clusters.)  WinNT4 only uses a value of 4.
-				Sparse files also have this set to 4. */
+				Sparse files have this set to 0 on XPSP2. */
 /* 35*/			u8 reserved[5];		/* Align to 8-byte boundary. */
 /* The sizes below are only used when lowest_vcn is zero, as otherwise it would
    be difficult to keep them up-to-date.*/
@@ -1076,16 +1076,21 @@ typedef struct {
 /* 20*/	sle64 last_access_time;		/* Time this mft record was last
 					   accessed. */
 /* 28*/	sle64 allocated_size;		/* Byte size of on-disk allocated space
-					   for the data attribute.  So for
-					   normal $DATA, this is the
+					   for the unnamed data attribute.  So
+					   for normal $DATA, this is the
 					   allocated_size from the unnamed
 					   $DATA attribute and for compressed
 					   and/or sparse $DATA, this is the
 					   compressed_size from the unnamed
-					   $DATA attribute.  NOTE: This is a
-					   multiple of the cluster size. */
-/* 30*/	sle64 data_size;		/* Byte size of actual data in data
-					   attribute. */
+					   $DATA attribute.  For a directory or
+					   other inode without an unnamed $DATA
+					   attribute, this is always 0.  NOTE:
+					   This is a multiple of the cluster
+					   size. */
+/* 30*/	sle64 data_size;		/* Byte size of actual data in unnamed
+					   data attribute.  For a directory or
+					   other inode without an unnamed $DATA
+					   attribute, this is always 0. */
 /* 38*/	FILE_ATTR_FLAGS file_attributes;	/* Flags describing the file. */
 /* 3c*/	union {
 	/* 3c*/	struct {
-- 
cgit v1.2.3


From f95c4018fd4b0bdef9b1bcb4eac7056e2a07282a Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 23 Mar 2006 15:59:32 +0000
Subject: NTFS: Remove all the make_bad_inode() calls.  This should only be
 called       from read inode and new inode code paths.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  2 ++
 fs/ntfs/aops.c    |  1 -
 fs/ntfs/attrib.c  |  6 ------
 fs/ntfs/file.c    | 13 +------------
 fs/ntfs/mft.c     |  1 +
 fs/ntfs/mft.h     |  5 +----
 6 files changed, 5 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index b5774233ef1..13e70d4e2fd 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -32,6 +32,8 @@ ToDo/Notes:
 	  unused, invalid mft records which are the same in both $MFT and
 	  $MFTMirr.
 	- Add support for sparse files which have a compression unit of 0.
+	- Remove all the make_bad_inode() calls.  This should only be called
+	  from read inode and new inode code paths.
 
 2.1.26 - Minor bug fixes and updates.
 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 7c7e313620f..1cf105b9920 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1530,7 +1530,6 @@ err_out:
 				"error %i.", err);
 		SetPageError(page);
 		NVolSetErrors(ni->vol);
-		make_bad_inode(vi);
 	}
 	unlock_page(page);
 	if (ctx)
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 7a568eb7d80..1663f5c3c6a 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -2438,16 +2438,12 @@ undo_alloc:
 				"chkdsk to recover.", IS_ERR(m) ?
 				"restore attribute search context" :
 				"truncate attribute runlist");
-		make_bad_inode(vi);
-		make_bad_inode(VFS_I(base_ni));
 		NVolSetErrors(vol);
 	} else if (mp_rebuilt) {
 		if (ntfs_attr_record_resize(m, a, attr_len)) {
 			ntfs_error(vol->sb, "Failed to restore attribute "
 					"record in error code path.  Run "
 					"chkdsk to recover.");
-			make_bad_inode(vi);
-			make_bad_inode(VFS_I(base_ni));
 			NVolSetErrors(vol);
 		} else /* if (success) */ {
 			if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
@@ -2460,8 +2456,6 @@ undo_alloc:
 						"mapping pairs array in error "
 						"code path.  Run chkdsk to "
 						"recover.");
-				make_bad_inode(vi);
-				make_bad_inode(VFS_I(base_ni));
 				NVolSetErrors(vol);
 			}
 			flush_dcache_mft_record_page(ctx->ntfs_ino);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 2e5ba0c535d..f5d057e4acc 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1207,8 +1207,6 @@ rl_not_mapped_enoent:
 					"attribute runlist in error code "
 					"path.  Run chkdsk to recover the "
 					"lost cluster.");
-			make_bad_inode(vi);
-			make_bad_inode(VFS_I(base_ni));
 			NVolSetErrors(vol);
 		} else /* if (success) */ {
 			status.runlist_merged = 0;
@@ -1239,8 +1237,6 @@ rl_not_mapped_enoent:
 			ntfs_error(vol->sb, "Failed to restore attribute "
 					"record in error code path.  Run "
 					"chkdsk to recover.");
-			make_bad_inode(vi);
-			make_bad_inode(VFS_I(base_ni));
 			NVolSetErrors(vol);
 		} else /* if (success) */ {
 			if (ntfs_mapping_pairs_build(vol, (u8*)a +
@@ -1253,8 +1249,6 @@ rl_not_mapped_enoent:
 						"mapping pairs array in error "
 						"code path.  Run chkdsk to "
 						"recover.");
-				make_bad_inode(vi);
-				make_bad_inode(VFS_I(base_ni));
 				NVolSetErrors(vol);
 			}
 			flush_dcache_mft_record_page(ctx->ntfs_ino);
@@ -1623,11 +1617,8 @@ err_out:
 		unmap_mft_record(base_ni);
 	ntfs_error(vi->i_sb, "Failed to update initialized_size/i_size (error "
 			"code %i).", err);
-	if (err != -ENOMEM) {
+	if (err != -ENOMEM)
 		NVolSetErrors(ni->vol);
-		make_bad_inode(VFS_I(base_ni));
-		make_bad_inode(vi);
-	}
 	return err;
 }
 
@@ -1802,8 +1793,6 @@ err_out:
 		ntfs_error(vi->i_sb, "Resident attribute commit write failed "
 				"with error %i.", err);
 		NVolSetErrors(ni->vol);
-		make_bad_inode(VFS_I(base_ni));
-		make_bad_inode(vi);
 	}
 	if (ctx)
 		ntfs_attr_put_search_ctx(ctx);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 6499aafc225..7254391b0e5 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -93,6 +93,7 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
 				"Run chkdsk.", ni->mft_no);
 		ntfs_unmap_page(page);
 		page = ERR_PTR(-EIO);
+		NVolSetErrors(vol);
 	}
 err_out:
 	ni->page = NULL;
diff --git a/fs/ntfs/mft.h b/fs/ntfs/mft.h
index 407de2cef1d..639cd1bab08 100644
--- a/fs/ntfs/mft.h
+++ b/fs/ntfs/mft.h
@@ -97,10 +97,7 @@ extern int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync);
  * uptodate.
  *
  * On success, clean the mft record and return 0.  On error, leave the mft
- * record dirty and return -errno.  The caller should call make_bad_inode() on
- * the base inode to ensure no more access happens to this inode.  We do not do
- * it here as the caller may want to finish writing other extent mft records
- * first to minimize on-disk metadata inconsistencies.
+ * record dirty and return -errno.
  */
 static inline int write_mft_record(ntfs_inode *ni, MFT_RECORD *m, int sync)
 {
-- 
cgit v1.2.3


From d4faf636d6f8d8940174e38317161eb08a7a57ec Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 23 Mar 2006 16:05:11 +0000
Subject: NTFS: Limit name length in fs/ntfs/unistr.c::ntfs_nlstoucs() to
 maximum       allowed by NTFS, i.e. 255 Unicode characters, not including the
       terminating NULL (which is not stored on disk).

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  3 +++
 fs/ntfs/unistr.c  | 51 ++++++++++++++++++++++++++++++++-------------------
 2 files changed, 35 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 13e70d4e2fd..41d0381be6e 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -34,6 +34,9 @@ ToDo/Notes:
 	- Add support for sparse files which have a compression unit of 0.
 	- Remove all the make_bad_inode() calls.  This should only be called
 	  from read inode and new inode code paths.
+	- Limit name length in fs/ntfs/unistr.c::ntfs_nlstoucs() to maximum
+	  allowed by NTFS, i.e. 255 Unicode characters, not including the
+	  terminating NULL (which is not stored on disk).
 
 2.1.26 - Minor bug fixes and updates.
 
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
index 0ea887fc859..b123c0fa6bf 100644
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
@@ -1,7 +1,7 @@
 /*
  * unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2005 Anton Altaparmakov
+ * Copyright (c) 2001-2006 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -19,6 +19,8 @@
  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/slab.h>
+
 #include "types.h"
 #include "debug.h"
 #include "ntfs.h"
@@ -242,7 +244,7 @@ int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1,
  * map dictates, into a little endian, 2-byte Unicode string.
  *
  * This function allocates the string and the caller is responsible for
- * calling kmem_cache_free(ntfs_name_cache, @outs); when finished with it.
+ * calling kmem_cache_free(ntfs_name_cache, *@outs); when finished with it.
  *
  * On success the function returns the number of Unicode characters written to
  * the output string *@outs (>= 0), not counting the terminating Unicode NULL
@@ -262,37 +264,48 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
 	wchar_t wc;
 	int i, o, wc_len;
 
-	/* We don't trust outside sources. */
-	if (ins) {
+	/* We do not trust outside sources. */
+	if (likely(ins)) {
 		ucs = kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS);
-		if (ucs) {
+		if (likely(ucs)) {
 			for (i = o = 0; i < ins_len; i += wc_len) {
 				wc_len = nls->char2uni(ins + i, ins_len - i,
 						&wc);
-				if (wc_len >= 0) {
-					if (wc) {
+				if (likely(wc_len >= 0 &&
+						o < NTFS_MAX_NAME_LEN)) {
+					if (likely(wc)) {
 						ucs[o++] = cpu_to_le16(wc);
 						continue;
-					} /* else (!wc) */
+					} /* else if (!wc) */
 					break;
-				} /* else (wc_len < 0) */
-				goto conversion_err;
+				} /* else if (wc_len < 0 ||
+						o >= NTFS_MAX_NAME_LEN) */
+				goto name_err;
 			}
 			ucs[o] = 0;
 			*outs = ucs;
 			return o;
-		} /* else (!ucs) */
-		ntfs_error(vol->sb, "Failed to allocate name from "
-				"ntfs_name_cache!");
+		} /* else if (!ucs) */
+		ntfs_error(vol->sb, "Failed to allocate buffer for converted "
+				"name from ntfs_name_cache.");
 		return -ENOMEM;
-	} /* else (!ins) */
-	ntfs_error(NULL, "Received NULL pointer.");
+	} /* else if (!ins) */
+	ntfs_error(vol->sb, "Received NULL pointer.");
 	return -EINVAL;
-conversion_err:
-	ntfs_error(vol->sb, "Name using character set %s contains characters "
-			"that cannot be converted to Unicode.", nls->charset);
+name_err:
 	kmem_cache_free(ntfs_name_cache, ucs);
-	return -EILSEQ;
+	if (wc_len < 0) {
+		ntfs_error(vol->sb, "Name using character set %s contains "
+				"characters that cannot be converted to "
+				"Unicode.", nls->charset);
+		i = -EILSEQ;
+	} else /* if (o >= NTFS_MAX_NAME_LEN) */ {
+		ntfs_error(vol->sb, "Name is too long (maximum length for a "
+				"name on NTFS is %d Unicode characters.",
+				NTFS_MAX_NAME_LEN);
+		i = -ENAMETOOLONG;
+	}
+	return i;
 }
 
 /**
-- 
cgit v1.2.3


From 2c2c8c1c211c75d0cc9d7642a569ceac1aecd96d Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 23 Mar 2006 16:09:40 +0000
Subject: NTFS: Improve comments on file attribute flags in fs/ntfs/layout.h.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  1 +
 fs/ntfs/layout.h  | 25 ++++++++++++-------------
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 41d0381be6e..a3a9d4b9797 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -37,6 +37,7 @@ ToDo/Notes:
 	- Limit name length in fs/ntfs/unistr.c::ntfs_nlstoucs() to maximum
 	  allowed by NTFS, i.e. 255 Unicode characters, not including the
 	  terminating NULL (which is not stored on disk).
+	- Improve comments on file attribute flags in fs/ntfs/layout.h.
 
 2.1.26 - Minor bug fixes and updates.
 
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index f4283e12070..d34b93cb8b4 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -801,13 +801,16 @@ typedef struct {
 typedef ATTR_RECORD ATTR_REC;
 
 /*
- * File attribute flags (32-bit).
+ * File attribute flags (32-bit) appearing in the file_attributes fields of the
+ * STANDARD_INFORMATION attribute of MFT_RECORDs and the FILENAME_ATTR
+ * attributes of MFT_RECORDs and directory index entries.
+ *
+ * All of the below flags appear in the directory index entries but only some
+ * appear in the STANDARD_INFORMATION attribute whilst only some others appear
+ * in the FILENAME_ATTR attribute of MFT_RECORDs.  Unless otherwise stated the
+ * flags appear in all of the above.
  */
 enum {
-	/*
-	 * The following flags are only present in the STANDARD_INFORMATION
-	 * attribute (in the field file_attributes).
-	 */
 	FILE_ATTR_READONLY		= const_cpu_to_le32(0x00000001),
 	FILE_ATTR_HIDDEN		= const_cpu_to_le32(0x00000002),
 	FILE_ATTR_SYSTEM		= const_cpu_to_le32(0x00000004),
@@ -839,18 +842,14 @@ enum {
 	   F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest.  This mask
 	   is used to to obtain all flags that are valid for setting. */
 	/*
-	 * The following flag is only present in the FILE_NAME attribute (in
-	 * the field file_attributes).
+	 * The flag FILE_ATTR_DUP_FILENAME_INDEX_PRESENT is present in all
+	 * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION
+	 * attribute of an mft record.
 	 */
 	FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT	= const_cpu_to_le32(0x10000000),
 	/* Note, this is a copy of the corresponding bit from the mft record,
 	   telling us whether this is a directory or not, i.e. whether it has
 	   an index root attribute or not. */
-	/*
-	 * The following flag is present both in the STANDARD_INFORMATION
-	 * attribute and in the FILE_NAME attribute (in the field
-	 * file_attributes).
-	 */
 	FILE_ATTR_DUP_VIEW_INDEX_PRESENT	= const_cpu_to_le32(0x20000000),
 	/* Note, this is a copy of the corresponding bit from the mft record,
 	   telling us whether this file has a view index present (eg. object id
@@ -891,7 +890,7 @@ typedef struct {
 					   Windows this is only updated when
 					   accessed if some time delta has
 					   passed since the last update. Also,
-					   last access times updates can be
+					   last access time updates can be
 					   disabled altogether for speed. */
 /* 32*/	FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */
 /* 36*/	union {
-- 
cgit v1.2.3


From a778f217328a7391e0919b6463ec7f143851d12d Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 23 Mar 2006 16:18:23 +0000
Subject: NTFS: Fix a bug in fs/ntfs/inode.c::ntfs_read_locked_index_inode()
 where we       forgot to update a temporary variable so loading index inodes
 which       have an index allocation attribute failed.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  3 +++
 fs/ntfs/inode.c   | 26 +++++++++++---------------
 fs/ntfs/mft.c     |  5 +----
 3 files changed, 15 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index a3a9d4b9797..5fb74e62f53 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -38,6 +38,9 @@ ToDo/Notes:
 	  allowed by NTFS, i.e. 255 Unicode characters, not including the
 	  terminating NULL (which is not stored on disk).
 	- Improve comments on file attribute flags in fs/ntfs/layout.h.
+	- Fix a bug in fs/ntfs/inode.c::ntfs_read_locked_index_inode() where we
+	  forgot to update a temporary variable so loading index inodes which
+	  have an index allocation attribute failed.
 
 2.1.26 - Minor bug fixes and updates.
 
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index ae341922f42..5f4b23d213b 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -19,15 +19,19 @@
  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#include <linux/pagemap.h>
 #include <linux/buffer_head.h>
-#include <linux/smp_lock.h>
-#include <linux/quotaops.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
 #include <linux/mount.h>
 #include <linux/mutex.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
 
 #include "aops.h"
 #include "attrib.h"
+#include "bitmap.h"
 #include "dir.h"
 #include "debug.h"
 #include "inode.h"
@@ -1428,7 +1432,6 @@ err_out:
 			"Run chkdsk.", err, vi->i_ino, ni->type, ni->name_len,
 			base_vi->i_ino);
 	make_bad_inode(vi);
-	make_bad_inode(base_vi);
 	if (err != -ENOMEM)
 		NVolSetErrors(vol);
 	return err;
@@ -1613,6 +1616,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
 					"$INDEX_ALLOCATION attribute.");
 		goto unm_err_out;
 	}
+	a = ctx->attr;
 	if (!a->non_resident) {
 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
 				"resident.");
@@ -2845,11 +2849,8 @@ done:
 old_bad_out:
 	old_size = -1;
 bad_out:
-	if (err != -ENOMEM && err != -EOPNOTSUPP) {
-		make_bad_inode(vi);
-		make_bad_inode(VFS_I(base_ni));
+	if (err != -ENOMEM && err != -EOPNOTSUPP)
 		NVolSetErrors(vol);
-	}
 	if (err != -EOPNOTSUPP)
 		NInoSetTruncateFailed(ni);
 	else if (old_size >= 0)
@@ -2864,11 +2865,8 @@ out:
 	ntfs_debug("Failed.  Returning error code %i.", err);
 	return err;
 conv_err_out:
-	if (err != -ENOMEM && err != -EOPNOTSUPP) {
-		make_bad_inode(vi);
-		make_bad_inode(VFS_I(base_ni));
+	if (err != -ENOMEM && err != -EOPNOTSUPP)
 		NVolSetErrors(vol);
-	}
 	if (err != -EOPNOTSUPP)
 		NInoSetTruncateFailed(ni);
 	else
@@ -3116,9 +3114,7 @@ err_out:
 				"retries later.");
 		mark_inode_dirty(vi);
 	} else {
-		ntfs_error(vi->i_sb, "Failed (error code %i):  Marking inode "
-				"as bad.  You should run chkdsk.", -err);
-		make_bad_inode(vi);
+		ntfs_error(vi->i_sb, "Failed (error %i):  Run chkdsk.", -err);
 		NVolSetErrors(ni->vol);
 	}
 	return err;
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 7254391b0e5..eb3eb143a32 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -651,10 +651,7 @@ err_out:
  * fs/ntfs/aops.c::mark_ntfs_record_dirty().
  *
  * On success, clean the mft record and return 0.  On error, leave the mft
- * record dirty and return -errno.  The caller should call make_bad_inode() on
- * the base inode to ensure no more access happens to this inode.  We do not do
- * it here as the caller may want to finish writing other extent mft records
- * first to minimize on-disk metadata inconsistencies.
+ * record dirty and return -errno.
  *
  * NOTE:  We always perform synchronous i/o and ignore the @sync parameter.
  * However, if the mft record has a counterpart in the mft mirror and @sync is
-- 
cgit v1.2.3


From 20fdcf1d543b1285ef8b1c1993a9221f2eda52dc Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 23 Mar 2006 16:21:02 +0000
Subject: NTFS: Add a missing call to flush_dcache_mft_record_page() in      
 fs/ntfs/inode.c::ntfs_write_inode().

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 2 ++
 fs/ntfs/inode.c   | 9 ++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 5fb74e62f53..d20031587bb 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -41,6 +41,8 @@ ToDo/Notes:
 	- Fix a bug in fs/ntfs/inode.c::ntfs_read_locked_index_inode() where we
 	  forgot to update a temporary variable so loading index inodes which
 	  have an index allocation attribute failed.
+	- Add a missing call to flush_dcache_mft_record_page() in
+	  fs/ntfs/inode.c::ntfs_write_inode().
 
 2.1.26 - Minor bug fixes and updates.
 
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 5f4b23d213b..73791b2d949 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -3064,9 +3064,12 @@ int ntfs_write_inode(struct inode *vi, int sync)
 	 * record will be cleaned and written out to disk below, i.e. before
 	 * this function returns.
 	 */
-	if (modified && !NInoTestSetDirty(ctx->ntfs_ino))
-		mark_ntfs_record_dirty(ctx->ntfs_ino->page,
-				ctx->ntfs_ino->page_ofs);
+	if (modified) {
+		flush_dcache_mft_record_page(ctx->ntfs_ino);
+		if (!NInoTestSetDirty(ctx->ntfs_ino)) {
+			mark_ntfs_record_dirty(ctx->ntfs_ino->page,
+					ctx->ntfs_ino->page_ofs);
+	}
 	ntfs_attr_put_search_ctx(ctx);
 	/* Now the access times are updated, write the base mft record. */
 	if (NInoDirty(ni))
-- 
cgit v1.2.3


From 834ba600cefe6847acaebe5e8e984476dfeebf55 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 23 Mar 2006 16:25:23 +0000
Subject: NTFS: Handle the recently introduced -ENAMETOOLONG return value from 
      fs/ntfs/unistr.c::ntfs_nlstoucs() in fs/ntfs/namei.c::ntfs_lookup().

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 4 ++--
 fs/ntfs/namei.c   | 7 ++++---
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index d20031587bb..9fb08ef3a7f 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -16,8 +16,6 @@ ToDo/Notes:
 	  inode having been discarded already.  Whether this can actually ever
 	  happen is unclear however so it is worth waiting until someone hits
 	  the problem.
-	- Enable the code for setting the NT4 compatibility flag when we start
-	  making NTFS 1.2 specific modifications.
 
 2.1.27 - Various bug fixes and cleanups.
 
@@ -43,6 +41,8 @@ ToDo/Notes:
 	  have an index allocation attribute failed.
 	- Add a missing call to flush_dcache_mft_record_page() in
 	  fs/ntfs/inode.c::ntfs_write_inode().
+	- Handle the recently introduced -ENAMETOOLONG return value from
+	  fs/ntfs/unistr.c::ntfs_nlstoucs() in fs/ntfs/namei.c::ntfs_lookup().
 
 2.1.26 - Minor bug fixes and updates.
 
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 78e0cf738e2..eddb2247cec 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -115,7 +115,9 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
 	uname_len = ntfs_nlstoucs(vol, dent->d_name.name, dent->d_name.len,
 			&uname);
 	if (uname_len < 0) {
-		ntfs_error(vol->sb, "Failed to convert name to Unicode.");
+		if (uname_len != -ENAMETOOLONG)
+			ntfs_error(vol->sb, "Failed to convert name to "
+					"Unicode.");
 		return ERR_PTR(uname_len);
 	}
 	mref = ntfs_lookup_inode_by_name(NTFS_I(dir_ino), uname, uname_len,
@@ -157,7 +159,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
 		/* Return the error code. */
 		return (struct dentry *)dent_inode;
 	}
-	/* It is guaranteed that name is no longer allocated at this point. */
+	/* It is guaranteed that @name is no longer allocated at this point. */
 	if (MREF_ERR(mref) == -ENOENT) {
 		ntfs_debug("Entry was not found, adding negative dentry.");
 		/* The dcache will handle negative entries. */
@@ -168,7 +170,6 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
 	ntfs_error(vol->sb, "ntfs_lookup_ino_by_name() failed with error "
 			"code %i.", -MREF_ERR(mref));
 	return ERR_PTR(MREF_ERR(mref));
-
 	// TODO: Consider moving this lot to a separate function! (AIA)
 handle_name:
    {
-- 
cgit v1.2.3


From 4e5e529ad684f1b3fba957f5dd4eb7c2b534ee92 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 23 Mar 2006 16:57:48 +0000
Subject: NTFS: Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog  |  1 +
 fs/ntfs/aops.c     |  6 +++---
 fs/ntfs/compress.c |  4 ++--
 fs/ntfs/inode.c    | 10 ++++-----
 fs/ntfs/inode.h    | 13 ++++++------
 fs/ntfs/mft.c      | 62 +++++++++++++++++++++++++++---------------------------
 fs/ntfs/ntfs.h     |  2 +-
 fs/ntfs/super.c    | 42 ++++++++++++++++++------------------
 8 files changed, 71 insertions(+), 69 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 9fb08ef3a7f..35cc4b1d60f 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -43,6 +43,7 @@ ToDo/Notes:
 	  fs/ntfs/inode.c::ntfs_write_inode().
 	- Handle the recently introduced -ENAMETOOLONG return value from
 	  fs/ntfs/unistr.c::ntfs_nlstoucs() in fs/ntfs/namei.c::ntfs_lookup().
+	- Semaphore to mutex conversion.  (Ingo Molnar)
 
 2.1.26 - Minor bug fixes and updates.
 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 1cf105b9920..580412d330c 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1278,18 +1278,18 @@ unm_done:
 		
 		tni = locked_nis[nr_locked_nis];
 		/* Get the base inode. */
-		down(&tni->extent_lock);
+		mutex_lock(&tni->extent_lock);
 		if (tni->nr_extents >= 0)
 			base_tni = tni;
 		else {
 			base_tni = tni->ext.base_ntfs_ino;
 			BUG_ON(!base_tni);
 		}
-		up(&tni->extent_lock);
+		mutex_unlock(&tni->extent_lock);
 		ntfs_debug("Unlocking %s inode 0x%lx.",
 				tni == base_tni ? "base" : "extent",
 				tni->mft_no);
-		up(&tni->mrec_lock);
+		mutex_unlock(&tni->mrec_lock);
 		atomic_dec(&tni->count);
 		iput(VFS_I(base_tni));
 	}
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 25d24106f89..68a607ff9fd 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -67,7 +67,7 @@ static DEFINE_SPINLOCK(ntfs_cb_lock);
 /**
  * allocate_compression_buffers - allocate the decompression buffers
  *
- * Caller has to hold the ntfs_lock semaphore.
+ * Caller has to hold the ntfs_lock mutex.
  *
  * Return 0 on success or -ENOMEM if the allocations failed.
  */
@@ -84,7 +84,7 @@ int allocate_compression_buffers(void)
 /**
  * free_compression_buffers - free the decompression buffers
  *
- * Caller has to hold the ntfs_lock semaphore.
+ * Caller has to hold the ntfs_lock mutex.
  */
 void free_compression_buffers(void)
 {
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 73791b2d949..4c86b7e1d1e 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -388,7 +388,7 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
 	atomic_set(&ni->count, 1);
 	ni->vol = NTFS_SB(sb);
 	ntfs_init_runlist(&ni->runlist);
-	init_MUTEX(&ni->mrec_lock);
+	mutex_init(&ni->mrec_lock);
 	ni->page = NULL;
 	ni->page_ofs = 0;
 	ni->attr_list_size = 0;
@@ -400,7 +400,7 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
 	ni->itype.index.collation_rule = 0;
 	ni->itype.index.block_size_bits = 0;
 	ni->itype.index.vcn_size_bits = 0;
-	init_MUTEX(&ni->extent_lock);
+	mutex_init(&ni->extent_lock);
 	ni->nr_extents = 0;
 	ni->ext.base_ntfs_ino = NULL;
 }
@@ -3066,7 +3066,7 @@ int ntfs_write_inode(struct inode *vi, int sync)
 	 */
 	if (modified) {
 		flush_dcache_mft_record_page(ctx->ntfs_ino);
-		if (!NInoTestSetDirty(ctx->ntfs_ino)) {
+		if (!NInoTestSetDirty(ctx->ntfs_ino))
 			mark_ntfs_record_dirty(ctx->ntfs_ino->page,
 					ctx->ntfs_ino->page_ofs);
 	}
@@ -3075,7 +3075,7 @@ int ntfs_write_inode(struct inode *vi, int sync)
 	if (NInoDirty(ni))
 		err = write_mft_record(ni, m, sync);
 	/* Write all attached extent mft records. */
-	down(&ni->extent_lock);
+	mutex_lock(&ni->extent_lock);
 	if (ni->nr_extents > 0) {
 		ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos;
 		int i;
@@ -3102,7 +3102,7 @@ int ntfs_write_inode(struct inode *vi, int sync)
 			}
 		}
 	}
-	up(&ni->extent_lock);
+	mutex_unlock(&ni->extent_lock);
 	unmap_mft_record(ni);
 	if (unlikely(err))
 		goto err_out;
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 3de5c023196..f088291e017 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -24,12 +24,13 @@
 #ifndef _LINUX_NTFS_INODE_H
 #define _LINUX_NTFS_INODE_H
 
-#include <linux/mm.h>
+#include <asm/atomic.h>
+
 #include <linux/fs.h>
-#include <linux/seq_file.h>
 #include <linux/list.h>
-#include <asm/atomic.h>
-#include <asm/semaphore.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/seq_file.h>
 
 #include "layout.h"
 #include "volume.h"
@@ -81,7 +82,7 @@ struct _ntfs_inode {
 	 * The following fields are only valid for real inodes and extent
 	 * inodes.
 	 */
-	struct semaphore mrec_lock; /* Lock for serializing access to the
+	struct mutex mrec_lock;	/* Lock for serializing access to the
 				   mft record belonging to this inode. */
 	struct page *page;	/* The page containing the mft record of the
 				   inode. This should only be touched by the
@@ -119,7 +120,7 @@ struct _ntfs_inode {
 			u8 block_clusters;	/* Number of clusters per cb. */
 		} compressed;
 	} itype;
-	struct semaphore extent_lock;	/* Lock for accessing/modifying the
+	struct mutex extent_lock;	/* Lock for accessing/modifying the
 					   below . */
 	s32 nr_extents;	/* For a base mft record, the number of attached extent
 			   inodes (0 if none), for extent records and for fake
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index eb3eb143a32..4e72bc7afdf 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -105,8 +105,8 @@ err_out:
  * map_mft_record - map, pin and lock an mft record
  * @ni:		ntfs inode whose MFT record to map
  *
- * First, take the mrec_lock semaphore. We might now be sleeping, while waiting
- * for the semaphore if it was already locked by someone else.
+ * First, take the mrec_lock mutex.  We might now be sleeping, while waiting
+ * for the mutex if it was already locked by someone else.
  *
  * The page of the record is mapped using map_mft_record_page() before being
  * returned to the caller.
@@ -136,9 +136,9 @@ err_out:
  * So that code will end up having to own the mrec_lock of all mft
  * records/inodes present in the page before I/O can proceed. In that case we
  * wouldn't need to bother with PG_locked and PG_uptodate as nobody will be
- * accessing anything without owning the mrec_lock semaphore. But we do need
- * to use them because of the read_cache_page() invocation and the code becomes
- * so much simpler this way that it is well worth it.
+ * accessing anything without owning the mrec_lock mutex.  But we do need to
+ * use them because of the read_cache_page() invocation and the code becomes so
+ * much simpler this way that it is well worth it.
  *
  * The mft record is now ours and we return a pointer to it. You need to check
  * the returned pointer with IS_ERR() and if that is true, PTR_ERR() will return
@@ -161,13 +161,13 @@ MFT_RECORD *map_mft_record(ntfs_inode *ni)
 	atomic_inc(&ni->count);
 
 	/* Serialize access to this mft record. */
-	down(&ni->mrec_lock);
+	mutex_lock(&ni->mrec_lock);
 
 	m = map_mft_record_page(ni);
 	if (likely(!IS_ERR(m)))
 		return m;
 
-	up(&ni->mrec_lock);
+	mutex_unlock(&ni->mrec_lock);
 	atomic_dec(&ni->count);
 	ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m));
 	return m;
@@ -218,7 +218,7 @@ void unmap_mft_record(ntfs_inode *ni)
 	ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no);
 
 	unmap_mft_record_page(ni);
-	up(&ni->mrec_lock);
+	mutex_unlock(&ni->mrec_lock);
 	atomic_dec(&ni->count);
 	/*
 	 * If pure ntfs_inode, i.e. no vfs inode attached, we leave it to
@@ -262,7 +262,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
 	 * in which case just return it. If not found, add it to the base
 	 * inode before returning it.
 	 */
-	down(&base_ni->extent_lock);
+	mutex_lock(&base_ni->extent_lock);
 	if (base_ni->nr_extents > 0) {
 		extent_nis = base_ni->ext.extent_ntfs_inos;
 		for (i = 0; i < base_ni->nr_extents; i++) {
@@ -275,7 +275,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
 		}
 	}
 	if (likely(ni != NULL)) {
-		up(&base_ni->extent_lock);
+		mutex_unlock(&base_ni->extent_lock);
 		atomic_dec(&base_ni->count);
 		/* We found the record; just have to map and return it. */
 		m = map_mft_record(ni);
@@ -302,7 +302,7 @@ map_err_out:
 	/* Record wasn't there. Get a new ntfs inode and initialize it. */
 	ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no);
 	if (unlikely(!ni)) {
-		up(&base_ni->extent_lock);
+		mutex_unlock(&base_ni->extent_lock);
 		atomic_dec(&base_ni->count);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -313,7 +313,7 @@ map_err_out:
 	/* Now map the record. */
 	m = map_mft_record(ni);
 	if (IS_ERR(m)) {
-		up(&base_ni->extent_lock);
+		mutex_unlock(&base_ni->extent_lock);
 		atomic_dec(&base_ni->count);
 		ntfs_clear_extent_inode(ni);
 		goto map_err_out;
@@ -348,14 +348,14 @@ map_err_out:
 		base_ni->ext.extent_ntfs_inos = tmp;
 	}
 	base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni;
-	up(&base_ni->extent_lock);
+	mutex_unlock(&base_ni->extent_lock);
 	atomic_dec(&base_ni->count);
 	ntfs_debug("Done 2.");
 	*ntfs_ino = ni;
 	return m;
 unm_err_out:
 	unmap_mft_record(ni);
-	up(&base_ni->extent_lock);
+	mutex_unlock(&base_ni->extent_lock);
 	atomic_dec(&base_ni->count);
 	/*
 	 * If the extent inode was not attached to the base inode we need to
@@ -400,12 +400,12 @@ void __mark_mft_record_dirty(ntfs_inode *ni)
 	BUG_ON(NInoAttr(ni));
 	mark_ntfs_record_dirty(ni->page, ni->page_ofs);
 	/* Determine the base vfs inode and mark it dirty, too. */
-	down(&ni->extent_lock);
+	mutex_lock(&ni->extent_lock);
 	if (likely(ni->nr_extents >= 0))
 		base_ni = ni;
 	else
 		base_ni = ni->ext.base_ntfs_ino;
-	up(&ni->extent_lock);
+	mutex_unlock(&ni->extent_lock);
 	__mark_inode_dirty(VFS_I(base_ni), I_DIRTY_SYNC | I_DIRTY_DATASYNC);
 }
 
@@ -981,7 +981,7 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
 		}
 		ntfs_debug("Inode 0x%lx is not dirty.", mft_no);
 		/* The inode is not dirty, try to take the mft record lock. */
-		if (unlikely(down_trylock(&ni->mrec_lock))) {
+		if (unlikely(!mutex_trylock(&ni->mrec_lock))) {
 			ntfs_debug("Mft record 0x%lx is already locked, do "
 					"not write it.", mft_no);
 			atomic_dec(&ni->count);
@@ -1041,13 +1041,13 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
 	 * corresponding to this extent mft record attached.
 	 */
 	ni = NTFS_I(vi);
-	down(&ni->extent_lock);
+	mutex_lock(&ni->extent_lock);
 	if (ni->nr_extents <= 0) {
 		/*
 		 * The base inode has no attached extent inodes, write this
 		 * extent mft record.
 		 */
-		up(&ni->extent_lock);
+		mutex_unlock(&ni->extent_lock);
 		iput(vi);
 		ntfs_debug("Base inode 0x%lx has no attached extent inodes, "
 				"write the extent record.", na.mft_no);
@@ -1070,7 +1070,7 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
 	 * extent mft record.
 	 */
 	if (!eni) {
-		up(&ni->extent_lock);
+		mutex_unlock(&ni->extent_lock);
 		iput(vi);
 		ntfs_debug("Extent inode 0x%lx is not attached to its base "
 				"inode 0x%lx, write the extent record.",
@@ -1081,12 +1081,12 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
 			mft_no, na.mft_no);
 	/* Take a reference to the extent ntfs inode. */
 	atomic_inc(&eni->count);
-	up(&ni->extent_lock);
+	mutex_unlock(&ni->extent_lock);
 	/*
 	 * Found the extent inode coresponding to this extent mft record.
 	 * Try to take the mft record lock.
 	 */
-	if (unlikely(down_trylock(&eni->mrec_lock))) {
+	if (unlikely(!mutex_trylock(&eni->mrec_lock))) {
 		atomic_dec(&eni->count);
 		iput(vi);
 		ntfs_debug("Extent mft record 0x%lx is already locked, do "
@@ -2709,7 +2709,7 @@ mft_rec_already_initialized:
 		 * have its page mapped and it is very easy to do.
 		 */
 		atomic_inc(&ni->count);
-		down(&ni->mrec_lock);
+		mutex_lock(&ni->mrec_lock);
 		ni->page = page;
 		ni->page_ofs = ofs;
 		/*
@@ -2796,22 +2796,22 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m)
 	BUG_ON(NInoAttr(ni));
 	BUG_ON(ni->nr_extents != -1);
 
-	down(&ni->extent_lock);
+	mutex_lock(&ni->extent_lock);
 	base_ni = ni->ext.base_ntfs_ino;
-	up(&ni->extent_lock);
+	mutex_unlock(&ni->extent_lock);
 
 	BUG_ON(base_ni->nr_extents <= 0);
 
 	ntfs_debug("Entering for extent inode 0x%lx, base inode 0x%lx.\n",
 			mft_no, base_ni->mft_no);
 
-	down(&base_ni->extent_lock);
+	mutex_lock(&base_ni->extent_lock);
 
 	/* Make sure we are holding the only reference to the extent inode. */
 	if (atomic_read(&ni->count) > 2) {
 		ntfs_error(vol->sb, "Tried to free busy extent inode 0x%lx, "
 				"not freeing.", base_ni->mft_no);
-		up(&base_ni->extent_lock);
+		mutex_unlock(&base_ni->extent_lock);
 		return -EBUSY;
 	}
 
@@ -2829,7 +2829,7 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m)
 		break;
 	}
 
-	up(&base_ni->extent_lock);
+	mutex_unlock(&base_ni->extent_lock);
 
 	if (unlikely(err)) {
 		ntfs_error(vol->sb, "Extent inode 0x%lx is not attached to "
@@ -2888,7 +2888,7 @@ rollback_error:
 	return 0;
 rollback:
 	/* Rollback what we did... */
-	down(&base_ni->extent_lock);
+	mutex_lock(&base_ni->extent_lock);
 	extent_nis = base_ni->ext.extent_ntfs_inos;
 	if (!(base_ni->nr_extents & 3)) {
 		int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode*);
@@ -2897,7 +2897,7 @@ rollback:
 		if (unlikely(!extent_nis)) {
 			ntfs_error(vol->sb, "Failed to allocate internal "
 					"buffer during rollback.%s", es);
-			up(&base_ni->extent_lock);
+			mutex_unlock(&base_ni->extent_lock);
 			NVolSetErrors(vol);
 			goto rollback_error;
 		}
@@ -2912,7 +2912,7 @@ rollback:
 	m->flags |= MFT_RECORD_IN_USE;
 	m->sequence_number = old_seq_no;
 	extent_nis[base_ni->nr_extents++] = ni;
-	up(&base_ni->extent_lock);
+	mutex_unlock(&base_ni->extent_lock);
 	mark_mft_record_dirty(ni);
 	return err;
 }
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index 653d2a5c489..0624c8ef4d9 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -91,7 +91,7 @@ extern void free_compression_buffers(void);
 
 /* From fs/ntfs/super.c */
 #define default_upcase_len 0x10000
-extern struct semaphore ntfs_lock;
+extern struct mutex ntfs_lock;
 
 typedef struct {
 	int val;
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index fd4aecc5548..6816edafe4d 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1677,11 +1677,11 @@ read_partial_upcase_page:
 	ntfs_debug("Read %llu bytes from $UpCase (expected %zu bytes).",
 			i_size, 64 * 1024 * sizeof(ntfschar));
 	iput(ino);
-	down(&ntfs_lock);
+	mutex_lock(&ntfs_lock);
 	if (!default_upcase) {
 		ntfs_debug("Using volume specified $UpCase since default is "
 				"not present.");
-		up(&ntfs_lock);
+		mutex_unlock(&ntfs_lock);
 		return TRUE;
 	}
 	max = default_upcase_len;
@@ -1695,12 +1695,12 @@ read_partial_upcase_page:
 		vol->upcase = default_upcase;
 		vol->upcase_len = max;
 		ntfs_nr_upcase_users++;
-		up(&ntfs_lock);
+		mutex_unlock(&ntfs_lock);
 		ntfs_debug("Volume specified $UpCase matches default. Using "
 				"default.");
 		return TRUE;
 	}
-	up(&ntfs_lock);
+	mutex_unlock(&ntfs_lock);
 	ntfs_debug("Using volume specified $UpCase since it does not match "
 			"the default.");
 	return TRUE;
@@ -1709,17 +1709,17 @@ iput_upcase_failed:
 	ntfs_free(vol->upcase);
 	vol->upcase = NULL;
 upcase_failed:
-	down(&ntfs_lock);
+	mutex_lock(&ntfs_lock);
 	if (default_upcase) {
 		vol->upcase = default_upcase;
 		vol->upcase_len = default_upcase_len;
 		ntfs_nr_upcase_users++;
-		up(&ntfs_lock);
+		mutex_unlock(&ntfs_lock);
 		ntfs_error(sb, "Failed to load $UpCase from the volume. Using "
 				"default.");
 		return TRUE;
 	}
-	up(&ntfs_lock);
+	mutex_unlock(&ntfs_lock);
 	ntfs_error(sb, "Failed to initialize upcase table.");
 	return FALSE;
 }
@@ -2195,12 +2195,12 @@ iput_attrdef_err_out:
 iput_upcase_err_out:
 #endif /* NTFS_RW */
 	vol->upcase_len = 0;
-	down(&ntfs_lock);
+	mutex_lock(&ntfs_lock);
 	if (vol->upcase == default_upcase) {
 		ntfs_nr_upcase_users--;
 		vol->upcase = NULL;
 	}
-	up(&ntfs_lock);
+	mutex_unlock(&ntfs_lock);
 	if (vol->upcase) {
 		ntfs_free(vol->upcase);
 		vol->upcase = NULL;
@@ -2405,7 +2405,7 @@ static void ntfs_put_super(struct super_block *sb)
 	 * Destroy the global default upcase table if necessary.  Also decrease
 	 * the number of upcase users if we are a user.
 	 */
-	down(&ntfs_lock);
+	mutex_lock(&ntfs_lock);
 	if (vol->upcase == default_upcase) {
 		ntfs_nr_upcase_users--;
 		vol->upcase = NULL;
@@ -2416,7 +2416,7 @@ static void ntfs_put_super(struct super_block *sb)
 	}
 	if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users)
 		free_compression_buffers();
-	up(&ntfs_lock);
+	mutex_unlock(&ntfs_lock);
 	if (vol->upcase) {
 		ntfs_free(vol->upcase);
 		vol->upcase = NULL;
@@ -2890,7 +2890,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 			ntfs_error(sb, "Failed to load essential metadata.");
 		goto iput_tmp_ino_err_out_now;
 	}
-	down(&ntfs_lock);
+	mutex_lock(&ntfs_lock);
 	/*
 	 * The current mount is a compression user if the cluster size is
 	 * less than or equal 4kiB.
@@ -2901,7 +2901,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 			ntfs_error(NULL, "Failed to allocate buffers "
 					"for compression engine.");
 			ntfs_nr_compression_users--;
-			up(&ntfs_lock);
+			mutex_unlock(&ntfs_lock);
 			goto iput_tmp_ino_err_out_now;
 		}
 	}
@@ -2913,7 +2913,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 	if (!default_upcase)
 		default_upcase = generate_default_upcase();
 	ntfs_nr_upcase_users++;
-	up(&ntfs_lock);
+	mutex_unlock(&ntfs_lock);
 	/*
 	 * From now on, ignore @silent parameter. If we fail below this line,
 	 * it will be due to a corrupt fs or a system error, so we report it.
@@ -2931,12 +2931,12 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 		atomic_inc(&vol->root_ino->i_count);
 		ntfs_debug("Exiting, status successful.");
 		/* Release the default upcase if it has no users. */
-		down(&ntfs_lock);
+		mutex_lock(&ntfs_lock);
 		if (!--ntfs_nr_upcase_users && default_upcase) {
 			ntfs_free(default_upcase);
 			default_upcase = NULL;
 		}
-		up(&ntfs_lock);
+		mutex_unlock(&ntfs_lock);
 		sb->s_export_op = &ntfs_export_ops;
 		lock_kernel();
 		return 0;
@@ -3004,12 +3004,12 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 		vol->attrdef = NULL;
 	}
 	vol->upcase_len = 0;
-	down(&ntfs_lock);
+	mutex_lock(&ntfs_lock);
 	if (vol->upcase == default_upcase) {
 		ntfs_nr_upcase_users--;
 		vol->upcase = NULL;
 	}
-	up(&ntfs_lock);
+	mutex_unlock(&ntfs_lock);
 	if (vol->upcase) {
 		ntfs_free(vol->upcase);
 		vol->upcase = NULL;
@@ -3024,14 +3024,14 @@ unl_upcase_iput_tmp_ino_err_out_now:
 	 * Decrease the number of upcase users and destroy the global default
 	 * upcase table if necessary.
 	 */
-	down(&ntfs_lock);
+	mutex_lock(&ntfs_lock);
 	if (!--ntfs_nr_upcase_users && default_upcase) {
 		ntfs_free(default_upcase);
 		default_upcase = NULL;
 	}
 	if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users)
 		free_compression_buffers();
-	up(&ntfs_lock);
+	mutex_unlock(&ntfs_lock);
 iput_tmp_ino_err_out_now:
 	iput(tmp_ino);
 	if (vol->mft_ino && vol->mft_ino != tmp_ino)
@@ -3091,7 +3091,7 @@ struct kmem_cache *ntfs_attr_ctx_cache;
 struct kmem_cache *ntfs_index_ctx_cache;
 
 /* Driver wide semaphore. */
-DECLARE_MUTEX(ntfs_lock);
+DEFINE_MUTEX(ntfs_lock);
 
 static struct super_block *ntfs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
-- 
cgit v1.2.3


From e750d1c7cc314b9ba1934b0b474b7d39f906f865 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 23 Mar 2006 17:04:12 +0000
Subject: NTFS: 2.1.27 - Various bug fixes and cleanups.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 6816edafe4d..7646b505938 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3090,7 +3090,7 @@ static void ntfs_big_inode_init_once(void *foo, struct kmem_cache *cachep,
 struct kmem_cache *ntfs_attr_ctx_cache;
 struct kmem_cache *ntfs_index_ctx_cache;
 
-/* Driver wide semaphore. */
+/* Driver wide mutex. */
 DEFINE_MUTEX(ntfs_lock);
 
 static struct super_block *ntfs_get_sb(struct file_system_type *fs_type,
-- 
cgit v1.2.3


From b86ff981a8252d83d6a7719ae09f3a05307e3592 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 23 Mar 2006 19:56:55 +0100
Subject: [PATCH] relay: migrate from relayfs to a generic relay API

Original patch from Paul Mundt, sysfs parts removed by me since they
were broken.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 fs/Kconfig           |  12 --
 fs/Makefile          |   1 -
 fs/relayfs/Makefile  |   4 -
 fs/relayfs/buffers.c | 190 -----------------
 fs/relayfs/buffers.h |  12 --
 fs/relayfs/inode.c   | 581 ---------------------------------------------------
 fs/relayfs/relay.c   | 482 ------------------------------------------
 fs/relayfs/relay.h   |   8 -
 8 files changed, 1290 deletions(-)
 delete mode 100644 fs/relayfs/Makefile
 delete mode 100644 fs/relayfs/buffers.c
 delete mode 100644 fs/relayfs/buffers.h
 delete mode 100644 fs/relayfs/inode.c
 delete mode 100644 fs/relayfs/relay.c
 delete mode 100644 fs/relayfs/relay.h

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index e9749b0eecd..c8d0a209120 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -859,18 +859,6 @@ config RAMFS
 	  To compile this as a module, choose M here: the module will be called
 	  ramfs.
 
-config RELAYFS_FS
-	tristate "Relayfs file system support"
-	---help---
-	  Relayfs is a high-speed data relay filesystem designed to provide
-	  an efficient mechanism for tools and facilities to relay large
-	  amounts of data from kernel space to user space.
-
-	  To compile this code as a module, choose M here: the module will be
-	  called relayfs.
-
-	  If unsure, say N.
-
 config CONFIGFS_FS
 	tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
diff --git a/fs/Makefile b/fs/Makefile
index 1db711319c8..080b3867be4 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -91,7 +91,6 @@ obj-$(CONFIG_AUTOFS4_FS)	+= autofs4/
 obj-$(CONFIG_ADFS_FS)		+= adfs/
 obj-$(CONFIG_FUSE_FS)		+= fuse/
 obj-$(CONFIG_UDF_FS)		+= udf/
-obj-$(CONFIG_RELAYFS_FS)	+= relayfs/
 obj-$(CONFIG_SUN_OPENPROMFS)	+= openpromfs/
 obj-$(CONFIG_JFS_FS)		+= jfs/
 obj-$(CONFIG_XFS_FS)		+= xfs/
diff --git a/fs/relayfs/Makefile b/fs/relayfs/Makefile
deleted file mode 100644
index e76e182cdb3..00000000000
--- a/fs/relayfs/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-obj-$(CONFIG_RELAYFS_FS) += relayfs.o
-
-relayfs-y := relay.o inode.o buffers.o
-
diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c
deleted file mode 100644
index 10187812771..00000000000
--- a/fs/relayfs/buffers.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * RelayFS buffer management code.
- *
- * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
- * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
- *
- * This file is released under the GPL.
- */
-
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/relayfs_fs.h>
-#include "relay.h"
-#include "buffers.h"
-
-/*
- * close() vm_op implementation for relayfs file mapping.
- */
-static void relay_file_mmap_close(struct vm_area_struct *vma)
-{
-	struct rchan_buf *buf = vma->vm_private_data;
-	buf->chan->cb->buf_unmapped(buf, vma->vm_file);
-}
-
-/*
- * nopage() vm_op implementation for relayfs file mapping.
- */
-static struct page *relay_buf_nopage(struct vm_area_struct *vma,
-				     unsigned long address,
-				     int *type)
-{
-	struct page *page;
-	struct rchan_buf *buf = vma->vm_private_data;
-	unsigned long offset = address - vma->vm_start;
-
-	if (address > vma->vm_end)
-		return NOPAGE_SIGBUS; /* Disallow mremap */
-	if (!buf)
-		return NOPAGE_OOM;
-
-	page = vmalloc_to_page(buf->start + offset);
-	if (!page)
-		return NOPAGE_OOM;
-	get_page(page);
-
-	if (type)
-		*type = VM_FAULT_MINOR;
-
-	return page;
-}
-
-/*
- * vm_ops for relay file mappings.
- */
-static struct vm_operations_struct relay_file_mmap_ops = {
-	.nopage = relay_buf_nopage,
-	.close = relay_file_mmap_close,
-};
-
-/**
- *	relay_mmap_buf: - mmap channel buffer to process address space
- *	@buf: relay channel buffer
- *	@vma: vm_area_struct describing memory to be mapped
- *
- *	Returns 0 if ok, negative on error
- *
- *	Caller should already have grabbed mmap_sem.
- */
-int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma)
-{
-	unsigned long length = vma->vm_end - vma->vm_start;
-	struct file *filp = vma->vm_file;
-
-	if (!buf)
-		return -EBADF;
-
-	if (length != (unsigned long)buf->chan->alloc_size)
-		return -EINVAL;
-
-	vma->vm_ops = &relay_file_mmap_ops;
-	vma->vm_private_data = buf;
-	buf->chan->cb->buf_mapped(buf, filp);
-
-	return 0;
-}
-
-/**
- *	relay_alloc_buf - allocate a channel buffer
- *	@buf: the buffer struct
- *	@size: total size of the buffer
- *
- *	Returns a pointer to the resulting buffer, NULL if unsuccessful
- */
-static void *relay_alloc_buf(struct rchan_buf *buf, unsigned long size)
-{
-	void *mem;
-	unsigned int i, j, n_pages;
-
-	size = PAGE_ALIGN(size);
-	n_pages = size >> PAGE_SHIFT;
-
-	buf->page_array = kcalloc(n_pages, sizeof(struct page *), GFP_KERNEL);
-	if (!buf->page_array)
-		return NULL;
-
-	for (i = 0; i < n_pages; i++) {
-		buf->page_array[i] = alloc_page(GFP_KERNEL);
-		if (unlikely(!buf->page_array[i]))
-			goto depopulate;
-	}
-	mem = vmap(buf->page_array, n_pages, VM_MAP, PAGE_KERNEL);
-	if (!mem)
-		goto depopulate;
-
-	memset(mem, 0, size);
-	buf->page_count = n_pages;
-	return mem;
-
-depopulate:
-	for (j = 0; j < i; j++)
-		__free_page(buf->page_array[j]);
-	kfree(buf->page_array);
-	return NULL;
-}
-
-/**
- *	relay_create_buf - allocate and initialize a channel buffer
- *	@alloc_size: size of the buffer to allocate
- *	@n_subbufs: number of sub-buffers in the channel
- *
- *	Returns channel buffer if successful, NULL otherwise
- */
-struct rchan_buf *relay_create_buf(struct rchan *chan)
-{
-	struct rchan_buf *buf = kcalloc(1, sizeof(struct rchan_buf), GFP_KERNEL);
-	if (!buf)
-		return NULL;
-
-	buf->padding = kmalloc(chan->n_subbufs * sizeof(size_t *), GFP_KERNEL);
-	if (!buf->padding)
-		goto free_buf;
-
-	buf->start = relay_alloc_buf(buf, chan->alloc_size);
-	if (!buf->start)
-		goto free_buf;
-
-	buf->chan = chan;
-	kref_get(&buf->chan->kref);
-	return buf;
-
-free_buf:
-	kfree(buf->padding);
-	kfree(buf);
-	return NULL;
-}
-
-/**
- *	relay_destroy_buf - destroy an rchan_buf struct and associated buffer
- *	@buf: the buffer struct
- */
-void relay_destroy_buf(struct rchan_buf *buf)
-{
-	struct rchan *chan = buf->chan;
-	unsigned int i;
-
-	if (likely(buf->start)) {
-		vunmap(buf->start);
-		for (i = 0; i < buf->page_count; i++)
-			__free_page(buf->page_array[i]);
-		kfree(buf->page_array);
-	}
-	kfree(buf->padding);
-	kfree(buf);
-	kref_put(&chan->kref, relay_destroy_channel);
-}
-
-/**
- *	relay_remove_buf - remove a channel buffer
- *
- *	Removes the file from the relayfs fileystem, which also frees the
- *	rchan_buf_struct and the channel buffer.  Should only be called from
- *	kref_put().
- */
-void relay_remove_buf(struct kref *kref)
-{
-	struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
-	buf->chan->cb->remove_buf_file(buf->dentry);
-	relay_destroy_buf(buf);
-}
diff --git a/fs/relayfs/buffers.h b/fs/relayfs/buffers.h
deleted file mode 100644
index 37a12493f64..00000000000
--- a/fs/relayfs/buffers.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _BUFFERS_H
-#define _BUFFERS_H
-
-/* This inspired by rtai/shmem */
-#define FIX_SIZE(x) (((x) - 1) & PAGE_MASK) + PAGE_SIZE
-
-extern int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma);
-extern struct rchan_buf *relay_create_buf(struct rchan *chan);
-extern void relay_destroy_buf(struct rchan_buf *buf);
-extern void relay_remove_buf(struct kref *kref);
-
-#endif/* _BUFFERS_H */
diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c
deleted file mode 100644
index 383523011aa..00000000000
--- a/fs/relayfs/inode.c
+++ /dev/null
@@ -1,581 +0,0 @@
-/*
- * VFS-related code for RelayFS, a high-speed data relay filesystem.
- *
- * Copyright (C) 2003-2005 - Tom Zanussi <zanussi@us.ibm.com>, IBM Corp
- * Copyright (C) 2003-2005 - Karim Yaghmour <karim@opersys.com>
- *
- * Based on ramfs, Copyright (C) 2002 - Linus Torvalds
- *
- * This file is released under the GPL.
- */
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/backing-dev.h>
-#include <linux/namei.h>
-#include <linux/poll.h>
-#include <linux/relayfs_fs.h>
-#include "relay.h"
-#include "buffers.h"
-
-#define RELAYFS_MAGIC			0xF0B4A981
-
-static struct vfsmount *		relayfs_mount;
-static int				relayfs_mount_count;
-
-static struct backing_dev_info		relayfs_backing_dev_info = {
-	.ra_pages	= 0,	/* No readahead */
-	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
-};
-
-static struct inode *relayfs_get_inode(struct super_block *sb,
-				       int mode,
- 				       struct file_operations *fops,
-				       void *data)
-{
-	struct inode *inode;
-
-	inode = new_inode(sb);
-	if (!inode)
-		return NULL;
-
-	inode->i_mode = mode;
-	inode->i_uid = 0;
-	inode->i_gid = 0;
-	inode->i_blksize = PAGE_CACHE_SIZE;
-	inode->i_blocks = 0;
-	inode->i_mapping->backing_dev_info = &relayfs_backing_dev_info;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	switch (mode & S_IFMT) {
-	case S_IFREG:
-		inode->i_fop = fops;
-		if (data)
-			inode->u.generic_ip = data;
-		break;
-	case S_IFDIR:
-		inode->i_op = &simple_dir_inode_operations;
-		inode->i_fop = &simple_dir_operations;
-
-		/* directory inodes start off with i_nlink == 2 (for "." entry) */
-		inode->i_nlink++;
-		break;
-	default:
-		break;
-	}
-
-	return inode;
-}
-
-/**
- *	relayfs_create_entry - create a relayfs directory or file
- *	@name: the name of the file to create
- *	@parent: parent directory
- *	@mode: mode
- *	@fops: file operations to use for the file
- *	@data: user-associated data for this file
- *
- *	Returns the new dentry, NULL on failure
- *
- *	Creates a file or directory with the specifed permissions.
- */
-static struct dentry *relayfs_create_entry(const char *name,
-					   struct dentry *parent,
-					   int mode,
-					   struct file_operations *fops,
-					   void *data)
-{
-	struct dentry *d;
-	struct inode *inode;
-	int error = 0;
-
-	BUG_ON(!name || !(S_ISREG(mode) || S_ISDIR(mode)));
-
-	error = simple_pin_fs("relayfs", &relayfs_mount, &relayfs_mount_count);
-	if (error) {
-		printk(KERN_ERR "Couldn't mount relayfs: errcode %d\n", error);
-		return NULL;
-	}
-
-	if (!parent && relayfs_mount && relayfs_mount->mnt_sb)
-		parent = relayfs_mount->mnt_sb->s_root;
-
-	if (!parent) {
-		simple_release_fs(&relayfs_mount, &relayfs_mount_count);
-		return NULL;
-	}
-
-	parent = dget(parent);
-	mutex_lock(&parent->d_inode->i_mutex);
-	d = lookup_one_len(name, parent, strlen(name));
-	if (IS_ERR(d)) {
-		d = NULL;
-		goto release_mount;
-	}
-
-	if (d->d_inode) {
-		d = NULL;
-		goto release_mount;
-	}
-
-	inode = relayfs_get_inode(parent->d_inode->i_sb, mode, fops, data);
-	if (!inode) {
-		d = NULL;
-		goto release_mount;
-	}
-
-	d_instantiate(d, inode);
-	dget(d);	/* Extra count - pin the dentry in core */
-
-	if (S_ISDIR(mode))
-		parent->d_inode->i_nlink++;
-
-	goto exit;
-
-release_mount:
-	simple_release_fs(&relayfs_mount, &relayfs_mount_count);
-
-exit:
-	mutex_unlock(&parent->d_inode->i_mutex);
-	dput(parent);
-	return d;
-}
-
-/**
- *	relayfs_create_file - create a file in the relay filesystem
- *	@name: the name of the file to create
- *	@parent: parent directory
- *	@mode: mode, if not specied the default perms are used
- *	@fops: file operations to use for the file
- *	@data: user-associated data for this file
- *
- *	Returns file dentry if successful, NULL otherwise.
- *
- *	The file will be created user r on behalf of current user.
- */
-struct dentry *relayfs_create_file(const char *name,
-				   struct dentry *parent,
-				   int mode,
-				   struct file_operations *fops,
-				   void *data)
-{
-	BUG_ON(!fops);
-
-	if (!mode)
-		mode = S_IRUSR;
-	mode = (mode & S_IALLUGO) | S_IFREG;
-
-	return relayfs_create_entry(name, parent, mode, fops, data);
-}
-
-/**
- *	relayfs_create_dir - create a directory in the relay filesystem
- *	@name: the name of the directory to create
- *	@parent: parent directory, NULL if parent should be fs root
- *
- *	Returns directory dentry if successful, NULL otherwise.
- *
- *	The directory will be created world rwx on behalf of current user.
- */
-struct dentry *relayfs_create_dir(const char *name, struct dentry *parent)
-{
-	int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
-	return relayfs_create_entry(name, parent, mode, NULL, NULL);
-}
-
-/**
- *	relayfs_remove - remove a file or directory in the relay filesystem
- *	@dentry: file or directory dentry
- *
- *	Returns 0 if successful, negative otherwise.
- */
-int relayfs_remove(struct dentry *dentry)
-{
-	struct dentry *parent;
-	int error = 0;
-
-	if (!dentry)
-		return -EINVAL;
-	parent = dentry->d_parent;
-	if (!parent)
-		return -EINVAL;
-
-	parent = dget(parent);
-	mutex_lock(&parent->d_inode->i_mutex);
-	if (dentry->d_inode) {
-		if (S_ISDIR(dentry->d_inode->i_mode))
-			error = simple_rmdir(parent->d_inode, dentry);
-		else
-			error = simple_unlink(parent->d_inode, dentry);
-		if (!error)
-			d_delete(dentry);
-	}
-	if (!error)
-		dput(dentry);
-	mutex_unlock(&parent->d_inode->i_mutex);
-	dput(parent);
-
-	if (!error)
-		simple_release_fs(&relayfs_mount, &relayfs_mount_count);
-
-	return error;
-}
-
-/**
- *	relayfs_remove_file - remove a file from relay filesystem
- *	@dentry: directory dentry
- *
- *	Returns 0 if successful, negative otherwise.
- */
-int relayfs_remove_file(struct dentry *dentry)
-{
-	return relayfs_remove(dentry);
-}
-
-/**
- *	relayfs_remove_dir - remove a directory in the relay filesystem
- *	@dentry: directory dentry
- *
- *	Returns 0 if successful, negative otherwise.
- */
-int relayfs_remove_dir(struct dentry *dentry)
-{
-	return relayfs_remove(dentry);
-}
-
-/**
- *	relay_file_open - open file op for relay files
- *	@inode: the inode
- *	@filp: the file
- *
- *	Increments the channel buffer refcount.
- */
-static int relay_file_open(struct inode *inode, struct file *filp)
-{
-	struct rchan_buf *buf = inode->u.generic_ip;
-	kref_get(&buf->kref);
-	filp->private_data = buf;
-
-	return 0;
-}
-
-/**
- *	relay_file_mmap - mmap file op for relay files
- *	@filp: the file
- *	@vma: the vma describing what to map
- *
- *	Calls upon relay_mmap_buf to map the file into user space.
- */
-static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-	struct rchan_buf *buf = filp->private_data;
-	return relay_mmap_buf(buf, vma);
-}
-
-/**
- *	relay_file_poll - poll file op for relay files
- *	@filp: the file
- *	@wait: poll table
- *
- *	Poll implemention.
- */
-static unsigned int relay_file_poll(struct file *filp, poll_table *wait)
-{
-	unsigned int mask = 0;
-	struct rchan_buf *buf = filp->private_data;
-
-	if (buf->finalized)
-		return POLLERR;
-
-	if (filp->f_mode & FMODE_READ) {
-		poll_wait(filp, &buf->read_wait, wait);
-		if (!relay_buf_empty(buf))
-			mask |= POLLIN | POLLRDNORM;
-	}
-
-	return mask;
-}
-
-/**
- *	relay_file_release - release file op for relay files
- *	@inode: the inode
- *	@filp: the file
- *
- *	Decrements the channel refcount, as the filesystem is
- *	no longer using it.
- */
-static int relay_file_release(struct inode *inode, struct file *filp)
-{
-	struct rchan_buf *buf = filp->private_data;
-	kref_put(&buf->kref, relay_remove_buf);
-
-	return 0;
-}
-
-/**
- *	relay_file_read_consume - update the consumed count for the buffer
- */
-static void relay_file_read_consume(struct rchan_buf *buf,
-				    size_t read_pos,
-				    size_t bytes_consumed)
-{
-	size_t subbuf_size = buf->chan->subbuf_size;
-	size_t n_subbufs = buf->chan->n_subbufs;
-	size_t read_subbuf;
-
-	if (buf->bytes_consumed + bytes_consumed > subbuf_size) {
-		relay_subbufs_consumed(buf->chan, buf->cpu, 1);
-		buf->bytes_consumed = 0;
-	}
-
-	buf->bytes_consumed += bytes_consumed;
-	read_subbuf = read_pos / buf->chan->subbuf_size;
-	if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) {
-		if ((read_subbuf == buf->subbufs_produced % n_subbufs) &&
-		    (buf->offset == subbuf_size))
-			return;
-		relay_subbufs_consumed(buf->chan, buf->cpu, 1);
-		buf->bytes_consumed = 0;
-	}
-}
-
-/**
- *	relay_file_read_avail - boolean, are there unconsumed bytes available?
- */
-static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos)
-{
-	size_t bytes_produced, bytes_consumed, write_offset;
-	size_t subbuf_size = buf->chan->subbuf_size;
-	size_t n_subbufs = buf->chan->n_subbufs;
-	size_t produced = buf->subbufs_produced % n_subbufs;
-	size_t consumed = buf->subbufs_consumed % n_subbufs;
-
-	write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset;
-
-	if (consumed > produced) {
-		if ((produced > n_subbufs) &&
-		    (produced + n_subbufs - consumed <= n_subbufs))
-			produced += n_subbufs;
-	} else if (consumed == produced) {
-		if (buf->offset > subbuf_size) {
-			produced += n_subbufs;
-			if (buf->subbufs_produced == buf->subbufs_consumed)
-				consumed += n_subbufs;
-		}
-	}
-
-	if (buf->offset > subbuf_size)
-		bytes_produced = (produced - 1) * subbuf_size + write_offset;
-	else
-		bytes_produced = produced * subbuf_size + write_offset;
-	bytes_consumed = consumed * subbuf_size + buf->bytes_consumed;
-
-	if (bytes_produced == bytes_consumed)
-		return 0;
-
-	relay_file_read_consume(buf, read_pos, 0);
-
-	return 1;
-}
-
-/**
- *	relay_file_read_subbuf_avail - return bytes available in sub-buffer
- */
-static size_t relay_file_read_subbuf_avail(size_t read_pos,
-					   struct rchan_buf *buf)
-{
-	size_t padding, avail = 0;
-	size_t read_subbuf, read_offset, write_subbuf, write_offset;
-	size_t subbuf_size = buf->chan->subbuf_size;
-
-	write_subbuf = (buf->data - buf->start) / subbuf_size;
-	write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset;
-	read_subbuf = read_pos / subbuf_size;
-	read_offset = read_pos % subbuf_size;
-	padding = buf->padding[read_subbuf];
-
-	if (read_subbuf == write_subbuf) {
-		if (read_offset + padding < write_offset)
-			avail = write_offset - (read_offset + padding);
-	} else
-		avail = (subbuf_size - padding) - read_offset;
-
-	return avail;
-}
-
-/**
- *	relay_file_read_start_pos - find the first available byte to read
- *
- *	If the read_pos is in the middle of padding, return the
- *	position of the first actually available byte, otherwise
- *	return the original value.
- */
-static size_t relay_file_read_start_pos(size_t read_pos,
-					struct rchan_buf *buf)
-{
-	size_t read_subbuf, padding, padding_start, padding_end;
-	size_t subbuf_size = buf->chan->subbuf_size;
-	size_t n_subbufs = buf->chan->n_subbufs;
-
-	read_subbuf = read_pos / subbuf_size;
-	padding = buf->padding[read_subbuf];
-	padding_start = (read_subbuf + 1) * subbuf_size - padding;
-	padding_end = (read_subbuf + 1) * subbuf_size;
-	if (read_pos >= padding_start && read_pos < padding_end) {
-		read_subbuf = (read_subbuf + 1) % n_subbufs;
-		read_pos = read_subbuf * subbuf_size;
-	}
-
-	return read_pos;
-}
-
-/**
- *	relay_file_read_end_pos - return the new read position
- */
-static size_t relay_file_read_end_pos(struct rchan_buf *buf,
-				      size_t read_pos,
-				      size_t count)
-{
-	size_t read_subbuf, padding, end_pos;
-	size_t subbuf_size = buf->chan->subbuf_size;
-	size_t n_subbufs = buf->chan->n_subbufs;
-
-	read_subbuf = read_pos / subbuf_size;
-	padding = buf->padding[read_subbuf];
-	if (read_pos % subbuf_size + count + padding == subbuf_size)
-		end_pos = (read_subbuf + 1) * subbuf_size;
-	else
-		end_pos = read_pos + count;
-	if (end_pos >= subbuf_size * n_subbufs)
-		end_pos = 0;
-
-	return end_pos;
-}
-
-/**
- *	relay_file_read - read file op for relay files
- *	@filp: the file
- *	@buffer: the userspace buffer
- *	@count: number of bytes to read
- *	@ppos: position to read from
- *
- *	Reads count bytes or the number of bytes available in the
- *	current sub-buffer being read, whichever is smaller.
- */
-static ssize_t relay_file_read(struct file *filp,
-			       char __user *buffer,
-			       size_t count,
-			       loff_t *ppos)
-{
-	struct rchan_buf *buf = filp->private_data;
-	struct inode *inode = filp->f_dentry->d_inode;
-	size_t read_start, avail;
-	ssize_t ret = 0;
-	void *from;
-
-	mutex_lock(&inode->i_mutex);
-	if(!relay_file_read_avail(buf, *ppos))
-		goto out;
-
-	read_start = relay_file_read_start_pos(*ppos, buf);
-	avail = relay_file_read_subbuf_avail(read_start, buf);
-	if (!avail)
-		goto out;
-
-	from = buf->start + read_start;
-	ret = count = min(count, avail);
-	if (copy_to_user(buffer, from, count)) {
-		ret = -EFAULT;
-		goto out;
-	}
-	relay_file_read_consume(buf, read_start, count);
-	*ppos = relay_file_read_end_pos(buf, read_start, count);
-out:
-	mutex_unlock(&inode->i_mutex);
-	return ret;
-}
-
-struct file_operations relay_file_operations = {
-	.open		= relay_file_open,
-	.poll		= relay_file_poll,
-	.mmap		= relay_file_mmap,
-	.read		= relay_file_read,
-	.llseek		= no_llseek,
-	.release	= relay_file_release,
-};
-
-static struct super_operations relayfs_ops = {
-	.statfs		= simple_statfs,
-	.drop_inode	= generic_delete_inode,
-};
-
-static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
-{
-	struct inode *inode;
-	struct dentry *root;
-	int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
-
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
-	sb->s_magic = RELAYFS_MAGIC;
-	sb->s_op = &relayfs_ops;
-	inode = relayfs_get_inode(sb, mode, NULL, NULL);
-
-	if (!inode)
-		return -ENOMEM;
-
-	root = d_alloc_root(inode);
-	if (!root) {
-		iput(inode);
-		return -ENOMEM;
-	}
-	sb->s_root = root;
-
-	return 0;
-}
-
-static struct super_block * relayfs_get_sb(struct file_system_type *fs_type,
-					   int flags, const char *dev_name,
-					   void *data)
-{
-	return get_sb_single(fs_type, flags, data, relayfs_fill_super);
-}
-
-static struct file_system_type relayfs_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "relayfs",
-	.get_sb		= relayfs_get_sb,
-	.kill_sb	= kill_litter_super,
-};
-
-static int __init init_relayfs_fs(void)
-{
-	return register_filesystem(&relayfs_fs_type);
-}
-
-static void __exit exit_relayfs_fs(void)
-{
-
-
-
-
-
-	unregister_filesystem(&relayfs_fs_type);
-}
-
-module_init(init_relayfs_fs)
-module_exit(exit_relayfs_fs)
-
-EXPORT_SYMBOL_GPL(relay_file_operations);
-EXPORT_SYMBOL_GPL(relayfs_create_dir);
-EXPORT_SYMBOL_GPL(relayfs_remove_dir);
-EXPORT_SYMBOL_GPL(relayfs_create_file);
-EXPORT_SYMBOL_GPL(relayfs_remove_file);
-
-MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>");
-MODULE_DESCRIPTION("Relay Filesystem");
-MODULE_LICENSE("GPL");
-
diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c
deleted file mode 100644
index abf3ceaace4..00000000000
--- a/fs/relayfs/relay.c
+++ /dev/null
@@ -1,482 +0,0 @@
-/*
- * Public API and common code for RelayFS.
- *
- * See Documentation/filesystems/relayfs.txt for an overview of relayfs.
- *
- * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
- * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
- *
- * This file is released under the GPL.
- */
-
-#include <linux/errno.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/relayfs_fs.h>
-#include "relay.h"
-#include "buffers.h"
-
-/**
- *	relay_buf_empty - boolean, is the channel buffer empty?
- *	@buf: channel buffer
- *
- *	Returns 1 if the buffer is empty, 0 otherwise.
- */
-int relay_buf_empty(struct rchan_buf *buf)
-{
-	return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1;
-}
-
-/**
- *	relay_buf_full - boolean, is the channel buffer full?
- *	@buf: channel buffer
- *
- *	Returns 1 if the buffer is full, 0 otherwise.
- */
-int relay_buf_full(struct rchan_buf *buf)
-{
-	size_t ready = buf->subbufs_produced - buf->subbufs_consumed;
-	return (ready >= buf->chan->n_subbufs) ? 1 : 0;
-}
-
-/*
- * High-level relayfs kernel API and associated functions.
- */
-
-/*
- * rchan_callback implementations defining default channel behavior.  Used
- * in place of corresponding NULL values in client callback struct.
- */
-
-/*
- * subbuf_start() default callback.  Does nothing.
- */
-static int subbuf_start_default_callback (struct rchan_buf *buf,
-					  void *subbuf,
-					  void *prev_subbuf,
-					  size_t prev_padding)
-{
-	if (relay_buf_full(buf))
-		return 0;
-
-	return 1;
-}
-
-/*
- * buf_mapped() default callback.  Does nothing.
- */
-static void buf_mapped_default_callback(struct rchan_buf *buf,
-					struct file *filp)
-{
-}
-
-/*
- * buf_unmapped() default callback.  Does nothing.
- */
-static void buf_unmapped_default_callback(struct rchan_buf *buf,
-					  struct file *filp)
-{
-}
-
-/*
- * create_buf_file_create() default callback.  Creates file to represent buf.
- */
-static struct dentry *create_buf_file_default_callback(const char *filename,
-						       struct dentry *parent,
-						       int mode,
-						       struct rchan_buf *buf,
-						       int *is_global)
-{
-	return relayfs_create_file(filename, parent, mode,
-				   &relay_file_operations, buf);
-}
-
-/*
- * remove_buf_file() default callback.  Removes file representing relay buffer.
- */
-static int remove_buf_file_default_callback(struct dentry *dentry)
-{
-	return relayfs_remove(dentry);
-}
-
-/* relay channel default callbacks */
-static struct rchan_callbacks default_channel_callbacks = {
-	.subbuf_start = subbuf_start_default_callback,
-	.buf_mapped = buf_mapped_default_callback,
-	.buf_unmapped = buf_unmapped_default_callback,
-	.create_buf_file = create_buf_file_default_callback,
-	.remove_buf_file = remove_buf_file_default_callback,
-};
-
-/**
- *	wakeup_readers - wake up readers waiting on a channel
- *	@private: the channel buffer
- *
- *	This is the work function used to defer reader waking.  The
- *	reason waking is deferred is that calling directly from write
- *	causes problems if you're writing from say the scheduler.
- */
-static void wakeup_readers(void *private)
-{
-	struct rchan_buf *buf = private;
-	wake_up_interruptible(&buf->read_wait);
-}
-
-/**
- *	__relay_reset - reset a channel buffer
- *	@buf: the channel buffer
- *	@init: 1 if this is a first-time initialization
- *
- *	See relay_reset for description of effect.
- */
-static inline void __relay_reset(struct rchan_buf *buf, unsigned int init)
-{
-	size_t i;
-
-	if (init) {
-		init_waitqueue_head(&buf->read_wait);
-		kref_init(&buf->kref);
-		INIT_WORK(&buf->wake_readers, NULL, NULL);
-	} else {
-		cancel_delayed_work(&buf->wake_readers);
-		flush_scheduled_work();
-	}
-
-	buf->subbufs_produced = 0;
-	buf->subbufs_consumed = 0;
-	buf->bytes_consumed = 0;
-	buf->finalized = 0;
-	buf->data = buf->start;
-	buf->offset = 0;
-
-	for (i = 0; i < buf->chan->n_subbufs; i++)
-		buf->padding[i] = 0;
-
-	buf->chan->cb->subbuf_start(buf, buf->data, NULL, 0);
-}
-
-/**
- *	relay_reset - reset the channel
- *	@chan: the channel
- *
- *	This has the effect of erasing all data from all channel buffers
- *	and restarting the channel in its initial state.  The buffers
- *	are not freed, so any mappings are still in effect.
- *
- *	NOTE: Care should be taken that the channel isn't actually
- *	being used by anything when this call is made.
- */
-void relay_reset(struct rchan *chan)
-{
-	unsigned int i;
-	struct rchan_buf *prev = NULL;
-
-	if (!chan)
-		return;
-
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!chan->buf[i] || chan->buf[i] == prev)
-			break;
-		__relay_reset(chan->buf[i], 0);
-		prev = chan->buf[i];
-	}
-}
-
-/**
- *	relay_open_buf - create a new channel buffer in relayfs
- *
- *	Internal - used by relay_open().
- */
-static struct rchan_buf *relay_open_buf(struct rchan *chan,
-					const char *filename,
-					struct dentry *parent,
-					int *is_global)
-{
-	struct rchan_buf *buf;
-	struct dentry *dentry;
-
-	if (*is_global)
-		return chan->buf[0];
-
- 	buf = relay_create_buf(chan);
- 	if (!buf)
- 		return NULL;
-
-	/* Create file in fs */
- 	dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR,
- 					   buf, is_global);
- 	if (!dentry) {
- 		relay_destroy_buf(buf);
-		return NULL;
- 	}
-
-	buf->dentry = dentry;
-	__relay_reset(buf, 1);
-
-	return buf;
-}
-
-/**
- *	relay_close_buf - close a channel buffer
- *	@buf: channel buffer
- *
- *	Marks the buffer finalized and restores the default callbacks.
- *	The channel buffer and channel buffer data structure are then freed
- *	automatically when the last reference is given up.
- */
-static inline void relay_close_buf(struct rchan_buf *buf)
-{
-	buf->finalized = 1;
-	buf->chan->cb = &default_channel_callbacks;
-	cancel_delayed_work(&buf->wake_readers);
-	flush_scheduled_work();
-	kref_put(&buf->kref, relay_remove_buf);
-}
-
-static inline void setup_callbacks(struct rchan *chan,
-				   struct rchan_callbacks *cb)
-{
-	if (!cb) {
-		chan->cb = &default_channel_callbacks;
-		return;
-	}
-
-	if (!cb->subbuf_start)
-		cb->subbuf_start = subbuf_start_default_callback;
-	if (!cb->buf_mapped)
-		cb->buf_mapped = buf_mapped_default_callback;
-	if (!cb->buf_unmapped)
-		cb->buf_unmapped = buf_unmapped_default_callback;
-	if (!cb->create_buf_file)
-		cb->create_buf_file = create_buf_file_default_callback;
-	if (!cb->remove_buf_file)
-		cb->remove_buf_file = remove_buf_file_default_callback;
-	chan->cb = cb;
-}
-
-/**
- *	relay_open - create a new relayfs channel
- *	@base_filename: base name of files to create
- *	@parent: dentry of parent directory, NULL for root directory
- *	@subbuf_size: size of sub-buffers
- *	@n_subbufs: number of sub-buffers
- *	@cb: client callback functions
- *
- *	Returns channel pointer if successful, NULL otherwise.
- *
- *	Creates a channel buffer for each cpu using the sizes and
- *	attributes specified.  The created channel buffer files
- *	will be named base_filename0...base_filenameN-1.  File
- *	permissions will be S_IRUSR.
- */
-struct rchan *relay_open(const char *base_filename,
-			 struct dentry *parent,
-			 size_t subbuf_size,
-			 size_t n_subbufs,
-			 struct rchan_callbacks *cb)
-{
-	unsigned int i;
-	struct rchan *chan;
-	char *tmpname;
-	int is_global = 0;
-
-	if (!base_filename)
-		return NULL;
-
-	if (!(subbuf_size && n_subbufs))
-		return NULL;
-
-	chan = kcalloc(1, sizeof(struct rchan), GFP_KERNEL);
-	if (!chan)
-		return NULL;
-
-	chan->version = RELAYFS_CHANNEL_VERSION;
-	chan->n_subbufs = n_subbufs;
-	chan->subbuf_size = subbuf_size;
-	chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs);
-	setup_callbacks(chan, cb);
-	kref_init(&chan->kref);
-
-	tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL);
-	if (!tmpname)
-		goto free_chan;
-
-	for_each_online_cpu(i) {
-		sprintf(tmpname, "%s%d", base_filename, i);
-		chan->buf[i] = relay_open_buf(chan, tmpname, parent,
-					      &is_global);
-		chan->buf[i]->cpu = i;
-		if (!chan->buf[i])
-			goto free_bufs;
-	}
-
-	kfree(tmpname);
-	return chan;
-
-free_bufs:
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!chan->buf[i])
-			break;
-		relay_close_buf(chan->buf[i]);
-		if (is_global)
-			break;
-	}
-	kfree(tmpname);
-
-free_chan:
-	kref_put(&chan->kref, relay_destroy_channel);
-	return NULL;
-}
-
-/**
- *	relay_switch_subbuf - switch to a new sub-buffer
- *	@buf: channel buffer
- *	@length: size of current event
- *
- *	Returns either the length passed in or 0 if full.
-
- *	Performs sub-buffer-switch tasks such as invoking callbacks,
- *	updating padding counts, waking up readers, etc.
- */
-size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
-{
-	void *old, *new;
-	size_t old_subbuf, new_subbuf;
-
-	if (unlikely(length > buf->chan->subbuf_size))
-		goto toobig;
-
-	if (buf->offset != buf->chan->subbuf_size + 1) {
-		buf->prev_padding = buf->chan->subbuf_size - buf->offset;
-		old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
-		buf->padding[old_subbuf] = buf->prev_padding;
-		buf->subbufs_produced++;
-		if (waitqueue_active(&buf->read_wait)) {
-			PREPARE_WORK(&buf->wake_readers, wakeup_readers, buf);
-			schedule_delayed_work(&buf->wake_readers, 1);
-		}
-	}
-
-	old = buf->data;
-	new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
-	new = buf->start + new_subbuf * buf->chan->subbuf_size;
-	buf->offset = 0;
-	if (!buf->chan->cb->subbuf_start(buf, new, old, buf->prev_padding)) {
-		buf->offset = buf->chan->subbuf_size + 1;
-		return 0;
-	}
-	buf->data = new;
-	buf->padding[new_subbuf] = 0;
-
-	if (unlikely(length + buf->offset > buf->chan->subbuf_size))
-		goto toobig;
-
-	return length;
-
-toobig:
-	buf->chan->last_toobig = length;
-	return 0;
-}
-
-/**
- *	relay_subbufs_consumed - update the buffer's sub-buffers-consumed count
- *	@chan: the channel
- *	@cpu: the cpu associated with the channel buffer to update
- *	@subbufs_consumed: number of sub-buffers to add to current buf's count
- *
- *	Adds to the channel buffer's consumed sub-buffer count.
- *	subbufs_consumed should be the number of sub-buffers newly consumed,
- *	not the total consumed.
- *
- *	NOTE: kernel clients don't need to call this function if the channel
- *	mode is 'overwrite'.
- */
-void relay_subbufs_consumed(struct rchan *chan,
-			    unsigned int cpu,
-			    size_t subbufs_consumed)
-{
-	struct rchan_buf *buf;
-
-	if (!chan)
-		return;
-
-	if (cpu >= NR_CPUS || !chan->buf[cpu])
-		return;
-
-	buf = chan->buf[cpu];
-	buf->subbufs_consumed += subbufs_consumed;
-	if (buf->subbufs_consumed > buf->subbufs_produced)
-		buf->subbufs_consumed = buf->subbufs_produced;
-}
-
-/**
- *	relay_destroy_channel - free the channel struct
- *
- *	Should only be called from kref_put().
- */
-void relay_destroy_channel(struct kref *kref)
-{
-	struct rchan *chan = container_of(kref, struct rchan, kref);
-	kfree(chan);
-}
-
-/**
- *	relay_close - close the channel
- *	@chan: the channel
- *
- *	Closes all channel buffers and frees the channel.
- */
-void relay_close(struct rchan *chan)
-{
-	unsigned int i;
-	struct rchan_buf *prev = NULL;
-
-	if (!chan)
-		return;
-
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!chan->buf[i] || chan->buf[i] == prev)
-			break;
-		relay_close_buf(chan->buf[i]);
-		prev = chan->buf[i];
-	}
-
-	if (chan->last_toobig)
-		printk(KERN_WARNING "relayfs: one or more items not logged "
-		       "[item size (%Zd) > sub-buffer size (%Zd)]\n",
-		       chan->last_toobig, chan->subbuf_size);
-
-	kref_put(&chan->kref, relay_destroy_channel);
-}
-
-/**
- *	relay_flush - close the channel
- *	@chan: the channel
- *
- *	Flushes all channel buffers i.e. forces buffer switch.
- */
-void relay_flush(struct rchan *chan)
-{
-	unsigned int i;
-	struct rchan_buf *prev = NULL;
-
-	if (!chan)
-		return;
-
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!chan->buf[i] || chan->buf[i] == prev)
-			break;
-		relay_switch_subbuf(chan->buf[i], 0);
-		prev = chan->buf[i];
-	}
-}
-
-EXPORT_SYMBOL_GPL(relay_open);
-EXPORT_SYMBOL_GPL(relay_close);
-EXPORT_SYMBOL_GPL(relay_flush);
-EXPORT_SYMBOL_GPL(relay_reset);
-EXPORT_SYMBOL_GPL(relay_subbufs_consumed);
-EXPORT_SYMBOL_GPL(relay_switch_subbuf);
-EXPORT_SYMBOL_GPL(relay_buf_full);
diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h
deleted file mode 100644
index 0993d3e5753..00000000000
--- a/fs/relayfs/relay.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _RELAY_H
-#define _RELAY_H
-
-extern int relayfs_remove(struct dentry *dentry);
-extern int relay_buf_empty(struct rchan_buf *buf);
-extern void relay_destroy_channel(struct kref *kref);
-
-#endif /* _RELAY_H */
-- 
cgit v1.2.3


From 2056a782f8e7e65fd4bfd027506b4ce1c5e9ccd4 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 23 Mar 2006 20:00:26 +0100
Subject: [PATCH] Block queue IO tracing support (blktrace) as of 2006-03-23

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 fs/bio.c          | 4 ++++
 fs/compat_ioctl.c | 1 +
 2 files changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index 8f1d2e815c9..0a8c59cb68f 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
+#include <linux/blktrace_api.h>
 #include <scsi/sg.h>		/* for struct sg_iovec */
 
 #define BIO_POOL_SIZE 256
@@ -1095,6 +1096,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
 	if (!bp)
 		return bp;
 
+	blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi,
+				bi->bi_sector + first_sectors);
+
 	BUG_ON(bi->bi_vcnt != 1);
 	BUG_ON(bi->bi_idx != 0);
 	atomic_set(&bp->cnt, 3);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c666769a875..7c031f00fd7 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -72,6 +72,7 @@
 #include <linux/i2c-dev.h>
 #include <linux/wireless.h>
 #include <linux/atalk.h>
+#include <linux/blktrace_api.h>
 
 #include <net/sock.h>          /* siocdevprivate_ioctl */
 #include <net/bluetooth/bluetooth.h>
-- 
cgit v1.2.3


From 9b04c997b1120feefa1e6ee8e2902270bc055cd2 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 24 Mar 2006 03:15:10 -0800
Subject: [PATCH] vfs: MS_VERBOSE should be MS_SILENT

The meaning of MS_VERBOSE is backwards; if the bit is set, it really means,
"don't be verbose".  This is confusing and counter-intuitive.

In addition, there is also no way to set the MS_VERBOSE flag in the
mount(8) program in util-linux, but interesting, it does define options
which would do the right thing if MS_SILENT were defined, which
unfortunately we do not:

#ifdef MS_SILENT
  { "quiet",    0, 0, MS_SILENT    },   /* be quiet  */
  { "loud",     0, 1, MS_SILENT    },   /* print out messages. */
#endif

So the obvious fix is to deprecate the use of MS_VERBOSE and replace it
with MS_SILENT.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/afs/super.c   | 2 +-
 fs/cifs/cifsfs.c | 2 +-
 fs/jffs2/super.c | 4 ++--
 fs/nfs/inode.c   | 4 ++--
 fs/super.c       | 6 +++---
 5 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/super.c b/fs/afs/super.c
index d6fa8e5999d..53c56e7231a 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -341,7 +341,7 @@ static struct super_block *afs_get_sb(struct file_system_type *fs_type,
 
 	sb->s_flags = flags;
 
-	ret = afs_fill_super(sb, &params, flags & MS_VERBOSE ? 1 : 0);
+	ret = afs_fill_super(sb, &params, flags & MS_SILENT ? 1 : 0);
 	if (ret < 0) {
 		up_write(&sb->s_umount);
 		deactivate_super(sb);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 79eeccd0437..1cd044ce82a 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -479,7 +479,7 @@ cifs_get_sb(struct file_system_type *fs_type,
 
 	sb->s_flags = flags;
 
-	rc = cifs_read_super(sb, data, dev_name, flags & MS_VERBOSE ? 1 : 0);
+	rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0);
 	if (rc) {
 		up_write(&sb->s_umount);
 		deactivate_super(sb);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 93883817cbd..c8fac352a4c 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -152,7 +152,7 @@ static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type,
 	sb->s_op = &jffs2_super_operations;
 	sb->s_flags = flags | MS_NOATIME;
 
-	ret = jffs2_do_fill_super(sb, data, (flags&MS_VERBOSE)?1:0);
+	ret = jffs2_do_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
 
 	if (ret) {
 		/* Failure case... */
@@ -257,7 +257,7 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type,
 	}
 
 	if (imajor(nd.dentry->d_inode) != MTD_BLOCK_MAJOR) {
-		if (!(flags & MS_VERBOSE)) /* Yes I mean this. Strangely */
+		if (!(flags & MS_SILENT))
 			printk(KERN_NOTICE "Attempt to mount non-MTD device \"%s\" as JFFS2\n",
 			       dev_name);
 		goto out;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a77ee95b7ef..37e55c328eb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1679,7 +1679,7 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 
 	s->s_flags = flags;
 
-	error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+	error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
 	if (error) {
 		up_write(&s->s_umount);
 		deactivate_super(s);
@@ -1996,7 +1996,7 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 
 	s->s_flags = flags;
 
-	error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+	error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
 	if (error) {
 		up_write(&s->s_umount);
 		deactivate_super(s);
diff --git a/fs/super.c b/fs/super.c
index 425861cb1ca..37554b87618 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -712,7 +712,7 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 		s->s_flags = flags;
 		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
 		sb_set_blocksize(s, block_size(bdev));
-		error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
 		if (error) {
 			up_write(&s->s_umount);
 			deactivate_super(s);
@@ -756,7 +756,7 @@ struct super_block *get_sb_nodev(struct file_system_type *fs_type,
 
 	s->s_flags = flags;
 
-	error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+	error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
 	if (error) {
 		up_write(&s->s_umount);
 		deactivate_super(s);
@@ -785,7 +785,7 @@ struct super_block *get_sb_single(struct file_system_type *fs_type,
 		return s;
 	if (!s->s_root) {
 		s->s_flags = flags;
-		error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
 		if (error) {
 			up_write(&s->s_umount);
 			deactivate_super(s);
-- 
cgit v1.2.3


From ef1597d527f119f14f093a7a3712221b9cb072c0 Mon Sep 17 00:00:00 2001
From: Horst Hummel <horst.hummel@de.ibm.com>
Date: Fri, 24 Mar 2006 03:15:23 -0800
Subject: [PATCH] s390: Remove old history/whitespave from partition code

Remove obsolete history and trailing whitespace.

Signed-off-by: Horst Hummel <horst.hummel@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/ibm.c | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 1e4a93835fe..830c55d86ab 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -1,15 +1,9 @@
 /*
- * File...........: linux/fs/partitions/ibm.c      
+ * File...........: linux/fs/partitions/ibm.c
  * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
  *                  Volker Sameske <sameske@de.ibm.com>
  * Bugreports.to..: <Linux390@de.ibm.com>
  * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000
-
- * History of changes (starts July 2000)
- * 07/10/00 Fixed detection of CMS formatted disks     
- * 02/13/00 VTOC partition support added
- * 12/27/01 fixed PL030593 (CMS reserved minidisk not detected on 64 bit)
- * 07/24/03 no longer using contents of freed page for CMS label recognition (BZ3611)
  */
 
 #include <linux/config.h>
@@ -25,7 +19,7 @@
 #include "ibm.h"
 
 /*
- * compute the block number from a 
+ * compute the block number from a
  * cyl-cyl-head-head structure
  */
 static inline int
@@ -34,9 +28,8 @@ cchh2blk (struct vtoc_cchh *ptr, struct hd_geometry *geo) {
 	       ptr->hh * geo->sectors;
 }
 
-
 /*
- * compute the block number from a 
+ * compute the block number from a
  * cyl-cyl-head-head-block structure
  */
 static inline int
@@ -48,7 +41,7 @@ cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) {
 
 /*
  */
-int 
+int
 ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 {
 	int blocksize, offset, size;
@@ -77,7 +70,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 		goto out_nogeo;
 	if ((label = kmalloc(sizeof(union label_t), GFP_KERNEL)) == NULL)
 		goto out_nolab;
-	
+
 	if (ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)info) != 0 ||
 	    ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
 		goto out_noioctl;
@@ -154,13 +147,13 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 
 			/* OK, we got valid partition data */
 		        offset = cchh2blk(&f1.DS1EXT1.llimit, geo);
-			size  = cchh2blk(&f1.DS1EXT1.ulimit, geo) - 
+			size  = cchh2blk(&f1.DS1EXT1.ulimit, geo) -
 				offset + geo->sectors;
 			if (counter >= state->limit)
 				break;
-			put_partition(state, counter + 1, 
-					 offset * (blocksize >> 9),
-					 size * (blocksize >> 9));
+			put_partition(state, counter + 1,
+				      offset * (blocksize >> 9),
+				      size * (blocksize >> 9));
 			counter++;
 			blk++;
 		}
@@ -175,7 +168,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 		offset = (info->label_block + 1);
 		size = i_size >> 9;
 		put_partition(state, 1, offset*(blocksize >> 9),
-				 size-offset*(blocksize >> 9));
+			      size-offset*(blocksize >> 9));
 	}
 
 	printk("\n");
@@ -183,7 +176,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 	kfree(geo);
 	kfree(info);
 	return 1;
-	
+
 out_readerr:
 out_noioctl:
 	kfree(label);
-- 
cgit v1.2.3


From e8c96f8c29d89af0c13dc2819a9a00575846ca18 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@nuerscht.ch>
Date: Fri, 24 Mar 2006 03:15:34 -0800
Subject: [PATCH] fs: Use ARRAY_SIZE macro

Use ARRAY_SIZE macro instead of sizeof(x)/sizeof(x[0]) and remove a
duplicate of ARRAY_SIZE.  Some trailing whitespaces are also deleted.

Signed-off-by: Tobias Klauser <tklauser@nuerscht.ch>
Cc: David Howells <dhowells@redhat.com>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: Chris Mason <mason@suse.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Nathan Scott <nathans@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/afs/cmservice.c           |  2 +-
 fs/compat_ioctl.c            |  5 ++---
 fs/jfs/jfs_debug.c           |  2 +-
 fs/lockd/mon.c               |  6 +++---
 fs/lockd/svc.c               |  2 +-
 fs/lockd/xdr.c               |  2 +-
 fs/nfs/inode.c               |  4 ++--
 fs/nfs/mount_clnt.c          |  2 +-
 fs/nfs/nfs2xdr.c             |  2 +-
 fs/nfs/nfs3xdr.c             |  2 +-
 fs/nfs/nfs4xdr.c             |  2 +-
 fs/nfsctl.c                  |  2 +-
 fs/nfsd/nfs4acl.c            |  4 ++--
 fs/nfsd/nfs4callback.c       |  4 ++--
 fs/nfsd/nfs4xdr.c            |  2 +-
 fs/nfsd/nfsctl.c             |  2 +-
 fs/nfsd/nfssvc.c             |  4 ++--
 fs/nls/nls_euc-jp.c          |  6 ++----
 fs/reiserfs/prints.c         |  9 +++------
 fs/sysv/super.c              |  4 ++--
 fs/udf/super.c               | 19 +++++++------------
 fs/xfs/linux-2.6/xfs_stats.c |  2 +-
 fs/xfs/xfs_vnodeops.c        |  2 +-
 23 files changed, 40 insertions(+), 51 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 9eef6bf156a..3d097fddcb7 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -94,7 +94,7 @@ static struct rxrpc_service AFSCM_service = {
 	.error_func	= afscm_error,
 	.aemap_func	= afscm_aemap,
 	.ops_begin	= &AFSCM_ops[0],
-	.ops_end	= &AFSCM_ops[sizeof(AFSCM_ops) / sizeof(AFSCM_ops[0])],
+	.ops_end	= &AFSCM_ops[ARRAY_SIZE(AFSCM_ops)],
 };
 
 static DECLARE_COMPLETION(kafscmd_alive);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 7c031f00fd7..d2c38875ab2 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1522,8 +1522,7 @@ static struct {
 	{ ATM_QUERYLOOP32,   ATM_QUERYLOOP }
 };
 
-#define NR_ATM_IOCTL (sizeof(atm_ioctl_map)/sizeof(atm_ioctl_map[0]))
-
+#define NR_ATM_IOCTL ARRAY_SIZE(atm_ioctl_map)
 
 static int do_atm_iobuf(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
@@ -1824,7 +1823,7 @@ static struct {
 	{ FDWERRORGET32, FDWERRORGET }
 };
 
-#define NR_FD_IOCTL_TRANS (sizeof(fd_ioctl_trans_table)/sizeof(fd_ioctl_trans_table[0]))
+#define NR_FD_IOCTL_TRANS ARRAY_SIZE(fd_ioctl_trans_table)
 
 static int fd_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 4caea6b43b9..81f0e514c49 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -111,7 +111,7 @@ static struct {
 	{ "loglevel",	loglevel_read, loglevel_write }
 #endif
 };
-#define NPROCENT	(sizeof(Entries)/sizeof(Entries[0]))
+#define NPROCENT	ARRAY_SIZE(Entries)
 
 void jfs_proc_init(void)
 {
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 0edc03e6796..a89cb8aa2c8 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -224,8 +224,8 @@ static struct rpc_procinfo	nsm_procedures[] = {
 };
 
 static struct rpc_version	nsm_version1 = {
-		.number		= 1, 
-		.nrprocs	= sizeof(nsm_procedures)/sizeof(nsm_procedures[0]),
+		.number		= 1,
+		.nrprocs	= ARRAY_SIZE(nsm_procedures),
 		.procs		= nsm_procedures
 };
 
@@ -238,7 +238,7 @@ static struct rpc_stat		nsm_stats;
 static struct rpc_program	nsm_program = {
 		.name		= "statd",
 		.number		= SM_PROGRAM,
-		.nrvers		= sizeof(nsm_version)/sizeof(nsm_version[0]),
+		.nrvers		= ARRAY_SIZE(nsm_version),
 		.version	= nsm_version,
 		.stats		= &nsm_stats
 };
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 71a30b416d1..5e85bde6c12 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -509,7 +509,7 @@ static struct svc_version *	nlmsvc_version[] = {
 
 static struct svc_stat		nlmsvc_stats;
 
-#define NLM_NRVERS	(sizeof(nlmsvc_version)/sizeof(nlmsvc_version[0]))
+#define NLM_NRVERS	ARRAY_SIZE(nlmsvc_version)
 static struct svc_program	nlmsvc_program = {
 	.pg_prog		= NLM_PROGRAM,		/* program number */
 	.pg_nvers		= NLM_NRVERS,		/* number of entries in nlmsvc_version */
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 200fbda2c6d..1d700a4dd0b 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -599,7 +599,7 @@ static struct rpc_stat		nlm_stats;
 struct rpc_program		nlm_program = {
 		.name		= "lockd",
 		.number		= NLM_PROGRAM,
-		.nrvers		= sizeof(nlm_versions) / sizeof(nlm_versions[0]),
+		.nrvers		= ARRAY_SIZE(nlm_versions),
 		.version	= nlm_versions,
 		.stats		= &nlm_stats,
 };
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 37e55c328eb..419d1d254f9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -103,7 +103,7 @@ static struct rpc_version *	nfs_version[] = {
 static struct rpc_program	nfs_program = {
 	.name			= "nfs",
 	.number			= NFS_PROGRAM,
-	.nrvers			= sizeof(nfs_version) / sizeof(nfs_version[0]),
+	.nrvers			= ARRAY_SIZE(nfs_version),
 	.version		= nfs_version,
 	.stats			= &nfs_rpcstat,
 	.pipe_dir_name		= "/nfs",
@@ -118,7 +118,7 @@ static struct rpc_version *	nfsacl_version[] = {
 struct rpc_program		nfsacl_program = {
 	.name =			"nfsacl",
 	.number =		NFS_ACL_PROGRAM,
-	.nrvers =		sizeof(nfsacl_version) / sizeof(nfsacl_version[0]),
+	.nrvers =		ARRAY_SIZE(nfsacl_version),
 	.version =		nfsacl_version,
 	.stats =		&nfsacl_rpcstat,
 };
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index db99b8f678f..0b9a78353d6 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -174,7 +174,7 @@ static struct rpc_stat		mnt_stats;
 static struct rpc_program	mnt_program = {
 	.name		= "mount",
 	.number		= NFS_MNT_PROGRAM,
-	.nrvers		= sizeof(mnt_version)/sizeof(mnt_version[0]),
+	.nrvers		= ARRAY_SIZE(mnt_version),
 	.version	= mnt_version,
 	.stats		= &mnt_stats,
 };
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 7fc0560c89c..6548a65de94 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -704,6 +704,6 @@ struct rpc_procinfo	nfs_procedures[] = {
 
 struct rpc_version		nfs_version2 = {
 	.number			= 2,
-	.nrprocs		= sizeof(nfs_procedures)/sizeof(nfs_procedures[0]),
+	.nrprocs		= ARRAY_SIZE(nfs_procedures),
 	.procs			= nfs_procedures
 };
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b6c0b5012bc..5224a191efb 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1138,7 +1138,7 @@ struct rpc_procinfo	nfs3_procedures[] = {
 
 struct rpc_version		nfs_version3 = {
 	.number			= 3,
-	.nrprocs		= sizeof(nfs3_procedures)/sizeof(nfs3_procedures[0]),
+	.nrprocs		= ARRAY_SIZE(nfs3_procedures),
 	.procs			= nfs3_procedures
 };
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4bbf5ef5778..0a1bd36a483 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4384,7 +4384,7 @@ struct rpc_procinfo	nfs4_procedures[] = {
 
 struct rpc_version		nfs_version4 = {
 	.number			= 4,
-	.nrprocs		= sizeof(nfs4_procedures)/sizeof(nfs4_procedures[0]),
+	.nrprocs		= ARRAY_SIZE(nfs4_procedures),
 	.procs			= nfs4_procedures
 };
 
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index 1c72c7f85dd..a5a18d4aca4 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -101,7 +101,7 @@ asmlinkage sys_nfsservctl(int cmd, struct nfsctl_arg __user *arg, void __user *r
 	if (version != NFSCTL_VERSION)
 		return -EINVAL;
 
-	if (cmd < 0 || cmd >= sizeof(map)/sizeof(map[0]) || !map[cmd].name)
+	if (cmd < 0 || cmd >= ARRAY_SIZE(map) || !map[cmd].name)
 		return -EINVAL;
 
 	file = do_open(map[cmd].name, map[cmd].rsize ? O_RDWR : O_WRONLY);	
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 4a2105552ac..7391f4aabed 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -907,7 +907,7 @@ nfs4_acl_get_whotype(char *p, u32 len)
 {
 	int i;
 
-	for (i=0; i < sizeof(s2t_map) / sizeof(*s2t_map); i++) {
+	for (i = 0; i < ARRAY_SIZE(s2t_map); i++) {
 		if (s2t_map[i].stringlen == len &&
 				0 == memcmp(s2t_map[i].string, p, len))
 			return s2t_map[i].type;
@@ -920,7 +920,7 @@ nfs4_acl_write_who(int who, char *p)
 {
 	int i;
 
-	for (i=0; i < sizeof(s2t_map) / sizeof(*s2t_map); i++) {
+	for (i = 0; i < ARRAY_SIZE(s2t_map); i++) {
 		if (s2t_map[i].type == who) {
 			memcpy(p, s2t_map[i].string, s2t_map[i].stringlen);
 			return s2t_map[i].stringlen;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index d828662d737..8d3d23c8a4d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -335,7 +335,7 @@ static struct rpc_procinfo     nfs4_cb_procedures[] = {
 
 static struct rpc_version       nfs_cb_version4 = {
         .number                 = 1,
-        .nrprocs                = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]),
+        .nrprocs                = ARRAY_SIZE(nfs4_cb_procedures),
         .procs                  = nfs4_cb_procedures
 };
 
@@ -411,7 +411,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	/* Initialize rpc_program */
 	program->name = "nfs4_cb";
 	program->number = cb->cb_prog;
-	program->nrvers = sizeof(nfs_cb_version)/sizeof(nfs_cb_version[0]);
+	program->nrvers = ARRAY_SIZE(nfs_cb_version);
 	program->version = nfs_cb_version;
 	program->stats = stat;
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 69d3501173a..03857fd8112 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -992,7 +992,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	if (argp->opcnt > 100)
 		goto xdr_error;
 
-	if (argp->opcnt > sizeof(argp->iops)/sizeof(argp->iops[0])) {
+	if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
 		argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
 		if (!argp->ops) {
 			argp->ops = argp->iops;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index a0871b3efeb..c8960aff096 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -105,7 +105,7 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu
 	char *data;
 	ssize_t rv;
 
-	if (ino >= sizeof(write_op)/sizeof(write_op[0]) || !write_op[ino])
+	if (ino >= ARRAY_SIZE(write_op) || !write_op[ino])
 		return -EINVAL;
 
 	data = simple_transaction_get(file, buf, size);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 1d163b61691..3790727e5df 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -72,7 +72,7 @@ static struct svc_version *	nfsd_acl_version[] = {
 };
 
 #define NFSD_ACL_MINVERS            2
-#define NFSD_ACL_NRVERS		(sizeof(nfsd_acl_version)/sizeof(nfsd_acl_version[0]))
+#define NFSD_ACL_NRVERS		ARRAY_SIZE(nfsd_acl_version)
 static struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
 
 static struct svc_program	nfsd_acl_program = {
@@ -101,7 +101,7 @@ static struct svc_version *	nfsd_version[] = {
 };
 
 #define NFSD_MINVERS    	2
-#define NFSD_NRVERS		(sizeof(nfsd_version)/sizeof(nfsd_version[0]))
+#define NFSD_NRVERS		ARRAY_SIZE(nfsd_version)
 static struct svc_version *nfsd_versions[NFSD_NRVERS];
 
 struct svc_program		nfsd_program = {
diff --git a/fs/nls/nls_euc-jp.c b/fs/nls/nls_euc-jp.c
index 80f108ae666..06640c3e402 100644
--- a/fs/nls/nls_euc-jp.c
+++ b/fs/nls/nls_euc-jp.c
@@ -268,8 +268,6 @@ static unsigned char euc2sjisibm_g3upper_map[][2] = {
 	{0xFC, 0x4B},
 };
 
-#define MAP_ELEMENT_OF(map)	(sizeof(map) / sizeof(map[0]))
-
 static inline int sjisibm2euc(unsigned char *euc, const unsigned char sjis_hi,
 			      const unsigned char sjis_lo);
 static inline int euc2sjisibm_jisx0212(unsigned char *sjis, const unsigned char euc_hi,
@@ -310,7 +308,7 @@ static inline int euc2sjisibm_jisx0212(unsigned char *sjis, const unsigned char
 	unsigned short euc;
 
 	min_index = 0;
-	max_index = MAP_ELEMENT_OF(euc2sjisibm_jisx0212_map) - 1;
+	max_index = ARRAY_SIZE(euc2sjisibm_jisx0212_map) - 1;
 	euc = (euc_hi << 8) | euc_lo;
 
 	while (min_index <= max_index) {
@@ -339,7 +337,7 @@ static inline int euc2sjisibm_g3upper(unsigned char *sjis, const unsigned char e
 	else
 		index = ((euc_hi << 8) | euc_lo) - 0xF4A1 + 12;
 
-	if ((index < 0) || (index >= MAP_ELEMENT_OF(euc2sjisibm_g3upper_map)))
+	if ((index < 0) || (index >= ARRAY_SIZE(euc2sjisibm_g3upper_map)))
 		return 0;
 
 	sjis[0] = euc2sjisibm_g3upper_map[index][0];
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index d55e164bd5c..78b40621b88 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -601,8 +601,7 @@ void store_print_tb(struct tree_balance *tb)
 		tb->tb_mode, PATH_LAST_POSITION(tb->tb_path),
 		tb->tb_path->pos_in_item);
 
-	for (h = 0; h < sizeof(tb->insert_size) / sizeof(tb->insert_size[0]);
-	     h++) {
+	for (h = 0; h < ARRAY_SIZE(tb->insert_size); h++) {
 		if (PATH_H_PATH_OFFSET(tb->tb_path, h) <=
 		    tb->tb_path->path_length
 		    && PATH_H_PATH_OFFSET(tb->tb_path,
@@ -658,15 +657,13 @@ void store_print_tb(struct tree_balance *tb)
 
 	/* print FEB list (list of buffers in form (bh (b_blocknr, b_count), that will be used for new nodes) */
 	h = 0;
-	for (i = 0; i < sizeof(tb->FEB) / sizeof(tb->FEB[0]); i++)
+	for (i = 0; i < ARRAY_SIZE(tb->FEB); i++)
 		sprintf(print_tb_buf + strlen(print_tb_buf),
 			"%p (%llu %d)%s", tb->FEB[i],
 			tb->FEB[i] ? (unsigned long long)tb->FEB[i]->
 			b_blocknr : 0ULL,
 			tb->FEB[i] ? atomic_read(&(tb->FEB[i]->b_count)) : 0,
-			(i ==
-			 sizeof(tb->FEB) / sizeof(tb->FEB[0]) -
-			 1) ? "\n" : ", ");
+			(i == ARRAY_SIZE(tb->FEB) - 1) ? "\n" : ", ");
 
 	sprintf(print_tb_buf + strlen(print_tb_buf),
 		"======================== the end ====================================\n");
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 59e76b51142..e92b991e6dd 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -377,10 +377,10 @@ static int sysv_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_sb = sb;
 	sbi->s_block_base = 0;
 	sb->s_fs_info = sbi;
-	
+
 	sb_set_blocksize(sb, BLOCK_SIZE);
 
-	for (i = 0; i < sizeof(flavours)/sizeof(flavours[0]) && !size; i++) {
+	for (i = 0; i < ARRAY_SIZE(flavours) && !size; i++) {
 		brelse(bh);
 		bh = sb_bread(sb, flavours[i].block);
 		if (!bh)
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 9303c50c5d5..0d555616c93 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -660,8 +660,7 @@ udf_find_anchor(struct super_block *sb)
 		 *     lastblock
 		 *  however, if the disc isn't closed, it could be 512 */
 
-		for (i=0; (!lastblock && i<sizeof(last)/sizeof(int)); i++)
-		{
+		for (i = 0; !lastblock && i < ARRAY_SIZE(last); i++) {
 			if (last[i] < 0 || !(bh = sb_bread(sb, last[i])))
 			{
 				ident = location = 0;
@@ -672,7 +671,7 @@ udf_find_anchor(struct super_block *sb)
 				location = le32_to_cpu(((tag *)bh->b_data)->tagLocation);
 				udf_release_data(bh);
 			}
-	
+
 			if (ident == TAG_IDENT_AVDP)
 			{
 				if (location == last[i] - UDF_SB_SESSION(sb))
@@ -753,8 +752,7 @@ udf_find_anchor(struct super_block *sb)
 		}
 	}
 
-	for (i=0; i<sizeof(UDF_SB_ANCHOR(sb))/sizeof(int); i++)
-	{
+	for (i = 0; i < ARRAY_SIZE(UDF_SB_ANCHOR(sb)); i++) {
 		if (UDF_SB_ANCHOR(sb)[i])
 		{
 			if (!(bh = udf_read_tagged(sb,
@@ -1313,8 +1311,7 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset)
 	if (!sb)
 		return 1;
 
-	for (i=0; i<sizeof(UDF_SB_ANCHOR(sb))/sizeof(int); i++)
-	{
+	for (i = 0; i < ARRAY_SIZE(UDF_SB_ANCHOR(sb)); i++) {
 		if (UDF_SB_ANCHOR(sb)[i] && (bh = udf_read_tagged(sb,
 			UDF_SB_ANCHOR(sb)[i], UDF_SB_ANCHOR(sb)[i], &ident)))
 		{
@@ -1325,7 +1322,7 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset)
 			main_e = le32_to_cpu( anchor->mainVolDescSeqExt.extLength );
 			main_e = main_e >> sb->s_blocksize_bits;
 			main_e += main_s;
-	
+
 			/* Locate the reserve sequence */
 			reserve_s = le32_to_cpu(anchor->reserveVolDescSeqExt.extLocation);
 			reserve_e = le32_to_cpu(anchor->reserveVolDescSeqExt.extLength);
@@ -1344,12 +1341,10 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset)
 		}
 	}
 
-	if (i == sizeof(UDF_SB_ANCHOR(sb))/sizeof(int))
-	{
+	if (i == ARRAY_SIZE(UDF_SB_ANCHOR(sb))) {
 		udf_debug("No Anchor block found\n");
 		return 1;
-	}
-	else
+	} else
 		udf_debug("Using anchor in block %d\n", UDF_SB_ANCHOR(sb)[i]);
 
 	for (i=0; i<UDF_SB_NUMPARTS(sb); i++)
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 713e6a7505d..1f0589a05ec 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -56,7 +56,7 @@ xfs_read_xfsstats(
 	};
 
 	/* Loop over all stats groups */
-	for (i=j=len = 0; i < sizeof(xstats)/sizeof(struct xstats_entry); i++) {
+	for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) {
 		len += sprintf(buffer + len, xstats[i].desc);
 		/* inner loop does each group */
 		while (j < xstats[i].endpoint) {
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index a478f42e63f..0f0a64e81db 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1335,7 +1335,7 @@ xfs_inactive_symlink_rmt(
 	 */
 	done = 0;
 	XFS_BMAP_INIT(&free_list, &first_block);
-	nmaps = sizeof(mval) / sizeof(mval[0]);
+	nmaps = ARRAY_SIZE(mval);
 	if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
 			XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
 			&free_list)))
-- 
cgit v1.2.3


From 29c6e48601065908e7933c0d8440f383fffe155b Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Fri, 24 Mar 2006 03:15:52 -0800
Subject: [PATCH] fs/9p/: possible cleanups

- mux.c: v9fs_poll_mux() was inline but not static resuling in needless
         object size bloat
- mux.c: remove all "inline"s: gcc should know best what to inline
- #if 0 the following unused global functions:
  - 9p.c: v9fs_v9fs_t_flush()
  - conv.c: v9fs_create_tauth()
  - mux.c: v9fs_mux_rpcnb()

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Eric Van Hensbergen <ericvh@ericvh.myip.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/9p.c   |  2 ++
 fs/9p/9p.h   |  2 --
 fs/9p/conv.c |  2 ++
 fs/9p/conv.h |  1 -
 fs/9p/mux.c  | 11 ++++++-----
 fs/9p/mux.h  |  2 --
 6 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/9p.c b/fs/9p/9p.c
index f86a28d1d6a..c148e6ba07e 100644
--- a/fs/9p/9p.c
+++ b/fs/9p/9p.c
@@ -148,6 +148,7 @@ v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid)
 	return ret;
 }
 
+#if 0
 /**
  * v9fs_v9fs_t_flush - flush a pending transaction
  * @v9ses: 9P2000 session information
@@ -171,6 +172,7 @@ int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag)
 
 	return ret;
 }
+#endif  /*  0  */
 
 /**
  * v9fs_t_stat - read a file's meta-data
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
index 0cd374d9471..95d72aec1c1 100644
--- a/fs/9p/9p.h
+++ b/fs/9p/9p.h
@@ -348,8 +348,6 @@ int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
 
 int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid);
 
-int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag);
-
 int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
 		struct v9fs_fcall **rcall);
 
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index bf1f1006796..bba81714246 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -536,6 +536,7 @@ struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version)
 	return fc;
 }
 
+#if 0
 struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname)
 {
 	int size;
@@ -559,6 +560,7 @@ struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname)
       error:
 	return fc;
 }
+#endif  /*  0  */
 
 struct v9fs_fcall *
 v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname)
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
index 26a736e4a2e..f5896628dae 100644
--- a/fs/9p/conv.h
+++ b/fs/9p/conv.h
@@ -33,7 +33,6 @@ int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
 void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag);
 
 struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version);
-struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname);
 struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname,
 	char *aname);
 struct v9fs_fcall *v9fs_create_tflush(u16 oldtag);
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 8e8356c1c22..e2ae60adda9 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -143,7 +143,7 @@ void v9fs_mux_global_exit(void)
  *
  * The current implementation returns sqrt of the number of mounts.
  */
-inline int v9fs_mux_calc_poll_procs(int muxnum)
+static int v9fs_mux_calc_poll_procs(int muxnum)
 {
 	int n;
 
@@ -384,7 +384,7 @@ v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
 /**
  * v9fs_poll_mux - polls a mux and schedules read or write works if necessary
  */
-static inline void v9fs_poll_mux(struct v9fs_mux_data *m)
+static void v9fs_poll_mux(struct v9fs_mux_data *m)
 {
 	int n;
 
@@ -762,9 +762,8 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
 	return req;
 }
 
-static inline void
-v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc,
-		  int err)
+static void v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc,
+			      struct v9fs_fcall *rc, int err)
 {
 	v9fs_mux_req_callback cb;
 	int tag;
@@ -902,6 +901,7 @@ v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
 	return err;
 }
 
+#if 0
 /**
  * v9fs_mux_rpcnb - sends 9P request without waiting for response.
  * @m: mux data
@@ -925,6 +925,7 @@ int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
 	dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag);
 	return 0;
 }
+#endif  /*  0  */
 
 /**
  * v9fs_mux_cancel - cancel all pending requests with error
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
index 9473b84f24b..17144fdfa11 100644
--- a/fs/9p/mux.h
+++ b/fs/9p/mux.h
@@ -50,8 +50,6 @@ void v9fs_mux_destroy(struct v9fs_mux_data *);
 int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc);
 struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m);
 int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc);
-int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
-	v9fs_mux_req_callback cb, void *a);
 
 void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush);
 void v9fs_mux_cancel(struct v9fs_mux_data *m, int err);
-- 
cgit v1.2.3


From 2c2212901f8b3fc84f36cb98150cfc2f6b4752f8 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Fri, 24 Mar 2006 03:15:53 -0800
Subject: [PATCH] fs/ext2/: proper ext2_get_parent() prototype

Add a proper prototype for ext2_get_parent().

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/ext2.h  | 3 +++
 fs/ext2/super.c | 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 00de0a7312a..11035ac7986 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -138,6 +138,9 @@ extern void ext2_set_inode_flags(struct inode *inode);
 extern int ext2_ioctl (struct inode *, struct file *, unsigned int,
 		       unsigned long);
 
+/* namei.c */
+struct dentry *ext2_get_parent(struct dentry *child);
+
 /* super.c */
 extern void ext2_error (struct super_block *, const char *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index cb6f9bd658d..7f3899bdeba 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -258,7 +258,6 @@ static struct super_operations ext2_sops = {
  * systems, but can be improved upon.
  * Currently only get_parent is required.
  */
-struct dentry *ext2_get_parent(struct dentry *child);
 static struct export_operations ext2_export_ops = {
 	.get_parent = ext2_get_parent,
 };
-- 
cgit v1.2.3


From c98d8cfbc600af88e9e6cffc84dd342280445760 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Fri, 24 Mar 2006 03:15:53 -0800
Subject: [PATCH] fs/coda/: proper prototypes

Introduce a file fs/coda/coda_int.h with proper prototypes for some code.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Jan Harkes <jaharkes@cs.cmu.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/coda/coda_int.h | 13 +++++++++++++
 fs/coda/dir.c      |  3 ++-
 fs/coda/file.c     |  2 ++
 fs/coda/inode.c    |  2 ++
 fs/coda/psdev.c    |  9 ++-------
 5 files changed, 21 insertions(+), 8 deletions(-)
 create mode 100644 fs/coda/coda_int.h

(limited to 'fs')

diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h
new file mode 100644
index 00000000000..9e6338fea51
--- /dev/null
+++ b/fs/coda/coda_int.h
@@ -0,0 +1,13 @@
+#ifndef _CODA_INT_
+#define _CODA_INT_
+
+extern struct file_system_type coda_fs_type;
+
+void coda_destroy_inodecache(void);
+int coda_init_inodecache(void);
+int coda_fsync(struct file *coda_file, struct dentry *coda_dentry,
+	       int datasync);
+
+#endif  /*  _CODA_INT_  */
+
+
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 8f1a517f8b4..54f76de8a68 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -27,6 +27,8 @@
 #include <linux/coda_cache.h>
 #include <linux/coda_proc.h>
 
+#include "coda_int.h"
+
 /* dir inode-ops */
 static int coda_create(struct inode *dir, struct dentry *new, int mode, struct nameidata *nd);
 static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd);
@@ -50,7 +52,6 @@ static int coda_dentry_delete(struct dentry *);
 /* support routines */
 static int coda_venus_readdir(struct file *filp, filldir_t filldir,
 			      void *dirent, struct dentry *dir);
-int coda_fsync(struct file *, struct dentry *dentry, int datasync);
 
 /* same as fs/bad_inode.c */
 static int coda_return_EIO(void)
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 30b4630bd73..146a991d6eb 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -24,6 +24,8 @@
 #include <linux/coda_psdev.h>
 #include <linux/coda_proc.h>
 
+#include "coda_int.h"
+
 /* if CODA_STORE fails with EOPNOTSUPP, venus clearly doesn't support
  * CODA_STORE/CODA_RELEASE and we fall back on using the CODA_CLOSE upcall */
 static int use_coda_close;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 04a73fb4848..7d7d52f74b2 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -31,6 +31,8 @@
 #include <linux/coda_fs_i.h>
 #include <linux/coda_cache.h>
 
+#include "coda_int.h"
+
 /* VFS super_block ops */
 static void coda_clear_inode(struct inode *);
 static void coda_put_super(struct super_block *);
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 6a3df88accf..98c74fe2e13 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -48,12 +48,9 @@
 #include <linux/coda_psdev.h>
 #include <linux/coda_proc.h>
 
-#define upc_free(r) kfree(r)
+#include "coda_int.h"
 
-/* 
- * Coda stuff
- */
-extern struct file_system_type coda_fs_type;
+#define upc_free(r) kfree(r)
 
 /* statistics */
 int           coda_hard;         /* allows signals during upcalls */
@@ -394,8 +391,6 @@ out:
 MODULE_AUTHOR("Peter J. Braam <braam@cs.cmu.edu>");
 MODULE_LICENSE("GPL");
 
-extern int coda_init_inodecache(void);
-extern void coda_destroy_inodecache(void);
 static int __init init_coda(void)
 {
 	int status;
-- 
cgit v1.2.3


From 4b6a9316fab51af611dc8671f296734089f6a22a Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Fri, 24 Mar 2006 03:16:05 -0800
Subject: [PATCH] cpuset memory spread: slab cache filesystems

Mark file system inode and similar slab caches subject to SLAB_MEM_SPREAD
memory spreading.

If a slab cache is marked SLAB_MEM_SPREAD, then anytime that a task that's
in a cpuset with the 'memory_spread_slab' option enabled goes to allocate
from such a slab cache, the allocations are spread evenly over all the
memory nodes (task->mems_allowed) allowed to that task, instead of favoring
allocation on the node local to the current cpu.

The following inode and similar caches are marked SLAB_MEM_SPREAD:

    file                               cache
    ====                               =====
    fs/adfs/super.c                    adfs_inode_cache
    fs/affs/super.c                    affs_inode_cache
    fs/befs/linuxvfs.c                 befs_inode_cache
    fs/bfs/inode.c                     bfs_inode_cache
    fs/block_dev.c                     bdev_cache
    fs/cifs/cifsfs.c                   cifs_inode_cache
    fs/coda/inode.c                    coda_inode_cache
    fs/dquot.c                         dquot
    fs/efs/super.c                     efs_inode_cache
    fs/ext2/super.c                    ext2_inode_cache
    fs/ext2/xattr.c (fs/mbcache.c)     ext2_xattr
    fs/ext3/super.c                    ext3_inode_cache
    fs/ext3/xattr.c (fs/mbcache.c)     ext3_xattr
    fs/fat/cache.c                     fat_cache
    fs/fat/inode.c                     fat_inode_cache
    fs/freevxfs/vxfs_super.c           vxfs_inode
    fs/hpfs/super.c                    hpfs_inode_cache
    fs/isofs/inode.c                   isofs_inode_cache
    fs/jffs/inode-v23.c                jffs_fm
    fs/jffs2/super.c                   jffs2_i
    fs/jfs/super.c                     jfs_ip
    fs/minix/inode.c                   minix_inode_cache
    fs/ncpfs/inode.c                   ncp_inode_cache
    fs/nfs/direct.c                    nfs_direct_cache
    fs/nfs/inode.c                     nfs_inode_cache
    fs/ntfs/super.c                    ntfs_big_inode_cache_name
    fs/ntfs/super.c                    ntfs_inode_cache
    fs/ocfs2/dlm/dlmfs.c               dlmfs_inode_cache
    fs/ocfs2/super.c                   ocfs2_inode_cache
    fs/proc/inode.c                    proc_inode_cache
    fs/qnx4/inode.c                    qnx4_inode_cache
    fs/reiserfs/super.c                reiser_inode_cache
    fs/romfs/inode.c                   romfs_inode_cache
    fs/smbfs/inode.c                   smb_inode_cache
    fs/sysv/inode.c                    sysv_inode_cache
    fs/udf/super.c                     udf_inode_cache
    fs/ufs/super.c                     ufs_inode_cache
    net/socket.c                       sock_inode_cache
    net/sunrpc/rpc_pipe.c              rpc_inode_cache

The choice of which slab caches to so mark was quite simple.  I marked
those already marked SLAB_RECLAIM_ACCOUNT, except for fs/xfs, dentry_cache,
inode_cache, and buffer_head, which were marked in a previous patch.  Even
though SLAB_RECLAIM_ACCOUNT is for a different purpose, it marks the same
potentially large file system i/o related slab caches as we need for memory
spreading.

Given that the rule now becomes "wherever you would have used a
SLAB_RECLAIM_ACCOUNT slab cache flag before (usually the inode cache), use
the SLAB_MEM_SPREAD flag too", this should be easy enough to maintain.
Future file system writers will just copy one of the existing file system
slab cache setups and tend to get it right without thinking.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/adfs/super.c          | 2 +-
 fs/affs/super.c          | 2 +-
 fs/befs/linuxvfs.c       | 2 +-
 fs/bfs/inode.c           | 2 +-
 fs/block_dev.c           | 2 +-
 fs/cifs/cifsfs.c         | 2 +-
 fs/coda/inode.c          | 2 +-
 fs/dquot.c               | 2 +-
 fs/efs/super.c           | 2 +-
 fs/ext2/super.c          | 2 +-
 fs/ext3/super.c          | 2 +-
 fs/fat/cache.c           | 2 +-
 fs/fat/inode.c           | 2 +-
 fs/freevxfs/vxfs_super.c | 2 +-
 fs/hpfs/super.c          | 2 +-
 fs/isofs/inode.c         | 2 +-
 fs/jffs/inode-v23.c      | 4 ++--
 fs/jffs2/super.c         | 2 +-
 fs/jfs/super.c           | 2 +-
 fs/mbcache.c             | 2 +-
 fs/minix/inode.c         | 2 +-
 fs/ncpfs/inode.c         | 2 +-
 fs/nfs/direct.c          | 2 +-
 fs/nfs/inode.c           | 2 +-
 fs/ntfs/super.c          | 4 ++--
 fs/ocfs2/dlm/dlmfs.c     | 2 +-
 fs/ocfs2/super.c         | 2 +-
 fs/proc/inode.c          | 2 +-
 fs/qnx4/inode.c          | 2 +-
 fs/reiserfs/super.c      | 2 +-
 fs/romfs/inode.c         | 2 +-
 fs/smbfs/inode.c         | 2 +-
 fs/sysv/inode.c          | 2 +-
 fs/udf/super.c           | 2 +-
 fs/ufs/super.c           | 2 +-
 35 files changed, 37 insertions(+), 37 deletions(-)

(limited to 'fs')

diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 243963228d1..80f798b7d76 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -241,7 +241,7 @@ static int init_inodecache(void)
 {
 	adfs_inode_cachep = kmem_cache_create("adfs_inode_cache",
 					     sizeof(struct adfs_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (adfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index aaec015a16e..216536d77a5 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -98,7 +98,7 @@ static int init_inodecache(void)
 {
 	affs_inode_cachep = kmem_cache_create("affs_inode_cache",
 					     sizeof(struct affs_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (affs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index dd6048ce053..ac031686d6d 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -427,7 +427,7 @@ befs_init_inodecache(void)
 {
 	befs_inode_cachep = kmem_cache_create("befs_inode_cache",
 					      sizeof (struct befs_inode_info),
-					      0, SLAB_RECLAIM_ACCOUNT,
+					      0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					      init_once, NULL);
 	if (befs_inode_cachep == NULL) {
 		printk(KERN_ERR "befs_init_inodecache: "
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 3af6c73c5b5..584a0838ac6 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -257,7 +257,7 @@ static int init_inodecache(void)
 {
 	bfs_inode_cachep = kmem_cache_create("bfs_inode_cache",
 					     sizeof(struct bfs_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (bfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 44d05e6e34d..80f97729e57 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -319,7 +319,7 @@ void __init bdev_cache_init(void)
 {
 	int err;
 	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
-			0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_PANIC,
+			0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_PANIC,
 			init_once, NULL);
 	err = register_filesystem(&bd_type);
 	if (err)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 1cd044ce82a..ba5a24b99a1 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -695,7 +695,7 @@ cifs_init_inodecache(void)
 {
 	cifs_inode_cachep = kmem_cache_create("cifs_inode_cache",
 					      sizeof (struct cifsInodeInfo),
-					      0, SLAB_RECLAIM_ACCOUNT,
+					      0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					      cifs_init_once, NULL);
 	if (cifs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 7d7d52f74b2..ada1a81df6b 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -71,7 +71,7 @@ int coda_init_inodecache(void)
 {
 	coda_inode_cachep = kmem_cache_create("coda_inode_cache",
 				sizeof(struct coda_inode_info),
-				0, SLAB_RECLAIM_ACCOUNT,
+				0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 				init_once, NULL);
 	if (coda_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/dquot.c b/fs/dquot.c
index acf07e581f8..1405755b8e4 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1821,7 +1821,7 @@ static int __init dquot_init(void)
 
 	dquot_cachep = kmem_cache_create("dquot", 
 			sizeof(struct dquot), sizeof(unsigned long) * 4,
-			SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_PANIC,
+			SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_PANIC,
 			NULL, NULL);
 
 	order = 0;
diff --git a/fs/efs/super.c b/fs/efs/super.c
index afc4891feb3..dff623e3ddb 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -81,7 +81,7 @@ static int init_inodecache(void)
 {
 	efs_inode_cachep = kmem_cache_create("efs_inode_cache",
 				sizeof(struct efs_inode_info),
-				0, SLAB_RECLAIM_ACCOUNT,
+				0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 				init_once, NULL);
 	if (efs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7f3899bdeba..e153f0cc240 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -175,7 +175,7 @@ static int init_inodecache(void)
 {
 	ext2_inode_cachep = kmem_cache_create("ext2_inode_cache",
 					     sizeof(struct ext2_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (ext2_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index efe5b20d7a5..e4a0a7cbb5b 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -481,7 +481,7 @@ static int init_inodecache(void)
 {
 	ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
 					     sizeof(struct ext3_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (ext3_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 1acc941245f..97b967b84fc 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -49,7 +49,7 @@ int __init fat_cache_init(void)
 {
 	fat_cache_cachep = kmem_cache_create("fat_cache",
 				sizeof(struct fat_cache),
-				0, SLAB_RECLAIM_ACCOUNT,
+				0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 				init_once, NULL);
 	if (fat_cache_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index e78d7b4842c..a75708901e7 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -518,7 +518,7 @@ static int __init fat_init_inodecache(void)
 {
 	fat_inode_cachep = kmem_cache_create("fat_inode_cache",
 					     sizeof(struct msdos_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (fat_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 6aa6fbe4f8e..b44c916d24a 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -260,7 +260,7 @@ vxfs_init(void)
 {
 	vxfs_inode_cachep = kmem_cache_create("vxfs_inode",
 			sizeof(struct vxfs_inode_info), 0, 
-			SLAB_RECLAIM_ACCOUNT, NULL, NULL);
+			SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL);
 	if (vxfs_inode_cachep)
 		return register_filesystem(&vxfs_fs_type);
 	return -ENOMEM;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 9488a794076..25fbefe4ed0 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -191,7 +191,7 @@ static int init_inodecache(void)
 {
 	hpfs_inode_cachep = kmem_cache_create("hpfs_inode_cache",
 					     sizeof(struct hpfs_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (hpfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 298f08be22d..fcb68151ad8 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -87,7 +87,7 @@ static int init_inodecache(void)
 {
 	isofs_inode_cachep = kmem_cache_create("isofs_inode_cache",
 					     sizeof(struct iso_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (isofs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 890d7ff7456..ffa49861d98 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -1812,14 +1812,14 @@ init_jffs_fs(void)
 	}
 #endif
 	fm_cache = kmem_cache_create("jffs_fm", sizeof(struct jffs_fm),
-				     0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, 
+				     0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 				     NULL, NULL);
 	if (!fm_cache) {
 		return -ENOMEM;
 	}
 
 	node_cache = kmem_cache_create("jffs_node",sizeof(struct jffs_node),
-				       0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, 
+				       0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 				       NULL, NULL);
 	if (!node_cache) {
 		kmem_cache_destroy(fm_cache);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index c8fac352a4c..f2563389581 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -331,7 +331,7 @@ static int __init init_jffs2_fs(void)
 
 	jffs2_inode_cachep = kmem_cache_create("jffs2_i",
 					     sizeof(struct jffs2_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     jffs2_i_init_once, NULL);
 	if (!jffs2_inode_cachep) {
 		printk(KERN_ERR "JFFS2 error: Failed to initialise inode cache\n");
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 18f69e6aa71..4ac40bfdbd8 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -664,7 +664,7 @@ static int __init init_jfs_fs(void)
 
 	jfs_inode_cachep =
 	    kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, 
-			    SLAB_RECLAIM_ACCOUNT, init_once, NULL);
+			    SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL);
 	if (jfs_inode_cachep == NULL)
 		return -ENOMEM;
 
diff --git a/fs/mbcache.c b/fs/mbcache.c
index f5bbe4c97c5..73e754fea2d 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -288,7 +288,7 @@ mb_cache_create(const char *name, struct mb_cache_op *cache_op,
 			INIT_LIST_HEAD(&cache->c_indexes_hash[m][n]);
 	}
 	cache->c_entry_cache = kmem_cache_create(name, entry_size, 0,
-		SLAB_RECLAIM_ACCOUNT, NULL, NULL);
+		SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL);
 	if (!cache->c_entry_cache)
 		goto fail;
 
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 790cc0d0e97..4fabef0b651 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -80,7 +80,7 @@ static int init_inodecache(void)
 {
 	minix_inode_cachep = kmem_cache_create("minix_inode_cache",
 					     sizeof(struct minix_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (minix_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 0b521d3d97c..2547ebaa654 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -72,7 +72,7 @@ static int init_inodecache(void)
 {
 	ncp_inode_cachep = kmem_cache_create("ncp_inode_cache",
 					     sizeof(struct ncp_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (ncp_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4e9b3a1b36c..751f5b5e7e0 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -781,7 +781,7 @@ int nfs_init_directcache(void)
 {
 	nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
 						sizeof(struct nfs_direct_req),
-						0, SLAB_RECLAIM_ACCOUNT,
+						0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 						NULL, NULL);
 	if (nfs_direct_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 419d1d254f9..834c1e905ce 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2163,7 +2163,7 @@ static int nfs_init_inodecache(void)
 {
 	nfs_inode_cachep = kmem_cache_create("nfs_inode_cache",
 					     sizeof(struct nfs_inode),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (nfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 7646b505938..27833f6df49 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3163,7 +3163,7 @@ static int __init init_ntfs_fs(void)
 
 	ntfs_inode_cache = kmem_cache_create(ntfs_inode_cache_name,
 			sizeof(ntfs_inode), 0,
-			SLAB_RECLAIM_ACCOUNT, NULL, NULL);
+			SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL);
 	if (!ntfs_inode_cache) {
 		printk(KERN_CRIT "NTFS: Failed to create %s!\n",
 				ntfs_inode_cache_name);
@@ -3172,7 +3172,7 @@ static int __init init_ntfs_fs(void)
 
 	ntfs_big_inode_cache = kmem_cache_create(ntfs_big_inode_cache_name,
 			sizeof(big_ntfs_inode), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
+			SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 			ntfs_big_inode_init_once, NULL);
 	if (!ntfs_big_inode_cache) {
 		printk(KERN_CRIT "NTFS: Failed to create %s!\n",
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index dd2d24dc25e..ca8587cc58b 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -596,7 +596,7 @@ static int __init init_dlmfs_fs(void)
 
 	dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache",
 				sizeof(struct dlmfs_inode_private),
-				0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
+				0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 				dlmfs_init_once, NULL);
 	if (!dlmfs_inode_cache)
 		return -ENOMEM;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 09e1c57a86a..3fe7896c666 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -951,7 +951,7 @@ static int ocfs2_initialize_mem_caches(void)
 {
 	ocfs2_inode_cachep = kmem_cache_create("ocfs2_inode_cache",
 					       sizeof(struct ocfs2_inode_info),
-					       0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
+					       0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					       ocfs2_inode_init_once, NULL);
 	if (!ocfs2_inode_cachep)
 		return -ENOMEM;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 075d3e94560..5ac781801ae 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -121,7 +121,7 @@ int __init proc_init_inodecache(void)
 {
 	proc_inode_cachep = kmem_cache_create("proc_inode_cache",
 					     sizeof(struct proc_inode),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (proc_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 80f32911c0c..463142c80ae 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -546,7 +546,7 @@ static int init_inodecache(void)
 {
 	qnx4_inode_cachep = kmem_cache_create("qnx4_inode_cache",
 					     sizeof(struct qnx4_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (qnx4_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index d63da756eb4..bf434605787 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -521,7 +521,7 @@ static int init_inodecache(void)
 	reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache",
 						  sizeof(struct
 							 reiserfs_inode_info),
-						  0, SLAB_RECLAIM_ACCOUNT,
+						  0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 						  init_once, NULL);
 	if (reiserfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 0a13859fd57..223bebb8b45 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -579,7 +579,7 @@ static int init_inodecache(void)
 {
 	romfs_inode_cachep = kmem_cache_create("romfs_inode_cache",
 					     sizeof(struct romfs_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (romfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 02e3e82d465..9b14542cc18 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -80,7 +80,7 @@ static int init_inodecache(void)
 {
 	smb_inode_cachep = kmem_cache_create("smb_inode_cache",
 					     sizeof(struct smb_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (smb_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index fa33eceb001..3ff89cc5833 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -342,7 +342,7 @@ int __init sysv_init_icache(void)
 {
 	sysv_inode_cachep = kmem_cache_create("sysv_inode_cache",
 			sizeof(struct sysv_inode_info), 0,
-			SLAB_RECLAIM_ACCOUNT,
+			SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 			init_once, NULL);
 	if (!sysv_inode_cachep)
 		return -ENOMEM;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 0d555616c93..e120f33f847 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -140,7 +140,7 @@ static int init_inodecache(void)
 {
 	udf_inode_cachep = kmem_cache_create("udf_inode_cache",
 					     sizeof(struct udf_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (udf_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index e9055ef7f5a..684018d3c58 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1184,7 +1184,7 @@ static int init_inodecache(void)
 {
 	ufs_inode_cachep = kmem_cache_create("ufs_inode_cache",
 					     sizeof(struct ufs_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT,
+					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
 					     init_once, NULL);
 	if (ufs_inode_cachep == NULL)
 		return -ENOMEM;
-- 
cgit v1.2.3


From fffb60f93ce5880aade88e01d7133b52a4879710 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Fri, 24 Mar 2006 03:16:06 -0800
Subject: [PATCH] cpuset memory spread: slab cache format

Rewrap the overly long source code lines resulting from the previous
patch's addition of the slab cache flag SLAB_MEM_SPREAD.  This patch
contains only formatting changes, and no function change.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/adfs/super.c      |  3 ++-
 fs/affs/super.c      |  3 ++-
 fs/befs/linuxvfs.c   |  3 ++-
 fs/bfs/inode.c       |  3 ++-
 fs/block_dev.c       |  3 ++-
 fs/cifs/cifsfs.c     |  3 ++-
 fs/dquot.c           |  3 ++-
 fs/ext2/super.c      |  3 ++-
 fs/ext3/super.c      |  3 ++-
 fs/fat/inode.c       |  3 ++-
 fs/hpfs/super.c      |  3 ++-
 fs/isofs/inode.c     |  3 ++-
 fs/jffs/inode-v23.c  | 10 ++++++----
 fs/jffs2/super.c     |  3 ++-
 fs/jfs/super.c       |  3 ++-
 fs/minix/inode.c     |  3 ++-
 fs/ncpfs/inode.c     |  3 ++-
 fs/nfs/direct.c      |  3 ++-
 fs/nfs/inode.c       |  3 ++-
 fs/ocfs2/dlm/dlmfs.c |  3 ++-
 fs/ocfs2/super.c     |  8 +++++---
 fs/proc/inode.c      |  3 ++-
 fs/qnx4/inode.c      |  3 ++-
 fs/reiserfs/super.c  |  3 ++-
 fs/romfs/inode.c     |  3 ++-
 fs/smbfs/inode.c     |  3 ++-
 fs/udf/super.c       |  3 ++-
 fs/ufs/super.c       |  3 ++-
 28 files changed, 63 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 80f798b7d76..252abda0d20 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -241,7 +241,8 @@ static int init_inodecache(void)
 {
 	adfs_inode_cachep = kmem_cache_create("adfs_inode_cache",
 					     sizeof(struct adfs_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (adfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 216536d77a5..4d7e5b19e5c 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -98,7 +98,8 @@ static int init_inodecache(void)
 {
 	affs_inode_cachep = kmem_cache_create("affs_inode_cache",
 					     sizeof(struct affs_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (affs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index ac031686d6d..044a5958782 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -427,7 +427,8 @@ befs_init_inodecache(void)
 {
 	befs_inode_cachep = kmem_cache_create("befs_inode_cache",
 					      sizeof (struct befs_inode_info),
-					      0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					      0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					      init_once, NULL);
 	if (befs_inode_cachep == NULL) {
 		printk(KERN_ERR "befs_init_inodecache: "
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 584a0838ac6..55a7a78332f 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -257,7 +257,8 @@ static int init_inodecache(void)
 {
 	bfs_inode_cachep = kmem_cache_create("bfs_inode_cache",
 					     sizeof(struct bfs_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (bfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 80f97729e57..2d096057ab5 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -319,7 +319,8 @@ void __init bdev_cache_init(void)
 {
 	int err;
 	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
-			0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_PANIC,
+			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
+				SLAB_MEM_SPREAD|SLAB_PANIC),
 			init_once, NULL);
 	err = register_filesystem(&bd_type);
 	if (err)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ba5a24b99a1..221b3334b73 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -695,7 +695,8 @@ cifs_init_inodecache(void)
 {
 	cifs_inode_cachep = kmem_cache_create("cifs_inode_cache",
 					      sizeof (struct cifsInodeInfo),
-					      0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					      0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					      cifs_init_once, NULL);
 	if (cifs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/dquot.c b/fs/dquot.c
index 1405755b8e4..6b388692093 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1821,7 +1821,8 @@ static int __init dquot_init(void)
 
 	dquot_cachep = kmem_cache_create("dquot", 
 			sizeof(struct dquot), sizeof(unsigned long) * 4,
-			SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_PANIC,
+			(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
+				SLAB_MEM_SPREAD|SLAB_PANIC),
 			NULL, NULL);
 
 	order = 0;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index e153f0cc240..268b73f5847 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -175,7 +175,8 @@ static int init_inodecache(void)
 {
 	ext2_inode_cachep = kmem_cache_create("ext2_inode_cache",
 					     sizeof(struct ext2_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (ext2_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index e4a0a7cbb5b..a3e2a8e7dca 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -481,7 +481,8 @@ static int init_inodecache(void)
 {
 	ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
 					     sizeof(struct ext3_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (ext3_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a75708901e7..297300fe81c 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -518,7 +518,8 @@ static int __init fat_init_inodecache(void)
 {
 	fat_inode_cachep = kmem_cache_create("fat_inode_cache",
 					     sizeof(struct msdos_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (fat_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 25fbefe4ed0..d72d8c87c99 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -191,7 +191,8 @@ static int init_inodecache(void)
 {
 	hpfs_inode_cachep = kmem_cache_create("hpfs_inode_cache",
 					     sizeof(struct hpfs_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (hpfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index fcb68151ad8..70adbb98bad 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -87,7 +87,8 @@ static int init_inodecache(void)
 {
 	isofs_inode_cachep = kmem_cache_create("isofs_inode_cache",
 					     sizeof(struct iso_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (isofs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index ffa49861d98..5a4519e834d 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -1812,15 +1812,17 @@ init_jffs_fs(void)
 	}
 #endif
 	fm_cache = kmem_cache_create("jffs_fm", sizeof(struct jffs_fm),
-				     0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
-				     NULL, NULL);
+		       0,
+		       SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+		       NULL, NULL);
 	if (!fm_cache) {
 		return -ENOMEM;
 	}
 
 	node_cache = kmem_cache_create("jffs_node",sizeof(struct jffs_node),
-				       0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
-				       NULL, NULL);
+		       0,
+		       SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+		       NULL, NULL);
 	if (!node_cache) {
 		kmem_cache_destroy(fm_cache);
 		return -ENOMEM;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index f2563389581..ffd8e84b22c 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -331,7 +331,8 @@ static int __init init_jffs2_fs(void)
 
 	jffs2_inode_cachep = kmem_cache_create("jffs2_i",
 					     sizeof(struct jffs2_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     jffs2_i_init_once, NULL);
 	if (!jffs2_inode_cachep) {
 		printk(KERN_ERR "JFFS2 error: Failed to initialise inode cache\n");
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4ac40bfdbd8..db6f41d6dd6 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -664,7 +664,8 @@ static int __init init_jfs_fs(void)
 
 	jfs_inode_cachep =
 	    kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, 
-			    SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, init_once, NULL);
+			    SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+			    init_once, NULL);
 	if (jfs_inode_cachep == NULL)
 		return -ENOMEM;
 
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 4fabef0b651..d9ffc43fee5 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -80,7 +80,8 @@ static int init_inodecache(void)
 {
 	minix_inode_cachep = kmem_cache_create("minix_inode_cache",
 					     sizeof(struct minix_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (minix_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 2547ebaa654..a1f3e972c6e 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -72,7 +72,8 @@ static int init_inodecache(void)
 {
 	ncp_inode_cachep = kmem_cache_create("ncp_inode_cache",
 					     sizeof(struct ncp_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (ncp_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 751f5b5e7e0..4ae2f3b33fe 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -781,7 +781,8 @@ int nfs_init_directcache(void)
 {
 	nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
 						sizeof(struct nfs_direct_req),
-						0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+						0, (SLAB_RECLAIM_ACCOUNT|
+							SLAB_MEM_SPREAD),
 						NULL, NULL);
 	if (nfs_direct_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 834c1e905ce..3413996f9a8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2163,7 +2163,8 @@ static int nfs_init_inodecache(void)
 {
 	nfs_inode_cachep = kmem_cache_create("nfs_inode_cache",
 					     sizeof(struct nfs_inode),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (nfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index ca8587cc58b..7e88e24b347 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -596,7 +596,8 @@ static int __init init_dlmfs_fs(void)
 
 	dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache",
 				sizeof(struct dlmfs_inode_private),
-				0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+				0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
+					SLAB_MEM_SPREAD),
 				dlmfs_init_once, NULL);
 	if (!dlmfs_inode_cache)
 		return -ENOMEM;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 3fe7896c666..44d8b524823 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -950,9 +950,11 @@ static void ocfs2_inode_init_once(void *data,
 static int ocfs2_initialize_mem_caches(void)
 {
 	ocfs2_inode_cachep = kmem_cache_create("ocfs2_inode_cache",
-					       sizeof(struct ocfs2_inode_info),
-					       0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
-					       ocfs2_inode_init_once, NULL);
+				       sizeof(struct ocfs2_inode_info),
+				       0,
+				       (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
+				       ocfs2_inode_init_once, NULL);
 	if (!ocfs2_inode_cachep)
 		return -ENOMEM;
 
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 5ac781801ae..722b9c46311 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -121,7 +121,8 @@ int __init proc_init_inodecache(void)
 {
 	proc_inode_cachep = kmem_cache_create("proc_inode_cache",
 					     sizeof(struct proc_inode),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (proc_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 463142c80ae..2ecd46f85e9 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -546,7 +546,8 @@ static int init_inodecache(void)
 {
 	qnx4_inode_cachep = kmem_cache_create("qnx4_inode_cache",
 					     sizeof(struct qnx4_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (qnx4_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index bf434605787..93e6ef9360e 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -521,7 +521,8 @@ static int init_inodecache(void)
 	reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache",
 						  sizeof(struct
 							 reiserfs_inode_info),
-						  0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+						  0, (SLAB_RECLAIM_ACCOUNT|
+							SLAB_MEM_SPREAD),
 						  init_once, NULL);
 	if (reiserfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 223bebb8b45..c2fc424d7d5 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -579,7 +579,8 @@ static int init_inodecache(void)
 {
 	romfs_inode_cachep = kmem_cache_create("romfs_inode_cache",
 					     sizeof(struct romfs_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (romfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 9b14542cc18..44ed1d418b4 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -80,7 +80,8 @@ static int init_inodecache(void)
 {
 	smb_inode_cachep = kmem_cache_create("smb_inode_cache",
 					     sizeof(struct smb_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (smb_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e120f33f847..e45789fe38e 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -140,7 +140,8 @@ static int init_inodecache(void)
 {
 	udf_inode_cachep = kmem_cache_create("udf_inode_cache",
 					     sizeof(struct udf_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (udf_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 684018d3c58..d257644a1ae 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1184,7 +1184,8 @@ static int init_inodecache(void)
 {
 	ufs_inode_cachep = kmem_cache_create("ufs_inode_cache",
 					     sizeof(struct ufs_inode_info),
-					     0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
 					     init_once, NULL);
 	if (ufs_inode_cachep == NULL)
 		return -ENOMEM;
-- 
cgit v1.2.3


From b0196009d8c3ecf6ea6ec080c63d2ccc146e7ad9 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Fri, 24 Mar 2006 03:16:09 -0800
Subject: [PATCH] cpuset memory spread slab cache hooks

Change the kmem_cache_create calls for certain slab caches to support cpuset
memory spreading.

See the previous patches, cpuset_mem_spread, for an explanation of cpuset
memory spreading, and cpuset_mem_spread_slab_cache for the slab cache support
for memory spreading.

The slab caches marked for now are: dentry_cache, inode_cache, some xfs slab
caches, and buffer_head.  This list may change over time.  In particular,
other file system types that are used extensively on large NUMA systems may
want to allow for spreading their directory and inode slab cache entries.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c             | 7 +++++--
 fs/dcache.c             | 3 ++-
 fs/inode.c              | 9 +++++++--
 fs/xfs/linux-2.6/kmem.h | 2 +-
 4 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 0d6ca7bac6c..36c7253bea7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3143,8 +3143,11 @@ void __init buffer_init(void)
 	int nrpages;
 
 	bh_cachep = kmem_cache_create("buffer_head",
-			sizeof(struct buffer_head), 0,
-			SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_buffer_head, NULL);
+					sizeof(struct buffer_head), 0,
+					(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
+					SLAB_MEM_SPREAD),
+					init_buffer_head,
+					NULL);
 
 	/*
 	 * Limit the bh occupancy to 10% of ZONE_NORMAL
diff --git a/fs/dcache.c b/fs/dcache.c
index 11dc83092d4..653f64ce98e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1682,7 +1682,8 @@ static void __init dcache_init(unsigned long mempages)
 	dentry_cache = kmem_cache_create("dentry_cache",
 					 sizeof(struct dentry),
 					 0,
-					 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC,
+					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
+					 SLAB_MEM_SPREAD),
 					 NULL, NULL);
 	
 	set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
diff --git a/fs/inode.c b/fs/inode.c
index 25967b67903..a51c671c54c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1375,8 +1375,13 @@ void __init inode_init(unsigned long mempages)
 	int loop;
 
 	/* inode slab cache */
-	inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
-				0, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_once, NULL);
+	inode_cachep = kmem_cache_create("inode_cache",
+					 sizeof(struct inode),
+					 0,
+					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
+					 SLAB_MEM_SPREAD),
+					 init_once,
+					 NULL);
 	set_shrinker(DEFAULT_SEEKS, shrink_icache_memory);
 
 	/* Hash may have been set up in inode_init_early */
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index f0268a84e6f..2cfd33d4d8a 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -100,7 +100,7 @@ extern void  kmem_free(void *, size_t);
 
 #define KM_ZONE_HWALIGN	SLAB_HWCACHE_ALIGN
 #define KM_ZONE_RECLAIM	SLAB_RECLAIM_ACCOUNT
-#define KM_ZONE_SPREAD	0
+#define KM_ZONE_SPREAD	SLAB_MEM_SPREAD
 
 #define kmem_zone	kmem_cache
 #define kmem_zone_t	struct kmem_cache
-- 
cgit v1.2.3


From 53b3531bbbf70ac7551b32d1acc229d94de52658 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 24 Mar 2006 03:16:13 -0800
Subject: [PATCH] s/;;/;/g

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/cifssmb.c | 2 +-
 fs/pnode.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b41e8b37965..a243fe2792d 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4908,7 +4908,7 @@ SetEARetry:
 	parm_data->list_len = cpu_to_le32(count);
 	parm_data->list[0].EA_flags = 0;
 	/* we checked above that name len is less than 255 */
-	parm_data->list[0].name_len = (__u8)name_len;;
+	parm_data->list[0].name_len = (__u8)name_len;
 	/* EA names are always ASCII */
 	if(ea_name)
 		strncpy(parm_data->list[0].name,ea_name,name_len);
diff --git a/fs/pnode.c b/fs/pnode.c
index f1871f773f6..37b568ed0e0 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -130,7 +130,7 @@ static struct vfsmount *get_source(struct vfsmount *dest,
 {
 	struct vfsmount *p_last_src = NULL;
 	struct vfsmount *p_last_dest = NULL;
-	*type = CL_PROPAGATION;;
+	*type = CL_PROPAGATION;
 
 	if (IS_MNT_SHARED(dest))
 		*type |= CL_MAKE_SHARED;
-- 
cgit v1.2.3


From 5b3cf3e0f019fcfe1e0c26aedb4d54b5e5be3a7b Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Fri, 24 Mar 2006 03:16:14 -0800
Subject: [PATCH] isofs: remove unused debugging macros

Remove unused debugging macros from isofs.  The referred debug functions do
not exist in the kernel.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/isofs.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 38c75151fc6..439a19b1bf3 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -178,15 +178,3 @@ extern struct inode_operations isofs_dir_inode_operations;
 extern struct file_operations isofs_dir_operations;
 extern struct address_space_operations isofs_symlink_aops;
 extern struct export_operations isofs_export_ops;
-
-/* The following macros are used to check for memory leaks. */
-#ifdef LEAK_CHECK
-#define free_s leak_check_free_s
-#define malloc leak_check_malloc
-#define sb_bread leak_check_bread
-#define brelse leak_check_brelse
-extern void * leak_check_malloc(unsigned int size);
-extern void leak_check_free_s(void * obj, int size);
-extern struct buffer_head * leak_check_bread(struct super_block *sb, int block);
-extern void leak_check_brelse(struct buffer_head * bh);
-#endif /* LEAK_CHECK */
-- 
cgit v1.2.3


From 09fe316a7b10219be592118626850e1dfdfcc1aa Mon Sep 17 00:00:00 2001
From: Alex Tomas <alex@clusterfs.com>
Date: Fri, 24 Mar 2006 03:16:16 -0800
Subject: [PATCH] fast ext3_statfs

Under I/O load it may take up to a dozen seconds to read all group
descriptors.  This is what ext3_statfs() does.  At the same time, we already
maintain global numbers of free inodes/blocks.  Why don't we use them instead
of group reading and summing?

Cc: Ravikiran G Thirumalai <kiran@scalex86.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/super.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index a3e2a8e7dca..86e443182de 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2326,7 +2326,8 @@ restore_opts:
 
 static int ext3_statfs (struct super_block * sb, struct kstatfs * buf)
 {
-	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	struct ext3_super_block *es = sbi->s_es;
 	unsigned long overhead;
 	int i;
 
@@ -2368,12 +2369,12 @@ static int ext3_statfs (struct super_block * sb, struct kstatfs * buf)
 	buf->f_type = EXT3_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead;
-	buf->f_bfree = ext3_count_free_blocks (sb);
+	buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter);
 	buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
 	if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
 		buf->f_bavail = 0;
 	buf->f_files = le32_to_cpu(es->s_inodes_count);
-	buf->f_ffree = ext3_count_free_inodes (sb);
+	buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter);
 	buf->f_namelen = EXT3_NAME_LEN;
 	return 0;
 }
-- 
cgit v1.2.3


From 38885bd4c2a4b59ddb22271d3e6c621859c76f02 Mon Sep 17 00:00:00 2001
From: Coywolf Qi Hunt <qiyong@fc-cn.com>
Date: Fri, 24 Mar 2006 03:18:05 -0800
Subject: [PATCH] sb_set_blocksize cleanup

sb_set_blocksize() cleanup: make sb_set_blocksize() use blksize_bits().

Signed-off-by: Coywolf Qi Hunt <qiyong@fc-cn.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2d096057ab5..573fc8e0b67 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -86,16 +86,12 @@ EXPORT_SYMBOL(set_blocksize);
 
 int sb_set_blocksize(struct super_block *sb, int size)
 {
-	int bits = 9; /* 2^9 = 512 */
-
 	if (set_blocksize(sb->s_bdev, size))
 		return 0;
 	/* If we get here, we know size is power of two
 	 * and it's value is between 512 and PAGE_SIZE */
 	sb->s_blocksize = size;
-	for (size >>= 10; size; size >>= 1)
-		++bits;
-	sb->s_blocksize_bits = bits;
+	sb->s_blocksize_bits = blksize_bits(size);
 	return sb->s_blocksize;
 }
 
-- 
cgit v1.2.3


From 8a14342683b1e3adcf5f78660a42fcbd95b44a35 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Fri, 24 Mar 2006 03:18:10 -0800
Subject: [PATCH] HOTPLUG_CPU: avoid hitting too many cachelines in
 recalc_bh_state()

Instead of using for_each_cpu(i), we can use for_each_online_cpu(i).

When a CPU goes offline (ie removed from online map), it might have a non
null bh_accounting.nr, so this patch adds a transfer of this counter to an
online CPU counter.

We already have a hotcpu_notifier, (function buffer_cpu_notify()), where we
can do this bh_accounting.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 36c7253bea7..11ca6eb46a3 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3078,7 +3078,7 @@ static void recalc_bh_state(void)
 	if (__get_cpu_var(bh_accounting).ratelimit++ < 4096)
 		return;
 	__get_cpu_var(bh_accounting).ratelimit = 0;
-	for_each_cpu(i)
+	for_each_online_cpu(i)
 		tot += per_cpu(bh_accounting, i).nr;
 	buffer_heads_over_limit = (tot > max_buffer_heads);
 }
@@ -3127,6 +3127,9 @@ static void buffer_exit_cpu(int cpu)
 		brelse(b->bhs[i]);
 		b->bhs[i] = NULL;
 	}
+	get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr;
+	per_cpu(bh_accounting, cpu).nr = 0;
+	put_cpu_var(bh_accounting);
 }
 
 static int buffer_cpu_notify(struct notifier_block *self,
-- 
cgit v1.2.3


From 4741c9fd36b3bcadd37238321c469049da94a4b9 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 24 Mar 2006 03:18:11 -0800
Subject: [PATCH] set_page_dirty() return value fixes

We need set_page_dirty() to return true if it actually transitioned the page
from a clean to dirty state.  This wasn't right in a couple of places.  Do a
kernel-wide audit, fix things up.

This leaves open the possibility of returning a negative errno from
set_page_dirty() sometime in the future.  But we don't do that at present.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 11ca6eb46a3..24262ea8cc5 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -865,8 +865,8 @@ int __set_page_dirty_buffers(struct page *page)
 		}
 		write_unlock_irq(&mapping->tree_lock);
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+		return 1;
 	}
-	
 	return 0;
 }
 EXPORT_SYMBOL(__set_page_dirty_buffers);
-- 
cgit v1.2.3


From 18e79b40ed9c5223b88771f805c69f5993fc131b Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 24 Mar 2006 03:18:14 -0800
Subject: [PATCH] fsync: extract internal code

Pull the guts out of do_fsync() - we can use it elsewhere.

Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 43 +++++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 24262ea8cc5..6d77ce9f54e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -327,31 +327,24 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
 	return ret;
 }
 
-static long do_fsync(unsigned int fd, int datasync)
+long do_fsync(struct file *file, int datasync)
 {
-	struct file * file;
-	struct address_space *mapping;
-	int ret, err;
-
-	ret = -EBADF;
-	file = fget(fd);
-	if (!file)
-		goto out;
+	int ret;
+	int err;
+	struct address_space *mapping = file->f_mapping;
 
-	ret = -EINVAL;
 	if (!file->f_op || !file->f_op->fsync) {
 		/* Why?  We can still call filemap_fdatawrite */
-		goto out_putf;
+		ret = -EINVAL;
+		goto out;
 	}
 
-	mapping = file->f_mapping;
-
 	current->flags |= PF_SYNCWRITE;
 	ret = filemap_fdatawrite(mapping);
 
 	/*
-	 * We need to protect against concurrent writers,
-	 * which could cause livelocks in fsync_buffers_list
+	 * We need to protect against concurrent writers, which could cause
+	 * livelocks in fsync_buffers_list().
 	 */
 	mutex_lock(&mapping->host->i_mutex);
 	err = file->f_op->fsync(file, file->f_dentry, datasync);
@@ -362,21 +355,31 @@ static long do_fsync(unsigned int fd, int datasync)
 	if (!ret)
 		ret = err;
 	current->flags &= ~PF_SYNCWRITE;
-
-out_putf:
-	fput(file);
 out:
 	return ret;
 }
 
+static long __do_fsync(unsigned int fd, int datasync)
+{
+	struct file *file;
+	int ret = -EBADF;
+
+	file = fget(fd);
+	if (file) {
+		ret = do_fsync(file, datasync);
+		fput(file);
+	}
+	return ret;
+}
+
 asmlinkage long sys_fsync(unsigned int fd)
 {
-	return do_fsync(fd, 0);
+	return __do_fsync(fd, 0);
 }
 
 asmlinkage long sys_fdatasync(unsigned int fd)
 {
-	return do_fsync(fd, 1);
+	return __do_fsync(fd, 1);
 }
 
 /*
-- 
cgit v1.2.3


From 82d821ddca8f5c990067cc37543010aa9346a172 Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@parisc-linux.org>
Date: Fri, 24 Mar 2006 03:18:20 -0800
Subject: [PATCH] Conditionalize compat_sys_newfstatat

If we don't want sys_newfstatat because __ARCH_WANT_STAT64 is defined, then
we certainly don't want compat_sys_newfstatat either.

Signed-off-by: Grant Grundler <grundler@parisc-linux.org>
Signed-off-by: Kyle McMartin <kyle@parisc-linux.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 5333c7d7427..2a88477330f 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -114,6 +114,7 @@ asmlinkage long compat_sys_newlstat(char __user * filename,
 	return error;
 }
 
+#ifndef __ARCH_WANT_STAT64
 asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename,
 		struct compat_stat __user *statbuf, int flag)
 {
@@ -134,6 +135,7 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename,
 out:
 	return error;
 }
+#endif
 
 asmlinkage long compat_sys_newfstat(unsigned int fd,
 		struct compat_stat __user * statbuf)
-- 
cgit v1.2.3


From b5a7c4f5835ae2805d00ca39709002cb03364143 Mon Sep 17 00:00:00 2001
From: Glauber de Oliveira Costa <glommer@br.ibm.com>
Date: Fri, 24 Mar 2006 03:18:37 -0800
Subject: [PATCH] ext3: Properly report backup block present in a group

In filesystems with the meta block group flag on, ext3_bg_num_gdb() fails
to report the correct number of blocks used to store the group descriptor
backups in a given group.  It happens because meta_bg follows a different
logic from the original ext3 backup placement in groups multiples of 3, 5
and 7.

Signed-off-by: Glauber de Oliveira Costa <glommer@br.ibm.com>
Cc: Andreas Dilger <adilger@clusterfs.com>
Cc: "Stephen C. Tweedie" <sct@redhat.com>
Cc: Alex Tomas <alex@clusterfs.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 40 +++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 6250fcdf14a..46623f77666 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1493,12 +1493,33 @@ static int ext3_group_sparse(int group)
  */
 int ext3_bg_has_super(struct super_block *sb, int group)
 {
-	if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
-	    !ext3_group_sparse(group))
+	if (EXT3_HAS_RO_COMPAT_FEATURE(sb,
+				EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
+			!ext3_group_sparse(group))
 		return 0;
 	return 1;
 }
 
+static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group)
+{
+	unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb);
+	unsigned long first = metagroup * EXT3_DESC_PER_BLOCK(sb);
+	unsigned long last = first + EXT3_DESC_PER_BLOCK(sb) - 1;
+
+	if (group == first || group == first + 1 || group == last)
+		return 1;
+	return 0;
+}
+
+static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group)
+{
+	if (EXT3_HAS_RO_COMPAT_FEATURE(sb,
+				EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
+			!ext3_group_sparse(group))
+		return 0;
+	return EXT3_SB(sb)->s_gdb_count;
+}
+
 /**
  *	ext3_bg_num_gdb - number of blocks used by the group table in group
  *	@sb: superblock for filesystem
@@ -1510,9 +1531,14 @@ int ext3_bg_has_super(struct super_block *sb, int group)
  */
 unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
 {
-	if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
-	    !ext3_group_sparse(group))
-		return 0;
-	return EXT3_SB(sb)->s_gdb_count;
-}
+	unsigned long first_meta_bg =
+			le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg);
+	unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb);
+
+	if (!EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG) ||
+			metagroup < first_meta_bg)
+		return ext3_bg_num_gdb_nometa(sb,group);
 
+	return ext3_bg_num_gdb_meta(sb,group);
+
+}
-- 
cgit v1.2.3


From c690a72253b962b7274559f2cdf4844553076c03 Mon Sep 17 00:00:00 2001
From: Michael Owen <mowen@costco.com>
Date: Fri, 24 Mar 2006 18:21:44 +0100
Subject: typo patch for fs/ufs/super.c

Quick and simple typo fix. neTXstep -> neXTstep

Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/ufs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index d257644a1ae..db98a4c71e6 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -575,7 +575,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 		if (!silent)
 			printk("You didn't specify the type of your ufs filesystem\n\n"
 			"mount -t ufs -o ufstype="
-			"sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|netxstep-cd|openstep ...\n\n"
+			"sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|nextstep-cd|openstep ...\n\n"
 			">>>WARNING<<< Wrong ufstype may corrupt your filesystem, "
 			"default is ufstype=old\n");
 		ufs_set_opt (sbi->s_mount_opt, UFSTYPE_OLD);
-- 
cgit v1.2.3


From c30fe7f73194650148b58ee80908c1bc38246397 Mon Sep 17 00:00:00 2001
From: Uwe Zeisberger <zeisberg@informatik.uni-freiburg.de>
Date: Fri, 24 Mar 2006 18:23:14 +0100
Subject: fix typos "wich" -> "which"

Signed-off-by: Uwe Zeisberger <zeisberg@informatik.uni-freiburg.de>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/befs/datastream.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index 785f6b2d5d1..b7d6b920f65 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -118,7 +118,7 @@ befs_fblock2brun(struct super_block *sb, befs_data_stream * data,
  * befs_read_lsmylink - read long symlink from datastream.
  * @sb: Filesystem superblock 
  * @ds: Datastrem to read from
- * @buf: Buffer in wich to place long symlink data
+ * @buf: Buffer in which to place long symlink data
  * @len: Length of the long symlink in bytes
  *
  * Returns the number of bytes read
-- 
cgit v1.2.3


From 88bcd51262ed45212d1b3a65abbac46eaf36bfeb Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Fri, 24 Mar 2006 18:38:48 +0100
Subject: BUG_ON() Conversion in fs/binfmt_elf_fdpic.c

this changes if() BUG(); constructs to BUG_ON() which is
cleaner and can better optimized away

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/binfmt_elf_fdpic.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 5b3076e8ee9..a2e48c999c2 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -572,8 +572,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	csp -= sizeof(unsigned long);
 	__put_user(bprm->argc, (unsigned long *) csp);
 
-	if (csp != sp)
-		BUG();
+	BUG_ON(csp != sp);
 
 	/* fill in the argv[] array */
 #ifdef CONFIG_MMU
-- 
cgit v1.2.3


From c5d3237c2424c4a3cf69d33abc1f229943468367 Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Fri, 24 Mar 2006 18:42:13 +0100
Subject: BUG_ON() Conversion in fs/coda/

this changes if() BUG(); constructs to BUG_ON() which is
cleaner, contains unlikely() and can better optimized away.

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/coda/cache.c | 2 +-
 fs/coda/cnode.c | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index c607d923350..5d052713326 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -51,7 +51,7 @@ void coda_cache_clear_all(struct super_block *sb)
         struct coda_sb_info *sbi;
 
         sbi = coda_sbp(sb);
-        if (!sbi) BUG();
+	BUG_ON(!sbi);
 
 	atomic_inc(&permission_epoch);
 }
diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c
index 23aeef5aa81..4c9fecbfa91 100644
--- a/fs/coda/cnode.c
+++ b/fs/coda/cnode.c
@@ -120,8 +120,7 @@ void coda_replace_fid(struct inode *inode, struct CodaFid *oldfid,
 	
 	cii = ITOC(inode);
 
-	if (!coda_fideq(&cii->c_fid, oldfid))
-		BUG();
+	BUG_ON(!coda_fideq(&cii->c_fid, oldfid));
 
 	/* replace fid and rehash inode */
 	/* XXX we probably need to hold some lock here! */
-- 
cgit v1.2.3


From a74e1f0e8a7858c9ba6065480c88d7feba3520ac Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 3 Mar 2006 10:18:58 -0800
Subject: ocfs2: use __attribute__ format

Use the "format" attribute on ocfs2_error() and ocfs2_abort() so that the
compiler will warn when we get calls to those functions wrong.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/super.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h
index c564177dfbd..783f5270f2a 100644
--- a/fs/ocfs2/super.h
+++ b/fs/ocfs2/super.h
@@ -33,12 +33,16 @@ int ocfs2_publish_get_mount_state(struct ocfs2_super *osb,
 
 void __ocfs2_error(struct super_block *sb,
 		   const char *function,
-		   const char *fmt, ...);
+		   const char *fmt, ...)
+	__attribute__ ((format (printf, 3, 4)));
+
 #define ocfs2_error(sb, fmt, args...) __ocfs2_error(sb, __PRETTY_FUNCTION__, fmt, ##args)
 
 void __ocfs2_abort(struct super_block *sb,
 		   const char *function,
-		   const char *fmt, ...);
+		   const char *fmt, ...)
+	__attribute__ ((format (printf, 3, 4)));
+
 #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
 
 #endif /* OCFS2_SUPER_H */
-- 
cgit v1.2.3


From 9c6510a5bfe2f1c5f5b93386c06954be02e974e4 Mon Sep 17 00:00:00 2001
From: Kurt Hackel <kurt.hackel@oracle.com>
Date: Thu, 2 Mar 2006 18:09:26 -0800
Subject: [PATCH] ocfs2: fix hang in dlm lock resource mastery

fixes hangs in lock mastery related to refcounting on the mle structure

Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmmaster.c | 124 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 92 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 847dd3cc4cf..78ac3a00eb5 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -792,7 +792,15 @@ redo_request:
 			mlog_errno(ret);
 		if (mle->master != O2NM_MAX_NODES) {
 			/* found a master ! */
-			break;
+			if (mle->master <= nodenum)
+				break;
+			/* if our master request has not reached the master
+			 * yet, keep going until it does.  this is how the
+			 * master will know that asserts are needed back to
+			 * the lower nodes. */
+			mlog(0, "%s:%.*s: requests only up to %u but master "
+			     "is %u, keep going\n", dlm->name, namelen,
+			     lockid, nodenum, mle->master);
 		}
 	}
 
@@ -860,7 +868,19 @@ recheck:
 	/* check if another node has already become the owner */
 	spin_lock(&res->spinlock);
 	if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
+		mlog(0, "%s:%.*s: owner is suddenly %u\n", dlm->name,
+		     res->lockname.len, res->lockname.name, res->owner);
 		spin_unlock(&res->spinlock);
+		/* this will cause the master to re-assert across
+		 * the whole cluster, freeing up mles */
+		ret = dlm_do_master_request(mle, res->owner);
+		if (ret < 0) {
+			/* give recovery a chance to run */
+			mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret);
+			msleep(500);
+			goto recheck;
+		}
+		ret = 0;
 		goto leave;
 	}
 	spin_unlock(&res->spinlock);
@@ -1244,13 +1264,14 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data)
 {
 	u8 response = DLM_MASTER_RESP_MAYBE;
 	struct dlm_ctxt *dlm = data;
-	struct dlm_lock_resource *res;
+	struct dlm_lock_resource *res = NULL;
 	struct dlm_master_request *request = (struct dlm_master_request *) msg->buf;
 	struct dlm_master_list_entry *mle = NULL, *tmpmle = NULL;
 	char *name;
 	unsigned int namelen;
 	int found, ret;
 	int set_maybe;
+	int dispatch_assert = 0;
 
 	if (!dlm_grab(dlm))
 		return DLM_MASTER_RESP_NO;
@@ -1287,7 +1308,6 @@ way_up_top:
 		}
 
 		if (res->owner == dlm->node_num) {
-			u32 flags = DLM_ASSERT_MASTER_MLE_CLEANUP;
 			spin_unlock(&res->spinlock);
 			// mlog(0, "this node is the master\n");
 			response = DLM_MASTER_RESP_YES;
@@ -1300,16 +1320,7 @@ way_up_top:
 			 * caused all nodes up to this one to
 			 * create mles.  this node now needs to
 			 * go back and clean those up. */
-			mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
-			     dlm->node_num, res->lockname.len, res->lockname.name);
-			ret = dlm_dispatch_assert_master(dlm, res, 1,
-							 request->node_idx,
-							 flags);
-			if (ret < 0) {
-				mlog(ML_ERROR, "failed to dispatch assert "
-				     "master work\n");
-				response = DLM_MASTER_RESP_ERROR;
-			}
+			dispatch_assert = 1;
 			goto send_response;
 		} else if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
 			spin_unlock(&res->spinlock);
@@ -1357,9 +1368,13 @@ way_up_top:
 			}
 		} else if (tmpmle->master != DLM_LOCK_RES_OWNER_UNKNOWN) {
 			set_maybe = 0;
-			if (tmpmle->master == dlm->node_num)
+			if (tmpmle->master == dlm->node_num) {
 				response = DLM_MASTER_RESP_YES;
-			else
+				/* this node will be the owner.
+				 * go back and clean the mles on any
+				 * other nodes */
+				dispatch_assert = 1;
+			} else
 				response = DLM_MASTER_RESP_NO;
 		} else {
 			// mlog(0, "this node is attempting to "
@@ -1398,8 +1413,8 @@ way_up_top:
 			mle = (struct dlm_master_list_entry *)
 				kmem_cache_alloc(dlm_mle_cache, GFP_KERNEL);
 			if (!mle) {
-				// bad bad bad... this sucks.
 				response = DLM_MASTER_RESP_ERROR;
+				mlog_errno(-ENOMEM);
 				goto send_response;
 			}
 			spin_lock(&dlm->spinlock);
@@ -1418,25 +1433,19 @@ way_up_top:
 		// mlog(0, "mle was found\n");
 		set_maybe = 1;
 		spin_lock(&tmpmle->spinlock);
+		if (tmpmle->master == dlm->node_num) {
+			mlog(ML_ERROR, "no lockres, but an mle with this node as master!\n");
+			BUG();
+		}
 		if (tmpmle->type == DLM_MLE_BLOCK)
 			response = DLM_MASTER_RESP_NO;
 		else if (tmpmle->type == DLM_MLE_MIGRATION) {
 			mlog(0, "migration mle was found (%u->%u)\n",
 			     tmpmle->master, tmpmle->new_master);
-			if (tmpmle->master == dlm->node_num) {
-				mlog(ML_ERROR, "no lockres, but migration mle "
-				     "says that this node is master!\n");
-				BUG();
-			}
 			/* real master can respond on its own */
 			response = DLM_MASTER_RESP_NO;
-		} else {
-			if (tmpmle->master == dlm->node_num) {
-				response = DLM_MASTER_RESP_YES;
-				set_maybe = 0;
-			} else
-				response = DLM_MASTER_RESP_MAYBE;
-		}
+		} else
+			response = DLM_MASTER_RESP_MAYBE;
 		if (set_maybe)
 			set_bit(request->node_idx, tmpmle->maybe_map);
 		spin_unlock(&tmpmle->spinlock);
@@ -1449,6 +1458,24 @@ way_up_top:
 		dlm_put_mle(tmpmle);
 	}
 send_response:
+
+	if (dispatch_assert) {
+		if (response != DLM_MASTER_RESP_YES)
+			mlog(ML_ERROR, "invalid response %d\n", response);
+		if (!res) {
+			mlog(ML_ERROR, "bad lockres while trying to assert!\n");
+			BUG();
+		}
+		mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
+			     dlm->node_num, res->lockname.len, res->lockname.name);
+		ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, 
+						 DLM_ASSERT_MASTER_MLE_CLEANUP);
+		if (ret < 0) {
+			mlog(ML_ERROR, "failed to dispatch assert master work\n");
+			response = DLM_MASTER_RESP_ERROR;
+		}
+	}
+
 	dlm_put(dlm);
 	return response;
 }
@@ -1471,8 +1498,11 @@ static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname,
 	int to, tmpret;
 	struct dlm_node_iter iter;
 	int ret = 0;
+	int reassert;
 
 	BUG_ON(namelen > O2NM_MAX_NAME_LEN);
+again:
+	reassert = 0;
 
 	/* note that if this nodemap is empty, it returns 0 */
 	dlm_node_iter_init(nodemap, &iter);
@@ -1504,9 +1534,17 @@ static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname,
 			     "got %d.\n", namelen, lockname, to, r);
 			dlm_dump_lock_resources(dlm);
 			BUG();
+		} else if (r == EAGAIN) {
+			mlog(0, "%.*s: node %u create mles on other "
+			     "nodes and requests a re-assert\n", 
+			     namelen, lockname, to);
+			reassert = 1;
 		}
 	}
 
+	if (reassert)
+		goto again;
+
 	return ret;
 }
 
@@ -1528,6 +1566,8 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
 	char *name;
 	unsigned int namelen;
 	u32 flags;
+	int master_request = 0;
+	int ret = 0;
 
 	if (!dlm_grab(dlm))
 		return 0;
@@ -1642,11 +1682,22 @@ ok:
 	// mlog(0, "woo!  got an assert_master from node %u!\n",
 	// 	     assert->node_idx);
 	if (mle) {
-		int extra_ref;
+		int extra_ref = 0;
+		int nn = -1;
 		
 		spin_lock(&mle->spinlock);
-		extra_ref = !!(mle->type == DLM_MLE_BLOCK
-			       || mle->type == DLM_MLE_MIGRATION);
+		if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION)
+			extra_ref = 1;
+		else {
+			/* MASTER mle: if any bits set in the response map
+			 * then the calling node needs to re-assert to clear
+			 * up nodes that this node contacted */
+			while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, 
+						    nn+1)) < O2NM_MAX_NODES) {
+				if (nn != dlm->node_num && nn != assert->node_idx)
+					master_request = 1;
+			}
+		}
 		mle->master = assert->node_idx;
 		atomic_set(&mle->woken, 1);
 		wake_up(&mle->wq);
@@ -1677,10 +1728,15 @@ ok:
 	}
 
 done:
+	ret = 0;
 	if (res)
 		dlm_lockres_put(res);
 	dlm_put(dlm);
-	return 0;
+	if (master_request) {
+		mlog(0, "need to tell master to reassert\n");
+		ret = EAGAIN;  // positive. negative would shoot down the node.
+	}
+	return ret;
 
 kill:
 	/* kill the caller! */
@@ -1713,6 +1769,10 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
 	item->u.am.request_from = request_from;
 	item->u.am.flags = flags;
 
+	if (ignore_higher) 
+		mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len, 
+		     res->lockname.name);
+		
 	spin_lock(&dlm->work_lock);
 	list_add_tail(&item->list, &dlm->work_list);
 	spin_unlock(&dlm->work_lock);
-- 
cgit v1.2.3


From c03872f5f50bc10f2a1a485f08879a8d01bcfe49 Mon Sep 17 00:00:00 2001
From: Kurt Hackel <kurt.hackel@oracle.com>
Date: Mon, 6 Mar 2006 14:08:49 -0800
Subject: [PATCH] ocfs2: dlm recovery fixes

when starting lock mastery (excepting the recovery lock) wait on any nodes
needing recovery. fix one instance where lock resources were left attached
to the recovery list after recovery completed.  ensure that the node_down
code is run uniformly regardless of which node found the dead node first.

Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmcommon.h   |   6 +++
 fs/ocfs2/dlm/dlmlock.c     |  14 +++++-
 fs/ocfs2/dlm/dlmmaster.c   | 103 +++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/dlm/dlmrecovery.c |  38 +++++++++--------
 4 files changed, 142 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 9c772583744..a8aec934134 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -658,6 +658,7 @@ void dlm_complete_thread(struct dlm_ctxt *dlm);
 int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
 void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
 void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
+void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
 int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
 int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
 
@@ -762,6 +763,11 @@ int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data);
 int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data);
 int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data);
 int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data);
+int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
+			  u8 nodenum, u8 *real_master);
+int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
+			       struct dlm_lock_resource *res, u8 *real_master);
+
 
 int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
 			       struct dlm_lock_resource *res,
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 671d4ff222c..6fea28318d6 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -141,13 +141,23 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
 					  res->lockname.len)) {
 			kick_thread = 1;
 			call_ast = 1;
+		} else {
+			mlog(0, "%s: returning DLM_NORMAL to "
+			     "node %u for reco lock\n", dlm->name,
+			     lock->ml.node);
 		}
 	} else {
 		/* for NOQUEUE request, unless we get the
 		 * lock right away, return DLM_NOTQUEUED */
-		if (flags & LKM_NOQUEUE)
+		if (flags & LKM_NOQUEUE) {
 			status = DLM_NOTQUEUED;
-		else {
+			if (dlm_is_recovery_lock(res->lockname.name,
+						 res->lockname.len)) {
+				mlog(0, "%s: returning NOTQUEUED to "
+				     "node %u for reco lock\n", dlm->name,
+				     lock->ml.node);
+			}
+		} else {
 			dlm_lock_get(lock);
 			list_add_tail(&lock->list, &res->blocked);
 			kick_thread = 1;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 78ac3a00eb5..940be4c13b1 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -239,6 +239,8 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
 static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
 				       struct dlm_lock_resource *res,
 				       u8 target);
+static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
+				       struct dlm_lock_resource *res);
 
 
 int dlm_is_host_down(int errno)
@@ -677,6 +679,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
 	struct dlm_node_iter iter;
 	unsigned int namelen;
 	int tries = 0;
+	int bit, wait_on_recovery = 0;
 
 	BUG_ON(!lockid);
 
@@ -762,6 +765,18 @@ lookup:
 		dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0);
 		set_bit(dlm->node_num, mle->maybe_map);
 		list_add(&mle->list, &dlm->master_list);
+
+		/* still holding the dlm spinlock, check the recovery map
+		 * to see if there are any nodes that still need to be 
+		 * considered.  these will not appear in the mle nodemap
+		 * but they might own this lockres.  wait on them. */
+		bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
+		if (bit < O2NM_MAX_NODES) {
+			mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to"
+			     "recover before lock mastery can begin\n",
+			     dlm->name, namelen, (char *)lockid, bit);
+			wait_on_recovery = 1;
+		}
 	}
 
 	/* at this point there is either a DLM_MLE_BLOCK or a
@@ -779,6 +794,39 @@ lookup:
 	spin_unlock(&dlm->master_lock);
 	spin_unlock(&dlm->spinlock);
 
+	while (wait_on_recovery) {
+		/* any cluster changes that occurred after dropping the
+		 * dlm spinlock would be detectable be a change on the mle,
+		 * so we only need to clear out the recovery map once. */
+		if (dlm_is_recovery_lock(lockid, namelen)) {
+			mlog(ML_NOTICE, "%s: recovery map is not empty, but "
+			     "must master $RECOVERY lock now\n", dlm->name);
+			if (!dlm_pre_master_reco_lockres(dlm, res))
+				wait_on_recovery = 0;
+			else {
+				mlog(0, "%s: waiting 500ms for heartbeat state "
+				    "change\n", dlm->name);
+				msleep(500);
+			}
+			continue;
+		} 
+
+		dlm_kick_recovery_thread(dlm);
+		msleep(100);
+		dlm_wait_for_recovery(dlm);
+
+		spin_lock(&dlm->spinlock);
+		bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
+		if (bit < O2NM_MAX_NODES) {
+			mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to"
+			     "recover before lock mastery can begin\n",
+			     dlm->name, namelen, (char *)lockid, bit);
+			wait_on_recovery = 1;
+		} else
+			wait_on_recovery = 0;
+		spin_unlock(&dlm->spinlock);
+	}
+
 	/* must wait for lock to be mastered elsewhere */
 	if (blocked)
 		goto wait;
@@ -1835,6 +1883,61 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
 	mlog(0, "finished with dlm_assert_master_worker\n");
 }
 
+/* SPECIAL CASE for the $RECOVERY lock used by the recovery thread.
+ * We cannot wait for node recovery to complete to begin mastering this
+ * lockres because this lockres is used to kick off recovery! ;-)
+ * So, do a pre-check on all living nodes to see if any of those nodes
+ * think that $RECOVERY is currently mastered by a dead node.  If so,
+ * we wait a short time to allow that node to get notified by its own
+ * heartbeat stack, then check again.  All $RECOVERY lock resources
+ * mastered by dead nodes are purged when the hearbeat callback is 
+ * fired, so we can know for sure that it is safe to continue once
+ * the node returns a live node or no node.  */
+static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
+				       struct dlm_lock_resource *res)
+{
+	struct dlm_node_iter iter;
+	int nodenum;
+	int ret = 0;
+	u8 master = DLM_LOCK_RES_OWNER_UNKNOWN;
+
+	spin_lock(&dlm->spinlock);
+	dlm_node_iter_init(dlm->domain_map, &iter);
+	spin_unlock(&dlm->spinlock);
+
+	while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
+		/* do not send to self */
+		if (nodenum == dlm->node_num)
+			continue;
+		ret = dlm_do_master_requery(dlm, res, nodenum, &master);
+		if (ret < 0) {
+			mlog_errno(ret);
+			if (!dlm_is_host_down(ret))
+				BUG();
+			/* host is down, so answer for that node would be
+			 * DLM_LOCK_RES_OWNER_UNKNOWN.  continue. */
+		}
+
+		if (master != DLM_LOCK_RES_OWNER_UNKNOWN) {
+			/* check to see if this master is in the recovery map */
+			spin_lock(&dlm->spinlock);
+			if (test_bit(master, dlm->recovery_map)) {
+				mlog(ML_NOTICE, "%s: node %u has not seen "
+				     "node %u go down yet, and thinks the "
+				     "dead node is mastering the recovery "
+				     "lock.  must wait.\n", dlm->name,
+				     nodenum, master);
+				ret = -EAGAIN;
+			}
+			spin_unlock(&dlm->spinlock);
+			mlog(0, "%s: reco lock master is %u\n", dlm->name, 
+			     master);
+			break;
+		}
+	}
+	return ret;
+}
+
 
 /*
  * DLM_MIGRATE_LOCKRES
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 1e232000f3f..36610bdf123 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -58,7 +58,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node);
 static int dlm_recovery_thread(void *data);
 void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
 int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
-static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
+void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
 static int dlm_do_recovery(struct dlm_ctxt *dlm);
 
 static int dlm_pick_recovery_master(struct dlm_ctxt *dlm);
@@ -78,15 +78,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
 				    u8 send_to,
 				    struct dlm_lock_resource *res,
 				    int total_locks);
-static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
-				      struct dlm_lock_resource *res,
-				      u8 *real_master);
 static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 				     struct dlm_lock_resource *res,
 				     struct dlm_migratable_lockres *mres);
-static int dlm_do_master_requery(struct dlm_ctxt *dlm,
-				 struct dlm_lock_resource *res,
-				 u8 nodenum, u8 *real_master);
 static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm);
 static int dlm_send_all_done_msg(struct dlm_ctxt *dlm,
 				 u8 dead_node, u8 send_to);
@@ -165,7 +159,7 @@ void dlm_dispatch_work(void *data)
  * RECOVERY THREAD
  */
 
-static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm)
+void dlm_kick_recovery_thread(struct dlm_ctxt *dlm)
 {
 	/* wake the recovery thread
 	 * this will wake the reco thread in one of three places
@@ -1316,9 +1310,8 @@ leave:
 
 
-static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
-				      struct dlm_lock_resource *res,
-				      u8 *real_master)
+int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
+			       struct dlm_lock_resource *res, u8 *real_master)
 {
 	struct dlm_node_iter iter;
 	int nodenum;
@@ -1360,8 +1353,10 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
 		ret = dlm_do_master_requery(dlm, res, nodenum, real_master);
 		if (ret < 0) {
 			mlog_errno(ret);
-			BUG();
-			/* TODO: need to figure a way to restart this */
+			if (!dlm_is_host_down(ret))
+				BUG();
+			/* host is down, so answer for that node would be
+			 * DLM_LOCK_RES_OWNER_UNKNOWN.  continue. */
 		}
 		if (*real_master != DLM_LOCK_RES_OWNER_UNKNOWN) {
 			mlog(0, "lock master is %u\n", *real_master);
@@ -1372,9 +1367,8 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
 }
 
 
-static int dlm_do_master_requery(struct dlm_ctxt *dlm,
-				 struct dlm_lock_resource *res,
-				 u8 nodenum, u8 *real_master)
+int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
+			  u8 nodenum, u8 *real_master)
 {
 	int ret = -EINVAL;
 	struct dlm_master_requery req;
@@ -1739,6 +1733,13 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
 				} else
 					continue;
 
+				if (!list_empty(&res->recovering)) {
+					mlog(0, "%s:%.*s: lockres was "
+					     "marked RECOVERING, owner=%u\n",
+					     dlm->name, res->lockname.len,
+					     res->lockname.name, res->owner);
+					list_del_init(&res->recovering);
+				}
 				spin_lock(&res->spinlock);
 				dlm_change_lockres_owner(dlm, res, new_master);
 				res->state &= ~DLM_LOCK_RES_RECOVERING;
@@ -2258,7 +2259,10 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
 			mlog(0, "%u not in domain/live_nodes map "
 			     "so setting it in reco map manually\n",
 			     br->dead_node);
-		set_bit(br->dead_node, dlm->recovery_map);
+		/* force the recovery cleanup in __dlm_hb_node_down
+		 * both of these will be cleared in a moment */
+		set_bit(br->dead_node, dlm->domain_map);
+		set_bit(br->dead_node, dlm->live_nodes_map);
 		__dlm_hb_node_down(dlm, br->dead_node);
 	}
 	spin_unlock(&dlm->spinlock);
-- 
cgit v1.2.3


From 70bacbdbfa6f63f8cd10432891f9ecee62397ff2 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 2 Mar 2006 11:10:05 -0800
Subject: ocfs2: don't use MLF* in cluster/ files

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index d08971d29b6..bff0f0d0686 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -449,11 +449,11 @@ static u32 o2hb_compute_block_crc_le(struct o2hb_region *reg,
 
 static void o2hb_dump_slot(struct o2hb_disk_heartbeat_block *hb_block)
 {
-	mlog(ML_ERROR, "Dump slot information: seq = 0x%"MLFx64", node = %u, "
-	     "cksum = 0x%x, generation 0x%"MLFx64"\n",
-	     le64_to_cpu(hb_block->hb_seq), hb_block->hb_node,
-	     le32_to_cpu(hb_block->hb_cksum),
-	     le64_to_cpu(hb_block->hb_generation));
+	mlog(ML_ERROR, "Dump slot information: seq = 0x%llx, node = %u, "
+	     "cksum = 0x%x, generation 0x%llx\n",
+	     (long long)le64_to_cpu(hb_block->hb_seq),
+	     hb_block->hb_node, le32_to_cpu(hb_block->hb_cksum),
+	     (long long)le64_to_cpu(hb_block->hb_generation));
 }
 
 static int o2hb_verify_crc(struct o2hb_region *reg,
@@ -516,8 +516,9 @@ static inline void o2hb_prepare_block(struct o2hb_region *reg,
 	hb_block->hb_cksum = cpu_to_le32(o2hb_compute_block_crc_le(reg,
 								   hb_block));
 
-	mlog(ML_HB_BIO, "our node generation = 0x%"MLFx64", cksum = 0x%x\n",
-	     cpu_to_le64(generation), le32_to_cpu(hb_block->hb_cksum));
+	mlog(ML_HB_BIO, "our node generation = 0x%llx, cksum = 0x%x\n",
+	     (long long)cpu_to_le64(generation),
+	     le32_to_cpu(hb_block->hb_cksum));
 }
 
 static void o2hb_fire_callbacks(struct o2hb_callback *hbcall,
@@ -686,19 +687,20 @@ static int o2hb_check_slot(struct o2hb_region *reg,
 	if (slot->ds_last_generation != le64_to_cpu(hb_block->hb_generation)) {
 		gen_changed = 1;
 		slot->ds_equal_samples = 0;
-		mlog(ML_HEARTBEAT, "Node %d changed generation (0x%"MLFx64" "
-		     "to 0x%"MLFx64")\n", slot->ds_node_num,
-		     slot->ds_last_generation,
-		     le64_to_cpu(hb_block->hb_generation));
+		mlog(ML_HEARTBEAT, "Node %d changed generation (0x%llx "
+		     "to 0x%llx)\n", slot->ds_node_num,
+		     (long long)slot->ds_last_generation,
+		     (long long)le64_to_cpu(hb_block->hb_generation));
 	}
 
 	slot->ds_last_generation = le64_to_cpu(hb_block->hb_generation);
 
-	mlog(ML_HEARTBEAT, "Slot %d gen 0x%"MLFx64" cksum 0x%x "
-	     "seq %"MLFu64" last %"MLFu64" changed %u equal %u\n",
-	     slot->ds_node_num, slot->ds_last_generation,
-	     le32_to_cpu(hb_block->hb_cksum), le64_to_cpu(hb_block->hb_seq), 
-	     slot->ds_last_time, slot->ds_changed_samples,
+	mlog(ML_HEARTBEAT, "Slot %d gen 0x%llx cksum 0x%x "
+	     "seq %llu last %llu changed %u equal %u\n",
+	     slot->ds_node_num, (long long)slot->ds_last_generation,
+	     le32_to_cpu(hb_block->hb_cksum),
+	     (unsigned long long)le64_to_cpu(hb_block->hb_seq), 
+	     (unsigned long long)slot->ds_last_time, slot->ds_changed_samples,
 	     slot->ds_equal_samples);
 
 	spin_lock(&o2hb_live_lock);
@@ -708,8 +710,8 @@ fire_callbacks:
 	 * changes at any time during their dead time */
 	if (list_empty(&slot->ds_live_item) &&
 	    slot->ds_changed_samples >= O2HB_LIVE_THRESHOLD) {
-		mlog(ML_HEARTBEAT, "Node %d (id 0x%"MLFx64") joined my "
-		     "region\n", slot->ds_node_num, slot->ds_last_generation);
+		mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n",
+		     slot->ds_node_num, (long long)slot->ds_last_generation);
 
 		/* first on the list generates a callback */
 		if (list_empty(&o2hb_live_slots[slot->ds_node_num])) {
-- 
cgit v1.2.3


From 29004858a76ba9e26393dd8a85e653f105a33753 Mon Sep 17 00:00:00 2001
From: Kurt Hackel <kurt.hackel@oracle.com>
Date: Thu, 2 Mar 2006 16:43:36 -0800
Subject: ocfs2: don't use MLF* in dlm/ files

Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmast.c      | 22 +++++++++++++++-------
 fs/ocfs2/dlm/dlmcommon.h   | 15 +++++++++++++++
 fs/ocfs2/dlm/dlmconvert.c  | 11 +++++++----
 fs/ocfs2/dlm/dlmdebug.c    | 18 ++++++++++++------
 fs/ocfs2/dlm/dlmrecovery.c | 12 ++++++++----
 fs/ocfs2/dlm/dlmunlock.c   | 11 +++++++----
 6 files changed, 64 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 8d17d28ef91..355593dd8ef 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -307,8 +307,11 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
 
 	if (past->type != DLM_AST &&
 	    past->type != DLM_BAST) {
-		mlog(ML_ERROR, "Unknown ast type! %d, cookie=%"MLFu64", "
-		     "name=%.*s\n", past->type, cookie, locklen, name);
+		mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu"
+		     "name=%.*s\n", past->type, 
+		     dlm_get_lock_cookie_node(cookie),
+		     dlm_get_lock_cookie_seq(cookie),
+		     locklen, name);
 		ret = DLM_IVLOCKID;
 		goto leave;
 	}
@@ -316,9 +319,11 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
 	res = dlm_lookup_lockres(dlm, name, locklen);
 	if (!res) {
 		mlog(ML_ERROR, "got %sast for unknown lockres! "
-			       "cookie=%"MLFu64", name=%.*s, namelen=%u\n",
+			       "cookie=%u:%llu, name=%.*s, namelen=%u\n",
 		     past->type == DLM_AST ? "" : "b",
-		     cookie, locklen, name, locklen);
+		     dlm_get_lock_cookie_node(cookie),
+		     dlm_get_lock_cookie_seq(cookie),
+		     locklen, name, locklen);
 		ret = DLM_IVLOCKID;
 		goto leave;
 	}
@@ -360,9 +365,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
 			goto do_ast;
 	}
 
-	mlog(ML_ERROR, "got %sast for unknown lock!  cookie=%"MLFu64", "
-		       "name=%.*s, namelen=%u\n",
-             past->type == DLM_AST ? "" : "b", cookie, locklen, name, locklen);
+	mlog(ML_ERROR, "got %sast for unknown lock!  cookie=%u:%llu, "
+		       "name=%.*s, namelen=%u\n", 
+		       past->type == DLM_AST ? "" : "b", 
+		       dlm_get_lock_cookie_node(cookie),
+		       dlm_get_lock_cookie_seq(cookie),
+		       locklen, name, locklen);
 
 	ret = DLM_NORMAL;
 unlock_out:
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index a8aec934134..88cc43df18f 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -630,6 +630,21 @@ __dlm_lockres_state_to_status(struct dlm_lock_resource *res)
 	return status;
 }
 
+static inline u8 dlm_get_lock_cookie_node(u64 cookie)
+{
+	u8 ret;
+	cookie >>= 56;
+	ret = (u8)(cookie & 0xffULL);
+	return ret;
+}
+
+static inline unsigned long long dlm_get_lock_cookie_seq(u64 cookie)
+{
+	unsigned long long ret;
+	ret = ((unsigned long long)cookie) & 0x00ffffffffffffffULL;
+	return ret;
+}
+
 struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
 			       struct dlm_lockstatus *lksb);
 void dlm_lock_get(struct dlm_lock *lock);
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index f66e2d818cc..8285228d9e3 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -284,8 +284,10 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
 	if (lock->ml.convert_type != LKM_IVMODE) {
 		__dlm_print_one_lock_resource(res);
 		mlog(ML_ERROR, "converting a remote lock that is already "
-		     "converting! (cookie=%"MLFu64", conv=%d)\n",
-		     lock->ml.cookie, lock->ml.convert_type);
+		     "converting! (cookie=%u:%llu, conv=%d)\n",
+		     dlm_get_lock_cookie_node(lock->ml.cookie),
+		     dlm_get_lock_cookie_seq(lock->ml.cookie),
+		     lock->ml.convert_type);
 		status = DLM_DENIED;
 		goto bail;
 	}
@@ -513,8 +515,9 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
 leave:
 	if (!lock)
 		mlog(ML_ERROR, "did not find lock to convert on grant queue! "
-			       "cookie=%"MLFu64"\n",
-		     cnv->cookie);
+			       "cookie=%u:%llu\n",
+			       dlm_get_lock_cookie_node(cnv->cookie),
+			       dlm_get_lock_cookie_seq(cnv->cookie));
 	else
 		dlm_lock_put(lock);
 
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 54f61b76ab5..c7eae5d3324 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -72,8 +72,10 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
 		lock = list_entry(iter2, struct dlm_lock, list);
 		spin_lock(&lock->spinlock);
 		mlog(ML_NOTICE, "    type=%d, conv=%d, node=%u, "
-		       "cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 
-		       lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie, 
+		       "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 
+		       lock->ml.type, lock->ml.convert_type, lock->ml.node, 
+		       dlm_get_lock_cookie_node(lock->ml.cookie), 
+		       dlm_get_lock_cookie_seq(lock->ml.cookie), 
 		       list_empty(&lock->ast_list) ? 'y' : 'n',
 		       lock->ast_pending ? 'y' : 'n',
 		       list_empty(&lock->bast_list) ? 'y' : 'n',
@@ -85,8 +87,10 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
 		lock = list_entry(iter2, struct dlm_lock, list);
 		spin_lock(&lock->spinlock);
 		mlog(ML_NOTICE, "    type=%d, conv=%d, node=%u, "
-		       "cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 
-		       lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie, 
+		       "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 
+		       lock->ml.type, lock->ml.convert_type, lock->ml.node, 
+		       dlm_get_lock_cookie_node(lock->ml.cookie), 
+		       dlm_get_lock_cookie_seq(lock->ml.cookie), 
 		       list_empty(&lock->ast_list) ? 'y' : 'n',
 		       lock->ast_pending ? 'y' : 'n',
 		       list_empty(&lock->bast_list) ? 'y' : 'n',
@@ -98,8 +102,10 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
 		lock = list_entry(iter2, struct dlm_lock, list);
 		spin_lock(&lock->spinlock);
 		mlog(ML_NOTICE, "    type=%d, conv=%d, node=%u, "
-		       "cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 
-		       lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie, 
+		       "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 
+		       lock->ml.type, lock->ml.convert_type, lock->ml.node, 
+		       dlm_get_lock_cookie_node(lock->ml.cookie), 
+		       dlm_get_lock_cookie_seq(lock->ml.cookie), 
 		       list_empty(&lock->ast_list) ? 'y' : 'n',
 		       lock->ast_pending ? 'y' : 'n',
 		       list_empty(&lock->bast_list) ? 'y' : 'n',
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 36610bdf123..805cbabac05 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -744,10 +744,12 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
 		     dlm->name, dlm->reco.dead_node, dlm->reco.new_master,
 		     dead_node, reco_master);
 		mlog(ML_ERROR, "%s: name=%.*s master=%u locks=%u/%u flags=%u "
-		     "entry[0]={c=%"MLFu64",l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n",
+		     "entry[0]={c=%u:%llu,l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n",
 		     dlm->name, mres->lockname_len, mres->lockname, mres->master,
 		     mres->num_locks, mres->total_locks, mres->flags,
-		     mres->ml[0].cookie, mres->ml[0].list, mres->ml[0].flags,
+		     dlm_get_lock_cookie_node(mres->ml[0].cookie),
+		     dlm_get_lock_cookie_seq(mres->ml[0].cookie),
+		     mres->ml[0].list, mres->ml[0].flags,
 		     mres->ml[0].type, mres->ml[0].convert_type,
 		     mres->ml[0].highest_blocked, mres->ml[0].node);
 		BUG();
@@ -1513,9 +1515,11 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 			/* lock is always created locally first, and
 			 * destroyed locally last.  it must be on the list */
 			if (!lock) {
+				u64 c = ml->cookie;
 				mlog(ML_ERROR, "could not find local lock "
-					       "with cookie %"MLFu64"!\n",
-				     ml->cookie);
+					       "with cookie %u:%llu!\n",
+					       dlm_get_lock_cookie_node(c),
+					       dlm_get_lock_cookie_seq(c));
 				BUG();
 			}
 			BUG_ON(lock->ml.node != ml->node);
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index c95f08d2e92..7b1a2754267 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -244,8 +244,10 @@ leave:
 	if (actions & DLM_UNLOCK_FREE_LOCK) {
 		/* this should always be coupled with list removal */
 		BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK));
-		mlog(0, "lock %"MLFu64" should be gone now! refs=%d\n",
-		     lock->ml.cookie, atomic_read(&lock->lock_refs.refcount)-1);
+		mlog(0, "lock %u:%llu should be gone now! refs=%d\n",
+		     dlm_get_lock_cookie_node(lock->ml.cookie),
+		     dlm_get_lock_cookie_seq(lock->ml.cookie),
+		     atomic_read(&lock->lock_refs.refcount)-1);
 		dlm_lock_put(lock);
 	}
 	if (actions & DLM_UNLOCK_CALL_AST)
@@ -493,8 +495,9 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data)
 not_found:
 	if (!found)
 		mlog(ML_ERROR, "failed to find lock to unlock! "
-			       "cookie=%"MLFu64"\n",
-		     unlock->cookie);
+			       "cookie=%u:%llu\n",
+			       dlm_get_lock_cookie_node(unlock->cookie),
+			       dlm_get_lock_cookie_seq(unlock->cookie));
 	else {
 		/* send the lksb->status back to the other node */
 		status = lksb->status;
-- 
cgit v1.2.3


From b0697053f9e8de9cea3d510d9e290851ece9460b Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 3 Mar 2006 10:24:33 -0800
Subject: ocfs2: don't use MLF* in the file system

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/alloc.c          |  88 ++++++++++++++++++-----------------
 fs/ocfs2/aops.c           |  18 +++----
 fs/ocfs2/buffer_head_io.c |  11 +++--
 fs/ocfs2/dcache.c         |   9 ++--
 fs/ocfs2/dir.c            |  42 ++++++++---------
 fs/ocfs2/dlmglue.c        | 101 ++++++++++++++++++++--------------------
 fs/ocfs2/export.c         |  24 +++++-----
 fs/ocfs2/extent_map.c     |  34 +++++++-------
 fs/ocfs2/file.c           |  42 +++++++++--------
 fs/ocfs2/inode.c          | 116 ++++++++++++++++++++++++----------------------
 fs/ocfs2/journal.c        |  27 +++++------
 fs/ocfs2/localalloc.c     |  17 +++----
 fs/ocfs2/namei.c          |  79 ++++++++++++++++---------------
 fs/ocfs2/ocfs2.h          |  12 ++---
 fs/ocfs2/suballoc.c       |  72 +++++++++++++++-------------
 fs/ocfs2/super.c          |  14 +++---
 fs/ocfs2/uptodate.c       |  40 +++++++++-------
 fs/ocfs2/vote.c           |  63 +++++++++++++------------
 18 files changed, 425 insertions(+), 384 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 6b9812db377..edaab05a93e 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -566,9 +566,8 @@ static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
 			next_free = le16_to_cpu(el->l_next_free_rec);
 			if (next_free == 0) {
 				ocfs2_error(inode->i_sb,
-					    "Dinode %"MLFu64" has a bad "
-					    "extent list",
-					    OCFS2_I(inode)->ip_blkno);
+					    "Dinode %llu has a bad extent list",
+					    (unsigned long long)OCFS2_I(inode)->ip_blkno);
 				status = -EIO;
 				goto bail;
 			}
@@ -611,9 +610,8 @@ static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
 		next_free = le16_to_cpu(el->l_next_free_rec);
 		if (next_free == 0) {
 			ocfs2_error(inode->i_sb,
-				    "Dinode %"MLFu64" has a bad "
-				    "extent list",
-				    OCFS2_I(inode)->ip_blkno);
+				    "Dinode %llu has a bad extent list",
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
 			status = -EIO;
 			goto bail;
 		}
@@ -652,8 +650,9 @@ static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
 		/* having an empty extent at eof is legal. */
 		if (el->l_recs[i].e_cpos != fe->i_clusters) {
 			ocfs2_error(inode->i_sb,
-				    "Dinode %"MLFu64" trailing extent is bad: "
+				    "Dinode %llu trailing extent is bad: "
 				    "cpos (%u) != number of clusters (%u)",
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
 				    le32_to_cpu(el->l_recs[i].e_cpos),
 				    le32_to_cpu(fe->i_clusters));
 			status = -EIO;
@@ -747,19 +746,19 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
 
 	while(le16_to_cpu(el->l_tree_depth) > 1) {
 		if (le16_to_cpu(el->l_next_free_rec) == 0) {
-			ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has empty "
+			ocfs2_error(inode->i_sb, "Dinode %llu has empty "
 				    "extent list (next_free_rec == 0)",
-				    OCFS2_I(inode)->ip_blkno);
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
 			status = -EIO;
 			goto bail;
 		}
 		i = le16_to_cpu(el->l_next_free_rec) - 1;
 		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
 		if (!blkno) {
-			ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has extent "
+			ocfs2_error(inode->i_sb, "Dinode %llu has extent "
 				    "list where extent # %d has no physical "
 				    "block start",
-				    OCFS2_I(inode)->ip_blkno, i);
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno, i);
 			status = -EIO;
 			goto bail;
 		}
@@ -826,9 +825,9 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
-	mlog(0, "add %u clusters starting at block %"MLFu64" to "
-		"inode %"MLFu64"\n",
-	     new_clusters, start_blk, OCFS2_I(inode)->ip_blkno);
+	mlog(0, "add %u clusters starting at block %llu to inode %llu\n",
+	     new_clusters, (unsigned long long)start_blk,
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
 	el = &fe->id2.i_list;
@@ -963,8 +962,8 @@ static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 	struct ocfs2_dinode *di;
 	struct ocfs2_truncate_log *tl;
 
-	mlog_entry("start_blk = %"MLFu64", num_clusters = %u\n", start_blk,
-		   num_clusters);
+	mlog_entry("start_blk = %llu, num_clusters = %u\n",
+		   (unsigned long long)start_blk, num_clusters);
 
 	BUG_ON(mutex_trylock(&tl_inode->i_mutex));
 
@@ -981,8 +980,9 @@ static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 	tl_count = le16_to_cpu(tl->tl_count);
 	mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
 			tl_count == 0,
-			"Truncate record count on #%"MLFu64" invalid ("
-			"wanted %u, actual %u\n", OCFS2_I(tl_inode)->ip_blkno,
+			"Truncate record count on #%llu invalid "
+			"wanted %u, actual %u\n",
+			(unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
 			ocfs2_truncate_recs_per_inode(osb->sb),
 			le16_to_cpu(tl->tl_count));
 
@@ -1002,8 +1002,8 @@ static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 	}
 
 	mlog(0, "Log truncate of %u clusters starting at cluster %u to "
-	     "%"MLFu64" (index = %d)\n", num_clusters, start_cluster,
-	     OCFS2_I(tl_inode)->ip_blkno, index);
+	     "%llu (index = %d)\n", num_clusters, start_cluster,
+	     (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index);
 
 	if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) {
 		/*
@@ -1134,8 +1134,8 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 	}
 
 	num_to_flush = le16_to_cpu(tl->tl_used);
-	mlog(0, "Flush %u records from truncate log #%"MLFu64"\n",
-	     num_to_flush, OCFS2_I(tl_inode)->ip_blkno);
+	mlog(0, "Flush %u records from truncate log #%llu\n",
+	     num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno);
 	if (!num_to_flush) {
 		status = 0;
 		goto bail;
@@ -1360,8 +1360,8 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
 
 	tl = &tl_copy->id2.i_dealloc;
 	num_recs = le16_to_cpu(tl->tl_used);
-	mlog(0, "cleanup %u records from %"MLFu64"\n", num_recs,
-	     tl_copy->i_blkno);
+	mlog(0, "cleanup %u records from %llu\n", num_recs,
+	     (unsigned long long)tl_copy->i_blkno);
 
 	mutex_lock(&tl_inode->i_mutex);
 	for(i = 0; i < num_recs; i++) {
@@ -1529,7 +1529,8 @@ static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb,
 
 	*new_last_eb = bh;
 	get_bh(*new_last_eb);
-	mlog(0, "returning block %"MLFu64"\n", le64_to_cpu(eb->h_blkno));
+	mlog(0, "returning block %llu\n",
+	     (unsigned long long)le64_to_cpu(eb->h_blkno));
 bail:
 	if (bh)
 		brelse(bh);
@@ -1646,8 +1647,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 
 	/* if our tree depth > 0, update all the tree blocks below us. */
 	while (depth) {
-		mlog(0, "traveling tree (depth = %d, next_eb = %"MLFu64")\n",
-		     depth,  next_eb);
+		mlog(0, "traveling tree (depth = %d, next_eb = %llu)\n",
+		     depth,  (unsigned long long)next_eb);
 		status = ocfs2_read_block(osb, next_eb, &eb_bh,
 					  OCFS2_BH_CACHED, inode);
 		if (status < 0) {
@@ -1674,12 +1675,12 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 
 		i = le16_to_cpu(el->l_next_free_rec) - 1;
 
-		mlog(0, "extent block %"MLFu64", before: record %d: "
-		     "(%u, %u, %"MLFu64"), next = %u\n",
-		     le64_to_cpu(eb->h_blkno), i,
+		mlog(0, "extent block %llu, before: record %d: "
+		     "(%u, %u, %llu), next = %u\n",
+		     (unsigned long long)le64_to_cpu(eb->h_blkno), i,
 		     le32_to_cpu(el->l_recs[i].e_cpos),
 		     le32_to_cpu(el->l_recs[i].e_clusters),
-		     le64_to_cpu(el->l_recs[i].e_blkno),
+		     (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno),
 		     le16_to_cpu(el->l_next_free_rec));
 
 		BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del);
@@ -1697,12 +1698,12 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 			BUG_ON(!el->l_next_free_rec);
 			le16_add_cpu(&el->l_next_free_rec, -1);
 		}
-		mlog(0, "extent block %"MLFu64", after: record %d: "
-		     "(%u, %u, %"MLFu64"), next = %u\n",
-		     le64_to_cpu(eb->h_blkno), i,
+		mlog(0, "extent block %llu, after: record %d: "
+		     "(%u, %u, %llu), next = %u\n",
+		     (unsigned long long)le64_to_cpu(eb->h_blkno), i,
 		     le32_to_cpu(el->l_recs[i].e_cpos),
 		     le32_to_cpu(el->l_recs[i].e_clusters),
-		     le64_to_cpu(el->l_recs[i].e_blkno),
+		     (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno),
 		     le16_to_cpu(el->l_next_free_rec));
 
 		status = ocfs2_journal_dirty(handle, eb_bh);
@@ -1792,10 +1793,10 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
 	last_eb = le64_to_cpu(fe->i_last_eb_blk);
 start:
 	mlog(0, "ocfs2_commit_truncate: fe->i_clusters = %u, "
-	     "last_eb = %"MLFu64", fe->i_last_eb_blk = %"MLFu64", "
+	     "last_eb = %llu, fe->i_last_eb_blk = %llu, "
 	     "fe->id2.i_list.l_tree_depth = %u last_eb_bh = %p\n",
-	     le32_to_cpu(fe->i_clusters), last_eb,
-	     le64_to_cpu(fe->i_last_eb_blk),
+	     le32_to_cpu(fe->i_clusters), (unsigned long long)last_eb,
+	     (unsigned long long)le64_to_cpu(fe->i_last_eb_blk),
 	     le16_to_cpu(fe->id2.i_list.l_tree_depth), last_eb_bh);
 
 	if (last_eb != le64_to_cpu(fe->i_last_eb_blk)) {
@@ -1934,16 +1935,17 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
 
 	mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
-	     "%"MLFu64"\n", fe->i_clusters, new_i_clusters, fe->i_size);
+	     "%llu\n", fe->i_clusters, new_i_clusters,
+	     (unsigned long long)fe->i_size);
 
 	if (le32_to_cpu(fe->i_clusters) <= new_i_clusters) {
-		ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has cluster count "
-			    "%u and size %"MLFu64" whereas struct inode has "
+		ocfs2_error(inode->i_sb, "Dinode %llu has cluster count "
+			    "%u and size %llu whereas struct inode has "
 			    "cluster count %u and size %llu which caused an "
 			    "invalid truncate to %u clusters.",
-			    le64_to_cpu(fe->i_blkno),
+			    (unsigned long long)le64_to_cpu(fe->i_blkno),
 			    le32_to_cpu(fe->i_clusters),
-			    le64_to_cpu(fe->i_size),
+			    (unsigned long long)le64_to_cpu(fe->i_size),
 			    OCFS2_I(inode)->ip_clusters, i_size_read(inode),
 			    new_i_clusters);
 		mlog_meta_lvb(ML_ERROR, &OCFS2_I(inode)->ip_meta_lockres);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 8f4467a930a..bf931ba1d36 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -74,8 +74,8 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
 	fe = (struct ocfs2_dinode *) bh->b_data;
 
 	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		mlog(ML_ERROR, "Invalid dinode #%"MLFu64": signature = %.*s\n",
-		     fe->i_blkno, 7, fe->i_signature);
+		mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
+		     (unsigned long long)fe->i_blkno, 7, fe->i_signature);
 		goto bail;
 	}
 
@@ -162,8 +162,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
 					  NULL);
 	if (err) {
 		mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
-		     "%"MLFu64", NULL)\n", err, inode,
-		     (unsigned long long)iblock, p_blkno);
+		     "%llu, NULL)\n", err, inode, (unsigned long long)iblock,
+		     (unsigned long long)p_blkno);
 		goto bail;
 	}
 
@@ -171,13 +171,15 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
 
 	if (bh_result->b_blocknr == 0) {
 		err = -EIO;
-		mlog(ML_ERROR, "iblock = %llu p_blkno = %"MLFu64" "
-		     "blkno=(%"MLFu64")\n", (unsigned long long)iblock,
-		     p_blkno, OCFS2_I(inode)->ip_blkno);
+		mlog(ML_ERROR, "iblock = %llu p_blkno = %llu blkno=(%llu)\n",
+		     (unsigned long long)iblock,
+		     (unsigned long long)p_blkno,
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 	}
 
 	past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
-	mlog(0, "Inode %lu, past_eof = %"MLFu64"\n", inode->i_ino, past_eof);
+	mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
+	     (unsigned long long)past_eof);
 
 	if (create && (iblock >= past_eof))
 		set_buffer_new(bh_result);
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index bae3d7548be..9a24adf9be6 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -97,8 +97,8 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 	int i, ignore_cache = 0;
 	struct buffer_head *bh;
 
-	mlog_entry("(block=(%"MLFu64"), nr=(%d), flags=%d, inode=%p)\n",
-		   block, nr, flags, inode);
+	mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
+		   (unsigned long long)block, nr, flags, inode);
 
 	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
 		status = -EINVAL;
@@ -143,9 +143,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 		if (flags & OCFS2_BH_CACHED &&
 		    !ocfs2_buffer_uptodate(inode, bh)) {
 			mlog(ML_UPTODATE,
-			     "bh (%llu), inode %"MLFu64" not uptodate\n",
+			     "bh (%llu), inode %llu not uptodate\n",
 			     (unsigned long long)bh->b_blocknr,
-			     OCFS2_I(inode)->ip_blkno);
+			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 			ignore_cache = 1;
 		}
 
@@ -222,7 +222,8 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 	if (inode)
 		mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
 
-	mlog(ML_BH_IO, "block=(%"MLFu64"), nr=(%d), cached=%s\n", block, nr,
+	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", 
+	     (unsigned long long)block, nr,
 	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes");
 
 bail:
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index bd85182e97b..1a01380e387 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -64,15 +64,16 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
 		/* did we or someone else delete this inode? */
 		if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
 			spin_unlock(&OCFS2_I(inode)->ip_lock);
-			mlog(0, "inode (%"MLFu64") deleted, returning false\n",
-			     OCFS2_I(inode)->ip_blkno);
+			mlog(0, "inode (%llu) deleted, returning false\n",
+			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 			goto bail;
 		}
 		spin_unlock(&OCFS2_I(inode)->ip_lock);
 
 		if (!inode->i_nlink) {
-			mlog(0, "Inode %"MLFu64" orphaned, returning false "
-			     "dir = %d\n", OCFS2_I(inode)->ip_blkno,
+			mlog(0, "Inode %llu orphaned, returning false "
+			     "dir = %d\n",
+			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 			     S_ISDIR(inode->i_mode));
 			goto bail;
 		}
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 57158fa75d9..ae47f450792 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -83,7 +83,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	struct super_block * sb = inode->i_sb;
 	int have_disk_lock = 0;
 
-	mlog_entry("dirino=%"MLFu64"\n", OCFS2_I(inode)->ip_blkno);
+	mlog_entry("dirino=%llu\n",
+		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	stored = 0;
 	bh = NULL;
@@ -104,9 +105,9 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 		blk = (filp->f_pos) >> sb->s_blocksize_bits;
 		bh = ocfs2_bread(inode, blk, &err, 0);
 		if (!bh) {
-			mlog(ML_ERROR, "directory #%"MLFu64" contains a hole "
-				       "at offset %lld\n",
-			     OCFS2_I(inode)->ip_blkno,
+			mlog(ML_ERROR,
+			     "directory #%llu contains a hole at offset %lld\n",
+			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 			     filp->f_pos);
 			filp->f_pos += sb->s_blocksize - offset;
 			continue;
@@ -214,9 +215,9 @@ int ocfs2_find_files_on_disk(const char *name,
 	int status = -ENOENT;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
-	mlog_entry("(osb=%p, parent=%"MLFu64", name='%.*s', blkno=%p, "
-		   "inode=%p)\n",
-		   osb, OCFS2_I(inode)->ip_blkno, namelen, name, blkno, inode);
+	mlog_entry("(osb=%p, parent=%llu, name='%.*s', blkno=%p, inode=%p)\n",
+		   osb, (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		   namelen, name, blkno, inode);
 
 	*dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent);
 	if (!*dirent_bh || !*dirent) {
@@ -255,8 +256,8 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
 	struct buffer_head *dirent_bh = NULL;
 	struct ocfs2_dir_entry *dirent = NULL;
 
-	mlog_entry("dir %"MLFu64", name '%.*s'\n", OCFS2_I(dir)->ip_blkno,
-		   namelen, name);
+	mlog_entry("dir %llu, name '%.*s'\n",
+		   (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
 
 	ret = -EEXIST;
 	dirent_bh = ocfs2_find_entry(name, namelen, dir, &dirent);
@@ -287,9 +288,8 @@ int ocfs2_empty_dir(struct inode *inode)
 	if ((i_size_read(inode) <
 	     (OCFS2_DIR_REC_LEN(1) + OCFS2_DIR_REC_LEN(2))) ||
 	    !(bh = ocfs2_bread(inode, 0, &err, 0))) {
-	    	mlog(ML_ERROR, "bad directory (dir #%"MLFu64") - "
-			       "no data block\n",
-		     OCFS2_I(inode)->ip_blkno);
+	    	mlog(ML_ERROR, "bad directory (dir #%llu) - no data block\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		return 1;
 	}
 
@@ -300,9 +300,8 @@ int ocfs2_empty_dir(struct inode *inode)
 			!le64_to_cpu(de1->inode) ||
 			strcmp(".", de->name) ||
 			strcmp("..", de1->name)) {
-	    	mlog(ML_ERROR, "bad directory (dir #%"MLFu64") - "
-			       "no `.' or `..'\n",
-		     OCFS2_I(inode)->ip_blkno);
+	    	mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		brelse(bh);
 		return 1;
 	}
@@ -314,9 +313,8 @@ int ocfs2_empty_dir(struct inode *inode)
 			bh = ocfs2_bread(inode,
 					 offset >> sb->s_blocksize_bits, &err, 0);
 			if (!bh) {
-				mlog(ML_ERROR, "directory #%"MLFu64" contains "
-					       "a hole at offset %lu\n",
-				     OCFS2_I(inode)->ip_blkno, offset);
+				mlog(ML_ERROR, "dir %llu has a hole at %lu\n",
+				     (unsigned long long)OCFS2_I(inode)->ip_blkno, offset);
 				offset += sb->s_blocksize;
 				continue;
 			}
@@ -406,8 +404,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 	mlog_entry_void();
 
 	dir_i_size = i_size_read(dir);
-	mlog(0, "extending dir %"MLFu64" (i_size = %lld)\n",
-	     OCFS2_I(dir)->ip_blkno, dir_i_size);
+	mlog(0, "extending dir %llu (i_size = %lld)\n",
+	     (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size);
 
 	handle = ocfs2_alloc_handle(osb);
 	if (handle == NULL) {
@@ -531,8 +529,8 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
-	mlog(0, "getting ready to insert namelen %d into dir %"MLFu64"\n",
-	     namelen, OCFS2_I(dir)->ip_blkno);
+	mlog(0, "getting ready to insert namelen %d into dir %llu\n",
+	     namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno);
 
 	BUG_ON(!S_ISDIR(dir->i_mode));
 	fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index e971ec2f840..84f153aca69 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -231,9 +231,9 @@ static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
 
 	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
 
-	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016"MLFx64"%08x",
-		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, blkno,
-		       generation);
+	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
+		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
+		       (long long)blkno, generation);
 
 	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));
 
@@ -533,8 +533,8 @@ static void ocfs2_inode_ast_func(void *opaque)
 
 	inode = ocfs2_lock_res_inode(lockres);
 
-	mlog(0, "AST fired for inode %"MLFu64", l_action = %u, type = %s\n",
-	     OCFS2_I(inode)->ip_blkno, lockres->l_action,
+	mlog(0, "AST fired for inode %llu, l_action = %u, type = %s\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, lockres->l_action,
 	     ocfs2_lock_type_string(lockres->l_type));
 
 	BUG_ON(!ocfs2_is_inode_lock(lockres));
@@ -544,8 +544,8 @@ static void ocfs2_inode_ast_func(void *opaque)
 	lksb = &(lockres->l_lksb);
 	if (lksb->status != DLM_NORMAL) {
 		mlog(ML_ERROR, "ocfs2_inode_ast_func: lksb status value of %u "
-		     "on inode %"MLFu64"\n", lksb->status,
-		     OCFS2_I(inode)->ip_blkno);
+		     "on inode %llu\n", lksb->status,
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		spin_unlock_irqrestore(&lockres->l_lock, flags);
 		mlog_exit_void();
 		return;
@@ -646,10 +646,9 @@ static void ocfs2_inode_bast_func(void *opaque, int level)
 	inode = ocfs2_lock_res_inode(lockres);
 	osb = OCFS2_SB(inode->i_sb);
 
-	mlog(0, "BAST fired for inode %"MLFu64", blocking = %d, level = %d "
-	     "type = %s\n", OCFS2_I(inode)->ip_blkno, level,
-	     lockres->l_level,
-	     ocfs2_lock_type_string(lockres->l_type));
+	mlog(0, "BAST fired for inode %llu, blocking %d, level %d type %s\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, level,
+	     lockres->l_level, ocfs2_lock_type_string(lockres->l_type));
 
 	ocfs2_generic_bast_func(osb, lockres, level);
 
@@ -1104,7 +1103,7 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
 
 	mlog_entry_void();
 
-	mlog(0, "Inode %"MLFu64"\n", OCFS2_I(inode)->ip_blkno);
+	mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	/* NOTE: That we don't increment any of the holder counts, nor
 	 * do we add anything to a journal handle. Since this is
@@ -1149,8 +1148,8 @@ int ocfs2_rw_lock(struct inode *inode, int write)
 
 	mlog_entry_void();
 
-	mlog(0, "inode %"MLFu64" take %s RW lock\n",
-	     OCFS2_I(inode)->ip_blkno,
+	mlog(0, "inode %llu take %s RW lock\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     write ? "EXMODE" : "PRMODE");
 
 	lockres = &OCFS2_I(inode)->ip_rw_lockres;
@@ -1173,8 +1172,8 @@ void ocfs2_rw_unlock(struct inode *inode, int write)
 
 	mlog_entry_void();
 
-	mlog(0, "inode %"MLFu64" drop %s RW lock\n",
-	     OCFS2_I(inode)->ip_blkno,
+	mlog(0, "inode %llu drop %s RW lock\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     write ? "EXMODE" : "PRMODE");
 
 	ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
@@ -1193,8 +1192,8 @@ int ocfs2_data_lock_full(struct inode *inode,
 
 	mlog_entry_void();
 
-	mlog(0, "inode %"MLFu64" take %s DATA lock\n",
-	     OCFS2_I(inode)->ip_blkno,
+	mlog(0, "inode %llu take %s DATA lock\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     write ? "EXMODE" : "PRMODE");
 
 	/* We'll allow faking a readonly data lock for
@@ -1278,8 +1277,8 @@ void ocfs2_data_unlock(struct inode *inode,
 
 	mlog_entry_void();
 
-	mlog(0, "inode %"MLFu64" drop %s DATA lock\n",
-	     OCFS2_I(inode)->ip_blkno,
+	mlog(0, "inode %llu drop %s DATA lock\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     write ? "EXMODE" : "PRMODE");
 
 	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)))
@@ -1462,9 +1461,9 @@ static int ocfs2_meta_lock_update(struct inode *inode,
 
 	spin_lock(&oi->ip_lock);
 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
-		mlog(0, "Orphaned inode %"MLFu64" was deleted while we "
+		mlog(0, "Orphaned inode %llu was deleted while we "
 		     "were waiting on a lock. ip_flags = 0x%x\n",
-		     oi->ip_blkno, oi->ip_flags);
+		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
 		spin_unlock(&oi->ip_lock);
 		status = -ENOENT;
 		goto bail;
@@ -1485,8 +1484,8 @@ static int ocfs2_meta_lock_update(struct inode *inode,
 	ocfs2_extent_map_trunc(inode, 0);
 
 	if (ocfs2_meta_lvb_is_trustable(lockres)) {
-		mlog(0, "Trusting LVB on inode %"MLFu64"\n",
-		     oi->ip_blkno);
+		mlog(0, "Trusting LVB on inode %llu\n",
+		     (unsigned long long)oi->ip_blkno);
 		ocfs2_refresh_inode_from_lvb(inode);
 	} else {
 		/* Boo, we have to go to disk. */
@@ -1514,15 +1513,16 @@ static int ocfs2_meta_lock_update(struct inode *inode,
 		}
 		mlog_bug_on_msg(inode->i_generation !=
 				le32_to_cpu(fe->i_generation),
-				"Invalid dinode %"MLFu64" disk generation: %u "
+				"Invalid dinode %llu disk generation: %u "
 				"inode->i_generation: %u\n",
-				oi->ip_blkno, le32_to_cpu(fe->i_generation),
+				(unsigned long long)oi->ip_blkno,
+				le32_to_cpu(fe->i_generation),
 				inode->i_generation);
 		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
 				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
-				"Stale dinode %"MLFu64" dtime: %"MLFu64" "
-				"flags: 0x%x\n", oi->ip_blkno,
-				le64_to_cpu(fe->i_dtime),
+				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
+				(unsigned long long)oi->ip_blkno,
+				(unsigned long long)le64_to_cpu(fe->i_dtime),
 				le32_to_cpu(fe->i_flags));
 
 		ocfs2_refresh_inode(inode, fe);
@@ -1581,8 +1581,8 @@ int ocfs2_meta_lock_full(struct inode *inode,
 
 	mlog_entry_void();
 
-	mlog(0, "inode %"MLFu64", take %s META lock\n",
-	     OCFS2_I(inode)->ip_blkno,
+	mlog(0, "inode %llu, take %s META lock\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     ex ? "EXMODE" : "PRMODE");
 
 	status = 0;
@@ -1716,8 +1716,8 @@ void ocfs2_meta_unlock(struct inode *inode,
 
 	mlog_entry_void();
 
-	mlog(0, "inode %"MLFu64" drop %s META lock\n",
-	     OCFS2_I(inode)->ip_blkno,
+	mlog(0, "inode %llu drop %s META lock\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     ex ? "EXMODE" : "PRMODE");
 
 	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)))
@@ -2686,8 +2686,8 @@ static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
 	mapping = inode->i_mapping;
 
 	if (filemap_fdatawrite(mapping)) {
-		mlog(ML_ERROR, "Could not sync inode %"MLFu64" for downconvert!",
-		     OCFS2_I(inode)->ip_blkno);
+		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 	}
 	sync_mapping_buffers(mapping);
 	if (blocking == LKM_EXMODE) {
@@ -2717,7 +2717,8 @@ int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
 	inode = ocfs2_lock_res_inode(lockres);
 	osb = OCFS2_SB(inode->i_sb);
 
-	mlog(0, "unblock inode %"MLFu64"\n", OCFS2_I(inode)->ip_blkno);
+	mlog(0, "unblock inode %llu\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	status = ocfs2_generic_unblock_lock(osb,
 					    lockres,
@@ -2726,8 +2727,8 @@ int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
 	if (status < 0)
 		mlog_errno(status);
 
-	mlog(0, "inode %"MLFu64", requeue = %d\n",
-	     OCFS2_I(inode)->ip_blkno, *requeue);
+	mlog(0, "inode %llu, requeue = %d\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue);
 
 	mlog_exit(status);
 	return status;
@@ -2767,14 +2768,15 @@ int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
 
        	inode = ocfs2_lock_res_inode(lockres);
 
-	mlog(0, "unblock inode %"MLFu64"\n", OCFS2_I(inode)->ip_blkno);
+	mlog(0, "unblock inode %llu\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	status = ocfs2_do_unblock_meta(inode, requeue);
 	if (status < 0)
 		mlog_errno(status);
 
-	mlog(0, "inode %"MLFu64", requeue = %d\n",
-	     OCFS2_I(inode)->ip_blkno, *requeue);
+	mlog(0, "inode %llu, requeue = %d\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue);
 
 	mlog_exit(status);
 	return status;
@@ -2893,12 +2895,13 @@ void ocfs2_dump_meta_lvb_info(u64 level,
 	     lockres->l_name, function, line);
 	mlog(level, "version: %u, clusters: %u\n",
 	     be32_to_cpu(lvb->lvb_version), be32_to_cpu(lvb->lvb_iclusters));
-	mlog(level, "size: %"MLFu64", uid %u, gid %u, mode 0x%x\n",
-	     be64_to_cpu(lvb->lvb_isize), be32_to_cpu(lvb->lvb_iuid),
-	     be32_to_cpu(lvb->lvb_igid), be16_to_cpu(lvb->lvb_imode));
-	mlog(level, "nlink %u, atime_packed 0x%"MLFx64", "
-	     "ctime_packed 0x%"MLFx64", mtime_packed 0x%"MLFx64"\n",
-	     be16_to_cpu(lvb->lvb_inlink), be64_to_cpu(lvb->lvb_iatime_packed),
-	     be64_to_cpu(lvb->lvb_ictime_packed),
-	     be64_to_cpu(lvb->lvb_imtime_packed));
+	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
+	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
+	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
+	     be16_to_cpu(lvb->lvb_imode));
+	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
+	     "mtime_packed 0x%llx\n", be16_to_cpu(lvb->lvb_inlink),
+	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
+	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
+	     (long long)be64_to_cpu(lvb->lvb_imtime_packed));
 }
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 5810160d92a..ec55ab3c121 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -95,8 +95,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 	mlog_entry("(0x%p, '%.*s')\n", child,
 		   child->d_name.len, child->d_name.name);
 
-	mlog(0, "find parent of directory %"MLFu64"\n",
-	     OCFS2_I(dir)->ip_blkno);
+	mlog(0, "find parent of directory %llu\n",
+	     (unsigned long long)OCFS2_I(dir)->ip_blkno);
 
 	status = ocfs2_meta_lock(dir, NULL, NULL, 0);
 	if (status < 0) {
@@ -115,7 +115,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 
 	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
 	if (IS_ERR(inode)) {
-		mlog(ML_ERROR, "Unable to create inode %"MLFu64"\n", blkno);
+		mlog(ML_ERROR, "Unable to create inode %llu\n",
+		     (unsigned long long)blkno);
 		parent = ERR_PTR(-EACCES);
 		goto bail_unlock;
 	}
@@ -160,8 +161,8 @@ static int ocfs2_encode_fh(struct dentry *dentry, __be32 *fh, int *max_len,
 	blkno = OCFS2_I(inode)->ip_blkno;
 	generation = inode->i_generation;
 
-	mlog(0, "Encoding fh: blkno: %"MLFu64", generation: %u\n",
-	     blkno, generation);
+	mlog(0, "Encoding fh: blkno: %llu, generation: %u\n",
+	     (unsigned long long)blkno, generation);
 
 	len = 3;
 	fh[0] = cpu_to_le32((u32)(blkno >> 32));
@@ -186,8 +187,8 @@ static int ocfs2_encode_fh(struct dentry *dentry, __be32 *fh, int *max_len,
 		len = 6;
 		type = 2;
 
-		mlog(0, "Encoding parent: blkno: %"MLFu64", generation: %u\n",
-		     blkno, generation);
+		mlog(0, "Encoding parent: blkno: %llu, generation: %u\n",
+		     (unsigned long long)blkno, generation);
 	}
 	
 	*max_len = len;
@@ -220,16 +221,17 @@ static struct dentry *ocfs2_decode_fh(struct super_block *sb, __be32 *fh,
 		parent.ih_blkno |= (u64)le32_to_cpu(fh[4]);
 		parent.ih_generation = le32_to_cpu(fh[5]);
 
-		mlog(0, "Decoding parent: blkno: %"MLFu64", generation: %u\n",
-		     parent.ih_blkno, parent.ih_generation);
+		mlog(0, "Decoding parent: blkno: %llu, generation: %u\n",
+		     (unsigned long long)parent.ih_blkno,
+		     parent.ih_generation);
 	}
 
 	handle.ih_blkno = (u64)le32_to_cpu(fh[0]) << 32;
 	handle.ih_blkno |= (u64)le32_to_cpu(fh[1]);
 	handle.ih_generation = le32_to_cpu(fh[2]);
 
-	mlog(0, "Encoding fh: blkno: %"MLFu64", generation: %u\n",
-	     handle.ih_blkno, handle.ih_generation);
+	mlog(0, "Encoding fh: blkno: %llu, generation: %u\n",
+	     (unsigned long long)handle.ih_blkno, handle.ih_generation);
 
 	ret = ocfs2_export_ops.find_exported_dentry(sb, &handle, &parent,
 						    acceptable, context);
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index e6f207eebab..4601fc256f1 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -182,10 +182,10 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode,
 			if (rec_end > OCFS2_I(inode)->ip_clusters) {
 				mlog_errno(ret);
 				ocfs2_error(inode->i_sb,
-					    "Extent %d at e_blkno %"MLFu64" of inode %"MLFu64" goes past ip_clusters of %u\n",
+					    "Extent %d at e_blkno %llu of inode %llu goes past ip_clusters of %u\n",
 					    i,
-					    le64_to_cpu(rec->e_blkno),
-					    OCFS2_I(inode)->ip_blkno,
+					    (unsigned long long)le64_to_cpu(rec->e_blkno),
+					    (unsigned long long)OCFS2_I(inode)->ip_blkno,
 					    OCFS2_I(inode)->ip_clusters);
 				goto out_free;
 			}
@@ -233,11 +233,11 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode,
 			if (blkno) {
 				mlog_errno(ret);
 				ocfs2_error(inode->i_sb,
-					    "Multiple extents for (cpos = %u, clusters = %u) on inode %"MLFu64"; e_blkno %"MLFu64" and rec %d at e_blkno %"MLFu64"\n",
+					    "Multiple extents for (cpos = %u, clusters = %u) on inode %llu; e_blkno %llu and rec %d at e_blkno %llu\n",
 					    cpos, clusters,
-					    OCFS2_I(inode)->ip_blkno,
-					    blkno, i,
-					    le64_to_cpu(rec->e_blkno));
+					    (unsigned long long)OCFS2_I(inode)->ip_blkno,
+					    (unsigned long long)blkno, i,
+					    (unsigned long long)le64_to_cpu(rec->e_blkno));
 				goto out_free;
 			}
 
@@ -251,9 +251,9 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode,
 		ret = -EBADR;
 		if (!blkno) {
 			ocfs2_error(inode->i_sb,
-				    "No record found for (cpos = %u, clusters = %u) on inode %"MLFu64"\n",
+				    "No record found for (cpos = %u, clusters = %u) on inode %llu\n",
 				    cpos, clusters,
-				    OCFS2_I(inode)->ip_blkno);
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
 			mlog_errno(ret);
 			goto out_free;
 		}
@@ -288,10 +288,10 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode,
 			ret = -EBADR;
 			mlog_errno(ret);
 			ocfs2_error(inode->i_sb,
-				    "Extent %d at e_blkno %"MLFu64" of inode %"MLFu64" goes past ip_clusters of %u\n",
+				    "Extent %d at e_blkno %llu of inode %llu goes past ip_clusters of %u\n",
 				    i,
-				    le64_to_cpu(rec->e_blkno),
-				    OCFS2_I(inode)->ip_blkno,
+				    (unsigned long long)le64_to_cpu(rec->e_blkno),
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
 				    OCFS2_I(inode)->ip_clusters);
 			return ret;
 		}
@@ -557,9 +557,9 @@ static int ocfs2_extent_map_insert(struct inode *inode,
 			ret = -EBADR;
 			mlog_errno(ret);
 			ocfs2_error(inode->i_sb,
-				    "Zero e_clusters on non-tail extent record at e_blkno %"MLFu64" on inode %"MLFu64"\n",
-				    le64_to_cpu(rec->e_blkno),
-				    OCFS2_I(inode)->ip_blkno);
+				    "Zero e_clusters on non-tail extent record at e_blkno %llu on inode %llu\n",
+				    (unsigned long long)le64_to_cpu(rec->e_blkno),
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
 			return ret;
 		}
 
@@ -660,10 +660,10 @@ int ocfs2_extent_map_append(struct inode *inode,
 	mlog_bug_on_msg((le32_to_cpu(rec->e_cpos) +
 			 le32_to_cpu(rec->e_clusters)) !=
 			(em->em_clusters + new_clusters),
-			"Inode %"MLFu64":\n"
+			"Inode %llu:\n"
 			"rec->e_cpos = %u + rec->e_clusters = %u = %u\n"
 			"em->em_clusters = %u + new_clusters = %u = %u\n",
-			OCFS2_I(inode)->ip_blkno,
+			(unsigned long long)OCFS2_I(inode)->ip_blkno,
 			le32_to_cpu(rec->e_cpos), le32_to_cpu(rec->e_clusters),
 			le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters),
 			em->em_clusters, new_clusters,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8a4048b55fd..4b4cbadd583 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -220,8 +220,9 @@ static int ocfs2_truncate_file(struct inode *inode,
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_truncate_context *tc = NULL;
 
-	mlog_entry("(inode = %"MLFu64", new_i_size = %"MLFu64"\n",
-		   OCFS2_I(inode)->ip_blkno, new_i_size);
+	mlog_entry("(inode = %llu, new_i_size = %llu\n",
+		   (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		   (unsigned long long)new_i_size);
 
 	truncate_inode_pages(inode->i_mapping, new_i_size);
 
@@ -233,23 +234,26 @@ static int ocfs2_truncate_file(struct inode *inode,
 	}
 
 	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
-			"Inode %"MLFu64", inode i_size = %lld != di "
-			"i_size = %"MLFu64", i_flags = 0x%x\n",
-			OCFS2_I(inode)->ip_blkno,
+			"Inode %llu, inode i_size = %lld != di "
+			"i_size = %llu, i_flags = 0x%x\n",
+			(unsigned long long)OCFS2_I(inode)->ip_blkno,
 			i_size_read(inode),
-			le64_to_cpu(fe->i_size), le32_to_cpu(fe->i_flags));
+			(unsigned long long)le64_to_cpu(fe->i_size),
+			le32_to_cpu(fe->i_flags));
 
 	if (new_i_size > le64_to_cpu(fe->i_size)) {
-		mlog(0, "asked to truncate file with size (%"MLFu64") "
-		     "to size (%"MLFu64")!\n",
-		     le64_to_cpu(fe->i_size), new_i_size);
+		mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n",
+		     (unsigned long long)le64_to_cpu(fe->i_size),
+		     (unsigned long long)new_i_size);
 		status = -EINVAL;
 		mlog_errno(status);
 		goto bail;
 	}
 
-	mlog(0, "inode %"MLFu64", i_size = %"MLFu64", new_i_size = %"MLFu64"\n",
-	     le64_to_cpu(fe->i_blkno), le64_to_cpu(fe->i_size), new_i_size);
+	mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n",
+	     (unsigned long long)le64_to_cpu(fe->i_blkno),
+	     (unsigned long long)le64_to_cpu(fe->i_size),
+	     (unsigned long long)new_i_size);
 
 	/* lets handle the simple truncate cases before doing any more
 	 * cluster locking. */
@@ -378,8 +382,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
 	}
 
 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
-	mlog(0, "Allocating %u clusters at block %u for inode %"MLFu64"\n",
-	     num_bits, bit_off, OCFS2_I(inode)->ip_blkno);
+	mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
+	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
 	status = ocfs2_insert_extent(osb, handle, inode, fe_bh, block,
 				     num_bits, meta_ac);
 	if (status < 0) {
@@ -449,9 +453,9 @@ static int ocfs2_extend_allocation(struct inode *inode,
 restart_all:
 	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
 
-	mlog(0, "extend inode %"MLFu64", i_size = %lld, fe->i_clusters = %u, "
+	mlog(0, "extend inode %llu, i_size = %lld, fe->i_clusters = %u, "
 	     "clusters_to_add = %u\n",
-	     OCFS2_I(inode)->ip_blkno, i_size_read(inode),
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
 	     fe->i_clusters, clusters_to_add);
 
 	handle = ocfs2_alloc_handle(osb);
@@ -569,8 +573,8 @@ restarted_transaction:
 		}
 	}
 
-	mlog(0, "fe: i_clusters = %u, i_size=%"MLFu64"\n",
-	     fe->i_clusters, fe->i_size);
+	mlog(0, "fe: i_clusters = %u, i_size=%llu\n",
+	     fe->i_clusters, (unsigned long long)fe->i_size);
 	mlog(0, "inode: ip_clusters=%u, i_size=%lld\n",
 	     OCFS2_I(inode)->ip_clusters, i_size_read(inode));
 
@@ -865,8 +869,8 @@ static int ocfs2_write_remove_suid(struct inode *inode)
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_dinode *di;
 
-	mlog_entry("(Inode %"MLFu64", mode 0%o)\n", oi->ip_blkno,
-		   inode->i_mode);
+	mlog_entry("(Inode %llu, mode 0%o)\n",
+		   (unsigned long long)oi->ip_blkno, inode->i_mode);
 
 	handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
 	if (handle == NULL) {
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 315472a5c19..327a5b7b86e 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -95,7 +95,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
 	struct super_block *sb = osb->sb;
 	struct ocfs2_find_inode_args args;
 
-	mlog_entry("(blkno = %"MLFu64")\n", blkno);
+	mlog_entry("(blkno = %llu)\n", (unsigned long long)blkno);
 
 	/* Ok. By now we've either got the offsets passed to us by the
 	 * caller, or we just pulled them off the bh. Lets do some
@@ -134,8 +134,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
 
 bail:
 	if (!IS_ERR(inode)) {
-		mlog(0, "returning inode with number %"MLFu64"\n",
-		     OCFS2_I(inode)->ip_blkno);
+		mlog(0, "returning inode with number %llu\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		mlog_exit_ptr(inode);
 	} else
 		mlog_errno(PTR_ERR(inode));
@@ -219,7 +219,8 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 	struct ocfs2_super *osb;
 	int status = -EINVAL;
 
-	mlog_entry("(0x%p, size:%"MLFu64")\n", inode, fe->i_size);
+	mlog_entry("(0x%p, size:%llu)\n", inode,
+		   (unsigned long long)fe->i_size);
 
 	sb = inode->i_sb;
 	osb = OCFS2_SB(sb);
@@ -228,9 +229,10 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 	 * today.  change if needed. */
 	if (!OCFS2_IS_VALID_DINODE(fe) ||
 	    !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
-		mlog(ML_ERROR, "Invalid dinode: i_ino=%lu, i_blkno=%"MLFu64", "
+		mlog(ML_ERROR, "Invalid dinode: i_ino=%lu, i_blkno=%llu, "
 		     "signature = %.*s, flags = 0x%x\n",
-		     inode->i_ino, le64_to_cpu(fe->i_blkno), 7,
+		     inode->i_ino,
+		     (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
 		     fe->i_signature, le32_to_cpu(fe->i_flags));
 		goto bail;
 	}
@@ -268,8 +270,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 
 	if (OCFS2_I(inode)->ip_blkno != le64_to_cpu(fe->i_blkno))
 		mlog(ML_ERROR,
-		     "ip_blkno %"MLFu64" != i_blkno %"MLFu64"!\n",
-		     OCFS2_I(inode)->ip_blkno, fe->i_blkno);
+		     "ip_blkno %llu != i_blkno %llu!\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		     (unsigned long long)fe->i_blkno);
 
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
 	OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
@@ -278,8 +281,8 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 		inode->i_ino = ino_from_blkno(inode->i_sb,
 			       le64_to_cpu(fe->i_blkno));
 
-	mlog(0, "blkno = %"MLFu64", ino = %lu, create_ino = %s\n",
-	     fe->i_blkno, inode->i_ino, create_ino ? "true" : "false");
+	mlog(0, "blkno = %llu, ino = %lu, create_ino = %s\n",
+	     (unsigned long long)fe->i_blkno, inode->i_ino, create_ino ? "true" : "false");
 
 	inode->i_nlink = le16_to_cpu(fe->i_links_count);
 
@@ -371,8 +374,8 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 
 	fe = (struct ocfs2_dinode *) bh->b_data;
 	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		mlog(ML_ERROR, "Invalid dinode #%"MLFu64": signature = %.*s\n",
-		     fe->i_blkno, 7, fe->i_signature);
+		mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
+		     (unsigned long long)fe->i_blkno, 7, fe->i_signature);
 		make_bad_inode(inode);
 		goto bail;
 	}
@@ -386,8 +389,8 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 
 	status = -EINVAL;
 	if (ocfs2_populate_inode(inode, fe, 0) < 0) {
-		mlog(ML_ERROR, "populate inode failed! i_blkno=%"MLFu64", "
-		     "i_ino=%lu\n", fe->i_blkno, inode->i_ino);
+		mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n",
+		     (unsigned long long)fe->i_blkno, inode->i_ino);
 		make_bad_inode(inode);
 		goto bail;
 	}
@@ -675,8 +678,8 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
 	 * never get here as system file inodes should always have a
 	 * positive link count. */
 	if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
-		mlog(ML_ERROR, "Skipping delete of system file %"MLFu64".\n",
-		     oi->ip_blkno);
+		mlog(ML_ERROR, "Skipping delete of system file %llu\n",
+		     (unsigned long long)oi->ip_blkno);
 		goto bail_unlock;
 	}
 
@@ -715,16 +718,16 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
 	 * ocfs2_delete_inode, another node might have asked to delete
 	 * the inode. Recheck our flags to catch this. */
 	if (!ocfs2_inode_is_valid_to_delete(inode)) {
-		mlog(0, "Skipping delete of %"MLFu64" because flags changed\n",
-		     oi->ip_blkno);
+		mlog(0, "Skipping delete of %llu because flags changed\n",
+		     (unsigned long long)oi->ip_blkno);
 		goto bail;
 	}
 
 	/* Now that we have an up to date inode, we can double check
 	 * the link count. */
 	if (inode->i_nlink) {
-		mlog(0, "Skipping delete of %"MLFu64" because nlink = %u\n",
-		     oi->ip_blkno, inode->i_nlink);
+		mlog(0, "Skipping delete of %llu because nlink = %u\n",
+		     (unsigned long long)oi->ip_blkno, inode->i_nlink);
 		goto bail;
 	}
 
@@ -734,9 +737,11 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
 		/* for lack of a better error? */
 		status = -EEXIST;
 		mlog(ML_ERROR,
-		     "Inode %"MLFu64" (on-disk %"MLFu64") not orphaned! "
+		     "Inode %llu (on-disk %llu) not orphaned! "
 		     "Disk flags  0x%x, inode flags 0x%x\n",
-		     oi->ip_blkno, di->i_blkno, di->i_flags, oi->ip_flags);
+		     (unsigned long long)oi->ip_blkno,
+		     (unsigned long long)di->i_blkno, di->i_flags,
+		     oi->ip_flags);
 		goto bail;
 	}
 
@@ -753,8 +758,8 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
 	 * disk and let them worry about deleting it. */
 	if (status == -EBUSY) {
 		status = 0;
-		mlog(0, "Skipping delete of %"MLFu64" because it is in use on"
-		     "other nodes\n", oi->ip_blkno);
+		mlog(0, "Skipping delete of %llu because it is in use on"
+		     "other nodes\n", (unsigned long long)oi->ip_blkno);
 		goto bail;
 	}
 	if (status < 0) {
@@ -768,13 +773,13 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
 		 * into. This may happen during node death and
 		 * recovery knows how to clean it up so we can safely
 		 * ignore this inode for now on. */
-		mlog(0, "Nobody knew where inode %"MLFu64" was orphaned!\n",
-		     oi->ip_blkno);
+		mlog(0, "Nobody knew where inode %llu was orphaned!\n",
+		     (unsigned long long)oi->ip_blkno);
 	} else {
 		*wipe = 1;
 
-		mlog(0, "Inode %"MLFu64" is ok to wipe from orphan dir %d\n",
-		     oi->ip_blkno, oi->ip_orphaned_slot);
+		mlog(0, "Inode %llu is ok to wipe from orphan dir %d\n",
+		     (unsigned long long)oi->ip_blkno, oi->ip_orphaned_slot);
 	}
 	spin_unlock(&oi->ip_lock);
 
@@ -788,8 +793,8 @@ bail:
 static void ocfs2_cleanup_delete_inode(struct inode *inode,
 				       int sync_data)
 {
-	mlog(0, "Cleanup inode %"MLFu64", sync = %d\n",
-	     OCFS2_I(inode)->ip_blkno, sync_data);
+	mlog(0, "Cleanup inode %llu, sync = %d\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data);
 	if (sync_data)
 		write_inode_now(inode, 1);
 	truncate_inode_pages(&inode->i_data, 0);
@@ -897,8 +902,8 @@ void ocfs2_clear_inode(struct inode *inode)
 	if (!inode)
 		goto bail;
 
-	mlog(0, "Clearing inode: %"MLFu64", nlink = %u\n",
-	     OCFS2_I(inode)->ip_blkno, inode->i_nlink);
+	mlog(0, "Clearing inode: %llu, nlink = %u\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink);
 
 	mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
 			"Inode=%lu\n", inode->i_ino);
@@ -919,8 +924,8 @@ void ocfs2_clear_inode(struct inode *inode)
 		ocfs2_checkpoint_inode(inode);
 
 	mlog_bug_on_msg(!list_empty(&oi->ip_io_markers),
-			"Clear inode of %"MLFu64", inode has io markers\n",
-			oi->ip_blkno);
+			"Clear inode of %llu, inode has io markers\n",
+			(unsigned long long)oi->ip_blkno);
 
 	ocfs2_extent_map_drop(inode, 0);
 	ocfs2_extent_map_init(inode);
@@ -936,20 +941,20 @@ void ocfs2_clear_inode(struct inode *inode)
 	ocfs2_metadata_cache_purge(inode);
 
 	mlog_bug_on_msg(oi->ip_metadata_cache.ci_num_cached,
-			"Clear inode of %"MLFu64", inode has %u cache items\n",
-			oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached);
+			"Clear inode of %llu, inode has %u cache items\n",
+			(unsigned long long)oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached);
 
 	mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE),
-			"Clear inode of %"MLFu64", inode has a bad flag\n",
-			oi->ip_blkno);
+			"Clear inode of %llu, inode has a bad flag\n",
+			(unsigned long long)oi->ip_blkno);
 
 	mlog_bug_on_msg(spin_is_locked(&oi->ip_lock),
-			"Clear inode of %"MLFu64", inode is locked\n",
-			oi->ip_blkno);
+			"Clear inode of %llu, inode is locked\n",
+			(unsigned long long)oi->ip_blkno);
 
 	mlog_bug_on_msg(!mutex_trylock(&oi->ip_io_mutex),
-			"Clear inode of %"MLFu64", io_mutex is locked\n",
-			oi->ip_blkno);
+			"Clear inode of %llu, io_mutex is locked\n",
+			(unsigned long long)oi->ip_blkno);
 	mutex_unlock(&oi->ip_io_mutex);
 
 	/*
@@ -957,19 +962,19 @@ void ocfs2_clear_inode(struct inode *inode)
 	 * kernel 1, world 0
 	 */
 	mlog_bug_on_msg(!down_write_trylock(&oi->ip_alloc_sem),
-			"Clear inode of %"MLFu64", alloc_sem is locked\n",
-			oi->ip_blkno);
+			"Clear inode of %llu, alloc_sem is locked\n",
+			(unsigned long long)oi->ip_blkno);
 	up_write(&oi->ip_alloc_sem);
 
 	mlog_bug_on_msg(oi->ip_open_count,
-			"Clear inode of %"MLFu64" has open count %d\n",
-			oi->ip_blkno, oi->ip_open_count);
+			"Clear inode of %llu has open count %d\n",
+			(unsigned long long)oi->ip_blkno, oi->ip_open_count);
 	mlog_bug_on_msg(!list_empty(&oi->ip_handle_list),
-			"Clear inode of %"MLFu64" has non empty handle list\n",
-			oi->ip_blkno);
+			"Clear inode of %llu has non empty handle list\n",
+			(unsigned long long)oi->ip_blkno);
 	mlog_bug_on_msg(oi->ip_handle,
-			"Clear inode of %"MLFu64" has non empty handle pointer\n",
-			oi->ip_blkno);
+			"Clear inode of %llu has non empty handle pointer\n",
+			(unsigned long long)oi->ip_blkno);
 
 	/* Clear all other flags. */
 	oi->ip_flags = OCFS2_INODE_CACHE_INLINE;
@@ -991,8 +996,8 @@ void ocfs2_drop_inode(struct inode *inode)
 
 	mlog_entry_void();
 
-	mlog(0, "Drop inode %"MLFu64", nlink = %u, ip_flags = 0x%x\n",
-	     oi->ip_blkno, inode->i_nlink, oi->ip_flags);
+	mlog(0, "Drop inode %llu, nlink = %u, ip_flags = 0x%x\n",
+	     (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags);
 
 	/* Testing ip_orphaned_slot here wouldn't work because we may
 	 * not have gotten a delete_inode vote from any other nodes
@@ -1069,8 +1074,8 @@ int ocfs2_inode_revalidate(struct dentry *dentry)
 	struct inode *inode = dentry->d_inode;
 	int status = 0;
 
-	mlog_entry("(inode = 0x%p, ino = %"MLFu64")\n", inode,
-		   inode ? OCFS2_I(inode)->ip_blkno : 0ULL);
+	mlog_entry("(inode = 0x%p, ino = %llu)\n", inode,
+		   inode ? (unsigned long long)OCFS2_I(inode)->ip_blkno : 0ULL);
 
 	if (!inode) {
 		mlog(0, "eep, no inode!\n");
@@ -1114,7 +1119,8 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
 	int status;
 	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
 
-	mlog_entry("(inode %"MLFu64")\n", OCFS2_I(inode)->ip_blkno);
+	mlog_entry("(inode %llu)\n",
+		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	status = ocfs2_journal_access(handle, inode, bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 4be801f4559..ae3440ca083 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -503,8 +503,8 @@ static void ocfs2_handle_cleanup_locks(struct ocfs2_journal *journal,
 		ocfs2_meta_unlock(inode, 1);
 		if (atomic_read(&inode->i_count) == 1)
 			mlog(ML_ERROR,
-			     "Inode %"MLFu64", I'm doing a last iput for!",
-			     OCFS2_I(inode)->ip_blkno);
+			     "Inode %llu, I'm doing a last iput for!",
+			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		iput(inode);
 		kmem_cache_free(ocfs2_lock_cache, lock);
 	}
@@ -640,8 +640,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
 		/* This is called from startup/shutdown which will
 		 * handle the errors in a specific manner, so no need
 		 * to call ocfs2_error() here. */
-		mlog(ML_ERROR, "Journal dinode %"MLFu64"  has invalid "
-		     "signature: %.*s", fe->i_blkno, 7, fe->i_signature);
+		mlog(ML_ERROR, "Journal dinode %llu  has invalid "
+		     "signature: %.*s", (unsigned long long)fe->i_blkno, 7,
+		     fe->i_signature);
 		status = -EIO;
 		goto out;
 	}
@@ -934,8 +935,8 @@ void ocfs2_complete_recovery(void *data)
 
 		la_dinode = item->lri_la_dinode;
 		if (la_dinode) {
-			mlog(0, "Clean up local alloc %"MLFu64"\n",
-			     la_dinode->i_blkno);
+			mlog(0, "Clean up local alloc %llu\n",
+			     (unsigned long long)la_dinode->i_blkno);
 
 			ret = ocfs2_complete_local_alloc_recovery(osb,
 								  la_dinode);
@@ -947,8 +948,8 @@ void ocfs2_complete_recovery(void *data)
 
 		tl_dinode = item->lri_tl_dinode;
 		if (tl_dinode) {
-			mlog(0, "Clean up truncate log %"MLFu64"\n",
-			     tl_dinode->i_blkno);
+			mlog(0, "Clean up truncate log %llu\n",
+			     (unsigned long long)tl_dinode->i_blkno);
 
 			ret = ocfs2_complete_truncate_log_recovery(osb,
 								   tl_dinode);
@@ -1473,11 +1474,11 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 			if (de->file_type > OCFS2_FT_MAX) {
 				mlog(ML_ERROR,
 				     "block %llu contains invalid de: "
-				     "inode = %"MLFu64", rec_len = %u, "
+				     "inode = %llu, rec_len = %u, "
 				     "name_len = %u, file_type = %u, "
 				     "name='%.*s'\n",
 				     (unsigned long long)bh->b_blocknr,
-				     le64_to_cpu(de->inode),
+				     (unsigned long long)le64_to_cpu(de->inode),
 				     le16_to_cpu(de->rec_len),
 				     de->name_len,
 				     de->file_type,
@@ -1494,8 +1495,8 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 			if (IS_ERR(iter))
 				continue;
 
-			mlog(0, "queue orphan %"MLFu64"\n",
-			     OCFS2_I(iter)->ip_blkno);
+			mlog(0, "queue orphan %llu\n",
+			     (unsigned long long)OCFS2_I(iter)->ip_blkno);
 			/* No locking is required for the next_orphan
 			 * queue as there is only ever a single
 			 * process doing orphan recovery. */
@@ -1588,7 +1589,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 
 	while (inode) {
 		oi = OCFS2_I(inode);
-		mlog(0, "iput orphan %"MLFu64"\n", oi->ip_blkno);
+		mlog(0, "iput orphan %llu\n", (unsigned long long)oi->ip_blkno);
 
 		iter = oi->ip_next_orphan;
 
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 149b3518166..0d1973ea32b 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -143,8 +143,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
 
 	if (!(le32_to_cpu(alloc->i_flags) &
 	    (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
-		mlog(ML_ERROR, "Invalid local alloc inode, %"MLFu64"\n",
-		     OCFS2_I(inode)->ip_blkno);
+		mlog(ML_ERROR, "Invalid local alloc inode, %llu\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		status = -EINVAL;
 		goto bail;
 	}
@@ -493,9 +493,9 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 
 	if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
 	    ocfs2_local_alloc_count_bits(alloc)) {
-		ocfs2_error(osb->sb, "local alloc inode %"MLFu64" says it has "
+		ocfs2_error(osb->sb, "local alloc inode %llu says it has "
 			    "%u free bits, but a count shows %u",
-			    le64_to_cpu(alloc->i_blkno),
+			    (unsigned long long)le64_to_cpu(alloc->i_blkno),
 			    le32_to_cpu(alloc->id1.bitmap1.i_used),
 			    ocfs2_local_alloc_count_bits(alloc));
 		status = -EIO;
@@ -753,10 +753,11 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
 				ocfs2_clusters_to_blocks(osb->sb,
 							 start - count);
 
-			mlog(0, "freeing %u bits starting at local "
-			     "alloc bit %u (la_start_blk = %"MLFu64", "
-			     "blkno = %"MLFu64")\n", count, start - count,
-			     la_start_blk, blkno);
+			mlog(0, "freeing %u bits starting at local alloc bit "
+			     "%u (la_start_blk = %llu, blkno = %llu)\n",
+			     count, start - count,
+			     (unsigned long long)la_start_blk,
+			     (unsigned long long)blkno);
 
 			status = ocfs2_free_clusters(handle, main_bm_inode,
 						     main_bm_bh, blkno, count);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f6b77ff1d2b..274f61d0cda 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -161,8 +161,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 		goto bail;
 	}
 
-	mlog(0, "find name %.*s in directory %"MLFu64"\n", dentry->d_name.len,
-	     dentry->d_name.name, OCFS2_I(dir)->ip_blkno);
+	mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
+	     dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
 
 	status = ocfs2_meta_lock(dir, NULL, NULL, 0);
 	if (status < 0) {
@@ -180,7 +180,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 
 	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
 	if (IS_ERR(inode)) {
-		mlog(ML_ERROR, "Unable to create inode %"MLFu64"\n", blkno);
+		mlog(ML_ERROR, "Unable to create inode %llu\n",
+		     (unsigned long long)blkno);
 		ret = ERR_PTR(-EACCES);
 		goto bail_unlock;
 	}
@@ -310,8 +311,8 @@ static int ocfs2_mknod(struct inode *dir,
 	osb = OCFS2_SB(dir->i_sb);
 
 	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
-		mlog(ML_ERROR, "inode %"MLFu64" has i_nlink of %u\n",
-		     OCFS2_I(dir)->ip_blkno, dir->i_nlink);
+		mlog(ML_ERROR, "inode %llu has i_nlink of %u\n",
+		     (unsigned long long)OCFS2_I(dir)->ip_blkno, dir->i_nlink);
 		status = -EMLINK;
 		goto leave;
 	}
@@ -562,9 +563,9 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 
 	if (ocfs2_populate_inode(inode, fe, 1) < 0) {
 		mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
-		     "i_blkno=%"MLFu64", i_ino=%lu\n",
+		     "i_blkno=%llu, i_ino=%lu\n",
 		     (unsigned long long) (*new_fe_bh)->b_blocknr,
-		     fe->i_blkno, inode->i_ino);
+		     (unsigned long long)fe->i_blkno, inode->i_ino);
 		BUG();
 	}
 
@@ -765,7 +766,7 @@ static int ocfs2_unlink(struct inode *dir,
 
 	BUG_ON(dentry->d_parent->d_inode != dir);
 
-	mlog(0, "ino = %"MLFu64"\n", OCFS2_I(inode)->ip_blkno);
+	mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	if (inode == osb->root_inode) {
 		mlog(0, "Cannot delete the root directory\n");
@@ -799,9 +800,9 @@ static int ocfs2_unlink(struct inode *dir,
 	if (OCFS2_I(inode)->ip_blkno != blkno) {
 		status = -ENOENT;
 
-		mlog(0, "ip_blkno (%"MLFu64") != dirent blkno (%"MLFu64") "
-		     "ip_flags = %x\n", OCFS2_I(inode)->ip_blkno, blkno,
-		     OCFS2_I(inode)->ip_flags);
+		mlog(0, "ip_blkno %llu != dirent blkno %llu ip_flags = %x\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		     (unsigned long long)blkno, OCFS2_I(inode)->ip_flags);
 		goto leave;
 	}
 
@@ -946,8 +947,9 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 	struct buffer_head **tmpbh;
 	struct inode *tmpinode;
 
-	mlog_entry("(inode1 = %"MLFu64", inode2 = %"MLFu64")\n",
-		   oi1->ip_blkno, oi2->ip_blkno);
+	mlog_entry("(inode1 = %llu, inode2 = %llu)\n",
+		   (unsigned long long)oi1->ip_blkno,
+		   (unsigned long long)oi2->ip_blkno);
 
 	BUG_ON(!handle);
 
@@ -1187,9 +1189,9 @@ static int ocfs2_rename(struct inode *old_dir,
 		if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) {
 			status = -EACCES;
 
-			mlog(0, "Inode blkno (%"MLFu64") and dir (%"MLFu64") "
-			     "disagree. ip_flags = %x\n",
-			     OCFS2_I(new_inode)->ip_blkno, newfe_blkno,
+			mlog(0, "Inode %llu and dir %llu disagree. flags = %x\n",
+			     (unsigned long long)OCFS2_I(new_inode)->ip_blkno,
+			     (unsigned long long)newfe_blkno,
 			     OCFS2_I(new_inode)->ip_flags);
 			goto bail;
 		}
@@ -1215,9 +1217,9 @@ static int ocfs2_rename(struct inode *old_dir,
 
 		newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
 
-		mlog(0, "aha rename over existing... new_de=%p "
-		     "new_blkno=%"MLFu64" newfebh=%p bhblocknr=%llu\n",
-		     new_de, newfe_blkno, newfe_bh, newfe_bh ?
+		mlog(0, "aha rename over existing... new_de=%p new_blkno=%llu "
+		     "newfebh=%p bhblocknr=%llu\n", new_de,
+		     (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
 		     (unsigned long long)newfe_bh->b_blocknr : 0ULL);
 
 		if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
@@ -1354,8 +1356,8 @@ static int ocfs2_rename(struct inode *old_dir,
 		if (new_dir_nlink != new_dir->i_nlink) {
 			if (!new_dir_bh) {
 				mlog(ML_ERROR, "need to change nlink for new "
-				     "dir %"MLFu64" from %d to %d but bh is "
-				     "NULL\n", OCFS2_I(new_dir)->ip_blkno,
+				     "dir %llu from %d to %d but bh is NULL\n",
+				     (unsigned long long)OCFS2_I(new_dir)->ip_blkno,
 				     (int)new_dir_nlink, new_dir->i_nlink);
 			} else {
 				struct ocfs2_dinode *fe;
@@ -1372,10 +1374,9 @@ static int ocfs2_rename(struct inode *old_dir,
 	if (old_dir_nlink != old_dir->i_nlink) {
 		if (!old_dir_bh) {
 			mlog(ML_ERROR, "need to change nlink for old dir "
-			     "%"MLFu64" from %d to %d but bh is NULL!\n",
-			     OCFS2_I(old_dir)->ip_blkno,
-			     (int)old_dir_nlink,
-			     old_dir->i_nlink);
+			     "%llu from %d to %d but bh is NULL!\n",
+			     (unsigned long long)OCFS2_I(old_dir)->ip_blkno,
+			     (int)old_dir_nlink, old_dir->i_nlink);
 		} else {
 			struct ocfs2_dinode *fe;
 			status = ocfs2_journal_access(handle, old_dir,
@@ -1634,9 +1635,9 @@ static int ocfs2_symlink(struct inode *dir,
 						    NULL);
 		if (status < 0) {
 			if (status != -ENOSPC && status != -EINTR) {
-				mlog(ML_ERROR, "Failed to extend file to "
-					       "%"MLFu64"\n",
-				     newsize);
+				mlog(ML_ERROR,
+				     "Failed to extend file to %llu\n",
+				     (unsigned long long)newsize);
 				mlog_errno(status);
 				status = -ENOSPC;
 			}
@@ -1716,10 +1717,11 @@ int ocfs2_check_dir_entry(struct inode * dir,
 		error_msg = "directory entry across blocks";
 
 	if (error_msg != NULL)
-		mlog(ML_ERROR, "bad entry in directory #%"MLFu64": %s - "
-		     "offset=%lu, inode=%"MLFu64", rec_len=%d, name_len=%d\n",
-		     OCFS2_I(dir)->ip_blkno, error_msg, offset,
-		     le64_to_cpu(de->inode), rlen, de->name_len);
+		mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
+		     "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
+		     (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
+		     offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
+		     de->name_len);
 	return error_msg == NULL ? 1 : 0;
 }
 
@@ -2021,8 +2023,8 @@ static int ocfs2_blkno_stringify(u64 blkno, char *name)
 
 	mlog_entry_void();
 
-	namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016"MLFx64,
-			   blkno);
+	namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx",
+			   (long long)blkno);
 	if (namelen <= 0) {
 		if (namelen)
 			status = namelen;
@@ -2167,8 +2169,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 	OCFS2_I(inode)->ip_orphaned_slot = osb->slot_num;
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 
-	mlog(0, "Inode %"MLFu64" orphaned in slot %d\n",
-	     OCFS2_I(inode)->ip_blkno, osb->slot_num);
+	mlog(0, "Inode %llu orphaned in slot %d\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
 
 leave:
 	if (orphan_dir_inode)
@@ -2202,8 +2204,9 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 		goto leave;
 	}
 
-	mlog(0, "removing '%s' from orphan dir %"MLFu64" (namelen=%d)\n",
-	     name, OCFS2_I(orphan_dir_inode)->ip_blkno, OCFS2_ORPHAN_NAMELEN);
+	mlog(0, "removing '%s' from orphan dir %llu (namelen=%d)\n",
+	     name, (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
+	     OCFS2_ORPHAN_NAMELEN);
 
 	/* find it's spot in the orphan directory */
 	target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN,
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index e89de9b6e49..da1093039c0 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -357,8 +357,8 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
 #define OCFS2_RO_ON_INVALID_DINODE(__sb, __di)	do {			\
 	typeof(__di) ____di = (__di);					\
 	ocfs2_error((__sb), 						\
-		"Dinode # %"MLFu64" has bad signature %.*s",		\
-		(____di)->i_blkno, 7,					\
+		"Dinode # %llu has bad signature %.*s",			\
+		(unsigned long long)(____di)->i_blkno, 7,		\
 		(____di)->i_signature);					\
 } while (0);
 
@@ -368,8 +368,8 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
 #define OCFS2_RO_ON_INVALID_EXTENT_BLOCK(__sb, __eb)	do {		\
 	typeof(__eb) ____eb = (__eb);					\
 	ocfs2_error((__sb), 						\
-		"Extent Block # %"MLFu64" has bad signature %.*s",	\
-		(____eb)->h_blkno, 7,					\
+		"Extent Block # %llu has bad signature %.*s",		\
+		(unsigned long long)(____eb)->h_blkno, 7,		\
 		(____eb)->h_signature);					\
 } while (0);
 
@@ -379,8 +379,8 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
 #define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd)	do {		\
 	typeof(__gd) ____gd = (__gd);					\
 		ocfs2_error((__sb),					\
-		"Group Descriptor # %"MLFu64" has bad signature %.*s",	\
-		(____gd)->bg_blkno, 7,					\
+		"Group Descriptor # %llu has bad signature %.*s",	\
+		(unsigned long long)(____gd)->bg_blkno, 7,		\
 		(____gd)->bg_signature);				\
 } while (0);
 
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index c46c164aefb..195523090c8 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -157,8 +157,9 @@ static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
 	mlog_entry_void();
 
 	if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
-		ocfs2_error(alloc_inode->i_sb, "group block (%"MLFu64") "
-			    "!= b_blocknr (%llu)", group_blkno,
+		ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
+			    "b_blocknr (%llu)",
+			    (unsigned long long)group_blkno,
 			    (unsigned long long) bg_bh->b_blocknr);
 		status = -EIO;
 		goto bail;
@@ -280,8 +281,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
 	/* setup the group */
 	bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
-	mlog(0, "new descriptor, record %u, at block %"MLFu64"\n",
-	     alloc_rec, bg_blkno);
+	mlog(0, "new descriptor, record %u, at block %llu\n",
+	     alloc_rec, (unsigned long long)bg_blkno);
 
 	bg_bh = sb_getblk(osb->sb, bg_blkno);
 	if (!bg_bh) {
@@ -382,8 +383,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 		goto bail;
 	}
 	if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
-		ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator "
-			    "# %"MLFu64, le64_to_cpu(fe->i_blkno));
+		ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
+			    (unsigned long long)le64_to_cpu(fe->i_blkno));
 		status = -EIO;
 		goto bail;
 	}
@@ -829,9 +830,10 @@ static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle,
 		goto out;
 	}
 
-	mlog(0, "In suballoc %"MLFu64", chain %u, move group %"MLFu64" to "
-	     "top, prev = %"MLFu64"\n",
-	     fe->i_blkno, chain, bg->bg_blkno, prev_bg->bg_blkno);
+	mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
+	     (unsigned long long)fe->i_blkno, chain,
+	     (unsigned long long)bg->bg_blkno,
+	     (unsigned long long)prev_bg->bg_blkno);
 
 	fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
 	bg_ptr = le64_to_cpu(bg->bg_next_group);
@@ -974,8 +976,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 	struct ocfs2_group_desc *bg;
 
 	chain = ac->ac_chain;
-	mlog(0, "trying to alloc %u bits from chain %u, inode %"MLFu64"\n",
-	     bits_wanted, chain, OCFS2_I(alloc_inode)->ip_blkno);
+	mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
+	     bits_wanted, chain,
+	     (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
 
 	status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
 				  le64_to_cpu(cl->cl_recs[chain].c_blkno),
@@ -1027,8 +1030,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 		goto bail;
 	}
 
-	mlog(0, "alloc succeeds: we give %u bits from block group %"MLFu64"\n",
-	     tmp_bits, bg->bg_blkno);
+	mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
+	     tmp_bits, (unsigned long long)bg->bg_blkno);
 
 	*num_bits = tmp_bits;
 
@@ -1092,8 +1095,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 		goto bail;
 	}
 
-	mlog(0, "Allocated %u bits from suballocator %"MLFu64"\n",
-	     *num_bits, fe->i_blkno);
+	mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits,
+	     (unsigned long long)fe->i_blkno);
 
 	*bg_blkno = le64_to_cpu(bg->bg_blkno);
 bail:
@@ -1134,9 +1137,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 	}
 	if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
 	    le32_to_cpu(fe->id1.bitmap1.i_total)) {
-		ocfs2_error(osb->sb, "Chain allocator dinode %"MLFu64" has %u"
-			    "used bits but only %u total.",
-			    le64_to_cpu(fe->i_blkno),
+		ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
+			    "bits but only %u total.",
+			    (unsigned long long)le64_to_cpu(fe->i_blkno),
 			    le32_to_cpu(fe->id1.bitmap1.i_used),
 			    le32_to_cpu(fe->id1.bitmap1.i_total));
 		status = -EIO;
@@ -1479,10 +1482,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
 	}
 	BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
 
-	mlog(0, "suballocator %"MLFu64": freeing %u bits from group %"MLFu64
-	     ", starting at %u\n",
-	     OCFS2_I(alloc_inode)->ip_blkno, count, bg_blkno,
-	     start_bit);
+	mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
+	     (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
+	     (unsigned long long)bg_blkno, start_bit);
 
 	status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED,
 				  alloc_inode);
@@ -1592,10 +1594,10 @@ int ocfs2_free_clusters(struct ocfs2_journal_handle *handle,
 	ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
 				     &bg_start_bit);
 
-	mlog(0, "want to free %u clusters starting at block %"MLFu64"\n",
-	     num_clusters, start_blk);
-	mlog(0, "bg_blkno = %"MLFu64", bg_start_bit = %u\n",
-	     bg_blkno, bg_start_bit);
+	mlog(0, "want to free %u clusters starting at block %llu\n",
+	     num_clusters, (unsigned long long)start_blk);
+	mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
+	     (unsigned long long)bg_blkno, bg_start_bit);
 
 	status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
 					  bg_start_bit, bg_blkno,
@@ -1616,18 +1618,22 @@ static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
 	printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
 	printk("bg_chain:           %u\n", bg->bg_chain);
 	printk("bg_generation:      %u\n", le32_to_cpu(bg->bg_generation));
-	printk("bg_next_group:      %"MLFu64"\n", bg->bg_next_group);
-	printk("bg_parent_dinode:   %"MLFu64"\n", bg->bg_parent_dinode);
-	printk("bg_blkno:           %"MLFu64"\n", bg->bg_blkno);
+	printk("bg_next_group:      %llu\n",
+	       (unsigned long long)bg->bg_next_group);
+	printk("bg_parent_dinode:   %llu\n",
+	       (unsigned long long)bg->bg_parent_dinode);
+	printk("bg_blkno:           %llu\n",
+	       (unsigned long long)bg->bg_blkno);
 }
 
 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
 {
 	int i;
 
-	printk("Suballoc Inode %"MLFu64":\n", fe->i_blkno);
+	printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno);
 	printk("i_signature:                  %s\n", fe->i_signature);
-	printk("i_size:                       %"MLFu64"\n", fe->i_size);
+	printk("i_size:                       %llu\n",
+	       (unsigned long long)fe->i_size);
 	printk("i_clusters:                   %u\n", fe->i_clusters);
 	printk("i_generation:                 %u\n",
 	       le32_to_cpu(fe->i_generation));
@@ -1645,7 +1651,7 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
 		       fe->id2.i_chain.cl_recs[i].c_free);
 		printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
 		       fe->id2.i_chain.cl_recs[i].c_total);
-		printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %"MLFu64"\n", i,
-		       fe->id2.i_chain.cl_recs[i].c_blkno);
+		printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
+		       (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
 	}
 }
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 44d8b524823..949b3dac30f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1428,8 +1428,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	osb->fs_generation = le32_to_cpu(di->i_fs_generation);
 	mlog(0, "vol_label: %s\n", osb->vol_label);
 	mlog(0, "uuid: %s\n", osb->uuid_str);
-	mlog(0, "root_blkno=%"MLFu64", system_dir_blkno=%"MLFu64"\n",
-	     osb->root_blkno, osb->system_dir_blkno);
+	mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n",
+	     (unsigned long long)osb->root_blkno,
+	     (unsigned long long)osb->system_dir_blkno);
 
 	osb->osb_dlm_debug = ocfs2_new_dlm_debug();
 	if (!osb->osb_dlm_debug) {
@@ -1472,8 +1473,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
 	osb->num_clusters = le32_to_cpu(di->id1.bitmap1.i_total);
 	brelse(bitmap_bh);
-	mlog(0, "cluster bitmap inode: %"MLFu64", clusters per group: %u\n",
-	     osb->bitmap_blkno, osb->bitmap_cpg);
+	mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n",
+	     (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg);
 
 	status = ocfs2_init_slot_info(osb);
 	if (status < 0) {
@@ -1531,8 +1532,9 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
 			     OCFS2_MINOR_REV_LEVEL);
 		} else if (bh->b_blocknr != le64_to_cpu(di->i_blkno)) {
 			mlog(ML_ERROR, "bad block number on superblock: "
-			     "found %"MLFu64", should be %llu\n",
-			     di->i_blkno, (unsigned long long)bh->b_blocknr);
+			     "found %llu, should be %llu\n",
+			     (unsigned long long)di->i_blkno,
+			     (unsigned long long)bh->b_blocknr);
 		} else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 ||
 			    le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) {
 			mlog(ML_ERROR, "bad cluster size found: %u\n",
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 300b5bedfb2..04a684dfdd9 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -119,8 +119,8 @@ void ocfs2_metadata_cache_purge(struct inode *inode)
 	tree = !(oi->ip_flags & OCFS2_INODE_CACHE_INLINE);
 	to_purge = ci->ci_num_cached;
 
-	mlog(0, "Purge %u %s items from Inode %"MLFu64"\n", to_purge,
-	     tree ? "array" : "tree", oi->ip_blkno);
+	mlog(0, "Purge %u %s items from Inode %llu\n", to_purge,
+	     tree ? "array" : "tree", (unsigned long long)oi->ip_blkno);
 
 	/* If we're a tree, save off the root so that we can safely
 	 * initialize the cache. We do the work to free tree members
@@ -136,8 +136,8 @@ void ocfs2_metadata_cache_purge(struct inode *inode)
 	 * easily detect counting errors. Unfortunately, this is only
 	 * meaningful for trees. */
 	if (tree && purged != to_purge)
-		mlog(ML_ERROR, "Inode %"MLFu64", count = %u, purged = %u\n",
-		     oi->ip_blkno, to_purge, purged);
+		mlog(ML_ERROR, "Inode %llu, count = %u, purged = %u\n",
+		     (unsigned long long)oi->ip_blkno, to_purge, purged);
 }
 
 /* Returns the index in the cache array, -1 if not found.
@@ -186,8 +186,9 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
 
 	spin_lock(&oi->ip_lock);
 
-	mlog(0, "Inode %"MLFu64", query block %llu (inline = %u)\n",
-	     oi->ip_blkno, (unsigned long long) bh->b_blocknr,
+	mlog(0, "Inode %llu, query block %llu (inline = %u)\n",
+	     (unsigned long long)oi->ip_blkno,
+	     (unsigned long long) bh->b_blocknr,
 	     !!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE));
 
 	if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE)
@@ -293,12 +294,12 @@ static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	mlog_bug_on_msg(ci->ci_num_cached != OCFS2_INODE_MAX_CACHE_ARRAY,
-			"Inode %"MLFu64", num cached = %u, should be %u\n",
-			oi->ip_blkno, ci->ci_num_cached,
+			"Inode %llu, num cached = %u, should be %u\n",
+			(unsigned long long)oi->ip_blkno, ci->ci_num_cached,
 			OCFS2_INODE_MAX_CACHE_ARRAY);
 	mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE),
-			"Inode %"MLFu64" not marked as inline anymore!\n",
-			oi->ip_blkno);
+			"Inode %llu not marked as inline anymore!\n",
+			(unsigned long long)oi->ip_blkno);
 	assert_spin_locked(&oi->ip_lock);
 
 	/* Be careful to initialize the tree members *first* because
@@ -316,8 +317,8 @@ static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
 		tree[i] = NULL;
 	}
 
-	mlog(0, "Expanded %"MLFu64" to a tree cache: flags 0x%x, num = %u\n",
-	     oi->ip_blkno, oi->ip_flags, ci->ci_num_cached);
+	mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n",
+	     (unsigned long long)oi->ip_blkno, oi->ip_flags, ci->ci_num_cached);
 }
 
 /* Slow path function - memory allocation is necessary. See the
@@ -332,8 +333,9 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 	struct ocfs2_meta_cache_item *tree[OCFS2_INODE_MAX_CACHE_ARRAY] =
 		{ NULL, };
 
-	mlog(0, "Inode %"MLFu64", block %llu, expand = %d\n",
-	     oi->ip_blkno, (unsigned long long) block, expand_tree);
+	mlog(0, "Inode %llu, block %llu, expand = %d\n",
+	     (unsigned long long)oi->ip_blkno,
+	     (unsigned long long)block, expand_tree);
 
 	new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_KERNEL);
 	if (!new) {
@@ -414,8 +416,9 @@ void ocfs2_set_buffer_uptodate(struct inode *inode,
 	if (ocfs2_buffer_cached(oi, bh))
 		return;
 
-	mlog(0, "Inode %"MLFu64", inserting block %llu\n", oi->ip_blkno,
-	     (unsigned long long) bh->b_blocknr);
+	mlog(0, "Inode %llu, inserting block %llu\n",
+	     (unsigned long long)oi->ip_blkno,
+	     (unsigned long long)bh->b_blocknr);
 
 	/* No need to recheck under spinlock - insertion is guarded by
 	 * ip_io_mutex */
@@ -504,8 +507,9 @@ void ocfs2_remove_from_cache(struct inode *inode,
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	spin_lock(&oi->ip_lock);
-	mlog(0, "Inode %"MLFu64", remove %llu, items = %u, array = %u\n",
-	     oi->ip_blkno, (unsigned long long) block, ci->ci_num_cached,
+	mlog(0, "Inode %llu, remove %llu, items = %u, array = %u\n",
+	     (unsigned long long)oi->ip_blkno,
+	     (unsigned long long) block, ci->ci_num_cached,
 	     oi->ip_flags & OCFS2_INODE_CACHE_INLINE);
 
 	if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) {
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 021978e0576..53049a20419 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -190,20 +190,21 @@ static int ocfs2_process_delete_request(struct inode *inode,
 				OCFS2_INVALID_SLOT &&
 				OCFS2_I(inode)->ip_orphaned_slot !=
 				(*orphaned_slot),
-				"Inode %"MLFu64": This node thinks it's "
+				"Inode %llu: This node thinks it's "
 				"orphaned in slot %d, messaged it's in %d\n",
-				OCFS2_I(inode)->ip_blkno,
+				(unsigned long long)OCFS2_I(inode)->ip_blkno,
 				OCFS2_I(inode)->ip_orphaned_slot,
 				*orphaned_slot);
 
-		mlog(0, "Setting orphaned slot for inode %"MLFu64" to %d\n",
-		     OCFS2_I(inode)->ip_blkno, *orphaned_slot);
+		mlog(0, "Setting orphaned slot for inode %llu to %d\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		     *orphaned_slot);
 
 		OCFS2_I(inode)->ip_orphaned_slot = *orphaned_slot;
 	} else {
-		mlog(0, "Sending back orphaned slot %d for inode %"MLFu64"\n",
+		mlog(0, "Sending back orphaned slot %d for inode %llu\n",
 		     OCFS2_I(inode)->ip_orphaned_slot,
-		     OCFS2_I(inode)->ip_blkno);
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 		*orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot;
 	}
@@ -226,8 +227,8 @@ static int ocfs2_process_delete_request(struct inode *inode,
 	}
 
 	if (filemap_fdatawrite(inode->i_mapping)) {
-		mlog(ML_ERROR, "Could not sync inode %"MLFu64" for delete!\n",
-		     OCFS2_I(inode)->ip_blkno);
+		mlog(ML_ERROR, "Could not sync inode %llu for delete!\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		goto done;
 	}
 	sync_mapping_buffers(inode->i_mapping);
@@ -302,8 +303,8 @@ static void ocfs2_process_dentry_request(struct inode *inode,
 	struct list_head *p;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
-	mlog(0, "parent %"MLFu64", namelen = %u, name = %.*s\n", parent_blkno,
-	     namelen, namelen, name);
+	mlog(0, "parent %llu, namelen = %u, name = %.*s\n",
+	     (unsigned long long)parent_blkno, namelen, namelen, name);
 
 	spin_lock(&dcache_lock);
 
@@ -370,9 +371,10 @@ static void ocfs2_process_vote(struct ocfs2_super *osb,
 	if (request == OCFS2_VOTE_REQ_DELETE)
 		orphaned_slot = be32_to_cpu(msg->md1.v_orphaned_slot);
 
-	mlog(0, "processing vote: request = %u, blkno = %"MLFu64", "
+	mlog(0, "processing vote: request = %u, blkno = %llu, "
 	     "generation = %u, node_num = %u, priv1 = %u\n", request,
-	     blkno, generation, node_num, be32_to_cpu(msg->md1.v_generic1));
+	     (unsigned long long)blkno, generation, node_num,
+	     be32_to_cpu(msg->md1.v_generic1));
 
 	if (!ocfs2_is_valid_vote_request(request)) {
 		mlog(ML_ERROR, "Invalid vote request %d from node %u\n",
@@ -419,11 +421,12 @@ static void ocfs2_process_vote(struct ocfs2_super *osb,
 	 * we had not found an inode in the first place. */
 	if (inode->i_generation != generation) {
 		mlog(0, "generation passed %u != inode generation = %u, "
-		     "ip_flags = %x, ip_blkno = %"MLFu64", msg %"MLFu64", "
-		     "i_count = %u, message type = %u\n",
-		     generation, inode->i_generation, OCFS2_I(inode)->ip_flags,
-		     OCFS2_I(inode)->ip_blkno, blkno,
-		     atomic_read(&inode->i_count), request);
+		     "ip_flags = %x, ip_blkno = %llu, msg %llu, i_count = %u, "
+		     "message type = %u\n", generation, inode->i_generation,
+		     OCFS2_I(inode)->ip_flags,
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		     (unsigned long long)blkno, atomic_read(&inode->i_count),
+		     request);
 		iput(inode);
 		inode = NULL;
 		goto respond;
@@ -830,8 +833,9 @@ static void ocfs2_delete_response_cb(void *priv,
 
 	orphaned_slot = be32_to_cpu(resp->r_orphaned_slot);
 	node = be32_to_cpu(resp->r_hdr.h_node_num);
-	mlog(0, "node %d tells us that inode %"MLFu64" is orphaned in slot "
-	     "%d\n", node, OCFS2_I(inode)->ip_blkno, orphaned_slot);
+	mlog(0, "node %d tells us that inode %llu is orphaned in slot %d\n",
+	     node, (unsigned long long)OCFS2_I(inode)->ip_blkno,
+	     orphaned_slot);
 
 	/* The other node may not actually know which slot the inode
 	 * is orphaned in. */
@@ -845,9 +849,9 @@ static void ocfs2_delete_response_cb(void *priv,
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 	mlog_bug_on_msg(OCFS2_I(inode)->ip_orphaned_slot != orphaned_slot &&
 			OCFS2_I(inode)->ip_orphaned_slot
-			!= OCFS2_INVALID_SLOT, "Inode %"MLFu64": Node %d "
-			"says it's orphaned in slot %d, we think it's in %d\n",
-			OCFS2_I(inode)->ip_blkno,
+			!= OCFS2_INVALID_SLOT, "Inode %llu: Node %d says it's "
+			"orphaned in slot %d, we think it's in %d\n",
+			(unsigned long long)OCFS2_I(inode)->ip_blkno,
 			be32_to_cpu(resp->r_hdr.h_node_num),
 			orphaned_slot, OCFS2_I(inode)->ip_orphaned_slot);
 
@@ -869,8 +873,8 @@ int ocfs2_request_delete_vote(struct inode *inode)
 	delete_cb.rc_cb = ocfs2_delete_response_cb;
 	delete_cb.rc_priv = inode;
 
-	mlog(0, "Inode %"MLFu64", we start thinking orphaned slot is %d\n",
-	     OCFS2_I(inode)->ip_blkno, orphaned_slot);
+	mlog(0, "Inode %llu, we start thinking orphaned slot is %d\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, orphaned_slot);
 
 	status = -ENOMEM;
 	request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
@@ -895,8 +899,8 @@ static void ocfs2_setup_unlink_vote(struct ocfs2_vote_msg *request,
 	 * d_delete against it. Parent directory block and full name
 	 * should suffice. */
 
-	mlog(0, "unlink/rename request: parent: %"MLFu64" name: %.*s\n",
-	     OCFS2_I(parent)->ip_blkno, dentry->d_name.len,
+	mlog(0, "unlink/rename request: parent: %llu name: %.*s\n",
+	     (unsigned long long)OCFS2_I(parent)->ip_blkno, dentry->d_name.len,
 	     dentry->d_name.name);
 
 	request->v_unlink_parent = cpu_to_be64(OCFS2_I(parent)->ip_blkno);
@@ -1082,7 +1086,8 @@ static int ocfs2_handle_response_message(struct o2net_msg *msg,
 	mlog(0, "received response message:\n");
 	mlog(0, "h_response_id = %u\n", response_id);
 	mlog(0, "h_request = %u\n", be32_to_cpu(resp->r_hdr.h_request));
-	mlog(0, "h_blkno = %"MLFu64"\n", be64_to_cpu(resp->r_hdr.h_blkno));
+	mlog(0, "h_blkno = %llu\n",
+	     (unsigned long long)be64_to_cpu(resp->r_hdr.h_blkno));
 	mlog(0, "h_generation = %u\n", be32_to_cpu(resp->r_hdr.h_generation));
 	mlog(0, "h_node_num = %u\n", node_num);
 	mlog(0, "r_response = %d\n", response_status);
@@ -1138,8 +1143,8 @@ static int ocfs2_handle_vote_message(struct o2net_msg *msg,
 	mlog(0, "h_response_id = %u\n",
 	     be32_to_cpu(work->w_msg.v_hdr.h_response_id));
 	mlog(0, "h_request = %u\n", be32_to_cpu(work->w_msg.v_hdr.h_request));
-	mlog(0, "h_blkno = %"MLFu64"\n",
-	     be64_to_cpu(work->w_msg.v_hdr.h_blkno));
+	mlog(0, "h_blkno = %llu\n",
+	     (unsigned long long)be64_to_cpu(work->w_msg.v_hdr.h_blkno));
 	mlog(0, "h_generation = %u\n",
 	     be32_to_cpu(work->w_msg.v_hdr.h_generation));
 	mlog(0, "h_node_num = %u\n",
-- 
cgit v1.2.3


From ea8aa68d36026c4c3d938170b06957aeac9682f4 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Mon, 6 Mar 2006 12:33:14 -0800
Subject: ocfs2: finally remove MLF* macros

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/masklog.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 2cadc3009c8..73edad78253 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -256,16 +256,6 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
 	}								\
 } while (0)
 
-#if (BITS_PER_LONG == 32) || defined(CONFIG_X86_64) || (defined(CONFIG_UML_X86) && defined(CONFIG_64BIT))
-#define MLFi64 "lld"
-#define MLFu64 "llu"
-#define MLFx64 "llx"
-#else
-#define MLFi64 "ld"
-#define MLFu64 "lu"
-#define MLFx64 "lx"
-#endif
-
 #include <linux/kobject.h>
 #include <linux/sysfs.h>
 int mlog_sys_init(struct subsystem *o2cb_subsys);
-- 
cgit v1.2.3


From 214fda1f6e1b8ef2a5292b0372744037fc80d318 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 25 Mar 2006 03:06:36 -0800
Subject: [PATCH] Optimise d_find_alias()

The attached patch optimises d_find_alias() to only take the spinlock if
there's anything in the the inode's alias list.  If there isn't, it returns
NULL immediately.

With respect to the superblock sharing patch, this should reduce by one the
number of times the dcache_lock is taken by nfs_lookup() for ordinary
directory lookups.

Only in the case where there's already a dentry for particular directory inode
(such as might happen when another mountpoint is rooted at that dentry) will
the lock then be taken the extra time.

Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 653f64ce98e..139e5fd22fa 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -325,10 +325,13 @@ static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
 
 struct dentry * d_find_alias(struct inode *inode)
 {
-	struct dentry *de;
-	spin_lock(&dcache_lock);
-	de = __d_find_alias(inode, 0);
-	spin_unlock(&dcache_lock);
+	struct dentry *de = NULL;
+
+	if (!list_empty(&inode->i_dentry)) {
+		spin_lock(&dcache_lock);
+		de = __d_find_alias(inode, 0);
+		spin_unlock(&dcache_lock);
+	}
 	return de;
 }
 
-- 
cgit v1.2.3


From 871751e25d956ad24f129ca972b7851feaa61d53 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 25 Mar 2006 03:06:39 -0800
Subject: [PATCH] slab: implement /proc/slab_allocators

Implement /proc/slab_allocators.   It produces output like:

idr_layer_cache: 80 idr_pre_get+0x33/0x4e
buffer_head: 2555 alloc_buffer_head+0x20/0x75
mm_struct: 9 mm_alloc+0x1e/0x42
mm_struct: 20 dup_mm+0x36/0x370
vm_area_struct: 384 dup_mm+0x18f/0x370
vm_area_struct: 151 do_mmap_pgoff+0x2e0/0x7c3
vm_area_struct: 1 split_vma+0x5a/0x10e
vm_area_struct: 11 do_brk+0x206/0x2e2
vm_area_struct: 2 copy_vma+0xda/0x142
vm_area_struct: 9 setup_arg_pages+0x99/0x214
fs_cache: 8 copy_fs_struct+0x21/0x133
fs_cache: 29 copy_process+0xf38/0x10e3
files_cache: 30 alloc_files+0x1b/0xcf
signal_cache: 81 copy_process+0xbaa/0x10e3
sighand_cache: 77 copy_process+0xe65/0x10e3
sighand_cache: 1 de_thread+0x4d/0x5f8
anon_vma: 241 anon_vma_prepare+0xd9/0xf3
size-2048: 1 add_sect_attrs+0x5f/0x145
size-2048: 2 journal_init_revoke+0x99/0x302
size-2048: 2 journal_init_revoke+0x137/0x302
size-2048: 2 journal_init_inode+0xf9/0x1c4

Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Alexander Nyberg <alexn@telia.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Lameter <clameter@engr.sgi.com>
Cc: Ravikiran Thirumalai <kiran@scalex86.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
DESC
slab-leaks3-locking-fix
EDESC
From: Andrew Morton <akpm@osdl.org>

Update for slab-remove-cachep-spinlock.patch

Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Alexander Nyberg <alexn@telia.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Lameter <clameter@engr.sgi.com>
Cc: Ravikiran Thirumalai <kiran@scalex86.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/proc_misc.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 826c131994c..1e9ea37d457 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -485,6 +485,40 @@ static struct file_operations proc_slabinfo_operations = {
 	.llseek		= seq_lseek,
 	.release	= seq_release,
 };
+
+#ifdef CONFIG_DEBUG_SLAB_LEAK
+extern struct seq_operations slabstats_op;
+static int slabstats_open(struct inode *inode, struct file *file)
+{
+	unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	int ret = -ENOMEM;
+	if (n) {
+		ret = seq_open(file, &slabstats_op);
+		if (!ret) {
+			struct seq_file *m = file->private_data;
+			*n = PAGE_SIZE / (2 * sizeof(unsigned long));
+			m->private = n;
+			n = NULL;
+		}
+		kfree(n);
+	}
+	return ret;
+}
+
+static int slabstats_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+	kfree(m->private);
+	return seq_release(inode, file);
+}
+
+static struct file_operations proc_slabstats_operations = {
+	.open		= slabstats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= slabstats_release,
+};
+#endif
 #endif
 
 static int show_stat(struct seq_file *p, void *v)
@@ -744,6 +778,9 @@ void __init proc_misc_init(void)
 	create_seq_entry("interrupts", 0, &proc_interrupts_operations);
 #ifdef CONFIG_SLAB
 	create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
+#ifdef CONFIG_DEBUG_SLAB_LEAK
+	create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations);
+#endif
 #endif
 	create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
 	create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
-- 
cgit v1.2.3


From e3df18983ea090a2e00dd5c2c6167bb431a0e0a2 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 25 Mar 2006 03:06:53 -0800
Subject: [PATCH] jbd: embed j_commit_timer in journal struct

The kjournald timer is currently on the kernel thread's stack and the journal
structure points at it.  Save a pointer hop by moving the timer into the
journal structure.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/journal.c     | 19 +++++++++----------
 fs/jbd/transaction.c |  4 ++--
 2 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 95a628d8cac..6bd4647dc5a 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -111,18 +111,17 @@ static void commit_timeout(unsigned long __data)
 
 static int kjournald(void *arg)
 {
-	journal_t *journal = (journal_t *) arg;
+	journal_t *journal = arg;
 	transaction_t *transaction;
-	struct timer_list timer;
 
 	daemonize("kjournald");
 
-	/* Set up an interval timer which can be used to trigger a
-           commit wakeup after the commit interval expires */
-	init_timer(&timer);
-	timer.data = (unsigned long) current;
-	timer.function = commit_timeout;
-	journal->j_commit_timer = &timer;
+	/*
+	 * Set up an interval timer which can be used to trigger a commit wakeup
+	 * after the commit interval expires
+	 */
+	setup_timer(&journal->j_commit_timer, commit_timeout,
+			(unsigned long)current);
 
 	/* Record that the journal thread is running */
 	journal->j_task = current;
@@ -146,7 +145,7 @@ loop:
 	if (journal->j_commit_sequence != journal->j_commit_request) {
 		jbd_debug(1, "OK, requests differ\n");
 		spin_unlock(&journal->j_state_lock);
-		del_timer_sync(journal->j_commit_timer);
+		del_timer_sync(&journal->j_commit_timer);
 		journal_commit_transaction(journal);
 		spin_lock(&journal->j_state_lock);
 		goto loop;
@@ -203,7 +202,7 @@ loop:
 
 end_loop:
 	spin_unlock(&journal->j_state_lock);
-	del_timer_sync(journal->j_commit_timer);
+	del_timer_sync(&journal->j_commit_timer);
 	journal->j_task = NULL;
 	wake_up(&journal->j_wait_done_commit);
 	jbd_debug(1, "Journal thread exiting.\n");
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 5fc40888f4c..ada31fa272e 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -53,8 +53,8 @@ get_transaction(journal_t *journal, transaction_t *transaction)
 	spin_lock_init(&transaction->t_handle_lock);
 
 	/* Set up the commit timer for the new transaction. */
-	journal->j_commit_timer->expires = transaction->t_expires;
-	add_timer(journal->j_commit_timer);
+	journal->j_commit_timer.expires = transaction->t_expires;
+	add_timer(&journal->j_commit_timer);
 
 	J_ASSERT(journal->j_running_transaction == NULL);
 	journal->j_running_transaction = transaction;
-- 
cgit v1.2.3


From 8d8c85117fbcbaea7718870ad4b1ddd12940d9b0 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 25 Mar 2006 03:06:53 -0800
Subject: [PATCH] jbd: convert kjournald to kthread API

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/journal.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 6bd4647dc5a..7f96b5cb678 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -33,9 +33,11 @@
 #include <linux/mm.h>
 #include <linux/suspend.h>
 #include <linux/pagemap.h>
+#include <linux/kthread.h>
+#include <linux/proc_fs.h>
+
 #include <asm/uaccess.h>
 #include <asm/page.h>
-#include <linux/proc_fs.h>
 
 EXPORT_SYMBOL(journal_start);
 EXPORT_SYMBOL(journal_restart);
@@ -114,8 +116,6 @@ static int kjournald(void *arg)
 	journal_t *journal = arg;
 	transaction_t *transaction;
 
-	daemonize("kjournald");
-
 	/*
 	 * Set up an interval timer which can be used to trigger a commit wakeup
 	 * after the commit interval expires
@@ -211,7 +211,7 @@ end_loop:
 
 static void journal_start_thread(journal_t *journal)
 {
-	kernel_thread(kjournald, journal, CLONE_VM|CLONE_FS|CLONE_FILES);
+	kthread_run(kjournald, journal, "kjournald");
 	wait_event(journal->j_wait_done_commit, journal->j_task != 0);
 }
 
-- 
cgit v1.2.3


From 4af4c52f34606bdaab6930a845550c6fb02078a4 Mon Sep 17 00:00:00 2001
From: Oleg Drokin <green@linuxhacker.ru>
Date: Sat, 25 Mar 2006 03:06:54 -0800
Subject: [PATCH] Missed error checking for intent's filp in open_namei().

It seems there is error check missing in open_namei for errors returned
through intent.open.file (from lookup_instantiate_filp).

If there is plain open performed, then such a check done inside
__path_lookup_intent_open called from path_lookup_open(), but when the open
is performed with O_CREAT flag set, then __path_lookup_intent_open is only
called with LOOKUP_PARENT set where no file opening can occur yet.

Later on lookup_hash is called where exact opening might take place and
intent.open.file may be filled.  If it is filled with error value of some
sort, then we get kernel attempting to dereference this error value as
address (and corresponding oops) in nameidata_to_filp() called from
filp_open().

While this is relatively simple to workaround in ->lookup() method by just
checking lookup_instantiate_filp() return value and returning error as
needed, this is not so easy in ->d_revalidate(), where we can only return
"yes, dentry is valid" or "no, dentry is invalid, perform full lookup
again", and just returning 0 on error would cause extra lookup (with
potential extra costly RPCs).

So in short, I believe that there should be no difference in error handling
for opening a file and creating a file in open_namei() and propose this
simple patch as a solution.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namei.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index c72b940797f..1baf1b06fe4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1628,6 +1628,12 @@ do_last:
 		goto exit;
 	}
 
+	if (IS_ERR(nd->intent.open.file)) {
+		mutex_unlock(&dir->d_inode->i_mutex);
+		error = PTR_ERR(nd->intent.open.file);
+		goto exit_dput;
+	}
+
 	/* Negative dentry, just create the file */
 	if (!path.dentry->d_inode) {
 		if (!IS_POSIXACL(dir->d_inode))
-- 
cgit v1.2.3


From bdfc326614b90e7bc47ee4a8fed05988555f0169 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sat, 25 Mar 2006 03:06:56 -0800
Subject: [PATCH] fs/inode.c: make iprune_mutex static

There's no reason for iprune_mutex being global.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index a51c671c54c..85da11044ad 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -91,7 +91,7 @@ DEFINE_SPINLOCK(inode_lock);
  * from its final dispose_list, the struct super_block they refer to
  * (for inode->i_sb->s_op) may already have been freed and reused.
  */
-DEFINE_MUTEX(iprune_mutex);
+static DEFINE_MUTEX(iprune_mutex);
 
 /*
  * Statistics gathering..
-- 
cgit v1.2.3


From 23f9e0f891c9b159a199629d4426f6ae0c383508 Mon Sep 17 00:00:00 2001
From: Alexander Zarochentzev <zam@namesys.com>
Date: Sat, 25 Mar 2006 03:06:57 -0800
Subject: [PATCH] reiserfs: fix transaction overflowing

This patch fixes a bug in reiserfs truncate.  A transaction might overflow
when truncating long highly fragmented file.  The fix is to split
truncation into several transactions to avoid overflowing.

Signed-off-by: Vladimir V. Saveliev <vs@namesys.com>
Cc; Charles McColgan <cm@chuck.net>
Cc: Alexander Zarochentsev <zam@namesys.com>
Cc: Hans Reiser <reiser@namesys.com>
Cc: Chris Mason <mason@suse.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/stree.c | 210 ++++++++++++++++++----------------------------------
 1 file changed, 72 insertions(+), 138 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index e2d08d7bcff..d2b25e1ba6e 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -981,6 +981,8 @@ static inline int prepare_for_direntry_item(struct path *path,
 	return M_CUT;
 }
 
+#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
+
 /*  If the path points to a directory or direct item, calculate mode and the size cut, for balance.
     If the path points to an indirect item, remove some number of its unformatted nodes.
     In case of file truncate calculate whether this item must be deleted/truncated or last
@@ -1020,148 +1022,79 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
 
 	/* Case of an indirect item. */
 	{
-		int n_unfm_number,	/* Number of the item unformatted nodes. */
-		 n_counter, n_blk_size;
-		__le32 *p_n_unfm_pointer;	/* Pointer to the unformatted node number. */
-		__u32 tmp;
-		struct item_head s_ih;	/* Item header. */
-		char c_mode;	/* Returned mode of the balance. */
-		int need_research;
-
-		n_blk_size = p_s_sb->s_blocksize;
-
-		/* Search for the needed object indirect item until there are no unformatted nodes to be removed. */
-		do {
-			need_research = 0;
-			p_s_bh = PATH_PLAST_BUFFER(p_s_path);
-			/* Copy indirect item header to a temp variable. */
-			copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
-			/* Calculate number of unformatted nodes in this item. */
-			n_unfm_number = I_UNFM_NUM(&s_ih);
-
-			RFALSE(!is_indirect_le_ih(&s_ih) || !n_unfm_number ||
-			       pos_in_item(p_s_path) + 1 != n_unfm_number,
-			       "PAP-5240: invalid item %h "
-			       "n_unfm_number = %d *p_n_pos_in_item = %d",
-			       &s_ih, n_unfm_number, pos_in_item(p_s_path));
-
-			/* Calculate balance mode and position in the item to remove unformatted nodes. */
-			if (n_new_file_length == max_reiserfs_offset(inode)) {	/* Case of delete. */
-				pos_in_item(p_s_path) = 0;
-				*p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
-				c_mode = M_DELETE;
-			} else {	/* Case of truncate. */
-				if (n_new_file_length < le_ih_k_offset(&s_ih)) {
-					pos_in_item(p_s_path) = 0;
-					*p_n_cut_size =
-					    -(IH_SIZE + ih_item_len(&s_ih));
-					c_mode = M_DELETE;	/* Delete this item. */
-				} else {
-					/* indirect item must be truncated starting from *p_n_pos_in_item-th position */
-					pos_in_item(p_s_path) =
-					    (n_new_file_length + n_blk_size -
-					     le_ih_k_offset(&s_ih)) >> p_s_sb->
-					    s_blocksize_bits;
-
-					RFALSE(pos_in_item(p_s_path) >
-					       n_unfm_number,
-					       "PAP-5250: invalid position in the item");
-
-					/* Either convert last unformatted node of indirect item to direct item or increase
-					   its free space.  */
-					if (pos_in_item(p_s_path) ==
-					    n_unfm_number) {
-						*p_n_cut_size = 0;	/* Nothing to cut. */
-						return M_CONVERT;	/* Maybe convert last unformatted node to the direct item. */
-					}
-					/* Calculate size to cut. */
-					*p_n_cut_size =
-					    -(ih_item_len(&s_ih) -
-					      pos_in_item(p_s_path) *
-					      UNFM_P_SIZE);
-
-					c_mode = M_CUT;	/* Cut from this indirect item. */
-				}
-			}
+	    int blk_size = p_s_sb->s_blocksize;
+	    struct item_head s_ih;
+	    int need_re_search;
+	    int delete = 0;
+	    int result = M_CUT;
+	    int pos = 0;
+
+	    if ( n_new_file_length == max_reiserfs_offset (inode) ) {
+		/* prepare_for_delete_or_cut() is called by
+		 * reiserfs_delete_item() */
+		n_new_file_length = 0;
+		delete = 1;
+	    }
+
+	    do {
+		need_re_search = 0;
+		*p_n_cut_size = 0;
+		p_s_bh = PATH_PLAST_BUFFER(p_s_path);
+		copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
+		pos = I_UNFM_NUM(&s_ih);
 
-			RFALSE(n_unfm_number <= pos_in_item(p_s_path),
-			       "PAP-5260: invalid position in the indirect item");
-
-			/* pointers to be cut */
-			n_unfm_number -= pos_in_item(p_s_path);
-			/* Set pointer to the last unformatted node pointer that is to be cut. */
-			p_n_unfm_pointer =
-			    (__le32 *) B_I_PITEM(p_s_bh,
-						 &s_ih) + I_UNFM_NUM(&s_ih) -
-			    1 - *p_n_removed;
-
-			/* We go through the unformatted nodes pointers of the indirect
-			   item and look for the unformatted nodes in the cache. If we
-			   found some of them we free it, zero corresponding indirect item
-			   entry and log buffer containing that indirect item. For this we
-			   need to prepare last path element for logging. If some
-			   unformatted node has b_count > 1 we must not free this
-			   unformatted node since it is in use. */
-			reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
-			// note: path could be changed, first line in for loop takes care
-			// of it
+		while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > n_new_file_length) {
+		    __u32 *unfm, block;
 
-			for (n_counter = *p_n_removed;
-			     n_counter < n_unfm_number;
-			     n_counter++, p_n_unfm_pointer--) {
+		    /* Each unformatted block deletion may involve one additional
+		     * bitmap block into the transaction, thereby the initial
+		     * journal space reservation might not be enough. */
+		    if (!delete && (*p_n_cut_size) != 0 &&
+			reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
+			break;
+		    }
 
-				cond_resched();
-				if (item_moved(&s_ih, p_s_path)) {
-					need_research = 1;
-					break;
-				}
-				RFALSE(p_n_unfm_pointer <
-				       (__le32 *) B_I_PITEM(p_s_bh, &s_ih)
-				       || p_n_unfm_pointer >
-				       (__le32 *) B_I_PITEM(p_s_bh,
-							    &s_ih) +
-				       I_UNFM_NUM(&s_ih) - 1,
-				       "vs-5265: pointer out of range");
-
-				/* Hole, nothing to remove. */
-				if (!get_block_num(p_n_unfm_pointer, 0)) {
-					(*p_n_removed)++;
-					continue;
-				}
+		    unfm = (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + pos - 1;
+		    block = get_block_num(unfm, 0);
 
-				(*p_n_removed)++;
+		    if (block != 0) {
+			reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
+			put_block_num(unfm, 0, 0);
+			journal_mark_dirty (th, p_s_sb, p_s_bh);
+			reiserfs_free_block(th, inode, block, 1);
+		    }
 
-				tmp = get_block_num(p_n_unfm_pointer, 0);
-				put_block_num(p_n_unfm_pointer, 0, 0);
-				journal_mark_dirty(th, p_s_sb, p_s_bh);
-				reiserfs_free_block(th, inode, tmp, 1);
-				if (item_moved(&s_ih, p_s_path)) {
-					need_research = 1;
-					break;
-				}
-			}
+		    cond_resched();
 
-			/* a trick.  If the buffer has been logged, this
-			 ** will do nothing.  If we've broken the loop without
-			 ** logging it, it will restore the buffer
-			 **
-			 */
-			reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh);
-
-			/* This loop can be optimized. */
-		} while ((*p_n_removed < n_unfm_number || need_research) &&
-			 search_for_position_by_key(p_s_sb, p_s_item_key,
-						    p_s_path) ==
-			 POSITION_FOUND);
-
-		RFALSE(*p_n_removed < n_unfm_number,
-		       "PAP-5310: indirect item is not found");
-		RFALSE(item_moved(&s_ih, p_s_path),
-		       "after while, comp failed, retry");
-
-		if (c_mode == M_CUT)
-			pos_in_item(p_s_path) *= UNFM_P_SIZE;
-		return c_mode;
+		    if (item_moved (&s_ih, p_s_path))  {
+			need_re_search = 1;
+			break;
+		    }
+
+		    pos --;
+		    (*p_n_removed) ++;
+		    (*p_n_cut_size) -= UNFM_P_SIZE;
+
+		    if (pos == 0) {
+			(*p_n_cut_size) -= IH_SIZE;
+			result = M_DELETE;
+			break;
+		    }
+		}
+		/* a trick.  If the buffer has been logged, this will do nothing.  If
+		** we've broken the loop without logging it, it will restore the
+		** buffer */
+		reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh);
+	    } while (need_re_search &&
+		     search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND);
+	    pos_in_item(p_s_path) = pos * UNFM_P_SIZE;
+
+	    if (*p_n_cut_size == 0) {
+		/* Nothing were cut. maybe convert last unformatted node to the
+		 * direct item? */
+		result = M_CONVERT;
+	    }
+	    return result;
 	}
 }
 
@@ -1948,7 +1881,8 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p
 		 ** sure the file is consistent before ending the current trans
 		 ** and starting a new one
 		 */
-		if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
+		if (journal_transaction_should_end(th, 0) ||
+		    reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
 			int orig_len_alloc = th->t_blocks_allocated;
 			decrement_counters_in_path(&s_search_path);
 
@@ -1962,7 +1896,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p
 			if (err)
 				goto out;
 			err = journal_begin(th, p_s_inode->i_sb,
-					    JOURNAL_PER_BALANCE_CNT * 6);
+					    JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
 			if (err)
 				goto out;
 			reiserfs_update_inode_transaction(p_s_inode);
-- 
cgit v1.2.3


From a44c94a7b82a425b73384c104d5cb3dd3caa075e Mon Sep 17 00:00:00 2001
From: Alexander Zarochentsev <zam@namesys.com>
Date: Sat, 25 Mar 2006 03:06:59 -0800
Subject: [PATCH] reiserfs: handle trans_id overflow

Reiserfs does not handle transaction ID overflow correctly.  Transaction ID
== 0 causes reiserfs to crash.  The patch fixes all places where the
transaction ID is incremented.

Signed-off-by: Alexander Zarochentsev <zam@namesys.com>
Signed-off-by: Hans Reiser <reiser@namesys.com>
Cc: Chris Mason <mason@suse.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/journal.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 5a9d2722fa0..1b73529b809 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2227,6 +2227,9 @@ static int journal_read_transaction(struct super_block *p_s_sb,
 	journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
 	journal->j_last_flush_trans_id = trans_id;
 	journal->j_trans_id = trans_id + 1;
+	/* check for trans_id overflow */
+	if (journal->j_trans_id == 0)
+		journal->j_trans_id = 10;
 	brelse(c_bh);
 	brelse(d_bh);
 	kfree(log_blocks);
@@ -2450,6 +2453,9 @@ static int journal_read(struct super_block *p_s_sb)
 		journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
 		journal->j_trans_id =
 		    le32_to_cpu(jh->j_last_flush_trans_id) + 1;
+		/* check for trans_id overflow */
+		if (journal->j_trans_id == 0)
+			journal->j_trans_id = 10;
 		journal->j_last_flush_trans_id =
 		    le32_to_cpu(jh->j_last_flush_trans_id);
 		journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
@@ -3873,8 +3879,8 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 	int cur_write_start = 0;	/* start index of current log write */
 	int old_start;
 	int i;
-	int flush = flags & FLUSH_ALL;
-	int wait_on_commit = flags & WAIT;
+	int flush;
+	int wait_on_commit;
 	struct reiserfs_journal_list *jl, *temp_jl;
 	struct list_head *entry, *safe;
 	unsigned long jindex;
@@ -3884,6 +3890,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 	BUG_ON(th->t_refcount > 1);
 	BUG_ON(!th->t_trans_id);
 
+	/* protect flush_older_commits from doing mistakes if the
+           transaction ID counter gets overflowed.  */
+	if (th->t_trans_id == ~0UL)
+		flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
+	flush = flags & FLUSH_ALL;
+	wait_on_commit = flags & WAIT;
+
 	put_fs_excl();
 	current->journal_info = th->t_handle_save;
 	reiserfs_check_lock_depth(p_s_sb, "journal end");
@@ -4105,7 +4118,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 	journal->j_first = NULL;
 	journal->j_len = 0;
 	journal->j_trans_start_time = 0;
-	journal->j_trans_id++;
+	/* check for trans_id overflow */
+	if (++journal->j_trans_id == 0)
+		journal->j_trans_id = 10;
 	journal->j_current_jl->j_trans_id = journal->j_trans_id;
 	journal->j_must_wait = 0;
 	journal->j_len_alloc = 0;
-- 
cgit v1.2.3


From 619d5d8a2b3f800ea3a0301a58ede570684956b0 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Sat, 25 Mar 2006 03:07:00 -0800
Subject: [PATCH] reiserfs: reiserfs_file_write() will lose error code when a
 0-length write occurs w/ O_SYNC

When an error occurs in reiserfs_file_write before any data is written, and
O_SYNC is set, the return code of generic_osync_write will overwrite the
error code, losing it.

This patch ensures that generic_osync_inode() doesn't run under an error
condition, losing the error.  This duplicates the logic from
generic_file_buffered_write().

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <mason@suse.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Alexander Zarochentsev <zam@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/file.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index be12879bb17..044de8be39a 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1546,10 +1546,10 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 		}
 	}
 
-	if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
-		res =
-		    generic_osync_inode(inode, file->f_mapping,
-					OSYNC_METADATA | OSYNC_DATA);
+	if (likely(res >= 0) &&
+	    (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))))
+		res = generic_osync_inode(inode, file->f_mapping,
+		                          OSYNC_METADATA | OSYNC_DATA);
 
 	mutex_unlock(&inode->i_mutex);
 	reiserfs_async_progress_wait(inode->i_sb);
-- 
cgit v1.2.3


From b500531e6f5f234ed267bd7060ee06d144faf0ca Mon Sep 17 00:00:00 2001
From: Oleg Drokin <green@linuxhacker.ru>
Date: Sat, 25 Mar 2006 03:07:01 -0800
Subject: [PATCH] Introduce FMODE_EXEC file flag

Introduce FMODE_EXEC file flag, to indicate that file is being opened for
execution.  This is useful for distributed filesystems to maintain
consistent behavior for returning ETXTBUSY when opening for write and
execution happens on different nodes.

akpm:

  Needed by Lustre at present.  I assume their objective to to work towards
  being able to install Lustre on an unmodified distro kernel, which seems
  sane.  It should have zero runtime cost.

  Trond and Chuck indicate that NFS4 can probably use this too, for the same
  thing.

  Steven says it's also on the GFS todo list.

Signed-off-by: Oleg Drokin <green@linuxhacker.ru>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Chuck Lever <cel@citi.umich.edu>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 0b515ac5313..d8c477a5625 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -127,7 +127,7 @@ asmlinkage long sys_uselib(const char __user * library)
 	struct nameidata nd;
 	int error;
 
-	error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ);
+	error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
 	if (error)
 		goto out;
 
@@ -477,7 +477,7 @@ struct file *open_exec(const char *name)
 	int err;
 	struct file *file;
 
-	err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ);
+	err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
 	file = ERR_PTR(err);
 
 	if (!err) {
-- 
cgit v1.2.3


From 9a56c213929d83139fd1e12727e1037d71b519f8 Mon Sep 17 00:00:00 2001
From: Oleg Drokin <green@linuxhacker.ru>
Date: Sat, 25 Mar 2006 03:07:02 -0800
Subject: [PATCH] Add lookup_instantiate_filp usage warning

I think it would be nice to put an usage warning in header of
lookup_instantiate_filp() to indicate it is unsafe to use it on anything
but regular files (even that is potentially unsafe, but there your ->open()
is usually in your hands anyway), so that others won't fall into the same
trap I did.

Signed-off-by: Oleg Drokin <green@linuxhacker.ru>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/open.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 1091dadd6c3..7d02d19bd0a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -890,6 +890,10 @@ EXPORT_SYMBOL(filp_open);
  * a fully instantiated struct file to the caller.
  * This function is meant to be called from within a filesystem's
  * lookup method.
+ * Beware of calling it for non-regular files! Those ->open methods might block
+ * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo,
+ * leading to a deadlock, as nobody can open that fifo anymore, because
+ * another process to open fifo will block on locked parent when doing lookup).
  * Note that in case of error, nd->intent.open.file is destroyed, but the
  * path information remains valid.
  * If the open callback is set to NULL, then the standard f_op->open()
-- 
cgit v1.2.3


From c32ccd87bfd1414b0aabfcd8dbc7539ad23bcbaa Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Mar 2006 03:07:09 -0800
Subject: [PATCH] inotify: lock avoidance with parent watch status in dentry

Previous inotify work avoidance is good when inotify is completely unused,
but it breaks down if even a single watch is in place anywhere in the
system.  Robin Holt notices that udev is one such culprit - it slows down a
512-thread application on a 512 CPU system from 6 seconds to 22 minutes.

Solve this by adding a flag in the dentry that tells inotify whether or not
its parent inode has a watch on it.  Event queueing to parent will skip
taking locks if this flag is cleared.  Setting and clearing of this flag on
all child dentries versus event delivery: this is no in terms of race
cases, and that was shown to be equivalent to always performing the check.

The essential behaviour is that activity occuring _after_ a watch has been
added and _before_ it has been removed, will generate events.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Robert Love <rml@novell.com>
Cc: John McCutchan <ttb@tentacle.dhs.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c  |  8 ++++++
 fs/inotify.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 85 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 139e5fd22fa..0f7ec12d65f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -802,6 +802,7 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
 	if (inode)
 		list_add(&entry->d_alias, &inode->i_dentry);
 	entry->d_inode = inode;
+	fsnotify_d_instantiate(entry, inode);
 	spin_unlock(&dcache_lock);
 	security_d_instantiate(entry, inode);
 }
@@ -853,6 +854,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 	list_add(&entry->d_alias, &inode->i_dentry);
 do_negative:
 	entry->d_inode = inode;
+	fsnotify_d_instantiate(entry, inode);
 	spin_unlock(&dcache_lock);
 	security_d_instantiate(entry, inode);
 	return NULL;
@@ -983,6 +985,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 		new = __d_find_alias(inode, 1);
 		if (new) {
 			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
+			fsnotify_d_instantiate(new, inode);
 			spin_unlock(&dcache_lock);
 			security_d_instantiate(new, inode);
 			d_rehash(dentry);
@@ -992,6 +995,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 			/* d_instantiate takes dcache_lock, so we do it by hand */
 			list_add(&dentry->d_alias, &inode->i_dentry);
 			dentry->d_inode = inode;
+			fsnotify_d_instantiate(dentry, inode);
 			spin_unlock(&dcache_lock);
 			security_d_instantiate(dentry, inode);
 			d_rehash(dentry);
@@ -1176,6 +1180,9 @@ void d_delete(struct dentry * dentry)
 	spin_lock(&dentry->d_lock);
 	isdir = S_ISDIR(dentry->d_inode->i_mode);
 	if (atomic_read(&dentry->d_count) == 1) {
+		/* remove this and other inotify debug checks after 2.6.18 */
+		dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
+
 		dentry_iput(dentry);
 		fsnotify_nameremove(dentry, isdir);
 		return;
@@ -1342,6 +1349,7 @@ already_unhashed:
 
 	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
 	spin_unlock(&target->d_lock);
+	fsnotify_d_move(dentry);
 	spin_unlock(&dentry->d_lock);
 	write_sequnlock(&rename_lock);
 	spin_unlock(&dcache_lock);
diff --git a/fs/inotify.c b/fs/inotify.c
index 0ee39ef591c..a61e93e1785 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -38,7 +38,6 @@
 #include <asm/ioctls.h>
 
 static atomic_t inotify_cookie;
-static atomic_t inotify_watches;
 
 static kmem_cache_t *watch_cachep;
 static kmem_cache_t *event_cachep;
@@ -380,6 +379,48 @@ static int find_inode(const char __user *dirname, struct nameidata *nd,
 	return error;
 }
 
+/*
+ * inotify_inode_watched - returns nonzero if there are watches on this inode
+ * and zero otherwise.  We call this lockless, we do not care if we race.
+ */
+static inline int inotify_inode_watched(struct inode *inode)
+{
+	return !list_empty(&inode->inotify_watches);
+}
+
+/*
+ * Get child dentry flag into synch with parent inode.
+ * Flag should always be clear for negative dentrys.
+ */
+static void set_dentry_child_flags(struct inode *inode, int watched)
+{
+	struct dentry *alias;
+
+	spin_lock(&dcache_lock);
+	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+		struct dentry *child;
+
+		list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
+			if (!child->d_inode) {
+				WARN_ON(child->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
+				continue;
+			}
+			spin_lock(&child->d_lock);
+			if (watched) {
+				WARN_ON(child->d_flags &
+						DCACHE_INOTIFY_PARENT_WATCHED);
+				child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
+			} else {
+				WARN_ON(!(child->d_flags &
+					DCACHE_INOTIFY_PARENT_WATCHED));
+				child->d_flags&=~DCACHE_INOTIFY_PARENT_WATCHED;
+			}
+			spin_unlock(&child->d_lock);
+		}
+	}
+	spin_unlock(&dcache_lock);
+}
+
 /*
  * create_watch - creates a watch on the given device.
  *
@@ -426,7 +467,6 @@ static struct inotify_watch *create_watch(struct inotify_device *dev,
 	get_inotify_watch(watch);
 
 	atomic_inc(&dev->user->inotify_watches);
-	atomic_inc(&inotify_watches);
 
 	return watch;
 }
@@ -458,8 +498,10 @@ static void remove_watch_no_event(struct inotify_watch *watch,
 	list_del(&watch->i_list);
 	list_del(&watch->d_list);
 
+	if (!inotify_inode_watched(watch->inode))
+		set_dentry_child_flags(watch->inode, 0);
+
 	atomic_dec(&dev->user->inotify_watches);
-	atomic_dec(&inotify_watches);
 	idr_remove(&dev->idr, watch->wd);
 	put_inotify_watch(watch);
 }
@@ -481,16 +523,39 @@ static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev)
 	remove_watch_no_event(watch, dev);
 }
 
+/* Kernel API */
+
 /*
- * inotify_inode_watched - returns nonzero if there are watches on this inode
- * and zero otherwise.  We call this lockless, we do not care if we race.
+ * inotify_d_instantiate - instantiate dcache entry for inode
  */
-static inline int inotify_inode_watched(struct inode *inode)
+void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
 {
-	return !list_empty(&inode->inotify_watches);
+	struct dentry *parent;
+
+	if (!inode)
+		return;
+
+	WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
+	spin_lock(&entry->d_lock);
+	parent = entry->d_parent;
+	if (inotify_inode_watched(parent->d_inode))
+		entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
+	spin_unlock(&entry->d_lock);
 }
 
-/* Kernel API */
+/*
+ * inotify_d_move - dcache entry has been moved
+ */
+void inotify_d_move(struct dentry *entry)
+{
+	struct dentry *parent;
+
+	parent = entry->d_parent;
+	if (inotify_inode_watched(parent->d_inode))
+		entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
+	else
+		entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
+}
 
 /**
  * inotify_inode_queue_event - queue an event to all watches on this inode
@@ -538,7 +603,7 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
 	struct dentry *parent;
 	struct inode *inode;
 
-	if (!atomic_read (&inotify_watches))
+	if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
 		return;
 
 	spin_lock(&dentry->d_lock);
@@ -993,6 +1058,9 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 		goto out;
 	}
 
+	if (!inotify_inode_watched(inode))
+		set_dentry_child_flags(inode, 1);
+
 	/* Add the watch to the device's and the inode's list */
 	list_add(&watch->d_list, &dev->watches);
 	list_add(&watch->i_list, &inode->inotify_watches);
@@ -1065,7 +1133,6 @@ static int __init inotify_setup(void)
 	inotify_max_user_watches = 8192;
 
 	atomic_set(&inotify_cookie, 0);
-	atomic_set(&inotify_watches, 0);
 
 	watch_cachep = kmem_cache_create("inotify_watch_cache",
 					 sizeof(struct inotify_watch),
-- 
cgit v1.2.3


From cd02b966bfcad12d1b2e265dc8dbc331d4c184c4 Mon Sep 17 00:00:00 2001
From: "Vladimir V. Saveliev" <vs@namesys.com>
Date: Sat, 25 Mar 2006 03:07:15 -0800
Subject: [PATCH] reiserfs: cleanups

Clean up several places where gcc issues warnings when -W is specified.
Thanks to Neil for finding that.

Signed-off-by: Vladimir V. Saveliev <vs@namesys.com>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Hans Reiser <reiser@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/fix_node.c | 4 +---
 fs/reiserfs/super.c    | 8 ++++----
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index aa22588019e..5600d3d60cf 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -191,9 +191,7 @@ static void create_virtual_node(struct tree_balance *tb, int h)
 					       "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c",
 					       key, vn->vn_affected_item_num,
 					       vn->vn_mode, M_DELETE);
-			} else
-				/* we can delete directory item, that has only one directory entry in it */
-				;
+			}
 		}
 #endif
 
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 93e6ef9360e..cae2abbc0c7 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -685,14 +685,14 @@ static const arg_desc_t logging_mode[] = {
 	 (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_WRITEBACK)},
 	{"writeback", 1 << REISERFS_DATA_WRITEBACK,
 	 (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_LOG)},
-	{NULL, 0}
+	{.value = NULL}
 };
 
 /* possible values for -o barrier= */
 static const arg_desc_t barrier_mode[] = {
 	{"none", 1 << REISERFS_BARRIER_NONE, 1 << REISERFS_BARRIER_FLUSH},
 	{"flush", 1 << REISERFS_BARRIER_FLUSH, 1 << REISERFS_BARRIER_NONE},
-	{NULL, 0}
+	{.value = NULL}
 };
 
 /* possible values for "-o block-allocator=" and bits which are to be set in
@@ -890,7 +890,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 		{"acl",.setmask = 1 << REISERFS_UNSUPPORTED_OPT},
 		{"noacl",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT},
 #endif
-		{"nolog",},	/* This is unsupported */
+		{.option_name = "nolog"},
 		{"replayonly",.setmask = 1 << REPLAYONLY},
 		{"block-allocator",.arg_required = 'a',.values = balloc},
 		{"data",.arg_required = 'd',.values = logging_mode},
@@ -908,7 +908,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 		{"grpjquota",.arg_required =
 		 'g' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL},
 		{"jqfmt",.arg_required = 'f',.values = NULL},
-		{NULL,}
+		{.option_name = NULL}
 	};
 
 	*blocks = 0;
-- 
cgit v1.2.3


From 5930860296ca438071d3824bf7306ad0dfd33fc1 Mon Sep 17 00:00:00 2001
From: Alexander Zarochentsev <zam@namesys.com>
Date: Sat, 25 Mar 2006 03:07:16 -0800
Subject: [PATCH] reiserfs: use balance_dirty_pages_ratelimited_nr in
 reiserfs_file_write()

Use the new balance_dirty_pages_ratelimited_nr in reiserfs "largeio" file
write.

Signed-off-by: Hans Reiser <reiser@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 044de8be39a..d0c1e865963 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1532,7 +1532,7 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 		buf += write_bytes;
 		*ppos = pos += write_bytes;
 		count -= write_bytes;
-		balance_dirty_pages_ratelimited(inode->i_mapping);
+		balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
 	}
 
 	/* this is only true on error */
-- 
cgit v1.2.3


From 27979bb2ff748613dba96ae66392a76fb0678527 Mon Sep 17 00:00:00 2001
From: Russ Cox <rsc@swtch.com>
Date: Sat, 25 Mar 2006 03:07:23 -0800
Subject: [PATCH] v9fs: consolidate trans_sock into trans_fd

Here is a new trans_fd.c that replaces the current trans_fd.c and
trans_sock.c.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/Makefile     |   1 -
 fs/9p/trans_fd.c   | 297 +++++++++++++++++++++++++++++++----------------
 fs/9p/trans_sock.c | 334 -----------------------------------------------------
 3 files changed, 196 insertions(+), 436 deletions(-)
 delete mode 100644 fs/9p/trans_sock.c

(limited to 'fs')

diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 2f4ce43f7b6..5db5af9dbf2 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -2,7 +2,6 @@ obj-$(CONFIG_9P_FS) := 9p2000.o
 
 9p2000-objs := \
 	trans_fd.o \
-	trans_sock.o \
 	mux.o \
 	9p.o \
 	conv.o \
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
index 5b2ce21b10f..8029844d6b2 100644
--- a/fs/9p/trans_fd.c
+++ b/fs/9p/trans_fd.c
@@ -1,10 +1,13 @@
 /*
  * linux/fs/9p/trans_fd.c
  *
- * File Descriptor Transport Layer
+ * Fd transport layer.  Includes deprecated socket layer.
  *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
+ *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
+ *  Copyright (C) 2004-2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
+ *  Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -25,6 +28,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/in.h>
 #include <linux/module.h>
 #include <linux/net.h>
 #include <linux/ipv6.h>
@@ -40,89 +44,119 @@
 #include "v9fs.h"
 #include "transport.h"
 
+#define V9FS_PORT 564
+
 struct v9fs_trans_fd {
-	struct file *in_file;
-	struct file *out_file;
+	struct file *rd;
+	struct file *wr;
 };
 
 /**
- * v9fs_fd_recv - receive from a socket
+ * v9fs_fd_read- read from a fd
  * @v9ses: session information
  * @v: buffer to receive data into
  * @len: size of receive buffer
  *
  */
-
-static int v9fs_fd_recv(struct v9fs_transport *trans, void *v, int len)
+static int v9fs_fd_read(struct v9fs_transport *trans, void *v, int len)
 {
-	struct v9fs_trans_fd *ts = trans ? trans->priv : NULL;
+	int ret;
+	struct v9fs_trans_fd *ts;
 
-	if (!trans || trans->status != Connected || !ts)
-		return -EIO;
+	if (!trans || trans->status == Disconnected || !(ts = trans->priv))
+		return -EREMOTEIO;
 
-	return kernel_read(ts->in_file, ts->in_file->f_pos, v, len);
+	if (!(ts->rd->f_flags & O_NONBLOCK))
+		dprintk(DEBUG_ERROR, "blocking read ...\n");
+
+	ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
+	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
+		trans->status = Disconnected;
+	return ret;
 }
 
 /**
- * v9fs_fd_send - send to a socket
+ * v9fs_fd_write - write to a socket
  * @v9ses: session information
  * @v: buffer to send data from
  * @len: size of send buffer
  *
  */
-
-static int v9fs_fd_send(struct v9fs_transport *trans, void *v, int len)
+static int v9fs_fd_write(struct v9fs_transport *trans, void *v, int len)
 {
-	struct v9fs_trans_fd *ts = trans ? trans->priv : NULL;
-	mm_segment_t oldfs = get_fs();
-	int ret = 0;
+	int ret;
+	mm_segment_t oldfs;
+	struct v9fs_trans_fd *ts;
 
-	if (!trans || trans->status != Connected || !ts)
-		return -EIO;
+	if (!trans || trans->status == Disconnected || !(ts = trans->priv))
+		return -EREMOTEIO;
+
+	if (!(ts->wr->f_flags & O_NONBLOCK))
+		dprintk(DEBUG_ERROR, "blocking write ...\n");
 
 	oldfs = get_fs();
 	set_fs(get_ds());
 	/* The cast to a user pointer is valid due to the set_fs() */
-	ret = vfs_write(ts->out_file, (void __user *)v, len, &ts->out_file->f_pos);
+	ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
 	set_fs(oldfs);
 
+	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
+		trans->status = Disconnected;
 	return ret;
 }
 
-/**
- * v9fs_fd_init - initialize file descriptor transport
- * @v9ses: session information
- * @addr: address of server to mount
- * @data: mount options
- *
- */
-
-static int
-v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
+static unsigned int
+v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt)
 {
-	struct v9fs_trans_fd *ts = NULL;
-	struct v9fs_transport *trans = v9ses->transport;
+	int ret, n;
+	struct v9fs_trans_fd *ts;
+	mm_segment_t oldfs;
 
-	if((v9ses->wfdno == ~0) || (v9ses->rfdno == ~0)) {
-		printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
-		return -ENOPROTOOPT;
-	}
+	if (!trans || trans->status != Connected || !(ts = trans->priv))
+		return -EREMOTEIO;
 
-	ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL);
+	if (!ts->rd->f_op || !ts->rd->f_op->poll)
+		return -EIO;
 
-	if (!ts)
-		return -ENOMEM;
+	if (!ts->wr->f_op || !ts->wr->f_op->poll)
+		return -EIO;
 
-	ts->in_file = fget( v9ses->rfdno );
-	ts->out_file = fget( v9ses->wfdno );
+	oldfs = get_fs();
+	set_fs(get_ds());
 
-	if (!ts->in_file || !ts->out_file) {
-		if (ts->in_file)
-			fput(ts->in_file);
+	ret = ts->rd->f_op->poll(ts->rd, pt);
+	if (ret < 0)
+		goto end;
 
-		if (ts->out_file)
-			fput(ts->out_file);
+	if (ts->rd != ts->wr) {
+		n = ts->wr->f_op->poll(ts->wr, pt);
+		if (n < 0) {
+			ret = n;
+			goto end;
+		}
+		ret = (ret & ~POLLOUT) | (n & ~POLLIN);
+	}
 
+      end:
+	set_fs(oldfs);
+	return ret;
+}
+
+static int v9fs_fd_open(struct v9fs_session_info *v9ses, int rfd, int wfd)
+{
+	struct v9fs_transport *trans = v9ses->transport;
+	struct v9fs_trans_fd *ts = kmalloc(sizeof(struct v9fs_trans_fd),
+					   GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	ts->rd = fget(rfd);
+	ts->wr = fget(wfd);
+	if (!ts->rd || !ts->wr) {
+		if (ts->rd)
+			fput(ts->rd);
+		if (ts->wr)
+			fput(ts->wr);
 		kfree(ts);
 		return -EIO;
 	}
@@ -133,84 +167,145 @@ v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
 	return 0;
 }
 
-
-/**
- * v9fs_fd_close - shutdown file descriptor
- * @trans: private socket structure
- *
- */
-
-static void v9fs_fd_close(struct v9fs_transport *trans)
+static int v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr,
+			char *data)
 {
-	struct v9fs_trans_fd *ts;
-
-	if (!trans)
-		return;
-
-	ts = xchg(&trans->priv, NULL);
+	if (v9ses->rfdno == ~0 || v9ses->wfdno == ~0) {
+		printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
+		return -ENOPROTOOPT;
+	}
 
-	if (!ts)
-		return;
+	return v9fs_fd_open(v9ses, v9ses->rfdno, v9ses->wfdno);
+}
 
-	trans->status = Disconnected;
-	if (ts->in_file)
-		fput(ts->in_file);
+static int v9fs_socket_open(struct v9fs_session_info *v9ses,
+			    struct socket *csocket)
+{
+	int fd, ret;
+
+	csocket->sk->sk_allocation = GFP_NOIO;
+	if ((fd = sock_map_fd(csocket)) < 0) {
+		eprintk(KERN_ERR, "v9fs_socket_open: failed to map fd\n");
+		ret = fd;
+	      release_csocket:
+		sock_release(csocket);
+		return ret;
+	}
 
-	if (ts->out_file)
-		fput(ts->out_file);
+	if ((ret = v9fs_fd_open(v9ses, fd, fd)) < 0) {
+		sockfd_put(csocket);
+		eprintk(KERN_ERR, "v9fs_socket_open: failed to open fd\n");
+		goto release_csocket;
+	}
 
-	kfree(ts);
+	((struct v9fs_trans_fd *)v9ses->transport->priv)->rd->f_flags |=
+	    O_NONBLOCK;
+	return 0;
 }
 
-static unsigned int
-v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt)
+static int v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr,
+			 char *data)
 {
-	int ret, n;
-	struct v9fs_trans_fd *ts;
-	mm_segment_t oldfs;
+	int ret;
+	struct socket *csocket = NULL;
+	struct sockaddr_in sin_server;
+
+	sin_server.sin_family = AF_INET;
+	sin_server.sin_addr.s_addr = in_aton(addr);
+	sin_server.sin_port = htons(v9ses->port);
+	sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
+
+	if (!csocket) {
+		eprintk(KERN_ERR, "v9fs_trans_tcp: problem creating socket\n");
+		return -1;
+	}
 
-	if (!trans)
-		return -EIO;
+	ret = csocket->ops->connect(csocket,
+				    (struct sockaddr *)&sin_server,
+				    sizeof(struct sockaddr_in), 0);
+	if (ret < 0) {
+		eprintk(KERN_ERR,
+			"v9fs_trans_tcp: problem connecting socket to %s\n",
+			addr);
+		return ret;
+	}
 
-	ts = trans->priv;
-	if (trans->status != Connected || !ts)
-		return -EIO;
+	return v9fs_socket_open(v9ses, csocket);
+}
 
-	oldfs = get_fs();
-	set_fs(get_ds());
+static int
+v9fs_unix_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
+{
+	int ret;
+	struct socket *csocket;
+	struct sockaddr_un sun_server;
+
+	if (strlen(addr) > UNIX_PATH_MAX) {
+		eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
+			addr);
+		return -ENAMETOOLONG;
+	}
 
-	if (!ts->in_file->f_op || !ts->in_file->f_op->poll) {
-		ret = -EIO;
-		goto end;
+	sun_server.sun_family = PF_UNIX;
+	strcpy(sun_server.sun_path, addr);
+	sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
+	ret = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
+			sizeof(struct sockaddr_un) - 1, 0);
+	if (ret < 0) {
+		eprintk(KERN_ERR,
+			"v9fs_trans_unix: problem connecting socket: %s: %d\n",
+			addr, ret);
+		return ret;
 	}
 
-	ret = ts->in_file->f_op->poll(ts->in_file, pt);
+	return v9fs_socket_open(v9ses, csocket);
+}
 
-	if (ts->out_file != ts->in_file) {
-		if (!ts->out_file->f_op || !ts->out_file->f_op->poll) {
-			ret = -EIO;
-			goto end;
-		}
+/**
+ * v9fs_sock_close - shutdown socket
+ * @trans: private socket structure
+ *
+ */
+static void v9fs_fd_close(struct v9fs_transport *trans)
+{
+	struct v9fs_trans_fd *ts;
 
-		n = ts->out_file->f_op->poll(ts->out_file, pt);
+	if (!trans)
+		return;
 
-		ret &= ~POLLOUT;
-		n &= ~POLLIN;
+	ts = xchg(&trans->priv, NULL);
 
-		ret |= n;
-	}
+	if (!ts)
+		return;
 
-end:
-	set_fs(oldfs);
-	return ret;
+	trans->status = Disconnected;
+	if (ts->rd)
+		fput(ts->rd);
+	if (ts->wr)
+		fput(ts->wr);
+	kfree(ts);
 }
 
-
 struct v9fs_transport v9fs_trans_fd = {
 	.init = v9fs_fd_init,
-	.write = v9fs_fd_send,
-	.read = v9fs_fd_recv,
+	.write = v9fs_fd_write,
+	.read = v9fs_fd_read,
 	.close = v9fs_fd_close,
 	.poll = v9fs_fd_poll,
 };
 
+struct v9fs_transport v9fs_trans_tcp = {
+	.init = v9fs_tcp_init,
+	.write = v9fs_fd_write,
+	.read = v9fs_fd_read,
+	.close = v9fs_fd_close,
+	.poll = v9fs_fd_poll,
+};
+
+struct v9fs_transport v9fs_trans_unix = {
+	.init = v9fs_unix_init,
+	.write = v9fs_fd_write,
+	.read = v9fs_fd_read,
+	.close = v9fs_fd_close,
+	.poll = v9fs_fd_poll,
+};
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c
deleted file mode 100644
index 44e830697ac..00000000000
--- a/fs/9p/trans_sock.c
+++ /dev/null
@@ -1,334 +0,0 @@
-/*
- * linux/fs/9p/trans_socket.c
- *
- * Socket Transport Layer
- *
- *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
- *  Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-#include <linux/config.h>
-#include <linux/in.h>
-#include <linux/module.h>
-#include <linux/net.h>
-#include <linux/ipv6.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/un.h>
-#include <asm/uaccess.h>
-#include <linux/inet.h>
-#include <linux/idr.h>
-#include <linux/file.h>
-
-#include "debug.h"
-#include "v9fs.h"
-#include "transport.h"
-
-#define V9FS_PORT 564
-
-struct v9fs_trans_sock {
-	struct socket *s;
-	struct file *filp;
-};
-
-/**
- * v9fs_sock_recv - receive from a socket
- * @v9ses: session information
- * @v: buffer to receive data into
- * @len: size of receive buffer
- *
- */
-
-static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
-{
-	int ret;
-	struct v9fs_trans_sock *ts;
-
-	if (!trans || trans->status == Disconnected) {
-		dprintk(DEBUG_ERROR, "disconnected ...\n");
-		return -EREMOTEIO;
-	}
-
-	ts = trans->priv;
-
-	if (!(ts->filp->f_flags & O_NONBLOCK))
-		dprintk(DEBUG_ERROR, "blocking read ...\n");
-
-	ret = kernel_read(ts->filp, ts->filp->f_pos, v, len);
-	if (ret <= 0) {
-		if (ret != -ERESTARTSYS && ret != -EAGAIN)
-			trans->status = Disconnected;
-	}
-
-	return ret;
-}
-
-/**
- * v9fs_sock_send - send to a socket
- * @v9ses: session information
- * @v: buffer to send data from
- * @len: size of send buffer
- *
- */
-
-static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len)
-{
-	int ret;
-	mm_segment_t oldfs;
-	struct v9fs_trans_sock *ts;
-
-	if (!trans || trans->status == Disconnected) {
-		dprintk(DEBUG_ERROR, "disconnected ...\n");
-		return -EREMOTEIO;
-	}
-
-	ts = trans->priv;
-	if (!ts) {
-		dprintk(DEBUG_ERROR, "no transport ...\n");
-		return -EREMOTEIO;
-	}
-
-	if (!(ts->filp->f_flags & O_NONBLOCK))
-		dprintk(DEBUG_ERROR, "blocking write ...\n");
-
-	oldfs = get_fs();
-	set_fs(get_ds());
-	ret = vfs_write(ts->filp, (void __user *)v, len, &ts->filp->f_pos);
-	set_fs(oldfs);
-
-	if (ret < 0) {
-		if (ret != -ERESTARTSYS)
-			trans->status = Disconnected;
-	}
-
-	return ret;
-}
-
-static unsigned int v9fs_sock_poll(struct v9fs_transport *trans,
-	struct poll_table_struct *pt) {
-
-	int ret;
-	struct v9fs_trans_sock *ts;
-	mm_segment_t oldfs;
-
-	if (!trans) {
-		dprintk(DEBUG_ERROR, "no transport\n");
-		return -EIO;
-	}
-
-	ts = trans->priv;
-	if (trans->status != Connected || !ts) {
-		dprintk(DEBUG_ERROR, "transport disconnected: %d\n", trans->status);
-		return -EIO;
-	}
-
-	oldfs = get_fs();
-	set_fs(get_ds());
-
-	if (!ts->filp->f_op || !ts->filp->f_op->poll) {
-		dprintk(DEBUG_ERROR, "no poll operation\n");
-		ret = -EIO;
-		goto end;
-	}
-
-	ret = ts->filp->f_op->poll(ts->filp, pt);
-
-end:
-	set_fs(oldfs);
-	return ret;
-}
-
-
-/**
- * v9fs_tcp_init - initialize TCP socket
- * @v9ses: session information
- * @addr: address of server to mount
- * @data: mount options
- *
- */
-
-static int
-v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
-{
-	struct socket *csocket = NULL;
-	struct sockaddr_in sin_server;
-	int rc = 0;
-	struct v9fs_trans_sock *ts = NULL;
-	struct v9fs_transport *trans = v9ses->transport;
-	int fd;
-
-	trans->status = Disconnected;
-
-	ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
-
-	if (!ts)
-		return -ENOMEM;
-
-	trans->priv = ts;
-	ts->s = NULL;
-	ts->filp = NULL;
-
-	if (!addr)
-		return -EINVAL;
-
-	dprintk(DEBUG_TRANS, "Connecting to %s\n", addr);
-
-	sin_server.sin_family = AF_INET;
-	sin_server.sin_addr.s_addr = in_aton(addr);
-	sin_server.sin_port = htons(v9ses->port);
-	sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
-	rc = csocket->ops->connect(csocket,
-				   (struct sockaddr *)&sin_server,
-				   sizeof(struct sockaddr_in), 0);
-	if (rc < 0) {
-		eprintk(KERN_ERR,
-			"v9fs_trans_tcp: problem connecting socket to %s\n",
-			addr);
-		return rc;
-	}
-	csocket->sk->sk_allocation = GFP_NOIO;
-
-	fd = sock_map_fd(csocket);
-	if (fd < 0) {
-		sock_release(csocket);
-		kfree(ts);
-		trans->priv = NULL;
-		return fd;
-	}
-
-	ts->s = csocket;
-	ts->filp = fget(fd);
-	ts->filp->f_flags |= O_NONBLOCK;
-	trans->status = Connected;
-
-	return 0;
-}
-
-/**
- * v9fs_unix_init - initialize UNIX domain socket
- * @v9ses: session information
- * @dev_name: path to named pipe
- * @data: mount options
- *
- */
-
-static int
-v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
-	       char *data)
-{
-	int rc, fd;
-	struct socket *csocket;
-	struct sockaddr_un sun_server;
-	struct v9fs_transport *trans;
-	struct v9fs_trans_sock *ts;
-
-	rc = 0;
-	csocket = NULL;
-	trans = v9ses->transport;
-
-	trans->status = Disconnected;
-
-	if (strlen(dev_name) > UNIX_PATH_MAX) {
-		eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
-			dev_name);
-		return -ENOMEM;
-	}
-
-	ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
-	if (!ts)
-		return -ENOMEM;
-
-	trans->priv = ts;
-	ts->s = NULL;
-	ts->filp = NULL;
-
-	sun_server.sun_family = PF_UNIX;
-	strcpy(sun_server.sun_path, dev_name);
-	sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
-	rc = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
-		sizeof(struct sockaddr_un) - 1, 0);	/* -1 *is* important */
-	if (rc < 0) {
-		eprintk(KERN_ERR,
-			"v9fs_trans_unix: problem connecting socket: %s: %d\n",
-			dev_name, rc);
-		return rc;
-	}
-	csocket->sk->sk_allocation = GFP_NOIO;
-
-	fd = sock_map_fd(csocket);
-	if (fd < 0) {
-		sock_release(csocket);
-		kfree(ts);
-		trans->priv = NULL;
-		return fd;
-	}
-
-	ts->s = csocket;
-	ts->filp = fget(fd);
-	ts->filp->f_flags |= O_NONBLOCK;
-	trans->status = Connected;
-
-	return 0;
-}
-
-/**
- * v9fs_sock_close - shutdown socket
- * @trans: private socket structure
- *
- */
-
-static void v9fs_sock_close(struct v9fs_transport *trans)
-{
-	struct v9fs_trans_sock *ts;
-
-	if (!trans)
-		return;
-
-	ts = trans->priv;
-
-	if ((ts) && (ts->filp)) {
-		fput(ts->filp);
-		ts->filp = NULL;
-		ts->s = NULL;
-		trans->status = Disconnected;
-	}
-
-	kfree(ts);
-
-	trans->priv = NULL;
-}
-
-struct v9fs_transport v9fs_trans_tcp = {
-	.init = v9fs_tcp_init,
-	.write = v9fs_sock_send,
-	.read = v9fs_sock_recv,
-	.close = v9fs_sock_close,
-	.poll = v9fs_sock_poll,
-};
-
-struct v9fs_transport v9fs_trans_unix = {
-	.init = v9fs_unix_init,
-	.write = v9fs_sock_send,
-	.read = v9fs_sock_recv,
-	.close = v9fs_sock_close,
-	.poll = v9fs_sock_poll,
-};
-- 
cgit v1.2.3


From 4a26c2429b8c1ab2be140a4b29aaf16d4dcd8f92 Mon Sep 17 00:00:00 2001
From: Russ Cox <rsc@swtch.com>
Date: Sat, 25 Mar 2006 03:07:24 -0800
Subject: [PATCH] v9fs: rename tids to tags to be consistent with Plan 9
 documentation

The code talks about these things called tids, which I eventually figured
out are tags.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/9p.c   |  2 +-
 fs/9p/mux.c  | 12 ++++++------
 fs/9p/v9fs.c |  2 +-
 fs/9p/v9fs.h |  3 ---
 4 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/9p.c b/fs/9p/9p.c
index c148e6ba07e..ea2cf9692ff 100644
--- a/fs/9p/9p.c
+++ b/fs/9p/9p.c
@@ -152,7 +152,7 @@ v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid)
 /**
  * v9fs_v9fs_t_flush - flush a pending transaction
  * @v9ses: 9P2000 session information
- * @tag: tid to release
+ * @tag: tag to release
  *
  */
 
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index e2ae60adda9..3b10a36cefd 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -70,7 +70,7 @@ struct v9fs_mux_data {
 	int msize;
 	unsigned char *extended;
 	struct v9fs_transport *trans;
-	struct v9fs_idpool tidpool;
+	struct v9fs_idpool tagpool;
 	int err;
 	wait_queue_head_t equeue;
 	struct list_head req_list;
@@ -280,8 +280,8 @@ struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
 	m->msize = msize;
 	m->extended = extended;
 	m->trans = trans;
-	idr_init(&m->tidpool.pool);
-	init_MUTEX(&m->tidpool.lock);
+	idr_init(&m->tagpool.pool);
+	init_MUTEX(&m->tagpool.lock);
 	m->err = 0;
 	init_waitqueue_head(&m->equeue);
 	INIT_LIST_HEAD(&m->req_list);
@@ -965,7 +965,7 @@ static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m)
 {
 	int tag;
 
-	tag = v9fs_get_idpool(&m->tidpool);
+	tag = v9fs_get_idpool(&m->tagpool);
 	if (tag < 0)
 		return V9FS_NOTAG;
 	else
@@ -974,6 +974,6 @@ static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m)
 
 static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag)
 {
-	if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tidpool))
-		v9fs_put_idpool(tag, &m->tidpool);
+	if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tagpool))
+		v9fs_put_idpool(tag, &m->tagpool);
 }
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 61352491ba3..daf623cd61c 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -289,7 +289,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 	/* set global debug level */
 	v9fs_debug_level = v9ses->debug;
 
-	/* id pools that are session-dependent: FIDs and TIDs */
+	/* id pools that are session-dependent: fids and tags */
 	idr_init(&v9ses->fidpool.pool);
 	init_MUTEX(&v9ses->fidpool.lock);
 
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index f337da7a0ee..9f63ab8106d 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -91,6 +91,3 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses);
 #define V9FS_DEFUSER	"nobody"
 #define V9FS_DEFANAME	""
 
-/* inital pool sizes for fids and tags */
-#define V9FS_START_FIDS 8192
-#define V9FS_START_TIDS 256
-- 
cgit v1.2.3


From 5174fdab9f58181249debab6e959ae4fd4abd0ed Mon Sep 17 00:00:00 2001
From: Latchesar Ionkov <lucho@ionkov.net>
Date: Sat, 25 Mar 2006 03:07:25 -0800
Subject: [PATCH] v9fs: print 9p messages

Print 9p messages.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Cc: Eric Van Hensbergen <ericvh@ericvh.myip.org>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/9p.h         |   1 +
 fs/9p/Makefile     |   3 +-
 fs/9p/debug.h      |   1 +
 fs/9p/fcprint.c    | 347 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/9p/mux.c        |  15 +++
 fs/9p/vfs_dentry.c |   2 +-
 fs/9p/vfs_inode.c  |   2 +-
 7 files changed, 368 insertions(+), 3 deletions(-)
 create mode 100644 fs/9p/fcprint.c

(limited to 'fs')

diff --git a/fs/9p/9p.h b/fs/9p/9p.h
index 95d72aec1c1..1df9e69b794 100644
--- a/fs/9p/9p.h
+++ b/fs/9p/9p.h
@@ -372,3 +372,4 @@ int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
 int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
 		 u32 count, const char __user * data,
 		 struct v9fs_fcall **rcall);
+int v9fs_printfcall(char *, int, struct v9fs_fcall *, int);
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 5db5af9dbf2..12d52421d58 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -13,5 +13,6 @@ obj-$(CONFIG_9P_FS) := 9p2000.o
 	vfs_dentry.o \
 	error.o \
 	v9fs.o \
-	fid.o
+	fid.o \
+	fcprint.o
 
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
index fe551032788..ff54a7b0b23 100644
--- a/fs/9p/debug.h
+++ b/fs/9p/debug.h
@@ -30,6 +30,7 @@
 #define DEBUG_MUX		(1<<5)
 #define DEBUG_TRANS		(1<<6)
 #define DEBUG_SLABS	      	(1<<7)
+#define DEBUG_FCALL		(1<<8)
 
 #define DEBUG_DUMP_PKT		0
 
diff --git a/fs/9p/fcprint.c b/fs/9p/fcprint.c
new file mode 100644
index 00000000000..fc8a98437b7
--- /dev/null
+++ b/fs/9p/fcprint.c
@@ -0,0 +1,347 @@
+/*
+ *  linux/fs/9p/fcprint.c
+ *
+ *  Print 9P call.
+ *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "mux.h"
+
+static int
+v9fs_printqid(char *buf, int buflen, struct v9fs_qid *q)
+{
+	int n;
+	char b[10];
+
+	n = 0;
+	if (q->type & V9FS_QTDIR)
+		b[n++] = 'd';
+	if (q->type & V9FS_QTAPPEND)
+		b[n++] = 'a';
+	if (q->type & V9FS_QTAUTH)
+		b[n++] = 'A';
+	if (q->type & V9FS_QTEXCL)
+		b[n++] = 'l';
+	if (q->type & V9FS_QTTMP)
+		b[n++] = 't';
+	if (q->type & V9FS_QTSYMLINK)
+		b[n++] = 'L';
+	b[n] = '\0';
+
+	return scnprintf(buf, buflen, "(%.16llx %x %s)", (long long int) q->path,
+		q->version, b);
+}
+
+static int
+v9fs_printperm(char *buf, int buflen, int perm)
+{
+	int n;
+	char b[15];
+
+	n = 0;
+	if (perm & V9FS_DMDIR)
+		b[n++] = 'd';
+	if (perm & V9FS_DMAPPEND)
+		b[n++] = 'a';
+	if (perm & V9FS_DMAUTH)
+		b[n++] = 'A';
+	if (perm & V9FS_DMEXCL)
+		b[n++] = 'l';
+	if (perm & V9FS_DMTMP)
+		b[n++] = 't';
+	if (perm & V9FS_DMDEVICE)
+		b[n++] = 'D';
+	if (perm & V9FS_DMSOCKET)
+		b[n++] = 'S';
+	if (perm & V9FS_DMNAMEDPIPE)
+		b[n++] = 'P';
+	if (perm & V9FS_DMSYMLINK)
+		b[n++] = 'L';
+	b[n] = '\0';
+
+	return scnprintf(buf, buflen, "%s%03o", b, perm&077);
+}
+
+static int
+v9fs_printstat(char *buf, int buflen, struct v9fs_stat *st, int extended)
+{
+	int n;
+
+	n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len,
+		st->name.str, st->uid.len, st->uid.str);
+	if (extended)
+		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid);
+
+	n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str);
+	if (extended)
+		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid);
+
+	n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str);
+	if (extended)
+		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid);
+
+	n += scnprintf(buf+n, buflen-n, " q ");
+	n += v9fs_printqid(buf+n, buflen-n, &st->qid);
+	n += scnprintf(buf+n, buflen-n, " m ");
+	n += v9fs_printperm(buf+n, buflen-n, st->mode);
+	n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld",
+		st->atime, st->mtime, (long long int) st->length);
+
+	if (extended)
+		n += scnprintf(buf+n, buflen-n, " ext '%.*s'",
+			st->extension.len, st->extension.str);
+
+	return n;
+}
+
+static int
+v9fs_dumpdata(char *buf, int buflen, u8 *data, int datalen)
+{
+	int i, n;
+
+	i = n = 0;
+	while (i < datalen) {
+		n += scnprintf(buf + n, buflen - n, "%02x", data[i]);
+		if (i%4 == 3)
+			n += scnprintf(buf + n, buflen - n, " ");
+		if (i%32 == 31)
+			n += scnprintf(buf + n, buflen - n, "\n");
+
+		i++;
+	}
+	n += scnprintf(buf + n, buflen - n, "\n");
+
+	return n;
+}
+
+static int
+v9fs_printdata(char *buf, int buflen, u8 *data, int datalen)
+{
+	return v9fs_dumpdata(buf, buflen, data, datalen<16?datalen:16);
+}
+
+int
+v9fs_printfcall(char *buf, int buflen, struct v9fs_fcall *fc, int extended)
+{
+	int i, ret, type, tag;
+
+	if (!fc)
+		return scnprintf(buf, buflen, "<NULL>");
+
+	type = fc->id;
+	tag = fc->tag;
+
+	ret = 0;
+	switch (type) {
+	case TVERSION:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Tversion tag %u msize %u version '%.*s'", tag,
+			fc->params.tversion.msize, fc->params.tversion.version.len,
+			fc->params.tversion.version.str);
+		break;
+
+	case RVERSION:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Rversion tag %u msize %u version '%.*s'", tag,
+			fc->params.rversion.msize, fc->params.rversion.version.len,
+			fc->params.rversion.version.str);
+		break;
+
+	case TAUTH:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag,
+			fc->params.tauth.afid, fc->params.tauth.uname.len,
+			fc->params.tauth.uname.str, fc->params.tauth.aname.len,
+			fc->params.tauth.aname.str);
+		break;
+
+	case RAUTH:
+		ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag);
+		v9fs_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid);
+		break;
+
+	case TATTACH:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'",
+			tag, fc->params.tattach.fid, fc->params.tattach.afid,
+			fc->params.tattach.uname.len, fc->params.tattach.uname.str,
+			fc->params.tattach.aname.len, fc->params.tattach.aname.str);
+		break;
+
+	case RATTACH:
+		ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ", tag);
+		v9fs_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid);
+		break;
+
+	case RERROR:
+		ret += scnprintf(buf+ret, buflen-ret, "Rerror tag %u ename '%.*s'",
+			tag, fc->params.rerror.error.len,
+			fc->params.rerror.error.str);
+		if (extended)
+			ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n",
+				fc->params.rerror.errno);
+		break;
+
+	case TFLUSH:
+		ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u",
+			tag, fc->params.tflush.oldtag);
+		break;
+
+	case RFLUSH:
+		ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag);
+		break;
+
+	case TWALK:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Twalk tag %u fid %d newfid %d nwname %d", tag,
+			fc->params.twalk.fid, fc->params.twalk.newfid,
+			fc->params.twalk.nwname);
+		for(i = 0; i < fc->params.twalk.nwname; i++)
+			ret += scnprintf(buf+ret, buflen-ret," '%.*s'",
+				fc->params.twalk.wnames[i].len,
+				fc->params.twalk.wnames[i].str);
+		break;
+
+	case RWALK:
+		ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d",
+			tag, fc->params.rwalk.nwqid);
+		for(i = 0; i < fc->params.rwalk.nwqid; i++)
+			ret += v9fs_printqid(buf+ret, buflen-ret,
+				&fc->params.rwalk.wqids[i]);
+		break;
+
+	case TOPEN:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Topen tag %u fid %d mode %d", tag,
+			fc->params.topen.fid, fc->params.topen.mode);
+		break;
+
+	case ROPEN:
+		ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag);
+		ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid);
+		ret += scnprintf(buf+ret, buflen-ret," iounit %d",
+			fc->params.ropen.iounit);
+		break;
+
+	case TCREATE:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Tcreate tag %u fid %d name '%.*s' perm ", tag,
+			fc->params.tcreate.fid, fc->params.tcreate.name.len,
+			fc->params.tcreate.name.str);
+
+		ret += v9fs_printperm(buf+ret, buflen-ret, fc->params.tcreate.perm);
+		ret += scnprintf(buf+ret, buflen-ret, " mode %d",
+			fc->params.tcreate.mode);
+		break;
+
+	case RCREATE:
+		ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag);
+		ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.rcreate.qid);
+		ret += scnprintf(buf+ret, buflen-ret, " iounit %d",
+			fc->params.rcreate.iounit);
+		break;
+
+	case TREAD:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Tread tag %u fid %d offset %lld count %u", tag,
+			fc->params.tread.fid,
+			(long long int) fc->params.tread.offset,
+			fc->params.tread.count);
+		break;
+
+	case RREAD:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Rread tag %u count %u data ", tag,
+			fc->params.rread.count);
+		ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.rread.data,
+			fc->params.rread.count);
+		break;
+
+	case TWRITE:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Twrite tag %u fid %d offset %lld count %u data ",
+			tag, fc->params.twrite.fid,
+			(long long int) fc->params.twrite.offset,
+			fc->params.twrite.count);
+		ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.twrite.data,
+			fc->params.twrite.count);
+		break;
+
+	case RWRITE:
+		ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u",
+			tag, fc->params.rwrite.count);
+		break;
+
+	case TCLUNK:
+		ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d",
+			tag, fc->params.tclunk.fid);
+		break;
+
+	case RCLUNK:
+		ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag);
+		break;
+
+	case TREMOVE:
+		ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d",
+			tag, fc->params.tremove.fid);
+		break;
+
+	case RREMOVE:
+		ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag);
+		break;
+
+	case TSTAT:
+		ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d",
+			tag, fc->params.tstat.fid);
+		break;
+
+	case RSTAT:
+		ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag);
+		ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat,
+			extended);
+		break;
+
+	case TWSTAT:
+		ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ",
+			tag, fc->params.twstat.fid);
+		ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.twstat.stat,
+			extended);
+		break;
+
+	case RWSTAT:
+		ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag);
+		break;
+
+	default:
+		ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type);
+		break;
+	}
+
+	return ret;
+}
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 3b10a36cefd..d16f4f488fd 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -635,6 +635,14 @@ static void v9fs_read_work(void *a)
 			goto error;
 		}
 
+		if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) {
+			char buf[150];
+
+			v9fs_printfcall(buf, sizeof(buf), m->rcall,
+				*m->extended);
+			printk(KERN_NOTICE ">>> %p %s\n", m, buf);
+		}
+
 		rcall = m->rcall;
 		rbuf = m->rbuf;
 		if (m->rpos > n) {
@@ -740,6 +748,13 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
 
 	v9fs_set_tag(tc, n);
 
+	if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) {
+		char buf[150];
+
+		v9fs_printfcall(buf, sizeof(buf), tc, *m->extended);
+		printk(KERN_NOTICE "<<< %p %s\n", m, buf);
+	}
+
 	req->tag = n;
 	req->tcall = tc;
 	req->rcall = NULL;
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 12c9cc926b7..0cf2b840219 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -51,7 +51,7 @@
  *
  */
 
-int v9fs_dentry_delete(struct dentry *dentry)
+static int v9fs_dentry_delete(struct dentry *dentry)
 {
 	dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
 	return 1;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 651a9e14d9a..e46bb5a82e1 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -348,7 +348,7 @@ error:
 	return ERR_PTR(err);
 }
 
-struct inode *
+static struct inode *
 v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid,
 	struct super_block *sb)
 {
-- 
cgit v1.2.3


From 16cce6d27ef52e00cc124196046bbae7150024c1 Mon Sep 17 00:00:00 2001
From: Latchesar Ionkov <lucho@ionkov.net>
Date: Sat, 25 Mar 2006 03:07:26 -0800
Subject: [PATCH] v9fs: add extension field to Tcreate

Implement a new way of creating special files.  Instead of Tcreate+Twstat,
add one more field to Tcreate that contains special file description.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/9p.c        |  8 +++++---
 fs/9p/9p.h        |  3 ++-
 fs/9p/conv.c      |  8 +++++++-
 fs/9p/conv.h      |  3 ++-
 fs/9p/vfs_file.c  | 32 +++++++++++++++-----------------
 fs/9p/vfs_inode.c | 47 +++++++++++++++++++----------------------------
 6 files changed, 50 insertions(+), 51 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/9p.c b/fs/9p/9p.c
index ea2cf9692ff..81027bc1f09 100644
--- a/fs/9p/9p.c
+++ b/fs/9p/9p.c
@@ -334,8 +334,8 @@ v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
  */
 
 int
-v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
-	      u32 perm, u8 mode, struct v9fs_fcall **rcp)
+v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, u32 perm,
+	u8 mode, char *extension, struct v9fs_fcall **rcp)
 {
 	int ret;
 	struct v9fs_fcall *tc;
@@ -343,7 +343,9 @@ v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
 	dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
 		fid, name, perm, mode);
 
-	tc = v9fs_create_tcreate(fid, name, perm, mode);
+	tc = v9fs_create_tcreate(fid, name, perm, mode, extension,
+		v9ses->extended);
+
 	if (!IS_ERR(tc)) {
 		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
 		kfree(tc);
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
index 1df9e69b794..2bb89b4005a 100644
--- a/fs/9p/9p.h
+++ b/fs/9p/9p.h
@@ -235,6 +235,7 @@ struct Tcreate {
 	struct v9fs_str name;
 	u32 perm;
 	u8 mode;
+	struct v9fs_str extension;
 };
 
 struct Rcreate {
@@ -364,7 +365,7 @@ int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
 		  struct v9fs_fcall **rcall);
 
 int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
-		  u32 perm, u8 mode, struct v9fs_fcall **rcall);
+	u32 perm, u8 mode, char *extension, struct v9fs_fcall **rcall);
 
 int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
 		u64 offset, u32 count, struct v9fs_fcall **rcall);
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index bba81714246..b129079e5f3 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -666,7 +666,8 @@ struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode)
 	return fc;
 }
 
-struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode)
+struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
+	char *extension, int extended)
 {
 	int size;
 	struct v9fs_fcall *fc;
@@ -674,6 +675,9 @@ struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode)
 	struct cbuf *bufp = &buffer;
 
 	size = 4 + 2 + strlen(name) + 4 + 1;	/* fid[4] name[s] perm[4] mode[1] */
+	if (extended && extension!=NULL)
+		size += 2 + strlen(extension);	/* extension[s] */
+
 	fc = v9fs_create_common(bufp, size, TCREATE);
 	if (IS_ERR(fc))
 		goto error;
@@ -682,6 +686,8 @@ struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode)
 	v9fs_put_str(bufp, name, &fc->params.tcreate.name);
 	v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm);
 	v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode);
+	if (extended)
+		v9fs_put_str(bufp, extension, &fc->params.tcreate.extension);
 
 	if (buf_check_overflow(bufp)) {
 		kfree(fc);
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
index f5896628dae..03cacdda789 100644
--- a/fs/9p/conv.h
+++ b/fs/9p/conv.h
@@ -39,7 +39,8 @@ struct v9fs_fcall *v9fs_create_tflush(u16 oldtag);
 struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
 	char **wnames);
 struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode);
-struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode);
+struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
+	char *extension, int extended);
 struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count);
 struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
 	const char __user *data);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index de3a129698d..1144d59d646 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -69,29 +69,30 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 
 	fid = v9fs_get_idpool(&v9ses->fidpool);
 	if (fid < 0) {
-			eprintk(KERN_WARNING, "newfid fails!\n");
-			return -ENOSPC;
-		}
+		eprintk(KERN_WARNING, "newfid fails!\n");
+		return -ENOSPC;
+	}
 
 	err = v9fs_t_walk(v9ses, vfid->fid, fid, NULL, NULL);
 	if (err < 0) {
-			dprintk(DEBUG_ERROR, "rewalk didn't work\n");
+		dprintk(DEBUG_ERROR, "rewalk didn't work\n");
 		goto put_fid;
 	}
 
-	vfid = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
-	if (vfid == NULL) {
-		dprintk(DEBUG_ERROR, "out of memory\n");
-		goto clunk_fid;
-		}
-
-		/* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */
-		/* translate open mode appropriately */
+	/* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */
+	/* translate open mode appropriately */
 	omode = v9fs_uflags2omode(file->f_flags);
 	err = v9fs_t_open(v9ses, fid, omode, &fcall);
 	if (err < 0) {
 		PRINT_FCALL_ERROR("open failed", fcall);
-		goto destroy_vfid;
+		goto clunk_fid;
+	}
+
+	vfid = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
+	if (vfid == NULL) {
+		dprintk(DEBUG_ERROR, "out of memory\n");
+		err = -ENOMEM;
+		goto clunk_fid;
 	}
 
 	file->private_data = vfid;
@@ -106,15 +107,12 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 
 	return 0;
 
-destroy_vfid:
-	v9fs_fid_destroy(vfid);
-
 clunk_fid:
 	v9fs_t_clunk(v9ses, fid);
 
 put_fid:
 	v9fs_put_idpool(fid, &v9ses->fidpool);
-		kfree(fcall);
+	kfree(fcall);
 
 	return err;
 }
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index e46bb5a82e1..dc67f83d0da 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -255,8 +255,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 }
 
 static int
-v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name,
-	u32 perm, u8 mode, u32 *fidp, struct v9fs_qid *qid, u32 *iounit)
+v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm,
+	u8 mode, char *extension, u32 *fidp, struct v9fs_qid *qid, u32 *iounit)
 {
 	u32 fid;
 	int err;
@@ -271,14 +271,14 @@ v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name,
 	err = v9fs_t_walk(v9ses, pfid, fid, NULL, &fcall);
 	if (err < 0) {
 		PRINT_FCALL_ERROR("clone error", fcall);
-		goto error;
+		goto put_fid;
 	}
 	kfree(fcall);
 
-	err = v9fs_t_create(v9ses, fid, name, perm, mode, &fcall);
+	err = v9fs_t_create(v9ses, fid, name, perm, mode, extension, &fcall);
 	if (err < 0) {
 		PRINT_FCALL_ERROR("create fails", fcall);
-		goto error;
+		goto clunk_fid;
 	}
 
 	if (iounit)
@@ -293,7 +293,11 @@ v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name,
 	kfree(fcall);
 	return 0;
 
-error:
+clunk_fid:
+	v9fs_t_clunk(v9ses, fid);
+	fid = V9FS_NOFID;
+
+put_fid:
 	if (fid >= 0)
 		v9fs_put_idpool(fid, &v9ses->fidpool);
 
@@ -474,7 +478,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 		flags = O_RDWR;
 
 	err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name,
-		perm, v9fs_uflags2omode(flags), &fid, &qid, &iounit);
+		perm, v9fs_uflags2omode(flags), NULL, &fid, &qid, &iounit);
 
 	if (err)
 		goto error;
@@ -550,7 +554,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	perm = unixmode2p9mode(v9ses, mode | S_IFDIR);
 
 	err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name,
-		perm, V9FS_OREAD, &fid, NULL, NULL);
+		perm, V9FS_OREAD, NULL, &fid, NULL, NULL);
 
 	if (err) {
 		dprintk(DEBUG_ERROR, "create error %d\n", err);
@@ -1008,11 +1012,13 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 
 	/* copy extension buffer into buffer */
 	if (fcall->params.rstat.stat.extension.len < buflen)
-		buflen = fcall->params.rstat.stat.extension.len;
+		buflen = fcall->params.rstat.stat.extension.len + 1;
 
-	memcpy(buffer, fcall->params.rstat.stat.extension.str, buflen - 1);
+	memmove(buffer, fcall->params.rstat.stat.extension.str, buflen - 1);
 	buffer[buflen-1] = 0;
 
+	dprintk(DEBUG_ERROR, "%s -> %.*s (%s)\n", dentry->d_name.name, fcall->params.rstat.stat.extension.len,
+		fcall->params.rstat.stat.extension.str, buffer);
 	retval = buflen;
 
       FreeFcall:
@@ -1072,7 +1078,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	if (!link)
 		link = ERR_PTR(-ENOMEM);
 	else {
-		len = v9fs_readlink(dentry, link, strlen(link));
+		len = v9fs_readlink(dentry, link, PATH_MAX);
 
 		if (len < 0) {
 			__putname(link);
@@ -1109,10 +1115,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
 	struct v9fs_session_info *v9ses;
 	struct v9fs_fid *dfid, *vfid;
 	struct inode *inode;
-	struct v9fs_fcall *fcall;
-	struct v9fs_wstat wstat;
 
-	fcall = NULL;
 	inode = NULL;
 	vfid = NULL;
 	v9ses = v9fs_inode2v9ses(dir);
@@ -1125,7 +1128,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
 	}
 
 	err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name,
-		perm, V9FS_OREAD, &fid, NULL, NULL);
+		perm, V9FS_OREAD, (char *) extension, &fid, NULL, NULL);
 
 	if (err)
 		goto error;
@@ -1148,23 +1151,11 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
 		goto error;
 	}
 
-	/* issue a Twstat */
-	v9fs_blank_wstat(&wstat);
-	wstat.muid = v9ses->name;
-	wstat.extension = (char *) extension;
-	err = v9fs_t_wstat(v9ses, vfid->fid, &wstat, &fcall);
-	if (err < 0) {
-		PRINT_FCALL_ERROR("wstat error", fcall);
-		goto error;
-	}
-
-	kfree(fcall);
 	dentry->d_op = &v9fs_dentry_operations;
 	d_instantiate(dentry, inode);
 	return 0;
 
 error:
-	kfree(fcall);
 	if (vfid)
 		v9fs_fid_destroy(vfid);
 
@@ -1224,7 +1215,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
 	}
 
 	name = __getname();
-	sprintf(name, "hardlink(%d)\n", oldfid->fid);
+	sprintf(name, "%d\n", oldfid->fid);
 	retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name);
 	__putname(name);
 
-- 
cgit v1.2.3


From c0291a05f8e6a72c9807b0e2a369ee82bec659c3 Mon Sep 17 00:00:00 2001
From: Eugene Teo <eugene.teo@eugeneteo.net>
Date: Sat, 25 Mar 2006 03:07:27 -0800
Subject: [PATCH] v9fs: fix vfs_inode dereference before NULL check

__getname, which in turn will call kmem_cache_alloc, may return NULL.

Coverity bug #977

Signed-off-by: Eugene Teo <eugene.teo@eugeneteo.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/vfs_inode.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index dc67f83d0da..4cbfb714c6e 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1244,6 +1244,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 		return -EINVAL;
 
 	name = __getname();
+	if (!name)
+		return -ENOMEM;
 	/* build extension */
 	if (S_ISBLK(mode))
 		sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev));
-- 
cgit v1.2.3


From 42e8c509cfa3d92b3dcbfe95edf6be00e5d4b0eb Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@hera.kernel.org>
Date: Sat, 25 Mar 2006 03:07:28 -0800
Subject: [PATCH] v9fs: update license boilerplate

Update license boilerplate to specify GPLv2 and remove the (at your option
clause).  This change was agreed to by all the copyright holders (approvals
can be found on v9fs-developer mailing list).

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/9p.c         | 5 ++---
 fs/9p/9p.h         | 5 ++---
 fs/9p/conv.c       | 5 ++---
 fs/9p/conv.h       | 5 ++---
 fs/9p/debug.h      | 5 ++---
 fs/9p/error.c      | 5 ++---
 fs/9p/error.h      | 5 ++---
 fs/9p/fcprint.c    | 5 ++---
 fs/9p/fid.c        | 5 ++---
 fs/9p/fid.h        | 5 ++---
 fs/9p/mux.c        | 5 ++---
 fs/9p/mux.h        | 5 ++---
 fs/9p/trans_fd.c   | 6 ++----
 fs/9p/transport.h  | 5 ++---
 fs/9p/v9fs.c       | 5 ++---
 fs/9p/v9fs.h       | 5 ++---
 fs/9p/v9fs_vfs.h   | 5 ++---
 fs/9p/vfs_addr.c   | 5 ++---
 fs/9p/vfs_dentry.c | 5 ++---
 fs/9p/vfs_dir.c    | 5 ++---
 fs/9p/vfs_file.c   | 5 ++---
 fs/9p/vfs_inode.c  | 5 ++---
 fs/9p/vfs_super.c  | 5 ++---
 23 files changed, 46 insertions(+), 70 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/9p.c b/fs/9p/9p.c
index 81027bc1f09..552de120bbd 100644
--- a/fs/9p/9p.c
+++ b/fs/9p/9p.c
@@ -8,9 +8,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
index 2bb89b4005a..94e2f92ab2e 100644
--- a/fs/9p/9p.h
+++ b/fs/9p/9p.h
@@ -8,9 +8,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index b129079e5f3..a767e05b60b 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -8,9 +8,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
index 03cacdda789..dd5b6b1b610 100644
--- a/fs/9p/conv.h
+++ b/fs/9p/conv.h
@@ -8,9 +8,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
index ff54a7b0b23..4228c0bb3c3 100644
--- a/fs/9p/debug.h
+++ b/fs/9p/debug.h
@@ -5,9 +5,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/error.c b/fs/9p/error.c
index e4b6f8f38b6..981fe8ecd78 100644
--- a/fs/9p/error.c
+++ b/fs/9p/error.c
@@ -11,9 +11,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/error.h b/fs/9p/error.h
index a9794e85fe5..5f3ca522b31 100644
--- a/fs/9p/error.h
+++ b/fs/9p/error.h
@@ -12,9 +12,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/fcprint.c b/fs/9p/fcprint.c
index fc8a98437b7..583e827baeb 100644
--- a/fs/9p/fcprint.c
+++ b/fs/9p/fcprint.c
@@ -6,9 +6,8 @@
  *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index c4d13bf904d..b7608af07ce 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -4,9 +4,8 @@
  *  Copyright (C) 2005, 2006 by Eric Van Hensbergen <ericvh@gmail.com>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 1fc2dd08d75..aa974d6875c 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -4,9 +4,8 @@
  *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index d16f4f488fd..3e5b124a721 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -7,9 +7,8 @@
  *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
index 17144fdfa11..e90bfd32ea4 100644
--- a/fs/9p/mux.h
+++ b/fs/9p/mux.h
@@ -7,9 +7,8 @@
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
index 8029844d6b2..94e0a7fd9fc 100644
--- a/fs/9p/trans_fd.c
+++ b/fs/9p/trans_fd.c
@@ -7,12 +7,10 @@
  *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004-2005 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
- *  Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
index 91fcdb94b36..b38a4b8a41c 100644
--- a/fs/9p/transport.h
+++ b/fs/9p/transport.h
@@ -7,9 +7,8 @@
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index daf623cd61c..b633433f9ce 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -7,9 +7,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 9f63ab8106d..c134d104cb2 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -5,9 +5,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index a759278acaa..43c9f7de031 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -5,9 +5,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 8100fb5171b..efda46fb64d 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -7,9 +7,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 0cf2b840219..062daa6000a 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -7,9 +7,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index cd5eeb032d6..766f11f1215 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -7,9 +7,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 1144d59d646..59e74416340 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -7,9 +7,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 4cbfb714c6e..133db366d30 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -7,9 +7,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index d05318fa684..083ed5af8a1 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -8,9 +8,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
-- 
cgit v1.2.3


From 67543e508d74ad1a8e80290580c9d1440beba4d9 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@hera.kernel.org>
Date: Sat, 25 Mar 2006 03:07:29 -0800
Subject: [PATCH] 9p: fix name consistency problems

There were a number of conflicting naming schemes used in the v9fs project.
The directory was fs/9p, but MAINTAINERS and Documentation referred to
v9fs.  The module name itself was 9p2000, and the file system type was 9P.
This patch attempts to clean that up, changing all references to 9p in
order to match the directory name.  We'll also start using 9p instead of
v9fs as our patch prefix.

There is also a minor consistency cleanup in the options changing the name
option to uname in order to more closely match the Plan 9 options.

Signed-off-by: Eric Van Hensbergevan <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/9p.c        | 431 ------------------------------------------------------
 fs/9p/Makefile    |   6 +-
 fs/9p/fcall.c     | 430 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/9p/v9fs.c      |   8 +-
 fs/9p/vfs_super.c |   2 +-
 5 files changed, 438 insertions(+), 439 deletions(-)
 delete mode 100644 fs/9p/9p.c
 create mode 100644 fs/9p/fcall.c

(limited to 'fs')

diff --git a/fs/9p/9p.c b/fs/9p/9p.c
deleted file mode 100644
index 552de120bbd..00000000000
--- a/fs/9p/9p.c
+++ /dev/null
@@ -1,431 +0,0 @@
-/*
- *  linux/fs/9p/9p.c
- *
- *  This file contains functions to perform synchronous 9P calls
- *
- *  Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/idr.h>
-
-#include "debug.h"
-#include "v9fs.h"
-#include "9p.h"
-#include "conv.h"
-#include "mux.h"
-
-/**
- * v9fs_t_version - negotiate protocol parameters with sever
- * @v9ses: 9P2000 session information
- * @msize: requested max size packet
- * @version: requested version.extension string
- * @fcall: pointer to response fcall pointer
- *
- */
-
-int
-v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
-	       char *version, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-
-	dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
-	tc = v9fs_create_tversion(msize, version);
-
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_attach - mount the server
- * @v9ses: 9P2000 session information
- * @uname: user name doing the attach
- * @aname: remote name being attached to
- * @fid: mount fid to attatch to root node
- * @afid: authentication fid (in this case result key)
- * @fcall: pointer to response fcall pointer
- *
- */
-
-int
-v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
-	      u32 fid, u32 afid, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall* tc;
-
-	dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
-		aname, fid, afid);
-
-	tc = v9fs_create_tattach(fid, afid, uname, aname);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc,
-	struct v9fs_fcall *rc, int err)
-{
-	int fid;
-	struct v9fs_session_info *v9ses;
-
-	if (err)
-		return;
-
-	fid = tc->params.tclunk.fid;
-	kfree(tc);
-
-	if (!rc)
-		return;
-
-	v9ses = a;
-	if (rc->id == RCLUNK)
-		v9fs_put_idpool(fid, &v9ses->fidpool);
-
-	kfree(rc);
-}
-
-/**
- * v9fs_t_clunk - release a fid (finish a transaction)
- * @v9ses: 9P2000 session information
- * @fid: fid to release
- * @fcall: pointer to response fcall pointer
- *
- */
-
-int
-v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid)
-{
-	int ret;
-	struct v9fs_fcall *tc, *rc;
-
-	dprintk(DEBUG_9P, "fid %d\n", fid);
-
-	rc = NULL;
-	tc = v9fs_create_tclunk(fid);
-	if (!IS_ERR(tc))
-		ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
-	else
-		ret = PTR_ERR(tc);
-
-	if (ret)
-		dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret);
-
-	v9fs_t_clunk_cb(v9ses, tc, rc, ret);
-	return ret;
-}
-
-#if 0
-/**
- * v9fs_v9fs_t_flush - flush a pending transaction
- * @v9ses: 9P2000 session information
- * @tag: tag to release
- *
- */
-
-int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-
-	dprintk(DEBUG_9P, "oldtag %d\n", oldtag);
-
-	tc = v9fs_create_tflush(oldtag);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, NULL);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-#endif  /*  0  */
-
-/**
- * v9fs_t_stat - read a file's meta-data
- * @v9ses: 9P2000 session information
- * @fid: fid pointing to file or directory to get info about
- * @fcall: pointer to response fcall
- *
- */
-
-int
-v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-
-	dprintk(DEBUG_9P, "fid %d\n", fid);
-
-	ret = -ENOMEM;
-	tc = v9fs_create_tstat(fid);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_wstat - write a file's meta-data
- * @v9ses: 9P2000 session information
- * @fid: fid pointing to file or directory to write info about
- * @stat: metadata
- * @fcall: pointer to response fcall
- *
- */
-
-int
-v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
-	     struct v9fs_wstat *wstat, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-
-	dprintk(DEBUG_9P, "fid %d\n", fid);
-
-	tc = v9fs_create_twstat(fid, wstat, v9ses->extended);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_walk - walk a fid to a new file or directory
- * @v9ses: 9P2000 session information
- * @fid: fid to walk
- * @newfid: new fid (for clone operations)
- * @name: path to walk fid to
- * @fcall: pointer to response fcall
- *
- */
-
-/* TODO: support multiple walk */
-
-int
-v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
-	    char *name, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-	int nwname;
-
-	dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
-
-	if (name)
-		nwname = 1;
-	else
-		nwname = 0;
-
-	tc = v9fs_create_twalk(fid, newfid, nwname, &name);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_open - open a file
- *
- * @v9ses - 9P2000 session information
- * @fid - fid to open
- * @mode - mode to open file (R, RW, etc)
- * @fcall - pointer to response fcall
- *
- */
-
-int
-v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
-	    struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-
-	dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
-
-	tc = v9fs_create_topen(fid, mode);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_remove - remove a file or directory
- * @v9ses: 9P2000 session information
- * @fid: fid to remove
- * @fcall: pointer to response fcall
- *
- */
-
-int
-v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
-	      struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-
-	dprintk(DEBUG_9P, "fid %d\n", fid);
-
-	tc = v9fs_create_tremove(fid);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_create - create a file or directory
- * @v9ses: 9P2000 session information
- * @fid: fid to create
- * @name: name of the file or directory to create
- * @perm: permissions to create with
- * @mode: mode to open file (R, RW, etc)
- * @fcall: pointer to response fcall
- *
- */
-
-int
-v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, u32 perm,
-	u8 mode, char *extension, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-
-	dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
-		fid, name, perm, mode);
-
-	tc = v9fs_create_tcreate(fid, name, perm, mode, extension,
-		v9ses->extended);
-
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_read - read data
- * @v9ses: 9P2000 session information
- * @fid: fid to read from
- * @offset: offset to start read at
- * @count: how many bytes to read
- * @fcall: pointer to response fcall (with data)
- *
- */
-
-int
-v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
-	    u32 count, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc, *rc;
-
-	dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
-		(long long unsigned) offset, count);
-
-	tc = v9fs_create_tread(fid, offset, count);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
-		if (!ret)
-			ret = rc->params.rread.count;
-		if (rcp)
-			*rcp = rc;
-		else
-			kfree(rc);
-
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_write - write data
- * @v9ses: 9P2000 session information
- * @fid: fid to write to
- * @offset: offset to start write at
- * @count: how many bytes to write
- * @fcall: pointer to response fcall
- *
- */
-
-int
-v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count,
-	const char __user *data, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc, *rc;
-
-	dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
-		(long long unsigned) offset, count);
-
-	tc = v9fs_create_twrite(fid, offset, count, data);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
-
-		if (!ret)
-			ret = rc->params.rwrite.count;
-		if (rcp)
-			*rcp = rc;
-		else
-			kfree(rc);
-
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 12d52421d58..87897f84dfb 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -1,9 +1,9 @@
-obj-$(CONFIG_9P_FS) := 9p2000.o
+obj-$(CONFIG_9P_FS) := 9p.o
 
-9p2000-objs := \
+9p-objs := \
 	trans_fd.o \
 	mux.o \
-	9p.o \
+	fcall.o \
 	conv.o \
 	vfs_super.o \
 	vfs_inode.o \
diff --git a/fs/9p/fcall.c b/fs/9p/fcall.c
new file mode 100644
index 00000000000..71742ba150c
--- /dev/null
+++ b/fs/9p/fcall.c
@@ -0,0 +1,430 @@
+/*
+ *  linux/fs/9p/fcall.c
+ *
+ *  This file contains functions to perform synchronous 9P calls
+ *
+ *  Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "conv.h"
+#include "mux.h"
+
+/**
+ * v9fs_t_version - negotiate protocol parameters with sever
+ * @v9ses: 9P2000 session information
+ * @msize: requested max size packet
+ * @version: requested version.extension string
+ * @fcall: pointer to response fcall pointer
+ *
+ */
+
+int
+v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
+	       char *version, struct v9fs_fcall **rcp)
+{
+	int ret;
+	struct v9fs_fcall *tc;
+
+	dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
+	tc = v9fs_create_tversion(msize, version);
+
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
+}
+
+/**
+ * v9fs_t_attach - mount the server
+ * @v9ses: 9P2000 session information
+ * @uname: user name doing the attach
+ * @aname: remote name being attached to
+ * @fid: mount fid to attatch to root node
+ * @afid: authentication fid (in this case result key)
+ * @fcall: pointer to response fcall pointer
+ *
+ */
+
+int
+v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
+	      u32 fid, u32 afid, struct v9fs_fcall **rcp)
+{
+	int ret;
+	struct v9fs_fcall* tc;
+
+	dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
+		aname, fid, afid);
+
+	tc = v9fs_create_tattach(fid, afid, uname, aname);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
+}
+
+static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc,
+	struct v9fs_fcall *rc, int err)
+{
+	int fid;
+	struct v9fs_session_info *v9ses;
+
+	if (err)
+		return;
+
+	fid = tc->params.tclunk.fid;
+	kfree(tc);
+
+	if (!rc)
+		return;
+
+	v9ses = a;
+	if (rc->id == RCLUNK)
+		v9fs_put_idpool(fid, &v9ses->fidpool);
+
+	kfree(rc);
+}
+
+/**
+ * v9fs_t_clunk - release a fid (finish a transaction)
+ * @v9ses: 9P2000 session information
+ * @fid: fid to release
+ * @fcall: pointer to response fcall pointer
+ *
+ */
+
+int
+v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid)
+{
+	int ret;
+	struct v9fs_fcall *tc, *rc;
+
+	dprintk(DEBUG_9P, "fid %d\n", fid);
+
+	rc = NULL;
+	tc = v9fs_create_tclunk(fid);
+	if (!IS_ERR(tc))
+		ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
+	else
+		ret = PTR_ERR(tc);
+
+	if (ret)
+		dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret);
+
+	v9fs_t_clunk_cb(v9ses, tc, rc, ret);
+	return ret;
+}
+
+#if 0
+/**
+ * v9fs_v9fs_t_flush - flush a pending transaction
+ * @v9ses: 9P2000 session information
+ * @tag: tag to release
+ *
+ */
+int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag)
+{
+	int ret;
+	struct v9fs_fcall *tc;
+
+	dprintk(DEBUG_9P, "oldtag %d\n", oldtag);
+
+	tc = v9fs_create_tflush(oldtag);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, NULL);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
+}
+#endif
+
+/**
+ * v9fs_t_stat - read a file's meta-data
+ * @v9ses: 9P2000 session information
+ * @fid: fid pointing to file or directory to get info about
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp)
+{
+	int ret;
+	struct v9fs_fcall *tc;
+
+	dprintk(DEBUG_9P, "fid %d\n", fid);
+
+	ret = -ENOMEM;
+	tc = v9fs_create_tstat(fid);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
+}
+
+/**
+ * v9fs_t_wstat - write a file's meta-data
+ * @v9ses: 9P2000 session information
+ * @fid: fid pointing to file or directory to write info about
+ * @stat: metadata
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
+	     struct v9fs_wstat *wstat, struct v9fs_fcall **rcp)
+{
+	int ret;
+	struct v9fs_fcall *tc;
+
+	dprintk(DEBUG_9P, "fid %d\n", fid);
+
+	tc = v9fs_create_twstat(fid, wstat, v9ses->extended);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
+}
+
+/**
+ * v9fs_t_walk - walk a fid to a new file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to walk
+ * @newfid: new fid (for clone operations)
+ * @name: path to walk fid to
+ * @fcall: pointer to response fcall
+ *
+ */
+
+/* TODO: support multiple walk */
+
+int
+v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
+	    char *name, struct v9fs_fcall **rcp)
+{
+	int ret;
+	struct v9fs_fcall *tc;
+	int nwname;
+
+	dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
+
+	if (name)
+		nwname = 1;
+	else
+		nwname = 0;
+
+	tc = v9fs_create_twalk(fid, newfid, nwname, &name);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
+}
+
+/**
+ * v9fs_t_open - open a file
+ *
+ * @v9ses - 9P2000 session information
+ * @fid - fid to open
+ * @mode - mode to open file (R, RW, etc)
+ * @fcall - pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
+	    struct v9fs_fcall **rcp)
+{
+	int ret;
+	struct v9fs_fcall *tc;
+
+	dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
+
+	tc = v9fs_create_topen(fid, mode);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
+}
+
+/**
+ * v9fs_t_remove - remove a file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to remove
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
+	      struct v9fs_fcall **rcp)
+{
+	int ret;
+	struct v9fs_fcall *tc;
+
+	dprintk(DEBUG_9P, "fid %d\n", fid);
+
+	tc = v9fs_create_tremove(fid);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
+}
+
+/**
+ * v9fs_t_create - create a file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to create
+ * @name: name of the file or directory to create
+ * @perm: permissions to create with
+ * @mode: mode to open file (R, RW, etc)
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, u32 perm,
+	u8 mode, char *extension, struct v9fs_fcall **rcp)
+{
+	int ret;
+	struct v9fs_fcall *tc;
+
+	dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
+		fid, name, perm, mode);
+
+	tc = v9fs_create_tcreate(fid, name, perm, mode, extension,
+		v9ses->extended);
+
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
+}
+
+/**
+ * v9fs_t_read - read data
+ * @v9ses: 9P2000 session information
+ * @fid: fid to read from
+ * @offset: offset to start read at
+ * @count: how many bytes to read
+ * @fcall: pointer to response fcall (with data)
+ *
+ */
+
+int
+v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
+	    u32 count, struct v9fs_fcall **rcp)
+{
+	int ret;
+	struct v9fs_fcall *tc, *rc;
+
+	dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
+		(long long unsigned) offset, count);
+
+	tc = v9fs_create_tread(fid, offset, count);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
+		if (!ret)
+			ret = rc->params.rread.count;
+		if (rcp)
+			*rcp = rc;
+		else
+			kfree(rc);
+
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
+}
+
+/**
+ * v9fs_t_write - write data
+ * @v9ses: 9P2000 session information
+ * @fid: fid to write to
+ * @offset: offset to start write at
+ * @count: how many bytes to write
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count,
+	const char __user *data, struct v9fs_fcall **rcp)
+{
+	int ret;
+	struct v9fs_fcall *tc, *rc;
+
+	dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
+		(long long unsigned) offset, count);
+
+	tc = v9fs_create_twrite(fid, offset, count, data);
+	if (!IS_ERR(tc)) {
+		ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
+
+		if (!ret)
+			ret = rc->params.rwrite.count;
+		if (rcp)
+			*rcp = rc;
+		else
+			kfree(rc);
+
+		kfree(tc);
+	} else
+		ret = PTR_ERR(tc);
+
+	return ret;
+}
+
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index b633433f9ce..d37416eb579 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -50,7 +50,7 @@ enum {
 	Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug,
 	Opt_rfdno, Opt_wfdno,
 	/* String options */
-	Opt_name, Opt_remotename,
+	Opt_uname, Opt_remotename,
 	/* Options that take no arguments */
 	Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd,
 	/* Error token */
@@ -66,7 +66,7 @@ static match_table_t tokens = {
 	{Opt_rfdno, "rfdno=%u"},
 	{Opt_wfdno, "wfdno=%u"},
 	{Opt_debug, "debug=%x"},
-	{Opt_name, "name=%s"},
+	{Opt_uname, "uname=%s"},
 	{Opt_remotename, "aname=%s"},
 	{Opt_unix, "proto=unix"},
 	{Opt_tcp, "proto=tcp"},
@@ -115,7 +115,7 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
 		if (!*p)
 			continue;
 		token = match_token(p, tokens, args);
-		if (token < Opt_name) {
+		if (token < Opt_uname) {
 			if ((ret = match_int(&args[0], &option)) < 0) {
 				dprintk(DEBUG_ERROR,
 					"integer field, but no integer?\n");
@@ -157,7 +157,7 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
 		case Opt_fd:
 			v9ses->proto = PROTO_FD;
 			break;
-		case Opt_name:
+		case Opt_uname:
 			match_strcpy(v9ses->name, &args[0]);
 			break;
 		case Opt_remotename:
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 083ed5af8a1..b0a0ae509c0 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -261,7 +261,7 @@ static struct super_operations v9fs_super_ops = {
 };
 
 struct file_system_type v9fs_fs_type = {
-	.name = "9P",
+	.name = "9p",
 	.get_sb = v9fs_get_sb,
 	.kill_sb = v9fs_kill_super,
 	.owner = THIS_MODULE,
-- 
cgit v1.2.3


From 58bf6a2db2a4a1b41712674d9165510180259dec Mon Sep 17 00:00:00 2001
From: Kirk True <kernel@kirkandsheila.com>
Date: Sat, 25 Mar 2006 03:07:30 -0800
Subject: [PATCH] smbfs: Fix debug logging-only compilation error

When SMBFS_DEBUG_VERBOSE is #define-d, the compile breaks:

fs/smbfs/inode.c:217: error: aggregate value used where an integer was expected

This is a simple matter of using the .tv_sec attribute of struct time_spec.

Signed-off-by: Kirk True <kernel@kirkandsheila.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/smbfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 44ed1d418b4..fdeabc0a34f 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -217,7 +217,7 @@ smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr)
 	if (inode->i_mtime.tv_sec != last_time || inode->i_size != last_sz) {
 		VERBOSE("%ld changed, old=%ld, new=%ld, oz=%ld, nz=%ld\n",
 			inode->i_ino,
-			(long) last_time, (long) inode->i_mtime,
+			(long) last_time, (long) inode->i_mtime.tv_sec,
 			(long) last_sz, (long) inode->i_size);
 
 		if (!S_ISDIR(inode->i_mode))
-- 
cgit v1.2.3


From f348d70a324e15afc701a494f32ec468abb7d1eb Mon Sep 17 00:00:00 2001
From: Davide Libenzi <davidel@xmailserver.org>
Date: Sat, 25 Mar 2006 03:07:39 -0800
Subject: [PATCH] POLLRDHUP/EPOLLRDHUP handling for half-closed devices
 notifications

Implement the half-closed devices notifiation, by adding a new POLLRDHUP
(and its alias EPOLLRDHUP) bit to the existing poll/select sets.  Since the
existing POLLHUP handling, that does not report correctly half-closed
devices, was feared to be changed, this implementation leaves the current
POLLHUP reporting unchanged and simply add a new bit that is set in the few
places where it makes sense.  The same thing was discussed and conceptually
agreed quite some time ago:

http://lkml.org/lkml/2003/7/12/116

Since this new event bit is added to the existing Linux poll infrastruture,
even the existing poll/select system calls will be able to use it.  As far
as the existing POLLHUP handling, the patch leaves it as is.  The
pollrdhup-2.6.16.rc5-0.10.diff defines the POLLRDHUP for all the existing
archs and sets the bit in the six relevant files.  The other attached diff
is the simple change required to sys/epoll.h to add the EPOLLRDHUP
definition.

There is "a stupid program" to test POLLRDHUP delivery here:

 http://www.xmailserver.org/pollrdhup-test.c

It tests poll(2), but since the delivery is same epoll(2) will work equally.

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/eventpoll.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1c2b16fda13..a0f682cdd03 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -599,7 +599,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
 	switch (op) {
 	case EPOLL_CTL_ADD:
 		if (!epi) {
-			epds.events |= POLLERR | POLLHUP;
+			epds.events |= POLLERR | POLLHUP | POLLRDHUP;
 
 			error = ep_insert(ep, &epds, tfile, fd);
 		} else
@@ -613,7 +613,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
 		break;
 	case EPOLL_CTL_MOD:
 		if (epi) {
-			epds.events |= POLLERR | POLLHUP;
+			epds.events |= POLLERR | POLLHUP | POLLRDHUP;
 			error = ep_modify(ep, epi, &epds);
 		} else
 			error = -ENOENT;
-- 
cgit v1.2.3


From 11b8448751ba114416c63899638a8e473ebd21e7 Mon Sep 17 00:00:00 2001
From: Denis Vlasenko <vda@ilport.com.ua>
Date: Sat, 25 Mar 2006 03:07:42 -0800
Subject: [PATCH] fix messages in fs/minix

Believe it or not, but in fs/minix/*, the oldest filesystem in the kernel,
something still can be fixed:

	printk("new_inode: bit already set");

"\n" is missing!

While at it, I also removed periods from the end of error messages and made
capitalization uniform.  Also s/i-node/inode/, s/printk (/printk(/

Signed-ff-by: Denis Vlasenko <vda@ilport.com.ua>

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/minix/bitmap.c   | 10 +++++-----
 fs/minix/inode.c    | 26 +++++++++++++-------------
 fs/minix/itree_v1.c |  4 ++--
 fs/minix/itree_v2.c |  4 ++--
 4 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index dc6a4e4abcd..4a6abc49418 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -56,7 +56,7 @@ void minix_free_block(struct inode * inode, int block)
 	unsigned int bit,zone;
 
 	if (block < sbi->s_firstdatazone || block >= sbi->s_nzones) {
-		printk("trying to free block not in datazone\n");
+		printk("Trying to free block not in datazone\n");
 		return;
 	}
 	zone = block - sbi->s_firstdatazone + 1;
@@ -124,7 +124,7 @@ minix_V1_raw_inode(struct super_block *sb, ino_t ino, struct buffer_head **bh)
 		 ino / MINIX_INODES_PER_BLOCK;
 	*bh = sb_bread(sb, block);
 	if (!*bh) {
-		printk("unable to read i-node block\n");
+		printk("Unable to read inode block\n");
 		return NULL;
 	}
 	p = (void *)(*bh)->b_data;
@@ -149,7 +149,7 @@ minix_V2_raw_inode(struct super_block *sb, ino_t ino, struct buffer_head **bh)
 		 ino / MINIX2_INODES_PER_BLOCK;
 	*bh = sb_bread(sb, block);
 	if (!*bh) {
-		printk("unable to read i-node block\n");
+		printk("Unable to read inode block\n");
 		return NULL;
 	}
 	p = (void *)(*bh)->b_data;
@@ -204,7 +204,7 @@ void minix_free_inode(struct inode * inode)
 	bh = sbi->s_imap[ino >> 13];
 	lock_kernel();
 	if (!minix_test_and_clear_bit(ino & 8191, bh->b_data))
-		printk("minix_free_inode: bit %lu already cleared.\n", ino);
+		printk("minix_free_inode: bit %lu already cleared\n", ino);
 	unlock_kernel();
 	mark_buffer_dirty(bh);
  out:
@@ -238,7 +238,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error)
 		return NULL;
 	}
 	if (minix_test_and_set_bit(j,bh->b_data)) {	/* shouldn't happen */
-		printk("new_inode: bit already set");
+		printk("new_inode: bit already set\n");
 		unlock_kernel();
 		iput(inode);
 		return NULL;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index d9ffc43fee5..2dcccf1d1b7 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -127,11 +127,11 @@ static int minix_remount (struct super_block * sb, int * flags, char * data)
 		mark_buffer_dirty(sbi->s_sbh);
 
 		if (!(sbi->s_mount_state & MINIX_VALID_FS))
-			printk ("MINIX-fs warning: remounting unchecked fs, "
-				"running fsck is recommended.\n");
+			printk("MINIX-fs warning: remounting unchecked fs, "
+				"running fsck is recommended\n");
 		else if ((sbi->s_mount_state & MINIX_ERROR_FS))
-			printk ("MINIX-fs warning: remounting fs with errors, "
-				"running fsck is recommended.\n");
+			printk("MINIX-fs warning: remounting fs with errors, "
+				"running fsck is recommended\n");
 	}
 	return 0;
 }
@@ -245,11 +245,11 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 		mark_buffer_dirty(bh);
 	}
 	if (!(sbi->s_mount_state & MINIX_VALID_FS))
-		printk ("MINIX-fs: mounting unchecked file system, "
-			"running fsck is recommended.\n");
+		printk("MINIX-fs: mounting unchecked file system, "
+			"running fsck is recommended\n");
  	else if (sbi->s_mount_state & MINIX_ERROR_FS)
-		printk ("MINIX-fs: mounting file system with errors, "
-			"running fsck is recommended.\n");
+		printk("MINIX-fs: mounting file system with errors, "
+			"running fsck is recommended\n");
 	return 0;
 
 out_iput:
@@ -273,19 +273,19 @@ out_no_bitmap:
 
 out_no_map:
 	if (!silent)
-		printk ("MINIX-fs: can't allocate map\n");
+		printk("MINIX-fs: can't allocate map\n");
 	goto out_release;
 
 out_no_fs:
 	if (!silent)
-		printk("VFS: Can't find a Minix or Minix V2 filesystem on device "
-		       "%s.\n", s->s_id);
+		printk("VFS: Can't find a Minix or Minix V2 filesystem "
+			"on device %s\n", s->s_id);
     out_release:
 	brelse(bh);
 	goto out;
 
 out_bad_hblock:
-	printk("MINIX-fs: blocksize too small for device.\n");
+	printk("MINIX-fs: blocksize too small for device\n");
 	goto out;
 
 out_bad_sb:
@@ -524,7 +524,7 @@ int minix_sync_inode(struct inode * inode)
 		sync_dirty_buffer(bh);
 		if (buffer_req(bh) && !buffer_uptodate(bh))
 		{
-			printk ("IO error syncing minix inode [%s:%08lx]\n",
+			printk("IO error syncing minix inode [%s:%08lx]\n",
 				inode->i_sb->s_id, inode->i_ino);
 			err = -1;
 		}
diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c
index ba06aef4aca..656b1347a25 100644
--- a/fs/minix/itree_v1.c
+++ b/fs/minix/itree_v1.c
@@ -25,9 +25,9 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
 	int n = 0;
 
 	if (block < 0) {
-		printk("minix_bmap: block<0");
+		printk("minix_bmap: block<0\n");
 	} else if (block >= (minix_sb(inode->i_sb)->s_max_size/BLOCK_SIZE)) {
-		printk("minix_bmap: block>big");
+		printk("minix_bmap: block>big\n");
 	} else if (block < 7) {
 		offsets[n++] = block;
 	} else if ((block -= 7) < 512) {
diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c
index 3adc7675560..9adcdc754e0 100644
--- a/fs/minix/itree_v2.c
+++ b/fs/minix/itree_v2.c
@@ -25,9 +25,9 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
 	int n = 0;
 
 	if (block < 0) {
-		printk("minix_bmap: block<0");
+		printk("minix_bmap: block<0\n");
 	} else if (block >= (minix_sb(inode->i_sb)->s_max_size/BLOCK_SIZE)) {
-		printk("minix_bmap: block>big");
+		printk("minix_bmap: block>big\n");
 	} else if (block < 7) {
 		offsets[n++] = block;
 	} else if ((block -= 7) < 256) {
-- 
cgit v1.2.3


From d25b9a1ff0741e71a46f37f45263b5ddcbc948c4 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Sat, 25 Mar 2006 03:07:44 -0800
Subject: [PATCH] freeze_bdev() cleanup

freeze_bdev() uses a fsync_super() without sync_blockdev().  This patch
makes __fsync_super() and shares it.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 6d77ce9f54e..3b3ab528192 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -160,12 +160,7 @@ int sync_blockdev(struct block_device *bdev)
 }
 EXPORT_SYMBOL(sync_blockdev);
 
-/*
- * Write out and wait upon all dirty data associated with this
- * superblock.  Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.
- */
-int fsync_super(struct super_block *sb)
+static void __fsync_super(struct super_block *sb)
 {
 	sync_inodes_sb(sb, 0);
 	DQUOT_SYNC(sb);
@@ -177,7 +172,16 @@ int fsync_super(struct super_block *sb)
 		sb->s_op->sync_fs(sb, 1);
 	sync_blockdev(sb->s_bdev);
 	sync_inodes_sb(sb, 1);
+}
 
+/*
+ * Write out and wait upon all dirty data associated with this
+ * superblock.  Filesystem data as well as the underlying block
+ * device.  Takes the superblock lock.
+ */
+int fsync_super(struct super_block *sb)
+{
+	__fsync_super(sb);
 	return sync_blockdev(sb->s_bdev);
 }
 
@@ -216,19 +220,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 		sb->s_frozen = SB_FREEZE_WRITE;
 		smp_wmb();
 
-		sync_inodes_sb(sb, 0);
-		DQUOT_SYNC(sb);
-
-		lock_super(sb);
-		if (sb->s_dirt && sb->s_op->write_super)
-			sb->s_op->write_super(sb);
-		unlock_super(sb);
-
-		if (sb->s_op->sync_fs)
-			sb->s_op->sync_fs(sb, 1);
-
-		sync_blockdev(sb->s_bdev);
-		sync_inodes_sb(sb, 1);
+		__fsync_super(sb);
 
 		sb->s_frozen = SB_FREEZE_TRANS;
 		smp_wmb();
-- 
cgit v1.2.3


From 4ffc84442572669727dc4fcd976582508eaf23e7 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Sat, 25 Mar 2006 03:07:44 -0800
Subject: [PATCH] Move cond_resched() after iput() in sync_sb_inodes()

In here, I think the following order is more cache-friendly.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fs-writeback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 785c7213a54..f3fbe2d030f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -381,8 +381,8 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
 			list_move(&inode->i_list, &sb->s_dirty);
 		}
 		spin_unlock(&inode_lock);
-		cond_resched();
 		iput(inode);
+		cond_resched();
 		spin_lock(&inode_lock);
 		if (wbc->nr_to_write <= 0)
 			break;
-- 
cgit v1.2.3


From 2ab13460852e65c2ec0e77000baba5e859a6a2cf Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@openvz.org>
Date: Sat, 25 Mar 2006 03:07:45 -0800
Subject: [PATCH] Reduce sched latency in shrink_dcache_sb()

This patch reduces scheduling latency in shrink_dcache_sb() noticed during
remounting of big partitions with many cached dentries.  The same latency
fix was applied to select_parent() long ago.

Signed-off-by: Denis Lunev <den@sw.ru>
Signed-off-by: Pavel Emelianov <xemul@sw.ru>
Signed-off-by: Kirill Korotaev <dev@openvz.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 0f7ec12d65f..93958464850 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -489,6 +489,7 @@ repeat:
 			continue;
 		}
 		prune_one_dentry(dentry);
+		cond_resched_lock(&dcache_lock);
 		goto repeat;
 	}
 	spin_unlock(&dcache_lock);
-- 
cgit v1.2.3


From c1f5a1944657ba6abe375e3bb2a3238a46849f70 Mon Sep 17 00:00:00 2001
From: Kirk True <kernel@kirkandsheila.com>
Date: Sat, 25 Mar 2006 03:07:47 -0800
Subject: [PATCH] ext3: Fix debug logging-only compilation error

When EXT3FS_DEBUG is #define-d, the compile breaks due to #include file
issues.

Signed-off-by: Kirk True <kernel@kirkandsheila.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/bitmap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c
index cb16b4c5d5d..ce4f82b9e52 100644
--- a/fs/ext3/bitmap.c
+++ b/fs/ext3/bitmap.c
@@ -7,11 +7,11 @@
  * Universite Pierre et Marie Curie (Paris VI)
  */
 
-#ifdef EXT3FS_DEBUG
-
 #include <linux/buffer_head.h>
+#include <linux/jbd.h>
+#include <linux/ext3_fs.h>
 
-#include "ext3_fs.h"
+#ifdef EXT3FS_DEBUG
 
 static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
 
-- 
cgit v1.2.3


From 3cdc409c169c9f2155151eea82cb9868e4d62788 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sat, 25 Mar 2006 03:07:50 -0800
Subject: [PATCH] reiserfs/xattr_acl.c:reiserfs_get_acl(): make size an int

The Coverity checker wasn't happy seeing a size_t compared with -ENODATA
and -ENOSYS.

Since the only place where size is set is through the result of
reiserfs_xattr_get() which is an int, we could simply make size an int.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <mason@suse.com>
Cc: Hans Reiser <reiser@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/xattr_acl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index ab8894c3b9e..58c418fbca2 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -182,7 +182,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 {
 	char *name, *value;
 	struct posix_acl *acl, **p_acl;
-	size_t size;
+	int size;
 	int retval;
 	struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
 
@@ -206,7 +206,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 		return posix_acl_dup(*p_acl);
 
 	size = reiserfs_xattr_get(inode, name, NULL, 0);
-	if ((int)size < 0) {
+	if (size < 0) {
 		if (size == -ENODATA || size == -ENOSYS) {
 			*p_acl = ERR_PTR(-ENODATA);
 			return NULL;
-- 
cgit v1.2.3


From d5ee4ea8334368b7d284a7d82855f6f16ba599b4 Mon Sep 17 00:00:00 2001
From: Benoit Boissinot <benoit.boissinot@ens-lyon.org>
Date: Sat, 25 Mar 2006 03:07:54 -0800
Subject: [PATCH] indirect_print_item() warning fix

fs/reiserfs/item_ops.c: In function 'indirect_print_item':
fs/reiserfs/item_ops.c:278: warning: 'num' may be used uninitialized in this function

(akpm: this is probably just gcc being dumb)

Signed-off-by: Benoit Boissinot <benoit.boissinot@ens-lyon.fr>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/item_ops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index e237cd668e5..7a88adbceef 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -275,7 +275,7 @@ static void indirect_print_item(struct item_head *ih, char *item)
 	int j;
 	__le32 *unp;
 	__u32 prev = INT_MAX;
-	int num;
+	int num = 0;
 
 	unp = (__le32 *) item;
 
-- 
cgit v1.2.3


From 7e53cac41da9ebb9be774220c1b2615182667c9d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sat, 25 Mar 2006 03:07:57 -0800
Subject: [PATCH] Honour AOP_TRUNCATE_PAGE returns in page_symlink

As prepare_write, commit_write and readpage are allowed to return
AOP_TRUNCATE_PAGE, page_symlink should respond to them.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namei.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 1baf1b06fe4..712dfc77793 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2627,16 +2627,27 @@ int __page_symlink(struct inode *inode, const char *symname, int len,
 	int err = -ENOMEM;
 	char *kaddr;
 
+retry:
 	page = find_or_create_page(mapping, 0, gfp_mask);
 	if (!page)
 		goto fail;
 	err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
+	if (err == AOP_TRUNCATED_PAGE) {
+		page_cache_release(page);
+		goto retry;
+	}
 	if (err)
 		goto fail_map;
 	kaddr = kmap_atomic(page, KM_USER0);
 	memcpy(kaddr, symname, len-1);
 	kunmap_atomic(kaddr, KM_USER0);
-	mapping->a_ops->commit_write(NULL, page, 0, len-1);
+	err = mapping->a_ops->commit_write(NULL, page, 0, len-1);
+	if (err == AOP_TRUNCATED_PAGE) {
+		page_cache_release(page);
+		goto retry;
+	}
+	if (err)
+		goto fail_map;
 	/*
 	 * Notice that we are _not_ going to block here - end of page is
 	 * unmapped, so this will only try to map the rest of page, see
@@ -2646,7 +2657,8 @@ int __page_symlink(struct inode *inode, const char *symname, int len,
 	 */
 	if (!PageUptodate(page)) {
 		err = mapping->a_ops->readpage(NULL, page);
-		wait_on_page_locked(page);
+		if (err != AOP_TRUNCATED_PAGE)
+			wait_on_page_locked(page);
 	} else {
 		unlock_page(page);
 	}
-- 
cgit v1.2.3


From 76c67de460b3d00b7ab8a96bb18f07ca47d65fba Mon Sep 17 00:00:00 2001
From: Rob Landley <rob@landley.net>
Date: Sat, 25 Mar 2006 03:07:58 -0800
Subject: [PATCH] Ext2 flags shouldn't report "nogrpid"

If I mount ext2 "rw", I want it to say "rw", not "rw,nogrpid".

I caught this writing an automated regression test script for the busybox
mount command.  The symptom is
  /dev/loop0 on /images/ext2.dir type ext2 (rw,nogrpid)
instead of:
  /dev/loop0 on /images/ext2.dir type ext2 (rw)

The behavior was introduced by git commit
8fc2751beb0941966d3a97b26544e8585e428c08.

Signed-off-by: Rob Landley <rob@landley.net>
Cc: Mark Bellon <mbellon@mvista.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/super.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 268b73f5847..7e30bae174e 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -211,8 +211,6 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
 
 	if (sbi->s_mount_opt & EXT2_MOUNT_GRPID)
 		seq_puts(seq, ",grpid");
-	else
-		seq_puts(seq, ",nogrpid");
 
 #if defined(CONFIG_QUOTA)
 	if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA)
-- 
cgit v1.2.3


From 57070d012cd425c3a71663528c56a436abd2d9da Mon Sep 17 00:00:00 2001
From: Peter Staubach <staubach@redhat.com>
Date: Sat, 25 Mar 2006 03:08:04 -0800
Subject: [PATCH] compat_sys_nfsservctl(): handle errors correctly

Correct some error handling on the compat version of the nfsservctl()
system.  It was detecting errors while copying in the arguments from user
space, but then attempting to use the arguments anyway.  This didn't seem
so good.

Signed-off-by: Peter Staubach <staubach@redhat.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 2a88477330f..263990ae409 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -2170,9 +2170,12 @@ asmlinkage long compat_sys_nfsservctl(int cmd, struct compat_nfsctl_arg __user *
 
 	default:
 		err = -EINVAL;
-		goto done;
+		break;
 	}
 
+	if (err)
+		goto done;
+
 	oldfs = get_fs();
 	set_fs(KERNEL_DS);
 	/* The __user pointer casts are valid because of the set_fs() */
-- 
cgit v1.2.3


From 11b0b5abb2097a63c1081d9b7e825b987b227972 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <neukum@fachschaft.cup.uni-muenchen.de>
Date: Sat, 25 Mar 2006 03:08:13 -0800
Subject: [PATCH] use kzalloc and kcalloc in core fs code

Signed-off-by: Oliver Neukum <oliver@neukum.name>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c        | 3 +--
 fs/binfmt_elf.c | 3 +--
 fs/bio.c        | 7 ++-----
 fs/char_dev.c   | 7 ++-----
 fs/compat.c     | 3 +--
 fs/exec.c       | 3 +--
 fs/pipe.c       | 3 +--
 fs/super.c      | 3 +--
 8 files changed, 10 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index aec2b1916d1..e41e932ba48 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -122,10 +122,9 @@ static int aio_setup_ring(struct kioctx *ctx)
 	info->nr = 0;
 	info->ring_pages = info->internal_pages;
 	if (nr_pages > AIO_RING_PAGES) {
-		info->ring_pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_KERNEL);
+		info->ring_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
 		if (!info->ring_pages)
 			return -ENOMEM;
-		memset(info->ring_pages, 0, sizeof(struct page *) * nr_pages);
 	}
 
 	info->mmap_size = nr_pages * PAGE_SIZE;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c2eac2a50bd..61c21e7dc95 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1465,12 +1465,11 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
 		read_lock(&tasklist_lock);
 		do_each_thread(g,p)
 			if (current->mm == p->mm && current != p) {
-				tmp = kmalloc(sizeof(*tmp), GFP_ATOMIC);
+				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
 				if (!tmp) {
 					read_unlock(&tasklist_lock);
 					goto cleanup;
 				}
-				memset(tmp, 0, sizeof(*tmp));
 				INIT_LIST_HEAD(&tmp->list);
 				tmp->thread = p;
 				list_add(&tmp->list, &thread_list);
diff --git a/fs/bio.c b/fs/bio.c
index 0a8c59cb68f..73e664c01d3 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -636,12 +636,10 @@ static struct bio *__bio_map_user_iov(request_queue_t *q,
 		return ERR_PTR(-ENOMEM);
 
 	ret = -ENOMEM;
-	pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
+	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
 	if (!pages)
 		goto out;
 
-	memset(pages, 0, nr_pages * sizeof(struct page *));
-
 	for (i = 0; i < iov_count; i++) {
 		unsigned long uaddr = (unsigned long)iov[i].iov_base;
 		unsigned long len = iov[i].iov_len;
@@ -1186,12 +1184,11 @@ void bioset_free(struct bio_set *bs)
 
 struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size, int scale)
 {
-	struct bio_set *bs = kmalloc(sizeof(*bs), GFP_KERNEL);
+	struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL);
 
 	if (!bs)
 		return NULL;
 
-	memset(bs, 0, sizeof(*bs));
 	bs->bio_pool = mempool_create(bio_pool_size, mempool_alloc_slab,
 			mempool_free_slab, bio_slab);
 
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 5c36345c9bf..8c6eb04d31e 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -146,12 +146,10 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
 	int ret = 0;
 	int i;
 
-	cd = kmalloc(sizeof(struct char_device_struct), GFP_KERNEL);
+	cd = kzalloc(sizeof(struct char_device_struct), GFP_KERNEL);
 	if (cd == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	memset(cd, 0, sizeof(struct char_device_struct));
-
 	mutex_lock(&chrdevs_lock);
 
 	/* temporary */
@@ -466,9 +464,8 @@ static struct kobj_type ktype_cdev_dynamic = {
 
 struct cdev *cdev_alloc(void)
 {
-	struct cdev *p = kmalloc(sizeof(struct cdev), GFP_KERNEL);
+	struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL);
 	if (p) {
-		memset(p, 0, sizeof(struct cdev));
 		p->kobj.ktype = &ktype_cdev_dynamic;
 		INIT_LIST_HEAD(&p->list);
 		kobject_init(&p->kobj);
diff --git a/fs/compat.c b/fs/compat.c
index 263990ae409..ef5a0771592 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1476,10 +1476,9 @@ int compat_do_execve(char * filename,
 	int i;
 
 	retval = -ENOMEM;
-	bprm = kmalloc(sizeof(*bprm), GFP_KERNEL);
+	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
 	if (!bprm)
 		goto out_ret;
-	memset(bprm, 0, sizeof(*bprm));
 
 	file = open_exec(filename);
 	retval = PTR_ERR(file);
diff --git a/fs/exec.c b/fs/exec.c
index d8c477a5625..995cba3c62b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1143,10 +1143,9 @@ int do_execve(char * filename,
 	int i;
 
 	retval = -ENOMEM;
-	bprm = kmalloc(sizeof(*bprm), GFP_KERNEL);
+	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
 	if (!bprm)
 		goto out_ret;
-	memset(bprm, 0, sizeof(*bprm));
 
 	file = open_exec(filename);
 	retval = PTR_ERR(file);
diff --git a/fs/pipe.c b/fs/pipe.c
index 8aada8e426f..d976866a115 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -662,10 +662,9 @@ struct inode* pipe_new(struct inode* inode)
 {
 	struct pipe_inode_info *info;
 
-	info = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
+	info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
 	if (!info)
 		goto fail_page;
-	memset(info, 0, sizeof(*info));
 	inode->i_pipe = info;
 
 	init_waitqueue_head(PIPE_WAIT(*inode));
diff --git a/fs/super.c b/fs/super.c
index 37554b87618..8743e9bbb29 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -55,11 +55,10 @@ DEFINE_SPINLOCK(sb_lock);
  */
 static struct super_block *alloc_super(void)
 {
-	struct super_block *s = kmalloc(sizeof(struct super_block),  GFP_USER);
+	struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
 	static struct super_operations default_op;
 
 	if (s) {
-		memset(s, 0, sizeof(struct super_block));
 		if (security_sb_alloc(s)) {
 			kfree(s);
 			s = NULL;
-- 
cgit v1.2.3


From 0e6b3e5e97e2e8a25bcfc528dad94edf5220dfeb Mon Sep 17 00:00:00 2001
From: Phillip Susi <psusi@cfl.rr.com>
Date: Sat, 25 Mar 2006 03:08:14 -0800
Subject: [PATCH] udf: fix uid/gid options and add uid/gid=ignore and forget
 options

As Pekka Enberg pointed out, with the if still following the else, you can
still get a null uid written to the disk if you specify a default uid= without
uid=forget.  In other words, if the desktop user is uid=1000 and the mount
option uid=1000 is given ( which is done on ubuntu automatically and probably
other distributions that use hal ), then if any other user besides uid 1000
owns a file then a 0 will be written to the media as the owning uid instead.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/udf/inode.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index d04cff2273b..81e0e8459af 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1341,13 +1341,11 @@ udf_update_inode(struct inode *inode, int do_sync)
 
 	if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_FORGET))
 		fe->uid = cpu_to_le32(-1);
-	else if (inode->i_uid != UDF_SB(inode->i_sb)->s_uid)
-		fe->uid = cpu_to_le32(inode->i_uid);
+	else fe->uid = cpu_to_le32(inode->i_uid);
 
 	if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_FORGET))
 		fe->gid = cpu_to_le32(-1);
-	else if (inode->i_gid != UDF_SB(inode->i_sb)->s_gid)
-		fe->gid = cpu_to_le32(inode->i_gid);
+	else fe->gid = cpu_to_le32(inode->i_gid);
 
 	udfperms =	((inode->i_mode & S_IRWXO)     ) |
 			((inode->i_mode & S_IRWXG) << 2) |
-- 
cgit v1.2.3


From 174e27c607cfa3ebb92934d28c0fdfcf5ce6c3af Mon Sep 17 00:00:00 2001
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Date: Sat, 25 Mar 2006 03:08:16 -0800
Subject: [PATCH] direct-io: bug fix in dio handling write error

There is a bug in direct-io on propagating write error up to the higher I/O
layer.  When performing an async ODIRECT write to a block device, if a
device error occurred (like media error or disk is pulled), the error code
is only propagated from device driver to the DIO layer.  The error code
stops at finished_one_bio().  The aysnc write, however, is supposedly have
a corresponding AIO event with appropriate return code (in this case -EIO).
 Application which waits on the async write event, will hang forever since
such AIO event is lost forever (if such app did not use the timeout option
in io_getevents call.  Regardless, an AIO event is lost).

The discovery of above bug leads to another discovery of potential race
window with dio->result.  The fundamental problem is that dio->result is
overloaded with dual use: an indicator of fall back path for partial dio
write, and an error indicator used in the I/O completion path.  In the
event of device error, the setting of -EIO to dio->result clashes with
value used to track partial write that activates the fall back path.

It was also pointed out that it is impossible to use dio->result to track
partial write and at the same time to track error returned from device
driver.  Because direct_io_work can only determines whether it is a partial
write at the end of io submission and in mid stream of those io submission,
a return code could be coming back from the driver.  Thus messing up all
the subsequent logic.

Proposed fix is to separating out error code returned by the IO completion
path from partial IO submit tracking.  A new variable is added to dio
structure specifically to track io error returned in the completion path.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Zach Brown <zach.brown@oracle.com>
Acked-by: Suparna Bhattacharya <suparna@in.ibm.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/direct-io.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 27f3e787fac..235ed8d1f11 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -129,6 +129,7 @@ struct dio {
 	/* AIO related stuff */
 	struct kiocb *iocb;		/* kiocb */
 	int is_async;			/* is IO async ? */
+	int io_error;			/* IO error in completion path */
 	ssize_t result;                 /* IO result */
 };
 
@@ -250,6 +251,10 @@ static void finished_one_bio(struct dio *dio)
 			    ((offset + transferred) > dio->i_size))
 				transferred = dio->i_size - offset;
 
+			/* check for error in completion path */
+			if (dio->io_error)
+				transferred = dio->io_error;
+
 			dio_complete(dio, offset, transferred);
 
 			/* Complete AIO later if falling back to buffered i/o */
@@ -406,7 +411,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
 	int page_no;
 
 	if (!uptodate)
-		dio->result = -EIO;
+		dio->io_error = -EIO;
 
 	if (dio->is_async && dio->rw == READ) {
 		bio_check_pages_dirty(bio);	/* transfers ownership */
@@ -971,6 +976,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	dio->next_block_for_io = -1;
 
 	dio->page_errors = 0;
+	dio->io_error = 0;
 	dio->result = 0;
 	dio->iocb = iocb;
 	dio->i_size = i_size_read(inode);
-- 
cgit v1.2.3


From 55148548124e3e52e8921f1cb0e325111ef9cbb1 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Sat, 25 Mar 2006 03:08:22 -0800
Subject: [PATCH] remove needless check in binfmt_elf.c

Local variable i is unsigned int and thus cannot be negative.

(akpm: unsigneds shouldn't be called `i'.  This value cannot possibly be
negative anyway).

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 61c21e7dc95..4349113881f 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1334,7 +1334,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
-	psinfo->pr_sname = (i < 0 || i > 5) ? '.' : "RSDTZW"[i];
+	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
 	psinfo->pr_nice = task_nice(p);
 	psinfo->pr_flag = p->flags;
-- 
cgit v1.2.3


From 6cc6b1226b71132a1d6e95449d78e051f1f3b506 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Sat, 25 Mar 2006 03:08:23 -0800
Subject: [PATCH] remove needless check in fs/read_write.c

nr_segs is unsigned long and thus cannot be negative.  We checked against 0
few lines before.

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/read_write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/read_write.c b/fs/read_write.c
index 3f7a1a62165..34b1bf259ef 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -470,7 +470,7 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	 * verify all the pointers
 	 */
 	ret = -EINVAL;
-	if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0))
+	if (nr_segs > UIO_MAXIOV)
 		goto out;
 	if (!file->f_op)
 		goto out;
-- 
cgit v1.2.3


From 1ad3dcc09c88c6e01d7624398c591ff3aee22fbe Mon Sep 17 00:00:00 2001
From: Luke Yang <luke.adi@gmail.com>
Date: Sat, 25 Mar 2006 03:08:24 -0800
Subject: [PATCH] flat binary loader doesn't check fd table full

In binfmt_flat.c, the flat binary loader should check file descriptor table
and install the fd on the file.

Convert the function to single-exit and fix this bug.

Signed-off-by: "Luke Yang" <luke.adi@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_flat.c | 73 +++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 54 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 108d56bbd0d..69f44dcdb0b 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -36,6 +36,7 @@
 #include <linux/personality.h>
 #include <linux/init.h>
 #include <linux/flat.h>
+#include <linux/syscalls.h>
 
 #include <asm/byteorder.h>
 #include <asm/system.h>
@@ -426,6 +427,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 	int i, rev, relocs = 0;
 	loff_t fpos;
 	unsigned long start_code, end_code;
+	int ret;
+	int exec_fileno;
 
 	hdr = ((struct flat_hdr *) bprm->buf);		/* exec-header */
 	inode = bprm->file->f_dentry->d_inode;
@@ -450,7 +453,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 		 */
 		if (strncmp(hdr->magic, "#!", 2))
 			printk("BINFMT_FLAT: bad header magic\n");
-		return -ENOEXEC;
+		ret = -ENOEXEC;
+		goto err;
 	}
 
 	if (flags & FLAT_FLAG_KTRACE)
@@ -458,14 +462,16 @@ static int load_flat_file(struct linux_binprm * bprm,
 
 	if (rev != FLAT_VERSION && rev != OLD_FLAT_VERSION) {
 		printk("BINFMT_FLAT: bad flat file version 0x%x (supported 0x%x and 0x%x)\n", rev, FLAT_VERSION, OLD_FLAT_VERSION);
-		return -ENOEXEC;
+		ret = -ENOEXEC;
+		goto err;
 	}
 	
 	/* Don't allow old format executables to use shared libraries */
 	if (rev == OLD_FLAT_VERSION && id != 0) {
 		printk("BINFMT_FLAT: shared libraries are not available before rev 0x%x\n",
 				(int) FLAT_VERSION);
-		return -ENOEXEC;
+		ret = -ENOEXEC;
+		goto err;
 	}
 
 	/*
@@ -478,7 +484,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 #ifndef CONFIG_BINFMT_ZFLAT
 	if (flags & (FLAT_FLAG_GZIP|FLAT_FLAG_GZDATA)) {
 		printk("Support for ZFLAT executables is not enabled.\n");
-		return -ENOEXEC;
+		ret = -ENOEXEC;
+		goto err;
 	}
 #endif
 
@@ -490,14 +497,27 @@ static int load_flat_file(struct linux_binprm * bprm,
 	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
 	if (rlim >= RLIM_INFINITY)
 		rlim = ~0;
-	if (data_len + bss_len > rlim)
-		return -ENOMEM;
+	if (data_len + bss_len > rlim) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* check file descriptor */
+	exec_fileno = get_unused_fd();
+	if (exec_fileno < 0) {
+		ret = -EMFILE;
+		goto err;
+	}
+	get_file(bprm->file);
+	fd_install(exec_fileno, bprm->file);
 
 	/* Flush all traces of the currently running executable */
 	if (id == 0) {
 		result = flush_old_exec(bprm);
-		if (result)
-			return result;
+		if (result) {
+			ret = result;
+			goto err_close;
+		}
 
 		/* OK, This is the point of no return */
 		set_personality(PER_LINUX);
@@ -527,7 +547,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 			if (!textpos)
 				textpos = (unsigned long) -ENOMEM;
 			printk("Unable to mmap process text, errno %d\n", (int)-textpos);
-			return(textpos);
+			ret = textpos;
+			goto err_close;
 		}
 
 		down_write(&current->mm->mmap_sem);
@@ -542,7 +563,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 			printk("Unable to allocate RAM for process data, errno %d\n",
 					(int)-datapos);
 			do_munmap(current->mm, textpos, text_len);
-			return realdatastart;
+			ret = realdatastart;
+			goto err_close;
 		}
 		datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long);
 
@@ -564,7 +586,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 			printk("Unable to read data+bss, errno %d\n", (int)-result);
 			do_munmap(current->mm, textpos, text_len);
 			do_munmap(current->mm, realdatastart, data_len + extra);
-			return result;
+			ret = result;
+			goto err_close;
 		}
 
 		reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len));
@@ -582,7 +605,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 				textpos = (unsigned long) -ENOMEM;
 			printk("Unable to allocate RAM for process text/data, errno %d\n",
 					(int)-textpos);
-			return(textpos);
+			ret = textpos;
+			goto err_close;
 		}
 
 		realdatastart = textpos + ntohl(hdr->data_start);
@@ -627,7 +651,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 			printk("Unable to read code+data+bss, errno %d\n",(int)-result);
 			do_munmap(current->mm, textpos, text_len + data_len + extra +
 				MAX_SHARED_LIBS * sizeof(unsigned long));
-			return result;
+			ret = result;
+			goto err_close;
 		}
 	}
 
@@ -690,8 +715,10 @@ static int load_flat_file(struct linux_binprm * bprm,
 			unsigned long addr;
 			if (*rp) {
 				addr = calc_reloc(*rp, libinfo, id, 0);
-				if (addr == RELOC_FAILED)
-					return -ENOEXEC;
+				if (addr == RELOC_FAILED) {
+					ret = -ENOEXEC;
+					goto err_close;
+				}
 				*rp = addr;
 			}
 		}
@@ -718,8 +745,10 @@ static int load_flat_file(struct linux_binprm * bprm,
 			relval = ntohl(reloc[i]);
 			addr = flat_get_relocate_addr(relval);
 			rp = (unsigned long *) calc_reloc(addr, libinfo, id, 1);
-			if (rp == (unsigned long *)RELOC_FAILED)
-				return -ENOEXEC;
+			if (rp == (unsigned long *)RELOC_FAILED) {
+				ret = -ENOEXEC;
+				goto err_close;
+			}
 
 			/* Get the pointer's value.  */
 			addr = flat_get_addr_from_rp(rp, relval, flags);
@@ -731,8 +760,10 @@ static int load_flat_file(struct linux_binprm * bprm,
 				if ((flags & FLAT_FLAG_GOTPIC) == 0)
 					addr = ntohl(addr);
 				addr = calc_reloc(addr, libinfo, id, 0);
-				if (addr == RELOC_FAILED)
-					return -ENOEXEC;
+				if (addr == RELOC_FAILED) {
+					ret = -ENOEXEC;
+					goto err_close;
+				}
 
 				/* Write back the relocated pointer.  */
 				flat_put_addr_at_rp(rp, addr, relval);
@@ -752,6 +783,10 @@ static int load_flat_file(struct linux_binprm * bprm,
 			stack_len);
 
 	return 0;
+err_close:
+	sys_close(exec_fileno);
+err:
+	return ret;
 }
 
 
-- 
cgit v1.2.3


From 913bd906019514579b3c7ec5ab9c463e89207a57 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 25 Mar 2006 16:29:09 +0100
Subject: [PATCH] x86_64: Increase the variability of the process stack on
 64bit architectures

8MB is not really very random, use 1GB (or more with larger page sizes)
instead.

Also use the low bits of the random generator output now instead of
throwing them away.

Only enabled on x86-64 right now. Other architectures need to add
a suitable STACK_RND_MASK

Cc: mingo@elte.hu
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 4349113881f..537893a1601 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -500,17 +500,22 @@ out:
 #define INTERPRETER_AOUT 1
 #define INTERPRETER_ELF 2
 
+#ifndef STACK_RND_MASK
+#define STACK_RND_MASK 0x7ff		/* with 4K pages 8MB of VA */
+#endif
 
 static unsigned long randomize_stack_top(unsigned long stack_top)
 {
 	unsigned int random_variable = 0;
 
-	if (current->flags & PF_RANDOMIZE)
-		random_variable = get_random_int() % (8*1024*1024);
+	if (current->flags & PF_RANDOMIZE) {
+		random_variable = get_random_int() & STACK_RND_MASK;
+		random_variable <<= PAGE_SHIFT;
+	}
 #ifdef CONFIG_STACK_GROWSUP
-	return PAGE_ALIGN(stack_top + random_variable);
+	return PAGE_ALIGN(stack_top) + random_variable;
 #else
-	return PAGE_ALIGN(stack_top - random_variable);
+	return PAGE_ALIGN(stack_top) - random_variable;
 #endif
 }
 
-- 
cgit v1.2.3


From e827f92355e1eeec2d227d3bd3350d04042a011e Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Sun, 26 Mar 2006 18:24:46 +0200
Subject: BUG_ON() Conversion in fs/buffer.c

this changes if() BUG(); constructs to BUG_ON() which is
cleaner and can better optimized away

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/buffer.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 3b3ab528192..4342ab0ad99 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -796,8 +796,7 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 	if (!mapping->assoc_mapping) {
 		mapping->assoc_mapping = buffer_mapping;
 	} else {
-		if (mapping->assoc_mapping != buffer_mapping)
-			BUG();
+		BUG_ON(mapping->assoc_mapping != buffer_mapping);
 	}
 	if (list_empty(&bh->b_assoc_buffers)) {
 		spin_lock(&buffer_mapping->private_lock);
@@ -1114,8 +1113,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	if (!page)
 		return NULL;
 
-	if (!PageLocked(page))
-		BUG();
+	BUG_ON(!PageLocked(page));
 
 	if (page_has_buffers(page)) {
 		bh = page_buffers(page);
@@ -1522,8 +1520,7 @@ void set_bh_page(struct buffer_head *bh,
 		struct page *page, unsigned long offset)
 {
 	bh->b_page = page;
-	if (offset >= PAGE_SIZE)
-		BUG();
+	BUG_ON(offset >= PAGE_SIZE);
 	if (PageHighMem(page))
 		/*
 		 * This catches illegal uses and preserves the offset:
-- 
cgit v1.2.3


From 28133c7b2b9bbdf8a8765a319e818c1652f38c1f Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Sun, 26 Mar 2006 18:25:39 +0200
Subject: BUG_ON() Conversion in fs/dcache.c

this changes if() BUG(); constructs to BUG_ON() which is
cleaner and can better optimized away

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/dcache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 93958464850..0c78f05819d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -798,7 +798,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
  
 void d_instantiate(struct dentry *entry, struct inode * inode)
 {
-	if (!list_empty(&entry->d_alias)) BUG();
+	BUG_ON(!list_empty(&entry->d_alias));
 	spin_lock(&dcache_lock);
 	if (inode)
 		list_add(&entry->d_alias, &inode->i_dentry);
-- 
cgit v1.2.3


From 4d4ef9abe34a472fbfc9ab75cfde0d58bc342c44 Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Sun, 26 Mar 2006 18:26:51 +0200
Subject: BUG_ON() Conversion in fs/hfs/

this changes if() BUG(); constructs to BUG_ON() which is
cleaner, contains unlikely() and can better optimized away.

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/hfs/bnode.c | 9 +++------
 fs/hfs/btree.c | 3 +--
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index a7a7d77f3fd..1e44dcfe49c 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -306,8 +306,7 @@ void hfs_bnode_unhash(struct hfs_bnode *node)
 	for (p = &node->tree->node_hash[hfs_bnode_hash(node->this)];
 	     *p && *p != node; p = &(*p)->next_hash)
 		;
-	if (!*p)
-		BUG();
+	BUG_ON(!*p);
 	*p = node->next_hash;
 	node->tree->node_hash_cnt--;
 }
@@ -415,8 +414,7 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num)
 	spin_lock(&tree->hash_lock);
 	node = hfs_bnode_findhash(tree, num);
 	spin_unlock(&tree->hash_lock);
-	if (node)
-		BUG();
+	BUG_ON(node);
 	node = __hfs_bnode_create(tree, num);
 	if (!node)
 		return ERR_PTR(-ENOMEM);
@@ -459,8 +457,7 @@ void hfs_bnode_put(struct hfs_bnode *node)
 
 		dprint(DBG_BNODE_REFS, "put_node(%d:%d): %d\n",
 		       node->tree->cnid, node->this, atomic_read(&node->refcnt));
-		if (!atomic_read(&node->refcnt))
-			BUG();
+		BUG_ON(!atomic_read(&node->refcnt));
 		if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock))
 			return;
 		for (i = 0; i < tree->pages_per_bnode; i++) {
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 7bb11edd148..d20131ce4b9 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -36,8 +36,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
 	tree->inode = iget_locked(sb, id);
 	if (!tree->inode)
 		goto free_tree;
-	if (!(tree->inode->i_state & I_NEW))
-		BUG();
+	BUG_ON(!(tree->inode->i_state & I_NEW));
 	{
 	struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
 	HFS_I(tree->inode)->flags = 0;
-- 
cgit v1.2.3


From 309be53da60dc24b73f3f0bceab8f0707c05371f Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Sun, 26 Mar 2006 18:27:41 +0200
Subject: BUG_ON() Conversion in fs/ext2/

this changes if() BUG(); constructs to BUG_ON() which is
cleaner, contains unlikely() and can better optimized away.

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/ext2/dir.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index b3dbd716cd3..0165388c425 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -416,8 +416,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
 
 	lock_page(page);
 	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
-	if (err)
-		BUG();
+	BUG_ON(err);
 	de->inode = cpu_to_le32(inode->i_ino);
 	ext2_set_de_type (de, inode);
 	err = ext2_commit_chunk(page, from, to);
@@ -554,8 +553,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
 		from = (char*)pde - (char*)page_address(page);
 	lock_page(page);
 	err = mapping->a_ops->prepare_write(NULL, page, from, to);
-	if (err)
-		BUG();
+	BUG_ON(err);
 	if (pde)
 		pde->rec_len = cpu_to_le16(to-from);
 	dir->inode = 0;
-- 
cgit v1.2.3


From 64a07bd82ed526d813b64b0957543eef55bdf9c0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Sun, 26 Mar 2006 01:36:55 -0800
Subject: [PATCH] protect remove_proc_entry

It has been discovered that the remove_proc_entry has a race in the removing
of entries in the proc file system that are siblings.  There's no protection
around the traversing and removing of elements that belong in the same
subdirectory.

This subdirectory list is protected in other areas by the BKL.  So the BKL was
at first used to protect this area too, but unfortunately, remove_proc_entry
may be called with spinlocks held.  The BKL may schedule, so this was not a
solution.

The final solution was to add a new global spin lock to protect this list,
called proc_subdir_lock.  This lock now protects the list in
remove_proc_entry, and I also went around looking for other areas that this
list is modified and added this protection there too.  Care must be taken
since these locations call several functions that may also schedule.

Since I don't see any location that these functions that modify the
subdirectory list are called by interrupts, the irqsave/restore versions of
the spin lock was _not_ used.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/generic.c      | 32 +++++++++++++++++++++++++++++---
 fs/proc/proc_devtree.c |  2 ++
 2 files changed, 31 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 20e5c4509a4..47b7a20d45e 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -19,6 +19,7 @@
 #include <linux/idr.h>
 #include <linux/namei.h>
 #include <linux/bitops.h>
+#include <linux/spinlock.h>
 #include <asm/uaccess.h>
 
 #include "internal.h"
@@ -29,6 +30,8 @@ static ssize_t proc_file_write(struct file *file, const char __user *buffer,
 			       size_t count, loff_t *ppos);
 static loff_t proc_file_lseek(struct file *, loff_t, int);
 
+DEFINE_SPINLOCK(proc_subdir_lock);
+
 int proc_match(int len, const char *name, struct proc_dir_entry *de)
 {
 	if (de->namelen != len)
@@ -277,7 +280,9 @@ static int xlate_proc_name(const char *name,
 	const char     		*cp = name, *next;
 	struct proc_dir_entry	*de;
 	int			len;
+	int 			rtn = 0;
 
+	spin_lock(&proc_subdir_lock);
 	de = &proc_root;
 	while (1) {
 		next = strchr(cp, '/');
@@ -289,13 +294,17 @@ static int xlate_proc_name(const char *name,
 			if (proc_match(len, cp, de))
 				break;
 		}
-		if (!de)
-			return -ENOENT;
+		if (!de) {
+			rtn = -ENOENT;
+			goto out;
+		}
 		cp += len + 1;
 	}
 	*residual = cp;
 	*ret = de;
-	return 0;
+out:
+	spin_unlock(&proc_subdir_lock);
+	return rtn;
 }
 
 static DEFINE_IDR(proc_inum_idr);
@@ -380,6 +389,7 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
 	int error = -ENOENT;
 
 	lock_kernel();
+	spin_lock(&proc_subdir_lock);
 	de = PDE(dir);
 	if (de) {
 		for (de = de->subdir; de ; de = de->next) {
@@ -388,12 +398,15 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
 			if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
 				unsigned int ino = de->low_ino;
 
+				spin_unlock(&proc_subdir_lock);
 				error = -EINVAL;
 				inode = proc_get_inode(dir->i_sb, ino, de);
+				spin_lock(&proc_subdir_lock);
 				break;
 			}
 		}
 	}
+	spin_unlock(&proc_subdir_lock);
 	unlock_kernel();
 
 	if (inode) {
@@ -447,11 +460,13 @@ int proc_readdir(struct file * filp,
 			filp->f_pos++;
 			/* fall through */
 		default:
+			spin_lock(&proc_subdir_lock);
 			de = de->subdir;
 			i -= 2;
 			for (;;) {
 				if (!de) {
 					ret = 1;
+					spin_unlock(&proc_subdir_lock);
 					goto out;
 				}
 				if (!i)
@@ -461,12 +476,16 @@ int proc_readdir(struct file * filp,
 			}
 
 			do {
+				/* filldir passes info to user space */
+				spin_unlock(&proc_subdir_lock);
 				if (filldir(dirent, de->name, de->namelen, filp->f_pos,
 					    de->low_ino, de->mode >> 12) < 0)
 					goto out;
+				spin_lock(&proc_subdir_lock);
 				filp->f_pos++;
 				de = de->next;
 			} while (de);
+			spin_unlock(&proc_subdir_lock);
 	}
 	ret = 1;
 out:	unlock_kernel();
@@ -500,9 +519,13 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
 	if (i == 0)
 		return -EAGAIN;
 	dp->low_ino = i;
+
+	spin_lock(&proc_subdir_lock);
 	dp->next = dir->subdir;
 	dp->parent = dir;
 	dir->subdir = dp;
+	spin_unlock(&proc_subdir_lock);
+
 	if (S_ISDIR(dp->mode)) {
 		if (dp->proc_iops == NULL) {
 			dp->proc_fops = &proc_dir_operations;
@@ -694,6 +717,8 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 	if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
 		goto out;
 	len = strlen(fn);
+
+	spin_lock(&proc_subdir_lock);
 	for (p = &parent->subdir; *p; p=&(*p)->next ) {
 		if (!proc_match(len, fn, *p))
 			continue;
@@ -714,6 +739,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 		}
 		break;
 	}
+	spin_unlock(&proc_subdir_lock);
 out:
 	return;
 }
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 9bdd077d6f5..596b4b4f1cc 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -136,9 +136,11 @@ void proc_device_tree_add_node(struct device_node *np,
 		 * properties are quite unimportant for us though, thus we
 		 * simply "skip" them here, but we do have to check.
 		 */
+		spin_lock(&proc_subdir_lock);
 		for (ent = de->subdir; ent != NULL; ent = ent->next)
 			if (!strcmp(ent->name, pp->name))
 				break;
+		spin_unlock(&proc_subdir_lock);
 		if (ent != NULL) {
 			printk(KERN_WARNING "device-tree: property \"%s\" name"
 			       " conflicts with node in %s\n", pp->name,
-- 
cgit v1.2.3


From 353ab6e97b8f209dbecc9f650f1f84e3da2a7bb1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 26 Mar 2006 01:37:12 -0800
Subject: [PATCH] sem2mutex: fs/

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Eric Van Hensbergen <ericvh@ericvh.myip.org>
Cc: Robert Love <rml@tech9.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cramfs/inode.c     | 31 ++++++++++++++++---------------
 fs/dcookies.c         | 19 ++++++++++---------
 fs/jffs2/compr_zlib.c | 19 ++++++++++---------
 fs/jfs/jfs_logmgr.c   | 27 ++++++++++++++-------------
 fs/lockd/host.c       | 19 ++++++++++---------
 fs/lockd/svc.c        | 17 +++++++++--------
 fs/lockd/svcsubs.c    | 17 +++++++++--------
 fs/nfs/callback.c     | 11 ++++++-----
 fs/nfsd/nfs4state.c   |  9 +++++----
 fs/partitions/devfs.c | 12 ++++++------
 fs/super.c            |  7 ++++---
 11 files changed, 99 insertions(+), 89 deletions(-)

(limited to 'fs')

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 8ad52f5bf25..acc1b2c10a8 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -22,6 +22,7 @@
 #include <linux/cramfs_fs_sb.h>
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
+#include <linux/mutex.h>
 #include <asm/semaphore.h>
 
 #include <asm/uaccess.h>
@@ -31,7 +32,7 @@ static struct inode_operations cramfs_dir_inode_operations;
 static struct file_operations cramfs_directory_operations;
 static struct address_space_operations cramfs_aops;
 
-static DECLARE_MUTEX(read_mutex);
+static DEFINE_MUTEX(read_mutex);
 
 
 /* These two macros may change in future, to provide better st_ino
@@ -250,20 +251,20 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
 	memset(sbi, 0, sizeof(struct cramfs_sb_info));
 
 	/* Invalidate the read buffers on mount: think disk change.. */
-	down(&read_mutex);
+	mutex_lock(&read_mutex);
 	for (i = 0; i < READ_BUFFERS; i++)
 		buffer_blocknr[i] = -1;
 
 	/* Read the first block and get the superblock from it */
 	memcpy(&super, cramfs_read(sb, 0, sizeof(super)), sizeof(super));
-	up(&read_mutex);
+	mutex_unlock(&read_mutex);
 
 	/* Do sanity checks on the superblock */
 	if (super.magic != CRAMFS_MAGIC) {
 		/* check at 512 byte offset */
-		down(&read_mutex);
+		mutex_lock(&read_mutex);
 		memcpy(&super, cramfs_read(sb, 512, sizeof(super)), sizeof(super));
-		up(&read_mutex);
+		mutex_unlock(&read_mutex);
 		if (super.magic != CRAMFS_MAGIC) {
 			if (!silent)
 				printk(KERN_ERR "cramfs: wrong magic\n");
@@ -366,7 +367,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		mode_t mode;
 		int namelen, error;
 
-		down(&read_mutex);
+		mutex_lock(&read_mutex);
 		de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+256);
 		name = (char *)(de+1);
 
@@ -379,7 +380,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		memcpy(buf, name, namelen);
 		ino = CRAMINO(de);
 		mode = de->mode;
-		up(&read_mutex);
+		mutex_unlock(&read_mutex);
 		nextoffset = offset + sizeof(*de) + namelen;
 		for (;;) {
 			if (!namelen) {
@@ -410,7 +411,7 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s
 	unsigned int offset = 0;
 	int sorted;
 
-	down(&read_mutex);
+	mutex_lock(&read_mutex);
 	sorted = CRAMFS_SB(dir->i_sb)->flags & CRAMFS_FLAG_SORTED_DIRS;
 	while (offset < dir->i_size) {
 		struct cramfs_inode *de;
@@ -433,7 +434,7 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s
 
 		for (;;) {
 			if (!namelen) {
-				up(&read_mutex);
+				mutex_unlock(&read_mutex);
 				return ERR_PTR(-EIO);
 			}
 			if (name[namelen-1])
@@ -447,7 +448,7 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s
 			continue;
 		if (!retval) {
 			struct cramfs_inode entry = *de;
-			up(&read_mutex);
+			mutex_unlock(&read_mutex);
 			d_add(dentry, get_cramfs_inode(dir->i_sb, &entry));
 			return NULL;
 		}
@@ -455,7 +456,7 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s
 		if (sorted)
 			break;
 	}
-	up(&read_mutex);
+	mutex_unlock(&read_mutex);
 	d_add(dentry, NULL);
 	return NULL;
 }
@@ -474,21 +475,21 @@ static int cramfs_readpage(struct file *file, struct page * page)
 		u32 start_offset, compr_len;
 
 		start_offset = OFFSET(inode) + maxblock*4;
-		down(&read_mutex);
+		mutex_lock(&read_mutex);
 		if (page->index)
 			start_offset = *(u32 *) cramfs_read(sb, blkptr_offset-4, 4);
 		compr_len = (*(u32 *) cramfs_read(sb, blkptr_offset, 4) - start_offset);
-		up(&read_mutex);
+		mutex_unlock(&read_mutex);
 		pgdata = kmap(page);
 		if (compr_len == 0)
 			; /* hole */
 		else {
-			down(&read_mutex);
+			mutex_lock(&read_mutex);
 			bytes_filled = cramfs_uncompress_block(pgdata,
 				 PAGE_CACHE_SIZE,
 				 cramfs_read(sb, start_offset, compr_len),
 				 compr_len);
-			up(&read_mutex);
+			mutex_unlock(&read_mutex);
 		}
 	} else
 		pgdata = kmap(page);
diff --git a/fs/dcookies.c b/fs/dcookies.c
index f8274a8f83b..ef758cfa556 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -23,6 +23,7 @@
 #include <linux/mm.h>
 #include <linux/errno.h>
 #include <linux/dcookies.h>
+#include <linux/mutex.h>
 #include <asm/uaccess.h>
 
 /* The dcookies are allocated from a kmem_cache and
@@ -36,7 +37,7 @@ struct dcookie_struct {
 };
 
 static LIST_HEAD(dcookie_users);
-static DECLARE_MUTEX(dcookie_sem);
+static DEFINE_MUTEX(dcookie_mutex);
 static kmem_cache_t * dcookie_cache;
 static struct list_head * dcookie_hashtable;
 static size_t hash_size;
@@ -114,7 +115,7 @@ int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
 	int err = 0;
 	struct dcookie_struct * dcs;
 
-	down(&dcookie_sem);
+	mutex_lock(&dcookie_mutex);
 
 	if (!is_live()) {
 		err = -EINVAL;
@@ -134,7 +135,7 @@ int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
 	*cookie = dcookie_value(dcs);
 
 out:
-	up(&dcookie_sem);
+	mutex_unlock(&dcookie_mutex);
 	return err;
 }
 
@@ -157,7 +158,7 @@ asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user * buf, size_t len)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	down(&dcookie_sem);
+	mutex_lock(&dcookie_mutex);
 
 	if (!is_live()) {
 		err = -EINVAL;
@@ -192,7 +193,7 @@ asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user * buf, size_t len)
 out_free:
 	kfree(kbuf);
 out:
-	up(&dcookie_sem);
+	mutex_unlock(&dcookie_mutex);
 	return err;
 }
 
@@ -290,7 +291,7 @@ struct dcookie_user * dcookie_register(void)
 {
 	struct dcookie_user * user;
 
-	down(&dcookie_sem);
+	mutex_lock(&dcookie_mutex);
 
 	user = kmalloc(sizeof(struct dcookie_user), GFP_KERNEL);
 	if (!user)
@@ -302,7 +303,7 @@ struct dcookie_user * dcookie_register(void)
 	list_add(&user->next, &dcookie_users);
 
 out:
-	up(&dcookie_sem);
+	mutex_unlock(&dcookie_mutex);
 	return user;
 out_free:
 	kfree(user);
@@ -313,7 +314,7 @@ out_free:
 
 void dcookie_unregister(struct dcookie_user * user)
 {
-	down(&dcookie_sem);
+	mutex_lock(&dcookie_mutex);
 
 	list_del(&user->next);
 	kfree(user);
@@ -321,7 +322,7 @@ void dcookie_unregister(struct dcookie_user * user)
 	if (!is_live())
 		dcookie_exit();
 
-	up(&dcookie_sem);
+	mutex_unlock(&dcookie_mutex);
 }
 
 EXPORT_SYMBOL_GPL(dcookie_register);
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 4db8be8e90c..5c63e0cdcf4 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -33,13 +33,14 @@
 	*/
 #define STREAM_END_SPACE 12
 
-static DECLARE_MUTEX(deflate_sem);
-static DECLARE_MUTEX(inflate_sem);
+static DEFINE_MUTEX(deflate_mutex);
+static DEFINE_MUTEX(inflate_mutex);
 static z_stream inf_strm, def_strm;
 
 #ifdef __KERNEL__ /* Linux-only */
 #include <linux/vmalloc.h>
 #include <linux/init.h>
+#include <linux/mutex.h>
 
 static int __init alloc_workspaces(void)
 {
@@ -79,11 +80,11 @@ static int jffs2_zlib_compress(unsigned char *data_in,
 	if (*dstlen <= STREAM_END_SPACE)
 		return -1;
 
-	down(&deflate_sem);
+	mutex_lock(&deflate_mutex);
 
 	if (Z_OK != zlib_deflateInit(&def_strm, 3)) {
 		printk(KERN_WARNING "deflateInit failed\n");
-		up(&deflate_sem);
+		mutex_unlock(&deflate_mutex);
 		return -1;
 	}
 
@@ -104,7 +105,7 @@ static int jffs2_zlib_compress(unsigned char *data_in,
 		if (ret != Z_OK) {
 			D1(printk(KERN_DEBUG "deflate in loop returned %d\n", ret));
 			zlib_deflateEnd(&def_strm);
-			up(&deflate_sem);
+			mutex_unlock(&deflate_mutex);
 			return -1;
 		}
 	}
@@ -133,7 +134,7 @@ static int jffs2_zlib_compress(unsigned char *data_in,
 	*sourcelen = def_strm.total_in;
 	ret = 0;
  out:
-	up(&deflate_sem);
+	mutex_unlock(&deflate_mutex);
 	return ret;
 }
 
@@ -145,7 +146,7 @@ static int jffs2_zlib_decompress(unsigned char *data_in,
 	int ret;
 	int wbits = MAX_WBITS;
 
-	down(&inflate_sem);
+	mutex_lock(&inflate_mutex);
 
 	inf_strm.next_in = data_in;
 	inf_strm.avail_in = srclen;
@@ -173,7 +174,7 @@ static int jffs2_zlib_decompress(unsigned char *data_in,
 
 	if (Z_OK != zlib_inflateInit2(&inf_strm, wbits)) {
 		printk(KERN_WARNING "inflateInit failed\n");
-		up(&inflate_sem);
+		mutex_unlock(&inflate_mutex);
 		return 1;
 	}
 
@@ -183,7 +184,7 @@ static int jffs2_zlib_decompress(unsigned char *data_in,
 		printk(KERN_NOTICE "inflate returned %d\n", ret);
 	}
 	zlib_inflateEnd(&inf_strm);
-	up(&inflate_sem);
+	mutex_unlock(&inflate_mutex);
         return 0;
 }
 
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 0b348b13b55..3315f0b1fbc 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -69,6 +69,7 @@
 #include <linux/bio.h>
 #include <linux/suspend.h>
 #include <linux/delay.h>
+#include <linux/mutex.h>
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
 #include "jfs_metapage.h"
@@ -165,7 +166,7 @@ do {						\
  */
 static LIST_HEAD(jfs_external_logs);
 static struct jfs_log *dummy_log = NULL;
-static DECLARE_MUTEX(jfs_log_sem);
+static DEFINE_MUTEX(jfs_log_mutex);
 
 /*
  * forward references
@@ -1085,20 +1086,20 @@ int lmLogOpen(struct super_block *sb)
 	if (sbi->mntflag & JFS_INLINELOG)
 		return open_inline_log(sb);
 
-	down(&jfs_log_sem);
+	mutex_lock(&jfs_log_mutex);
 	list_for_each_entry(log, &jfs_external_logs, journal_list) {
 		if (log->bdev->bd_dev == sbi->logdev) {
 			if (memcmp(log->uuid, sbi->loguuid,
 				   sizeof(log->uuid))) {
 				jfs_warn("wrong uuid on JFS journal\n");
-				up(&jfs_log_sem);
+				mutex_unlock(&jfs_log_mutex);
 				return -EINVAL;
 			}
 			/*
 			 * add file system to log active file system list
 			 */
 			if ((rc = lmLogFileSystem(log, sbi, 1))) {
-				up(&jfs_log_sem);
+				mutex_unlock(&jfs_log_mutex);
 				return rc;
 			}
 			goto journal_found;
@@ -1106,7 +1107,7 @@ int lmLogOpen(struct super_block *sb)
 	}
 
 	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
-		up(&jfs_log_sem);
+		mutex_unlock(&jfs_log_mutex);
 		return -ENOMEM;
 	}
 	INIT_LIST_HEAD(&log->sb_list);
@@ -1151,7 +1152,7 @@ journal_found:
 	sbi->log = log;
 	LOG_UNLOCK(log);
 
-	up(&jfs_log_sem);
+	mutex_unlock(&jfs_log_mutex);
 	return 0;
 
 	/*
@@ -1168,7 +1169,7 @@ journal_found:
 	blkdev_put(bdev);
 
       free:		/* free log descriptor */
-	up(&jfs_log_sem);
+	mutex_unlock(&jfs_log_mutex);
 	kfree(log);
 
 	jfs_warn("lmLogOpen: exit(%d)", rc);
@@ -1212,11 +1213,11 @@ static int open_dummy_log(struct super_block *sb)
 {
 	int rc;
 
-	down(&jfs_log_sem);
+	mutex_lock(&jfs_log_mutex);
 	if (!dummy_log) {
 		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
 		if (!dummy_log) {
-			up(&jfs_log_sem);
+			mutex_unlock(&jfs_log_mutex);
 			return -ENOMEM;
 		}
 		INIT_LIST_HEAD(&dummy_log->sb_list);
@@ -1229,7 +1230,7 @@ static int open_dummy_log(struct super_block *sb)
 		if (rc) {
 			kfree(dummy_log);
 			dummy_log = NULL;
-			up(&jfs_log_sem);
+			mutex_unlock(&jfs_log_mutex);
 			return rc;
 		}
 	}
@@ -1238,7 +1239,7 @@ static int open_dummy_log(struct super_block *sb)
 	list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
 	JFS_SBI(sb)->log = dummy_log;
 	LOG_UNLOCK(dummy_log);
-	up(&jfs_log_sem);
+	mutex_unlock(&jfs_log_mutex);
 
 	return 0;
 }
@@ -1466,7 +1467,7 @@ int lmLogClose(struct super_block *sb)
 
 	jfs_info("lmLogClose: log:0x%p", log);
 
-	down(&jfs_log_sem);
+	mutex_lock(&jfs_log_mutex);
 	LOG_LOCK(log);
 	list_del(&sbi->log_list);
 	LOG_UNLOCK(log);
@@ -1516,7 +1517,7 @@ int lmLogClose(struct super_block *sb)
 	kfree(log);
 
       out:
-	up(&jfs_log_sem);
+	mutex_unlock(&jfs_log_mutex);
 	jfs_info("lmLogClose: exit(%d)", rc);
 	return rc;
 }
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 112ebf8b8df..729ac427d35 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -16,6 +16,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/sm_inter.h>
+#include <linux/mutex.h>
 
 
 #define NLMDBG_FACILITY		NLMDBG_HOSTCACHE
@@ -30,7 +31,7 @@
 static struct nlm_host *	nlm_hosts[NLM_HOST_NRHASH];
 static unsigned long		next_gc;
 static int			nrhosts;
-static DECLARE_MUTEX(nlm_host_sema);
+static DEFINE_MUTEX(nlm_host_mutex);
 
 
 static void			nlm_gc_hosts(void);
@@ -71,7 +72,7 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
 	hash = NLM_ADDRHASH(sin->sin_addr.s_addr);
 
 	/* Lock hash table */
-	down(&nlm_host_sema);
+	mutex_lock(&nlm_host_mutex);
 
 	if (time_after_eq(jiffies, next_gc))
 		nlm_gc_hosts();
@@ -91,7 +92,7 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
 				nlm_hosts[hash] = host;
 			}
 			nlm_get_host(host);
-			up(&nlm_host_sema);
+			mutex_unlock(&nlm_host_mutex);
 			return host;
 		}
 	}
@@ -130,7 +131,7 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
 		next_gc = 0;
 
 nohost:
-	up(&nlm_host_sema);
+	mutex_unlock(&nlm_host_mutex);
 	return host;
 }
 
@@ -141,19 +142,19 @@ nlm_find_client(void)
 	 * and return it
 	 */
 	int hash;
-	down(&nlm_host_sema);
+	mutex_lock(&nlm_host_mutex);
 	for (hash = 0 ; hash < NLM_HOST_NRHASH; hash++) {
 		struct nlm_host *host, **hp;
 		for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) {
 			if (host->h_server &&
 			    host->h_killed == 0) {
 				nlm_get_host(host);
-				up(&nlm_host_sema);
+				mutex_unlock(&nlm_host_mutex);
 				return host;
 			}
 		}
 	}
-	up(&nlm_host_sema);
+	mutex_unlock(&nlm_host_mutex);
 	return NULL;
 }
 
@@ -265,7 +266,7 @@ nlm_shutdown_hosts(void)
 	int		i;
 
 	dprintk("lockd: shutting down host module\n");
-	down(&nlm_host_sema);
+	mutex_lock(&nlm_host_mutex);
 
 	/* First, make all hosts eligible for gc */
 	dprintk("lockd: nuking all hosts...\n");
@@ -276,7 +277,7 @@ nlm_shutdown_hosts(void)
 
 	/* Then, perform a garbage collection pass */
 	nlm_gc_hosts();
-	up(&nlm_host_sema);
+	mutex_unlock(&nlm_host_mutex);
 
 	/* complain if any hosts are left */
 	if (nrhosts) {
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 5e85bde6c12..fd56c8872f3 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -25,6 +25,7 @@
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
+#include <linux/mutex.h>
 
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/stats.h>
@@ -43,13 +44,13 @@ static struct svc_program	nlmsvc_program;
 struct nlmsvc_binding *		nlmsvc_ops;
 EXPORT_SYMBOL(nlmsvc_ops);
 
-static DECLARE_MUTEX(nlmsvc_sema);
+static DEFINE_MUTEX(nlmsvc_mutex);
 static unsigned int		nlmsvc_users;
 static pid_t			nlmsvc_pid;
 int				nlmsvc_grace_period;
 unsigned long			nlmsvc_timeout;
 
-static DECLARE_MUTEX_LOCKED(lockd_start);
+static DECLARE_COMPLETION(lockd_start_done);
 static DECLARE_WAIT_QUEUE_HEAD(lockd_exit);
 
 /*
@@ -112,7 +113,7 @@ lockd(struct svc_rqst *rqstp)
 	 * Let our maker know we're running.
 	 */
 	nlmsvc_pid = current->pid;
-	up(&lockd_start);
+	complete(&lockd_start_done);
 
 	daemonize("lockd");
 
@@ -215,7 +216,7 @@ lockd_up(void)
 	struct svc_serv *	serv;
 	int			error = 0;
 
-	down(&nlmsvc_sema);
+	mutex_lock(&nlmsvc_mutex);
 	/*
 	 * Unconditionally increment the user count ... this is
 	 * the number of clients who _want_ a lockd process.
@@ -263,7 +264,7 @@ lockd_up(void)
 			"lockd_up: create thread failed, error=%d\n", error);
 		goto destroy_and_out;
 	}
-	down(&lockd_start);
+	wait_for_completion(&lockd_start_done);
 
 	/*
 	 * Note: svc_serv structures have an initial use count of 1,
@@ -272,7 +273,7 @@ lockd_up(void)
 destroy_and_out:
 	svc_destroy(serv);
 out:
-	up(&nlmsvc_sema);
+	mutex_unlock(&nlmsvc_mutex);
 	return error;
 }
 EXPORT_SYMBOL(lockd_up);
@@ -285,7 +286,7 @@ lockd_down(void)
 {
 	static int warned;
 
-	down(&nlmsvc_sema);
+	mutex_lock(&nlmsvc_mutex);
 	if (nlmsvc_users) {
 		if (--nlmsvc_users)
 			goto out;
@@ -315,7 +316,7 @@ lockd_down(void)
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 out:
-	up(&nlmsvc_sema);
+	mutex_unlock(&nlmsvc_mutex);
 }
 EXPORT_SYMBOL(lockd_down);
 
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index c7a6e3ae44d..a570e5c8a93 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -11,6 +11,7 @@
 #include <linux/string.h>
 #include <linux/time.h>
 #include <linux/in.h>
+#include <linux/mutex.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/nfsd/nfsfh.h>
@@ -28,7 +29,7 @@
 #define FILE_HASH_BITS		5
 #define FILE_NRHASH		(1<<FILE_HASH_BITS)
 static struct nlm_file *	nlm_files[FILE_NRHASH];
-static DECLARE_MUTEX(nlm_file_sema);
+static DEFINE_MUTEX(nlm_file_mutex);
 
 #ifdef NFSD_DEBUG
 static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
@@ -91,7 +92,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
 	hash = file_hash(f);
 
 	/* Lock file table */
-	down(&nlm_file_sema);
+	mutex_lock(&nlm_file_mutex);
 
 	for (file = nlm_files[hash]; file; file = file->f_next)
 		if (!nfs_compare_fh(&file->f_handle, f))
@@ -130,7 +131,7 @@ found:
 	nfserr = 0;
 
 out_unlock:
-	up(&nlm_file_sema);
+	mutex_unlock(&nlm_file_mutex);
 	return nfserr;
 
 out_free:
@@ -239,14 +240,14 @@ nlm_traverse_files(struct nlm_host *host, int action)
 	struct nlm_file	*file, **fp;
 	int		i;
 
-	down(&nlm_file_sema);
+	mutex_lock(&nlm_file_mutex);
 	for (i = 0; i < FILE_NRHASH; i++) {
 		fp = nlm_files + i;
 		while ((file = *fp) != NULL) {
 			/* Traverse locks, blocks and shares of this file
 			 * and update file->f_locks count */
 			if (nlm_inspect_file(host, file, action)) {
-				up(&nlm_file_sema);
+				mutex_unlock(&nlm_file_mutex);
 				return 1;
 			}
 
@@ -261,7 +262,7 @@ nlm_traverse_files(struct nlm_host *host, int action)
 			}
 		}
 	}
-	up(&nlm_file_sema);
+	mutex_unlock(&nlm_file_mutex);
 	return 0;
 }
 
@@ -281,7 +282,7 @@ nlm_release_file(struct nlm_file *file)
 				file, file->f_count);
 
 	/* Lock file table */
-	down(&nlm_file_sema);
+	mutex_lock(&nlm_file_mutex);
 
 	/* If there are no more locks etc, delete the file */
 	if(--file->f_count == 0) {
@@ -289,7 +290,7 @@ nlm_release_file(struct nlm_file *file)
 			nlm_delete_file(file);
 	}
 
-	up(&nlm_file_sema);
+	mutex_unlock(&nlm_file_mutex);
 }
 
 /*
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 99d2cfbce86..90c95adc8c1 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -14,6 +14,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/nfs_fs.h>
+#include <linux/mutex.h>
 
 #include <net/inet_sock.h>
 
@@ -31,7 +32,7 @@ struct nfs_callback_data {
 };
 
 static struct nfs_callback_data nfs_callback_info;
-static DECLARE_MUTEX(nfs_callback_sema);
+static DEFINE_MUTEX(nfs_callback_mutex);
 static struct svc_program nfs4_callback_program;
 
 unsigned int nfs_callback_set_tcpport;
@@ -95,7 +96,7 @@ int nfs_callback_up(void)
 	int ret = 0;
 
 	lock_kernel();
-	down(&nfs_callback_sema);
+	mutex_lock(&nfs_callback_mutex);
 	if (nfs_callback_info.users++ || nfs_callback_info.pid != 0)
 		goto out;
 	init_completion(&nfs_callback_info.started);
@@ -121,7 +122,7 @@ int nfs_callback_up(void)
 	nfs_callback_info.serv = serv;
 	wait_for_completion(&nfs_callback_info.started);
 out:
-	up(&nfs_callback_sema);
+	mutex_unlock(&nfs_callback_mutex);
 	unlock_kernel();
 	return ret;
 out_destroy:
@@ -139,7 +140,7 @@ int nfs_callback_down(void)
 	int ret = 0;
 
 	lock_kernel();
-	down(&nfs_callback_sema);
+	mutex_lock(&nfs_callback_mutex);
 	nfs_callback_info.users--;
 	do {
 		if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0)
@@ -147,7 +148,7 @@ int nfs_callback_down(void)
 		if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0)
 			break;
 	} while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
-	up(&nfs_callback_sema);
+	mutex_unlock(&nfs_callback_mutex);
 	unlock_kernel();
 	return ret;
 }
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f6ab762bea9..c7b87e92f91 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -49,6 +49,7 @@
 #include <linux/nfsd/state.h>
 #include <linux/nfsd/xdr4.h>
 #include <linux/namei.h>
+#include <linux/mutex.h>
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -77,11 +78,11 @@ static void nfs4_set_recdir(char *recdir);
 
 /* Locking:
  *
- * client_sema: 
+ * client_mutex:
  * 	protects clientid_hashtbl[], clientstr_hashtbl[],
  * 	unconfstr_hashtbl[], uncofid_hashtbl[].
  */
-static DECLARE_MUTEX(client_sema);
+static DEFINE_MUTEX(client_mutex);
 
 static kmem_cache_t *stateowner_slab = NULL;
 static kmem_cache_t *file_slab = NULL;
@@ -91,13 +92,13 @@ static kmem_cache_t *deleg_slab = NULL;
 void
 nfs4_lock_state(void)
 {
-	down(&client_sema);
+	mutex_lock(&client_mutex);
 }
 
 void
 nfs4_unlock_state(void)
 {
-	up(&client_sema);
+	mutex_unlock(&client_mutex);
 }
 
 static inline u32
diff --git a/fs/partitions/devfs.c b/fs/partitions/devfs.c
index 87f50444fd3..3f0a780c9ce 100644
--- a/fs/partitions/devfs.c
+++ b/fs/partitions/devfs.c
@@ -6,7 +6,7 @@
 #include <linux/vmalloc.h>
 #include <linux/genhd.h>
 #include <linux/bitops.h>
-#include <asm/semaphore.h>
+#include <linux/mutex.h>
 
 
 struct unique_numspace {
@@ -16,7 +16,7 @@ struct unique_numspace {
 	struct semaphore  mutex;
 };
 
-static DECLARE_MUTEX(numspace_mutex);
+static DEFINE_MUTEX(numspace_mutex);
 
 static int expand_numspace(struct unique_numspace *s)
 {
@@ -48,7 +48,7 @@ static int alloc_unique_number(struct unique_numspace *s)
 {
 	int rval = 0;
 
-	down(&numspace_mutex);
+	mutex_lock(&numspace_mutex);
 	if (s->num_free < 1)
 		rval = expand_numspace(s);
 	if (!rval) {
@@ -56,7 +56,7 @@ static int alloc_unique_number(struct unique_numspace *s)
 		--s->num_free;
 		__set_bit(rval, s->bits);
 	}
-	up(&numspace_mutex);
+	mutex_unlock(&numspace_mutex);
 
 	return rval;
 }
@@ -66,11 +66,11 @@ static void dealloc_unique_number(struct unique_numspace *s, int number)
 	int old_val;
 
 	if (number >= 0) {
-		down(&numspace_mutex);
+		mutex_lock(&numspace_mutex);
 		old_val = __test_and_clear_bit(number, s->bits);
 		if (old_val)
 			++s->num_free;
-		up(&numspace_mutex);
+		mutex_unlock(&numspace_mutex);
 	}
 }
 
diff --git a/fs/super.c b/fs/super.c
index 8743e9bbb29..a66f66bb804 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,6 +37,7 @@
 #include <linux/writeback.h>		/* for the emergency remount stuff */
 #include <linux/idr.h>
 #include <linux/kobject.h>
+#include <linux/mutex.h>
 #include <asm/uaccess.h>
 
 
@@ -380,9 +381,9 @@ restart:
 void sync_filesystems(int wait)
 {
 	struct super_block *sb;
-	static DECLARE_MUTEX(mutex);
+	static DEFINE_MUTEX(mutex);
 
-	down(&mutex);		/* Could be down_interruptible */
+	mutex_lock(&mutex);		/* Could be down_interruptible */
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (!sb->s_op->sync_fs)
@@ -411,7 +412,7 @@ restart:
 			goto restart;
 	}
 	spin_unlock(&sb_lock);
-	up(&mutex);
+	mutex_unlock(&mutex);
 }
 
 /**
-- 
cgit v1.2.3


From 3978d7179d3849848df8a37dd0a5acc20bcb8750 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sun, 26 Mar 2006 01:37:17 -0800
Subject: [PATCH] Make address_space_operations->sync_page return void

The only user ignores the return value, and the only instanace
(block_sync_page) always returns 0...

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c    | 3 +--
 fs/cifs/file.c | 6 ++++--
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 3b3ab528192..0b9456fd074 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3007,7 +3007,7 @@ out:
 }
 EXPORT_SYMBOL(try_to_free_buffers);
 
-int block_sync_page(struct page *page)
+void block_sync_page(struct page *page)
 {
 	struct address_space *mapping;
 
@@ -3015,7 +3015,6 @@ int block_sync_page(struct page *page)
 	mapping = page_mapping(page);
 	if (mapping)
 		blk_run_backing_dev(mapping->backing_dev_info, page);
-	return 0;
 }
 
 /*
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 165d6742638..fb49aef1f2e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1339,7 +1339,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
 	return rc;
 }
 
-/* static int cifs_sync_page(struct page *page)
+/* static void cifs_sync_page(struct page *page)
 {
 	struct address_space *mapping;
 	struct inode *inode;
@@ -1353,16 +1353,18 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
 		return 0;
 	inode = mapping->host;
 	if (!inode)
-		return 0; */
+		return; */
 
 /*	fill in rpages then 
 	result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
 
 /*	cFYI(1, ("rpages is %d for sync page of Index %ld ", rpages, index));
 
+#if 0
 	if (rc < 0)
 		return rc;
 	return 0;
+#endif
 } */
 
 /*
-- 
cgit v1.2.3


From 2ff28e22bdb8727fbc7d7889807bc5a73aae56c5 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sun, 26 Mar 2006 01:37:18 -0800
Subject: [PATCH] Make address_space_operations->invalidatepage return void

The return value of this function is never used, so let's be honest and
declare it as void.

Some places where invalidatepage returned 0, I have inserted comments
suggesting a BUG_ON.

[akpm@osdl.org: JBD BUG fix]
[akpm@osdl.org: rework for git-nfs]
[akpm@osdl.org: don't go BUG in block_invalidate_page()]
Signed-off-by: Neil Brown <neilb@suse.de>
Acked-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/afs/file.c               |  6 +++---
 fs/buffer.c                 | 18 ++++++++----------
 fs/ext3/inode.c             |  4 ++--
 fs/jbd/transaction.c        | 13 +++++--------
 fs/jfs/jfs_metapage.c       |  7 +++----
 fs/nfs/file.c               |  3 +--
 fs/reiserfs/inode.c         |  8 +++++---
 fs/xfs/linux-2.6/xfs_aops.c |  4 ++--
 8 files changed, 29 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/file.c b/fs/afs/file.c
index 150b1922792..7bb716887e2 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -28,7 +28,7 @@ static int afs_file_release(struct inode *inode, struct file *file);
 #endif
 
 static int afs_file_readpage(struct file *file, struct page *page);
-static int afs_file_invalidatepage(struct page *page, unsigned long offset);
+static void afs_file_invalidatepage(struct page *page, unsigned long offset);
 static int afs_file_releasepage(struct page *page, gfp_t gfp_flags);
 
 struct inode_operations afs_file_inode_operations = {
@@ -212,7 +212,7 @@ int afs_cache_get_page_cookie(struct page *page,
 /*
  * invalidate part or all of a page
  */
-static int afs_file_invalidatepage(struct page *page, unsigned long offset)
+static void afs_file_invalidatepage(struct page *page, unsigned long offset)
 {
 	int ret = 1;
 
@@ -238,11 +238,11 @@ static int afs_file_invalidatepage(struct page *page, unsigned long offset)
 			if (!PageWriteback(page))
 				ret = page->mapping->a_ops->releasepage(page,
 									0);
+			/* possibly should BUG_ON(!ret); - neilb */
 		}
 	}
 
 	_leave(" = %d", ret);
-	return ret;
 } /* end afs_file_invalidatepage() */
 
 /*****************************************************************************/
diff --git a/fs/buffer.c b/fs/buffer.c
index 0b9456fd074..f25f5809642 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1593,11 +1593,10 @@ EXPORT_SYMBOL(try_to_release_page);
  * point.  Because the caller is about to free (and possibly reuse) those
  * blocks on-disk.
  */
-int block_invalidatepage(struct page *page, unsigned long offset)
+void block_invalidatepage(struct page *page, unsigned long offset)
 {
 	struct buffer_head *head, *bh, *next;
 	unsigned int curr_off = 0;
-	int ret = 1;
 
 	BUG_ON(!PageLocked(page));
 	if (!page_has_buffers(page))
@@ -1624,19 +1623,18 @@ int block_invalidatepage(struct page *page, unsigned long offset)
 	 * so real IO is not possible anymore.
 	 */
 	if (offset == 0)
-		ret = try_to_release_page(page, 0);
+		try_to_release_page(page, 0);
 out:
-	return ret;
+	return;
 }
 EXPORT_SYMBOL(block_invalidatepage);
 
-int do_invalidatepage(struct page *page, unsigned long offset)
+void do_invalidatepage(struct page *page, unsigned long offset)
 {
-	int (*invalidatepage)(struct page *, unsigned long);
-	invalidatepage = page->mapping->a_ops->invalidatepage;
-	if (invalidatepage == NULL)
-		invalidatepage = block_invalidatepage;
-	return (*invalidatepage)(page, offset);
+	void (*invalidatepage)(struct page *, unsigned long);
+	invalidatepage = page->mapping->a_ops->invalidatepage ? :
+		block_invalidatepage;
+	(*invalidatepage)(page, offset);
 }
 
 /*
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2c361377e0a..76e22c9c9c6 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1430,7 +1430,7 @@ ext3_readpages(struct file *file, struct address_space *mapping,
 	return mpage_readpages(mapping, pages, nr_pages, ext3_get_block);
 }
 
-static int ext3_invalidatepage(struct page *page, unsigned long offset)
+static void ext3_invalidatepage(struct page *page, unsigned long offset)
 {
 	journal_t *journal = EXT3_JOURNAL(page->mapping->host);
 
@@ -1440,7 +1440,7 @@ static int ext3_invalidatepage(struct page *page, unsigned long offset)
 	if (offset == 0)
 		ClearPageChecked(page);
 
-	return journal_invalidatepage(journal, page, offset);
+	journal_invalidatepage(journal, page, offset);
 }
 
 static int ext3_releasepage(struct page *page, gfp_t wait)
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index ada31fa272e..c609f5034fc 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1873,16 +1873,15 @@ zap_buffer_unlocked:
 }
 
 /** 
- * int journal_invalidatepage() 
+ * void journal_invalidatepage()
  * @journal: journal to use for flush... 
  * @page:    page to flush
  * @offset:  length of page to invalidate.
  *
  * Reap page buffers containing data after offset in page.
  *
- * Return non-zero if the page's buffers were successfully reaped.
  */
-int journal_invalidatepage(journal_t *journal, 
+void journal_invalidatepage(journal_t *journal,
 		      struct page *page, 
 		      unsigned long offset)
 {
@@ -1893,7 +1892,7 @@ int journal_invalidatepage(journal_t *journal,
 	if (!PageLocked(page))
 		BUG();
 	if (!page_has_buffers(page))
-		return 1;
+		return;
 
 	/* We will potentially be playing with lists other than just the
 	 * data lists (especially for journaled data mode), so be
@@ -1916,11 +1915,9 @@ int journal_invalidatepage(journal_t *journal,
 	} while (bh != head);
 
 	if (!offset) {
-		if (!may_free || !try_to_free_buffers(page))
-			return 0;
-		J_ASSERT(!page_has_buffers(page));
+		if (may_free && try_to_free_buffers(page))
+			J_ASSERT(!page_has_buffers(page));
 	}
-	return 1;
 }
 
 /* 
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 5fbaeaadccd..8508043849f 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -578,14 +578,13 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
 	return 0;
 }
 
-static int metapage_invalidatepage(struct page *page, unsigned long offset)
+static void metapage_invalidatepage(struct page *page, unsigned long offset)
 {
 	BUG_ON(offset);
 
-	if (PageWriteback(page))
-		return 0;
+	BUG_ON(PageWriteback(page));
 
-	return metapage_releasepage(page, 0);
+	metapage_releasepage(page, 0);
 }
 
 struct address_space_operations jfs_metapage_aops = {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5263b2864a4..dee49a0cb99 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -318,10 +318,9 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse
 	return status;
 }
 
-static int nfs_invalidate_page(struct page *page, unsigned long offset)
+static void nfs_invalidate_page(struct page *page, unsigned long offset)
 {
 	/* FIXME: we really should cancel any unstarted writes on this page */
-	return 1;
 }
 
 static int nfs_release_page(struct page *page, gfp_t gfp)
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index d60f6238c66..62e18c19b44 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2793,7 +2793,7 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
 }
 
 /* clm -- taken from fs/buffer.c:block_invalidate_page */
-static int reiserfs_invalidatepage(struct page *page, unsigned long offset)
+static void reiserfs_invalidatepage(struct page *page, unsigned long offset)
 {
 	struct buffer_head *head, *bh, *next;
 	struct inode *inode = page->mapping->host;
@@ -2832,10 +2832,12 @@ static int reiserfs_invalidatepage(struct page *page, unsigned long offset)
 	 * The get_block cached value has been unconditionally invalidated,
 	 * so real IO is not possible anymore.
 	 */
-	if (!offset && ret)
+	if (!offset && ret) {
 		ret = try_to_release_page(page, 0);
+		/* maybe should BUG_ON(!ret); - neilb */
+	}
       out:
-	return ret;
+	return;
 }
 
 static int reiserfs_set_page_dirty(struct page *page)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 97fc056130e..4f2476f188b 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1442,14 +1442,14 @@ xfs_vm_readpages(
 	return mpage_readpages(mapping, pages, nr_pages, xfs_get_block);
 }
 
-STATIC int
+STATIC void
 xfs_vm_invalidatepage(
 	struct page		*page,
 	unsigned long		offset)
 {
 	xfs_page_trace(XFS_INVALIDPAGE_ENTER,
 			page->mapping->host, page, offset);
-	return block_invalidatepage(page, offset);
+	block_invalidatepage(page, offset);
 }
 
 struct address_space_operations xfs_address_space_operations = {
-- 
cgit v1.2.3


From fa3536cc144c1298f2ed9416c33f3b77fa2cd37a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 26 Mar 2006 01:37:24 -0800
Subject: [PATCH] Use __read_mostly on some hot fs variables

I discovered on oprofile hunting on a SMP platform that dentry lookups were
slowed down because d_hash_mask, d_hash_shift and dentry_hashtable were in
a cache line that contained inodes_stat.  So each time inodes_stats is
changed by a cpu, other cpus have to refill their cache line.

This patch moves some variables to the __read_mostly section, in order to
avoid false sharing.  RCU dentry lookups can go full speed.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/bio.c       |  4 ++--
 fs/block_dev.c |  4 ++--
 fs/dcache.c    | 14 +++++++-------
 fs/dcookies.c  |  6 +++---
 fs/dnotify.c   |  4 ++--
 fs/eventpoll.c |  6 +++---
 fs/fcntl.c     |  4 ++--
 fs/inode.c     |  8 ++++----
 fs/inotify.c   | 12 ++++++------
 fs/locks.c     |  2 +-
 fs/namespace.c |  4 ++--
 fs/pipe.c      |  2 +-
 12 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index 73e664c01d3..49db9286a3b 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -30,7 +30,7 @@
 
 #define BIO_POOL_SIZE 256
 
-static kmem_cache_t *bio_slab;
+static kmem_cache_t *bio_slab __read_mostly;
 
 #define BIOVEC_NR_POOLS 6
 
@@ -39,7 +39,7 @@ static kmem_cache_t *bio_slab;
  * basically we just need to survive
  */
 #define BIO_SPLIT_ENTRIES 8	
-mempool_t *bio_split_pool;
+mempool_t *bio_split_pool __read_mostly;
 
 struct biovec_slab {
 	int nr_vecs;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 573fc8e0b67..9a451a9ffad 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -234,7 +234,7 @@ static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
  */
 
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
-static kmem_cache_t * bdev_cachep;
+static kmem_cache_t * bdev_cachep __read_mostly;
 
 static struct inode *bdev_alloc_inode(struct super_block *sb)
 {
@@ -308,7 +308,7 @@ static struct file_system_type bd_type = {
 	.kill_sb	= kill_anon_super,
 };
 
-static struct vfsmount *bd_mnt;
+static struct vfsmount *bd_mnt __read_mostly;
 struct super_block *blockdev_superblock;
 
 void __init bdev_cache_init(void)
diff --git a/fs/dcache.c b/fs/dcache.c
index 93958464850..aaca5e7970b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -36,7 +36,7 @@
 
 /* #define DCACHE_DEBUG 1 */
 
-int sysctl_vfs_cache_pressure = 100;
+int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
@@ -44,7 +44,7 @@ static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
 
 EXPORT_SYMBOL(dcache_lock);
 
-static kmem_cache_t *dentry_cache; 
+static kmem_cache_t *dentry_cache __read_mostly;
 
 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
 
@@ -59,9 +59,9 @@ static kmem_cache_t *dentry_cache;
 #define D_HASHBITS     d_hash_shift
 #define D_HASHMASK     d_hash_mask
 
-static unsigned int d_hash_mask;
-static unsigned int d_hash_shift;
-static struct hlist_head *dentry_hashtable;
+static unsigned int d_hash_mask __read_mostly;
+static unsigned int d_hash_shift __read_mostly;
+static struct hlist_head *dentry_hashtable __read_mostly;
 static LIST_HEAD(dentry_unused);
 
 /* Statistics gathering. */
@@ -1719,10 +1719,10 @@ static void __init dcache_init(unsigned long mempages)
 }
 
 /* SLAB cache for __getname() consumers */
-kmem_cache_t *names_cachep;
+kmem_cache_t *names_cachep __read_mostly;
 
 /* SLAB cache for file structures */
-kmem_cache_t *filp_cachep;
+kmem_cache_t *filp_cachep __read_mostly;
 
 EXPORT_SYMBOL(d_genocide);
 
diff --git a/fs/dcookies.c b/fs/dcookies.c
index ef758cfa556..8749339bf4f 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -38,9 +38,9 @@ struct dcookie_struct {
 
 static LIST_HEAD(dcookie_users);
 static DEFINE_MUTEX(dcookie_mutex);
-static kmem_cache_t * dcookie_cache;
-static struct list_head * dcookie_hashtable;
-static size_t hash_size;
+static kmem_cache_t *dcookie_cache __read_mostly;
+static struct list_head *dcookie_hashtable __read_mostly;
+static size_t hash_size __read_mostly;
 
 static inline int is_live(void)
 {
diff --git a/fs/dnotify.c b/fs/dnotify.c
index f3b540dd5d1..f932591df5a 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -21,9 +21,9 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 
-int dir_notify_enable = 1;
+int dir_notify_enable __read_mostly = 1;
 
-static kmem_cache_t *dn_cache;
+static kmem_cache_t *dn_cache __read_mostly;
 
 static void redo_inode_mask(struct inode *inode)
 {
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a0f682cdd03..e067a06c646 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -281,13 +281,13 @@ static struct mutex epmutex;
 static struct poll_safewake psw;
 
 /* Slab cache used to allocate "struct epitem" */
-static kmem_cache_t *epi_cache;
+static kmem_cache_t *epi_cache __read_mostly;
 
 /* Slab cache used to allocate "struct eppoll_entry" */
-static kmem_cache_t *pwq_cache;
+static kmem_cache_t *pwq_cache __read_mostly;
 
 /* Virtual fs used to allocate inodes for eventpoll files */
-static struct vfsmount *eventpoll_mnt;
+static struct vfsmount *eventpoll_mnt __read_mostly;
 
 /* File callbacks that implement the eventpoll file behaviour */
 static struct file_operations eventpoll_fops = {
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 03c789560fb..2a2479196f9 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -412,7 +412,7 @@ out:
 
 /* Table to convert sigio signal codes into poll band bitmaps */
 
-static long band_table[NSIGPOLL] = {
+static const long band_table[NSIGPOLL] = {
 	POLLIN | POLLRDNORM,			/* POLL_IN */
 	POLLOUT | POLLWRNORM | POLLWRBAND,	/* POLL_OUT */
 	POLLIN | POLLRDNORM | POLLMSG,		/* POLL_MSG */
@@ -531,7 +531,7 @@ int send_sigurg(struct fown_struct *fown)
 }
 
 static DEFINE_RWLOCK(fasync_lock);
-static kmem_cache_t *fasync_cache;
+static kmem_cache_t *fasync_cache __read_mostly;
 
 /*
  * fasync_helper() is used by some character device drivers (mainly mice)
diff --git a/fs/inode.c b/fs/inode.c
index 85da11044ad..1fddf2803af 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -56,8 +56,8 @@
 #define I_HASHBITS	i_hash_shift
 #define I_HASHMASK	i_hash_mask
 
-static unsigned int i_hash_mask;
-static unsigned int i_hash_shift;
+static unsigned int i_hash_mask __read_mostly;
+static unsigned int i_hash_shift __read_mostly;
 
 /*
  * Each inode can be on two separate lists. One is
@@ -73,7 +73,7 @@ static unsigned int i_hash_shift;
 
 LIST_HEAD(inode_in_use);
 LIST_HEAD(inode_unused);
-static struct hlist_head *inode_hashtable;
+static struct hlist_head *inode_hashtable __read_mostly;
 
 /*
  * A simple spinlock to protect the list manipulations.
@@ -98,7 +98,7 @@ static DEFINE_MUTEX(iprune_mutex);
  */
 struct inodes_stat_t inodes_stat;
 
-static kmem_cache_t * inode_cachep;
+static kmem_cache_t * inode_cachep __read_mostly;
 
 static struct inode *alloc_inode(struct super_block *sb)
 {
diff --git a/fs/inotify.c b/fs/inotify.c
index a61e93e1785..f48a3dae071 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -39,15 +39,15 @@
 
 static atomic_t inotify_cookie;
 
-static kmem_cache_t *watch_cachep;
-static kmem_cache_t *event_cachep;
+static kmem_cache_t *watch_cachep __read_mostly;
+static kmem_cache_t *event_cachep __read_mostly;
 
-static struct vfsmount *inotify_mnt;
+static struct vfsmount *inotify_mnt __read_mostly;
 
 /* these are configurable via /proc/sys/fs/inotify/ */
-int inotify_max_user_instances;
-int inotify_max_user_watches;
-int inotify_max_queued_events;
+int inotify_max_user_instances __read_mostly;
+int inotify_max_user_watches __read_mostly;
+int inotify_max_queued_events __read_mostly;
 
 /*
  * Lock ordering:
diff --git a/fs/locks.c b/fs/locks.c
index 56f996e98bb..709450a7b89 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -142,7 +142,7 @@ int lease_break_time = 45;
 static LIST_HEAD(file_lock_list);
 static LIST_HEAD(blocked_list);
 
-static kmem_cache_t *filelock_cache;
+static kmem_cache_t *filelock_cache __read_mostly;
 
 /* Allocate an empty lock structure. */
 static struct file_lock *locks_alloc_lock(void)
diff --git a/fs/namespace.c b/fs/namespace.c
index 71e75bcf4d2..e069a4c5e38 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -43,9 +43,9 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
 
 static int event;
 
-static struct list_head *mount_hashtable;
+static struct list_head *mount_hashtable __read_mostly;
 static int hash_mask __read_mostly, hash_bits __read_mostly;
-static kmem_cache_t *mnt_cache;
+static kmem_cache_t *mnt_cache __read_mostly;
 static struct rw_semaphore namespace_sem;
 
 /* /sys/fs */
diff --git a/fs/pipe.c b/fs/pipe.c
index d976866a115..4384c929094 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -675,7 +675,7 @@ fail_page:
 	return NULL;
 }
 
-static struct vfsmount *pipe_mnt;
+static struct vfsmount *pipe_mnt __read_mostly;
 static int pipefs_delete_dentry(struct dentry *dentry)
 {
 	return 1;
-- 
cgit v1.2.3


From 6dc0fe8f8b40854982929e4f24d8c65115769b60 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Sun, 26 Mar 2006 01:37:24 -0800
Subject: [PATCH] VFS,fs/locks.c: cleanup locks_insert_block

BUG instead of handling a case that should never happen.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/locks.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/locks.c b/fs/locks.c
index 709450a7b89..4badf6a0e7b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -533,12 +533,7 @@ static void locks_delete_block(struct file_lock *waiter)
 static void locks_insert_block(struct file_lock *blocker, 
 			       struct file_lock *waiter)
 {
-	if (!list_empty(&waiter->fl_block)) {
-		printk(KERN_ERR "locks_insert_block: removing duplicated lock "
-			"(pid=%d %Ld-%Ld type=%d)\n", waiter->fl_pid,
-			waiter->fl_start, waiter->fl_end, waiter->fl_type);
-		__locks_delete_block(waiter);
-	}
+	BUG_ON(!list_empty(&waiter->fl_block));
 	list_add_tail(&waiter->fl_block, &blocker->fl_block);
 	waiter->fl_next = blocker;
 	if (IS_POSIX(blocker))
-- 
cgit v1.2.3


From 5842add2f3b519111b6401f3a35862bd00a3aa7e Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@citi.umich.edu>
Date: Sun, 26 Mar 2006 01:37:26 -0800
Subject: [PATCH] VFS,fs/locks.c,NFSD4: add race_free posix_lock_file_conf()
 interface

Lockd and the NFSv4 server both exercise a race condition where
posix_test_lock() is called either before or after posix_lock_file() to
deal with a denied lock request due to a conflicting lock.

Remove the race condition for the NFSv4 server by adding a new conflicting
lock parameter to __posix_lock_file() , changing the name to
__posix_lock_file_conf().

Keep posix_lock_file() interface, add posix_lock_conf() interface, both
call __posix_lock_file_conf().

[akpm@osdl.org: Put the EXPORT_SYMBOL() where it belongs]
Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/locks.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/locks.c b/fs/locks.c
index 4badf6a0e7b..4d9e71d43e7 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -792,9 +792,7 @@ out:
 	return error;
 }
 
-EXPORT_SYMBOL(posix_lock_file);
-
-static int __posix_lock_file(struct inode *inode, struct file_lock *request)
+static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
 {
 	struct file_lock *fl;
 	struct file_lock *new_fl, *new_fl2;
@@ -818,6 +816,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request)
 				continue;
 			if (!posix_locks_conflict(request, fl))
 				continue;
+			if (conflock)
+				locks_copy_lock(conflock, fl);
 			error = -EAGAIN;
 			if (!(request->fl_flags & FL_SLEEP))
 				goto out;
@@ -987,8 +987,24 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request)
  */
 int posix_lock_file(struct file *filp, struct file_lock *fl)
 {
-	return __posix_lock_file(filp->f_dentry->d_inode, fl);
+	return __posix_lock_file_conf(filp->f_dentry->d_inode, fl, NULL);
+}
+EXPORT_SYMBOL(posix_lock_file);
+
+/**
+ * posix_lock_file_conf - Apply a POSIX-style lock to a file
+ * @filp: The file to apply the lock to
+ * @fl: The lock to be applied
+ * @conflock: Place to return a copy of the conflicting lock, if found.
+ *
+ * Except for the conflock parameter, acts just like posix_lock_file.
+ */
+int posix_lock_file_conf(struct file *filp, struct file_lock *fl,
+			struct file_lock *conflock)
+{
+	return __posix_lock_file_conf(filp->f_dentry->d_inode, fl, conflock);
 }
+EXPORT_SYMBOL(posix_lock_file_conf);
 
 /**
  * posix_lock_file_wait - Apply a POSIX-style lock to a file
@@ -1004,7 +1020,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
 	int error;
 	might_sleep ();
 	for (;;) {
-		error = __posix_lock_file(filp->f_dentry->d_inode, fl);
+		error = posix_lock_file(filp, fl);
 		if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
 			break;
 		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1076,7 +1092,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
 	fl.fl_end = offset + count - 1;
 
 	for (;;) {
-		error = __posix_lock_file(inode, &fl);
+		error = __posix_lock_file_conf(inode, &fl, NULL);
 		if (error != -EAGAIN)
 			break;
 		if (!(fl.fl_flags & FL_SLEEP))
@@ -1689,7 +1705,7 @@ again:
 		error = filp->f_op->lock(filp, cmd, file_lock);
 	else {
 		for (;;) {
-			error = __posix_lock_file(inode, file_lock);
+			error = posix_lock_file(filp, file_lock);
 			if ((error != -EAGAIN) || (cmd == F_SETLK))
 				break;
 			error = wait_event_interruptible(file_lock->fl_wait,
@@ -1832,7 +1848,7 @@ again:
 		error = filp->f_op->lock(filp, cmd, file_lock);
 	else {
 		for (;;) {
-			error = __posix_lock_file(inode, file_lock);
+			error = posix_lock_file(filp, file_lock);
 			if ((error != -EAGAIN) || (cmd == F_SETLK64))
 				break;
 			error = wait_event_interruptible(file_lock->fl_wait,
-- 
cgit v1.2.3


From eb76b3fda1f7c2aa2d1523b36835048a15e5e5d2 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@citi.umich.edu>
Date: Sun, 26 Mar 2006 01:37:26 -0800
Subject: [PATCH] NFSD4: return conflict lock without races

Update the NFSv4 server to use the new posix_lock_file_conf() interface.
Remove unnecessary (and race-prone) posix_test_file() calls.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c7b87e92f91..47ec112b266 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2750,37 +2750,31 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	* Note: locks.c uses the BKL to protect the inode's lock list.
 	*/
 
-	status = posix_lock_file(filp, &file_lock);
-	dprintk("NFSD: nfsd4_lock: posix_lock_file status %d\n",status);
+	/* XXX?: Just to divert the locks_release_private at the start of
+	 * locks_copy_lock: */
+	conflock.fl_ops = NULL;
+	conflock.fl_lmops = NULL;
+	status = posix_lock_file_conf(filp, &file_lock, &conflock);
+	dprintk("NFSD: nfsd4_lock: posix_lock_file_conf status %d\n",status);
 	switch (-status) {
 	case 0: /* success! */
 		update_stateid(&lock_stp->st_stateid);
 		memcpy(&lock->lk_resp_stateid, &lock_stp->st_stateid, 
 				sizeof(stateid_t));
-		goto out;
-	case (EAGAIN):
-		goto conflicting_lock;
+		break;
+	case (EAGAIN):		/* conflock holds conflicting lock */
+		status = nfserr_denied;
+		dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
+		nfs4_set_lock_denied(&conflock, &lock->lk_denied);
+		break;
 	case (EDEADLK):
 		status = nfserr_deadlock;
-		dprintk("NFSD: nfsd4_lock: posix_lock_file() failed! status %d\n",status);
-		goto out;
+		break;
 	default:        
-		status = nfserrno(status);
-		dprintk("NFSD: nfsd4_lock: posix_lock_file() failed! status %d\n",status);
-		goto out;
-	}
-
-conflicting_lock:
-	dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
-	status = nfserr_denied;
-	/* XXX There is a race here. Future patch needed to provide 
-	 * an atomic posix_lock_and_test_file
-	 */
-	if (!posix_test_lock(filp, &file_lock, &conflock)) {
-		status = nfserr_serverfault;
-		goto out;
+		dprintk("NFSD: nfsd4_lock: posix_lock_file_conf() failed! status %d\n",status);
+		status = nfserr_resource;
+		break;
 	}
-	nfs4_set_lock_denied(&conflock, &lock->lk_denied);
 out:
 	if (status && lock->lk_is_new && lock_sop)
 		release_stateowner(lock_sop);
-- 
cgit v1.2.3


From 0eaae62abaa1ad1f231932b6cdd9fb1b91df6651 Mon Sep 17 00:00:00 2001
From: Matthew Dobson <colpatch@us.ibm.com>
Date: Sun, 26 Mar 2006 01:37:47 -0800
Subject: [PATCH] mempool: use common mempool kmalloc allocator

This patch changes several mempool users, all of which are basically just
wrappers around kmalloc(), to use the common mempool_kmalloc/kfree, rather
than their own wrapper function, removing a bunch of duplicated code.

Signed-off-by: Matthew Dobson <colpatch@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/bio.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index 49db9286a3b..377046d8294 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1125,16 +1125,6 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
 	return bp;
 }
 
-static void *bio_pair_alloc(gfp_t gfp_flags, void *data)
-{
-	return kmalloc(sizeof(struct bio_pair), gfp_flags);
-}
-
-static void bio_pair_free(void *bp, void *data)
-{
-	kfree(bp);
-}
-
 
 /*
  * create memory pools for biovec's in a bio_set.
@@ -1254,8 +1244,8 @@ static int __init init_bio(void)
 	if (!fs_bio_set)
 		panic("bio: can't allocate bios\n");
 
-	bio_split_pool = mempool_create(BIO_SPLIT_ENTRIES,
-				bio_pair_alloc, bio_pair_free, NULL);
+	bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
+						     sizeof(struct bio_pair));
 	if (!bio_split_pool)
 		panic("bio: can't create split pool\n");
 
-- 
cgit v1.2.3


From 93d2341c750cda0df48a6cc67b35fe25f1ec47df Mon Sep 17 00:00:00 2001
From: Matthew Dobson <colpatch@us.ibm.com>
Date: Sun, 26 Mar 2006 01:37:50 -0800
Subject: [PATCH] mempool: use mempool_create_slab_pool()

Modify well over a dozen mempool users to call mempool_create_slab_pool()
rather than calling mempool_create() with extra arguments, saving about 30
lines of code and increasing readability.

Signed-off-by: Matthew Dobson <colpatch@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/bio.c                     |  7 ++-----
 fs/cifs/cifsfs.c             | 18 ++++++------------
 fs/jfs/jfs_metapage.c        |  4 ++--
 fs/nfs/read.c                |  6 ++----
 fs/nfs/write.c               | 12 ++++--------
 fs/xfs/linux-2.6/xfs_super.c |  5 ++---
 6 files changed, 18 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index 377046d8294..eb8fbc53f2c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1141,8 +1141,7 @@ static int biovec_create_pools(struct bio_set *bs, int pool_entries, int scale)
 		if (i >= scale)
 			pool_entries >>= 1;
 
-		*bvp = mempool_create(pool_entries, mempool_alloc_slab,
-					mempool_free_slab, bp->slab);
+		*bvp = mempool_create_slab_pool(pool_entries, bp->slab);
 		if (!*bvp)
 			return -ENOMEM;
 	}
@@ -1179,9 +1178,7 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size, int scale)
 	if (!bs)
 		return NULL;
 
-	bs->bio_pool = mempool_create(bio_pool_size, mempool_alloc_slab,
-			mempool_free_slab, bio_slab);
-
+	bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab);
 	if (!bs->bio_pool)
 		goto bad;
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 221b3334b73..6b99b51d669 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -738,10 +738,8 @@ cifs_init_request_bufs(void)
 		cERROR(1,("cifs_min_rcv set to maximum (64)"));
 	}
 
-	cifs_req_poolp = mempool_create(cifs_min_rcv,
-					mempool_alloc_slab,
-					mempool_free_slab,
-					cifs_req_cachep);
+	cifs_req_poolp = mempool_create_slab_pool(cifs_min_rcv,
+						  cifs_req_cachep);
 
 	if(cifs_req_poolp == NULL) {
 		kmem_cache_destroy(cifs_req_cachep);
@@ -771,10 +769,8 @@ cifs_init_request_bufs(void)
 		cFYI(1,("cifs_min_small set to maximum (256)"));
 	}
 
-	cifs_sm_req_poolp = mempool_create(cifs_min_small,
-				mempool_alloc_slab,
-				mempool_free_slab,
-				cifs_sm_req_cachep);
+	cifs_sm_req_poolp = mempool_create_slab_pool(cifs_min_small,
+						     cifs_sm_req_cachep);
 
 	if(cifs_sm_req_poolp == NULL) {
 		mempool_destroy(cifs_req_poolp);
@@ -808,10 +804,8 @@ cifs_init_mids(void)
 	if (cifs_mid_cachep == NULL)
 		return -ENOMEM;
 
-	cifs_mid_poolp = mempool_create(3 /* a reasonable min simultan opers */,
-					mempool_alloc_slab,
-					mempool_free_slab,
-					cifs_mid_cachep);
+	/* 3 is a reasonable minimum number of simultaneous operations */
+	cifs_mid_poolp = mempool_create_slab_pool(3, cifs_mid_cachep);
 	if(cifs_mid_poolp == NULL) {
 		kmem_cache_destroy(cifs_mid_cachep);
 		return -ENOMEM;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 8508043849f..f28696f235c 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -220,8 +220,8 @@ int __init metapage_init(void)
 	if (metapage_cache == NULL)
 		return -ENOMEM;
 
-	metapage_mempool = mempool_create(METAPOOL_MIN_PAGES, mempool_alloc_slab,
-					  mempool_free_slab, metapage_cache);
+	metapage_mempool = mempool_create_slab_pool(METAPOOL_MIN_PAGES,
+						    metapage_cache);
 
 	if (metapage_mempool == NULL) {
 		kmem_cache_destroy(metapage_cache);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 3961524fd4a..624ca7146b6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -663,10 +663,8 @@ int nfs_init_readpagecache(void)
 	if (nfs_rdata_cachep == NULL)
 		return -ENOMEM;
 
-	nfs_rdata_mempool = mempool_create(MIN_POOL_READ,
-					   mempool_alloc_slab,
-					   mempool_free_slab,
-					   nfs_rdata_cachep);
+	nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
+						     nfs_rdata_cachep);
 	if (nfs_rdata_mempool == NULL)
 		return -ENOMEM;
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3f5225404c9..4cfada2cc09 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1521,17 +1521,13 @@ int nfs_init_writepagecache(void)
 	if (nfs_wdata_cachep == NULL)
 		return -ENOMEM;
 
-	nfs_wdata_mempool = mempool_create(MIN_POOL_WRITE,
-					   mempool_alloc_slab,
-					   mempool_free_slab,
-					   nfs_wdata_cachep);
+	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
+						     nfs_wdata_cachep);
 	if (nfs_wdata_mempool == NULL)
 		return -ENOMEM;
 
-	nfs_commit_mempool = mempool_create(MIN_POOL_COMMIT,
-					   mempool_alloc_slab,
-					   mempool_free_slab,
-					   nfs_wdata_cachep);
+	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
+						      nfs_wdata_cachep);
 	if (nfs_commit_mempool == NULL)
 		return -ENOMEM;
 
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 8355faf8ffd..1884300417e 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -375,9 +375,8 @@ xfs_init_zones(void)
 	if (!xfs_ioend_zone)
 		goto out_destroy_vnode_zone;
 
-	xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE,
-					mempool_alloc_slab, mempool_free_slab,
-					xfs_ioend_zone);
+	xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
+						  xfs_ioend_zone);
 	if (!xfs_ioend_pool)
 		goto out_free_ioend_zone;
 	return 0;
-- 
cgit v1.2.3


From 5515eff811cb807f0d3221a6e8cc223c7850d205 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 26 Mar 2006 01:37:53 -0800
Subject: [PATCH] 2tb-files-add-blkcnt_t-fixes

Cc: Takashi Sato <sho@tnes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/inode.c    | 6 +++---
 fs/cifs/readdir.c  | 8 ++++----
 fs/ocfs2/journal.c | 8 +++++---
 fs/ocfs2/namei.c   | 5 +++--
 4 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index ff93a9f81d1..598eec9778f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -163,9 +163,9 @@ int cifs_get_inode_info_unix(struct inode **pinode,
 
 		if (num_of_bytes < end_of_file)
 			cFYI(1, ("allocation size less than end of file"));
-		cFYI(1,
-		     ("Size %ld and blocks %ld",
-		      (unsigned long) inode->i_size, inode->i_blocks));
+		cFYI(1, ("Size %ld and blocks %llu",
+			(unsigned long) inode->i_size,
+			(unsigned long long)inode->i_blocks));
 		if (S_ISREG(inode->i_mode)) {
 			cFYI(1, ("File inode"));
 			inode->i_op = &cifs_file_inode_ops;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index edb3b6eb34b..488bd0d81dc 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -197,10 +197,10 @@ static void fill_in_inode(struct inode *tmp_inode,
 
 	if (allocation_size < end_of_file)
 		cFYI(1, ("May be sparse file, allocation less than file size"));
-	cFYI(1,
-	     ("File Size %ld and blocks %ld and blocksize %ld",
-	      (unsigned long)tmp_inode->i_size, tmp_inode->i_blocks,
-	      tmp_inode->i_blksize));
+	cFYI(1, ("File Size %ld and blocks %llu and blocksize %ld",
+		(unsigned long)tmp_inode->i_size,
+		(unsigned long long)tmp_inode->i_blocks,
+		tmp_inode->i_blksize));
 	if (S_ISREG(tmp_inode->i_mode)) {
 		cFYI(1, ("File inode"));
 		tmp_inode->i_op = &cifs_file_inode_ops;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index ae3440ca083..90641929a43 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -582,7 +582,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
 	}
 
 	mlog(0, "inode->i_size = %lld\n", inode->i_size);
-	mlog(0, "inode->i_blocks = %lu\n", inode->i_blocks);
+	mlog(0, "inode->i_blocks = %llu\n",
+			(unsigned long long)inode->i_blocks);
 	mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters);
 
 	/* call the kernels journal init function now */
@@ -850,8 +851,9 @@ static int ocfs2_force_read_journal(struct inode *inode)
 
 	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
 
-	mlog(0, "Force reading %lu blocks\n",
-	     (inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9)));
+	mlog(0, "Force reading %llu blocks\n",
+		(unsigned long long)(inode->i_blocks >>
+			(inode->i_sb->s_blocksize_bits - 9)));
 
 	v_blkno = 0;
 	while (v_blkno <
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 274f61d0cda..0673862c8bd 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1444,8 +1444,9 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
 	 * write i_size + 1 bytes. */
 	blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
 
-	mlog_entry("i_blocks = %lu, i_size = %llu, blocks = %d\n",
-		       inode->i_blocks, i_size_read(inode), blocks);
+	mlog_entry("i_blocks = %llu, i_size = %llu, blocks = %d\n",
+			(unsigned long long)inode->i_blocks,
+			i_size_read(inode), blocks);
 
 	/* Sanity check -- make sure we're going to fit. */
 	if (bytes_left >
-- 
cgit v1.2.3


From 89747d369d34e333b9b60f10f333a0b727b4e4e2 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Sun, 26 Mar 2006 01:37:55 -0800
Subject: [PATCH] ext3_get_blocks: Mapping multiple blocks at a once

Currently ext3_get_block() only maps or allocates one block at a time.  This
is quite inefficient for sequential IO workload.

I have posted a early implements a simply multiple block map and allocation
with current ext3.  The basic idea is allocating the 1st block in the existing
way, and attempting to allocate the next adjacent blocks on a best effort
basis.  More description about the implementation could be found here:
http://marc.theaimsgroup.com/?l=ext2-devel&m=112162230003522&w=2

The following the latest version of the patch: break the original patch into 5
patches, re-worked some logicals, and fixed some bugs.  The break ups are:

 [patch 1] Adding map multiple blocks at a time in ext3_get_blocks()
 [patch 2] Extend ext3_get_blocks() to support multiple block allocation
 [patch 3] Implement multiple block allocation in ext3-try-to-allocate
 (called via ext3_new_block()).
 [patch 4] Proper accounting updates in ext3_new_blocks()
 [patch 5] Adjust reservation window size properly (by the given number
 of blocks to allocate) before block allocation to increase the
 possibility of allocating multiple blocks in a single call.

Tests done so far includes fsx,tiobench and dbench.  The following numbers
collected from Direct IO tests (1G file creation/read) shows the system time
have been greatly reduced (more than 50% on my 8 cpu system) with the patches.

 1G file DIO write:
 	2.6.15		2.6.15+patches
 real    0m31.275s	0m31.161s
 user    0m0.000s	0m0.000s
 sys     0m3.384s	0m0.564s

 1G file DIO read:
 	2.6.15		2.6.15+patches
 real    0m30.733s	0m30.624s
 user    0m0.000s	0m0.004s
 sys     0m0.748s	0m0.380s

Some previous test we did on buffered IO with using multiple blocks allocation
and delayed allocation shows noticeable improvement on throughput and system
time.

This patch:

Add support of mapping multiple blocks in one call.

This is useful for DIO reads and re-writes (where blocks are already
allocated), also is in line with Christoph's proposal of using getblocks() in
mpage_readpage() or mpage_readpages().

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/dir.c   |   5 +--
 fs/ext3/inode.c | 105 ++++++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 79 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 773459164bb..38bd3f6ec14 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -131,8 +131,9 @@ static int ext3_readdir(struct file * filp,
 		struct buffer_head *bh = NULL;
 
 		map_bh.b_state = 0;
-		err = ext3_get_block_handle(NULL, inode, blk, &map_bh, 0, 0);
-		if (!err) {
+		err = ext3_get_blocks_handle(NULL, inode, blk, 1,
+						&map_bh, 0, 0);
+		if (err > 0) {
 			page_cache_readahead(sb->s_bdev->bd_inode->i_mapping,
 				&filp->f_ra,
 				filp,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 76e22c9c9c6..fcfb10f7712 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -330,7 +330,7 @@ static int ext3_block_to_path(struct inode *inode,
 		ext3_warning (inode->i_sb, "ext3_block_to_path", "block > big");
 	}
 	if (boundary)
-		*boundary = (i_block & (ptrs - 1)) == (final - 1);
+		*boundary = final - 1 - (i_block & (ptrs - 1));
 	return n;
 }
 
@@ -669,11 +669,15 @@ err_out:
  * akpm: `handle' can be NULL if create == 0.
  *
  * The BKL may not be held on entry here.  Be sure to take it early.
+ * return > 0, # of blocks mapped or allocated.
+ * return = 0, if plain lookup failed.
+ * return < 0, error case.
  */
 
 int
-ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
-		struct buffer_head *bh_result, int create, int extend_disksize)
+ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock,
+		unsigned long maxblocks, struct buffer_head *bh_result,
+		int create, int extend_disksize)
 {
 	int err = -EIO;
 	int offsets[4];
@@ -681,11 +685,15 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 	Indirect *partial;
 	unsigned long goal;
 	int left;
-	int boundary = 0;
-	const int depth = ext3_block_to_path(inode, iblock, offsets, &boundary);
+	int blocks_to_boundary = 0;
+	int depth;
 	struct ext3_inode_info *ei = EXT3_I(inode);
+	int count = 0;
+	unsigned long first_block = 0;
+
 
 	J_ASSERT(handle != NULL || create == 0);
+	depth = ext3_block_to_path(inode, iblock, offsets, &blocks_to_boundary);
 
 	if (depth == 0)
 		goto out;
@@ -694,8 +702,31 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 
 	/* Simplest case - block found, no allocation needed */
 	if (!partial) {
+		first_block = chain[depth - 1].key;
 		clear_buffer_new(bh_result);
-		goto got_it;
+		count++;
+		/*map more blocks*/
+		while (count < maxblocks && count <= blocks_to_boundary) {
+			if (!verify_chain(chain, partial)) {
+				/*
+				 * Indirect block might be removed by
+				 * truncate while we were reading it.
+				 * Handling of that case: forget what we've
+				 * got now. Flag the err as EAGAIN, so it
+				 * will reread.
+				 */
+				err = -EAGAIN;
+				count = 0;
+				break;
+			}
+			if (le32_to_cpu(*(chain[depth-1].p+count) ==
+					(first_block + count)))
+				count++;
+			else
+				break;
+		}
+		if (err != -EAGAIN)
+			goto got_it;
 	}
 
 	/* Next simple case - plain lookup or failed read of indirect block */
@@ -723,6 +754,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 		}
 		partial = ext3_get_branch(inode, depth, offsets, chain, &err);
 		if (!partial) {
+			count++;
 			mutex_unlock(&ei->truncate_mutex);
 			if (err)
 				goto cleanup;
@@ -772,8 +804,9 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 	set_buffer_new(bh_result);
 got_it:
 	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
-	if (boundary)
+	if (blocks_to_boundary == 0)
 		set_buffer_boundary(bh_result);
+	err = count;
 	/* Clean up and exit */
 	partial = chain + depth - 1;	/* the whole chain */
 cleanup:
@@ -787,21 +820,6 @@ out:
 	return err;
 }
 
-static int ext3_get_block(struct inode *inode, sector_t iblock,
-			struct buffer_head *bh_result, int create)
-{
-	handle_t *handle = NULL;
-	int ret;
-
-	if (create) {
-		handle = ext3_journal_current_handle();
-		J_ASSERT(handle != 0);
-	}
-	ret = ext3_get_block_handle(handle, inode, iblock,
-				bh_result, create, 1);
-	return ret;
-}
-
 #define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32)
 
 static int
@@ -812,9 +830,12 @@ ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock,
 	handle_t *handle = journal_current_handle();
 	int ret = 0;
 
-	if (!handle)
+	if (!create)
 		goto get_block;		/* A read */
 
+	if (max_blocks == 1)
+		goto get_block;		/* A single block get */
+
 	if (handle->h_transaction->t_state == T_LOCKED) {
 		/*
 		 * Huge direct-io writes can hold off commits for long
@@ -841,13 +862,31 @@ ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock,
 	}
 
 get_block:
-	if (ret == 0)
-		ret = ext3_get_block_handle(handle, inode, iblock,
-					bh_result, create, 0);
-	bh_result->b_size = (1 << inode->i_blkbits);
+	if (ret == 0) {
+		ret = ext3_get_blocks_handle(handle, inode, iblock,
+					max_blocks, bh_result, create, 0);
+		if (ret > 0) {
+			bh_result->b_size = (ret << inode->i_blkbits);
+			ret = 0;
+		}
+	}
 	return ret;
 }
 
+static int ext3_get_blocks(struct inode *inode, sector_t iblock,
+		unsigned long maxblocks, struct buffer_head *bh_result,
+		int create)
+{
+	return ext3_direct_io_get_blocks(inode, iblock, maxblocks,
+					bh_result, create);
+}
+
+static int ext3_get_block(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh_result, int create)
+{
+	return ext3_get_blocks(inode, iblock, 1, bh_result, create);
+}
+
 /*
  * `handle' can be NULL if create is zero
  */
@@ -862,8 +901,16 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode,
 	dummy.b_state = 0;
 	dummy.b_blocknr = -1000;
 	buffer_trace_init(&dummy.b_history);
-	*errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1);
-	if (!*errp && buffer_mapped(&dummy)) {
+	err = ext3_get_blocks_handle(handle, inode, block, 1,
+					&dummy, create, 1);
+	if (err == 1) {
+		err = 0;
+	} else if (err >= 0) {
+		WARN_ON(1);
+		err = -EIO;
+	}
+	*errp = err;
+	if (!err && buffer_mapped(&dummy)) {
 		struct buffer_head *bh;
 		bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
 		if (!bh) {
-- 
cgit v1.2.3


From b47b24781c59565f45acd765dc995a752d561e96 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Sun, 26 Mar 2006 01:37:56 -0800
Subject: [PATCH] ext3_get_blocks: multiple block allocation

Add support for multiple block allocation in ext3-get-blocks().

Look up the disk block mapping and count the total number of blocks to
allocate, then pass it to ext3_new_block(), where the real block allocation is
performed.  Once multiple blocks are allocated, prepare the branch with those
just allocated blocks info and finally splice the whole branch into the block
mapping tree.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/inode.c | 270 ++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 193 insertions(+), 77 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index fcfb10f7712..34e5b0dc916 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -235,16 +235,6 @@ no_delete:
 	clear_inode(inode);	/* We must guarantee clearing of inode... */
 }
 
-static int ext3_alloc_block (handle_t *handle,
-			struct inode * inode, unsigned long goal, int *err)
-{
-	unsigned long result;
-
-	result = ext3_new_block(handle, inode, goal, err);
-	return result;
-}
-
-
 typedef struct {
 	__le32	*p;
 	__le32	key;
@@ -476,15 +466,115 @@ static unsigned long ext3_find_goal(struct inode *inode, long block,
 
 	return ext3_find_near(inode, partial);
 }
+/**
+ *	ext3_blks_to_allocate: Look up the block map and count the number
+ *	of direct blocks need to be allocated for the given branch.
+ *
+ * 	@branch: chain of indirect blocks
+ *	@k: number of blocks need for indirect blocks
+ *	@blks: number of data blocks to be mapped.
+ *	@blocks_to_boundary:  the offset in the indirect block
+ *
+ *	return the total number of blocks to be allocate, including the
+ *	direct and indirect blocks.
+ */
+static int
+ext3_blks_to_allocate(Indirect * branch, int k, unsigned long blks,
+		int blocks_to_boundary)
+{
+	unsigned long count = 0;
+
+	/*
+	 * Simple case, [t,d]Indirect block(s) has not allocated yet
+	 * then it's clear blocks on that path have not allocated
+	 */
+	if (k > 0) {
+		/* right now don't hanel cross boundary allocation */
+		if (blks < blocks_to_boundary + 1)
+			count += blks;
+		else
+			count += blocks_to_boundary + 1;
+		return count;
+	}
+
+	count++;
+	while (count < blks && count <= blocks_to_boundary &&
+		le32_to_cpu(*(branch[0].p + count)) == 0) {
+		count++;
+	}
+	return count;
+}
+
+/**
+ *	ext3_alloc_blocks: multiple allocate blocks needed for a branch
+ *	@indirect_blks: the number of blocks need to allocate for indirect
+ *			blocks
+ *
+ *	@new_blocks: on return it will store the new block numbers for
+ *	the indirect blocks(if needed) and the first direct block,
+ *	@blks:	on return it will store the total number of allocated
+ *		direct blocks
+ */
+static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
+			unsigned long goal, int indirect_blks, int blks,
+			unsigned long long new_blocks[4], int *err)
+{
+	int target, i;
+	unsigned long count = 0;
+	int index = 0;
+	unsigned long current_block = 0;
+	int ret = 0;
+
+	/*
+	 * Here we try to allocate the requested multiple blocks at once,
+	 * on a best-effort basis.
+	 * To build a branch, we should allocate blocks for
+	 * the indirect blocks(if not allocated yet), and at least
+	 * the first direct block of this branch.  That's the
+	 * minimum number of blocks need to allocate(required)
+	 */
+	target = blks + indirect_blks;
+
+	while (1) {
+		count = target;
+		/* allocating blocks for indirect blocks and direct blocks */
+		current_block = ext3_new_blocks(handle, inode, goal, &count, err);
+		if (*err)
+			goto failed_out;
+
+		target -= count;
+		/* allocate blocks for indirect blocks */
+		while (index < indirect_blks && count) {
+			new_blocks[index++] = current_block++;
+			count--;
+		}
+
+		if (count > 0)
+			break;
+	}
+
+	/* save the new block number for the first direct block */
+	new_blocks[index] = current_block;
+
+	/* total number of blocks allocated for direct blocks */
+	ret = count;
+	*err = 0;
+	return ret;
+failed_out:
+	for (i = 0; i <index; i++)
+		ext3_free_blocks(handle, inode, new_blocks[i], 1);
+	return ret;
+}
 
 /**
  *	ext3_alloc_branch - allocate and set up a chain of blocks.
  *	@inode: owner
- *	@num: depth of the chain (number of blocks to allocate)
+ *	@indirect_blks: number of allocated indirect blocks
+ *	@blks: number of allocated direct blocks
  *	@offsets: offsets (in the blocks) to store the pointers to next.
  *	@branch: place to store the chain in.
  *
- *	This function allocates @num blocks, zeroes out all but the last one,
+ *	This function allocates blocks, zeroes out all but the last one,
  *	links them into chain and (if we are synchronous) writes them to disk.
  *	In other words, it prepares a branch that can be spliced onto the
  *	inode. It stores the information about that chain in the branch[], in
@@ -503,71 +593,79 @@ static unsigned long ext3_find_goal(struct inode *inode, long block,
  */
 
 static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
-			     int num,
-			     unsigned long goal,
-			     int *offsets,
-			     Indirect *branch)
+			int indirect_blks, int *blks, unsigned long goal,
+			int *offsets, Indirect *branch)
 {
 	int blocksize = inode->i_sb->s_blocksize;
-	int n = 0, keys = 0;
+	int i, n = 0;
 	int err = 0;
-	int i;
-	int parent = ext3_alloc_block(handle, inode, goal, &err);
-
-	branch[0].key = cpu_to_le32(parent);
-	if (parent) {
-		for (n = 1; n < num; n++) {
-			struct buffer_head *bh;
-			/* Allocate the next block */
-			int nr = ext3_alloc_block(handle, inode, parent, &err);
-			if (!nr)
-				break;
-			branch[n].key = cpu_to_le32(nr);
+	struct buffer_head *bh;
+	int num;
+	unsigned long long new_blocks[4];
+	unsigned long long current_block;
 
-			/*
-			 * Get buffer_head for parent block, zero it out
-			 * and set the pointer to new one, then send
-			 * parent to disk.  
-			 */
-			bh = sb_getblk(inode->i_sb, parent);
-			if (!bh)
-				break;
-			keys = n+1;
-			branch[n].bh = bh;
-			lock_buffer(bh);
-			BUFFER_TRACE(bh, "call get_create_access");
-			err = ext3_journal_get_create_access(handle, bh);
-			if (err) {
-				unlock_buffer(bh);
-				brelse(bh);
-				break;
-			}
+	num = ext3_alloc_blocks(handle, inode, goal, indirect_blks,
+				*blks, new_blocks, &err);
+	if (err)
+		return err;
 
-			memset(bh->b_data, 0, blocksize);
-			branch[n].p = (__le32*) bh->b_data + offsets[n];
-			*branch[n].p = branch[n].key;
-			BUFFER_TRACE(bh, "marking uptodate");
-			set_buffer_uptodate(bh);
+	branch[0].key = cpu_to_le32(new_blocks[0]);
+	/*
+	 * metadata blocks and data blocks are allocated.
+	 */
+	for (n = 1; n <= indirect_blks;  n++) {
+		/*
+		 * Get buffer_head for parent block, zero it out
+		 * and set the pointer to new one, then send
+		 * parent to disk.
+		 */
+		bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+		branch[n].bh = bh;
+		lock_buffer(bh);
+		BUFFER_TRACE(bh, "call get_create_access");
+		err = ext3_journal_get_create_access(handle, bh);
+		if (err) {
 			unlock_buffer(bh);
+			brelse(bh);
+			goto failed;
+		}
 
-			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-			err = ext3_journal_dirty_metadata(handle, bh);
-			if (err)
-				break;
-
-			parent = nr;
+		memset(bh->b_data, 0, blocksize);
+		branch[n].p = (__le32 *) bh->b_data + offsets[n];
+		branch[n].key = cpu_to_le32(new_blocks[n]);
+		*branch[n].p = branch[n].key;
+		if ( n == indirect_blks) {
+			current_block = new_blocks[n];
+			/*
+			 * End of chain, update the last new metablock of
+			 * the chain to point to the new allocated
+			 * data blocks numbers
+			 */
+			for (i=1; i < num; i++)
+				*(branch[n].p + i) = cpu_to_le32(++current_block);
 		}
-	}
-	if (n == num)
-		return 0;
+		BUFFER_TRACE(bh, "marking uptodate");
+		set_buffer_uptodate(bh);
+		unlock_buffer(bh);
 
+		BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+		err = ext3_journal_dirty_metadata(handle, bh);
+		if (err)
+			goto failed;
+	}
+	*blks = num;
+	return err;
+failed:
 	/* Allocation failed, free what we already allocated */
-	for (i = 1; i < keys; i++) {
+	for (i = 1; i <= n ; i++) {
 		BUFFER_TRACE(branch[i].bh, "call journal_forget");
 		ext3_journal_forget(handle, branch[i].bh);
 	}
-	for (i = 0; i < keys; i++)
-		ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
+	for (i = 0; i <indirect_blks; i++)
+		ext3_free_blocks(handle, inode, new_blocks[i], 1);
+
+	ext3_free_blocks(handle, inode, new_blocks[i], num);
+
 	return err;
 }
 
@@ -578,7 +676,8 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
  *	@chain: chain of indirect blocks (with a missing link - see
  *		ext3_alloc_branch)
  *	@where: location of missing link
- *	@num:   number of blocks we are adding
+ *	@num:   number of indirect blocks we are adding
+ *	@blks:  number of direct blocks we are adding
  *
  *	This function fills the missing link and does all housekeeping needed in
  *	inode (->i_blocks, etc.). In case of success we end up with the full
@@ -586,12 +685,12 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
  */
 
 static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
-			      Indirect chain[4], Indirect *where, int num)
+			      Indirect *where, int num, int blks)
 {
 	int i;
 	int err = 0;
 	struct ext3_block_alloc_info *block_i = EXT3_I(inode)->i_block_alloc_info;
-
+	unsigned long current_block;
 	/*
 	 * If we're splicing into a [td]indirect block (as opposed to the
 	 * inode) then we need to get write access to the [td]indirect block
@@ -606,6 +705,13 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
 	/* That's it */
 
 	*where->p = where->key;
+	/* update host bufferhead or inode to point to
+	 * more just allocated direct blocks blocks */
+	if (num == 0 && blks > 1) {
+		current_block = le32_to_cpu(where->key + 1);
+		for (i = 1; i < blks; i++)
+			*(where->p + i ) = cpu_to_le32(current_block++);
+	}
 
 	/*
 	 * update the most recently allocated logical & physical block
@@ -613,8 +719,8 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
 	 * allocation
 	 */
 	if (block_i) {
-		block_i->last_alloc_logical_block = block;
-		block_i->last_alloc_physical_block = le32_to_cpu(where[num-1].key);
+		block_i->last_alloc_logical_block = block + blks - 1;
+		block_i->last_alloc_physical_block = le32_to_cpu(where[num].key + blks - 1);
 	}
 
 	/* We are done with atomic stuff, now do the rest of housekeeping */
@@ -647,10 +753,13 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
 	return err;
 
 err_out:
-	for (i = 1; i < num; i++) {
+	for (i = 1; i <= num; i++) {
 		BUFFER_TRACE(where[i].bh, "call journal_forget");
 		ext3_journal_forget(handle, where[i].bh);
+		ext3_free_blocks(handle, inode, le32_to_cpu(where[i-1].key), 1);
 	}
+	ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);
+
 	return err;
 }
 
@@ -684,7 +793,7 @@ ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 	Indirect chain[4];
 	Indirect *partial;
 	unsigned long goal;
-	int left;
+	int indirect_blks;
 	int blocks_to_boundary = 0;
 	int depth;
 	struct ext3_inode_info *ei = EXT3_I(inode);
@@ -772,12 +881,19 @@ ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 
 	goal = ext3_find_goal(inode, iblock, chain, partial);
 
-	left = (chain + depth) - partial;
+	/* the number of blocks need to allocate for [d,t]indirect blocks */
+	indirect_blks = (chain + depth) - partial - 1;
 
+	/*
+	 * Next look up the indirect map to count the totoal number of
+	 * direct blocks to allocate for this branch.
+	 */
+	count = ext3_blks_to_allocate(partial, indirect_blks,
+					maxblocks, blocks_to_boundary);
 	/*
 	 * Block out ext3_truncate while we alter the tree
 	 */
-	err = ext3_alloc_branch(handle, inode, left, goal,
+	err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
 				offsets + (partial - chain), partial);
 
 	/*
@@ -788,8 +904,8 @@ ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 	 * may need to return -EAGAIN upwards in the worst case.  --sct
 	 */
 	if (!err)
-		err = ext3_splice_branch(handle, inode, iblock, chain,
-					 partial, left);
+		err = ext3_splice_branch(handle, inode, iblock,
+					partial, indirect_blks, count);
 	/*
 	 * i_disksize growing is protected by truncate_mutex.  Don't forget to
 	 * protect it if you're about to implement concurrent
@@ -824,8 +940,8 @@ out:
 
 static int
 ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock,
-		unsigned long max_blocks, struct buffer_head *bh_result,
-		int create)
+		unsigned long max_blocks,
+		struct buffer_head *bh_result, int create)
 {
 	handle_t *handle = journal_current_handle();
 	int ret = 0;
-- 
cgit v1.2.3


From b54e41ec17ae91dce174eb5a3515e7af4a440d42 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Sun, 26 Mar 2006 01:37:57 -0800
Subject: [PATCH] ext3_get_blocks: support multiple blocks allocation in
 ext3_new_block()

Change ext3_try_to_allocate() (called via ext3_new_blocks()) to try to
allocate the requested number of blocks on a best effort basis: After
allocated the first block, it will always attempt to allocate the next few(up
to the requested size and not beyond the reservation window) adjacent blocks
at the same time.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 46 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 46623f77666..bba4aeb4a70 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -653,9 +653,11 @@ claim_block(spinlock_t *lock, int block, struct buffer_head *bh)
  */
 static int
 ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
-	struct buffer_head *bitmap_bh, int goal, struct ext3_reserve_window *my_rsv)
+			struct buffer_head *bitmap_bh, int goal,
+			unsigned long *count, struct ext3_reserve_window *my_rsv)
 {
 	int group_first_block, start, end;
+	unsigned long num = 0;
 
 	/* we do allocation within the reservation window if we have a window */
 	if (my_rsv) {
@@ -713,8 +715,18 @@ repeat:
 			goto fail_access;
 		goto repeat;
 	}
-	return goal;
+	num++;
+	goal++;
+	while (num < *count && goal < end
+		&& ext3_test_allocatable(goal, bitmap_bh)
+		&& claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) {
+		num++;
+		goal++;
+	}
+	*count = num;
+	return goal - num;
 fail_access:
+	*count = num;
 	return -1;
 }
 
@@ -1024,11 +1036,12 @@ static int
 ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 			unsigned int group, struct buffer_head *bitmap_bh,
 			int goal, struct ext3_reserve_window_node * my_rsv,
-			int *errp)
+			unsigned long *count, int *errp)
 {
 	unsigned long group_first_block;
 	int ret = 0;
 	int fatal;
+	unsigned long num = *count;
 
 	*errp = 0;
 
@@ -1051,7 +1064,8 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 	 * or last attempt to allocate a block with reservation turned on failed
 	 */
 	if (my_rsv == NULL ) {
-		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL);
+		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
+						goal, count, NULL);
 		goto out;
 	}
 	/*
@@ -1093,11 +1107,13 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 		    || (my_rsv->rsv_end < group_first_block))
 			BUG();
 		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal,
-					   &my_rsv->rsv_window);
+					   &num, &my_rsv->rsv_window);
 		if (ret >= 0) {
-			my_rsv->rsv_alloc_hit++;
+			my_rsv->rsv_alloc_hit += num;
+			*count = num;
 			break;				/* succeed */
 		}
+		num = *count;
 	}
 out:
 	if (ret >= 0) {
@@ -1154,8 +1170,8 @@ int ext3_should_retry_alloc(struct super_block *sb, int *retries)
  * bitmap, and then for any free bit if that fails.
  * This function also updates quota and i_blocks field.
  */
-int ext3_new_block(handle_t *handle, struct inode *inode,
-			unsigned long goal, int *errp)
+int ext3_new_blocks(handle_t *handle, struct inode *inode,
+			unsigned long goal, unsigned long *count, int *errp)
 {
 	struct buffer_head *bitmap_bh = NULL;
 	struct buffer_head *gdp_bh;
@@ -1178,6 +1194,7 @@ int ext3_new_block(handle_t *handle, struct inode *inode,
 	static int goal_hits, goal_attempts;
 #endif
 	unsigned long ngroups;
+	unsigned long num = *count;
 
 	*errp = -ENOSPC;
 	sb = inode->i_sb;
@@ -1244,7 +1261,7 @@ retry:
 		if (!bitmap_bh)
 			goto io_error;
 		ret_block = ext3_try_to_allocate_with_rsv(sb, handle, group_no,
-					bitmap_bh, ret_block, my_rsv, &fatal);
+					bitmap_bh, ret_block, my_rsv, &num, &fatal);
 		if (fatal)
 			goto out;
 		if (ret_block >= 0)
@@ -1281,7 +1298,7 @@ retry:
 		if (!bitmap_bh)
 			goto io_error;
 		ret_block = ext3_try_to_allocate_with_rsv(sb, handle, group_no,
-					bitmap_bh, -1, my_rsv, &fatal);
+					bitmap_bh, -1, my_rsv, &num, &fatal);
 		if (fatal)
 			goto out;
 		if (ret_block >= 0) 
@@ -1388,6 +1405,7 @@ allocated:
 
 	*errp = 0;
 	brelse(bitmap_bh);
+	*count = num;
 	return ret_block;
 
 io_error:
@@ -1406,6 +1424,14 @@ out:
 	return 0;
 }
 
+int ext3_new_block(handle_t *handle, struct inode *inode,
+			unsigned long goal, int *errp)
+{
+	unsigned long count = 1;
+
+	return ext3_new_blocks(handle, inode, goal, &count, errp);
+}
+
 unsigned long ext3_count_free_blocks(struct super_block *sb)
 {
 	unsigned long desc_count;
-- 
cgit v1.2.3


From faa569763a7753e0a7cb8fd3919a62c0f7cc1e3c Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Sun, 26 Mar 2006 01:37:58 -0800
Subject: [PATCH] ext3_get_blocks: Adjust accounting info in ext3_new_blocks()

Update accounting information (quota, boundary checks, free blocks number etc)
in ext3_new_blocks().

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index bba4aeb4a70..7a2c7198b05 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1206,7 +1206,7 @@ int ext3_new_blocks(handle_t *handle, struct inode *inode,
 	/*
 	 * Check quota for allocation of this block.
 	 */
-	if (DQUOT_ALLOC_BLOCK(inode, 1)) {
+	if (DQUOT_ALLOC_BLOCK(inode, num)) {
 		*errp = -EDQUOT;
 		return 0;
 	}
@@ -1333,13 +1333,15 @@ allocated:
 	target_block = ret_block + group_no * EXT3_BLOCKS_PER_GROUP(sb)
 				+ le32_to_cpu(es->s_first_data_block);
 
-	if (target_block == le32_to_cpu(gdp->bg_block_bitmap) ||
-	    target_block == le32_to_cpu(gdp->bg_inode_bitmap) ||
+	if (in_range(le32_to_cpu(gdp->bg_block_bitmap), target_block, num) ||
+	    in_range(le32_to_cpu(gdp->bg_inode_bitmap), target_block, num) ||
 	    in_range(target_block, le32_to_cpu(gdp->bg_inode_table),
+		      EXT3_SB(sb)->s_itb_per_group) ||
+	    in_range(target_block + num - 1, le32_to_cpu(gdp->bg_inode_table),
 		      EXT3_SB(sb)->s_itb_per_group))
 		ext3_error(sb, "ext3_new_block",
 			    "Allocating block in system zone - "
-			    "block = %u", target_block);
+			    "blocks from %u, length %lu", target_block, num);
 
 	performed_allocation = 1;
 
@@ -1358,10 +1360,14 @@ allocated:
 	jbd_lock_bh_state(bitmap_bh);
 	spin_lock(sb_bgl_lock(sbi, group_no));
 	if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
-		if (ext3_test_bit(ret_block,
-				bh2jh(bitmap_bh)->b_committed_data)) {
-			printk("%s: block was unexpectedly set in "
-				"b_committed_data\n", __FUNCTION__);
+		int i;
+
+		for (i = 0; i < num; i++) {
+			if (ext3_test_bit(ret_block,
+					bh2jh(bitmap_bh)->b_committed_data)) {
+				printk("%s: block was unexpectedly set in "
+					"b_committed_data\n", __FUNCTION__);
+			}
 		}
 	}
 	ext3_debug("found bit %d\n", ret_block);
@@ -1372,7 +1378,7 @@ allocated:
 	/* ret_block was blockgroup-relative.  Now it becomes fs-relative */
 	ret_block = target_block;
 
-	if (ret_block >= le32_to_cpu(es->s_blocks_count)) {
+	if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) {
 		ext3_error(sb, "ext3_new_block",
 			    "block(%d) >= blocks count(%d) - "
 			    "block_group = %d, es == %p ", ret_block,
@@ -1390,9 +1396,9 @@ allocated:
 
 	spin_lock(sb_bgl_lock(sbi, group_no));
 	gdp->bg_free_blocks_count =
-			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
+			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - num);
 	spin_unlock(sb_bgl_lock(sbi, group_no));
-	percpu_counter_mod(&sbi->s_freeblocks_counter, -1);
+	percpu_counter_mod(&sbi->s_freeblocks_counter, -num);
 
 	BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
 	err = ext3_journal_dirty_metadata(handle, gdp_bh);
@@ -1405,6 +1411,7 @@ allocated:
 
 	*errp = 0;
 	brelse(bitmap_bh);
+	DQUOT_FREE_BLOCK(inode, *count-num);
 	*count = num;
 	return ret_block;
 
@@ -1419,7 +1426,7 @@ out:
 	 * Undo the block allocation
 	 */
 	if (!performed_allocation)
-		DQUOT_FREE_BLOCK(inode, 1);
+		DQUOT_FREE_BLOCK(inode, *count);
 	brelse(bitmap_bh);
 	return 0;
 }
-- 
cgit v1.2.3


From d48589bfad0e60bff0aa3f9e4875983f607bd44b Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Sun, 26 Mar 2006 01:37:59 -0800
Subject: [PATCH] ext3_get_blocks: Adjust reservation window size for mblocks

Optimize the block reservation and the multiple block allocation: with the
knowledge of the total number of blocks ahead, set or adjust the reservation
window size properly (based on the number of blocks needed) before block
allocation happens: if there isn't any reservation yet, make sure the
reservation window equals to or greater than the number of blocks needed,
before create an reservation window; if a reservation window is already
exists, try to extends the window size to match the number of blocks to
allocate.  This could increase the possibility of completing multiple blocks
allocation in a single request, as blocks are only allocated in the range of
the inode's reservation window.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 7a2c7198b05..77927d6938f 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1011,6 +1011,31 @@ retry:
 	goto retry;
 }
 
+static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
+			struct super_block *sb, int size)
+{
+	struct ext3_reserve_window_node *next_rsv;
+	struct rb_node *next;
+	spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
+
+	if (!spin_trylock(rsv_lock))
+		return;
+
+	next = rb_next(&my_rsv->rsv_node);
+
+	if (!next)
+		my_rsv->rsv_end += size;
+	else {
+		next_rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node);
+
+		if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
+			my_rsv->rsv_end += size;
+		else
+			my_rsv->rsv_end = next_rsv->rsv_start - 1;
+	}
+	spin_unlock(rsv_lock);
+}
+
 /*
  * This is the main function used to allocate a new block and its reservation
  * window.
@@ -1095,6 +1120,8 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 	while (1) {
 		if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
 			!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) {
+			if (my_rsv->rsv_goal_size < *count)
+				my_rsv->rsv_goal_size = *count;
 			ret = alloc_new_reservation(my_rsv, goal, sb,
 							group, bitmap_bh);
 			if (ret < 0)
@@ -1102,7 +1129,10 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 
 			if (!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb))
 				goal = -1;
-		}
+		} else if (goal > 0 && (my_rsv->rsv_end-goal+1) < *count)
+			try_to_extend_reservation(my_rsv, sb,
+					*count-my_rsv->rsv_end + goal - 1);
+
 		if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
 		    || (my_rsv->rsv_end < group_first_block))
 			BUG();
-- 
cgit v1.2.3


From 205f87f6b342444f722e4559d33318686f7df2ca Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Sun, 26 Mar 2006 01:38:00 -0800
Subject: [PATCH] change buffer_head.b_size to size_t

Increase the size of the buffer_head b_size field (only) for 64 bit platforms.
Update some old and moldy comments in and around the structure as well.

The b_size increase allows us to perform larger mappings and allocations for
large I/O requests from userspace, which tie in with other changes allowing
the get_block_t() interface to map multiple blocks at once.

Signed-off-by: Nathan Scott <nathans@sgi.com>
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c          | 6 ++++--
 fs/ocfs2/journal.c   | 2 +-
 fs/reiserfs/prints.c | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index f25f5809642..e7a1461f438 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -426,8 +426,10 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 	if (all_mapped) {
 		printk("__find_get_block_slow() failed. "
 			"block=%llu, b_blocknr=%llu\n",
-			(unsigned long long)block, (unsigned long long)bh->b_blocknr);
-		printk("b_state=0x%08lx, b_size=%u\n", bh->b_state, bh->b_size);
+			(unsigned long long)block,
+			(unsigned long long)bh->b_blocknr);
+		printk("b_state=0x%08lx, b_size=%zu\n",
+			bh->b_state, bh->b_size);
 		printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits);
 	}
 out_unlock:
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 90641929a43..6a610ae5358 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -377,7 +377,7 @@ int ocfs2_journal_access(struct ocfs2_journal_handle *handle,
 	BUG_ON(!bh);
 	BUG_ON(!(handle->flags & OCFS2_HANDLE_STARTED));
 
-	mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %hu\n",
+	mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %zu\n",
 		   (unsigned long long)bh->b_blocknr, type,
 		   (type == OCFS2_JOURNAL_ACCESS_CREATE) ?
 		   "OCFS2_JOURNAL_ACCESS_CREATE" :
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 78b40621b88..27bd3a1df2a 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -143,7 +143,7 @@ static void sprintf_buffer_head(char *buf, struct buffer_head *bh)
 	char b[BDEVNAME_SIZE];
 
 	sprintf(buf,
-		"dev %s, size %d, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
+		"dev %s, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
 		bdevname(bh->b_bdev, b), bh->b_size,
 		(unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)),
 		bh->b_state, bh->b_page,
-- 
cgit v1.2.3


From b0cf2321c6599138f860517745503691556d8453 Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Sun, 26 Mar 2006 01:38:00 -0800
Subject: [PATCH] pass b_size to ->get_block()

Pass amount of disk needs to be mapped to get_block().  This way one can
modify the fs ->get_block() functions to map multiple blocks at the same time.

[akpm@osdl.org: performance tweak]
[akpm@osdl.org: remove unneeded assignments]
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 9 ++++++++-
 fs/mpage.c  | 2 ++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index e7a1461f438..a507b58550f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1738,6 +1738,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	sector_t block;
 	sector_t last_block;
 	struct buffer_head *bh, *head;
+	const unsigned blocksize = 1 << inode->i_blkbits;
 	int nr_underway = 0;
 
 	BUG_ON(!PageLocked(page));
@@ -1745,7 +1746,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
 
 	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, 1 << inode->i_blkbits,
+		create_empty_buffers(page, blocksize,
 					(1 << BH_Dirty)|(1 << BH_Uptodate));
 	}
 
@@ -1780,6 +1781,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 			clear_buffer_dirty(bh);
 			set_buffer_uptodate(bh);
 		} else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
+			WARN_ON(bh->b_size != blocksize);
 			err = get_block(inode, block, bh, 1);
 			if (err)
 				goto recover;
@@ -1933,6 +1935,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (buffer_new(bh))
 			clear_buffer_new(bh);
 		if (!buffer_mapped(bh)) {
+			WARN_ON(bh->b_size != blocksize);
 			err = get_block(inode, block, bh, 1);
 			if (err)
 				break;
@@ -2088,6 +2091,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 
 			fully_mapped = 0;
 			if (iblock < lblock) {
+				WARN_ON(bh->b_size != blocksize);
 				err = get_block(inode, iblock, bh, 0);
 				if (err)
 					SetPageError(page);
@@ -2409,6 +2413,7 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
 		create = 1;
 		if (block_start >= to)
 			create = 0;
+		map_bh.b_size = blocksize;
 		ret = get_block(inode, block_in_file + block_in_page,
 					&map_bh, create);
 		if (ret)
@@ -2669,6 +2674,7 @@ int block_truncate_page(struct address_space *mapping,
 
 	err = 0;
 	if (!buffer_mapped(bh)) {
+		WARN_ON(bh->b_size != blocksize);
 		err = get_block(inode, iblock, bh, 0);
 		if (err)
 			goto unlock;
@@ -2755,6 +2761,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
 	struct inode *inode = mapping->host;
 	tmp.b_state = 0;
 	tmp.b_blocknr = 0;
+	tmp.b_size = 1 << inode->i_blkbits;
 	get_block(inode, block, &tmp, 0);
 	return tmp.b_blocknr;
 }
diff --git a/fs/mpage.c b/fs/mpage.c
index e431cb3878d..7903b740cc1 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -192,6 +192,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
 				page_block++, block_in_file++) {
 		bh.b_state = 0;
 		if (block_in_file < last_block) {
+			bh.b_size = blocksize;
 			if (get_block(inode, block_in_file, &bh, 0))
 				goto confused;
 		}
@@ -472,6 +473,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
 	for (page_block = 0; page_block < blocks_per_page; ) {
 
 		map_bh.b_state = 0;
+		map_bh.b_size = 1 << blkbits;
 		if (get_block(inode, block_in_file, &map_bh, 1))
 			goto confused;
 		if (buffer_new(&map_bh))
-- 
cgit v1.2.3


From fa30bd058b746c0e2318a77ff8b4977faa924c2c Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Sun, 26 Mar 2006 01:38:01 -0800
Subject: [PATCH] map multiple blocks for mpage_readpages()

This patch changes mpage_readpages() and get_block() to get the disk mapping
information for multiple blocks at the same time.

b_size represents the amount of disk mapping that needs to mapped.  On the
successful get_block() b_size indicates the amount of disk mapping thats
actually mapped.  Only the filesystems who care to use this information and
provide multiple disk blocks at a time can choose to do so.

No changes are needed for the filesystems who wants to ignore this.

[akpm@osdl.org: cleanups]
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jfs/inode.c              |   3 +-
 fs/mpage.c                  | 104 ++++++++++++++++++++++++++++++++++++--------
 fs/xfs/linux-2.6/xfs_aops.c |   5 ++-
 3 files changed, 90 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 51a5fed90cc..7239ef33948 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -258,7 +258,8 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
 static int jfs_get_block(struct inode *ip, sector_t lblock,
 			 struct buffer_head *bh_result, int create)
 {
-	return jfs_get_blocks(ip, lblock, 1, bh_result, create);
+	return jfs_get_blocks(ip, lblock, bh_result->b_size >> ip->i_blkbits,
+			bh_result, create);
 }
 
 static int jfs_writepage(struct page *page, struct writeback_control *wbc)
diff --git a/fs/mpage.c b/fs/mpage.c
index 7903b740cc1..9bf2eb30e6f 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -163,9 +163,19 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
 	} while (page_bh != head);
 }
 
+/*
+ * This is the worker routine which does all the work of mapping the disk
+ * blocks and constructs largest possible bios, submits them for IO if the
+ * blocks are not contiguous on the disk.
+ *
+ * We pass a buffer_head back and forth and use its buffer_mapped() flag to
+ * represent the validity of its disk mapping and to decide when to do the next
+ * get_block() call.
+ */
 static struct bio *
 do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
-			sector_t *last_block_in_bio, get_block_t get_block)
+		sector_t *last_block_in_bio, struct buffer_head *map_bh,
+		unsigned long *first_logical_block, get_block_t get_block)
 {
 	struct inode *inode = page->mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
@@ -173,34 +183,72 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
 	const unsigned blocksize = 1 << blkbits;
 	sector_t block_in_file;
 	sector_t last_block;
+	sector_t last_block_in_file;
 	sector_t blocks[MAX_BUF_PER_PAGE];
 	unsigned page_block;
 	unsigned first_hole = blocks_per_page;
 	struct block_device *bdev = NULL;
-	struct buffer_head bh;
 	int length;
 	int fully_mapped = 1;
+	unsigned nblocks;
+	unsigned relative_block;
 
 	if (page_has_buffers(page))
 		goto confused;
 
 	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
-	last_block = (i_size_read(inode) + blocksize - 1) >> blkbits;
+	last_block = block_in_file + nr_pages * blocks_per_page;
+	last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
+	if (last_block > last_block_in_file)
+		last_block = last_block_in_file;
+	page_block = 0;
+
+	/*
+	 * Map blocks using the result from the previous get_blocks call first.
+	 */
+	nblocks = map_bh->b_size >> blkbits;
+	if (buffer_mapped(map_bh) && block_in_file > *first_logical_block &&
+			block_in_file < (*first_logical_block + nblocks)) {
+		unsigned map_offset = block_in_file - *first_logical_block;
+		unsigned last = nblocks - map_offset;
+
+		for (relative_block = 0; ; relative_block++) {
+			if (relative_block == last) {
+				clear_buffer_mapped(map_bh);
+				break;
+			}
+			if (page_block == blocks_per_page)
+				break;
+			blocks[page_block] = map_bh->b_blocknr + map_offset +
+						relative_block;
+			page_block++;
+			block_in_file++;
+		}
+		bdev = map_bh->b_bdev;
+	}
+
+	/*
+	 * Then do more get_blocks calls until we are done with this page.
+	 */
+	map_bh->b_page = page;
+	while (page_block < blocks_per_page) {
+		map_bh->b_state = 0;
+		map_bh->b_size = 0;
 
-	bh.b_page = page;
-	for (page_block = 0; page_block < blocks_per_page;
-				page_block++, block_in_file++) {
-		bh.b_state = 0;
 		if (block_in_file < last_block) {
-			bh.b_size = blocksize;
-			if (get_block(inode, block_in_file, &bh, 0))
+			map_bh->b_size = (last_block-block_in_file) << blkbits;
+			if (get_block(inode, block_in_file, map_bh, 0))
 				goto confused;
+			*first_logical_block = block_in_file;
 		}
 
-		if (!buffer_mapped(&bh)) {
+		if (!buffer_mapped(map_bh)) {
 			fully_mapped = 0;
 			if (first_hole == blocks_per_page)
 				first_hole = page_block;
+			page_block++;
+			block_in_file++;
+			clear_buffer_mapped(map_bh);
 			continue;
 		}
 
@@ -210,8 +258,8 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
 		 * we just collected from get_block into the page's buffers
 		 * so readpage doesn't have to repeat the get_block call
 		 */
-		if (buffer_uptodate(&bh)) {
-			map_buffer_to_page(page, &bh, page_block);
+		if (buffer_uptodate(map_bh)) {
+			map_buffer_to_page(page, map_bh, page_block);
 			goto confused;
 		}
 	
@@ -219,10 +267,20 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
 			goto confused;		/* hole -> non-hole */
 
 		/* Contiguous blocks? */
-		if (page_block && blocks[page_block-1] != bh.b_blocknr-1)
+		if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1)
 			goto confused;
-		blocks[page_block] = bh.b_blocknr;
-		bdev = bh.b_bdev;
+		nblocks = map_bh->b_size >> blkbits;
+		for (relative_block = 0; ; relative_block++) {
+			if (relative_block == nblocks) {
+				clear_buffer_mapped(map_bh);
+				break;
+			} else if (page_block == blocks_per_page)
+				break;
+			blocks[page_block] = map_bh->b_blocknr+relative_block;
+			page_block++;
+			block_in_file++;
+		}
+		bdev = map_bh->b_bdev;
 	}
 
 	if (first_hole != blocks_per_page) {
@@ -261,7 +319,7 @@ alloc_new:
 		goto alloc_new;
 	}
 
-	if (buffer_boundary(&bh) || (first_hole != blocks_per_page))
+	if (buffer_boundary(map_bh) || (first_hole != blocks_per_page))
 		bio = mpage_bio_submit(READ, bio);
 	else
 		*last_block_in_bio = blocks[blocks_per_page - 1];
@@ -332,7 +390,10 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	unsigned page_idx;
 	sector_t last_block_in_bio = 0;
 	struct pagevec lru_pvec;
+	struct buffer_head map_bh;
+	unsigned long first_logical_block = 0;
 
+	clear_buffer_mapped(&map_bh);
 	pagevec_init(&lru_pvec, 0);
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_entry(pages->prev, struct page, lru);
@@ -343,7 +404,9 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 					page->index, GFP_KERNEL)) {
 			bio = do_mpage_readpage(bio, page,
 					nr_pages - page_idx,
-					&last_block_in_bio, get_block);
+					&last_block_in_bio, &map_bh,
+					&first_logical_block,
+					get_block);
 			if (!pagevec_add(&lru_pvec, page))
 				__pagevec_lru_add(&lru_pvec);
 		} else {
@@ -365,9 +428,12 @@ int mpage_readpage(struct page *page, get_block_t get_block)
 {
 	struct bio *bio = NULL;
 	sector_t last_block_in_bio = 0;
+	struct buffer_head map_bh;
+	unsigned long first_logical_block = 0;
 
-	bio = do_mpage_readpage(bio, page, 1,
-			&last_block_in_bio, get_block);
+	clear_buffer_mapped(&map_bh);
+	bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
+			&map_bh, &first_logical_block, get_block);
 	if (bio)
 		mpage_bio_submit(READ, bio);
 	return 0;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 4f2476f188b..a79b84f8b55 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1310,8 +1310,9 @@ xfs_get_block(
 	struct buffer_head	*bh_result,
 	int			create)
 {
-	return __xfs_get_block(inode, iblock, 0, bh_result,
-					create, 0, BMAPI_WRITE);
+	return __xfs_get_block(inode, iblock,
+				bh_result->b_size >> inode->i_blkbits,
+				bh_result, create, 0, BMAPI_WRITE);
 }
 
 STATIC int
-- 
cgit v1.2.3


From 1d8fa7a2b9a39d18727acc5c468e870df606c852 Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Sun, 26 Mar 2006 01:38:02 -0800
Subject: [PATCH] remove ->get_blocks() support

Now that get_block() can handle mapping multiple disk blocks, no need to have
->get_blocks().  This patch removes fs specific ->get_blocks() added for DIO
and makes it users use get_block() instead.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c              |  3 ++-
 fs/direct-io.c              | 27 ++++++++++++++-------------
 fs/ext2/inode.c             | 14 +-------------
 fs/ext3/inode.c             | 12 ++----------
 fs/fat/inode.c              |  2 +-
 fs/hfs/inode.c              | 13 +------------
 fs/hfsplus/inode.c          | 13 +------------
 fs/jfs/inode.c              |  2 +-
 fs/ocfs2/aops.c             |  2 +-
 fs/reiserfs/inode.c         |  1 -
 fs/xfs/linux-2.6/xfs_aops.c |  6 +++---
 11 files changed, 27 insertions(+), 68 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9a451a9ffad..5983d42df01 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -131,9 +131,10 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
 
 static int
 blkdev_get_blocks(struct inode *inode, sector_t iblock,
-		unsigned long max_blocks, struct buffer_head *bh, int create)
+		struct buffer_head *bh, int create)
 {
 	sector_t end_block = max_block(I_BDEV(inode));
+	unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
 
 	if ((iblock + max_blocks) > end_block) {
 		max_blocks = end_block - iblock;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 235ed8d1f11..9d1d2aa73e4 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -86,12 +86,12 @@ struct dio {
 	unsigned first_block_in_page;	/* doesn't change, Used only once */
 	int boundary;			/* prev block is at a boundary */
 	int reap_counter;		/* rate limit reaping */
-	get_blocks_t *get_blocks;	/* block mapping function */
+	get_block_t *get_block;		/* block mapping function */
 	dio_iodone_t *end_io;		/* IO completion function */
 	sector_t final_block_in_bio;	/* current final block in bio + 1 */
 	sector_t next_block_for_io;	/* next block to be put under IO,
 					   in dio_blocks units */
-	struct buffer_head map_bh;	/* last get_blocks() result */
+	struct buffer_head map_bh;	/* last get_block() result */
 
 	/*
 	 * Deferred addition of a page to the dio.  These variables are
@@ -211,9 +211,9 @@ static struct page *dio_get_page(struct dio *dio)
 
 /*
  * Called when all DIO BIO I/O has been completed - let the filesystem
- * know, if it registered an interest earlier via get_blocks.  Pass the
+ * know, if it registered an interest earlier via get_block.  Pass the
  * private field of the map buffer_head so that filesystems can use it
- * to hold additional state between get_blocks calls and dio_complete.
+ * to hold additional state between get_block calls and dio_complete.
  */
 static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
 {
@@ -493,7 +493,7 @@ static int dio_bio_reap(struct dio *dio)
  * The fs is allowed to map lots of blocks at once.  If it wants to do that,
  * it uses the passed inode-relative block number as the file offset, as usual.
  *
- * get_blocks() is passed the number of i_blkbits-sized blocks which direct_io
+ * get_block() is passed the number of i_blkbits-sized blocks which direct_io
  * has remaining to do.  The fs should not map more than this number of blocks.
  *
  * If the fs has mapped a lot of blocks, it should populate bh->b_size to
@@ -506,7 +506,7 @@ static int dio_bio_reap(struct dio *dio)
  * In the case of filesystem holes: the fs may return an arbitrarily-large
  * hole by returning an appropriate value in b_size and by clearing
  * buffer_mapped().  However the direct-io code will only process holes one
- * block at a time - it will repeatedly call get_blocks() as it walks the hole.
+ * block at a time - it will repeatedly call get_block() as it walks the hole.
  */
 static int get_more_blocks(struct dio *dio)
 {
@@ -548,7 +548,8 @@ static int get_more_blocks(struct dio *dio)
 		 * at a higher level for inside-i_size block-instantiating
 		 * writes.
 		 */
-		ret = (*dio->get_blocks)(dio->inode, fs_startblk, fs_count,
+		map_bh->b_size = fs_count << dio->blkbits;
+		ret = (*dio->get_block)(dio->inode, fs_startblk,
 						map_bh, create);
 	}
 	return ret;
@@ -783,11 +784,11 @@ static void dio_zero_block(struct dio *dio, int end)
  * happily perform page-sized but 512-byte aligned IOs.  It is important that
  * blockdev IO be able to have fine alignment and large sizes.
  *
- * So what we do is to permit the ->get_blocks function to populate bh.b_size
+ * So what we do is to permit the ->get_block function to populate bh.b_size
  * with the size of IO which is permitted at this offset and this i_blkbits.
  *
  * For best results, the blockdev should be set up with 512-byte i_blkbits and
- * it should set b_size to PAGE_SIZE or more inside get_blocks().  This gives
+ * it should set b_size to PAGE_SIZE or more inside get_block().  This gives
  * fine alignment but still allows this function to work in PAGE_SIZE units.
  */
 static int do_direct_IO(struct dio *dio)
@@ -947,7 +948,7 @@ out:
 static ssize_t
 direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 
 	const struct iovec *iov, loff_t offset, unsigned long nr_segs, 
-	unsigned blkbits, get_blocks_t get_blocks, dio_iodone_t end_io,
+	unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
 	struct dio *dio)
 {
 	unsigned long user_addr; 
@@ -969,7 +970,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 
 	dio->boundary = 0;
 	dio->reap_counter = 0;
-	dio->get_blocks = get_blocks;
+	dio->get_block = get_block;
 	dio->end_io = end_io;
 	dio->map_bh.b_private = NULL;
 	dio->final_block_in_bio = -1;
@@ -1177,7 +1178,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 ssize_t
 __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset, 
-	unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io,
+	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
 	int dio_lock_type)
 {
 	int seg;
@@ -1273,7 +1274,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 		(end > i_size_read(inode)));
 
 	retval = direct_io_worker(rw, iocb, inode, iov, offset,
-				nr_segs, blkbits, get_blocks, end_io, dio);
+				nr_segs, blkbits, get_block, end_io, dio);
 
 	if (rw == READ && dio_lock_type == DIO_LOCKING)
 		release_i_mutex = 0;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index a717837f272..04af9c45dce 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -667,18 +667,6 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping,block,ext2_get_block);
 }
 
-static int
-ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks,
-			struct buffer_head *bh_result, int create)
-{
-	int ret;
-
-	ret = ext2_get_block(inode, iblock, bh_result, create);
-	if (ret == 0)
-		bh_result->b_size = (1 << inode->i_blkbits);
-	return ret;
-}
-
 static ssize_t
 ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs)
@@ -687,7 +675,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	struct inode *inode = file->f_mapping->host;
 
 	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-				offset, nr_segs, ext2_get_blocks, NULL);
+				offset, nr_segs, ext2_get_block, NULL);
 }
 
 static int
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 34e5b0dc916..0cd126176bb 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -940,11 +940,11 @@ out:
 
 static int
 ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock,
-		unsigned long max_blocks,
 		struct buffer_head *bh_result, int create)
 {
 	handle_t *handle = journal_current_handle();
 	int ret = 0;
+	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
 
 	if (!create)
 		goto get_block;		/* A read */
@@ -989,18 +989,10 @@ get_block:
 	return ret;
 }
 
-static int ext3_get_blocks(struct inode *inode, sector_t iblock,
-		unsigned long maxblocks, struct buffer_head *bh_result,
-		int create)
-{
-	return ext3_direct_io_get_blocks(inode, iblock, maxblocks,
-					bh_result, create);
-}
-
 static int ext3_get_block(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create)
 {
-	return ext3_get_blocks(inode, iblock, 1, bh_result, create);
+	return ext3_direct_io_get_blocks(inode, iblock, bh_result, create);
 }
 
 /*
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 297300fe81c..404bfc9f738 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -101,11 +101,11 @@ static int __fat_get_blocks(struct inode *inode, sector_t iblock,
 }
 
 static int fat_get_blocks(struct inode *inode, sector_t iblock,
-			  unsigned long max_blocks,
 			  struct buffer_head *bh_result, int create)
 {
 	struct super_block *sb = inode->i_sb;
 	int err;
+	unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
 
 	err = __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create);
 	if (err)
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 39fd85b9b91..2c564701724 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -98,17 +98,6 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
 	return res ? try_to_free_buffers(page) : 0;
 }
 
-static int hfs_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks,
-			  struct buffer_head *bh_result, int create)
-{
-	int ret;
-
-	ret = hfs_get_block(inode, iblock, bh_result, create);
-	if (!ret)
-		bh_result->b_size = (1 << inode->i_blkbits);
-	return ret;
-}
-
 static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
 		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
 {
@@ -116,7 +105,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
 	struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
 
 	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-				  offset, nr_segs, hfs_get_blocks, NULL);
+				  offset, nr_segs, hfs_get_block, NULL);
 }
 
 static int hfs_writepages(struct address_space *mapping,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 12ed2b7d046..9fbe4d2aeec 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -93,17 +93,6 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
 	return res ? try_to_free_buffers(page) : 0;
 }
 
-static int hfsplus_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks,
-			      struct buffer_head *bh_result, int create)
-{
-	int ret;
-
-	ret = hfsplus_get_block(inode, iblock, bh_result, create);
-	if (!ret)
-		bh_result->b_size = (1 << inode->i_blkbits);
-	return ret;
-}
-
 static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
 		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
 {
@@ -111,7 +100,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
 	struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
 
 	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-				  offset, nr_segs, hfsplus_get_blocks, NULL);
+				  offset, nr_segs, hfsplus_get_block, NULL);
 }
 
 static int hfsplus_writepages(struct address_space *mapping,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 7239ef33948..04eb78f1252 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -302,7 +302,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
 	struct inode *inode = file->f_mapping->host;
 
 	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-				offset, nr_segs, jfs_get_blocks, NULL);
+				offset, nr_segs, jfs_get_block, NULL);
 }
 
 struct address_space_operations jfs_aops = {
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index bf931ba1d36..0d858d0b25b 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -540,7 +540,6 @@ bail:
  * 					fs_count, map_bh, dio->rw == WRITE);
  */
 static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
-				     unsigned long max_blocks,
 				     struct buffer_head *bh_result, int create)
 {
 	int ret;
@@ -548,6 +547,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 	u64 p_blkno;
 	int contig_blocks;
 	unsigned char blocksize_bits;
+	unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
 
 	if (!inode || !bh_result) {
 		mlog(ML_ERROR, "inode or bh_result is null\n");
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 62e18c19b44..9857e50f85e 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -466,7 +466,6 @@ static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
    direct_IO request. */
 static int reiserfs_get_blocks_direct_io(struct inode *inode,
 					 sector_t iblock,
-					 unsigned long max_blocks,
 					 struct buffer_head *bh_result,
 					 int create)
 {
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a79b84f8b55..c02f7c5b746 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1319,12 +1319,12 @@ STATIC int
 xfs_get_blocks_direct(
 	struct inode		*inode,
 	sector_t		iblock,
-	unsigned long		max_blocks,
 	struct buffer_head	*bh_result,
 	int			create)
 {
-	return __xfs_get_block(inode, iblock, max_blocks, bh_result,
-					create, 1, BMAPI_WRITE|BMAPI_DIRECT);
+	return __xfs_get_block(inode, iblock,
+				bh_result->b_size >> inode->i_blkbits,
+				bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT);
 }
 
 STATIC void
-- 
cgit v1.2.3


From d6859bfca8cbfe4105704e410b0afa50beabbbb9 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 26 Mar 2006 01:38:03 -0800
Subject: [PATCH] ext3: cleanups and WARN_ON()

- Clean up a few little layout things and comments.

- Add a WARN_ON to a case which I was wondering about.

- Tune up some inlines.

Cc: Mingming Cao <cmm@us.ibm.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/inode.c | 239 +++++++++++++++++++++++++++-----------------------------
 1 file changed, 114 insertions(+), 125 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 0cd126176bb..e68587a7f36 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -44,16 +44,16 @@ static int ext3_writepage_trans_blocks(struct inode *inode);
 /*
  * Test whether an inode is a fast symlink.
  */
-static inline int ext3_inode_is_fast_symlink(struct inode *inode)
+static int ext3_inode_is_fast_symlink(struct inode *inode)
 {
 	int ea_blocks = EXT3_I(inode)->i_file_acl ?
 		(inode->i_sb->s_blocksize >> 9) : 0;
 
-	return (S_ISLNK(inode->i_mode) &&
-		inode->i_blocks - ea_blocks == 0);
+	return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
 }
 
-/* The ext3 forget function must perform a revoke if we are freeing data
+/*
+ * The ext3 forget function must perform a revoke if we are freeing data
  * which has been journaled.  Metadata (eg. indirect blocks) must be
  * revoked in all cases. 
  *
@@ -61,10 +61,8 @@ static inline int ext3_inode_is_fast_symlink(struct inode *inode)
  * but there may still be a record of it in the journal, and that record
  * still needs to be revoked.
  */
-
-int ext3_forget(handle_t *handle, int is_metadata,
-		       struct inode *inode, struct buffer_head *bh,
-		       int blocknr)
+int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
+			struct buffer_head *bh, int blocknr)
 {
 	int err;
 
@@ -104,10 +102,9 @@ int ext3_forget(handle_t *handle, int is_metadata,
 }
 
 /*
- * Work out how many blocks we need to progress with the next chunk of a
+ * Work out how many blocks we need to proceed with the next chunk of a
  * truncate transaction.
  */
-
 static unsigned long blocks_for_truncate(struct inode *inode) 
 {
 	unsigned long needed;
@@ -141,7 +138,6 @@ static unsigned long blocks_for_truncate(struct inode *inode)
  * extend fails, we need to propagate the failure up and restart the
  * transaction in the top-level truncate loop. --sct 
  */
-
 static handle_t *start_transaction(struct inode *inode) 
 {
 	handle_t *result;
@@ -194,9 +190,11 @@ void ext3_delete_inode (struct inode * inode)
 
 	handle = start_transaction(inode);
 	if (IS_ERR(handle)) {
-		/* If we're going to skip the normal cleanup, we still
-		 * need to make sure that the in-core orphan linked list
-		 * is properly cleaned up. */
+		/*
+		 * If we're going to skip the normal cleanup, we still need to
+		 * make sure that the in-core orphan linked list is properly
+		 * cleaned up.
+		 */
 		ext3_orphan_del(NULL, inode);
 		goto no_delete;
 	}
@@ -247,7 +245,7 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
 	p->bh = bh;
 }
 
-static inline int verify_chain(Indirect *from, Indirect *to)
+static int verify_chain(Indirect *from, Indirect *to)
 {
 	while (from <= to && from->key == *from->p)
 		from++;
@@ -317,7 +315,7 @@ static int ext3_block_to_path(struct inode *inode,
 		offsets[n++] = i_block & (ptrs - 1);
 		final = ptrs;
 	} else {
-		ext3_warning (inode->i_sb, "ext3_block_to_path", "block > big");
+		ext3_warning(inode->i_sb, "ext3_block_to_path", "block > big");
 	}
 	if (boundary)
 		*boundary = final - 1 - (i_block & (ptrs - 1));
@@ -409,7 +407,6 @@ no_block:
  *
  *	Caller must make sure that @ind is valid and will stay that way.
  */
-
 static unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
 {
 	struct ext3_inode_info *ei = EXT3_I(inode);
@@ -419,17 +416,18 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
 	unsigned long colour;
 
 	/* Try to find previous block */
-	for (p = ind->p - 1; p >= start; p--)
+	for (p = ind->p - 1; p >= start; p--) {
 		if (*p)
 			return le32_to_cpu(*p);
+	}
 
 	/* No such thing, so let's try location of indirect block */
 	if (ind->bh)
 		return ind->bh->b_blocknr;
 
 	/*
-	 * It is going to be refered from inode itself? OK, just put it into
-	 * the same cylinder group then.
+	 * It is going to be referred to from the inode itself? OK, just put it
+	 * into the same cylinder group then.
 	 */
 	bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
 		le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
@@ -453,7 +451,9 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
 static unsigned long ext3_find_goal(struct inode *inode, long block,
 		Indirect chain[4], Indirect *partial)
 {
-	struct ext3_block_alloc_info *block_i =  EXT3_I(inode)->i_block_alloc_info;
+	struct ext3_block_alloc_info *block_i;
+
+	block_i =  EXT3_I(inode)->i_block_alloc_info;
 
 	/*
 	 * try the heuristic for sequential allocation,
@@ -466,6 +466,7 @@ static unsigned long ext3_find_goal(struct inode *inode, long block,
 
 	return ext3_find_near(inode, partial);
 }
+
 /**
  *	ext3_blks_to_allocate: Look up the block map and count the number
  *	of direct blocks need to be allocated for the given branch.
@@ -478,8 +479,7 @@ static unsigned long ext3_find_goal(struct inode *inode, long block,
  *	return the total number of blocks to be allocate, including the
  *	direct and indirect blocks.
  */
-static int
-ext3_blks_to_allocate(Indirect * branch, int k, unsigned long blks,
+static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
 		int blocks_to_boundary)
 {
 	unsigned long count = 0;
@@ -489,7 +489,7 @@ ext3_blks_to_allocate(Indirect * branch, int k, unsigned long blks,
 	 * then it's clear blocks on that path have not allocated
 	 */
 	if (k > 0) {
-		/* right now don't hanel cross boundary allocation */
+		/* right now we don't handle cross boundary allocation */
 		if (blks < blocks_to_boundary + 1)
 			count += blks;
 		else
@@ -538,7 +538,7 @@ static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
 	while (1) {
 		count = target;
 		/* allocating blocks for indirect blocks and direct blocks */
-		current_block = ext3_new_blocks(handle, inode, goal, &count, err);
+		current_block = ext3_new_blocks(handle,inode,goal,&count,err);
 		if (*err)
 			goto failed_out;
 
@@ -591,7 +591,6 @@ failed_out:
  *	ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain
  *	as described above and return 0.
  */
-
 static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
 			int indirect_blks, int *blks, unsigned long goal,
 			int *offsets, Indirect *branch)
@@ -670,27 +669,28 @@ failed:
 }
 
 /**
- *	ext3_splice_branch - splice the allocated branch onto inode.
- *	@inode: owner
- *	@block: (logical) number of block we are adding
- *	@chain: chain of indirect blocks (with a missing link - see
- *		ext3_alloc_branch)
- *	@where: location of missing link
- *	@num:   number of indirect blocks we are adding
- *	@blks:  number of direct blocks we are adding
- *
- *	This function fills the missing link and does all housekeeping needed in
- *	inode (->i_blocks, etc.). In case of success we end up with the full
- *	chain to new block and return 0.
+ * ext3_splice_branch - splice the allocated branch onto inode.
+ * @inode: owner
+ * @block: (logical) number of block we are adding
+ * @chain: chain of indirect blocks (with a missing link - see
+ *	ext3_alloc_branch)
+ * @where: location of missing link
+ * @num:   number of indirect blocks we are adding
+ * @blks:  number of direct blocks we are adding
+ *
+ * This function fills the missing link and does all housekeeping needed in
+ * inode (->i_blocks, etc.). In case of success we end up with the full
+ * chain to new block and return 0.
  */
-
-static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
-			      Indirect *where, int num, int blks)
+static int ext3_splice_branch(handle_t *handle, struct inode *inode,
+			long block, Indirect *where, int num, int blks)
 {
 	int i;
 	int err = 0;
-	struct ext3_block_alloc_info *block_i = EXT3_I(inode)->i_block_alloc_info;
+	struct ext3_block_alloc_info *block_i;
 	unsigned long current_block;
+
+	block_i = EXT3_I(inode)->i_block_alloc_info;
 	/*
 	 * If we're splicing into a [td]indirect block (as opposed to the
 	 * inode) then we need to get write access to the [td]indirect block
@@ -705,8 +705,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
 	/* That's it */
 
 	*where->p = where->key;
-	/* update host bufferhead or inode to point to
-	 * more just allocated direct blocks blocks */
+
+	/*
+	 * Update the host buffer_head or inode to point to more just allocated
+	 * direct blocks blocks
+	 */
 	if (num == 0 && blks > 1) {
 		current_block = le32_to_cpu(where->key + 1);
 		for (i = 1; i < blks; i++)
@@ -720,7 +723,8 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
 	 */
 	if (block_i) {
 		block_i->last_alloc_logical_block = block + blks - 1;
-		block_i->last_alloc_physical_block = le32_to_cpu(where[num].key + blks - 1);
+		block_i->last_alloc_physical_block =
+				le32_to_cpu(where[num].key + blks - 1);
 	}
 
 	/* We are done with atomic stuff, now do the rest of housekeeping */
@@ -731,7 +735,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
 	/* had we spliced it onto indirect block? */
 	if (where->bh) {
 		/*
-		 * akpm: If we spliced it onto an indirect block, we haven't
+		 * If we spliced it onto an indirect block, we haven't
 		 * altered the inode.  Note however that if it is being spliced
 		 * onto an indirect block at the very end of the file (the
 		 * file is growing) then we *will* alter the inode to reflect
@@ -756,7 +760,7 @@ err_out:
 	for (i = 1; i <= num; i++) {
 		BUFFER_TRACE(where[i].bh, "call journal_forget");
 		ext3_journal_forget(handle, where[i].bh);
-		ext3_free_blocks(handle, inode, le32_to_cpu(where[i-1].key), 1);
+		ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
 	}
 	ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);
 
@@ -775,17 +779,16 @@ err_out:
  * allocations is needed - we simply release blocks and do not touch anything
  * reachable from inode.
  *
- * akpm: `handle' can be NULL if create == 0.
+ * `handle' can be NULL if create == 0.
  *
  * The BKL may not be held on entry here.  Be sure to take it early.
  * return > 0, # of blocks mapped or allocated.
  * return = 0, if plain lookup failed.
  * return < 0, error case.
  */
-
-int
-ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock,
-		unsigned long maxblocks, struct buffer_head *bh_result,
+int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
+		sector_t iblock, unsigned long maxblocks,
+		struct buffer_head *bh_result,
 		int create, int extend_disksize)
 {
 	int err = -EIO;
@@ -802,7 +805,7 @@ ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 
 
 	J_ASSERT(handle != NULL || create == 0);
-	depth = ext3_block_to_path(inode, iblock, offsets, &blocks_to_boundary);
+	depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
 
 	if (depth == 0)
 		goto out;
@@ -998,8 +1001,8 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
 /*
  * `handle' can be NULL if create is zero
  */
-struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode,
-				long block, int create, int * errp)
+struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
+				long block, int create, int *errp)
 {
 	struct buffer_head dummy;
 	int fatal = 0, err;
@@ -1029,17 +1032,18 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode,
 			J_ASSERT(create != 0);
 			J_ASSERT(handle != 0);
 
-			/* Now that we do not always journal data, we
-			   should keep in mind whether this should
-			   always journal the new buffer as metadata.
-			   For now, regular file writes use
-			   ext3_get_block instead, so it's not a
-			   problem. */
+			/*
+			 * Now that we do not always journal data, we should
+			 * keep in mind whether this should always journal the
+			 * new buffer as metadata.  For now, regular file
+			 * writes use ext3_get_block instead, so it's not a
+			 * problem.
+			 */
 			lock_buffer(bh);
 			BUFFER_TRACE(bh, "call get_create_access");
 			fatal = ext3_journal_get_create_access(handle, bh);
 			if (!fatal && !buffer_uptodate(bh)) {
-				memset(bh->b_data, 0, inode->i_sb->s_blocksize);
+				memset(bh->b_data,0,inode->i_sb->s_blocksize);
 				set_buffer_uptodate(bh);
 			}
 			unlock_buffer(bh);
@@ -1061,7 +1065,7 @@ err:
 	return NULL;
 }
 
-struct buffer_head *ext3_bread(handle_t *handle, struct inode * inode,
+struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
 			       int block, int create, int *err)
 {
 	struct buffer_head * bh;
@@ -1137,9 +1141,8 @@ static int walk_page_buffers(	handle_t *handle,
  * is elevated.  We'll still have enough credits for the tiny quotafile
  * write.  
  */
-
-static int do_journal_get_write_access(handle_t *handle, 
-				       struct buffer_head *bh)
+static int do_journal_get_write_access(handle_t *handle,
+					struct buffer_head *bh)
 {
 	if (!buffer_mapped(bh) || buffer_freed(bh))
 		return 0;
@@ -1180,8 +1183,7 @@ out:
 	return ret;
 }
 
-int
-ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
+int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 {
 	int err = journal_dirty_data(handle, bh);
 	if (err)
@@ -1206,7 +1208,6 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
  * ext3 never places buffers on inode->i_mapping->private_list.  metadata
  * buffers are managed internally.
  */
-
 static int ext3_ordered_commit_write(struct file *file, struct page *page,
 			     unsigned from, unsigned to)
 {
@@ -1416,7 +1417,7 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
  * we don't need to open a transaction here.
  */
 static int ext3_ordered_writepage(struct page *page,
-			struct writeback_control *wbc)
+				struct writeback_control *wbc)
 {
 	struct inode *inode = page->mapping->host;
 	struct buffer_head *page_bufs;
@@ -1907,11 +1908,8 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
  *		c) free the subtrees growing from the inode past the @chain[0].
  *			(no partially truncated stuff there).  */
 
-static Indirect *ext3_find_shared(struct inode *inode,
-				int depth,
-				int offsets[4],
-				Indirect chain[4],
-				__le32 *top)
+static Indirect *ext3_find_shared(struct inode *inode, int depth,
+			int offsets[4], Indirect chain[4], __le32 *top)
 {
 	Indirect *partial, *p;
 	int k, err;
@@ -1950,8 +1948,7 @@ static Indirect *ext3_find_shared(struct inode *inode,
 	}
 	/* Writer: end */
 
-	while(partial > p)
-	{
+	while(partial > p) {
 		brelse(partial->bh);
 		partial--;
 	}
@@ -1967,10 +1964,9 @@ no_top:
  * We release `count' blocks on disk, but (last - first) may be greater
  * than `count' because there can be holes in there.
  */
-static void
-ext3_clear_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh,
-		unsigned long block_to_free, unsigned long count,
-		__le32 *first, __le32 *last)
+static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
+		struct buffer_head *bh, unsigned long block_to_free,
+		unsigned long count, __le32 *first, __le32 *last)
 {
 	__le32 *p;
 	if (try_to_extend_transaction(handle, inode)) {
@@ -2231,8 +2227,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
  * that's fine - as long as they are linked from the inode, the post-crash
  * ext3_truncate() run will find them and release them.
  */
-
-void ext3_truncate(struct inode * inode)
+void ext3_truncate(struct inode *inode)
 {
 	handle_t *handle;
 	struct ext3_inode_info *ei = EXT3_I(inode);
@@ -2356,29 +2351,26 @@ void ext3_truncate(struct inode * inode)
 do_indirects:
 	/* Kill the remaining (whole) subtrees */
 	switch (offsets[0]) {
-		default:
-			nr = i_data[EXT3_IND_BLOCK];
-			if (nr) {
-				ext3_free_branches(handle, inode, NULL,
-						   &nr, &nr+1, 1);
-				i_data[EXT3_IND_BLOCK] = 0;
-			}
-		case EXT3_IND_BLOCK:
-			nr = i_data[EXT3_DIND_BLOCK];
-			if (nr) {
-				ext3_free_branches(handle, inode, NULL,
-						   &nr, &nr+1, 2);
-				i_data[EXT3_DIND_BLOCK] = 0;
-			}
-		case EXT3_DIND_BLOCK:
-			nr = i_data[EXT3_TIND_BLOCK];
-			if (nr) {
-				ext3_free_branches(handle, inode, NULL,
-						   &nr, &nr+1, 3);
-				i_data[EXT3_TIND_BLOCK] = 0;
-			}
-		case EXT3_TIND_BLOCK:
-			;
+	default:
+		nr = i_data[EXT3_IND_BLOCK];
+		if (nr) {
+			ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
+			i_data[EXT3_IND_BLOCK] = 0;
+		}
+	case EXT3_IND_BLOCK:
+		nr = i_data[EXT3_DIND_BLOCK];
+		if (nr) {
+			ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
+			i_data[EXT3_DIND_BLOCK] = 0;
+		}
+	case EXT3_DIND_BLOCK:
+		nr = i_data[EXT3_TIND_BLOCK];
+		if (nr) {
+			ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
+			i_data[EXT3_TIND_BLOCK] = 0;
+		}
+	case EXT3_TIND_BLOCK:
+		;
 	}
 
 	ext3_discard_reservation(inode);
@@ -2387,8 +2379,10 @@ do_indirects:
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
 	ext3_mark_inode_dirty(handle, inode);
 
-	/* In a multi-transaction truncate, we only make the final
-	 * transaction synchronous */
+	/*
+	 * In a multi-transaction truncate, we only make the final transaction
+	 * synchronous
+	 */
 	if (IS_SYNC(inode))
 		handle->h_sync = 1;
 out_stop:
@@ -2414,20 +2408,16 @@ static unsigned long ext3_get_inode_block(struct super_block *sb,
 	struct ext3_group_desc * gdp;
 
 
-	if ((ino != EXT3_ROOT_INO &&
-		ino != EXT3_JOURNAL_INO &&
-		ino != EXT3_RESIZE_INO &&
-		ino < EXT3_FIRST_INO(sb)) ||
-		ino > le32_to_cpu(
-			EXT3_SB(sb)->s_es->s_inodes_count)) {
-		ext3_error (sb, "ext3_get_inode_block",
+	if ((ino != EXT3_ROOT_INO && ino != EXT3_JOURNAL_INO &&
+		ino != EXT3_RESIZE_INO && ino < EXT3_FIRST_INO(sb)) ||
+		ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) {
+		ext3_error(sb, "ext3_get_inode_block",
 			    "bad inode number: %lu", ino);
 		return 0;
 	}
 	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
 	if (block_group >= EXT3_SB(sb)->s_groups_count) {
-		ext3_error (sb, "ext3_get_inode_block",
-			    "group >= groups count");
+		ext3_error(sb,"ext3_get_inode_block","group >= groups count");
 		return 0;
 	}
 	smp_rmb();
@@ -2440,7 +2430,7 @@ static unsigned long ext3_get_inode_block(struct super_block *sb,
 		return 0;
 	}
 
-	gdp = (struct ext3_group_desc *) bh->b_data;
+	gdp = (struct ext3_group_desc *)bh->b_data;
 	/*
 	 * Figure out the offset within the block group inode table
 	 */
@@ -2989,7 +2979,7 @@ err_out:
 
 
 /*
- * akpm: how many blocks doth make a writepage()?
+ * How many blocks doth make a writepage()?
  *
  * With N blocks per page, it may be:
  * N data blocks
@@ -3079,8 +3069,8 @@ ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
 }
 
 /*
- * akpm: What we do here is to mark the in-core inode as clean
- * with respect to inode dirtiness (it may still be data-dirty).
+ * What we do here is to mark the in-core inode as clean with respect to inode
+ * dirtiness (it may still be data-dirty).
  * This means that the in-core inode may be reaped by prune_icache
  * without having to perform any I/O.  This is a very good thing,
  * because *any* task may call prune_icache - even ones which
@@ -3112,7 +3102,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
 }
 
 /*
- * akpm: ext3_dirty_inode() is called from __mark_inode_dirty()
+ * ext3_dirty_inode() is called from __mark_inode_dirty()
  *
  * We're really interested in the case where a file is being extended.
  * i_size has been changed by generic_commit_write() and we thus need
@@ -3148,7 +3138,7 @@ out:
 	return;
 }
 
-#ifdef AKPM
+#if 0
 /* 
  * Bind an inode's backing buffer_head into this transaction, to prevent
  * it from being flushed to disk early.  Unlike
@@ -3156,8 +3146,7 @@ out:
  * returns no iloc structure, so the caller needs to repeat the iloc
  * lookup to mark the inode dirty later.
  */
-static inline int
-ext3_pin_inode(handle_t *handle, struct inode *inode)
+static int ext3_pin_inode(handle_t *handle, struct inode *inode)
 {
 	struct ext3_iloc iloc;
 
-- 
cgit v1.2.3


From f91a2ad2ed97099fb565e3336f8df0df717f2ba9 Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Sun, 26 Mar 2006 01:38:04 -0800
Subject: [PATCH] ext3: multi-block get_block()

Mingming Cao recently added multi-block allocation support for ext3,
currently used only by DIO.  I added support to map multiple blocks for
mpage_readpages().  This patch add support for ext3_get_block() to deal
with multi-block mapping.  Basically it renames ext3_direct_io_get_blocks()
as ext3_get_block().

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/inode.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index e68587a7f36..48ae0339af1 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -941,9 +941,8 @@ out:
 
 #define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32)
 
-static int
-ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock,
-		struct buffer_head *bh_result, int create)
+static int ext3_get_block(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh_result, int create)
 {
 	handle_t *handle = journal_current_handle();
 	int ret = 0;
@@ -992,12 +991,6 @@ get_block:
 	return ret;
 }
 
-static int ext3_get_block(struct inode *inode, sector_t iblock,
-			struct buffer_head *bh_result, int create)
-{
-	return ext3_direct_io_get_blocks(inode, iblock, bh_result, create);
-}
-
 /*
  * `handle' can be NULL if create is zero
  */
@@ -1648,11 +1641,10 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
 
 	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 
 				 offset, nr_segs,
-				 ext3_direct_io_get_blocks, NULL);
+				 ext3_get_block, NULL);
 
 	/*
-	 * Reacquire the handle: ext3_direct_io_get_block() can restart the
-	 * transaction
+	 * Reacquire the handle: ext3_get_block() can restart the transaction
 	 */
 	handle = journal_current_handle();
 
-- 
cgit v1.2.3


From a0e9285233a32edf267d27cd03fe0056951422cf Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Sun, 26 Mar 2006 01:38:05 -0800
Subject: [PATCH] ext3: "nobh" writeback support for filesystems blocksize <
 pagesize

There is no valid reason why we can't support "nobh" option for filesystems
with blocksize != PAGESIZE.

This patch lets them use "nobh" option for writeback mode for blocksize <
pagesize.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/super.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 86e443182de..f8a5266ea1f 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1678,12 +1678,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	}
 
 	if (test_opt(sb, NOBH)) {
-		if (sb->s_blocksize_bits != PAGE_CACHE_SHIFT) {
-			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option "
-				"since filesystem blocksize doesn't match "
-				"pagesize\n");
-			clear_opt(sbi->s_mount_opt, NOBH);
-		}
 		if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
 			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
 				"its supported only with writeback mode\n");
-- 
cgit v1.2.3


From 4dee26b7e26eb3c78da8bf6c1c52ee5419145b28 Mon Sep 17 00:00:00 2001
From: Roman Zippel <zippel@linux-m68k.org>
Date: Sun, 26 Mar 2006 01:38:10 -0800
Subject: [PATCH] hrtimers: remove it_real_value calculation from proc/*/stat

Remove the it_real_value from /proc/*/stat, during 1.2.x was the last time it
returned useful data (as it was directly maintained by the scheduler), now
it's only a waste of time to calculate it.  Return 0 instead.

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/array.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 7eb1bd7f800..7a76ad57023 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -330,7 +330,6 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 	unsigned long  min_flt = 0,  maj_flt = 0;
 	cputime_t cutime, cstime, utime, stime;
 	unsigned long rsslim = 0;
-	DEFINE_KTIME(it_real_value);
 	struct task_struct *t;
 	char tcomm[sizeof(task->comm)];
 
@@ -386,7 +385,6 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 			utime = cputime_add(utime, task->signal->utime);
 			stime = cputime_add(stime, task->signal->stime);
 		}
-		it_real_value = task->signal->real_timer.expires;
 	}
 	ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
 	read_unlock(&tasklist_lock);
@@ -413,7 +411,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 	start_time = nsec_to_clock_t(start_time);
 
 	res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
-%lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
+%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
 %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
 		task->pid,
 		tcomm,
@@ -435,7 +433,6 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 		priority,
 		nice,
 		num_threads,
-		(long) ktime_to_clock_t(it_real_value),
 		start_time,
 		vsize,
 		mm ? get_mm_rss(mm) : 0,
-- 
cgit v1.2.3


From 05cfb614ddbf3181540ce09d44d96486f8ba8d6a Mon Sep 17 00:00:00 2001
From: Roman Zippel <zippel@linux-m68k.org>
Date: Sun, 26 Mar 2006 01:38:12 -0800
Subject: [PATCH] hrtimers: remove data field

The nanosleep cleanup allows to remove the data field of hrtimer.  The
callback function can use container_of() to get it's own data.  Since the
hrtimer structure is anyway embedded in other structures, this adds no
overhead.

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 995cba3c62b..c7397c46ad6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -632,7 +632,7 @@ static int de_thread(struct task_struct *tsk)
 		 * synchronize with any firing (by calling del_timer_sync)
 		 * before we can safely let the old group leader die.
 		 */
-		sig->real_timer.data = current;
+		sig->tsk = current;
 		spin_unlock_irq(lock);
 		if (hrtimer_cancel(&sig->real_timer))
 			hrtimer_restart(&sig->real_timer);
-- 
cgit v1.2.3


From b9a2838cc26c4c5369fbd2482acbc5ab60573479 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <mita@miraclelinux.com>
Date: Sun, 26 Mar 2006 01:39:53 -0800
Subject: [PATCH] bitops: ntfs: remove generic_ffs()

Now the only user who are using generic_ffs() is ntfs filesystem.  This patch
isolates generic_ffs() as ntfs_ffs() for ntfs.

Signed-off-by: Akinobu Mita <mita@miraclelinux.com>
Cc: Anton Altaparmakov <aia21@cantab.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ntfs/logfile.c |  4 ++--
 fs/ntfs/mft.c     |  2 +-
 fs/ntfs/ntfs.h    | 29 +++++++++++++++++++++++++++++
 3 files changed, 32 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 0fd70295cca..4af2ad1193e 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -515,10 +515,10 @@ BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp)
 		log_page_size = PAGE_CACHE_SIZE;
 	log_page_mask = log_page_size - 1;
 	/*
-	 * Use generic_ffs() instead of ffs() to enable the compiler to
+	 * Use ntfs_ffs() instead of ffs() to enable the compiler to
 	 * optimize log_page_size and log_page_bits into constants.
 	 */
-	log_page_bits = generic_ffs(log_page_size) - 1;
+	log_page_bits = ntfs_ffs(log_page_size) - 1;
 	size &= ~(s64)(log_page_size - 1);
 	/*
 	 * Ensure the log file is big enough to store at least the two restart
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 4e72bc7afdf..2438c00ec0c 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -2670,7 +2670,7 @@ mft_rec_already_initialized:
 			ni->name_len = 4;
 
 			ni->itype.index.block_size = 4096;
-			ni->itype.index.block_size_bits = generic_ffs(4096) - 1;
+			ni->itype.index.block_size_bits = ntfs_ffs(4096) - 1;
 			ni->itype.index.collation_rule = COLLATION_FILE_NAME;
 			if (vol->cluster_size <= ni->itype.index.block_size) {
 				ni->itype.index.vcn_size = vol->cluster_size;
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index 0624c8ef4d9..166142960b5 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -132,4 +132,33 @@ extern int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins,
 /* From fs/ntfs/upcase.c */
 extern ntfschar *generate_default_upcase(void);
 
+static inline int ntfs_ffs(int x)
+{
+	int r = 1;
+
+	if (!x)
+		return 0;
+	if (!(x & 0xffff)) {
+		x >>= 16;
+		r += 16;
+	}
+	if (!(x & 0xff)) {
+		x >>= 8;
+		r += 8;
+	}
+	if (!(x & 0xf)) {
+		x >>= 4;
+		r += 4;
+	}
+	if (!(x & 3)) {
+		x >>= 2;
+		r += 2;
+	}
+	if (!(x & 1)) {
+		x >>= 1;
+		r += 1;
+	}
+	return r;
+}
+
 #endif /* _LINUX_NTFS_H */
-- 
cgit v1.2.3


From 3be7d29fb9c02962b49be636b30ca9cadd0fda4b Mon Sep 17 00:00:00 2001
From: "Artem B. Bityuckiy" <dedekind@infradead.org>
Date: Sun, 26 Mar 2006 18:58:31 +0200
Subject: Remove ugly debugging stuff

Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/dcache.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 0c78f05819d..0778f49f993 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -34,7 +34,6 @@
 #include <linux/swap.h>
 #include <linux/bootmem.h>
 
-/* #define DCACHE_DEBUG 1 */
 
 int sysctl_vfs_cache_pressure = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
@@ -603,10 +602,6 @@ resume:
 		 */
 		if (!list_empty(&dentry->d_subdirs)) {
 			this_parent = dentry;
-#ifdef DCACHE_DEBUG
-printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n",
-dentry->d_parent->d_name.name, dentry->d_name.name, found);
-#endif
 			goto repeat;
 		}
 	}
@@ -616,10 +611,6 @@ dentry->d_parent->d_name.name, dentry->d_name.name, found);
 	if (this_parent != parent) {
 		next = this_parent->d_u.d_child.next;
 		this_parent = this_parent->d_parent;
-#ifdef DCACHE_DEBUG
-printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
-this_parent->d_parent->d_name.name, this_parent->d_name.name, found);
-#endif
 		goto resume;
 	}
 out:
-- 
cgit v1.2.3


From 86c79cbcee7d617c5aaf9b4f7e6cc093397d3451 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:41 -0800
Subject: [PATCH] uml: fix hostfs stack corruption

Noted by Oleg Drokin:
We initialized an extra slot of struct kstatfs.spare, sometimes
causing stack corruption.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Oleg Drokin <green@clusterfs.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hostfs/hostfs_user.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index b97809deba6..23b7cee7212 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -360,7 +360,6 @@ int do_statfs(char *root, long *bsize_out, long long *blocks_out,
 	spare_out[2] = buf.f_spare[2];
 	spare_out[3] = buf.f_spare[3];
 	spare_out[4] = buf.f_spare[4];
-	spare_out[5] = buf.f_spare[5];
 	return(0);
 }
 
-- 
cgit v1.2.3


From 718c604a28e35848754a65b839e4877ec34b2fca Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:42 -0800
Subject: [PATCH] autofs4: lookup white space cleanup

Whitespace and formating changes to lookup code.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/root.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 62d8d4acb8b..2c676bd44ac 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -296,20 +296,20 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f
 {
 	struct super_block *sb = mnt->mnt_sb;
 	struct autofs_sb_info *sbi = autofs4_sbi(sb);
-	struct autofs_info *de_info = autofs4_dentry_ino(dentry);
+	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	int status = 0;
 
 	/* Block on any pending expiry here; invalidate the dentry
            when expiration is done to trigger mount request with a new
            dentry */
-	if (de_info && (de_info->flags & AUTOFS_INF_EXPIRING)) {
+	if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
 		DPRINTK("waiting for expire %p name=%.*s",
 			 dentry, dentry->d_name.len, dentry->d_name.name);
 
 		status = autofs4_wait(sbi, dentry, NFY_NONE);
-		
+
 		DPRINTK("expire done status=%d", status);
-		
+
 		/*
 		 * If the directory still exists the mount request must
 		 * continue otherwise it can't be followed at the right
@@ -323,18 +323,21 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f
 	DPRINTK("dentry=%p %.*s ino=%p",
 		 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
 
-	/* Wait for a pending mount, triggering one if there isn't one already */
+	/*
+	 * Wait for a pending mount, triggering one if there
+	 * isn't one already
+	 */
 	if (dentry->d_inode == NULL) {
 		DPRINTK("waiting for mount name=%.*s",
 			 dentry->d_name.len, dentry->d_name.name);
 
 		status = autofs4_wait(sbi, dentry, NFY_MOUNT);
-		 
+
 		DPRINTK("mount done status=%d", status);
 
 		if (status && dentry->d_inode)
 			return 0; /* Try to get the kernel to invalidate this dentry */
-		
+
 		/* Turn this into a real negative dentry? */
 		if (status == -ENOENT) {
 			dentry->d_time = jiffies + AUTOFS_NEGATIVE_TIMEOUT;
@@ -367,8 +370,10 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f
 		}
 	}
 
-	/* We don't update the usages for the autofs daemon itself, this
-	   is necessary for recursive autofs mounts */
+	/*
+	 * We don't update the usages for the autofs daemon itself, this
+	 * is necessary for recursive autofs mounts
+	 */
 	if (!autofs4_oz_mode(sbi))
 		autofs4_update_usage(mnt, dentry);
 
@@ -384,9 +389,9 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f
  * yet completely filled in, and revalidate has to delay such
  * lookups..
  */
-static int autofs4_revalidate(struct dentry * dentry, struct nameidata *nd)
+static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct inode * dir = dentry->d_parent->d_inode;
+	struct inode *dir = dentry->d_parent->d_inode;
 	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
 	int oz_mode = autofs4_oz_mode(sbi);
 	int flags = nd ? nd->flags : 0;
@@ -462,12 +467,13 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 	DPRINTK("name = %.*s",
 		dentry->d_name.len, dentry->d_name.name);
 
+	/* File name too long to exist */
 	if (dentry->d_name.len > NAME_MAX)
-		return ERR_PTR(-ENAMETOOLONG);/* File name too long to exist */
+		return ERR_PTR(-ENAMETOOLONG);
 
 	sbi = autofs4_sbi(dir->i_sb);
-
 	oz_mode = autofs4_oz_mode(sbi);
+
 	DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
 		 current->pid, process_group(current), sbi->catatonic, oz_mode);
 
@@ -519,7 +525,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 	 * doesn't do the right thing for all system calls, but it should
 	 * be OK for the operations we permit from an autofs.
 	 */
-	if ( dentry->d_inode && d_unhashed(dentry) )
+	if (dentry->d_inode && d_unhashed(dentry))
 		return ERR_PTR(-ENOENT);
 
 	return NULL;
-- 
cgit v1.2.3


From f360ce3be466d50de153b001b24561ca7593042b Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:43 -0800
Subject: [PATCH] autofs4: use libfs routines for readdir

Change readdir routines to use the cursor based routines in libfs.c.  This
removes reliance on old readdir code from 2.4 and should improve efficiency of
readdir in autofs4.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/root.c | 126 +++++++++++++++++-------------------------------------
 1 file changed, 40 insertions(+), 86 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 2c676bd44ac..af9a4c6bbad 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -30,7 +30,6 @@ static int autofs4_dir_close(struct inode *inode, struct file *file);
 static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir);
 static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir);
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
-static int autofs4_dcache_readdir(struct file *, void *, filldir_t);
 
 struct file_operations autofs4_root_operations = {
 	.open		= dcache_dir_open,
@@ -82,7 +81,7 @@ static int autofs4_root_readdir(struct file *file, void *dirent,
 
 	DPRINTK("needs_reghost = %d", sbi->needs_reghost);
 
-	return autofs4_dcache_readdir(file, dirent, filldir);
+	return dcache_readdir(file, dirent, filldir);
 }
 
 /* Update usage from here to top of tree, so that scan of
@@ -103,75 +102,21 @@ static void autofs4_update_usage(struct vfsmount *mnt, struct dentry *dentry)
 	spin_unlock(&dcache_lock);
 }
 
-/*
- * From 2.4 kernel readdir.c
- */
-static int autofs4_dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
-{
-	int i;
-	struct dentry *dentry = filp->f_dentry;
-
-	i = filp->f_pos;
-	switch (i) {
-		case 0:
-			if (filldir(dirent, ".", 1, i, dentry->d_inode->i_ino, DT_DIR) < 0)
-				break;
-			i++;
-			filp->f_pos++;
-			/* fallthrough */
-		case 1:
-			if (filldir(dirent, "..", 2, i, dentry->d_parent->d_inode->i_ino, DT_DIR) < 0)
-				break;
-			i++;
-			filp->f_pos++;
-			/* fallthrough */
-		default: {
-			struct list_head *list;
-			int j = i-2;
-
-			spin_lock(&dcache_lock);
-			list = dentry->d_subdirs.next;
-
-			for (;;) {
-				if (list == &dentry->d_subdirs) {
-					spin_unlock(&dcache_lock);
-					return 0;
-				}
-				if (!j)
-					break;
-				j--;
-				list = list->next;
-			}
-
-			while(1) {
-				struct dentry *de = list_entry(list,
-						struct dentry, d_u.d_child);
-
-				if (!d_unhashed(de) && de->d_inode) {
-					spin_unlock(&dcache_lock);
-					if (filldir(dirent, de->d_name.name, de->d_name.len, filp->f_pos, de->d_inode->i_ino, DT_UNKNOWN) < 0)
-						break;
-					spin_lock(&dcache_lock);
-				}
-				filp->f_pos++;
-				list = list->next;
-				if (list != &dentry->d_subdirs)
-					continue;
-				spin_unlock(&dcache_lock);
-				break;
-			}
-		}
-	}
-	return 0;
-}
-
 static int autofs4_dir_open(struct inode *inode, struct file *file)
 {
 	struct dentry *dentry = file->f_dentry;
 	struct vfsmount *mnt = file->f_vfsmnt;
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	struct dentry *cursor;
 	int status;
 
+	status = dcache_dir_open(inode, file);
+	if (status)
+		goto out;
+
+	cursor = file->private_data;
+	cursor->d_fsdata = NULL;
+
 	DPRINTK("file=%p dentry=%p %.*s",
 		file, dentry, dentry->d_name.len, dentry->d_name.name);
 
@@ -180,12 +125,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 
 	if (autofs4_ispending(dentry)) {
 		DPRINTK("dentry busy");
-		return -EBUSY;
+		dcache_dir_close(inode, file);
+		status = -EBUSY;
+		goto out;
 	}
 
+	status = -ENOENT;
 	if (!d_mountpoint(dentry) && dentry->d_op && dentry->d_op->d_revalidate) {
 		struct nameidata nd;
-		int empty;
+		int empty, ret;
 
 		/* In case there are stale directory dentrys from a failed mount */
 		spin_lock(&dcache_lock);
@@ -195,13 +143,13 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 		if (!empty)
 			d_invalidate(dentry);
 
-		nd.dentry = dentry;
-		nd.mnt = mnt;
 		nd.flags = LOOKUP_DIRECTORY;
-		status = (dentry->d_op->d_revalidate)(dentry, &nd);
+		ret = (dentry->d_op->d_revalidate)(dentry, &nd);
 
-		if (!status)
-			return -ENOENT;
+		if (!ret) {
+			dcache_dir_close(inode, file);
+			goto out;
+		}
 	}
 
 	if (d_mountpoint(dentry)) {
@@ -212,25 +160,29 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 		if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) {
 			dput(fp_dentry);
 			mntput(fp_mnt);
-			return -ENOENT;
+			dcache_dir_close(inode, file);
+			goto out;
 		}
 
 		fp = dentry_open(fp_dentry, fp_mnt, file->f_flags);
 		status = PTR_ERR(fp);
 		if (IS_ERR(fp)) {
-			file->private_data = NULL;
-			return status;
+			dcache_dir_close(inode, file);
+			goto out;
 		}
-		file->private_data = fp;
+		cursor->d_fsdata = fp;
 	}
-out:
 	return 0;
+out:
+	return status;
 }
 
 static int autofs4_dir_close(struct inode *inode, struct file *file)
 {
 	struct dentry *dentry = file->f_dentry;
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	struct dentry *cursor = file->private_data;
+	int status = 0;
 
 	DPRINTK("file=%p dentry=%p %.*s",
 		file, dentry, dentry->d_name.len, dentry->d_name.name);
@@ -240,26 +192,28 @@ static int autofs4_dir_close(struct inode *inode, struct file *file)
 
 	if (autofs4_ispending(dentry)) {
 		DPRINTK("dentry busy");
-		return -EBUSY;
+		status = -EBUSY;
+		goto out;
 	}
 
 	if (d_mountpoint(dentry)) {
-		struct file *fp = file->private_data;
-
-		if (!fp)
-			return -ENOENT;
-
+		struct file *fp = cursor->d_fsdata;
+		if (!fp) {
+			status = -ENOENT;
+			goto out;
+		}
 		filp_close(fp, current->files);
-		file->private_data = NULL;
 	}
 out:
-	return 0;
+	dcache_dir_close(inode, file);
+	return status;
 }
 
 static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldir)
 {
 	struct dentry *dentry = file->f_dentry;
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	struct dentry *cursor = file->private_data;
 	int status;
 
 	DPRINTK("file=%p dentry=%p %.*s",
@@ -274,7 +228,7 @@ static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldi
 	}
 
 	if (d_mountpoint(dentry)) {
-		struct file *fp = file->private_data;
+		struct file *fp = cursor->d_fsdata;
 
 		if (!fp)
 			return -ENOENT;
@@ -289,7 +243,7 @@ static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldi
 		return status;
 	}
 out:
-	return autofs4_dcache_readdir(file, dirent, filldir);
+	return dcache_readdir(file, dirent, filldir);
 }
 
 static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int flags)
-- 
cgit v1.2.3


From 2d753e62b87ab2fc72bb4ff5153791d32ff9c08e Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:44 -0800
Subject: [PATCH] autofs4: can't mount due to mount point dir not empty

Addresse a problem where stale dentrys stop mounts from happening.

When a mount point directory is pre-created and a non-existent entry within it
is requested a dentry ends up being created within the mount point directory
which stops future mounts.  The problem is solved by ignoring negative,
unhashed dentrys in the mount point d_subdirs list.

Additionally the apparent cacheing of -ENOENT returns from requests is
removed.  The test on d_time is a tautology and d_time is not initialised and
has an unexpected value.  In short it doesn't do what it's meant to.

The cacheing of failed requests to the daemon is important and will be
followed up later.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h | 8 --------
 fs/autofs4/root.c     | 9 ++++-----
 2 files changed, 4 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index f54c5b21f87..eea25934da6 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -41,14 +41,6 @@
 
 #define AUTOFS_SUPER_MAGIC 0x0187
 
-/*
- * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the
- * kernel will keep the negative response cached for up to the time given
- * here, although the time can be shorter if the kernel throws the dcache
- * entry away.  This probably should be settable from user space.
- */
-#define AUTOFS_NEGATIVE_TIMEOUT (60*HZ)	/* 1 minute */
-
 /* Unified info structure.  This is pointed to by both the dentry and
    inode structures.  Each file in the filesystem has an instance of this
    structure.  It holds a reference to the dentry, so dentries are never
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index af9a4c6bbad..c7ff3577434 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -294,14 +294,13 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f
 
 		/* Turn this into a real negative dentry? */
 		if (status == -ENOENT) {
-			dentry->d_time = jiffies + AUTOFS_NEGATIVE_TIMEOUT;
 			spin_lock(&dentry->d_lock);
 			dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
 			spin_unlock(&dentry->d_lock);
-			return 1;
+			return 0;
 		} else if (status) {
 			/* Return a negative dentry, but leave it "pending" */
-			return 1;
+			return 0;
 		}
 	/* Trigger mount for path component or follow link */
 	} else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) ||
@@ -360,13 +359,13 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 
 	/* Negative dentry.. invalidate if "old" */
 	if (dentry->d_inode == NULL)
-		return (dentry->d_time - jiffies <= AUTOFS_NEGATIVE_TIMEOUT);
+		return 0;
 
 	/* Check for a non-mountpoint directory with no contents */
 	spin_lock(&dcache_lock);
 	if (S_ISDIR(dentry->d_inode->i_mode) &&
 	    !d_mountpoint(dentry) && 
-	    list_empty(&dentry->d_subdirs)) {
+	    simple_empty_nolock(dentry)) {
 		DPRINTK("dentry=%p %.*s, emptydir",
 			 dentry, dentry->d_name.len, dentry->d_name.name);
 		spin_unlock(&dcache_lock);
-- 
cgit v1.2.3


From 1f5f2c3059ae8cc23cb3303daf324b06ba79517a Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:44 -0800
Subject: [PATCH] autofs4: expire code readability cleanup

Change the names of the boolean functions autofs4_check_mount and
autofs4_check_tree to autofs4_mount_busy and autofs4_tree_busy respectively
and alters their return codes to suit in order to aid code readabilty.

A couple of white space cleanups are included as well.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/expire.c | 54 ++++++++++++++++++++++++++---------------------------
 1 file changed, 26 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index dc39589df16..bcc17f53369 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -16,7 +16,7 @@
 
 static unsigned long now;
 
-/* Check if a dentry can be expired return 1 if it can else return 0 */
+/* Check if a dentry can be expired */
 static inline int autofs4_can_expire(struct dentry *dentry,
 					unsigned long timeout, int do_now)
 {
@@ -41,14 +41,13 @@ static inline int autofs4_can_expire(struct dentry *dentry,
 		     attempts if expire fails the first time */
 		ino->last_used = now;
 	}
-
 	return 1;
 }
 
-/* Check a mount point for busyness return 1 if not busy, otherwise */
-static int autofs4_check_mount(struct vfsmount *mnt, struct dentry *dentry)
+/* Check a mount point for busyness */
+static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 {
-	int status = 0;
+	int status = 1;
 
 	DPRINTK("dentry %p %.*s",
 		dentry, (int)dentry->d_name.len, dentry->d_name.name);
@@ -65,7 +64,7 @@ static int autofs4_check_mount(struct vfsmount *mnt, struct dentry *dentry)
 
 	/* The big question */
 	if (may_umount_tree(mnt) == 0)
-		status = 1;
+		status = 0;
 done:
 	DPRINTK("returning = %d", status);
 	mntput(mnt);
@@ -75,30 +74,29 @@ done:
 
 /* Check a directory tree of mount points for busyness
  * The tree is not busy iff no mountpoints are busy
- * Return 1 if the tree is busy or 0 otherwise
  */
-static int autofs4_check_tree(struct vfsmount *mnt,
-	       		      struct dentry *top,
-			      unsigned long timeout,
-			      int do_now)
+static int autofs4_tree_busy(struct vfsmount *mnt,
+	       		     struct dentry *top,
+			     unsigned long timeout,
+			     int do_now)
 {
 	struct dentry *this_parent = top;
 	struct list_head *next;
 
-	DPRINTK("parent %p %.*s",
+	DPRINTK("top %p %.*s",
 		top, (int)top->d_name.len, top->d_name.name);
 
 	/* Negative dentry - give up */
 	if (!simple_positive(top))
-		return 0;
+		return 1;
 
 	/* Timeout of a tree mount is determined by its top dentry */
 	if (!autofs4_can_expire(top, timeout, do_now))
-		return 0;
+		return 1;
 
 	/* Is someone visiting anywhere in the tree ? */
 	if (may_umount_tree(mnt))
-		return 0;
+		return 1;
 
 	spin_lock(&dcache_lock);
 repeat:
@@ -126,9 +124,9 @@ resume:
 
 		if (d_mountpoint(dentry)) {
 			/* First busy => tree busy */
-			if (!autofs4_check_mount(mnt, dentry)) {
+			if (autofs4_mount_busy(mnt, dentry)) {
 				dput(dentry);
-				return 0;
+				return 1;
 			}
 		}
 
@@ -144,7 +142,7 @@ resume:
 	}
 	spin_unlock(&dcache_lock);
 
-	return 1;
+	return 0;
 }
 
 static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
@@ -188,7 +186,7 @@ resume:
 				goto cont;
 
 			/* Can we umount this guy */
-			if (autofs4_check_mount(mnt, dentry))
+			if (!autofs4_mount_busy(mnt, dentry))
 				return dentry;
 
 		}
@@ -241,7 +239,7 @@ static struct dentry *autofs4_expire(struct super_block *sb,
 		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		/* Negative dentry - give up */
-		if ( !simple_positive(dentry) ) {
+		if (!simple_positive(dentry)) {
 			next = next->next;
 			continue;
 		}
@@ -259,21 +257,21 @@ static struct dentry *autofs4_expire(struct super_block *sb,
 				goto next;
 
 			/* Can we umount this guy */
-			if (autofs4_check_mount(mnt, dentry)) {
+			if (!autofs4_mount_busy(mnt, dentry)) {
 				expired = dentry;
 				break;
 			}
 			goto next;
 		}
 
-		if ( simple_empty(dentry) )
+		if (simple_empty(dentry))
 			goto next;
 
 		/* Case 2: tree mount, expire iff entire tree is not busy */
 		if (!exp_leaves) {
 			/* Lock the tree as we must expire as a whole */
 			spin_lock(&sbi->fs_lock);
-			if (autofs4_check_tree(mnt, dentry, timeout, do_now)) {
+			if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
 				struct autofs_info *inf = autofs4_dentry_ino(dentry);
 
 				/* Set this flag early to catch sys_chdir and the like */
@@ -297,7 +295,7 @@ next:
 		next = next->next;
 	}
 
-	if ( expired ) {
+	if (expired) {
 		DPRINTK("returning %p %.*s",
 			expired, (int)expired->d_name.len, expired->d_name.name);
 		spin_lock(&dcache_lock);
@@ -352,16 +350,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 		return -EFAULT;
 
 	if ((dentry = autofs4_expire(sb, mnt, sbi, do_now)) != NULL) {
-		struct autofs_info *de_info = autofs4_dentry_ino(dentry);
+		struct autofs_info *ino = autofs4_dentry_ino(dentry);
 
 		/* This is synchronous because it makes the daemon a
                    little easier */
-		de_info->flags |= AUTOFS_INF_EXPIRING;
+		ino->flags |= AUTOFS_INF_EXPIRING;
 		ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
-		de_info->flags &= ~AUTOFS_INF_EXPIRING;
+		ino->flags &= ~AUTOFS_INF_EXPIRING;
 		dput(dentry);
 	}
-		
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From 1ce12bad85863478619688c0c7363f93a9e5edb8 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:45 -0800
Subject: [PATCH] autofs4: simplify expire tree traversal

Simplify the expire tree traversal code by using a function from namespace.c
to calculate the next entry in the top down tree traversals carried out during
the expire operation.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/expire.c | 102 +++++++++++++++++++++-------------------------------
 1 file changed, 40 insertions(+), 62 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index bcc17f53369..165fe9e2d57 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -72,6 +72,27 @@ done:
 	return status;
 }
 
+/*
+ * Calculate next entry in top down tree traversal.
+ * From next_mnt in namespace.c - elegant.
+ */
+static struct dentry *next_dentry(struct dentry *p, struct dentry *root)
+{
+	struct list_head *next = p->d_subdirs.next;
+
+	if (next == &p->d_subdirs) {
+		while (1) {
+			if (p == root)
+				return NULL;
+			next = p->d_u.d_child.next;
+			if (next != &p->d_parent->d_subdirs)
+				break;
+			p = p->d_parent;
+		}
+	}
+	return list_entry(next, struct dentry, d_u.d_child);
+}
+
 /* Check a directory tree of mount points for busyness
  * The tree is not busy iff no mountpoints are busy
  */
@@ -80,8 +101,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 			     unsigned long timeout,
 			     int do_now)
 {
-	struct dentry *this_parent = top;
-	struct list_head *next;
+	struct dentry *p;
 
 	DPRINTK("top %p %.*s",
 		top, (int)top->d_name.len, top->d_name.name);
@@ -99,49 +119,28 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 		return 1;
 
 	spin_lock(&dcache_lock);
-repeat:
-	next = this_parent->d_subdirs.next;
-resume:
-	while (next != &this_parent->d_subdirs) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
-
+	for (p = top; p; p = next_dentry(p, top)) {
 		/* Negative dentry - give up */
-		if (!simple_positive(dentry)) {
-			next = next->next;
+		if (!simple_positive(p))
 			continue;
-		}
 
 		DPRINTK("dentry %p %.*s",
-			dentry, (int)dentry->d_name.len, dentry->d_name.name);
-
-		if (!simple_empty_nolock(dentry)) {
-			this_parent = dentry;
-			goto repeat;
-		}
+			p, (int) p->d_name.len, p->d_name.name);
 
-		dentry = dget(dentry);
+		p = dget(p);
 		spin_unlock(&dcache_lock);
 
-		if (d_mountpoint(dentry)) {
+		if (d_mountpoint(p)) {
 			/* First busy => tree busy */
-			if (autofs4_mount_busy(mnt, dentry)) {
-				dput(dentry);
+			if (autofs4_mount_busy(mnt, p)) {
+				dput(p);
 				return 1;
 			}
 		}
-
-		dput(dentry);
+		dput(p);
 		spin_lock(&dcache_lock);
-		next = next->next;
-	}
-
-	if (this_parent != top) {
-		next = this_parent->d_u.d_child.next;
-		this_parent = this_parent->d_parent;
-		goto resume;
 	}
 	spin_unlock(&dcache_lock);
-
 	return 0;
 }
 
@@ -150,59 +149,38 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
 					   unsigned long timeout,
 					   int do_now)
 {
-	struct dentry *this_parent = parent;
-	struct list_head *next;
+	struct dentry *p;
 
 	DPRINTK("parent %p %.*s",
 		parent, (int)parent->d_name.len, parent->d_name.name);
 
 	spin_lock(&dcache_lock);
-repeat:
-	next = this_parent->d_subdirs.next;
-resume:
-	while (next != &this_parent->d_subdirs) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
-
+	for (p = parent; p; p = next_dentry(p, parent)) {
 		/* Negative dentry - give up */
-		if (!simple_positive(dentry)) {
-			next = next->next;
+		if (!simple_positive(p))
 			continue;
-		}
 
 		DPRINTK("dentry %p %.*s",
-			dentry, (int)dentry->d_name.len, dentry->d_name.name);
+			p, (int) p->d_name.len, p->d_name.name);
 
-		if (!list_empty(&dentry->d_subdirs)) {
-			this_parent = dentry;
-			goto repeat;
-		}
-
-		dentry = dget(dentry);
+		p = dget(p);
 		spin_unlock(&dcache_lock);
 
-		if (d_mountpoint(dentry)) {
+		if (d_mountpoint(p)) {
 			/* Can we expire this guy */
-			if (!autofs4_can_expire(dentry, timeout, do_now))
+			if (!autofs4_can_expire(p, timeout, do_now))
 				goto cont;
 
 			/* Can we umount this guy */
-			if (!autofs4_mount_busy(mnt, dentry))
-				return dentry;
+			if (!autofs4_mount_busy(mnt, p))
+				return p;
 
 		}
 cont:
-		dput(dentry);
+		dput(p);
 		spin_lock(&dcache_lock);
-		next = next->next;
-	}
-
-	if (this_parent != parent) {
-		next = this_parent->d_u.d_child.next;
-		this_parent = this_parent->d_parent;
-		goto resume;
 	}
 	spin_unlock(&dcache_lock);
-
 	return NULL;
 }
 
-- 
cgit v1.2.3


From 1aff3c8b0511b5bb54acf7859e0c6ec9ae7287a9 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:46 -0800
Subject: [PATCH] autofs4: fix false negative return from expire

Fix the case where an expire returns busy on a tree mount when it is in fact
not busy.  This case was overlooked when the patch to prevent the expiring
away of "scaffolding" directories for tree mounts was applied.

The problem arises when a tree of mounts is a member of a map with other keys.
 The current logic will not expire the tree if any other mount in the map is
busy.  The solution is to maintain a "minimum" use count for each autofs
dentry and compare this to the actual dentry usage count during expire.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h |  1 +
 fs/autofs4/expire.c   | 34 +++++++++++++++++++++++++---------
 fs/autofs4/inode.c    | 12 +++++++++++-
 fs/autofs4/root.c     | 23 ++++++++++++++++++++++-
 4 files changed, 59 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index eea25934da6..00da71d0f32 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -55,6 +55,7 @@ struct autofs_info {
 
 	struct autofs_sb_info *sbi;
 	unsigned long last_used;
+	atomic_t count;
 
 	mode_t	mode;
 	size_t	size;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 165fe9e2d57..053d92a745b 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -101,6 +101,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 			     unsigned long timeout,
 			     int do_now)
 {
+	struct autofs_info *ino;
 	struct dentry *p;
 
 	DPRINTK("top %p %.*s",
@@ -110,14 +111,6 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 	if (!simple_positive(top))
 		return 1;
 
-	/* Timeout of a tree mount is determined by its top dentry */
-	if (!autofs4_can_expire(top, timeout, do_now))
-		return 1;
-
-	/* Is someone visiting anywhere in the tree ? */
-	if (may_umount_tree(mnt))
-		return 1;
-
 	spin_lock(&dcache_lock);
 	for (p = top; p; p = next_dentry(p, top)) {
 		/* Negative dentry - give up */
@@ -130,17 +123,40 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 		p = dget(p);
 		spin_unlock(&dcache_lock);
 
+		/*
+		 * Is someone visiting anywhere in the subtree ?
+		 * If there's no mount we need to check the usage
+		 * count for the autofs dentry.
+		 */
+		ino = autofs4_dentry_ino(p);
 		if (d_mountpoint(p)) {
-			/* First busy => tree busy */
 			if (autofs4_mount_busy(mnt, p)) {
 				dput(p);
 				return 1;
 			}
+		} else {
+			unsigned int ino_count = atomic_read(&ino->count);
+
+			/* allow for dget above and top is already dgot */
+			if (p == top)
+				ino_count += 2;
+			else
+				ino_count++;
+
+			if (atomic_read(&p->d_count) > ino_count) {
+				dput(p);
+				return 1;
+			}
 		}
 		dput(p);
 		spin_lock(&dcache_lock);
 	}
 	spin_unlock(&dcache_lock);
+
+	/* Timeout of a tree mount is ultimately determined by its top dentry */
+	if (!autofs4_can_expire(top, timeout, do_now))
+		return 1;
+
 	return 0;
 }
 
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 1ad98d48e55..2335b1d6490 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -46,6 +46,7 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
 	ino->size = 0;
 
 	ino->last_used = jiffies;
+	atomic_set(&ino->count, 0);
 
 	ino->sbi = sbi;
 
@@ -64,10 +65,19 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
 
 void autofs4_free_ino(struct autofs_info *ino)
 {
+	struct autofs_info *p_ino;
+
 	if (ino->dentry) {
 		ino->dentry->d_fsdata = NULL;
-		if (ino->dentry->d_inode)
+		if (ino->dentry->d_inode) {
+			struct dentry *parent = ino->dentry->d_parent;
+			if (atomic_dec_and_test(&ino->count)) {
+				p_ino = autofs4_dentry_ino(parent);
+				if (p_ino && parent != ino->dentry)
+					atomic_dec(&p_ino->count);
+			}
 			dput(ino->dentry);
+		}
 		ino->dentry = NULL;
 	}
 	if (ino->free)
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index c7ff3577434..d196712c4b9 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -490,6 +490,7 @@ static int autofs4_dir_symlink(struct inode *dir,
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+	struct autofs_info *p_ino;
 	struct inode *inode;
 	char *cp;
 
@@ -523,6 +524,10 @@ static int autofs4_dir_symlink(struct inode *dir,
 
 	dentry->d_fsdata = ino;
 	ino->dentry = dget(dentry);
+	atomic_inc(&ino->count);
+	p_ino = autofs4_dentry_ino(dentry->d_parent);
+	if (p_ino && dentry->d_parent != dentry)
+		atomic_inc(&p_ino->count);
 	ino->inode = inode;
 
 	dir->i_mtime = CURRENT_TIME;
@@ -549,11 +554,17 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+	struct autofs_info *p_ino;
 	
 	/* This allows root to remove symlinks */
 	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
 		return -EACCES;
 
+	if (atomic_dec_and_test(&ino->count)) {
+		p_ino = autofs4_dentry_ino(dentry->d_parent);
+		if (p_ino && dentry->d_parent != dentry)
+			atomic_dec(&p_ino->count);
+	}
 	dput(ino->dentry);
 
 	dentry->d_inode->i_size = 0;
@@ -570,6 +581,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+	struct autofs_info *p_ino;
 	
 	if (!autofs4_oz_mode(sbi))
 		return -EACCES;
@@ -584,8 +596,12 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 
+	if (atomic_dec_and_test(&ino->count)) {
+		p_ino = autofs4_dentry_ino(dentry->d_parent);
+		if (p_ino && dentry->d_parent != dentry)
+			atomic_dec(&p_ino->count);
+	}
 	dput(ino->dentry);
-
 	dentry->d_inode->i_size = 0;
 	dentry->d_inode->i_nlink = 0;
 
@@ -599,6 +615,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+	struct autofs_info *p_ino;
 	struct inode *inode;
 
 	if ( !autofs4_oz_mode(sbi) )
@@ -621,6 +638,10 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	dentry->d_fsdata = ino;
 	ino->dentry = dget(dentry);
+	atomic_inc(&ino->count);
+	p_ino = autofs4_dentry_ino(dentry->d_parent);
+	if (p_ino && dentry->d_parent != dentry)
+		atomic_inc(&p_ino->count);
 	ino->inode = inode;
 	dir->i_nlink++;
 	dir->i_mtime = CURRENT_TIME;
-- 
cgit v1.2.3


From e0a7aae94030b878601eb67686b696de4a3764f0 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:47 -0800
Subject: [PATCH] autofs4: expire mounts that hold no (extra) references only

Alter the expire semantics that define how "busyness" is determined.
Currently a last_used counter is updated on every revalidate from processes
other than the mount owner process group.

This patch changes that so that an expire candidate is busy only if it has a
reference count greater than the expected minimum, such as when there is an
open file or working directory in use.

This method is the only way that busyness can be established for direct mounts
within the new implementation.  For consistency the expire semantic is made
the same for all mounts.

A side effect of the patch is that mounts which remain mounted unessessarily
in the presence of some GUI programs that scan the filesystem should now
expire.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/expire.c | 36 ++++++++++++++++++++++--------------
 fs/autofs4/root.c   |  4 ++++
 2 files changed, 26 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 053d92a745b..6ae2fc8233f 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -47,6 +47,7 @@ static inline int autofs4_can_expire(struct dentry *dentry,
 /* Check a mount point for busyness */
 static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 {
+	struct dentry *top = dentry;
 	int status = 1;
 
 	DPRINTK("dentry %p %.*s",
@@ -62,9 +63,14 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 	if (is_autofs4_dentry(dentry))
 		goto done;
 
-	/* The big question */
-	if (may_umount_tree(mnt) == 0)
-		status = 0;
+	/* Update the expiry counter if fs is busy */
+	if (may_umount_tree(mnt)) {
+		struct autofs_info *ino = autofs4_dentry_ino(top);
+		ino->last_used = jiffies;
+		goto done;
+	}
+
+	status = 0;
 done:
 	DPRINTK("returning = %d", status);
 	mntput(mnt);
@@ -101,7 +107,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 			     unsigned long timeout,
 			     int do_now)
 {
-	struct autofs_info *ino;
+	struct autofs_info *top_ino = autofs4_dentry_ino(top);
 	struct dentry *p;
 
 	DPRINTK("top %p %.*s",
@@ -127,14 +133,16 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 		 * Is someone visiting anywhere in the subtree ?
 		 * If there's no mount we need to check the usage
 		 * count for the autofs dentry.
+		 * If the fs is busy update the expiry counter.
 		 */
-		ino = autofs4_dentry_ino(p);
 		if (d_mountpoint(p)) {
 			if (autofs4_mount_busy(mnt, p)) {
+				top_ino->last_used = jiffies;
 				dput(p);
 				return 1;
 			}
 		} else {
+			struct autofs_info *ino = autofs4_dentry_ino(p);
 			unsigned int ino_count = atomic_read(&ino->count);
 
 			/* allow for dget above and top is already dgot */
@@ -144,6 +152,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 				ino_count++;
 
 			if (atomic_read(&p->d_count) > ino_count) {
+				top_ino->last_used = jiffies;
 				dput(p);
 				return 1;
 			}
@@ -183,14 +192,13 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
 		spin_unlock(&dcache_lock);
 
 		if (d_mountpoint(p)) {
-			/* Can we expire this guy */
-			if (!autofs4_can_expire(p, timeout, do_now))
+			/* Can we umount this guy */
+			if (autofs4_mount_busy(mnt, p))
 				goto cont;
 
-			/* Can we umount this guy */
-			if (!autofs4_mount_busy(mnt, p))
+			/* Can we expire this guy */
+			if (autofs4_can_expire(p, timeout, do_now))
 				return p;
-
 		}
 cont:
 		dput(p);
@@ -246,12 +254,12 @@ static struct dentry *autofs4_expire(struct super_block *sb,
 			DPRINTK("checking mountpoint %p %.*s",
 				dentry, (int)dentry->d_name.len, dentry->d_name.name);
 
-			/* Can we expire this guy */
-			if (!autofs4_can_expire(dentry, timeout, do_now))
+			/* Can we umount this guy */
+			if (autofs4_mount_busy(mnt, dentry))
 				goto next;
 
-			/* Can we umount this guy */
-			if (!autofs4_mount_busy(mnt, dentry)) {
+			/* Can we expire this guy */
+			if (autofs4_can_expire(dentry, timeout, do_now)) {
 				expired = dentry;
 				break;
 			}
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d196712c4b9..3a4a5b47575 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -330,6 +330,10 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f
 	if (!autofs4_oz_mode(sbi))
 		autofs4_update_usage(mnt, dentry);
 
+	/* Initialize expiry counter after successful mount */
+	if (ino)
+		ino->last_used = jiffies;
+
 	spin_lock(&dentry->d_lock);
 	dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
 	spin_unlock(&dentry->d_lock);
-- 
cgit v1.2.3


From 862b110f0132401e422783bf68521ade3dc67578 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:48 -0800
Subject: [PATCH] autofs4: remove update_atime unused function

Remove the update of i_atime from autofs4 in favour of having VFS update it.
i_atime is never used for expire in autofs4.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/root.c | 38 ++++----------------------------------
 1 file changed, 4 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 3a4a5b47575..72dca3335e2 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -84,24 +84,6 @@ static int autofs4_root_readdir(struct file *file, void *dirent,
 	return dcache_readdir(file, dirent, filldir);
 }
 
-/* Update usage from here to top of tree, so that scan of
-   top-level directories will give a useful result */
-static void autofs4_update_usage(struct vfsmount *mnt, struct dentry *dentry)
-{
-	struct dentry *top = dentry->d_sb->s_root;
-
-	spin_lock(&dcache_lock);
-	for(; dentry != top; dentry = dentry->d_parent) {
-		struct autofs_info *ino = autofs4_dentry_ino(dentry);
-
-		if (ino) {
-			touch_atime(mnt, dentry);
-			ino->last_used = jiffies;
-		}
-	}
-	spin_unlock(&dcache_lock);
-}
-
 static int autofs4_dir_open(struct inode *inode, struct file *file)
 {
 	struct dentry *dentry = file->f_dentry;
@@ -246,10 +228,9 @@ out:
 	return dcache_readdir(file, dirent, filldir);
 }
 
-static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int flags)
+static int try_to_fill_dentry(struct dentry *dentry, int flags)
 {
-	struct super_block *sb = mnt->mnt_sb;
-	struct autofs_sb_info *sbi = autofs4_sbi(sb);
+	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	int status = 0;
 
@@ -323,13 +304,6 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f
 		}
 	}
 
-	/*
-	 * We don't update the usages for the autofs daemon itself, this
-	 * is necessary for recursive autofs mounts
-	 */
-	if (!autofs4_oz_mode(sbi))
-		autofs4_update_usage(mnt, dentry);
-
 	/* Initialize expiry counter after successful mount */
 	if (ino)
 		ino->last_used = jiffies;
@@ -357,7 +331,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 	/* Pending dentry */
 	if (autofs4_ispending(dentry)) {
 		if (!oz_mode)
-			status = try_to_fill_dentry(nd->mnt, dentry, flags);
+			status = try_to_fill_dentry(dentry, flags);
 		return status;
 	}
 
@@ -374,15 +348,11 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 			 dentry, dentry->d_name.len, dentry->d_name.name);
 		spin_unlock(&dcache_lock);
 		if (!oz_mode)
-			status = try_to_fill_dentry(nd->mnt, dentry, flags);
+			status = try_to_fill_dentry(dentry, flags);
 		return status;
 	}
 	spin_unlock(&dcache_lock);
 
-	/* Update the usage list */
-	if (!oz_mode)
-		autofs4_update_usage(nd->mnt, dentry);
-
 	return 1;
 }
 
-- 
cgit v1.2.3


From d7c4a5f1080a61fd4a3a00ba5f741986f8fb71f0 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:49 -0800
Subject: [PATCH] autofs4: add a show mount options for proc filesystem

Add show_options method to display autofs4 mount options in the proc
filesystem.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h |  3 +++
 fs/autofs4/inode.c    | 37 +++++++++++++++++++++++++++++++------
 2 files changed, 34 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 00da71d0f32..d82a019ff8e 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -87,11 +87,14 @@ struct autofs_wait_queue {
 struct autofs_sb_info {
 	u32 magic;
 	struct dentry *root;
+	int pipefd;
 	struct file *pipe;
 	pid_t oz_pgrp;
 	int catatonic;
 	int version;
 	int sub_version;
+	int min_proto;
+	int max_proto;
 	unsigned long exp_timeout;
 	int reghost_enabled;
 	int needs_reghost;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 2335b1d6490..d9a71dab40f 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/file.h>
+#include <linux/seq_file.h>
 #include <linux/pagemap.h>
 #include <linux/parser.h>
 #include <linux/bitops.h>
@@ -163,9 +164,26 @@ static void autofs4_put_super(struct super_block *sb)
 	DPRINTK("shutting down");
 }
 
+static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct autofs_sb_info *sbi = autofs4_sbi(mnt->mnt_sb);
+
+	if (!sbi)
+		return 0;
+
+	seq_printf(m, ",fd=%d", sbi->pipefd);
+	seq_printf(m, ",pgrp=%d", sbi->oz_pgrp);
+	seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ);
+	seq_printf(m, ",minproto=%d", sbi->min_proto);
+	seq_printf(m, ",maxproto=%d", sbi->max_proto);
+
+	return 0;
+}
+
 static struct super_operations autofs4_sops = {
 	.put_super	= autofs4_put_super,
 	.statfs		= simple_statfs,
+	.show_options	= autofs4_show_options,
 };
 
 enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto};
@@ -261,7 +279,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	int pipefd;
 	struct autofs_sb_info *sbi;
 	struct autofs_info *ino;
-	int minproto, maxproto;
 
 	sbi = (struct autofs_sb_info *) kmalloc(sizeof(*sbi), GFP_KERNEL);
 	if ( !sbi )
@@ -273,12 +290,15 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	s->s_fs_info = sbi;
 	sbi->magic = AUTOFS_SBI_MAGIC;
 	sbi->root = NULL;
+	sbi->pipefd = -1;
 	sbi->catatonic = 0;
 	sbi->exp_timeout = 0;
 	sbi->oz_pgrp = process_group(current);
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
+	sbi->min_proto = 0;
+	sbi->max_proto = 0;
 	mutex_init(&sbi->wq_mutex);
 	spin_lock_init(&sbi->fs_lock);
 	sbi->queues = NULL;
@@ -311,22 +331,26 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	if (parse_options(data, &pipefd,
 			  &root_inode->i_uid, &root_inode->i_gid,
 			  &sbi->oz_pgrp,
-			  &minproto, &maxproto)) {
+			  &sbi->min_proto, &sbi->max_proto)) {
 		printk("autofs: called with bogus options\n");
 		goto fail_dput;
 	}
 
 	/* Couldn't this be tested earlier? */
-	if (maxproto < AUTOFS_MIN_PROTO_VERSION ||
-	    minproto > AUTOFS_MAX_PROTO_VERSION) {
+	if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION ||
+	    sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) {
 		printk("autofs: kernel does not match daemon version "
 		       "daemon (%d, %d) kernel (%d, %d)\n",
-			minproto, maxproto,
+			sbi->min_proto, sbi->max_proto,
 			AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION);
 		goto fail_dput;
 	}
 
-	sbi->version = maxproto > AUTOFS_MAX_PROTO_VERSION ? AUTOFS_MAX_PROTO_VERSION : maxproto;
+	/* Establish highest kernel protocol version */
+	if (sbi->max_proto > AUTOFS_MAX_PROTO_VERSION)
+		sbi->version = AUTOFS_MAX_PROTO_VERSION;
+	else
+		sbi->version = sbi->max_proto;
 	sbi->sub_version = AUTOFS_PROTO_SUBVERSION;
 
 	DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp);
@@ -339,6 +363,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	if ( !pipe->f_op || !pipe->f_op->write )
 		goto fail_fput;
 	sbi->pipe = pipe;
+	sbi->pipefd = pipefd;
 
 	/*
 	 * Take a reference to the root dentry so we get a chance to
-- 
cgit v1.2.3


From e77fbddf77c071a5735a4bc63a30649bd64baf14 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:49 -0800
Subject: [PATCH] autofs4: white space cleanup for waitq.c

Whitespace and formating changes to waitq code.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/waitq.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index be78e9378c0..b0bb9d43bcd 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -33,7 +33,7 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 	sbi->catatonic = 1;
 	wq = sbi->queues;
 	sbi->queues = NULL;	/* Erase all wait queues */
-	while ( wq ) {
+	while (wq) {
 		nwq = wq->next;
 		wq->status = -ENOENT; /* Magic is gone - report failure */
 		kfree(wq->name);
@@ -45,7 +45,6 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 		fput(sbi->pipe);	/* Close the pipe */
 		sbi->pipe = NULL;
 	}
-
 	shrink_dcache_sb(sbi->sb);
 }
 
@@ -165,7 +164,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	int len, status;
 
 	/* In catatonic mode, we don't wait for nobody */
-	if ( sbi->catatonic )
+	if (sbi->catatonic)
 		return -ENOENT;
 	
 	name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
@@ -190,7 +189,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 			break;
 	}
 
-	if ( !wq ) {
+	if (!wq) {
 		/* Can't wait for an expire if there's no mount */
 		if (notify == NFY_NONE && !d_mountpoint(dentry)) {
 			kfree(name);
@@ -200,7 +199,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 
 		/* Create a new wait queue */
 		wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
-		if ( !wq ) {
+		if (!wq) {
 			kfree(name);
 			mutex_unlock(&sbi->wq_mutex);
 			return -ENOMEM;
@@ -240,14 +239,14 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 
 	/* wq->name is NULL if and only if the lock is already released */
 
-	if ( sbi->catatonic ) {
+	if (sbi->catatonic) {
 		/* We might have slept, so check again for catatonic mode */
 		wq->status = -ENOENT;
 		kfree(wq->name);
 		wq->name = NULL;
 	}
 
-	if ( wq->name ) {
+	if (wq->name) {
 		/* Block all but "shutdown" signals while waiting */
 		sigset_t oldset;
 		unsigned long irqflags;
@@ -283,12 +282,12 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
 	struct autofs_wait_queue *wq, **wql;
 
 	mutex_lock(&sbi->wq_mutex);
-	for ( wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next ) {
-		if ( wq->wait_queue_token == wait_queue_token )
+	for (wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next) {
+		if (wq->wait_queue_token == wait_queue_token)
 			break;
 	}
 
-	if ( !wq ) {
+	if (!wq) {
 		mutex_unlock(&sbi->wq_mutex);
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From 90a59c7cf5dd68b41ffab61dd30eba1ef5746e66 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:50 -0800
Subject: [PATCH] autofs4: rename simple_empty_nolock function

Rename the function simple_empty_nolock to __simple_empty in line with kernel
naming conventions.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h | 2 +-
 fs/autofs4/root.c     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index d82a019ff8e..bc1b0543d2b 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -201,7 +201,7 @@ static inline int simple_positive(struct dentry *dentry)
 	return dentry->d_inode && !d_unhashed(dentry);
 }
 
-static inline int simple_empty_nolock(struct dentry *dentry)
+static inline int __simple_empty(struct dentry *dentry)
 {
 	struct dentry *child;
 	int ret = 0;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 72dca3335e2..dcd4802a5d5 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -343,7 +343,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 	spin_lock(&dcache_lock);
 	if (S_ISDIR(dentry->d_inode->i_mode) &&
 	    !d_mountpoint(dentry) && 
-	    simple_empty_nolock(dentry)) {
+	    __simple_empty(dentry)) {
 		DPRINTK("dentry=%p %.*s, emptydir",
 			 dentry, dentry->d_name.len, dentry->d_name.name);
 		spin_unlock(&dcache_lock);
-- 
cgit v1.2.3


From e3474a8eb38e48dea6690d1fabd75f3c7fd2f93f Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:51 -0800
Subject: [PATCH] autofs4: change may_umount* functions to boolean

Change the functions may_umount and may_umount_tree to boolean functions to
aid code readability.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs/dirhash.c | 2 +-
 fs/autofs4/expire.c | 2 +-
 fs/autofs4/root.c   | 2 +-
 fs/namespace.c      | 8 ++++----
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 5ccfcf26310..3fded389d06 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -92,7 +92,7 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
 			;
 		dput(dentry);
 
-		if ( may_umount(mnt) == 0 ) {
+		if ( may_umount(mnt) ) {
 			mntput(mnt);
 			DPRINTK(("autofs: signaling expire on %s\n", ent->name));
 			return ent; /* Expirable! */
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 6ae2fc8233f..02a218fbde5 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -64,7 +64,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 		goto done;
 
 	/* Update the expiry counter if fs is busy */
-	if (may_umount_tree(mnt)) {
+	if (!may_umount_tree(mnt)) {
 		struct autofs_info *ino = autofs4_dentry_ino(top);
 		ino->last_used = jiffies;
 		goto done;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index dcd4802a5d5..26eb1f02486 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -699,7 +699,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
 {
 	int status = 0;
 
-	if (may_umount(mnt) == 0)
+	if (may_umount(mnt))
 		status = 1;
 
 	DPRINTK("returning %d", status);
diff --git a/fs/namespace.c b/fs/namespace.c
index e069a4c5e38..bf478addb85 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -459,9 +459,9 @@ int may_umount_tree(struct vfsmount *mnt)
 	spin_unlock(&vfsmount_lock);
 
 	if (actual_refs > minimum_refs)
-		return -EBUSY;
+		return 0;
 
-	return 0;
+	return 1;
 }
 
 EXPORT_SYMBOL(may_umount_tree);
@@ -481,10 +481,10 @@ EXPORT_SYMBOL(may_umount_tree);
  */
 int may_umount(struct vfsmount *mnt)
 {
-	int ret = 0;
+	int ret = 1;
 	spin_lock(&vfsmount_lock);
 	if (propagate_mount_busy(mnt, 2))
-		ret = -EBUSY;
+		ret = 0;
 	spin_unlock(&vfsmount_lock);
 	return ret;
 }
-- 
cgit v1.2.3


From 051d381259eb57d6074d02a6ba6e90e744f1a29f Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:53 -0800
Subject: [PATCH] autofs4: nameidata needs to be up to date for follow_link

In order to be able to trigger a mount using the follow_link inode method the
nameidata struct that is passed in needs to have the vfsmount of the autofs
trigger not its parent.

During a path walk if an autofs trigger is mounted on a dentry, when the
follow_link method is called, the nameidata struct contains the vfsmount and
mountpoint dentry of the parent mount while the dentry that is passed in is
the root of the autofs trigger mount.  I believe it is impossible to get the
vfsmount of the trigger mount, within the follow_link method, when only the
parent vfsmount and the root dentry of the trigger mount are known.

This patch updates the nameidata struct on entry to __do_follow_link if it
detects that it is out of date.  It moves the path_to_nameidata to above
__do_follow_link to facilitate calling it from there.  The dput_path is moved
as well as that seemed sensible.  No changes are made to these two functions.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namei.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 98dc2e13436..22f6e8d16aa 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -546,6 +546,22 @@ struct path {
 	struct dentry *dentry;
 };
 
+static inline void dput_path(struct path *path, struct nameidata *nd)
+{
+	dput(path->dentry);
+	if (path->mnt != nd->mnt)
+		mntput(path->mnt);
+}
+
+static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
+{
+	dput(nd->dentry);
+	if (nd->mnt != path->mnt)
+		mntput(nd->mnt);
+	nd->mnt = path->mnt;
+	nd->dentry = path->dentry;
+}
+
 static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd)
 {
 	int error;
@@ -555,8 +571,11 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
 	touch_atime(path->mnt, dentry);
 	nd_set_link(nd, NULL);
 
-	if (path->mnt == nd->mnt)
-		mntget(path->mnt);
+	if (path->mnt != nd->mnt) {
+		path_to_nameidata(path, nd);
+		dget(dentry);
+	}
+	mntget(path->mnt);
 	cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
 	error = PTR_ERR(cookie);
 	if (!IS_ERR(cookie)) {
@@ -573,22 +592,6 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
 	return error;
 }
 
-static inline void dput_path(struct path *path, struct nameidata *nd)
-{
-	dput(path->dentry);
-	if (path->mnt != nd->mnt)
-		mntput(path->mnt);
-}
-
-static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
-{
-	dput(nd->dentry);
-	if (nd->mnt != path->mnt)
-		mntput(nd->mnt);
-	nd->mnt = path->mnt;
-	nd->dentry = path->dentry;
-}
-
 /*
  * This limits recursive symlink follows to 8, while
  * limiting consecutive symlinks to 40.
-- 
cgit v1.2.3


From 34ca959cfc15cf09ad4da4f31ab034691e51af78 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:54 -0800
Subject: [PATCH] autofs4: add v5 follow_link mount trigger method

This patch adds a follow_link inode method for the root of an autofs direct
mount trigger.  It also adds the corresponding mount options and updates the
show_mount method.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h |  8 +++++++
 fs/autofs4/inode.c    | 52 +++++++++++++++++++++++++++++++++--------
 fs/autofs4/root.c     | 64 +++++++++++++++++++++++++++++++++++++++++----------
 3 files changed, 103 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index bc1b0543d2b..ed388a1d8fc 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -3,6 +3,7 @@
  * linux/fs/autofs/autofs_i.h
  *
  *   Copyright 1997-1998 Transmeta Corporation - All Rights Reserved
+ *   Copyright 2005-2006 Ian Kent <raven@themaw.net>
  *
  * This file is part of the Linux kernel and is made available under
  * the terms of the GNU General Public License, version 2, or at your
@@ -84,6 +85,10 @@ struct autofs_wait_queue {
 
 #define AUTOFS_SBI_MAGIC 0x6d4a556d
 
+#define AUTOFS_TYP_INDIRECT     0x0001
+#define AUTOFS_TYP_DIRECT       0x0002
+#define AUTOFS_TYP_OFFSET       0x0004
+
 struct autofs_sb_info {
 	u32 magic;
 	struct dentry *root;
@@ -96,6 +101,7 @@ struct autofs_sb_info {
 	int min_proto;
 	int max_proto;
 	unsigned long exp_timeout;
+	unsigned int type;
 	int reghost_enabled;
 	int needs_reghost;
 	struct super_block *sb;
@@ -162,6 +168,8 @@ int autofs4_expire_multi(struct super_block *, struct vfsmount *,
 extern struct inode_operations autofs4_symlink_inode_operations;
 extern struct inode_operations autofs4_dir_inode_operations;
 extern struct inode_operations autofs4_root_inode_operations;
+extern struct inode_operations autofs4_indirect_root_inode_operations;
+extern struct inode_operations autofs4_direct_root_inode_operations;
 extern struct file_operations autofs4_dir_operations;
 extern struct file_operations autofs4_root_operations;
 
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index d9a71dab40f..3801bed94e4 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -3,6 +3,7 @@
  * linux/fs/autofs/inode.c
  *
  *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
+ *  Copyright 2005-2006 Ian Kent <raven@themaw.net>
  *
  * This file is part of the Linux kernel and is made available under
  * the terms of the GNU General Public License, version 2, or at your
@@ -177,6 +178,13 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
 	seq_printf(m, ",minproto=%d", sbi->min_proto);
 	seq_printf(m, ",maxproto=%d", sbi->max_proto);
 
+	if (sbi->type & AUTOFS_TYP_OFFSET)
+		seq_printf(m, ",offset");
+	else if (sbi->type & AUTOFS_TYP_DIRECT)
+		seq_printf(m, ",direct");
+	else
+		seq_printf(m, ",indirect");
+
 	return 0;
 }
 
@@ -186,7 +194,8 @@ static struct super_operations autofs4_sops = {
 	.show_options	= autofs4_show_options,
 };
 
-enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto};
+enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto,
+	Opt_indirect, Opt_direct, Opt_offset};
 
 static match_table_t tokens = {
 	{Opt_fd, "fd=%u"},
@@ -195,11 +204,15 @@ static match_table_t tokens = {
 	{Opt_pgrp, "pgrp=%u"},
 	{Opt_minproto, "minproto=%u"},
 	{Opt_maxproto, "maxproto=%u"},
+	{Opt_indirect, "indirect"},
+	{Opt_direct, "direct"},
+	{Opt_offset, "offset"},
 	{Opt_err, NULL}
 };
 
 static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
-			 pid_t *pgrp, int *minproto, int *maxproto)
+			 pid_t *pgrp, unsigned int *type,
+			 int *minproto, int *maxproto)
 {
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
@@ -253,6 +266,15 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
 				return 1;
 			*maxproto = option;
 			break;
+		case Opt_indirect:
+			*type = AUTOFS_TYP_INDIRECT;
+			break;
+		case Opt_direct:
+			*type = AUTOFS_TYP_DIRECT;
+			break;
+		case Opt_offset:
+			*type = AUTOFS_TYP_DIRECT | AUTOFS_TYP_OFFSET;
+			break;
 		default:
 			return 1;
 		}
@@ -271,6 +293,11 @@ static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi)
 	return ino;
 }
 
+void autofs4_dentry_release(struct dentry *);
+static struct dentry_operations autofs4_sb_dentry_operations = {
+	.d_release      = autofs4_dentry_release,
+};
+
 int autofs4_fill_super(struct super_block *s, void *data, int silent)
 {
 	struct inode * root_inode;
@@ -297,6 +324,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
+	sbi->type = 0;
 	sbi->min_proto = 0;
 	sbi->max_proto = 0;
 	mutex_init(&sbi->wq_mutex);
@@ -315,27 +343,31 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	if (!ino)
 		goto fail_free;
 	root_inode = autofs4_get_inode(s, ino);
-	kfree(ino);
 	if (!root_inode)
-		goto fail_free;
+		goto fail_ino;
 
-	root_inode->i_op = &autofs4_root_inode_operations;
-	root_inode->i_fop = &autofs4_root_operations;
 	root = d_alloc_root(root_inode);
-	pipe = NULL;
-
 	if (!root)
 		goto fail_iput;
+	pipe = NULL;
+
+	root->d_op = &autofs4_sb_dentry_operations;
+	root->d_fsdata = ino;
 
 	/* Can this call block? */
 	if (parse_options(data, &pipefd,
 			  &root_inode->i_uid, &root_inode->i_gid,
-			  &sbi->oz_pgrp,
+			  &sbi->oz_pgrp, &sbi->type,
 			  &sbi->min_proto, &sbi->max_proto)) {
 		printk("autofs: called with bogus options\n");
 		goto fail_dput;
 	}
 
+	root_inode->i_fop = &autofs4_root_operations;
+	root_inode->i_op = sbi->type & AUTOFS_TYP_DIRECT ?
+			&autofs4_direct_root_inode_operations :
+			&autofs4_indirect_root_inode_operations;
+
 	/* Couldn't this be tested earlier? */
 	if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION ||
 	    sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) {
@@ -391,6 +423,8 @@ fail_dput:
 fail_iput:
 	printk("autofs: get root dentry failed\n");
 	iput(root_inode);
+fail_ino:
+	kfree(ino);
 fail_free:
 	kfree(sbi);
 fail_unlock:
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 26eb1f02486..3f004858224 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -4,7 +4,7 @@
  *
  *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
  *  Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org>
- *  Copyright 2001-2003 Ian Kent <raven@themaw.net>
+ *  Copyright 2001-2006 Ian Kent <raven@themaw.net>
  *
  * This file is part of the Linux kernel and is made available under
  * the terms of the GNU General Public License, version 2, or at your
@@ -30,6 +30,7 @@ static int autofs4_dir_close(struct inode *inode, struct file *file);
 static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir);
 static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir);
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
+static void *autofs4_follow_link(struct dentry *, struct nameidata *);
 
 struct file_operations autofs4_root_operations = {
 	.open		= dcache_dir_open,
@@ -46,7 +47,7 @@ struct file_operations autofs4_dir_operations = {
 	.readdir	= autofs4_dir_readdir,
 };
 
-struct inode_operations autofs4_root_inode_operations = {
+struct inode_operations autofs4_indirect_root_inode_operations = {
 	.lookup		= autofs4_lookup,
 	.unlink		= autofs4_dir_unlink,
 	.symlink	= autofs4_dir_symlink,
@@ -54,6 +55,11 @@ struct inode_operations autofs4_root_inode_operations = {
 	.rmdir		= autofs4_dir_rmdir,
 };
 
+struct inode_operations autofs4_direct_root_inode_operations = {
+	.lookup		= autofs4_lookup,
+	.follow_link	= autofs4_follow_link,
+};
+
 struct inode_operations autofs4_dir_inode_operations = {
 	.lookup		= autofs4_lookup,
 	.unlink		= autofs4_dir_unlink,
@@ -252,7 +258,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 		 */
 		status = d_invalidate(dentry);
 		if (status != -EBUSY)
-			return 0;
+			return -ENOENT;
 	}
 
 	DPRINTK("dentry=%p %.*s ino=%p",
@@ -271,17 +277,17 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 		DPRINTK("mount done status=%d", status);
 
 		if (status && dentry->d_inode)
-			return 0; /* Try to get the kernel to invalidate this dentry */
+			return status; /* Try to get the kernel to invalidate this dentry */
 
 		/* Turn this into a real negative dentry? */
 		if (status == -ENOENT) {
 			spin_lock(&dentry->d_lock);
 			dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
 			spin_unlock(&dentry->d_lock);
-			return 0;
+			return status;
 		} else if (status) {
 			/* Return a negative dentry, but leave it "pending" */
-			return 0;
+			return status;
 		}
 	/* Trigger mount for path component or follow link */
 	} else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) ||
@@ -300,7 +306,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 			spin_lock(&dentry->d_lock);
 			dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
 			spin_unlock(&dentry->d_lock);
-			return 0;
+			return status;
 		}
 	}
 
@@ -311,7 +317,41 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 	spin_lock(&dentry->d_lock);
 	dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
 	spin_unlock(&dentry->d_lock);
-	return 1;
+	return status;
+}
+
+/* For autofs direct mounts the follow link triggers the mount */
+static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	int oz_mode = autofs4_oz_mode(sbi);
+	unsigned int lookup_type;
+	int status;
+
+	DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d",
+		dentry, dentry->d_name.len, dentry->d_name.name, oz_mode,
+		nd->flags);
+
+	/* If it's our master or we shouldn't trigger a mount we're done */
+	lookup_type = nd->flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY);
+	if (oz_mode || !lookup_type)
+		goto done;
+
+	status = try_to_fill_dentry(dentry, 0);
+	if (status)
+		goto out_error;
+
+	if (!autofs4_follow_mount(&nd->mnt, &nd->dentry)) {
+		status = -ENOENT;
+		goto out_error;
+	}
+
+done:
+	return NULL;
+
+out_error:
+	path_release(nd);
+	return ERR_PTR(status);
 }
 
 /*
@@ -326,13 +366,13 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
 	int oz_mode = autofs4_oz_mode(sbi);
 	int flags = nd ? nd->flags : 0;
-	int status = 1;
+	int status = 0;
 
 	/* Pending dentry */
 	if (autofs4_ispending(dentry)) {
 		if (!oz_mode)
 			status = try_to_fill_dentry(dentry, flags);
-		return status;
+		return !status;
 	}
 
 	/* Negative dentry.. invalidate if "old" */
@@ -349,14 +389,14 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 		spin_unlock(&dcache_lock);
 		if (!oz_mode)
 			status = try_to_fill_dentry(dentry, flags);
-		return status;
+		return !status;
 	}
 	spin_unlock(&dcache_lock);
 
 	return 1;
 }
 
-static void autofs4_dentry_release(struct dentry *de)
+void autofs4_dentry_release(struct dentry *de)
 {
 	struct autofs_info *inf;
 
-- 
cgit v1.2.3


From 3a15e2ab5d6e79a79291734ac24f33d51c0ae389 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:55 -0800
Subject: [PATCH] autofs4: add v5 expire logic

This patch adds expire logic for autofs direct mounts.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/expire.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 87 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 02a218fbde5..8fd92eaf936 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -4,7 +4,7 @@
  *
  *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
  *  Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org>
- *  Copyright 2001-2003 Ian Kent <raven@themaw.net>
+ *  Copyright 2001-2006 Ian Kent <raven@themaw.net>
  *
  * This file is part of the Linux kernel and is made available under
  * the terms of the GNU General Public License, version 2, or at your
@@ -99,6 +99,39 @@ static struct dentry *next_dentry(struct dentry *p, struct dentry *root)
 	return list_entry(next, struct dentry, d_u.d_child);
 }
 
+/*
+ * Check a direct mount point for busyness.
+ * Direct mounts have similar expiry semantics to tree mounts.
+ * The tree is not busy iff no mountpoints are busy and there are no
+ * autofs submounts.
+ */
+static int autofs4_direct_busy(struct vfsmount *mnt,
+				struct dentry *top,
+				unsigned long timeout,
+				int do_now)
+{
+	DPRINTK("top %p %.*s",
+		top, (int) top->d_name.len, top->d_name.name);
+
+	/* Not a mountpoint - give up */
+	if (!d_mountpoint(top))
+		return 1;
+
+	/* If it's busy update the expiry counters */
+	if (!may_umount_tree(mnt)) {
+		struct autofs_info *ino = autofs4_dentry_ino(top);
+		if (ino)
+			ino->last_used = jiffies;
+		return 1;
+	}
+
+	/* Timeout of a direct mount is determined by its top dentry */
+	if (!autofs4_can_expire(top, timeout, do_now))
+		return 1;
+
+	return 0;
+}
+
 /* Check a directory tree of mount points for busyness
  * The tree is not busy iff no mountpoints are busy
  */
@@ -208,16 +241,48 @@ cont:
 	return NULL;
 }
 
+/* Check if we can expire a direct mount (possibly a tree) */
+static struct dentry *autofs4_expire_direct(struct super_block *sb,
+					    struct vfsmount *mnt,
+					    struct autofs_sb_info *sbi,
+					    int how)
+{
+	unsigned long timeout;
+	struct dentry *root = dget(sb->s_root);
+	int do_now = how & AUTOFS_EXP_IMMEDIATE;
+
+	if (!sbi->exp_timeout || !root)
+		return NULL;
+
+	now = jiffies;
+	timeout = sbi->exp_timeout;
+
+	/* Lock the tree as we must expire as a whole */
+	spin_lock(&sbi->fs_lock);
+	if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
+		struct autofs_info *ino = autofs4_dentry_ino(root);
+
+		/* Set this flag early to catch sys_chdir and the like */
+		ino->flags |= AUTOFS_INF_EXPIRING;
+		spin_unlock(&sbi->fs_lock);
+		return root;
+	}
+	spin_unlock(&sbi->fs_lock);
+	dput(root);
+
+	return NULL;
+}
+
 /*
  * Find an eligible tree to time-out
  * A tree is eligible if :-
  *  - it is unused by any user process
  *  - it has been unused for exp_timeout time
  */
-static struct dentry *autofs4_expire(struct super_block *sb,
-				     struct vfsmount *mnt,
-				     struct autofs_sb_info *sbi,
-				     int how)
+static struct dentry *autofs4_expire_indirect(struct super_block *sb,
+					      struct vfsmount *mnt,
+					      struct autofs_sb_info *sbi,
+					      int how)
 {
 	unsigned long timeout;
 	struct dentry *root = sb->s_root;
@@ -249,7 +314,12 @@ static struct dentry *autofs4_expire(struct super_block *sb,
 		dentry = dget(dentry);
 		spin_unlock(&dcache_lock);
 
-		/* Case 1: indirect mount or top level direct mount */
+		/*
+		 * Case 1: (i) indirect mount or top level pseudo direct mount
+		 *	   (autofs-4.1).
+		 *	   (ii) indirect mount with offset mount, check the "/"
+		 *	   offset (autofs-5.0+).
+		 */
 		if (d_mountpoint(dentry)) {
 			DPRINTK("checking mountpoint %p %.*s",
 				dentry, (int)dentry->d_name.len, dentry->d_name.name);
@@ -283,7 +353,10 @@ static struct dentry *autofs4_expire(struct super_block *sb,
 				break;
 			}
 			spin_unlock(&sbi->fs_lock);
-		/* Case 3: direct mount, expire individual leaves */
+		/*
+		 * Case 3: pseudo direct mount, expire individual leaves
+		 *	   (autofs-4.1).
+		 */
 		} else {
 			expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
 			if (expired) {
@@ -325,7 +398,7 @@ int autofs4_expire_run(struct super_block *sb,
 	pkt.hdr.proto_version = sbi->version;
 	pkt.hdr.type = autofs_ptype_expire;
 
-	if ((dentry = autofs4_expire(sb, mnt, sbi, 0)) == NULL)
+	if ((dentry = autofs4_expire_indirect(sb, mnt, sbi, 0)) == NULL)
 		return -EAGAIN;
 
 	pkt.len = dentry->d_name.len;
@@ -351,7 +424,12 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 	if (arg && get_user(do_now, arg))
 		return -EFAULT;
 
-	if ((dentry = autofs4_expire(sb, mnt, sbi, do_now)) != NULL) {
+	if (sbi->type & AUTOFS_TYP_DIRECT)
+		dentry = autofs4_expire_direct(sb, mnt, sbi, do_now);
+	else
+		dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now);
+
+	if (dentry) {
 		struct autofs_info *ino = autofs4_dentry_ino(dentry);
 
 		/* This is synchronous because it makes the daemon a
-- 
cgit v1.2.3


From 5c0a32fc2cd0be912511199449a37a4a6f0f582d Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:55 -0800
Subject: [PATCH] autofs4: add new packet type for v5 communications

This patch define a new autofs packet for autofs v5 and updates the waitq.c
functions to handle the additional packet type.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h | 23 +++++++++-----
 fs/autofs4/waitq.c    | 86 ++++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 90 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index ed388a1d8fc..37c8d909d1e 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -77,6 +77,12 @@ struct autofs_wait_queue {
 	int hash;
 	int len;
 	char *name;
+	u32 dev;
+	u64 ino;
+	uid_t uid;
+	gid_t gid;
+	pid_t pid;
+	pid_t tgid;
 	/* This is for status reporting upon return */
 	int status;
 	atomic_t notified;
@@ -180,13 +186,6 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *, struct autofs_sb_info
 
 /* Queue management functions */
 
-enum autofs_notify
-{
-	NFY_NONE,
-	NFY_MOUNT,
-	NFY_EXPIRE
-};
-
 int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
 int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
 void autofs4_catatonic_mode(struct autofs_sb_info *);
@@ -204,6 +203,16 @@ static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **de
 	return res;
 }
 
+static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi)
+{
+	return new_encode_dev(sbi->sb->s_dev);
+}
+
+static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi)
+{
+	return sbi->sb->s_root->d_inode->i_ino;
+}
+
 static inline int simple_positive(struct dentry *dentry)
 {
 	return dentry->d_inode && !d_unhashed(dentry);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index b0bb9d43bcd..12da2c977b0 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -3,7 +3,7 @@
  * linux/fs/autofs/waitq.c
  *
  *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *  Copyright 2001-2003 Ian Kent <raven@themaw.net>
+ *  Copyright 2001-2006 Ian Kent <raven@themaw.net>
  *
  * This file is part of the Linux kernel and is made available under
  * the terms of the GNU General Public License, version 2, or at your
@@ -97,7 +97,10 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 	pkt.hdr.proto_version = sbi->version;
 	pkt.hdr.type = type;
-	if (type == autofs_ptype_missing) {
+	switch (type) {
+	/* Kernel protocol v4 missing and expire packets */
+	case autofs_ptype_missing:
+	{
 		struct autofs_packet_missing *mp = &pkt.missing;
 
 		pktsz = sizeof(*mp);
@@ -106,7 +109,10 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 		mp->len = wq->len;
 		memcpy(mp->name, wq->name, wq->len);
 		mp->name[wq->len] = '\0';
-	} else if (type == autofs_ptype_expire_multi) {
+		break;
+	}
+	case autofs_ptype_expire_multi:
+	{
 		struct autofs_packet_expire_multi *ep = &pkt.expire_multi;
 
 		pktsz = sizeof(*ep);
@@ -115,7 +121,34 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 		ep->len = wq->len;
 		memcpy(ep->name, wq->name, wq->len);
 		ep->name[wq->len] = '\0';
-	} else {
+		break;
+	}
+	/*
+	 * Kernel protocol v5 packet for handling indirect and direct
+	 * mount missing and expire requests
+	 */
+	case autofs_ptype_missing_indirect:
+	case autofs_ptype_expire_indirect:
+	case autofs_ptype_missing_direct:
+	case autofs_ptype_expire_direct:
+	{
+		struct autofs_v5_packet *packet = &pkt.v5_packet;
+
+		pktsz = sizeof(*packet);
+
+		packet->wait_queue_token = wq->wait_queue_token;
+		packet->len = wq->len;
+		memcpy(packet->name, wq->name, wq->len);
+		packet->name[wq->len] = '\0';
+		packet->dev = wq->dev;
+		packet->ino = wq->ino;
+		packet->uid = wq->uid;
+		packet->gid = wq->gid;
+		packet->pid = wq->pid;
+		packet->tgid = wq->tgid;
+		break;
+	}
+	default:
 		printk("autofs4_notify_daemon: bad type %d!\n", type);
 		return;
 	}
@@ -161,7 +194,9 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 {
 	struct autofs_wait_queue *wq;
 	char *name;
-	int len, status;
+	unsigned int len = 0;
+	unsigned int hash = 0;
+	int status;
 
 	/* In catatonic mode, we don't wait for nobody */
 	if (sbi->catatonic)
@@ -171,11 +206,17 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	if (!name)
 		return -ENOMEM;
 
-	len = autofs4_getpath(sbi, dentry, &name);
-	if (!len) {
-		kfree(name);
-		return -ENOENT;
+	/* If this is a direct mount request create a dummy name */
+	if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYP_DIRECT))
+		len = sprintf(name, "%p", dentry);
+	else {
+		len = autofs4_getpath(sbi, dentry, &name);
+		if (!len) {
+			kfree(name);
+			return -ENOENT;
+		}
 	}
+	hash = full_name_hash(name, len);
 
 	if (mutex_lock_interruptible(&sbi->wq_mutex)) {
 		kfree(name);
@@ -211,9 +252,15 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		wq->next = sbi->queues;
 		sbi->queues = wq;
 		init_waitqueue_head(&wq->queue);
-		wq->hash = dentry->d_name.hash;
+		wq->hash = hash;
 		wq->name = name;
 		wq->len = len;
+		wq->dev = autofs4_get_dev(sbi);
+		wq->ino = autofs4_get_ino(sbi);
+		wq->uid = current->uid;
+		wq->gid = current->gid;
+		wq->pid = current->pid;
+		wq->tgid = current->tgid;
 		wq->status = -EINTR; /* Status return if interrupted */
 		atomic_set(&wq->wait_ctr, 2);
 		atomic_set(&wq->notified, 1);
@@ -227,8 +274,23 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	}
 
 	if (notify != NFY_NONE && atomic_dec_and_test(&wq->notified)) {
-		int type = (notify == NFY_MOUNT ?
-			autofs_ptype_missing : autofs_ptype_expire_multi);
+		int type;
+
+		if (sbi->version < 5) {
+			if (notify == NFY_MOUNT)
+				type = autofs_ptype_missing;
+			else
+				type = autofs_ptype_expire_multi;
+		} else {
+			if (notify == NFY_MOUNT)
+				type = (sbi->type & AUTOFS_TYP_DIRECT) ?
+					autofs_ptype_missing_direct :
+					 autofs_ptype_missing_indirect;
+			else
+				type = (sbi->type & AUTOFS_TYP_DIRECT) ?
+					autofs_ptype_expire_direct :
+					autofs_ptype_expire_indirect;
+		}
 
 		DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
 			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
-- 
cgit v1.2.3


From 44d53eb041d901620b1090590a549a705767fd10 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:56 -0800
Subject: [PATCH] autofs4: change AUTOFS_TYP_* AUTOFS_TYPE_*

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h |  6 +++---
 fs/autofs4/expire.c   |  2 +-
 fs/autofs4/inode.c    | 12 ++++++------
 fs/autofs4/waitq.c    |  6 +++---
 4 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 37c8d909d1e..ff6239d57b4 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -91,9 +91,9 @@ struct autofs_wait_queue {
 
 #define AUTOFS_SBI_MAGIC 0x6d4a556d
 
-#define AUTOFS_TYP_INDIRECT     0x0001
-#define AUTOFS_TYP_DIRECT       0x0002
-#define AUTOFS_TYP_OFFSET       0x0004
+#define AUTOFS_TYPE_INDIRECT     0x0001
+#define AUTOFS_TYPE_DIRECT       0x0002
+#define AUTOFS_TYPE_OFFSET       0x0004
 
 struct autofs_sb_info {
 	u32 magic;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 8fd92eaf936..08e33218a64 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -424,7 +424,7 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 	if (arg && get_user(do_now, arg))
 		return -EFAULT;
 
-	if (sbi->type & AUTOFS_TYP_DIRECT)
+	if (sbi->type & AUTOFS_TYPE_DIRECT)
 		dentry = autofs4_expire_direct(sb, mnt, sbi, do_now);
 	else
 		dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 3801bed94e4..94388890549 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -178,9 +178,9 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
 	seq_printf(m, ",minproto=%d", sbi->min_proto);
 	seq_printf(m, ",maxproto=%d", sbi->max_proto);
 
-	if (sbi->type & AUTOFS_TYP_OFFSET)
+	if (sbi->type & AUTOFS_TYPE_OFFSET)
 		seq_printf(m, ",offset");
-	else if (sbi->type & AUTOFS_TYP_DIRECT)
+	else if (sbi->type & AUTOFS_TYPE_DIRECT)
 		seq_printf(m, ",direct");
 	else
 		seq_printf(m, ",indirect");
@@ -267,13 +267,13 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
 			*maxproto = option;
 			break;
 		case Opt_indirect:
-			*type = AUTOFS_TYP_INDIRECT;
+			*type = AUTOFS_TYPE_INDIRECT;
 			break;
 		case Opt_direct:
-			*type = AUTOFS_TYP_DIRECT;
+			*type = AUTOFS_TYPE_DIRECT;
 			break;
 		case Opt_offset:
-			*type = AUTOFS_TYP_DIRECT | AUTOFS_TYP_OFFSET;
+			*type = AUTOFS_TYPE_DIRECT | AUTOFS_TYPE_OFFSET;
 			break;
 		default:
 			return 1;
@@ -364,7 +364,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	}
 
 	root_inode->i_fop = &autofs4_root_operations;
-	root_inode->i_op = sbi->type & AUTOFS_TYP_DIRECT ?
+	root_inode->i_op = sbi->type & AUTOFS_TYPE_DIRECT ?
 			&autofs4_direct_root_inode_operations :
 			&autofs4_indirect_root_inode_operations;
 
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 12da2c977b0..894d74671bd 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -207,7 +207,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		return -ENOMEM;
 
 	/* If this is a direct mount request create a dummy name */
-	if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYP_DIRECT))
+	if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT))
 		len = sprintf(name, "%p", dentry);
 	else {
 		len = autofs4_getpath(sbi, dentry, &name);
@@ -283,11 +283,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 				type = autofs_ptype_expire_multi;
 		} else {
 			if (notify == NFY_MOUNT)
-				type = (sbi->type & AUTOFS_TYP_DIRECT) ?
+				type = (sbi->type & AUTOFS_TYPE_DIRECT) ?
 					autofs_ptype_missing_direct :
 					 autofs_ptype_missing_indirect;
 			else
-				type = (sbi->type & AUTOFS_TYP_DIRECT) ?
+				type = (sbi->type & AUTOFS_TYPE_DIRECT) ?
 					autofs_ptype_expire_direct :
 					autofs_ptype_expire_indirect;
 		}
-- 
cgit v1.2.3


From 3370c74b2e147169d5bba6665e03d3281f5795ed Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Mon, 27 Mar 2006 01:14:57 -0800
Subject: [PATCH] Remove redundant check from autofs4_put_super

We have to have a valid sbi here, or we'd have oopsed already.  (There's a
dereference of sbi->catatonic a few lines above)

Coverity #740

Signed-off-by: Dave Jones <davej@redhat.com>
Cc: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 94388890549..4eddee4e76f 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -157,8 +157,7 @@ static void autofs4_put_super(struct super_block *sb)
 		autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */
 
 	/* Clean up and release dangling references */
-	if (sbi)
-		autofs4_force_release(sbi);
+	autofs4_force_release(sbi);
 
 	kfree(sbi);
 
-- 
cgit v1.2.3


From 871f94344cea36b2ce91231f442f9f9298529712 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:58 -0800
Subject: [PATCH] autofs4: follow_link missing functionality

This functionality is also need for operation of autofs v5 direct mounts.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/expire.c |  4 ----
 fs/autofs4/root.c   | 50 ++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 44 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 08e33218a64..b8ce02607d6 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -113,10 +113,6 @@ static int autofs4_direct_busy(struct vfsmount *mnt,
 	DPRINTK("top %p %.*s",
 		top, (int) top->d_name.len, top->d_name.name);
 
-	/* Not a mountpoint - give up */
-	if (!d_mountpoint(top))
-		return 1;
-
 	/* If it's busy update the expiry counters */
 	if (!may_umount_tree(mnt)) {
 		struct autofs_info *ino = autofs4_dentry_ino(top);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 3f004858224..c8fe43a475e 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -57,6 +57,9 @@ struct inode_operations autofs4_indirect_root_inode_operations = {
 
 struct inode_operations autofs4_direct_root_inode_operations = {
 	.lookup		= autofs4_lookup,
+	.unlink		= autofs4_dir_unlink,
+	.mkdir		= autofs4_dir_mkdir,
+	.rmdir		= autofs4_dir_rmdir,
 	.follow_link	= autofs4_follow_link,
 };
 
@@ -337,15 +340,50 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 	if (oz_mode || !lookup_type)
 		goto done;
 
-	status = try_to_fill_dentry(dentry, 0);
-	if (status)
-		goto out_error;
+	/*
+	 * If a request is pending wait for it.
+	 * If it's a mount then it won't be expired till at least
+	 * a liitle later and if it's an expire then we might need
+	 * to mount it again.
+	 */
+	if (autofs4_ispending(dentry)) {
+		DPRINTK("waiting for active request %p name=%.*s",
+			dentry, dentry->d_name.len, dentry->d_name.name);
 
-	if (!autofs4_follow_mount(&nd->mnt, &nd->dentry)) {
-		status = -ENOENT;
-		goto out_error;
+		status = autofs4_wait(sbi, dentry, NFY_NONE);
+
+		DPRINTK("request done status=%d", status);
 	}
 
+	/*
+	 * If the dentry contains directories then it is an
+	 * autofs multi-mount with no root mount offset. So
+	 * don't try to mount it again.
+	 */
+	spin_lock(&dcache_lock);
+	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
+		spin_unlock(&dcache_lock);
+
+		status = try_to_fill_dentry(dentry, 0);
+		if (status)
+			goto out_error;
+
+		/*
+		 * The mount succeeded but if there is no root mount
+		 * it must be an autofs multi-mount with no root offset
+		 * so we don't need to follow the mount.
+		 */
+		if (d_mountpoint(dentry)) {
+			if (!autofs4_follow_mount(&nd->mnt, &nd->dentry)) {
+				status = -ENOENT;
+				goto out_error;
+			}
+		}
+
+		goto done;
+	}
+	spin_unlock(&dcache_lock);
+
 done:
 	return NULL;
 
-- 
cgit v1.2.3


From 3e7b19198003fc25b11838e709f17d4fa173b2d7 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:59 -0800
Subject: [PATCH] autofs4: atomic var underflow

Fix accidental underflow of the atomic counter.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h | 2 +-
 fs/autofs4/waitq.c    | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index ff6239d57b4..617fd7b3744 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -85,7 +85,7 @@ struct autofs_wait_queue {
 	pid_t tgid;
 	/* This is for status reporting upon return */
 	int status;
-	atomic_t notified;
+	atomic_t notify;
 	atomic_t wait_ctr;
 };
 
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 894d74671bd..142ab6aa2aa 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -263,7 +263,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		wq->tgid = current->tgid;
 		wq->status = -EINTR; /* Status return if interrupted */
 		atomic_set(&wq->wait_ctr, 2);
-		atomic_set(&wq->notified, 1);
+		atomic_set(&wq->notify, 1);
 		mutex_unlock(&sbi->wq_mutex);
 	} else {
 		atomic_inc(&wq->wait_ctr);
@@ -273,9 +273,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
 	}
 
-	if (notify != NFY_NONE && atomic_dec_and_test(&wq->notified)) {
+	if (notify != NFY_NONE && atomic_read(&wq->notify)) {
 		int type;
 
+		atomic_dec(&wq->notify);
+
 		if (sbi->version < 5) {
 			if (notify == NFY_MOUNT)
 				type = autofs_ptype_missing;
-- 
cgit v1.2.3


From efc36aa5608f5717338747e152c23f2cfdb14697 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:14:59 -0800
Subject: [PATCH] knfsd: Change the store of auth_domains to not be a 'cache'

The 'auth_domain's are simply handles on internal data structures.  They do
not cache information from user-space, and forcing them into the mold of a
'cache' misrepresents their true nature and causes confusion.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/export.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 417ec02df44..ac0997731fc 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -242,7 +242,7 @@ static inline int svc_expkey_match (struct svc_expkey *a, struct svc_expkey *b)
 
 static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *item)
 {
-	cache_get(&item->ek_client->h);
+	kref_get(&item->ek_client->ref);
 	new->ek_client = item->ek_client;
 	new->ek_fsidtype = item->ek_fsidtype;
 	new->ek_fsid[0] = item->ek_fsid[0];
@@ -474,7 +474,7 @@ static inline int svc_export_match(struct svc_export *a, struct svc_export *b)
 }
 static inline void svc_export_init(struct svc_export *new, struct svc_export *item)
 {
-	cache_get(&item->ex_client->h);
+	kref_get(&item->ex_client->ref);
 	new->ex_client = item->ex_client;
 	new->ex_dentry = dget(item->ex_dentry);
 	new->ex_mnt = mntget(item->ex_mnt);
@@ -1129,7 +1129,6 @@ exp_delclient(struct nfsctl_client *ncp)
 	 */
 	if (dom) {
 		err = auth_unix_forget_old(dom);
-		dom->h.expiry_time = get_seconds();
 		auth_domain_put(dom);
 	}
 
-- 
cgit v1.2.3


From eab7e2e647c348b418e8715ecaca0177e1b473c7 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:00 -0800
Subject: [PATCH] knfsd: Break the hard linkage from svc_expkey to svc_export

Current svc_expkey holds a pointer to the svc_export structure, so updates to
that structure have to be in-place, which is a wart on the whole cache
infrastruct.  So we break that linkage and just do a second lookup.

If this became a performance issue, it would be possible to put a direct link
back in which was only used conditionally.  i.e.  when an object is replaced
in the cache, we set a flag in the old object.  When dereferencing the link
from svc_expkey, if the flag is set, we drop the reference and do a fresh
lookup.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/export.c | 60 +++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 40 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index ac0997731fc..587829ed651 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -73,8 +73,10 @@ void expkey_put(struct cache_head *item, struct cache_detail *cd)
 	if (cache_put(item, cd)) {
 		struct svc_expkey *key = container_of(item, struct svc_expkey, h);
 		if (test_bit(CACHE_VALID, &item->flags) &&
-		    !test_bit(CACHE_NEGATIVE, &item->flags))
-			exp_put(key->ek_export);
+		    !test_bit(CACHE_NEGATIVE, &item->flags)) {
+			dput(key->ek_dentry);
+			mntput(key->ek_mnt);
+		}
 		auth_domain_put(key->ek_client);
 		kfree(key);
 	}
@@ -164,26 +166,18 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 	} else {
 		struct nameidata nd;
 		struct svc_expkey *ek;
-		struct svc_export *exp;
 		err = path_lookup(buf, 0, &nd);
 		if (err)
 			goto out;
 
 		dprintk("Found the path %s\n", buf);
-		exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL);
-
-		err = -ENOENT;
-		if (!exp)
-			goto out_nd;
-		key.ek_export = exp;
-		dprintk("And found export\n");
+		key.ek_mnt = nd.mnt;
+		key.ek_dentry = nd.dentry;
 		
 		ek = svc_expkey_lookup(&key, 1);
 		if (ek)
 			expkey_put(&ek->h, &svc_expkey_cache);
-		exp_put(exp);
 		err = 0;
-	out_nd:
 		path_release(&nd);
 	}
 	cache_flush();
@@ -214,7 +208,7 @@ static int expkey_show(struct seq_file *m,
 	if (test_bit(CACHE_VALID, &h->flags) && 
 	    !test_bit(CACHE_NEGATIVE, &h->flags)) {
 		seq_printf(m, " ");
-		seq_path(m, ek->ek_export->ex_mnt, ek->ek_export->ex_dentry, "\\ \t\n");
+		seq_path(m, ek->ek_mnt, ek->ek_dentry, "\\ \t\n");
 	}
 	seq_printf(m, "\n");
 	return 0;
@@ -252,8 +246,8 @@ static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *it
 
 static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey *item)
 {
-	cache_get(&item->ek_export->h);
-	new->ek_export = item->ek_export;
+	new->ek_mnt = mntget(item->ek_mnt);
+	new->ek_dentry = dget(item->ek_dentry);
 }
 
 static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */
@@ -519,7 +513,8 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
 	key.ek_client = clp;
 	key.ek_fsidtype = fsid_type;
 	memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
-	key.ek_export = exp;
+	key.ek_mnt = exp->ex_mnt;
+	key.ek_dentry = exp->ex_dentry;
 	key.h.expiry_time = NEVER;
 	key.h.flags = 0;
 
@@ -741,8 +736,8 @@ exp_export(struct nfsctl_export *nxp)
 	if ((nxp->ex_flags & NFSEXP_FSID) &&
 	    (fsid_key = exp_get_fsid_key(clp, nxp->ex_dev)) &&
 	    !IS_ERR(fsid_key) &&
-	    fsid_key->ek_export &&
-	    fsid_key->ek_export != exp)
+	    fsid_key->ek_mnt &&
+	    (fsid_key->ek_mnt != nd.mnt || fsid_key->ek_dentry != nd.dentry) )
 		goto finish;
 
 	if (exp) {
@@ -912,6 +907,24 @@ out:
 	return err;
 }
 
+struct svc_export *
+exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv,
+	 struct cache_req *reqp)
+{
+	struct svc_export *exp;
+	struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp);
+	if (!ek || IS_ERR(ek))
+		return ERR_PTR(PTR_ERR(ek));
+
+	exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp);
+	expkey_put(&ek->h, &svc_expkey_cache);
+
+	if (!exp || IS_ERR(exp))
+		return ERR_PTR(PTR_ERR(exp));
+	return exp;
+}
+
+
 /*
  * Called when we need the filehandle for the root of the pseudofs,
  * for a given NFSv4 client.   The root is defined to be the
@@ -922,6 +935,7 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
 	       struct cache_req *creq)
 {
 	struct svc_expkey *fsid_key;
+	struct svc_export *exp;
 	int rv;
 	u32 fsidv[2];
 
@@ -933,8 +947,14 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
 	if (!fsid_key || IS_ERR(fsid_key))
 		return nfserr_perm;
 
-	rv = fh_compose(fhp, fsid_key->ek_export, 
-			  fsid_key->ek_export->ex_dentry, NULL);
+	exp = exp_get_by_name(clp, fsid_key->ek_mnt, fsid_key->ek_dentry, creq);
+	if (exp == NULL)
+		rv = nfserr_perm;
+	else if (IS_ERR(exp))
+		rv = nfserrno(PTR_ERR(exp));
+	else
+		rv = fh_compose(fhp, exp,
+				fsid_key->ek_dentry, NULL);
 	expkey_put(&fsid_key->h, &svc_expkey_cache);
 	return rv;
 }
-- 
cgit v1.2.3


From 7d317f2c9f1e9dcf4f632fa98f91d1d4a36c4cae Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:01 -0800
Subject: [PATCH] knfsd: Get rid of 'inplace' sunrpc caches

These were an unnecessary wart.  Also only have one 'DefineSimpleCache..'
instead of two.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/export.c    |  4 ++--
 fs/nfsd/nfs4idmap.c | 10 ++--------
 2 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 587829ed651..c591761a1ad 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -250,7 +250,7 @@ static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey *
 	new->ek_dentry = dget(item->ek_dentry);
 }
 
-static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */
+static DefineSimpleCacheLookup(svc_expkey, svc_expkey)
 
 #define	EXPORT_HASHBITS		8
 #define	EXPORT_HASHMAX		(1<< EXPORT_HASHBITS)
@@ -482,7 +482,7 @@ static inline void svc_export_update(struct svc_export *new, struct svc_export *
 	new->ex_fsid = item->ex_fsid;
 }
 
-static DefineSimpleCacheLookup(svc_export,1) /* allow inplace updates */
+static DefineSimpleCacheLookup(svc_export, svc_export)
 
 
 struct svc_expkey *
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 13369650cdf..dea690aa8bb 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -76,12 +76,6 @@ struct ent {
 	char              authname[IDMAP_NAMESZ];
 };
 
-#define DefineSimpleCacheLookupMap(STRUCT, FUNC)			\
-        DefineCacheLookup(struct STRUCT, h, FUNC##_lookup,		\
-        (struct STRUCT *item, int set), /*no setup */,			\
-	& FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp),	\
-	STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0)
-
 /* Common entry handling */
 
 #define ENT_HASHBITS          8
@@ -264,7 +258,7 @@ out:
 	return error;
 }
 
-static DefineSimpleCacheLookupMap(ent, idtoname);
+static DefineSimpleCacheLookup(ent, idtoname);
 
 /*
  * Name -> ID cache
@@ -390,7 +384,7 @@ out:
 	return (error);
 }
 
-static DefineSimpleCacheLookupMap(ent, nametoid);
+static DefineSimpleCacheLookup(ent, nametoid);
 
 /*
  * Exported API
-- 
cgit v1.2.3


From 4f7774c3a0558526c0faaf525581f2029480b313 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:03 -0800
Subject: [PATCH] knfsd: Use new cache_lookup for svc_export

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/export.c | 125 +++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 88 insertions(+), 37 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c591761a1ad..2ebd77d758b 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -258,16 +258,6 @@ static DefineSimpleCacheLookup(svc_expkey, svc_expkey)
 
 static struct cache_head *export_table[EXPORT_HASHMAX];
 
-static inline int svc_export_hash(struct svc_export *item)
-{
-	int rv;
-
-	rv = hash_ptr(item->ex_client, EXPORT_HASHBITS);
-	rv ^= hash_ptr(item->ex_dentry, EXPORT_HASHBITS);
-	rv ^= hash_ptr(item->ex_mnt, EXPORT_HASHBITS);
-	return rv;
-}
-
 void svc_export_put(struct cache_head *item, struct cache_detail *cd)
 {
 	if (cache_put(item, cd)) {
@@ -298,7 +288,8 @@ static void svc_export_request(struct cache_detail *cd,
 	(*bpp)[-1] = '\n';
 }
 
-static struct svc_export *svc_export_lookup(struct svc_export *, int);
+struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old);
+static struct svc_export *svc_export_lookup(struct svc_export *);
 
 static int check_export(struct inode *inode, int flags)
 {
@@ -411,11 +402,16 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 		if (err) goto out;
 	}
 
-	expp = svc_export_lookup(&exp, 1);
+	expp = svc_export_lookup(&exp);
 	if (expp)
-		exp_put(expp);
-	err = 0;
+		expp = svc_export_update(&exp, expp);
+	else
+		err = -ENOMEM;
 	cache_flush();
+	if (expp == NULL)
+		err = -ENOMEM;
+	else
+		exp_put(expp);
  out:
 	if (nd.dentry)
 		path_release(&nd);
@@ -449,40 +445,95 @@ static int svc_export_show(struct seq_file *m,
 	seq_puts(m, ")\n");
 	return 0;
 }
-struct cache_detail svc_export_cache = {
-	.owner		= THIS_MODULE,
-	.hash_size	= EXPORT_HASHMAX,
-	.hash_table	= export_table,
-	.name		= "nfsd.export",
-	.cache_put	= svc_export_put,
-	.cache_request	= svc_export_request,
-	.cache_parse	= svc_export_parse,
-	.cache_show	= svc_export_show,
-};
-
-static inline int svc_export_match(struct svc_export *a, struct svc_export *b)
+static int svc_export_match(struct cache_head *a, struct cache_head *b)
 {
-	return a->ex_client == b->ex_client &&
-		a->ex_dentry == b->ex_dentry &&
-		a->ex_mnt == b->ex_mnt;
+	struct svc_export *orig = container_of(a, struct svc_export, h);
+	struct svc_export *new = container_of(b, struct svc_export, h);
+	return orig->ex_client == new->ex_client &&
+		orig->ex_dentry == new->ex_dentry &&
+		orig->ex_mnt == new->ex_mnt;
 }
-static inline void svc_export_init(struct svc_export *new, struct svc_export *item)
+
+static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
 {
+	struct svc_export *new = container_of(cnew, struct svc_export, h);
+	struct svc_export *item = container_of(citem, struct svc_export, h);
+
 	kref_get(&item->ex_client->ref);
 	new->ex_client = item->ex_client;
 	new->ex_dentry = dget(item->ex_dentry);
 	new->ex_mnt = mntget(item->ex_mnt);
 }
 
-static inline void svc_export_update(struct svc_export *new, struct svc_export *item)
+static void export_update(struct cache_head *cnew, struct cache_head *citem)
 {
+	struct svc_export *new = container_of(cnew, struct svc_export, h);
+	struct svc_export *item = container_of(citem, struct svc_export, h);
+
 	new->ex_flags = item->ex_flags;
 	new->ex_anon_uid = item->ex_anon_uid;
 	new->ex_anon_gid = item->ex_anon_gid;
 	new->ex_fsid = item->ex_fsid;
 }
 
-static DefineSimpleCacheLookup(svc_export, svc_export)
+static struct cache_head *svc_export_alloc(void)
+{
+	struct svc_export *i = kmalloc(sizeof(*i), GFP_KERNEL);
+	if (i)
+		return &i->h;
+	else
+		return NULL;
+}
+
+struct cache_detail svc_export_cache = {
+	.owner		= THIS_MODULE,
+	.hash_size	= EXPORT_HASHMAX,
+	.hash_table	= export_table,
+	.name		= "nfsd.export",
+	.cache_put	= svc_export_put,
+	.cache_request	= svc_export_request,
+	.cache_parse	= svc_export_parse,
+	.cache_show	= svc_export_show,
+	.match		= svc_export_match,
+	.init		= svc_export_init,
+	.update		= export_update,
+	.alloc		= svc_export_alloc,
+};
+
+static struct svc_export *
+svc_export_lookup(struct svc_export *exp)
+{
+	struct cache_head *ch;
+	int hash;
+	hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS);
+	hash ^= hash_ptr(exp->ex_dentry, EXPORT_HASHBITS);
+	hash ^= hash_ptr(exp->ex_mnt, EXPORT_HASHBITS);
+
+	ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h,
+				 hash);
+	if (ch)
+		return container_of(ch, struct svc_export, h);
+	else
+		return NULL;
+}
+
+struct svc_export *
+svc_export_update(struct svc_export *new, struct svc_export *old)
+{
+	struct cache_head *ch;
+	int hash;
+	hash = hash_ptr(old->ex_client, EXPORT_HASHBITS);
+	hash ^= hash_ptr(old->ex_dentry, EXPORT_HASHBITS);
+	hash ^= hash_ptr(old->ex_mnt, EXPORT_HASHBITS);
+
+	ch = sunrpc_cache_update(&svc_export_cache, &new->h,
+				 &old->h,
+				 hash);
+	if (ch)
+		return container_of(ch, struct svc_export, h);
+	else
+		return NULL;
+}
 
 
 struct svc_expkey *
@@ -568,7 +619,7 @@ exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry,
 	key.ex_mnt = mnt;
 	key.ex_dentry = dentry;
 
-	exp = svc_export_lookup(&key, 0);
+	exp = svc_export_lookup(&key);
 	if (exp != NULL) 
 		switch (cache_check(&svc_export_cache, &exp->h, reqp)) {
 		case 0: break;
@@ -770,13 +821,13 @@ exp_export(struct nfsctl_export *nxp)
 	new.ex_anon_gid = nxp->ex_anon_gid;
 	new.ex_fsid = nxp->ex_dev;
 
-	exp = svc_export_lookup(&new, 1);
+	exp = svc_export_lookup(&new);
+	if (exp)
+		exp = svc_export_update(&new, exp);
 
-	if (exp == NULL)
+	if (!exp)
 		goto finish;
 
-	err = 0;
-
 	if (exp_hash(clp, exp) ||
 	    exp_fsid_hash(clp, exp)) {
 		/* failed to create at least one index */
-- 
cgit v1.2.3


From 8d270f7f4cdab792d7263926ac59f747258735ee Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:04 -0800
Subject: [PATCH] knfsd: Use new cache_lookup for svc_expkey cache

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/export.c | 136 ++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 99 insertions(+), 37 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 2ebd77d758b..abd68965822 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -57,17 +57,6 @@ static int		exp_verify_string(char *cp, int max);
 #define	EXPKEY_HASHMASK		(EXPKEY_HASHMAX -1)
 static struct cache_head *expkey_table[EXPKEY_HASHMAX];
 
-static inline int svc_expkey_hash(struct svc_expkey *item)
-{
-	int hash = item->ek_fsidtype;
-	char * cp = (char*)item->ek_fsid;
-	int len = key_len(item->ek_fsidtype);
-
-	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
-	hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
-	return hash & EXPKEY_HASHMASK;
-}
-
 void expkey_put(struct cache_head *item, struct cache_detail *cd)
 {
 	if (cache_put(item, cd)) {
@@ -97,7 +86,8 @@ static void expkey_request(struct cache_detail *cd,
 	(*bpp)[-1] = '\n';
 }
 
-static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *, int);
+static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old);
+static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *);
 static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 {
 	/* client fsidtype fsid [path] */
@@ -108,6 +98,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 	int fsidtype;
 	char *ep;
 	struct svc_expkey key;
+	struct svc_expkey *ek;
 
 	if (mesg[mlen-1] != '\n')
 		return -EINVAL;
@@ -152,20 +143,25 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 	key.ek_fsidtype = fsidtype;
 	memcpy(key.ek_fsid, buf, len);
 
+	ek = svc_expkey_lookup(&key);
+	err = -ENOMEM;
+	if (!ek)
+		goto out;
+
 	/* now we want a pathname, or empty meaning NEGATIVE  */
+	err = -EINVAL;
 	if ((len=qword_get(&mesg, buf, PAGE_SIZE)) < 0)
 		goto out;
 	dprintk("Path seems to be <%s>\n", buf);
 	err = 0;
 	if (len == 0) {
-		struct svc_expkey *ek;
 		set_bit(CACHE_NEGATIVE, &key.h.flags);
-		ek = svc_expkey_lookup(&key, 1);
+		ek = svc_expkey_update(&key, ek);
 		if (ek)
 			expkey_put(&ek->h, &svc_expkey_cache);
+		else err = -ENOMEM;
 	} else {
 		struct nameidata nd;
-		struct svc_expkey *ek;
 		err = path_lookup(buf, 0, &nd);
 		if (err)
 			goto out;
@@ -174,10 +170,11 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 		key.ek_mnt = nd.mnt;
 		key.ek_dentry = nd.dentry;
 		
-		ek = svc_expkey_lookup(&key, 1);
+		ek = svc_expkey_update(&key, ek);
 		if (ek)
 			expkey_put(&ek->h, &svc_expkey_cache);
-		err = 0;
+		else
+			err = -ENOMEM;
 		path_release(&nd);
 	}
 	cache_flush();
@@ -213,29 +210,25 @@ static int expkey_show(struct seq_file *m,
 	seq_printf(m, "\n");
 	return 0;
 }
-	
-struct cache_detail svc_expkey_cache = {
-	.owner		= THIS_MODULE,
-	.hash_size	= EXPKEY_HASHMAX,
-	.hash_table	= expkey_table,
-	.name		= "nfsd.fh",
-	.cache_put	= expkey_put,
-	.cache_request	= expkey_request,
-	.cache_parse	= expkey_parse,
-	.cache_show	= expkey_show,
-};
 
-static inline int svc_expkey_match (struct svc_expkey *a, struct svc_expkey *b)
+static inline int expkey_match (struct cache_head *a, struct cache_head *b)
 {
-	if (a->ek_fsidtype != b->ek_fsidtype ||
-	    a->ek_client != b->ek_client ||
-	    memcmp(a->ek_fsid, b->ek_fsid, key_len(a->ek_fsidtype)) != 0)
+	struct svc_expkey *orig = container_of(a, struct svc_expkey, h);
+	struct svc_expkey *new = container_of(b, struct svc_expkey, h);
+
+	if (orig->ek_fsidtype != new->ek_fsidtype ||
+	    orig->ek_client != new->ek_client ||
+	    memcmp(orig->ek_fsid, new->ek_fsid, key_len(orig->ek_fsidtype)) != 0)
 		return 0;
 	return 1;
 }
 
-static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *item)
+static inline void expkey_init(struct cache_head *cnew,
+				   struct cache_head *citem)
 {
+	struct svc_expkey *new = container_of(cnew, struct svc_expkey, h);
+	struct svc_expkey *item = container_of(citem, struct svc_expkey, h);
+
 	kref_get(&item->ek_client->ref);
 	new->ek_client = item->ek_client;
 	new->ek_fsidtype = item->ek_fsidtype;
@@ -244,13 +237,80 @@ static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *it
 	new->ek_fsid[2] = item->ek_fsid[2];
 }
 
-static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey *item)
+static inline void expkey_update(struct cache_head *cnew,
+				   struct cache_head *citem)
 {
+	struct svc_expkey *new = container_of(cnew, struct svc_expkey, h);
+	struct svc_expkey *item = container_of(citem, struct svc_expkey, h);
+
 	new->ek_mnt = mntget(item->ek_mnt);
 	new->ek_dentry = dget(item->ek_dentry);
 }
 
-static DefineSimpleCacheLookup(svc_expkey, svc_expkey)
+static struct cache_head *expkey_alloc(void)
+{
+	struct svc_expkey *i = kmalloc(sizeof(*i), GFP_KERNEL);
+	if (i)
+		return &i->h;
+	else
+		return NULL;
+}
+
+struct cache_detail svc_expkey_cache = {
+	.owner		= THIS_MODULE,
+	.hash_size	= EXPKEY_HASHMAX,
+	.hash_table	= expkey_table,
+	.name		= "nfsd.fh",
+	.cache_put	= expkey_put,
+	.cache_request	= expkey_request,
+	.cache_parse	= expkey_parse,
+	.cache_show	= expkey_show,
+	.match		= expkey_match,
+	.init		= expkey_init,
+	.update       	= expkey_update,
+	.alloc		= expkey_alloc,
+};
+
+static struct svc_expkey *
+svc_expkey_lookup(struct svc_expkey *item)
+{
+	struct cache_head *ch;
+	int hash = item->ek_fsidtype;
+	char * cp = (char*)item->ek_fsid;
+	int len = key_len(item->ek_fsidtype);
+
+	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
+	hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
+	hash &= EXPKEY_HASHMASK;
+
+	ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h,
+				 hash);
+	if (ch)
+		return container_of(ch, struct svc_expkey, h);
+	else
+		return NULL;
+}
+
+static struct svc_expkey *
+svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
+{
+	struct cache_head *ch;
+	int hash = new->ek_fsidtype;
+	char * cp = (char*)new->ek_fsid;
+	int len = key_len(new->ek_fsidtype);
+
+	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
+	hash ^= hash_ptr(new->ek_client, EXPKEY_HASHBITS);
+	hash &= EXPKEY_HASHMASK;
+
+	ch = sunrpc_cache_update(&svc_expkey_cache, &new->h,
+				 &old->h, hash);
+	if (ch)
+		return container_of(ch, struct svc_expkey, h);
+	else
+		return NULL;
+}
+
 
 #define	EXPORT_HASHBITS		8
 #define	EXPORT_HASHMAX		(1<< EXPORT_HASHBITS)
@@ -549,7 +609,7 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
 	key.ek_fsidtype = fsid_type;
 	memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
 
-	ek = svc_expkey_lookup(&key, 0);
+	ek = svc_expkey_lookup(&key);
 	if (ek != NULL)
 		if ((err = cache_check(&svc_expkey_cache, &ek->h, reqp)))
 			ek = ERR_PTR(err);
@@ -569,7 +629,9 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
 	key.h.expiry_time = NEVER;
 	key.h.flags = 0;
 
-	ek = svc_expkey_lookup(&key, 1);
+	ek = svc_expkey_lookup(&key);
+	if (ek)
+		ek = svc_expkey_update(&key,ek);
 	if (ek) {
 		expkey_put(&ek->h, &svc_expkey_cache);
 		return 0;
-- 
cgit v1.2.3


From f9ecc921b5b5e135050e7f22fef873c249aee3fc Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:06 -0800
Subject: [PATCH] knfsd: Use new cache code for name/id lookup caches

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4idmap.c | 126 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 103 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index dea690aa8bb..75cfbb68b20 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -82,9 +82,12 @@ struct ent {
 #define ENT_HASHMAX           (1 << ENT_HASHBITS)
 #define ENT_HASHMASK          (ENT_HASHMAX - 1)
 
-static inline void
-ent_init(struct ent *new, struct ent *itm)
+static void
+ent_init(struct cache_head *cnew, struct cache_head *citm)
 {
+	struct ent *new = container_of(cnew, struct ent, h);
+	struct ent *itm = container_of(citm, struct ent, h);
+
 	new->id = itm->id;
 	new->type = itm->type;
 
@@ -92,12 +95,6 @@ ent_init(struct ent *new, struct ent *itm)
 	strlcpy(new->authname, itm->authname, sizeof(new->name));
 }
 
-static inline void
-ent_update(struct ent *new, struct ent *itm)
-{
-	ent_init(new, itm);
-}
-
 static void
 ent_put(struct cache_head *ch, struct cache_detail *cd)
 {
@@ -107,6 +104,16 @@ ent_put(struct cache_head *ch, struct cache_detail *cd)
 	}
 }
 
+static struct cache_head *
+ent_alloc(void)
+{
+	struct ent *e = kmalloc(sizeof(*e), GFP_KERNEL);
+	if (e)
+		return &e->h;
+	else
+		return NULL;
+}
+
 /*
  * ID -> Name cache
  */
@@ -143,9 +150,12 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
 	(*bpp)[-1] = '\n';
 }
 
-static inline int
-idtoname_match(struct ent *a, struct ent *b)
+static int
+idtoname_match(struct cache_head *ca, struct cache_head *cb)
 {
+	struct ent *a = container_of(ca, struct ent, h);
+	struct ent *b = container_of(cb, struct ent, h);
+
 	return (a->id == b->id && a->type == b->type &&
 	    strcmp(a->authname, b->authname) == 0);
 }
@@ -178,7 +188,8 @@ warn_no_idmapd(struct cache_detail *detail)
 
 
 static int         idtoname_parse(struct cache_detail *, char *, int);
-static struct ent *idtoname_lookup(struct ent *, int);
+static struct ent *idtoname_lookup(struct ent *);
+static struct ent *idtoname_update(struct ent *, struct ent *);
 
 static struct cache_detail idtoname_cache = {
 	.owner		= THIS_MODULE,
@@ -190,6 +201,10 @@ static struct cache_detail idtoname_cache = {
 	.cache_parse	= idtoname_parse,
 	.cache_show	= idtoname_show,
 	.warn_no_listener = warn_no_idmapd,
+	.match		= idtoname_match,
+	.init		= ent_init,
+	.update		= ent_init,
+	.alloc		= ent_alloc,
 };
 
 int
@@ -232,6 +247,11 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
 	if (ent.h.expiry_time == 0)
 		goto out;
 
+	error = -ENOMEM;
+	res = idtoname_lookup(&ent);
+	if (!res)
+		goto out;
+
 	/* Name */
 	error = qword_get(&buf, buf1, PAGE_SIZE);
 	if (error == -EINVAL)
@@ -246,7 +266,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
 		memcpy(ent.name, buf1, sizeof(ent.name));
 	}
 	error = -ENOMEM;
-	if ((res = idtoname_lookup(&ent, 1)) == NULL)
+	res = idtoname_update(&ent, res);
+	if (res == NULL)
 		goto out;
 
 	ent_put(&res->h, &idtoname_cache);
@@ -258,7 +279,31 @@ out:
 	return error;
 }
 
-static DefineSimpleCacheLookup(ent, idtoname);
+
+static struct ent *
+idtoname_lookup(struct ent *item)
+{
+	struct cache_head *ch = sunrpc_cache_lookup(&idtoname_cache,
+						    &item->h,
+						    idtoname_hash(item));
+	if (ch)
+		return container_of(ch, struct ent, h);
+	else
+		return NULL;
+}
+
+static struct ent *
+idtoname_update(struct ent *new, struct ent *old)
+{
+	struct cache_head *ch = sunrpc_cache_update(&idtoname_cache,
+						    &new->h, &old->h,
+						    idtoname_hash(new));
+	if (ch)
+		return container_of(ch, struct ent, h);
+	else
+		return NULL;
+}
+
 
 /*
  * Name -> ID cache
@@ -285,9 +330,12 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
 	(*bpp)[-1] = '\n';
 }
 
-static inline int
-nametoid_match(struct ent *a, struct ent *b)
+static int
+nametoid_match(struct cache_head *ca, struct cache_head *cb)
 {
+	struct ent *a = container_of(ca, struct ent, h);
+	struct ent *b = container_of(cb, struct ent, h);
+
 	return (a->type == b->type && strcmp(a->name, b->name) == 0 &&
 	    strcmp(a->authname, b->authname) == 0);
 }
@@ -311,7 +359,8 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
 	return 0;
 }
 
-static struct ent *nametoid_lookup(struct ent *, int);
+static struct ent *nametoid_lookup(struct ent *);
+static struct ent *nametoid_update(struct ent *, struct ent *);
 static int         nametoid_parse(struct cache_detail *, char *, int);
 
 static struct cache_detail nametoid_cache = {
@@ -324,6 +373,10 @@ static struct cache_detail nametoid_cache = {
 	.cache_parse	= nametoid_parse,
 	.cache_show	= nametoid_show,
 	.warn_no_listener = warn_no_idmapd,
+	.match		= nametoid_match,
+	.init		= ent_init,
+	.update		= ent_init,
+	.alloc		= ent_alloc,
 };
 
 static int
@@ -373,7 +426,11 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
 		set_bit(CACHE_NEGATIVE, &ent.h.flags);
 
 	error = -ENOMEM;
-	if ((res = nametoid_lookup(&ent, 1)) == NULL)
+	res = nametoid_lookup(&ent);
+	if (res == NULL)
+		goto out;
+	res = nametoid_update(&ent, res);
+	if (res == NULL)
 		goto out;
 
 	ent_put(&res->h, &nametoid_cache);
@@ -384,7 +441,30 @@ out:
 	return (error);
 }
 
-static DefineSimpleCacheLookup(ent, nametoid);
+
+static struct ent *
+nametoid_lookup(struct ent *item)
+{
+	struct cache_head *ch = sunrpc_cache_lookup(&nametoid_cache,
+						    &item->h,
+						    nametoid_hash(item));
+	if (ch)
+		return container_of(ch, struct ent, h);
+	else
+		return NULL;
+}
+
+static struct ent *
+nametoid_update(struct ent *new, struct ent *old)
+{
+	struct cache_head *ch = sunrpc_cache_update(&nametoid_cache,
+						    &new->h, &old->h,
+						    nametoid_hash(new));
+	if (ch)
+		return container_of(ch, struct ent, h);
+	else
+		return NULL;
+}
 
 /*
  * Exported API
@@ -452,24 +532,24 @@ idmap_defer(struct cache_req *req)
 }
 
 static inline int
-do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *, int), struct ent *key,
+do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *), struct ent *key,
 		struct cache_detail *detail, struct ent **item,
 		struct idmap_defer_req *mdr)
 {
-	*item = lookup_fn(key, 0);
+	*item = lookup_fn(key);
 	if (!*item)
 		return -ENOMEM;
 	return cache_check(detail, &(*item)->h, &mdr->req);
 }
 
 static inline int
-do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *, int),
+do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *),
 			struct ent *key, struct cache_detail *detail,
 			struct ent **item)
 {
 	int ret = -ENOMEM;
 
-	*item = lookup_fn(key, 0);
+	*item = lookup_fn(key);
 	if (!*item)
 		goto out_err;
 	ret = -ETIMEDOUT;
@@ -490,7 +570,7 @@ out_err:
 
 static int
 idmap_lookup(struct svc_rqst *rqstp,
-		struct ent *(*lookup_fn)(struct ent *, int), struct ent *key,
+		struct ent *(*lookup_fn)(struct ent *), struct ent *key,
 		struct cache_detail *detail, struct ent **item)
 {
 	struct idmap_defer_req *mdr;
-- 
cgit v1.2.3


From baab935ff3bdac20c558809da0d8e8f761840219 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:09 -0800
Subject: [PATCH] knfsd: Convert sunrpc_cache to use krefs

.. it makes some of the code nicer.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/export.c    | 51 ++++++++++++++++++++++++---------------------------
 fs/nfsd/nfs4idmap.c | 18 ++++++++----------
 fs/nfsd/nfsfh.c     |  2 +-
 3 files changed, 33 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index abd68965822..cc811a1094c 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -57,18 +57,17 @@ static int		exp_verify_string(char *cp, int max);
 #define	EXPKEY_HASHMASK		(EXPKEY_HASHMAX -1)
 static struct cache_head *expkey_table[EXPKEY_HASHMAX];
 
-void expkey_put(struct cache_head *item, struct cache_detail *cd)
+void expkey_put(struct kref *ref)
 {
-	if (cache_put(item, cd)) {
-		struct svc_expkey *key = container_of(item, struct svc_expkey, h);
-		if (test_bit(CACHE_VALID, &item->flags) &&
-		    !test_bit(CACHE_NEGATIVE, &item->flags)) {
-			dput(key->ek_dentry);
-			mntput(key->ek_mnt);
-		}
-		auth_domain_put(key->ek_client);
-		kfree(key);
+	struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
+
+	if (test_bit(CACHE_VALID, &key->h.flags) &&
+	    !test_bit(CACHE_NEGATIVE, &key->h.flags)) {
+		dput(key->ek_dentry);
+		mntput(key->ek_mnt);
 	}
+	auth_domain_put(key->ek_client);
+	kfree(key);
 }
 
 static void expkey_request(struct cache_detail *cd,
@@ -158,7 +157,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 		set_bit(CACHE_NEGATIVE, &key.h.flags);
 		ek = svc_expkey_update(&key, ek);
 		if (ek)
-			expkey_put(&ek->h, &svc_expkey_cache);
+			cache_put(&ek->h, &svc_expkey_cache);
 		else err = -ENOMEM;
 	} else {
 		struct nameidata nd;
@@ -172,7 +171,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 		
 		ek = svc_expkey_update(&key, ek);
 		if (ek)
-			expkey_put(&ek->h, &svc_expkey_cache);
+			cache_put(&ek->h, &svc_expkey_cache);
 		else
 			err = -ENOMEM;
 		path_release(&nd);
@@ -318,15 +317,13 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
 
 static struct cache_head *export_table[EXPORT_HASHMAX];
 
-void svc_export_put(struct cache_head *item, struct cache_detail *cd)
+static void svc_export_put(struct kref *ref)
 {
-	if (cache_put(item, cd)) {
-		struct svc_export *exp = container_of(item, struct svc_export, h);
-		dput(exp->ex_dentry);
-		mntput(exp->ex_mnt);
-		auth_domain_put(exp->ex_client);
-		kfree(exp);
-	}
+	struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
+	dput(exp->ex_dentry);
+	mntput(exp->ex_mnt);
+	auth_domain_put(exp->ex_client);
+	kfree(exp);
 }
 
 static void svc_export_request(struct cache_detail *cd,
@@ -633,7 +630,7 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
 	if (ek)
 		ek = svc_expkey_update(&key,ek);
 	if (ek) {
-		expkey_put(&ek->h, &svc_expkey_cache);
+		cache_put(&ek->h, &svc_expkey_cache);
 		return 0;
 	}
 	return -ENOMEM;
@@ -762,7 +759,7 @@ static void exp_fsid_unhash(struct svc_export *exp)
 	ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid);
 	if (ek && !IS_ERR(ek)) {
 		ek->h.expiry_time = get_seconds()-1;
-		expkey_put(&ek->h, &svc_expkey_cache);
+		cache_put(&ek->h, &svc_expkey_cache);
 	}
 	svc_expkey_cache.nextcheck = get_seconds();
 }
@@ -800,7 +797,7 @@ static void exp_unhash(struct svc_export *exp)
 	ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino);
 	if (ek && !IS_ERR(ek)) {
 		ek->h.expiry_time = get_seconds()-1;
-		expkey_put(&ek->h, &svc_expkey_cache);
+		cache_put(&ek->h, &svc_expkey_cache);
 	}
 	svc_expkey_cache.nextcheck = get_seconds();
 }
@@ -902,7 +899,7 @@ finish:
 	if (exp)
 		exp_put(exp);
 	if (fsid_key && !IS_ERR(fsid_key))
-		expkey_put(&fsid_key->h, &svc_expkey_cache);
+		cache_put(&fsid_key->h, &svc_expkey_cache);
 	if (clp)
 		auth_domain_put(clp);
 	path_release(&nd);
@@ -1030,7 +1027,7 @@ exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv,
 		return ERR_PTR(PTR_ERR(ek));
 
 	exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp);
-	expkey_put(&ek->h, &svc_expkey_cache);
+	cache_put(&ek->h, &svc_expkey_cache);
 
 	if (!exp || IS_ERR(exp))
 		return ERR_PTR(PTR_ERR(exp));
@@ -1068,7 +1065,7 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
 	else
 		rv = fh_compose(fhp, exp,
 				fsid_key->ek_dentry, NULL);
-	expkey_put(&fsid_key->h, &svc_expkey_cache);
+	cache_put(&fsid_key->h, &svc_expkey_cache);
 	return rv;
 }
 
@@ -1187,7 +1184,7 @@ static int e_show(struct seq_file *m, void *p)
 	cache_get(&exp->h);
 	if (cache_check(&svc_export_cache, &exp->h, NULL))
 		return 0;
-	if (cache_put(&exp->h, &svc_export_cache)) BUG();
+	cache_put(&exp->h, &svc_export_cache);
 	return svc_export_show(m, &svc_export_cache, cp);
 }
 
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 75cfbb68b20..4b6aa60dfce 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -96,12 +96,10 @@ ent_init(struct cache_head *cnew, struct cache_head *citm)
 }
 
 static void
-ent_put(struct cache_head *ch, struct cache_detail *cd)
+ent_put(struct kref *ref)
 {
-	if (cache_put(ch, cd)) {
-		struct ent *map = container_of(ch, struct ent, h);
-		kfree(map);
-	}
+	struct ent *map = container_of(ref, struct ent, h.ref);
+	kfree(map);
 }
 
 static struct cache_head *
@@ -270,7 +268,7 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
 	if (res == NULL)
 		goto out;
 
-	ent_put(&res->h, &idtoname_cache);
+	cache_put(&res->h, &idtoname_cache);
 
 	error = 0;
 out:
@@ -433,7 +431,7 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
 	if (res == NULL)
 		goto out;
 
-	ent_put(&res->h, &nametoid_cache);
+	cache_put(&res->h, &nametoid_cache);
 	error = 0;
 out:
 	kfree(buf1);
@@ -562,7 +560,7 @@ do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *),
 		goto out_put;
 	return 0;
 out_put:
-	ent_put(&(*item)->h, detail);
+	cache_put(&(*item)->h, detail);
 out_err:
 	*item = NULL;
 	return ret;
@@ -613,7 +611,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
 	if (ret)
 		return ret;
 	*id = item->id;
-	ent_put(&item->h, &nametoid_cache);
+	cache_put(&item->h, &nametoid_cache);
 	return 0;
 }
 
@@ -635,7 +633,7 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
 	ret = strlen(item->name);
 	BUG_ON(ret > IDMAP_NAMESZ);
 	memcpy(name, item->name, ret);
-	ent_put(&item->h, &idtoname_cache);
+	cache_put(&item->h, &idtoname_cache);
 	return ret;
 }
 
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 7a3e397b4ed..3f2ec2e6d06 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -506,7 +506,7 @@ fh_put(struct svc_fh *fhp)
 		nfsd_nr_put++;
 	}
 	if (exp) {
-		svc_export_put(&exp->h, &svc_export_cache);
+		cache_put(&exp->h, &svc_export_cache);
 		fhp->fh_export = NULL;
 	}
 	return;
-- 
cgit v1.2.3


From 74cae61ab45f19a3e8c4d9f53c0e94df129c7915 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 27 Mar 2006 01:15:10 -0800
Subject: [PATCH] fs/nfsd/export.c,net/sunrpc/cache.c: make needlessly global
 code static

We can now make some code static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Neil Brown <neilb@suse.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/export.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index cc811a1094c..c340be0a3f5 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -57,7 +57,7 @@ static int		exp_verify_string(char *cp, int max);
 #define	EXPKEY_HASHMASK		(EXPKEY_HASHMAX -1)
 static struct cache_head *expkey_table[EXPKEY_HASHMAX];
 
-void expkey_put(struct kref *ref)
+static void expkey_put(struct kref *ref)
 {
 	struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
 
@@ -87,6 +87,8 @@ static void expkey_request(struct cache_detail *cd,
 
 static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old);
 static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *);
+static struct cache_detail svc_expkey_cache;
+
 static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 {
 	/* client fsidtype fsid [path] */
@@ -255,7 +257,7 @@ static struct cache_head *expkey_alloc(void)
 		return NULL;
 }
 
-struct cache_detail svc_expkey_cache = {
+static struct cache_detail svc_expkey_cache = {
 	.owner		= THIS_MODULE,
 	.hash_size	= EXPKEY_HASHMAX,
 	.hash_table	= expkey_table,
@@ -345,7 +347,8 @@ static void svc_export_request(struct cache_detail *cd,
 	(*bpp)[-1] = '\n';
 }
 
-struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old);
+static struct svc_export *svc_export_update(struct svc_export *new,
+					    struct svc_export *old);
 static struct svc_export *svc_export_lookup(struct svc_export *);
 
 static int check_export(struct inode *inode, int flags)
@@ -574,7 +577,7 @@ svc_export_lookup(struct svc_export *exp)
 		return NULL;
 }
 
-struct svc_export *
+static struct svc_export *
 svc_export_update(struct svc_export *new, struct svc_export *old)
 {
 	struct cache_head *ch;
@@ -593,7 +596,7 @@ svc_export_update(struct svc_export *new, struct svc_export *old)
 }
 
 
-struct svc_expkey *
+static struct svc_expkey *
 exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
 {
 	struct svc_expkey key, *ek;
-- 
cgit v1.2.3


From ec936fc563715a9e2b2e363eb060655b49529325 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:59 -0800
Subject: [PATCH] for_each_online_pgdat: renaming for_each_pgdat

Replace for_each_pgdat() with for_each_online_pgdat().

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index d597758dd12..23f1f3a6807 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -493,7 +493,7 @@ static void free_more_memory(void)
 	wakeup_pdflush(1024);
 	yield();
 
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
 		if (*zones)
 			try_to_free_pages(zones, GFP_NOFS);
-- 
cgit v1.2.3


From 6a4d44c1f1108d6c9e8850e8cf166aaba0e56eae Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 27 Mar 2006 01:17:55 -0800
Subject: [PATCH] dm/md dependency tree in sysfs: holders/slaves subdirectory

Creating "slaves" and "holders" directories in /sys/block/<disk> and
creating "holders" directory under /sys/block/<disk>/<partition>

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/check.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'fs')

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index f924f459bdb..60523cea713 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -297,6 +297,30 @@ struct kobj_type ktype_part = {
 	.sysfs_ops	= &part_sysfs_ops,
 };
 
+#ifdef CONFIG_SYSFS
+static inline void partition_sysfs_add_subdir(struct hd_struct *p)
+{
+	struct kobject *k;
+
+	k = kobject_get(&p->kobj);
+	p->holder_dir = kobject_add_dir(k, "holders");
+	kobject_put(k);
+}
+
+static inline void disk_sysfs_add_subdirs(struct gendisk *disk)
+{
+	struct kobject *k;
+
+	k = kobject_get(&disk->kobj);
+	disk->holder_dir = kobject_add_dir(k, "holders");
+	disk->slave_dir = kobject_add_dir(k, "slaves");
+	kobject_put(k);
+}
+#else
+#define partition_sysfs_add_subdir(x)	do { } while (0)
+#define disk_sysfs_add_subdirs(x)	do { } while (0)
+#endif
+
 void delete_partition(struct gendisk *disk, int part)
 {
 	struct hd_struct *p = disk->part[part-1];
@@ -310,6 +334,10 @@ void delete_partition(struct gendisk *disk, int part)
 	p->ios[0] = p->ios[1] = 0;
 	p->sectors[0] = p->sectors[1] = 0;
 	devfs_remove("%s/part%d", disk->devfs_name, part);
+#ifdef CONFIG_SYSFS
+	if (p->holder_dir)
+		kobject_unregister(p->holder_dir);
+#endif
 	kobject_unregister(&p->kobj);
 }
 
@@ -337,6 +365,7 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
 	p->kobj.parent = &disk->kobj;
 	p->kobj.ktype = &ktype_part;
 	kobject_register(&p->kobj);
+	partition_sysfs_add_subdir(p);
 	disk->part[part-1] = p;
 }
 
@@ -383,6 +412,7 @@ void register_disk(struct gendisk *disk)
 	if ((err = kobject_add(&disk->kobj)))
 		return;
 	disk_sysfs_symlinks(disk);
+ 	disk_sysfs_add_subdirs(disk);
 	kobject_uevent(&disk->kobj, KOBJ_ADD);
 
 	/* No minors to use for partitions */
@@ -483,6 +513,12 @@ void del_gendisk(struct gendisk *disk)
 
 	devfs_remove_disk(disk);
 
+#ifdef CONFIG_SYSFS
+	if (disk->holder_dir)
+		kobject_unregister(disk->holder_dir);
+	if (disk->slave_dir)
+		kobject_unregister(disk->slave_dir);
+#endif
 	if (disk->driverfs_dev) {
 		char *disk_name = make_block_name(disk);
 		sysfs_remove_link(&disk->kobj, "device");
-- 
cgit v1.2.3


From 100873687d81d4ce7b1299b447d33e87ba1e9583 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 27 Mar 2006 01:17:56 -0800
Subject: [PATCH]
 dm-md-dependency-tree-in-sysfs-holders-slaves-subdirectory-tidy

Remove all the CONFIG_SYSFS stuff.  That's supposed to all be implemented up
in header files.

Yes, the CONFIG_SYSFS=n data structures will be a little larger than
necessary, but that's a tradeoff we can decide to make.

Cc: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/check.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'fs')

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 60523cea713..af0cb4b9e78 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -297,7 +297,6 @@ struct kobj_type ktype_part = {
 	.sysfs_ops	= &part_sysfs_ops,
 };
 
-#ifdef CONFIG_SYSFS
 static inline void partition_sysfs_add_subdir(struct hd_struct *p)
 {
 	struct kobject *k;
@@ -316,10 +315,6 @@ static inline void disk_sysfs_add_subdirs(struct gendisk *disk)
 	disk->slave_dir = kobject_add_dir(k, "slaves");
 	kobject_put(k);
 }
-#else
-#define partition_sysfs_add_subdir(x)	do { } while (0)
-#define disk_sysfs_add_subdirs(x)	do { } while (0)
-#endif
 
 void delete_partition(struct gendisk *disk, int part)
 {
@@ -334,10 +329,8 @@ void delete_partition(struct gendisk *disk, int part)
 	p->ios[0] = p->ios[1] = 0;
 	p->sectors[0] = p->sectors[1] = 0;
 	devfs_remove("%s/part%d", disk->devfs_name, part);
-#ifdef CONFIG_SYSFS
 	if (p->holder_dir)
 		kobject_unregister(p->holder_dir);
-#endif
 	kobject_unregister(&p->kobj);
 }
 
@@ -513,12 +506,10 @@ void del_gendisk(struct gendisk *disk)
 
 	devfs_remove_disk(disk);
 
-#ifdef CONFIG_SYSFS
 	if (disk->holder_dir)
 		kobject_unregister(disk->holder_dir);
 	if (disk->slave_dir)
 		kobject_unregister(disk->slave_dir);
-#endif
 	if (disk->driverfs_dev) {
 		char *disk_name = make_block_name(disk);
 		sysfs_remove_link(&disk->kobj, "device");
-- 
cgit v1.2.3


From 641dc636b0475582e48584340b774bd1e90d40d9 Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 27 Mar 2006 01:17:57 -0800
Subject: [PATCH] dm/md dependency tree in sysfs: bd_claim_by_kobject

Adding bd_claim_by_kobject() function which takes kobject as additional
signature of holder device and creates sysfs symlinks between holder device
and claimed device.  bd_release_from_kobject() is a counterpart of
bd_claim_by_kobject.

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 297 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 297 insertions(+)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5983d42df01..3f36df7e037 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -266,6 +266,9 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 		mutex_init(&bdev->bd_mount_mutex);
 		INIT_LIST_HEAD(&bdev->bd_inodes);
 		INIT_LIST_HEAD(&bdev->bd_list);
+#ifdef CONFIG_SYSFS
+		INIT_LIST_HEAD(&bdev->bd_holder_list);
+#endif
 		inode_init_once(&ei->vfs_inode);
 	}
 }
@@ -490,6 +493,300 @@ void bd_release(struct block_device *bdev)
 
 EXPORT_SYMBOL(bd_release);
 
+#ifdef CONFIG_SYSFS
+/*
+ * Functions for bd_claim_by_kobject / bd_release_from_kobject
+ *
+ *     If a kobject is passed to bd_claim_by_kobject()
+ *     and the kobject has a parent directory,
+ *     following symlinks are created:
+ *        o from the kobject to the claimed bdev
+ *        o from "holders" directory of the bdev to the parent of the kobject
+ *     bd_release_from_kobject() removes these symlinks.
+ *
+ *     Example:
+ *        If /dev/dm-0 maps to /dev/sda, kobject corresponding to
+ *        /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then:
+ *           /sys/block/dm-0/slaves/sda --> /sys/block/sda
+ *           /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
+ */
+
+static struct kobject *bdev_get_kobj(struct block_device *bdev)
+{
+	if (bdev->bd_contains != bdev)
+		return kobject_get(&bdev->bd_part->kobj);
+	else
+		return kobject_get(&bdev->bd_disk->kobj);
+}
+
+static struct kobject *bdev_get_holder(struct block_device *bdev)
+{
+	if (bdev->bd_contains != bdev)
+		return kobject_get(bdev->bd_part->holder_dir);
+	else
+		return kobject_get(bdev->bd_disk->holder_dir);
+}
+
+static void add_symlink(struct kobject *from, struct kobject *to)
+{
+	if (!from || !to)
+		return;
+	sysfs_create_link(from, to, kobject_name(to));
+}
+
+static void del_symlink(struct kobject *from, struct kobject *to)
+{
+	if (!from || !to)
+		return;
+	sysfs_remove_link(from, kobject_name(to));
+}
+
+/*
+ * 'struct bd_holder' contains pointers to kobjects symlinked by
+ * bd_claim_by_kobject.
+ * It's connected to bd_holder_list which is protected by bdev->bd_sem.
+ */
+struct bd_holder {
+	struct list_head list;	/* chain of holders of the bdev */
+	int count;		/* references from the holder */
+	struct kobject *sdir;	/* holder object, e.g. "/block/dm-0/slaves" */
+	struct kobject *hdev;	/* e.g. "/block/dm-0" */
+	struct kobject *hdir;	/* e.g. "/block/sda/holders" */
+	struct kobject *sdev;	/* e.g. "/block/sda" */
+};
+
+/*
+ * Get references of related kobjects at once.
+ * Returns 1 on success. 0 on failure.
+ *
+ * Should call bd_holder_release_dirs() after successful use.
+ */
+static int bd_holder_grab_dirs(struct block_device *bdev,
+			struct bd_holder *bo)
+{
+	if (!bdev || !bo)
+		return 0;
+
+	bo->sdir = kobject_get(bo->sdir);
+	if (!bo->sdir)
+		return 0;
+
+	bo->hdev = kobject_get(bo->sdir->parent);
+	if (!bo->hdev)
+		goto fail_put_sdir;
+
+	bo->sdev = bdev_get_kobj(bdev);
+	if (!bo->sdev)
+		goto fail_put_hdev;
+
+	bo->hdir = bdev_get_holder(bdev);
+	if (!bo->hdir)
+		goto fail_put_sdev;
+
+	return 1;
+
+fail_put_sdev:
+	kobject_put(bo->sdev);
+fail_put_hdev:
+	kobject_put(bo->hdev);
+fail_put_sdir:
+	kobject_put(bo->sdir);
+
+	return 0;
+}
+
+/* Put references of related kobjects at once. */
+static void bd_holder_release_dirs(struct bd_holder *bo)
+{
+	kobject_put(bo->hdir);
+	kobject_put(bo->sdev);
+	kobject_put(bo->hdev);
+	kobject_put(bo->sdir);
+}
+
+static struct bd_holder *alloc_bd_holder(struct kobject *kobj)
+{
+	struct bd_holder *bo;
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo)
+		return NULL;
+
+	bo->count = 1;
+	bo->sdir = kobj;
+
+	return bo;
+}
+
+static void free_bd_holder(struct bd_holder *bo)
+{
+	kfree(bo);
+}
+
+/**
+ * add_bd_holder - create sysfs symlinks for bd_claim() relationship
+ *
+ * @bdev:	block device to be bd_claimed
+ * @bo:		preallocated and initialized by alloc_bd_holder()
+ *
+ * If there is no matching entry with @bo in @bdev->bd_holder_list,
+ * add @bo to the list, create symlinks.
+ *
+ * Returns 1 if @bo was added to the list.
+ * Returns 0 if @bo wasn't used by any reason and should be freed.
+ */
+static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
+{
+	struct bd_holder *tmp;
+
+	if (!bo)
+		return 0;
+
+	list_for_each_entry(tmp, &bdev->bd_holder_list, list) {
+		if (tmp->sdir == bo->sdir) {
+			tmp->count++;
+			return 0;
+		}
+	}
+
+	if (!bd_holder_grab_dirs(bdev, bo))
+		return 0;
+
+	add_symlink(bo->sdir, bo->sdev);
+	add_symlink(bo->hdir, bo->hdev);
+	list_add_tail(&bo->list, &bdev->bd_holder_list);
+	return 1;
+}
+
+/**
+ * del_bd_holder - delete sysfs symlinks for bd_claim() relationship
+ *
+ * @bdev:	block device to be bd_claimed
+ * @kobj:	holder's kobject
+ *
+ * If there is matching entry with @kobj in @bdev->bd_holder_list
+ * and no other bd_claim() from the same kobject,
+ * remove the struct bd_holder from the list, delete symlinks for it.
+ *
+ * Returns a pointer to the struct bd_holder when it's removed from the list
+ * and ready to be freed.
+ * Returns NULL if matching claim isn't found or there is other bd_claim()
+ * by the same kobject.
+ */
+static struct bd_holder *del_bd_holder(struct block_device *bdev,
+					struct kobject *kobj)
+{
+	struct bd_holder *bo;
+
+	list_for_each_entry(bo, &bdev->bd_holder_list, list) {
+		if (bo->sdir == kobj) {
+			bo->count--;
+			BUG_ON(bo->count < 0);
+			if (!bo->count) {
+				list_del(&bo->list);
+				del_symlink(bo->sdir, bo->sdev);
+				del_symlink(bo->hdir, bo->hdev);
+				bd_holder_release_dirs(bo);
+				return bo;
+			}
+			break;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * bd_claim_by_kobject - bd_claim() with additional kobject signature
+ *
+ * @bdev:	block device to be claimed
+ * @holder:	holder's signature
+ * @kobj:	holder's kobject
+ *
+ * Do bd_claim() and if it succeeds, create sysfs symlinks between
+ * the bdev and the holder's kobject.
+ * Use bd_release_from_kobject() when relesing the claimed bdev.
+ *
+ * Returns 0 on success. (same as bd_claim())
+ * Returns errno on failure.
+ */
+static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
+				struct kobject *kobj)
+{
+	int res;
+	struct bd_holder *bo;
+
+	if (!kobj)
+		return -EINVAL;
+
+	bo = alloc_bd_holder(kobj);
+	if (!bo)
+		return -ENOMEM;
+
+	down(&bdev->bd_sem);
+	res = bd_claim(bdev, holder);
+	if (res || !add_bd_holder(bdev, bo))
+		free_bd_holder(bo);
+	up(&bdev->bd_sem);
+
+	return res;
+}
+
+/**
+ * bd_release_from_kobject - bd_release() with additional kobject signature
+ *
+ * @bdev:	block device to be released
+ * @kobj:	holder's kobject
+ *
+ * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject().
+ */
+static void bd_release_from_kobject(struct block_device *bdev,
+					struct kobject *kobj)
+{
+	struct bd_holder *bo;
+
+	if (!kobj)
+		return;
+
+	down(&bdev->bd_sem);
+	bd_release(bdev);
+	if ((bo = del_bd_holder(bdev, kobj)))
+		free_bd_holder(bo);
+	up(&bdev->bd_sem);
+}
+
+/**
+ * bd_claim_by_disk - wrapper function for bd_claim_by_kobject()
+ *
+ * @bdev:	block device to be claimed
+ * @holder:	holder's signature
+ * @disk:	holder's gendisk
+ *
+ * Call bd_claim_by_kobject() with getting @disk->slave_dir.
+ */
+int bd_claim_by_disk(struct block_device *bdev, void *holder,
+			struct gendisk *disk)
+{
+	return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir));
+}
+EXPORT_SYMBOL_GPL(bd_claim_by_disk);
+
+/**
+ * bd_release_from_disk - wrapper function for bd_release_from_kobject()
+ *
+ * @bdev:	block device to be claimed
+ * @disk:	holder's gendisk
+ *
+ * Call bd_release_from_kobject() and put @disk->slave_dir.
+ */
+void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk)
+{
+	bd_release_from_kobject(bdev, disk->slave_dir);
+	kobject_put(disk->slave_dir);
+}
+EXPORT_SYMBOL_GPL(bd_release_from_disk);
+#endif
+
 /*
  * Tries to open block device by device number.  Use it ONLY if you
  * really do not have anything better - i.e. when you are behind a
-- 
cgit v1.2.3


From b4cf1b72eec0e197257a5b07dc9ec53552cdd123 Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 27 Mar 2006 01:18:00 -0800
Subject: [PATCH] dm/md dependency tree in sysfs: convert bd_sem to bd_mutex

Convert bd_sem to bd_mutex

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3f36df7e037..17c76182f38 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -723,11 +723,11 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 	if (!bo)
 		return -ENOMEM;
 
-	down(&bdev->bd_sem);
+	mutex_lock(&bdev->bd_mutex);
 	res = bd_claim(bdev, holder);
 	if (res || !add_bd_holder(bdev, bo))
 		free_bd_holder(bo);
-	up(&bdev->bd_sem);
+	mutex_unlock(&bdev->bd_mutex);
 
 	return res;
 }
@@ -748,11 +748,11 @@ static void bd_release_from_kobject(struct block_device *bdev,
 	if (!kobj)
 		return;
 
-	down(&bdev->bd_sem);
+	mutex_lock(&bdev->bd_mutex);
 	bd_release(bdev);
 	if ((bo = del_bd_holder(bdev, kobj)))
 		free_bd_holder(bo);
-	up(&bdev->bd_sem);
+	mutex_unlock(&bdev->bd_mutex);
 }
 
 /**
-- 
cgit v1.2.3


From 5149fa47ec90eb5e79e28f3a7fbcf29421524817 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Mon, 27 Mar 2006 14:26:26 +1100
Subject: [PATCH] powerpc: Cope with duplicate node & property names in
 /proc/device-tree

Various dodgy firmware might give us nodes and/or properties in the device
tree with conflicting names. That's generally ok, except for when we export
the device tree via /proc, so check when we're creating the proc device tree
and munge names accordingly.

Tested on a faked device tree with kexec, would be good if someone with
actual bogus firmware could try it, but just for completeness.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 fs/proc/proc_devtree.c | 103 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 80 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 596b4b4f1cc..abdf068bc27 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -52,7 +52,8 @@ static int property_read_proc(char *page, char **start, off_t off,
  * Add a property to a node
  */
 static struct proc_dir_entry *
-__proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp)
+__proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp,
+		const char *name)
 {
 	struct proc_dir_entry *ent;
 
@@ -60,14 +61,14 @@ __proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp)
 	 * Unfortunately proc_register puts each new entry
 	 * at the beginning of the list.  So we rearrange them.
 	 */
-	ent = create_proc_read_entry(pp->name,
-				     strncmp(pp->name, "security-", 9)
+	ent = create_proc_read_entry(name,
+				     strncmp(name, "security-", 9)
 				     ? S_IRUGO : S_IRUSR, de,
 				     property_read_proc, pp);
 	if (ent == NULL)
 		return NULL;
 
-	if (!strncmp(pp->name, "security-", 9))
+	if (!strncmp(name, "security-", 9))
 		ent->size = 0; /* don't leak number of password chars */
 	else
 		ent->size = pp->length;
@@ -78,7 +79,7 @@ __proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp)
 
 void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop)
 {
-	__proc_device_tree_add_prop(pde, prop);
+	__proc_device_tree_add_prop(pde, prop, prop->name);
 }
 
 void proc_device_tree_remove_prop(struct proc_dir_entry *pde,
@@ -105,6 +106,69 @@ void proc_device_tree_update_prop(struct proc_dir_entry *pde,
 	}
 }
 
+/*
+ * Various dodgy firmware might give us nodes and/or properties with
+ * conflicting names. That's generally ok, except for exporting via /proc,
+ * so munge names here to ensure they're unique.
+ */
+
+static int duplicate_name(struct proc_dir_entry *de, const char *name)
+{
+	struct proc_dir_entry *ent;
+	int found = 0;
+
+	spin_lock(&proc_subdir_lock);
+
+	for (ent = de->subdir; ent != NULL; ent = ent->next) {
+		if (strcmp(ent->name, name) == 0) {
+			found = 1;
+			break;
+		}
+	}
+
+	spin_unlock(&proc_subdir_lock);
+
+	return found;
+}
+
+static const char *fixup_name(struct device_node *np, struct proc_dir_entry *de,
+		const char *name)
+{
+	char *fixed_name;
+	int fixup_len = strlen(name) + 2 + 1; /* name + #x + \0 */
+	int i = 1, size;
+
+realloc:
+	fixed_name = kmalloc(fixup_len, GFP_KERNEL);
+	if (fixed_name == NULL) {
+		printk(KERN_ERR "device-tree: Out of memory trying to fixup "
+				"name \"%s\"\n", name);
+		return name;
+	}
+
+retry:
+	size = snprintf(fixed_name, fixup_len, "%s#%d", name, i);
+	size++; /* account for NULL */
+
+	if (size > fixup_len) {
+		/* We ran out of space, free and reallocate. */
+		kfree(fixed_name);
+		fixup_len = size;
+		goto realloc;
+	}
+
+	if (duplicate_name(de, fixed_name)) {
+		/* Multiple duplicates. Retry with a different offset. */
+		i++;
+		goto retry;
+	}
+
+	printk(KERN_WARNING "device-tree: Duplicate name in %s, "
+			"renamed to \"%s\"\n", np->full_name, fixed_name);
+
+	return fixed_name;
+}
+
 /*
  * Process a node, adding entries for its children and its properties.
  */
@@ -118,37 +182,30 @@ void proc_device_tree_add_node(struct device_node *np,
 
 	set_node_proc_entry(np, de);
 	for (child = NULL; (child = of_get_next_child(np, child));) {
+		/* Use everything after the last slash, or the full name */
 		p = strrchr(child->full_name, '/');
 		if (!p)
 			p = child->full_name;
 		else
 			++p;
+
+		if (duplicate_name(de, p))
+			p = fixup_name(np, de, p);
+
 		ent = proc_mkdir(p, de);
 		if (ent == 0)
 			break;
 		proc_device_tree_add_node(child, ent);
 	}
 	of_node_put(child);
+
 	for (pp = np->properties; pp != 0; pp = pp->next) {
-		/*
-		 * Yet another Apple device-tree bogosity: on some machines,
-		 * they have properties & nodes with the same name. Those
-		 * properties are quite unimportant for us though, thus we
-		 * simply "skip" them here, but we do have to check.
-		 */
-		spin_lock(&proc_subdir_lock);
-		for (ent = de->subdir; ent != NULL; ent = ent->next)
-			if (!strcmp(ent->name, pp->name))
-				break;
-		spin_unlock(&proc_subdir_lock);
-		if (ent != NULL) {
-			printk(KERN_WARNING "device-tree: property \"%s\" name"
-			       " conflicts with node in %s\n", pp->name,
-			       np->full_name);
-			continue;
-		}
+		p = pp->name;
+
+		if (duplicate_name(de, p))
+			p = fixup_name(np, de, p);
 
-		ent = __proc_device_tree_add_prop(de, pp);
+		ent = __proc_device_tree_add_prop(de, pp, p);
 		if (ent == 0)
 			break;
 	}
-- 
cgit v1.2.3


From e8222502ee6157e2713da9e0792c21f4ad458d50 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 28 Mar 2006 23:15:54 +1100
Subject: [PATCH] powerpc: Kill _machine and hard-coded platform numbers

This removes statically assigned platform numbers and reworks the
powerpc platform probe code to use a better mechanism.  With this,
board support files can simply declare a new machine type with a
macro, and implement a probe() function that uses the flattened
device-tree to detect if they apply for a given machine.

We now have a machine_is() macro that replaces the comparisons of
_machine with the various PLATFORM_* constants.  This commit also
changes various drivers to use the new macro instead of looking at
_machine.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 fs/partitions/mac.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
index bb22cdd0cb1..813292f2121 100644
--- a/fs/partitions/mac.c
+++ b/fs/partitions/mac.c
@@ -12,6 +12,7 @@
 #include "mac.h"
 
 #ifdef CONFIG_PPC_PMAC
+#include <asm/machdep.h>
 extern void note_bootable_part(dev_t dev, int part, int goodness);
 #endif
 
@@ -79,7 +80,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev)
 		 * If this is the first bootable partition, tell the
 		 * setup code, in case it wants to make this the root.
 		 */
-		if (_machine == _MACH_Pmac) {
+		if (machine_is(powermac)) {
 			int goodness = 0;
 
 			mac_fix_string(part->processor, 16);
-- 
cgit v1.2.3


From a28af471b8946de052a0eb0c080d5457be93f168 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Tue, 28 Mar 2006 01:56:26 -0800
Subject: [PATCH] fs/fat/: proper prototypes for two functions

Add proper prototypes for fat_cache_init() and fat_cache_destroy() in
msdos_fs.h.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fat/inode.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 404bfc9f738..c1ce284f8a9 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1435,9 +1435,6 @@ out_fail:
 
 EXPORT_SYMBOL_GPL(fat_fill_super);
 
-int __init fat_cache_init(void);
-void fat_cache_destroy(void);
-
 static int __init init_fat_fs(void)
 {
 	int err;
-- 
cgit v1.2.3


From f5b95ff010d0a4e40f877277f8f0e62fcd83b5a8 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Tue, 28 Mar 2006 01:56:29 -0800
Subject: [PATCH] autofs4: proper prototype for autofs4_dentry_release()

Add a proper prototype for autofs4_dentry_release() to autofs_i.h.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h | 3 +++
 fs/autofs4/inode.c    | 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 617fd7b3744..c70204a6126 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -230,3 +230,6 @@ static inline int __simple_empty(struct dentry *dentry)
 out:
 	return ret;
 }
+
+void autofs4_dentry_release(struct dentry *);
+
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 4eddee4e76f..fde78b110dd 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -292,7 +292,6 @@ static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi)
 	return ino;
 }
 
-void autofs4_dentry_release(struct dentry *);
 static struct dentry_operations autofs4_sb_dentry_operations = {
 	.d_release      = autofs4_dentry_release,
 };
-- 
cgit v1.2.3


From 70674f95c0a2ea694d5c39f4e514f538a09be36f Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 28 Mar 2006 01:56:33 -0800
Subject: [PATCH] Optimize select/poll by putting small data sets on the stack

Optimize select and poll by a using stack space for small fd sets

This brings back an old optimization from Linux 2.0.  Using the stack is
faster than kmalloc.  On a Intel P4 system it speeds up a select of a
single pty fd by about 13% (~4000 cycles -> ~3500)

It also saves memory because a daemon hanging in select or poll will
usually save one or two less pages.  This can add up - e.g.  if you have 10
daemons blocking in poll/select you save 40KB of memory.

I did a patch for this long ago, but it was never applied.  This version is
a reimplementation of the old patch that tries to be less intrusive.  I
only did the minimal changes needed for the stack allocation.

The cut off point before external memory is allocated is currently at
832bytes.  The system calls always allocate this much memory on the stack.

These 832 bytes are divided into 256 bytes frontend data (for the select
bitmaps of the pollfds) and the rest of the space for the wait queues used
by the low level drivers.  There are some extreme cases where this won't
work out for select and it falls back to allocating memory too early -
especially with very sparse large select bitmaps - but the majority of
processes who only have a small number of file descriptors should be ok.
[TBD: 832/256 might not be the best split for select or poll]

I suspect more optimizations might be possible, but they would be more
complicated.  One way would be to cache the select/poll context over
multiple system calls because typically the input values should be similar.
 Problem is when to flush the file descriptors out though.

Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/select.c | 106 ++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 64 insertions(+), 42 deletions(-)

(limited to 'fs')

diff --git a/fs/select.c b/fs/select.c
index 1815a57d225..d8b4f0722b8 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -29,12 +29,6 @@
 #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
 #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
 
-struct poll_table_entry {
-	struct file * filp;
-	wait_queue_t wait;
-	wait_queue_head_t * wait_address;
-};
-
 struct poll_table_page {
 	struct poll_table_page * next;
 	struct poll_table_entry * entry;
@@ -64,13 +58,23 @@ void poll_initwait(struct poll_wqueues *pwq)
 	init_poll_funcptr(&pwq->pt, __pollwait);
 	pwq->error = 0;
 	pwq->table = NULL;
+	pwq->inline_index = 0;
 }
 
 EXPORT_SYMBOL(poll_initwait);
 
+static void free_poll_entry(struct poll_table_entry *entry)
+{
+	remove_wait_queue(entry->wait_address,&entry->wait);
+	fput(entry->filp);
+}
+
 void poll_freewait(struct poll_wqueues *pwq)
 {
 	struct poll_table_page * p = pwq->table;
+	int i;
+	for (i = 0; i < pwq->inline_index; i++)
+		free_poll_entry(pwq->inline_entries + i);
 	while (p) {
 		struct poll_table_entry * entry;
 		struct poll_table_page *old;
@@ -78,8 +82,7 @@ void poll_freewait(struct poll_wqueues *pwq)
 		entry = p->entry;
 		do {
 			entry--;
-			remove_wait_queue(entry->wait_address,&entry->wait);
-			fput(entry->filp);
+			free_poll_entry(entry);
 		} while (entry > p->entries);
 		old = p;
 		p = p->next;
@@ -89,12 +92,14 @@ void poll_freewait(struct poll_wqueues *pwq)
 
 EXPORT_SYMBOL(poll_freewait);
 
-static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
-		       poll_table *_p)
+static struct poll_table_entry *poll_get_entry(poll_table *_p)
 {
 	struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt);
 	struct poll_table_page *table = p->table;
 
+	if (p->inline_index < N_INLINE_POLL_ENTRIES)
+		return p->inline_entries + p->inline_index++;
+
 	if (!table || POLL_TABLE_FULL(table)) {
 		struct poll_table_page *new_table;
 
@@ -102,7 +107,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 		if (!new_table) {
 			p->error = -ENOMEM;
 			__set_current_state(TASK_RUNNING);
-			return;
+			return NULL;
 		}
 		new_table->entry = new_table->entries;
 		new_table->next = table;
@@ -110,16 +115,21 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 		table = new_table;
 	}
 
-	/* Add a new entry */
-	{
-		struct poll_table_entry * entry = table->entry;
-		table->entry = entry+1;
-	 	get_file(filp);
-	 	entry->filp = filp;
-		entry->wait_address = wait_address;
-		init_waitqueue_entry(&entry->wait, current);
-		add_wait_queue(wait_address,&entry->wait);
-	}
+	return table->entry++;
+}
+
+/* Add a new entry */
+static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
+				poll_table *p)
+{
+	struct poll_table_entry *entry = poll_get_entry(p);
+	if (!entry)
+		return;
+	get_file(filp);
+	entry->filp = filp;
+	entry->wait_address = wait_address;
+	init_waitqueue_entry(&entry->wait, current);
+	add_wait_queue(wait_address,&entry->wait);
 }
 
 #define FDS_IN(fds, n)		(fds->in + n)
@@ -284,16 +294,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
 	return retval;
 }
 
-static void *select_bits_alloc(int size)
-{
-	return kmalloc(6 * size, GFP_KERNEL);
-}
-
-static void select_bits_free(void *bits, int size)
-{
-	kfree(bits);
-}
-
 /*
  * We can actually return ERESTARTSYS instead of EINTR, but I'd
  * like to be certain this leads to no problems. So I return
@@ -312,6 +312,8 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 	char *bits;
 	int ret, size, max_fdset;
 	struct fdtable *fdt;
+	/* Allocate small arguments on the stack to save memory and be faster */
+	char stack_fds[SELECT_STACK_ALLOC];
 
 	ret = -EINVAL;
 	if (n < 0)
@@ -332,7 +334,10 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 	 */
 	ret = -ENOMEM;
 	size = FDS_BYTES(n);
-	bits = select_bits_alloc(size);
+	if (6*size < SELECT_STACK_ALLOC)
+		bits = stack_fds;
+	else
+		bits = kmalloc(6 * size, GFP_KERNEL);
 	if (!bits)
 		goto out_nofds;
 	fds.in      = (unsigned long *)  bits;
@@ -367,7 +372,8 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 		ret = -EFAULT;
 
 out:
-	select_bits_free(bits, size);
+	if (bits != stack_fds)
+		kfree(bits);
 out_nofds:
 	return ret;
 }
@@ -619,6 +625,9 @@ static int do_poll(unsigned int nfds,  struct poll_list *list,
 	return count;
 }
 
+#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list))  / \
+			sizeof(struct pollfd))
+
 int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
 {
 	struct poll_wqueues table;
@@ -628,6 +637,9 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
  	struct poll_list *walk;
 	struct fdtable *fdt;
 	int max_fdset;
+	/* Allocate small arguments on the stack to save memory and be faster */
+	char stack_pps[POLL_STACK_ALLOC];
+	struct poll_list *stack_pp = NULL;
 
 	/* Do a sanity check on nfds ... */
 	rcu_read_lock();
@@ -645,14 +657,23 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
 	err = -ENOMEM;
 	while(i!=0) {
 		struct poll_list *pp;
-		pp = kmalloc(sizeof(struct poll_list)+
-				sizeof(struct pollfd)*
-				(i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i),
-					GFP_KERNEL);
-		if(pp==NULL)
-			goto out_fds;
+		int num, size;
+		if (stack_pp == NULL)
+			num = N_STACK_PPS;
+		else
+			num = POLLFD_PER_PAGE;
+		if (num > i)
+			num = i;
+		size = sizeof(struct poll_list) + sizeof(struct pollfd)*num;
+		if (!stack_pp)
+			stack_pp = pp = (struct poll_list *)stack_pps;
+		else {
+			pp = kmalloc(size, GFP_KERNEL);
+			if (!pp)
+				goto out_fds;
+		}
 		pp->next=NULL;
-		pp->len = (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i);
+		pp->len = num;
 		if (head == NULL)
 			head = pp;
 		else
@@ -660,7 +681,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
 
 		walk = pp;
 		if (copy_from_user(pp->entries, ufds + nfds-i, 
-				sizeof(struct pollfd)*pp->len)) {
+				sizeof(struct pollfd)*num)) {
 			err = -EFAULT;
 			goto out_fds;
 		}
@@ -689,7 +710,8 @@ out_fds:
 	walk = head;
 	while(walk!=NULL) {
 		struct poll_list *pp = walk->next;
-		kfree(walk);
+		if (walk != stack_pp)
+			kfree(walk);
 		walk = pp;
 	}
 	poll_freewait(&table);
-- 
cgit v1.2.3


From e4a1f129f9e43a5e5d28fe6d1b214246a398cdce Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 28 Mar 2006 01:56:34 -0800
Subject: [PATCH] use fget_light() in select/poll

Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/select.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/select.c b/fs/select.c
index d8b4f0722b8..05cd199a112 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -231,17 +231,18 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
 			}
 
 			for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
+				int fput_needed;
 				if (i >= n)
 					break;
 				if (!(bit & all_bits))
 					continue;
-				file = fget(i);
+				file = fget_light(i, &fput_needed);
 				if (file) {
 					f_op = file->f_op;
 					mask = DEFAULT_POLLMASK;
 					if (f_op && f_op->poll)
 						mask = (*f_op->poll)(file, retval ? NULL : wait);
-					fput(file);
+					fput_light(file, fput_needed);
 					if ((mask & POLLIN_SET) && (in & bit)) {
 						res_in |= bit;
 						retval++;
@@ -557,14 +558,15 @@ static void do_pollfd(unsigned int num, struct pollfd * fdpage,
 		fdp = fdpage+i;
 		fd = fdp->fd;
 		if (fd >= 0) {
-			struct file * file = fget(fd);
+			int fput_needed;
+			struct file * file = fget_light(fd, &fput_needed);
 			mask = POLLNVAL;
 			if (file != NULL) {
 				mask = DEFAULT_POLLMASK;
 				if (file->f_op && file->f_op->poll)
 					mask = file->f_op->poll(file, *pwait);
 				mask &= fdp->events | POLLERR | POLLHUP;
-				fput(file);
+				fput_light(file, fput_needed);
 			}
 			if (mask) {
 				*pwait = NULL;
-- 
cgit v1.2.3


From 68c3431ae22912be580c68d3955ef46515582943 Mon Sep 17 00:00:00 2001
From: Vadim Lobanov <vlobanov@speakeasy.net>
Date: Tue, 28 Mar 2006 01:56:35 -0800
Subject: [PATCH] Fold select_bits_alloc/free into caller code.

Remove an unnecessary level of indirection in allocating and freeing select
bits, as per the select_bits_alloc() and select_bits_free() functions.
Both select.c and compat.c are updated.

Signed-off-by: Vadim Lobanov <vlobanov@speakeasy.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index ef5a0771592..7f8e26ea427 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1639,15 +1639,6 @@ void compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
  * This is a virtual copy of sys_select from fs/select.c and probably
  * should be compared to it from time to time
  */
-static void *select_bits_alloc(int size)
-{
-	return kmalloc(6 * size, GFP_KERNEL);
-}
-
-static void select_bits_free(void *bits, int size)
-{
-	kfree(bits);
-}
 
 /*
  * We can actually return ERESTARTSYS instead of EINTR, but I'd
@@ -1686,7 +1677,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
 	 */
 	ret = -ENOMEM;
 	size = FDS_BYTES(n);
-	bits = select_bits_alloc(size);
+	bits = kmalloc(6 * size, GFP_KERNEL);
 	if (!bits)
 		goto out_nofds;
 	fds.in      = (unsigned long *)  bits;
@@ -1720,7 +1711,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
 	compat_set_fd_set(n, exp, fds.res_ex);
 
 out:
-	select_bits_free(bits, size);
+	kfree(bits);
 out_nofds:
 	return ret;
 }
-- 
cgit v1.2.3


From 0a945022778f100115d0cb6234eb28fc1b15ccaf Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 28 Mar 2006 01:56:37 -0800
Subject: [PATCH] for_each_possible_cpu: fixes for generic part

replaces for_each_cpu with for_each_possible_cpu().

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/file.c           | 2 +-
 fs/proc/proc_misc.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index bbc74331473..55f4e702256 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -373,6 +373,6 @@ static void __devinit fdtable_defer_list_init(int cpu)
 void __init files_defer_init(void)
 {
 	int i;
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 		fdtable_defer_list_init(i);
 }
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 1e9ea37d457..1edce0c34bf 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -534,7 +534,7 @@ static int show_stat(struct seq_file *p, void *v)
 	if (wall_to_monotonic.tv_nsec)
 		--jif;
 
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		int j;
 
 		user = cputime64_add(user, kstat_cpu(i).cpustat.user);
-- 
cgit v1.2.3


From 99ac48f54a91d02140c497edc31dc57d4bc5c85d Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Tue, 28 Mar 2006 01:56:41 -0800
Subject: [PATCH] mark f_ops const in the inode

Mark the f_ops members of inodes as const, as well as fix the
ripple-through this causes by places that copy this f_ops and then "do
stuff" with it.

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/char_dev.c       | 4 ++--
 fs/debugfs/inode.c  | 2 +-
 fs/inode.c          | 2 +-
 fs/nfsd/vfs.c       | 2 +-
 fs/proc/generic.c   | 2 +-
 fs/proc/internal.h  | 2 +-
 fs/proc/proc_misc.c | 2 +-
 fs/select.c         | 2 +-
 8 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/char_dev.c b/fs/char_dev.c
index 8c6eb04d31e..b53dffa46ba 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -250,7 +250,7 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
 }
 
 int register_chrdev(unsigned int major, const char *name,
-		    struct file_operations *fops)
+		    const struct file_operations *fops)
 {
 	struct char_device_struct *cd;
 	struct cdev *cdev;
@@ -473,7 +473,7 @@ struct cdev *cdev_alloc(void)
 	return p;
 }
 
-void cdev_init(struct cdev *cdev, struct file_operations *fops)
+void cdev_init(struct cdev *cdev, const struct file_operations *fops)
 {
 	memset(cdev, 0, sizeof *cdev);
 	INIT_LIST_HEAD(&cdev->list);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index d4f1a2cddd4..85d166cdcae 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -191,7 +191,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
  */
 struct dentry *debugfs_create_file(const char *name, mode_t mode,
 				   struct dentry *parent, void *data,
-				   struct file_operations *fops)
+				   const struct file_operations *fops)
 {
 	struct dentry *dentry = NULL;
 	int error;
diff --git a/fs/inode.c b/fs/inode.c
index 1fddf2803af..32b7c337502 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -104,7 +104,7 @@ static struct inode *alloc_inode(struct super_block *sb)
 {
 	static struct address_space_operations empty_aops;
 	static struct inode_operations empty_iops;
-	static struct file_operations empty_fops;
+	static const struct file_operations empty_fops;
 	struct inode *inode;
 
 	if (sb->s_op->alloc_inode)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 5320e5afadd..31018333dc3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -706,7 +706,7 @@ nfsd_close(struct file *filp)
  * after it.
  */
 static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
-			      struct file_operations *fop)
+			      const struct file_operations *fop)
 {
 	struct inode *inode = dp->d_inode;
 	int (*fsync) (struct file *, struct dentry *, int);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 47b7a20d45e..4ba03009cf7 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -560,7 +560,7 @@ static void proc_kill_inodes(struct proc_dir_entry *de)
 		struct file * filp = list_entry(p, struct file, f_u.fu_list);
 		struct dentry * dentry = filp->f_dentry;
 		struct inode * inode;
-		struct file_operations *fops;
+		const struct file_operations *fops;
 
 		if (dentry->d_op != &proc_dentry_operations)
 			continue;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 95a1cf32b83..0502f17b860 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -30,7 +30,7 @@ do {						\
 
 #endif
 
-extern void create_seq_entry(char *name, mode_t mode, struct file_operations *f);
+extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f);
 extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount **);
 extern int proc_tid_stat(struct task_struct *,  char *);
 extern int proc_tgid_stat(struct task_struct *, char *);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 1edce0c34bf..ef5a3323f4b 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -731,7 +731,7 @@ static struct file_operations proc_sysrq_trigger_operations = {
 
 struct proc_dir_entry *proc_root_kcore;
 
-void create_seq_entry(char *name, mode_t mode, struct file_operations *f)
+void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
 {
 	struct proc_dir_entry *entry;
 	entry = create_proc_entry(name, mode, NULL);
diff --git a/fs/select.c b/fs/select.c
index 05cd199a112..b3a3a1326af 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -220,7 +220,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
 		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
 			unsigned long in, out, ex, all_bits, bit = 1, mask, j;
 			unsigned long res_in = 0, res_out = 0, res_ex = 0;
-			struct file_operations *f_op = NULL;
+			const struct file_operations *f_op = NULL;
 			struct file *file = NULL;
 
 			in = *inp++; out = *outp++; ex = *exp++;
-- 
cgit v1.2.3


From 4b6f5d20b04dcbc3d888555522b90ba6d36c4106 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Tue, 28 Mar 2006 01:56:42 -0800
Subject: [PATCH] Make most file operations structs in fs/ const

This is a conversion to make the various file_operations structs in fs/
const.  Basically a regexp job, with a few manual fixups

The goal is both to increase correctness (harder to accidentally write to
shared datastructures) and reducing the false sharing of cachelines with
things that get dirty in .data (while .rodata is nicely read only and thus
cache clean)

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/v9fs_vfs.h                |  4 ++--
 fs/9p/vfs_dir.c                 |  2 +-
 fs/9p/vfs_file.c                |  2 +-
 fs/adfs/adfs.h                  |  4 ++--
 fs/adfs/dir.c                   |  2 +-
 fs/adfs/file.c                  |  2 +-
 fs/affs/affs.h                  |  6 +++---
 fs/affs/dir.c                   |  2 +-
 fs/affs/file.c                  |  2 +-
 fs/afs/dir.c                    |  2 +-
 fs/afs/internal.h               |  4 ++--
 fs/afs/mntpt.c                  |  2 +-
 fs/afs/proc.c                   | 10 +++++-----
 fs/autofs/autofs_i.h            |  2 +-
 fs/autofs/root.c                |  2 +-
 fs/autofs4/autofs_i.h           |  4 ++--
 fs/autofs4/root.c               |  4 ++--
 fs/bad_inode.c                  |  2 +-
 fs/befs/linuxvfs.c              |  2 +-
 fs/bfs/bfs.h                    |  4 ++--
 fs/bfs/dir.c                    |  2 +-
 fs/bfs/file.c                   |  2 +-
 fs/binfmt_misc.c                |  6 +++---
 fs/block_dev.c                  |  2 +-
 fs/char_dev.c                   |  2 +-
 fs/cifs/cifsfs.c                | 10 +++++-----
 fs/cifs/cifsfs.h                | 10 +++++-----
 fs/coda/dir.c                   |  2 +-
 fs/coda/file.c                  |  2 +-
 fs/coda/pioctl.c                |  2 +-
 fs/coda/psdev.c                 |  2 +-
 fs/configfs/configfs_internal.h |  6 +++---
 fs/configfs/dir.c               |  2 +-
 fs/configfs/file.c              |  2 +-
 fs/cramfs/inode.c               |  4 ++--
 fs/debugfs/file.c               |  4 ++--
 fs/devfs/base.c                 | 12 ++++++------
 fs/efs/dir.c                    |  2 +-
 fs/eventpoll.c                  |  2 +-
 fs/ext2/dir.c                   |  2 +-
 fs/ext2/ext2.h                  |  6 +++---
 fs/ext2/file.c                  |  4 ++--
 fs/ext3/dir.c                   |  2 +-
 fs/ext3/file.c                  |  2 +-
 fs/fat/dir.c                    |  2 +-
 fs/fat/file.c                   |  2 +-
 fs/fifo.c                       |  2 +-
 fs/freevxfs/vxfs_extern.h       |  2 +-
 fs/freevxfs/vxfs_lookup.c       |  2 +-
 fs/fuse/dev.c                   |  2 +-
 fs/fuse/dir.c                   |  2 +-
 fs/fuse/file.c                  |  6 +++---
 fs/fuse/fuse_i.h                |  2 +-
 fs/hfs/dir.c                    |  2 +-
 fs/hfs/hfs_fs.h                 |  2 +-
 fs/hfs/inode.c                  |  4 ++--
 fs/hfsplus/dir.c                |  2 +-
 fs/hfsplus/inode.c              |  2 +-
 fs/hostfs/hostfs_kern.c         |  4 ++--
 fs/hpfs/dir.c                   |  2 +-
 fs/hpfs/file.c                  |  2 +-
 fs/hpfs/hpfs_fn.h               |  4 ++--
 fs/hppfs/hppfs_kern.c           |  4 ++--
 fs/hugetlbfs/inode.c            |  4 ++--
 fs/inotify.c                    |  2 +-
 fs/isofs/dir.c                  |  2 +-
 fs/isofs/isofs.h                |  2 +-
 fs/jffs/inode-v23.c             |  8 ++++----
 fs/jffs2/dir.c                  |  2 +-
 fs/jffs2/file.c                 |  2 +-
 fs/jffs2/os-linux.h             |  4 ++--
 fs/jfs/file.c                   |  2 +-
 fs/jfs/jfs_inode.h              |  4 ++--
 fs/jfs/namei.c                  |  2 +-
 fs/libfs.c                      |  2 +-
 fs/minix/dir.c                  |  2 +-
 fs/minix/file.c                 |  2 +-
 fs/minix/minix.h                |  4 ++--
 fs/ncpfs/dir.c                  |  2 +-
 fs/ncpfs/file.c                 |  2 +-
 fs/nfs/dir.c                    |  2 +-
 fs/nfs/file.c                   |  2 +-
 fs/nfsd/nfsctl.c                |  4 ++--
 fs/nfsd/stats.c                 |  2 +-
 fs/ntfs/dir.c                   |  2 +-
 fs/ntfs/file.c                  |  4 ++--
 fs/ntfs/ntfs.h                  |  6 +++---
 fs/ocfs2/dlmglue.c              |  2 +-
 fs/ocfs2/file.c                 |  4 ++--
 fs/ocfs2/file.h                 |  4 ++--
 fs/openpromfs/inode.c           |  6 +++---
 fs/pipe.c                       |  6 +++---
 fs/proc/kcore.c                 |  2 +-
 fs/proc/kmsg.c                  |  2 +-
 fs/proc/vmcore.c                |  2 +-
 fs/qnx4/dir.c                   |  2 +-
 fs/qnx4/file.c                  |  2 +-
 fs/ramfs/file-mmu.c             |  2 +-
 fs/ramfs/file-nommu.c           |  2 +-
 fs/ramfs/internal.h             |  2 +-
 fs/read_write.c                 |  2 +-
 fs/reiserfs/dir.c               |  2 +-
 fs/reiserfs/file.c              |  2 +-
 fs/reiserfs/procfs.c            |  2 +-
 fs/romfs/inode.c                |  2 +-
 fs/smbfs/dir.c                  |  2 +-
 fs/smbfs/file.c                 |  2 +-
 fs/smbfs/proto.h                |  4 ++--
 fs/sysfs/bin.c                  |  2 +-
 fs/sysfs/dir.c                  |  2 +-
 fs/sysfs/file.c                 |  2 +-
 fs/sysfs/sysfs.h                |  6 +++---
 fs/sysv/dir.c                   |  2 +-
 fs/sysv/file.c                  |  2 +-
 fs/sysv/sysv.h                  |  4 ++--
 fs/udf/dir.c                    |  2 +-
 fs/udf/file.c                   |  2 +-
 fs/udf/udfdecl.h                |  4 ++--
 fs/ufs/dir.c                    |  2 +-
 fs/ufs/file.c                   |  2 +-
 fs/xfs/linux-2.6/xfs_file.c     |  6 +++---
 fs/xfs/linux-2.6/xfs_iops.h     |  6 +++---
 122 files changed, 188 insertions(+), 188 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 43c9f7de031..f867b8d3e97 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -39,8 +39,8 @@
 
 extern struct file_system_type v9fs_fs_type;
 extern struct address_space_operations v9fs_addr_operations;
-extern struct file_operations v9fs_file_operations;
-extern struct file_operations v9fs_dir_operations;
+extern const struct file_operations v9fs_file_operations;
+extern const struct file_operations v9fs_dir_operations;
 extern struct dentry_operations v9fs_dentry_operations;
 
 struct inode *v9fs_get_inode(struct super_block *sb, int mode);
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 766f11f1215..e32d5971039 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -204,7 +204,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-struct file_operations v9fs_dir_operations = {
+const struct file_operations v9fs_dir_operations = {
 	.read = generic_read_dir,
 	.readdir = v9fs_dir_readdir,
 	.open = v9fs_file_open,
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 59e74416340..083dcfcd158 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -266,7 +266,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
 	return total;
 }
 
-struct file_operations v9fs_file_operations = {
+const struct file_operations v9fs_file_operations = {
 	.llseek = generic_file_llseek,
 	.read = v9fs_file_read,
 	.write = v9fs_file_write,
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index f6cd01352cc..29217ff36d4 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -85,7 +85,7 @@ void __adfs_error(struct super_block *sb, const char *function,
 
 /* dir_*.c */
 extern struct inode_operations adfs_dir_inode_operations;
-extern struct file_operations adfs_dir_operations;
+extern const struct file_operations adfs_dir_operations;
 extern struct dentry_operations adfs_dentry_operations;
 extern struct adfs_dir_ops adfs_f_dir_ops;
 extern struct adfs_dir_ops adfs_fplus_dir_ops;
@@ -94,7 +94,7 @@ extern int adfs_dir_update(struct super_block *sb, struct object_info *obj);
 
 /* file.c */
 extern struct inode_operations adfs_file_inode_operations;
-extern struct file_operations adfs_file_operations;
+extern const struct file_operations adfs_file_operations;
 
 static inline __u32 signed_asl(__u32 val, signed int shift)
 {
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 0b4c3a02807..7b075fc397d 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -196,7 +196,7 @@ out:
 	return ret;
 }
 
-struct file_operations adfs_dir_operations = {
+const struct file_operations adfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= adfs_readdir,
 	.fsync		= file_fsync,
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 6af10885f9d..1014b9f2117 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -25,7 +25,7 @@
 
 #include "adfs.h"
 
-struct file_operations adfs_file_operations = {
+const struct file_operations adfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.mmap		= generic_file_mmap,
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 0c6799f2137..a43a876742b 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -192,9 +192,9 @@ extern void   affs_dir_truncate(struct inode *);
 extern struct inode_operations	 affs_file_inode_operations;
 extern struct inode_operations	 affs_dir_inode_operations;
 extern struct inode_operations   affs_symlink_inode_operations;
-extern struct file_operations	 affs_file_operations;
-extern struct file_operations	 affs_file_operations_ofs;
-extern struct file_operations	 affs_dir_operations;
+extern const struct file_operations	 affs_file_operations;
+extern const struct file_operations	 affs_file_operations_ofs;
+extern const struct file_operations	 affs_dir_operations;
 extern struct address_space_operations	 affs_symlink_aops;
 extern struct address_space_operations	 affs_aops;
 extern struct address_space_operations	 affs_aops_ofs;
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 548efd0ee98..5d9649fa181 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -17,7 +17,7 @@
 
 static int affs_readdir(struct file *, void *, filldir_t);
 
-struct file_operations affs_dir_operations = {
+const struct file_operations affs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= affs_readdir,
 	.fsync		= file_fsync,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index f72fb776ecd..7076262af39 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -25,7 +25,7 @@ static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
 static int affs_file_open(struct inode *inode, struct file *filp);
 static int affs_file_release(struct inode *inode, struct file *filp);
 
-struct file_operations affs_file_operations = {
+const struct file_operations affs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5c61c24dab2..a6dff6a4f20 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -32,7 +32,7 @@ static int afs_d_delete(struct dentry *dentry);
 static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
 				  loff_t fpos, ino_t ino, unsigned dtype);
 
-struct file_operations afs_dir_file_operations = {
+const struct file_operations afs_dir_file_operations = {
 	.open		= afs_dir_open,
 	.readdir	= afs_dir_readdir,
 };
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index ab8f87c6631..72febdf9a35 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -64,7 +64,7 @@ extern struct cachefs_index_def afs_cache_cell_index_def;
  * dir.c
  */
 extern struct inode_operations afs_dir_inode_operations;
-extern struct file_operations afs_dir_file_operations;
+extern const struct file_operations afs_dir_file_operations;
 
 /*
  * file.c
@@ -105,7 +105,7 @@ extern struct cachefs_netfs afs_cache_netfs;
  * mntpt.c
  */
 extern struct inode_operations afs_mntpt_inode_operations;
-extern struct file_operations afs_mntpt_file_operations;
+extern const struct file_operations afs_mntpt_file_operations;
 extern struct afs_timer afs_mntpt_expiry_timer;
 extern struct afs_timer_ops afs_mntpt_expiry_timer_ops;
 extern unsigned long afs_mntpt_expiry_timeout;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 31ee06590de..4e6eeb59b83 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -32,7 +32,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
 static int afs_mntpt_open(struct inode *inode, struct file *file);
 static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd);
 
-struct file_operations afs_mntpt_file_operations = {
+const struct file_operations afs_mntpt_file_operations = {
 	.open		= afs_mntpt_open,
 };
 
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 9c81b8f7eef..101d21b6c03 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -37,7 +37,7 @@ static struct seq_operations afs_proc_cells_ops = {
 	.show	= afs_proc_cells_show,
 };
 
-static struct file_operations afs_proc_cells_fops = {
+static const struct file_operations afs_proc_cells_fops = {
 	.open		= afs_proc_cells_open,
 	.read		= seq_read,
 	.write		= afs_proc_cells_write,
@@ -53,7 +53,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
 				       const char __user *buf,
 				       size_t size, loff_t *_pos);
 
-static struct file_operations afs_proc_rootcell_fops = {
+static const struct file_operations afs_proc_rootcell_fops = {
 	.open		= afs_proc_rootcell_open,
 	.read		= afs_proc_rootcell_read,
 	.write		= afs_proc_rootcell_write,
@@ -77,7 +77,7 @@ static struct seq_operations afs_proc_cell_volumes_ops = {
 	.show	= afs_proc_cell_volumes_show,
 };
 
-static struct file_operations afs_proc_cell_volumes_fops = {
+static const struct file_operations afs_proc_cell_volumes_fops = {
 	.open		= afs_proc_cell_volumes_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -101,7 +101,7 @@ static struct seq_operations afs_proc_cell_vlservers_ops = {
 	.show	= afs_proc_cell_vlservers_show,
 };
 
-static struct file_operations afs_proc_cell_vlservers_fops = {
+static const struct file_operations afs_proc_cell_vlservers_fops = {
 	.open		= afs_proc_cell_vlservers_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -124,7 +124,7 @@ static struct seq_operations afs_proc_cell_servers_ops = {
 	.show	= afs_proc_cell_servers_show,
 };
 
-static struct file_operations afs_proc_cell_servers_fops = {
+static const struct file_operations afs_proc_cell_servers_fops = {
 	.open		= afs_proc_cell_servers_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 990c28da5ae..a62327f1bdf 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -146,7 +146,7 @@ struct autofs_dir_ent *autofs_expire(struct super_block *,struct autofs_sb_info
 
 extern struct inode_operations autofs_root_inode_operations;
 extern struct inode_operations autofs_symlink_inode_operations;
-extern struct file_operations autofs_root_operations;
+extern const struct file_operations autofs_root_operations;
 
 /* Initializing function */
 
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 870e2cf3301..9cac08d6a87 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -26,7 +26,7 @@ static int autofs_root_rmdir(struct inode *,struct dentry *);
 static int autofs_root_mkdir(struct inode *,struct dentry *,int);
 static int autofs_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
 
-struct file_operations autofs_root_operations = {
+const struct file_operations autofs_root_operations = {
 	.read		= generic_read_dir,
 	.readdir	= autofs_root_readdir,
 	.ioctl		= autofs_root_ioctl,
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c70204a6126..57c4903614e 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -176,8 +176,8 @@ extern struct inode_operations autofs4_dir_inode_operations;
 extern struct inode_operations autofs4_root_inode_operations;
 extern struct inode_operations autofs4_indirect_root_inode_operations;
 extern struct inode_operations autofs4_direct_root_inode_operations;
-extern struct file_operations autofs4_dir_operations;
-extern struct file_operations autofs4_root_operations;
+extern const struct file_operations autofs4_dir_operations;
+extern const struct file_operations autofs4_root_operations;
 
 /* Initializing function */
 
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index c8fe43a475e..84e030c8ddd 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -32,7 +32,7 @@ static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t fil
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
 static void *autofs4_follow_link(struct dentry *, struct nameidata *);
 
-struct file_operations autofs4_root_operations = {
+const struct file_operations autofs4_root_operations = {
 	.open		= dcache_dir_open,
 	.release	= dcache_dir_close,
 	.read		= generic_read_dir,
@@ -40,7 +40,7 @@ struct file_operations autofs4_root_operations = {
 	.ioctl		= autofs4_root_ioctl,
 };
 
-struct file_operations autofs4_dir_operations = {
+const struct file_operations autofs4_dir_operations = {
 	.open		= autofs4_dir_open,
 	.release	= autofs4_dir_close,
 	.read		= generic_read_dir,
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index e172180a1d8..80599ae3396 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -22,7 +22,7 @@ static int return_EIO(void)
 
 #define EIO_ERROR ((void *) (return_EIO))
 
-static struct file_operations bad_file_ops =
+static const struct file_operations bad_file_ops =
 {
 	.llseek		= EIO_ERROR,
 	.aio_read	= EIO_ERROR,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 044a5958782..68ebd10f345 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -64,7 +64,7 @@ static const struct super_operations befs_sops = {
 /* slab cache for befs_inode_info objects */
 static kmem_cache_t *befs_inode_cachep;
 
-static struct file_operations befs_dir_operations = {
+static const struct file_operations befs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= befs_readdir,
 };
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 1fbc53f14ab..9d791004b21 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -49,11 +49,11 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
 
 /* file.c */
 extern struct inode_operations bfs_file_inops;
-extern struct file_operations bfs_file_operations;
+extern const struct file_operations bfs_file_operations;
 extern struct address_space_operations bfs_aops;
 
 /* dir.c */
 extern struct inode_operations bfs_dir_inops;
-extern struct file_operations bfs_dir_operations;
+extern const struct file_operations bfs_dir_operations;
 
 #endif /* _FS_BFS_BFS_H */
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 5af928fa044..26fad962173 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -70,7 +70,7 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
 	return 0;	
 }
 
-struct file_operations bfs_dir_operations = {
+const struct file_operations bfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= bfs_readdir,
 	.fsync		= file_fsync,
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 807723b65da..d83cd74a2e4 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -17,7 +17,7 @@
 #define dprintf(x...)
 #endif
 
-struct file_operations bfs_file_operations = {
+const struct file_operations bfs_file_operations = {
 	.llseek 	= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 6a7b730c206..d73d75591a3 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -600,7 +600,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
 	return count;
 }
 
-static struct file_operations bm_entry_operations = {
+static const struct file_operations bm_entry_operations = {
 	.read		= bm_entry_read,
 	.write		= bm_entry_write,
 };
@@ -668,7 +668,7 @@ out:
 	return count;
 }
 
-static struct file_operations bm_register_operations = {
+static const struct file_operations bm_register_operations = {
 	.write		= bm_register_write,
 };
 
@@ -715,7 +715,7 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer,
 	return count;
 }
 
-static struct file_operations bm_status_operations = {
+static const struct file_operations bm_status_operations = {
 	.read		= bm_status_read,
 	.write		= bm_status_write,
 };
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 17c76182f38..af88c43043d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1087,7 +1087,7 @@ struct address_space_operations def_blk_aops = {
 	.direct_IO	= blkdev_direct_IO,
 };
 
-struct file_operations def_blk_fops = {
+const struct file_operations def_blk_fops = {
 	.open		= blkdev_open,
 	.release	= blkdev_close,
 	.llseek		= block_llseek,
diff --git a/fs/char_dev.c b/fs/char_dev.c
index b53dffa46ba..4e1b849f912 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -406,7 +406,7 @@ static void cdev_purge(struct cdev *cdev)
  * is contain the open that then fills in the correct operations
  * depending on the special file...
  */
-struct file_operations def_chr_fops = {
+const struct file_operations def_chr_fops = {
 	.open = chrdev_open,
 };
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 6b99b51d669..4bbc544857b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -583,7 +583,7 @@ struct inode_operations cifs_symlink_inode_ops = {
 #endif 
 };
 
-struct file_operations cifs_file_ops = {
+const struct file_operations cifs_file_ops = {
 	.read = do_sync_read,
 	.write = do_sync_write,
 	.readv = generic_file_readv,
@@ -607,7 +607,7 @@ struct file_operations cifs_file_ops = {
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
 
-struct file_operations cifs_file_direct_ops = {
+const struct file_operations cifs_file_direct_ops = {
 	/* no mmap, no aio, no readv - 
 	   BB reevaluate whether they can be done with directio, no cache */
 	.read = cifs_user_read,
@@ -626,7 +626,7 @@ struct file_operations cifs_file_direct_ops = {
 	.dir_notify = cifs_dir_notify,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
-struct file_operations cifs_file_nobrl_ops = {
+const struct file_operations cifs_file_nobrl_ops = {
 	.read = do_sync_read,
 	.write = do_sync_write,
 	.readv = generic_file_readv,
@@ -649,7 +649,7 @@ struct file_operations cifs_file_nobrl_ops = {
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
 
-struct file_operations cifs_file_direct_nobrl_ops = {
+const struct file_operations cifs_file_direct_nobrl_ops = {
 	/* no mmap, no aio, no readv - 
 	   BB reevaluate whether they can be done with directio, no cache */
 	.read = cifs_user_read,
@@ -668,7 +668,7 @@ struct file_operations cifs_file_direct_nobrl_ops = {
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
 
-struct file_operations cifs_dir_ops = {
+const struct file_operations cifs_dir_ops = {
 	.readdir = cifs_readdir,
 	.release = cifs_closedir,
 	.read    = generic_read_dir,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 821a8eb2255..74f405ae4da 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -61,10 +61,10 @@ extern struct inode_operations cifs_file_inode_ops;
 extern struct inode_operations cifs_symlink_inode_ops;
 
 /* Functions related to files and directories */
-extern struct file_operations cifs_file_ops;
-extern struct file_operations cifs_file_direct_ops; /* if directio mount */
-extern struct file_operations cifs_file_nobrl_ops;
-extern struct file_operations cifs_file_direct_nobrl_ops; /* if directio mount */
+extern const struct file_operations cifs_file_ops;
+extern const struct file_operations cifs_file_direct_ops; /* if directio mount */
+extern const struct file_operations cifs_file_nobrl_ops;
+extern const struct file_operations cifs_file_direct_nobrl_ops; /* if directio mount */
 extern int cifs_open(struct inode *inode, struct file *file);
 extern int cifs_close(struct inode *inode, struct file *file);
 extern int cifs_closedir(struct inode *inode, struct file *file);
@@ -76,7 +76,7 @@ extern int cifs_lock(struct file *, int, struct file_lock *);
 extern int cifs_fsync(struct file *, struct dentry *, int);
 extern int cifs_flush(struct file *);
 extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
-extern struct file_operations cifs_dir_ops;
+extern const struct file_operations cifs_dir_ops;
 extern int cifs_dir_open(struct inode *inode, struct file *file);
 extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir);
 extern int cifs_dir_notify(struct file *, unsigned long arg);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 54f76de8a68..71f2ea632e5 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -82,7 +82,7 @@ struct inode_operations coda_dir_inode_operations =
 	.setattr	= coda_setattr,
 };
 
-struct file_operations coda_dir_operations = {
+const struct file_operations coda_dir_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= coda_readdir,
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 146a991d6eb..7c2642431fa 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -288,7 +288,7 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
 	return err;
 }
 
-struct file_operations coda_file_operations = {
+const struct file_operations coda_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= coda_file_read,
 	.write		= coda_file_write,
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 127714936c6..214822be87b 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -36,7 +36,7 @@ struct inode_operations coda_ioctl_inode_operations =
 	.setattr	= coda_setattr,
 };
 
-struct file_operations coda_ioctl_operations = {
+const struct file_operations coda_ioctl_operations = {
 	.owner		= THIS_MODULE,
 	.ioctl		= coda_pioctl,
 };
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 98c74fe2e13..6c6771db36d 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -342,7 +342,7 @@ static int coda_psdev_release(struct inode * inode, struct file * file)
 }
 
 
-static struct file_operations coda_psdev_fops = {
+static const struct file_operations coda_psdev_fops = {
 	.owner		= THIS_MODULE,
 	.read		= coda_psdev_read,
 	.write		= coda_psdev_write,
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index f70e46951b3..3f4ff7a242b 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -72,9 +72,9 @@ extern void configfs_release_fs(void);
 
 extern struct rw_semaphore configfs_rename_sem;
 extern struct super_block * configfs_sb;
-extern struct file_operations configfs_dir_operations;
-extern struct file_operations configfs_file_operations;
-extern struct file_operations bin_fops;
+extern const struct file_operations configfs_dir_operations;
+extern const struct file_operations configfs_file_operations;
+extern const struct file_operations bin_fops;
 extern struct inode_operations configfs_dir_inode_operations;
 extern struct inode_operations configfs_symlink_inode_operations;
 
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index ca60e3abef4..8ed9b06a982 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1027,7 +1027,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
 	return offset;
 }
 
-struct file_operations configfs_dir_operations = {
+const struct file_operations configfs_dir_operations = {
 	.open		= configfs_dir_open,
 	.release	= configfs_dir_close,
 	.llseek		= configfs_dir_lseek,
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 3921920d871..f499803743e 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -322,7 +322,7 @@ static int configfs_release(struct inode * inode, struct file * filp)
 	return 0;
 }
 
-struct file_operations configfs_file_operations = {
+const struct file_operations configfs_file_operations = {
 	.read		= configfs_read_file,
 	.write		= configfs_write_file,
 	.llseek		= generic_file_llseek,
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index acc1b2c10a8..9efcc3a164e 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -29,7 +29,7 @@
 
 static struct super_operations cramfs_ops;
 static struct inode_operations cramfs_dir_inode_operations;
-static struct file_operations cramfs_directory_operations;
+static const struct file_operations cramfs_directory_operations;
 static struct address_space_operations cramfs_aops;
 
 static DEFINE_MUTEX(read_mutex);
@@ -512,7 +512,7 @@ static struct address_space_operations cramfs_aops = {
 /*
  * A directory can only readdir
  */
-static struct file_operations cramfs_directory_operations = {
+static const struct file_operations cramfs_directory_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= cramfs_readdir,
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 40c4fc973fa..66a505422e5 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -39,7 +39,7 @@ static int default_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-struct file_operations debugfs_file_operations = {
+const struct file_operations debugfs_file_operations = {
 	.read =		default_read_file,
 	.write =	default_write_file,
 	.open =		default_open,
@@ -213,7 +213,7 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf,
 	return count;
 }
 
-static struct file_operations fops_bool = {
+static const struct file_operations fops_bool = {
 	.read =		read_file_bool,
 	.write =	write_file_bool,
 	.open =		default_open,
diff --git a/fs/devfs/base.c b/fs/devfs/base.c
index b621521e09d..52f5059c4f3 100644
--- a/fs/devfs/base.c
+++ b/fs/devfs/base.c
@@ -856,14 +856,14 @@ static int devfsd_close(struct inode *inode, struct file *file);
 #ifdef CONFIG_DEVFS_DEBUG
 static ssize_t stat_read(struct file *file, char __user *buf, size_t len,
 			 loff_t * ppos);
-static struct file_operations stat_fops = {
+static const struct file_operations stat_fops = {
 	.open = nonseekable_open,
 	.read = stat_read,
 };
 #endif
 
 /*  Devfs daemon file operations  */
-static struct file_operations devfsd_fops = {
+static const struct file_operations devfsd_fops = {
 	.open = nonseekable_open,
 	.read = devfsd_read,
 	.ioctl = devfsd_ioctl,
@@ -1842,8 +1842,8 @@ static int try_modload(struct devfs_entry *parent, struct fs_info *fs_info,
 
 static struct inode_operations devfs_iops;
 static struct inode_operations devfs_dir_iops;
-static struct file_operations devfs_fops;
-static struct file_operations devfs_dir_fops;
+static const struct file_operations devfs_fops;
+static const struct file_operations devfs_dir_fops;
 static struct inode_operations devfs_symlink_iops;
 
 static int devfs_notify_change(struct dentry *dentry, struct iattr *iattr)
@@ -2061,11 +2061,11 @@ static int devfs_open(struct inode *inode, struct file *file)
 	return err;
 }				/*  End Function devfs_open  */
 
-static struct file_operations devfs_fops = {
+static const struct file_operations devfs_fops = {
 	.open = devfs_open,
 };
 
-static struct file_operations devfs_dir_fops = {
+static const struct file_operations devfs_dir_fops = {
 	.read = generic_read_dir,
 	.readdir = devfs_readdir,
 };
diff --git a/fs/efs/dir.c b/fs/efs/dir.c
index 777c614ff36..17f5b2d3c16 100644
--- a/fs/efs/dir.c
+++ b/fs/efs/dir.c
@@ -10,7 +10,7 @@
 
 static int efs_readdir(struct file *, void *, filldir_t);
 
-struct file_operations efs_dir_operations = {
+const struct file_operations efs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= efs_readdir,
 };
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index e067a06c646..242fe1a66ce 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -290,7 +290,7 @@ static kmem_cache_t *pwq_cache __read_mostly;
 static struct vfsmount *eventpoll_mnt __read_mostly;
 
 /* File callbacks that implement the eventpoll file behaviour */
-static struct file_operations eventpoll_fops = {
+static const struct file_operations eventpoll_fops = {
 	.release	= ep_eventpoll_close,
 	.poll		= ep_eventpoll_poll
 };
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 0165388c425..d672aa9f406 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -658,7 +658,7 @@ not_empty:
 	return 0;
 }
 
-struct file_operations ext2_dir_operations = {
+const struct file_operations ext2_dir_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= ext2_readdir,
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 11035ac7986..9f74a62be55 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -154,12 +154,12 @@ extern void ext2_write_super (struct super_block *);
  */
 
 /* dir.c */
-extern struct file_operations ext2_dir_operations;
+extern const struct file_operations ext2_dir_operations;
 
 /* file.c */
 extern struct inode_operations ext2_file_inode_operations;
-extern struct file_operations ext2_file_operations;
-extern struct file_operations ext2_xip_file_operations;
+extern const struct file_operations ext2_file_operations;
+extern const struct file_operations ext2_xip_file_operations;
 
 /* inode.c */
 extern struct address_space_operations ext2_aops;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index a484412fc78..509cceca04d 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -39,7 +39,7 @@ static int ext2_release_file (struct inode * inode, struct file * filp)
  * We have mostly NULL's here: the current defaults are ok for
  * the ext2 filesystem.
  */
-struct file_operations ext2_file_operations = {
+const struct file_operations ext2_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
@@ -56,7 +56,7 @@ struct file_operations ext2_file_operations = {
 };
 
 #ifdef CONFIG_EXT2_FS_XIP
-struct file_operations ext2_xip_file_operations = {
+const struct file_operations ext2_xip_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= xip_file_read,
 	.write		= xip_file_write,
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 38bd3f6ec14..f37528ed222 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -39,7 +39,7 @@ static int ext3_dx_readdir(struct file * filp,
 static int ext3_release_dir (struct inode * inode,
 				struct file * filp);
 
-struct file_operations ext3_dir_operations = {
+const struct file_operations ext3_dir_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= ext3_readdir,		/* we take BKL. needed?*/
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 59098ea5671..783a796220b 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -105,7 +105,7 @@ force_commit:
 	return ret;
 }
 
-struct file_operations ext3_file_operations = {
+const struct file_operations ext3_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 4095bc149eb..698b85bb1dd 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -741,7 +741,7 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp,
 	return ret;
 }
 
-struct file_operations fat_dir_operations = {
+const struct file_operations fat_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= fat_readdir,
 	.ioctl		= fat_dir_ioctl,
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 88aa1ae13f9..1ee25232e6a 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -112,7 +112,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
 	}
 }
 
-struct file_operations fat_file_operations = {
+const struct file_operations fat_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
diff --git a/fs/fifo.c b/fs/fifo.c
index d13fcd3ec80..889f722ee36 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -145,6 +145,6 @@ err_nocleanup:
  * is contain the open that then fills in the correct operations
  * depending on the access mode of the file...
  */
-struct file_operations def_fifo_fops = {
+const struct file_operations def_fifo_fops = {
 	.open		= fifo_open,	/* will set read or write pipe_fops */
 };
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 927acf70c59..1cf1fe8466a 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -63,7 +63,7 @@ extern void			vxfs_clear_inode(struct inode *);
 
 /* vxfs_lookup.c */
 extern struct inode_operations	vxfs_dir_inode_ops;
-extern struct file_operations	vxfs_dir_operations;
+extern const struct file_operations	vxfs_dir_operations;
 
 /* vxfs_olt.c */
 extern int			vxfs_read_olt(struct super_block *, u_long);
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 554eb455722..29cce456c7c 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -56,7 +56,7 @@ struct inode_operations vxfs_dir_inode_ops = {
 	.lookup =		vxfs_lookup,
 };
 
-struct file_operations vxfs_dir_operations = {
+const struct file_operations vxfs_dir_operations = {
 	.readdir =		vxfs_readdir,
 };
 
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 0c9a2ee54c9..23d1f52eb1b 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -922,7 +922,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-struct file_operations fuse_dev_operations = {
+const struct file_operations fuse_dev_operations = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.read		= fuse_dev_read,
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c72a8a97935..256355b8025 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1170,7 +1170,7 @@ static struct inode_operations fuse_dir_inode_operations = {
 	.removexattr	= fuse_removexattr,
 };
 
-static struct file_operations fuse_dir_operations = {
+static const struct file_operations fuse_dir_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= fuse_readdir,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6f05379b0a0..975f2697e86 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -12,7 +12,7 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 
-static struct file_operations fuse_direct_io_file_operations;
+static const struct file_operations fuse_direct_io_file_operations;
 
 static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
 			  struct fuse_open_out *outargp)
@@ -611,7 +611,7 @@ static int fuse_set_page_dirty(struct page *page)
 	return 0;
 }
 
-static struct file_operations fuse_file_operations = {
+static const struct file_operations fuse_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
@@ -623,7 +623,7 @@ static struct file_operations fuse_file_operations = {
 	.sendfile	= generic_file_sendfile,
 };
 
-static struct file_operations fuse_direct_io_file_operations = {
+static const struct file_operations fuse_direct_io_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= fuse_direct_read,
 	.write		= fuse_direct_write,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 4a83adfec96..a16a04fcf41 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -346,7 +346,7 @@ static inline u64 get_node_id(struct inode *inode)
 }
 
 /** Device operations */
-extern struct file_operations fuse_dev_operations;
+extern const struct file_operations fuse_dev_operations;
 
 /**
  * This is the single global spinlock which protects FUSE's structures
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 534e5a7480e..7cd8cc03aea 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -313,7 +313,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	return res;
 }
 
-struct file_operations hfs_dir_operations = {
+const struct file_operations hfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= hfs_readdir,
 	.llseek		= generic_file_llseek,
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 18ce47ab1b7..3ed8663a8db 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -169,7 +169,7 @@ extern int hfs_cat_move(u32, struct inode *, struct qstr *,
 extern void hfs_cat_build_key(struct super_block *, btree_key *, u32, struct qstr *);
 
 /* dir.c */
-extern struct file_operations hfs_dir_operations;
+extern const struct file_operations hfs_dir_operations;
 extern struct inode_operations hfs_dir_inode_operations;
 
 /* extent.c */
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 2c564701724..2d4ced22201 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -17,7 +17,7 @@
 #include "hfs_fs.h"
 #include "btree.h"
 
-static struct file_operations hfs_file_operations;
+static const struct file_operations hfs_file_operations;
 static struct inode_operations hfs_file_inode_operations;
 
 /*================ Variable-like macros ================*/
@@ -601,7 +601,7 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
 }
 
 
-static struct file_operations hfs_file_operations = {
+static const struct file_operations hfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 01a6fe3a395..1f9ece0de32 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -483,7 +483,7 @@ struct inode_operations hfsplus_dir_inode_operations = {
 	.rename		= hfsplus_rename,
 };
 
-struct file_operations hfsplus_dir_operations = {
+const struct file_operations hfsplus_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= hfsplus_readdir,
 	.ioctl          = hfsplus_ioctl,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 9fbe4d2aeec..acf66dba3e0 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -280,7 +280,7 @@ static struct inode_operations hfsplus_file_inode_operations = {
 	.listxattr	= hfsplus_listxattr,
 };
 
-static struct file_operations hfsplus_file_operations = {
+static const struct file_operations hfsplus_file_operations = {
 	.llseek 	= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index b3ad0bd0312..bf0f8e16e43 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -384,7 +384,7 @@ int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 	return fsync_file(HOSTFS_I(dentry->d_inode)->fd, datasync);
 }
 
-static struct file_operations hostfs_file_fops = {
+static const struct file_operations hostfs_file_fops = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.sendfile	= generic_file_sendfile,
@@ -399,7 +399,7 @@ static struct file_operations hostfs_file_fops = {
 	.fsync		= hostfs_fsync,
 };
 
-static struct file_operations hostfs_dir_fops = {
+static const struct file_operations hostfs_dir_fops = {
 	.llseek		= generic_file_llseek,
 	.readdir	= hostfs_readdir,
 	.read		= generic_read_dir,
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 5591f9623aa..ecc9180645a 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -310,7 +310,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
 	return ERR_PTR(-ENOENT);
 }
 
-struct file_operations hpfs_dir_ops =
+const struct file_operations hpfs_dir_ops =
 {
 	.llseek		= hpfs_dir_lseek,
 	.read		= generic_read_dir,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 7c995ac4081..d3b9fffe45a 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -119,7 +119,7 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
 	return retval;
 }
 
-struct file_operations hpfs_file_ops =
+const struct file_operations hpfs_file_ops =
 {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 4c6473ab3b3..29b7a3e5517 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -240,7 +240,7 @@ void hpfs_set_dentry_operations(struct dentry *);
 /* dir.c */
 
 struct dentry *hpfs_lookup(struct inode *, struct dentry *, struct nameidata *);
-extern struct file_operations hpfs_dir_ops;
+extern const struct file_operations hpfs_dir_ops;
 
 /* dnode.c */
 
@@ -266,7 +266,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, char *, char *, int);
 /* file.c */
 
 int hpfs_file_fsync(struct file *, struct dentry *, int);
-extern struct file_operations hpfs_file_ops;
+extern const struct file_operations hpfs_file_ops;
 extern struct inode_operations hpfs_file_iops;
 extern struct address_space_operations hpfs_aops;
 
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index a44dc589739..2ba20cdb5ba 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -558,7 +558,7 @@ static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
 	return(default_llseek(file, off, where));
 }
 
-static struct file_operations hppfs_file_fops = {
+static const struct file_operations hppfs_file_fops = {
 	.owner		= NULL,
 	.llseek		= hppfs_llseek,
 	.read		= hppfs_read,
@@ -609,7 +609,7 @@ static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 	return(0);
 }
 
-static struct file_operations hppfs_dir_fops = {
+static const struct file_operations hppfs_dir_fops = {
 	.owner		= NULL,
 	.readdir	= hppfs_readdir,
 	.open		= hppfs_dir_open,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 25fa8bba8cb..3a5b4e92345 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -35,7 +35,7 @@
 
 static struct super_operations hugetlbfs_ops;
 static struct address_space_operations hugetlbfs_aops;
-struct file_operations hugetlbfs_file_operations;
+const struct file_operations hugetlbfs_file_operations;
 static struct inode_operations hugetlbfs_dir_inode_operations;
 static struct inode_operations hugetlbfs_inode_operations;
 
@@ -566,7 +566,7 @@ static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 		inode_init_once(&ei->vfs_inode);
 }
 
-struct file_operations hugetlbfs_file_operations = {
+const struct file_operations hugetlbfs_file_operations = {
 	.mmap			= hugetlbfs_file_mmap,
 	.fsync			= simple_sync_file,
 	.get_unmapped_area	= hugetlb_get_unmapped_area,
diff --git a/fs/inotify.c b/fs/inotify.c
index f48a3dae071..367c487c014 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -920,7 +920,7 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
 	return ret;
 }
 
-static struct file_operations inotify_fops = {
+static const struct file_operations inotify_fops = {
 	.poll           = inotify_poll,
 	.read           = inotify_read,
 	.release        = inotify_release,
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 7901ac9f97a..5440ea292c6 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -16,7 +16,7 @@
 
 static int isofs_readdir(struct file *, void *, filldir_t);
 
-struct file_operations isofs_dir_operations =
+const struct file_operations isofs_dir_operations =
 {
 	.read		= generic_read_dir,
 	.readdir	= isofs_readdir,
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 439a19b1bf3..b87ba066f5e 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -175,6 +175,6 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de,
 }
 
 extern struct inode_operations isofs_dir_inode_operations;
-extern struct file_operations isofs_dir_operations;
+extern const struct file_operations isofs_dir_operations;
 extern struct address_space_operations isofs_symlink_aops;
 extern struct export_operations isofs_export_ops;
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 5a4519e834d..020cc097c53 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -55,9 +55,9 @@
 static int jffs_remove(struct inode *dir, struct dentry *dentry, int type);
 
 static struct super_operations jffs_ops;
-static struct file_operations jffs_file_operations;
+static const struct file_operations jffs_file_operations;
 static struct inode_operations jffs_file_inode_operations;
-static struct file_operations jffs_dir_operations;
+static const struct file_operations jffs_dir_operations;
 static struct inode_operations jffs_dir_inode_operations;
 static struct address_space_operations jffs_address_operations;
 
@@ -1629,7 +1629,7 @@ static int jffs_fsync(struct file *f, struct dentry *d, int datasync)
 }
 
 
-static struct file_operations jffs_file_operations =
+static const struct file_operations jffs_file_operations =
 {
 	.open		= generic_file_open,
 	.llseek		= generic_file_llseek,
@@ -1649,7 +1649,7 @@ static struct inode_operations jffs_file_inode_operations =
 };
 
 
-static struct file_operations jffs_dir_operations =
+static const struct file_operations jffs_dir_operations =
 {
 	.readdir	= jffs_readdir,
 };
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index a7bf9cb2567..8bc7a5018e4 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -37,7 +37,7 @@ static int jffs2_mknod (struct inode *,struct dentry *,int,dev_t);
 static int jffs2_rename (struct inode *, struct dentry *,
                         struct inode *, struct dentry *);
 
-struct file_operations jffs2_dir_operations =
+const struct file_operations jffs2_dir_operations =
 {
 	.read =		generic_read_dir,
 	.readdir =	jffs2_readdir,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 935f273dc57..9f4171213e5 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -38,7 +38,7 @@ int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync)
 	return 0;
 }
 
-struct file_operations jffs2_file_operations =
+const struct file_operations jffs2_file_operations =
 {
 	.llseek =	generic_file_llseek,
 	.open =		generic_file_open,
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 59e7a393200..d307cf54862 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -159,11 +159,11 @@ void jffs2_stop_garbage_collect_thread(struct jffs2_sb_info *c);
 void jffs2_garbage_collect_trigger(struct jffs2_sb_info *c);
 
 /* dir.c */
-extern struct file_operations jffs2_dir_operations;
+extern const struct file_operations jffs2_dir_operations;
 extern struct inode_operations jffs2_dir_inode_operations;
 
 /* file.c */
-extern struct file_operations jffs2_file_operations;
+extern const struct file_operations jffs2_file_operations;
 extern struct inode_operations jffs2_file_inode_operations;
 extern struct address_space_operations jffs2_file_address_operations;
 int jffs2_fsync(struct file *, struct dentry *, int);
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index e1ac6e497e2..1c9745be5ad 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -100,7 +100,7 @@ struct inode_operations jfs_file_inode_operations = {
 #endif
 };
 
-struct file_operations jfs_file_operations = {
+const struct file_operations jfs_file_operations = {
 	.open		= jfs_open,
 	.llseek		= generic_file_llseek,
 	.write		= generic_file_write,
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 095d471b9f9..c3007267446 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -35,9 +35,9 @@ extern void jfs_set_inode_flags(struct inode *);
 
 extern struct address_space_operations jfs_aops;
 extern struct inode_operations jfs_dir_inode_operations;
-extern struct file_operations jfs_dir_operations;
+extern const struct file_operations jfs_dir_operations;
 extern struct inode_operations jfs_file_inode_operations;
-extern struct file_operations jfs_file_operations;
+extern const struct file_operations jfs_file_operations;
 extern struct inode_operations jfs_symlink_inode_operations;
 extern struct dentry_operations jfs_ci_dentry_operations;
 #endif				/* _H_JFS_INODE */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 309cee575f7..09ea03f6227 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1519,7 +1519,7 @@ struct inode_operations jfs_dir_inode_operations = {
 #endif
 };
 
-struct file_operations jfs_dir_operations = {
+const struct file_operations jfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= jfs_readdir,
 	.fsync		= jfs_fsync,
diff --git a/fs/libfs.c b/fs/libfs.c
index 4fdeaceb892..7145ba7a48d 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -179,7 +179,7 @@ ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t
 	return -EISDIR;
 }
 
-struct file_operations simple_dir_operations = {
+const struct file_operations simple_dir_operations = {
 	.open		= dcache_dir_open,
 	.release	= dcache_dir_close,
 	.llseek		= dcache_dir_lseek,
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 732502aabc0..69224d1fe04 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -14,7 +14,7 @@ typedef struct minix_dir_entry minix_dirent;
 
 static int minix_readdir(struct file *, void *, filldir_t);
 
-struct file_operations minix_dir_operations = {
+const struct file_operations minix_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= minix_readdir,
 	.fsync		= minix_sync_file,
diff --git a/fs/minix/file.c b/fs/minix/file.c
index f1d77acb3f0..420b32882a1 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -15,7 +15,7 @@
  */
 int minix_sync_file(struct file *, struct dentry *, int);
 
-struct file_operations minix_file_operations = {
+const struct file_operations minix_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index e42a8bb8900..c55b77cdcc8 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -81,8 +81,8 @@ extern int minix_sync_file(struct file *, struct dentry *, int);
 
 extern struct inode_operations minix_file_inode_operations;
 extern struct inode_operations minix_dir_inode_operations;
-extern struct file_operations minix_file_operations;
-extern struct file_operations minix_dir_operations;
+extern const struct file_operations minix_file_operations;
+extern const struct file_operations minix_dir_operations;
 extern struct dentry_operations minix_dentry_operations;
 
 static inline struct minix_sb_info *minix_sb(struct super_block *sb)
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index cfd76f431dc..f0860c602d8 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -49,7 +49,7 @@ extern int ncp_symlink(struct inode *, struct dentry *, const char *);
 #define ncp_symlink NULL
 #endif
 		      
-struct file_operations ncp_dir_operations =
+const struct file_operations ncp_dir_operations =
 {
 	.read		= generic_read_dir,
 	.readdir	= ncp_readdir,
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index ebdad8f6398..e6b7c67cf05 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -283,7 +283,7 @@ static int ncp_release(struct inode *inode, struct file *file) {
 	return 0;
 }
 
-struct file_operations ncp_file_operations =
+const struct file_operations ncp_file_operations =
 {
 	.llseek		= remote_llseek,
 	.read		= ncp_file_read,
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 06c48b385c9..a23f3489416 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -54,7 +54,7 @@ static int nfs_rename(struct inode *, struct dentry *,
 static int nfs_fsync_dir(struct file *, struct dentry *, int);
 static loff_t nfs_llseek_dir(struct file *, loff_t, int);
 
-struct file_operations nfs_dir_operations = {
+const struct file_operations nfs_dir_operations = {
 	.llseek		= nfs_llseek_dir,
 	.read		= generic_read_dir,
 	.readdir	= nfs_readdir,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index dee49a0cb99..f1df2c8d925 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -49,7 +49,7 @@ static int nfs_check_flags(int flags);
 static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
 static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
 
-struct file_operations nfs_file_operations = {
+const struct file_operations nfs_file_operations = {
 	.llseek		= nfs_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index c8960aff096..3ef017b3b5b 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -134,7 +134,7 @@ static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size
 	return simple_transaction_read(file, buf, size, pos);
 }
 
-static struct file_operations transaction_ops = {
+static const struct file_operations transaction_ops = {
 	.write		= nfsctl_transaction_write,
 	.read		= nfsctl_transaction_read,
 	.release	= simple_transaction_release,
@@ -146,7 +146,7 @@ static int exports_open(struct inode *inode, struct file *file)
 	return seq_open(file, &nfs_exports_op);
 }
 
-static struct file_operations exports_operations = {
+static const struct file_operations exports_operations = {
 	.open		= exports_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 1cf955bcc52..57265d56380 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -80,7 +80,7 @@ static int nfsd_proc_open(struct inode *inode, struct file *file)
 	return single_open(file, nfsd_proc_show, NULL);
 }
 
-static struct file_operations nfsd_proc_fops = {
+static const struct file_operations nfsd_proc_fops = {
 	.owner = THIS_MODULE,
 	.open = nfsd_proc_open,
 	.read  = seq_read,
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 9d9ed3fe371..d1e2c6f9f05 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1553,7 +1553,7 @@ static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry,
 
 #endif /* NTFS_RW */
 
-struct file_operations ntfs_dir_ops = {
+const struct file_operations ntfs_dir_ops = {
 	.llseek		= generic_file_llseek,	/* Seek inside directory. */
 	.read		= generic_read_dir,	/* Return -EISDIR. */
 	.readdir	= ntfs_readdir,		/* Read directory contents. */
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index f5d057e4acc..c63a83e8da9 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2294,7 +2294,7 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
 
 #endif /* NTFS_RW */
 
-struct file_operations ntfs_file_ops = {
+const struct file_operations ntfs_file_ops = {
 	.llseek		= generic_file_llseek,	 /* Seek inside file. */
 	.read		= generic_file_read,	 /* Read from file. */
 	.aio_read	= generic_file_aio_read, /* Async read from file. */
@@ -2337,6 +2337,6 @@ struct inode_operations ntfs_file_inode_ops = {
 #endif /* NTFS_RW */
 };
 
-struct file_operations ntfs_empty_file_ops = {};
+const struct file_operations ntfs_empty_file_ops = {};
 
 struct inode_operations ntfs_empty_inode_ops = {};
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index 166142960b5..bf7b3d7c093 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -60,13 +60,13 @@ extern struct kmem_cache *ntfs_index_ctx_cache;
 extern struct address_space_operations ntfs_aops;
 extern struct address_space_operations ntfs_mst_aops;
 
-extern struct  file_operations ntfs_file_ops;
+extern const struct  file_operations ntfs_file_ops;
 extern struct inode_operations ntfs_file_inode_ops;
 
-extern struct  file_operations ntfs_dir_ops;
+extern const struct  file_operations ntfs_dir_ops;
 extern struct inode_operations ntfs_dir_inode_ops;
 
-extern struct  file_operations ntfs_empty_file_ops;
+extern const struct  file_operations ntfs_empty_file_ops;
 extern struct inode_operations ntfs_empty_inode_ops;
 
 extern struct export_operations ntfs_export_ops;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 84f153aca69..64cd52860c8 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2017,7 +2017,7 @@ out:
 	return ret;
 }
 
-static struct file_operations ocfs2_dlm_debug_fops = {
+static const struct file_operations ocfs2_dlm_debug_fops = {
 	.open =		ocfs2_dlm_debug_open,
 	.release =	ocfs2_dlm_debug_release,
 	.read =		seq_read,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4b4cbadd583..34e903a6a46 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1176,7 +1176,7 @@ struct inode_operations ocfs2_special_file_iops = {
 	.getattr	= ocfs2_getattr,
 };
 
-struct file_operations ocfs2_fops = {
+const struct file_operations ocfs2_fops = {
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.sendfile	= generic_file_sendfile,
@@ -1188,7 +1188,7 @@ struct file_operations ocfs2_fops = {
 	.aio_write	= ocfs2_file_aio_write,
 };
 
-struct file_operations ocfs2_dops = {
+const struct file_operations ocfs2_dops = {
 	.read		= generic_read_dir,
 	.readdir	= ocfs2_readdir,
 	.fsync		= ocfs2_sync_file,
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index a5ea33b2406..740c9e7ca59 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -26,8 +26,8 @@
 #ifndef OCFS2_FILE_H
 #define OCFS2_FILE_H
 
-extern struct file_operations ocfs2_fops;
-extern struct file_operations ocfs2_dops;
+extern const struct file_operations ocfs2_fops;
+extern const struct file_operations ocfs2_dops;
 extern struct inode_operations ocfs2_file_iops;
 extern struct inode_operations ocfs2_special_file_iops;
 struct ocfs2_alloc_context;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index aeb0106890e..0f14276a2e5 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -581,17 +581,17 @@ int property_release (struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static struct file_operations openpromfs_prop_ops = {
+static const struct file_operations openpromfs_prop_ops = {
 	.read		= property_read,
 	.write		= property_write,
 	.release	= property_release,
 };
 
-static struct file_operations openpromfs_nodenum_ops = {
+static const struct file_operations openpromfs_nodenum_ops = {
 	.read		= nodenum_read,
 };
 
-static struct file_operations openprom_operations = {
+static const struct file_operations openprom_operations = {
 	.read		= generic_read_dir,
 	.readdir	= openpromfs_readdir,
 };
diff --git a/fs/pipe.c b/fs/pipe.c
index 4384c929094..e2f4f1d9ffc 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -568,7 +568,7 @@ pipe_rdwr_open(struct inode *inode, struct file *filp)
  * The file_operations structs are not static because they
  * are also used in linux/fs/fifo.c to do operations on FIFOs.
  */
-struct file_operations read_fifo_fops = {
+const struct file_operations read_fifo_fops = {
 	.llseek		= no_llseek,
 	.read		= pipe_read,
 	.readv		= pipe_readv,
@@ -580,7 +580,7 @@ struct file_operations read_fifo_fops = {
 	.fasync		= pipe_read_fasync,
 };
 
-struct file_operations write_fifo_fops = {
+const struct file_operations write_fifo_fops = {
 	.llseek		= no_llseek,
 	.read		= bad_pipe_r,
 	.write		= pipe_write,
@@ -592,7 +592,7 @@ struct file_operations write_fifo_fops = {
 	.fasync		= pipe_write_fasync,
 };
 
-struct file_operations rdwr_fifo_fops = {
+const struct file_operations rdwr_fifo_fops = {
 	.llseek		= no_llseek,
 	.read		= pipe_read,
 	.readv		= pipe_readv,
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index adc2cd95169..17f6e8fa139 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -31,7 +31,7 @@ static int open_kcore(struct inode * inode, struct file * filp)
 
 static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *);
 
-struct file_operations proc_kcore_operations = {
+const struct file_operations proc_kcore_operations = {
 	.read		= read_kcore,
 	.open		= open_kcore,
 };
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index 10d37bf2520..ff3b90b56e9 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -47,7 +47,7 @@ static unsigned int kmsg_poll(struct file *file, poll_table *wait)
 }
 
 
-struct file_operations proc_kmsg_operations = {
+const struct file_operations proc_kmsg_operations = {
 	.read		= kmsg_read,
 	.poll		= kmsg_poll,
 	.open		= kmsg_open,
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 4063fb32f78..7efa73d44c9 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -172,7 +172,7 @@ static int open_vmcore(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-struct file_operations proc_vmcore_operations = {
+const struct file_operations proc_vmcore_operations = {
 	.read		= read_vmcore,
 	.open		= open_vmcore,
 };
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index 7a8f5595c26..9031948fefd 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -81,7 +81,7 @@ out:
 	return 0;
 }
 
-struct file_operations qnx4_dir_operations =
+const struct file_operations qnx4_dir_operations =
 {
 	.read		= generic_read_dir,
 	.readdir	= qnx4_readdir,
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index c33963fded9..62af4b1348b 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -19,7 +19,7 @@
  * We have mostly NULL's here: the current defaults are ok for
  * the qnx4 filesystem.
  */
-struct file_operations qnx4_file_operations =
+const struct file_operations qnx4_file_operations =
 {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 6ada2095b9a..00a933eb820 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -32,7 +32,7 @@ struct address_space_operations ramfs_aops = {
 	.commit_write	= simple_commit_write
 };
 
-struct file_operations ramfs_file_operations = {
+const struct file_operations ramfs_file_operations = {
 	.read		= generic_file_read,
 	.write		= generic_file_write,
 	.mmap		= generic_file_mmap,
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index b1ca234068f..f443a84b98a 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -33,7 +33,7 @@ struct address_space_operations ramfs_aops = {
 	.commit_write		= simple_commit_write
 };
 
-struct file_operations ramfs_file_operations = {
+const struct file_operations ramfs_file_operations = {
 	.mmap			= ramfs_nommu_mmap,
 	.get_unmapped_area	= ramfs_nommu_get_unmapped_area,
 	.read			= generic_file_read,
diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h
index 272c8a7120b..313237631b4 100644
--- a/fs/ramfs/internal.h
+++ b/fs/ramfs/internal.h
@@ -11,5 +11,5 @@
 
 
 extern struct address_space_operations ramfs_aops;
-extern struct file_operations ramfs_file_operations;
+extern const struct file_operations ramfs_file_operations;
 extern struct inode_operations ramfs_file_inode_operations;
diff --git a/fs/read_write.c b/fs/read_write.c
index 34b1bf259ef..6256ca81a71 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -19,7 +19,7 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
-struct file_operations generic_ro_fops = {
+const struct file_operations generic_ro_fops = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.mmap		= generic_file_readonly_mmap,
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index d71ac657928..973c819f803 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -18,7 +18,7 @@ static int reiserfs_readdir(struct file *, void *, filldir_t);
 static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
 			      int datasync);
 
-struct file_operations reiserfs_dir_operations = {
+const struct file_operations reiserfs_dir_operations = {
 	.read = generic_read_dir,
 	.readdir = reiserfs_readdir,
 	.fsync = reiserfs_dir_fsync,
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index d0c1e865963..010094d14da 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1566,7 +1566,7 @@ static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf,
 	return generic_file_aio_write(iocb, buf, count, pos);
 }
 
-struct file_operations reiserfs_file_operations = {
+const struct file_operations reiserfs_file_operations = {
 	.read = generic_file_read,
 	.write = reiserfs_file_write,
 	.ioctl = reiserfs_ioctl,
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index ef6caed9336..731688e1cfe 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -470,7 +470,7 @@ static int r_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
-static struct file_operations r_file_operations = {
+static const struct file_operations r_file_operations = {
 	.open = r_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index c2fc424d7d5..9b9eda7b335 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -463,7 +463,7 @@ static struct address_space_operations romfs_aops = {
 	.readpage = romfs_readpage
 };
 
-static struct file_operations romfs_dir_operations = {
+static const struct file_operations romfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= romfs_readdir,
 };
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index 0424d06b147..34c7a11d91f 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -34,7 +34,7 @@ static int smb_rename(struct inode *, struct dentry *,
 static int smb_make_node(struct inode *,struct dentry *,int,dev_t);
 static int smb_link(struct dentry *, struct inode *, struct dentry *);
 
-struct file_operations smb_dir_operations =
+const struct file_operations smb_dir_operations =
 {
 	.read		= generic_read_dir,
 	.readdir	= smb_readdir,
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index 7042e62726a..c56bd99a970 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -401,7 +401,7 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
 	return error;
 }
 
-struct file_operations smb_file_operations =
+const struct file_operations smb_file_operations =
 {
 	.llseek		= remote_llseek,
 	.read		= smb_file_read,
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h
index e866ec8660d..47664597e6b 100644
--- a/fs/smbfs/proto.h
+++ b/fs/smbfs/proto.h
@@ -35,7 +35,7 @@ extern int smb_proc_symlink(struct smb_sb_info *server, struct dentry *d, const
 extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, struct dentry *new_dentry);
 extern void smb_install_null_ops(struct smb_ops *ops);
 /* dir.c */
-extern struct file_operations smb_dir_operations;
+extern const struct file_operations smb_dir_operations;
 extern struct inode_operations smb_dir_inode_operations;
 extern struct inode_operations smb_dir_inode_operations_unix;
 extern void smb_new_dentry(struct dentry *dentry);
@@ -64,7 +64,7 @@ extern int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
 extern int smb_notify_change(struct dentry *dentry, struct iattr *attr);
 /* file.c */
 extern struct address_space_operations smb_file_aops;
-extern struct file_operations smb_file_operations;
+extern const struct file_operations smb_file_operations;
 extern struct inode_operations smb_file_inode_operations;
 /* ioctl.c */
 extern int smb_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 78899eeab97..c16a93c353c 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -163,7 +163,7 @@ static int release(struct inode * inode, struct file * file)
 	return 0;
 }
 
-struct file_operations bin_fops = {
+const struct file_operations bin_fops = {
 	.read		= read,
 	.write		= write,
 	.mmap		= mmap,
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 9ee95686444..f26880a4785 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -503,7 +503,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 	return offset;
 }
 
-struct file_operations sysfs_dir_operations = {
+const struct file_operations sysfs_dir_operations = {
 	.open		= sysfs_dir_open,
 	.release	= sysfs_dir_close,
 	.llseek		= sysfs_dir_lseek,
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 5e83e724678..830f76fa098 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -348,7 +348,7 @@ static int sysfs_release(struct inode * inode, struct file * filp)
 	return 0;
 }
 
-struct file_operations sysfs_file_operations = {
+const struct file_operations sysfs_file_operations = {
 	.read		= sysfs_read_file,
 	.write		= sysfs_write_file,
 	.llseek		= generic_file_llseek,
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index cf11d5b789d..32958a7c50e 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -21,9 +21,9 @@ extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
 
 extern struct rw_semaphore sysfs_rename_sem;
 extern struct super_block * sysfs_sb;
-extern struct file_operations sysfs_dir_operations;
-extern struct file_operations sysfs_file_operations;
-extern struct file_operations bin_fops;
+extern const struct file_operations sysfs_dir_operations;
+extern const struct file_operations sysfs_file_operations;
+extern const struct file_operations bin_fops;
 extern struct inode_operations sysfs_dir_inode_operations;
 extern struct inode_operations sysfs_symlink_inode_operations;
 
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index cce8b05cba5..8c66e9270dd 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -20,7 +20,7 @@
 
 static int sysv_readdir(struct file *, void *, filldir_t);
 
-struct file_operations sysv_dir_operations = {
+const struct file_operations sysv_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= sysv_readdir,
 	.fsync		= sysv_sync_file,
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index da69abc0624..a59e303135f 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -19,7 +19,7 @@
  * We have mostly NULLs here: the current defaults are OK for
  * the coh filesystem.
  */
-struct file_operations sysv_file_operations = {
+const struct file_operations sysv_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index b7f9b4a42aa..393a480e4de 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -159,8 +159,8 @@ extern ino_t sysv_inode_by_name(struct dentry *);
 extern struct inode_operations sysv_file_inode_operations;
 extern struct inode_operations sysv_dir_inode_operations;
 extern struct inode_operations sysv_fast_symlink_inode_operations;
-extern struct file_operations sysv_file_operations;
-extern struct file_operations sysv_dir_operations;
+extern const struct file_operations sysv_file_operations;
+extern const struct file_operations sysv_dir_operations;
 extern struct address_space_operations sysv_aops;
 extern struct super_operations sysv_sops;
 extern struct dentry_operations sysv_dentry_operations;
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index f5222527fe3..8c28efa3b8f 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -42,7 +42,7 @@ static int do_udf_readdir(struct inode *, struct file *, filldir_t, void *);
 
 /* readdir and lookup functions */
 
-struct file_operations udf_dir_operations = {
+const struct file_operations udf_dir_operations = {
 	.read			= generic_read_dir,
 	.readdir		= udf_readdir,
 	.ioctl			= udf_ioctl,
diff --git a/fs/udf/file.c b/fs/udf/file.c
index a6f2acc1f15..e34b00e303f 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -248,7 +248,7 @@ static int udf_release_file(struct inode * inode, struct file * filp)
 	return 0;
 }
 
-struct file_operations udf_file_operations = {
+const struct file_operations udf_file_operations = {
 	.read			= generic_file_read,
 	.ioctl			= udf_ioctl,
 	.open			= generic_file_open,
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 1d5800e0cbe..023e19ba5a2 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -44,9 +44,9 @@ struct buffer_head;
 struct super_block;
 
 extern struct inode_operations udf_dir_inode_operations;
-extern struct file_operations udf_dir_operations;
+extern const struct file_operations udf_dir_operations;
 extern struct inode_operations udf_file_inode_operations;
-extern struct file_operations udf_file_operations;
+extern const struct file_operations udf_file_operations;
 extern struct address_space_operations udf_aops;
 extern struct address_space_operations udf_adinicb_aops;
 extern struct address_space_operations udf_symlink_aops;
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 7c10c68902a..1a561202d3f 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -620,7 +620,7 @@ int ufs_empty_dir (struct inode * inode)
 	return 1;
 }
 
-struct file_operations ufs_dir_operations = {
+const struct file_operations ufs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= ufs_readdir,
 	.fsync		= file_fsync,
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 62ad481810e..312fd3f8631 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -31,7 +31,7 @@
  * the ufs filesystem.
  */
  
-struct file_operations ufs_file_operations = {
+const struct file_operations ufs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 185567a6a56..85997b1205f 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -528,7 +528,7 @@ open_exec_out:
 }
 #endif /* HAVE_FOP_OPEN_EXEC */
 
-struct file_operations xfs_file_operations = {
+const struct file_operations xfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
@@ -550,7 +550,7 @@ struct file_operations xfs_file_operations = {
 #endif
 };
 
-struct file_operations xfs_invis_file_operations = {
+const struct file_operations xfs_invis_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
@@ -570,7 +570,7 @@ struct file_operations xfs_invis_file_operations = {
 };
 
 
-struct file_operations xfs_dir_file_operations = {
+const struct file_operations xfs_dir_file_operations = {
 	.read		= generic_read_dir,
 	.readdir	= xfs_file_readdir,
 	.unlocked_ioctl	= xfs_file_ioctl,
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index a8417d7af5f..ad6173da567 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -22,9 +22,9 @@ extern struct inode_operations xfs_inode_operations;
 extern struct inode_operations xfs_dir_inode_operations;
 extern struct inode_operations xfs_symlink_inode_operations;
 
-extern struct file_operations xfs_file_operations;
-extern struct file_operations xfs_dir_file_operations;
-extern struct file_operations xfs_invis_file_operations;
+extern const struct file_operations xfs_file_operations;
+extern const struct file_operations xfs_dir_file_operations;
+extern const struct file_operations xfs_invis_file_operations;
 
 extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
                         int, unsigned int, void __user *);
-- 
cgit v1.2.3


From 7f927fcc2fd1575d01efb4b76665975007945690 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 28 Mar 2006 01:56:53 -0800
Subject: [PATCH] Typo fixes

Fix a lot of typos.  Eyeballed by jmc@ in OpenBSD.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/mbcache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/mbcache.c b/fs/mbcache.c
index 73e754fea2d..e4fde1ab22c 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -311,7 +311,7 @@ fail:
 /*
  * mb_cache_shrink()
  *
- * Removes all cache entires of a device from the cache. All cache entries
+ * Removes all cache entries of a device from the cache. All cache entries
  * currently in use cannot be freed, and thus remain in the cache. All others
  * are freed.
  *
-- 
cgit v1.2.3


From c41564b5af328ea4600b26119f6c9c8e1eb5c28b Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Wed, 29 Mar 2006 08:55:14 +1000
Subject: [XFS] We really suck at spulling.  Thanks to Chris Pascoe for fixing
 all these typos.

SGI-PV: 904196
SGI-Modid: xfs-linux-melb:xfs-kern:25539a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/mrlock.h      |  2 +-
 fs/xfs/linux-2.6/xfs_aops.c    | 14 +++++++-------
 fs/xfs/linux-2.6/xfs_export.h  |  2 +-
 fs/xfs/linux-2.6/xfs_lrw.c     |  2 +-
 fs/xfs/linux-2.6/xfs_vfs.h     |  2 +-
 fs/xfs/quota/xfs_dquot_item.c  |  2 +-
 fs/xfs/quota/xfs_qm.c          | 14 +++++++-------
 fs/xfs/quota/xfs_qm_syscalls.c |  2 +-
 fs/xfs/quota/xfs_trans_dquot.c |  2 +-
 fs/xfs/xfs_acl.c               |  2 +-
 fs/xfs/xfs_ag.h                |  2 +-
 fs/xfs/xfs_alloc.c             |  6 +++---
 fs/xfs/xfs_alloc.h             |  2 +-
 fs/xfs/xfs_attr.c              |  6 +++---
 fs/xfs/xfs_attr_leaf.c         |  4 ++--
 fs/xfs/xfs_behavior.c          |  4 ++--
 fs/xfs/xfs_behavior.h          |  4 ++--
 fs/xfs/xfs_buf_item.c          |  4 ++--
 fs/xfs/xfs_cap.h               |  2 +-
 fs/xfs/xfs_da_btree.c          |  2 +-
 fs/xfs/xfs_dir2_block.c        |  2 +-
 fs/xfs/xfs_dir2_leaf.c         |  2 +-
 fs/xfs/xfs_dir2_node.c         |  2 +-
 fs/xfs/xfs_dir_leaf.c          |  2 +-
 fs/xfs/xfs_fsops.c             |  2 +-
 fs/xfs/xfs_ialloc.c            |  2 +-
 fs/xfs/xfs_iget.c              |  2 +-
 fs/xfs/xfs_inode.c             | 14 +++++++-------
 fs/xfs/xfs_inode_item.c        |  2 +-
 fs/xfs/xfs_itable.c            |  4 ++--
 fs/xfs/xfs_itable.h            |  2 +-
 fs/xfs/xfs_log.c               | 22 +++++++++++-----------
 fs/xfs/xfs_log.h               |  2 +-
 fs/xfs/xfs_log_recover.c       |  4 ++--
 fs/xfs/xfs_mount.c             |  4 ++--
 fs/xfs/xfs_mount.h             |  2 +-
 fs/xfs/xfs_quota.h             |  4 ++--
 fs/xfs/xfs_trans.c             |  6 +++---
 fs/xfs/xfs_trans.h             |  2 +-
 fs/xfs/xfs_trans_inode.c       |  2 +-
 fs/xfs/xfs_vfsops.c            | 10 +++++-----
 fs/xfs/xfs_vnodeops.c          | 12 ++++++------
 42 files changed, 93 insertions(+), 93 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
index 16b44c3c236..1b262b790d9 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -79,7 +79,7 @@ static inline void mrdemote(mrlock_t *mrp)
  * Debug-only routine, without some platform-specific asm code, we can
  * now only answer requests regarding whether we hold the lock for write
  * (reader state is outside our visibility, we only track writer state).
- * Note: means !ismrlocked would give false positivies, so don't do that.
+ * Note: means !ismrlocked would give false positives, so don't do that.
  */
 static inline int ismrlocked(mrlock_t *mrp, int type)
 {
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c02f7c5b746..62b4553fb60 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -372,7 +372,7 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
  * assumes that all buffers on the page are started at the same time.
  *
  * The fix is two passes across the ioend list - one to start writeback on the
- * bufferheads, and then the second one submit them for I/O.
+ * buffer_heads, and then submit them for I/O on the second pass.
  */
 STATIC void
 xfs_submit_ioend(
@@ -699,7 +699,7 @@ xfs_convert_page(
 
 	/*
 	 * page_dirty is initially a count of buffers on the page before
-	 * EOF and is decrememted as we move each into a cleanable state.
+	 * EOF and is decremented as we move each into a cleanable state.
 	 *
 	 * Derivation:
 	 *
@@ -842,7 +842,7 @@ xfs_cluster_write(
  * page if possible.
  * The bh->b_state's cannot know if any of the blocks or which block for
  * that matter are dirty due to mmap writes, and therefore bh uptodate is
- * only vaild if the page itself isn't completely uptodate.  Some layers
+ * only valid if the page itself isn't completely uptodate.  Some layers
  * may clear the page dirty flag prior to calling write page, under the
  * assumption the entire page will be written out; by not writing out the
  * whole page the page can be reused before all valid dirty data is
@@ -892,7 +892,7 @@ xfs_page_state_convert(
 
 	/*
 	 * page_dirty is initially a count of buffers on the page before
-	 * EOF and is decrememted as we move each into a cleanable state.
+	 * EOF and is decremented as we move each into a cleanable state.
 	 *
 	 * Derivation:
 	 *
@@ -1339,9 +1339,9 @@ xfs_end_io_direct(
 	/*
 	 * Non-NULL private data means we need to issue a transaction to
 	 * convert a range from unwritten to written extents.  This needs
-	 * to happen from process contect but aio+dio I/O completion
+	 * to happen from process context but aio+dio I/O completion
 	 * happens from irq context so we need to defer it to a workqueue.
-	 * This is not nessecary for synchronous direct I/O, but we do
+	 * This is not necessary for synchronous direct I/O, but we do
 	 * it anyway to keep the code uniform and simpler.
 	 *
 	 * The core direct I/O code might be changed to always call the
@@ -1358,7 +1358,7 @@ xfs_end_io_direct(
 	}
 
 	/*
-	 * blockdev_direct_IO can return an error even afer the I/O
+	 * blockdev_direct_IO can return an error even after the I/O
 	 * completion handler was called.  Thus we need to protect
 	 * against double-freeing.
 	 */
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h
index e5b0559700a..e794ca4efc7 100644
--- a/fs/xfs/linux-2.6/xfs_export.h
+++ b/fs/xfs/linux-2.6/xfs_export.h
@@ -54,7 +54,7 @@
  * Note, the NFS filehandle also includes an fsid portion which
  * may have an inode number in it.  That number is hardcoded to
  * 32bits and there is no way for XFS to intercept it.  In
- * practice this means when exporting an XFS filesytem with 64bit
+ * practice this means when exporting an XFS filesystem with 64bit
  * inodes you should either export the mountpoint (rather than
  * a subdirectory) or use the "fsid" export option.
  */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 0169360475c..84ddf189389 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -681,7 +681,7 @@ start:
 		eventsent = 1;
 
 		/*
-		 * The iolock was dropped and reaquired in XFS_SEND_DATA
+		 * The iolock was dropped and reacquired in XFS_SEND_DATA
 		 * so we have to recheck the size when appending.
 		 * We will only "goto start;" once, since having sent the
 		 * event prevents another call to XFS_SEND_DATA, which is
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 8fed356db05..841200c0309 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -92,7 +92,7 @@ typedef enum {
 #define SYNC_FSDATA		0x0020	/* flush fs data (e.g. superblocks) */
 #define SYNC_REFCACHE		0x0040  /* prune some of the nfs ref cache */
 #define SYNC_REMOUNT		0x0080  /* remount readonly, no dummy LRs */
-#define SYNC_QUIESCE		0x0100  /* quiesce fileystem for a snapshot */
+#define SYNC_QUIESCE		0x0100  /* quiesce filesystem for a snapshot */
 
 typedef int	(*vfs_mount_t)(bhv_desc_t *,
 				struct xfs_mount_args *, struct cred *);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index e4e5f05b841..546f48af882 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -221,7 +221,7 @@ xfs_qm_dqunpin_wait(
  * as possible.
  *
  * We must not be holding the AIL_LOCK at this point. Calling incore() to
- * search the buffercache can be a time consuming thing, and AIL_LOCK is a
+ * search the buffer cache can be a time consuming thing, and AIL_LOCK is a
  * spinlock.
  */
 STATIC void
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 1fb757ef3f4..73c1e5e80c0 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -289,7 +289,7 @@ xfs_qm_rele_quotafs_ref(
 
 /*
  * This is called at mount time from xfs_mountfs to initialize the quotainfo
- * structure and start the global quotamanager (xfs_Gqm) if it hasn't done
+ * structure and start the global quota manager (xfs_Gqm) if it hasn't done
  * so already.	Note that the superblock has not been read in yet.
  */
 void
@@ -807,7 +807,7 @@ xfs_qm_dqattach_one(
  * Given a udquot and gdquot, attach a ptr to the group dquot in the
  * udquot as a hint for future lookups. The idea sounds simple, but the
  * execution isn't, because the udquot might have a group dquot attached
- * already and getting rid of that gets us into lock ordering contraints.
+ * already and getting rid of that gets us into lock ordering constraints.
  * The process is complicated more by the fact that the dquots may or may not
  * be locked on entry.
  */
@@ -1094,10 +1094,10 @@ xfs_qm_sync(
 			}
 			/*
 			 * If we can't grab the flush lock then if the caller
-			 * really wanted us to give this our best shot,
+			 * really wanted us to give this our best shot, so
 			 * see if we can give a push to the buffer before we wait
 			 * on the flush lock. At this point, we know that
-			 * eventhough the dquot is being flushed,
+			 * even though the dquot is being flushed,
 			 * it has (new) dirty data.
 			 */
 			xfs_qm_dqflock_pushbuf_wait(dqp);
@@ -1491,7 +1491,7 @@ xfs_qm_reset_dqcounts(
 		/*
 		 * Do a sanity check, and if needed, repair the dqblk. Don't
 		 * output any warnings because it's perfectly possible to
-		 * find unitialized dquot blks. See comment in xfs_qm_dqcheck.
+		 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
 		 */
 		(void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
 				      "xfs_quotacheck");
@@ -1580,7 +1580,7 @@ xfs_qm_dqiterate(
 
 	error = 0;
 	/*
-	 * This looks racey, but we can't keep an inode lock across a
+	 * This looks racy, but we can't keep an inode lock across a
 	 * trans_reserve. But, this gets called during quotacheck, and that
 	 * happens only at mount time which is single threaded.
 	 */
@@ -1824,7 +1824,7 @@ xfs_qm_dqusage_adjust(
 	 * we have to start from the beginning anyway.
 	 * Once we're done, we'll log all the dquot bufs.
 	 *
-	 * The *QUOTA_ON checks below may look pretty racey, but quotachecks
+	 * The *QUOTA_ON checks below may look pretty racy, but quotachecks
 	 * and quotaoffs don't race. (Quotachecks happen at mount time only).
 	 */
 	if (XFS_IS_UQUOTA_ON(mp)) {
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 676884394aa..c55db463bbf 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -912,7 +912,7 @@ xfs_qm_export_dquot(
 
 	/*
 	 * Internally, we don't reset all the timers when quota enforcement
-	 * gets turned off. No need to confuse the userlevel code,
+	 * gets turned off. No need to confuse the user level code,
 	 * so return zeroes in that case.
 	 */
 	if (! XFS_IS_QUOTA_ENFORCED(mp)) {
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 3290975d31f..d8e131ec0aa 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -804,7 +804,7 @@ xfs_trans_reserve_quota_bydquots(
 	}
 
 	/*
-	 * Didnt change anything critical, so, no need to log
+	 * Didn't change anything critical, so, no need to log
 	 */
 	return (0);
 }
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 4ff0f4e41c6..2539af34eb6 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -395,7 +395,7 @@ xfs_acl_allow_set(
  * The access control process to determine the access permission:
  *	if uid == file owner id, use the file owner bits.
  *	if gid == file owner group id, use the file group bits.
- *	scan ACL for a maching user or group, and use matched entry
+ *	scan ACL for a matching user or group, and use matched entry
  *	permission. Use total permissions of all matching group entries,
  *	until all acl entries are exhausted. The final permission produced
  *	by matching acl entry or entries needs to be & with group permission.
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index a96e2ffce0c..dc2361dd740 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -179,7 +179,7 @@ typedef struct xfs_perag
 {
 	char		pagf_init;	/* this agf's entry is initialized */
 	char		pagi_init;	/* this agi's entry is initialized */
-	char		pagf_metadata;	/* the agf is prefered to be metadata */
+	char		pagf_metadata;	/* the agf is preferred to be metadata */
 	char		pagi_inodeok;	/* The agi is ok for inodes */
 	__uint8_t	pagf_levels[XFS_BTNUM_AGF];
 					/* # of levels in bno & cnt btree */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index f4328e1e2a7..64ee07db0d5 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -511,7 +511,7 @@ STATIC void
 xfs_alloc_trace_busy(
 	char		*name,		/* function tag string */
 	char		*str,		/* additional string */
-	xfs_mount_t	*mp,		/* file system mount poing */
+	xfs_mount_t	*mp,		/* file system mount point */
 	xfs_agnumber_t	agno,		/* allocation group number */
 	xfs_agblock_t	agbno,		/* a.g. relative block number */
 	xfs_extlen_t	len,		/* length of extent */
@@ -1843,7 +1843,7 @@ xfs_alloc_fix_freelist(
 	} else
 		agbp = NULL;
 
-	/* If this is a metadata prefered pag and we are user data
+	/* If this is a metadata preferred pag and we are user data
 	 * then try somewhere else if we are not being asked to
 	 * try harder at this point
 	 */
@@ -2458,7 +2458,7 @@ error0:
 /*
  * AG Busy list management
  * The busy list contains block ranges that have been freed but whose
- * transacations have not yet hit disk.  If any block listed in a busy
+ * transactions have not yet hit disk.  If any block listed in a busy
  * list is reused, the transaction that freed it must be forced to disk
  * before continuing to use the block.
  *
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 3546dea27b7..2d1f8928b26 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -68,7 +68,7 @@ typedef struct xfs_alloc_arg {
 	xfs_alloctype_t	otype;		/* original allocation type */
 	char		wasdel;		/* set if allocation was prev delayed */
 	char		wasfromfl;	/* set if allocation is from freelist */
-	char		isfl;		/* set if is freelist blocks - !actg */
+	char		isfl;		/* set if is freelist blocks - !acctg */
 	char		userdata;	/* set if this is user data */
 } xfs_alloc_arg_t;
 
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 093fac476bd..b6e1e02bbb2 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -294,7 +294,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
 	xfs_trans_ihold(args.trans, dp);
 
 	/*
-	 * If the attribute list is non-existant or a shortform list,
+	 * If the attribute list is non-existent or a shortform list,
 	 * upgrade it to a single-leaf-block attribute list.
 	 */
 	if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
@@ -1584,7 +1584,7 @@ out:
  * Fill in the disk block numbers in the state structure for the buffers
  * that are attached to the state structure.
  * This is done so that we can quickly reattach ourselves to those buffers
- * after some set of transaction commit's has released these buffers.
+ * after some set of transaction commits have released these buffers.
  */
 STATIC int
 xfs_attr_fillstate(xfs_da_state_t *state)
@@ -1631,7 +1631,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
 /*
  * Reattach the buffers to the state structure based on the disk block
  * numbers stored in the state structure.
- * This is done after some set of transaction commit's has released those
+ * This is done after some set of transaction commits have released those
  * buffers from our grip.
  */
 STATIC int
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 717682747bd..9462be86aa1 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -524,7 +524,7 @@ xfs_attr_shortform_compare(const void *a, const void *b)
 
 /*
  * Copy out entries of shortform attribute lists for attr_list().
- * Shortform atrtribute lists are not stored in hashval sorted order.
+ * Shortform attribute lists are not stored in hashval sorted order.
  * If the output buffer is not large enough to hold them all, then we
  * we have to calculate each entries' hashvalue and sort them before
  * we can begin returning them to the user.
@@ -1541,7 +1541,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
 	/*
 	 * Check for the degenerate case of the block being empty.
 	 * If the block is empty, we'll simply delete it, no need to
-	 * coalesce it with a sibling block.  We choose (aribtrarily)
+	 * coalesce it with a sibling block.  We choose (arbitrarily)
 	 * to merge with the forward block unless it is NULL.
 	 */
 	if (count == 0) {
diff --git a/fs/xfs/xfs_behavior.c b/fs/xfs/xfs_behavior.c
index 9880adae393..f4fe3715a80 100644
--- a/fs/xfs/xfs_behavior.c
+++ b/fs/xfs/xfs_behavior.c
@@ -31,7 +31,7 @@
  * The behavior chain is ordered based on the 'position' number which
  * lives in the first field of the ops vector (higher numbers first).
  *
- * Attemps to insert duplicate ops result in an EINVAL return code.
+ * Attempts to insert duplicate ops result in an EINVAL return code.
  * Otherwise, return 0 to indicate success.
  */
 int
@@ -84,7 +84,7 @@ bhv_insert(bhv_head_t *bhp, bhv_desc_t *bdp)
 
 /*
  * Remove a behavior descriptor from a position in a behavior chain;
- * the postition is guaranteed not to be the first position.
+ * the position is guaranteed not to be the first position.
  * Should only be called by the bhv_remove() macro.
  */
 void
diff --git a/fs/xfs/xfs_behavior.h b/fs/xfs/xfs_behavior.h
index 2cd89bb5ab1..1d8ff103201 100644
--- a/fs/xfs/xfs_behavior.h
+++ b/fs/xfs/xfs_behavior.h
@@ -39,7 +39,7 @@
  * behaviors is synchronized with operations-in-progress (oip's) so that
  * the oip's always see a consistent view of the chain.
  *
- * The term "interpostion" is used to refer to the act of inserting
+ * The term "interposition" is used to refer to the act of inserting
  * a behavior such that it interposes on (i.e., is inserted in front
  * of) a particular other behavior.  A key example of this is when a
  * system implementing distributed single system image wishes to
@@ -51,7 +51,7 @@
  *
  * Behavior synchronization is logic which is necessary under certain
  * circumstances that there is no conflict between ongoing operations
- * traversing the behavior chain and those dunamically modifying the
+ * traversing the behavior chain and those dynamically modifying the
  * behavior chain.  Because behavior synchronization adds extra overhead
  * to virtual operation invocation, we want to restrict, as much as
  * we can, the requirement for this extra code, to those situations
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 07e2324152b..5fed15682dd 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -98,12 +98,12 @@ xfs_buf_item_flush_log_debug(
 }
 
 /*
- * This function is called to verify that our caller's have logged
+ * This function is called to verify that our callers have logged
  * all the bytes that they changed.
  *
  * It does this by comparing the original copy of the buffer stored in
  * the buf log item's bli_orig array to the current copy of the buffer
- * and ensuring that all bytes which miscompare are set in the bli_logged
+ * and ensuring that all bytes which mismatch are set in the bli_logged
  * array of the buf log item.
  */
 STATIC void
diff --git a/fs/xfs/xfs_cap.h b/fs/xfs/xfs_cap.h
index 433ec537f9b..d0035c6e951 100644
--- a/fs/xfs/xfs_cap.h
+++ b/fs/xfs/xfs_cap.h
@@ -38,7 +38,7 @@ typedef struct xfs_cap_set {
 /*
  * For Linux, we take the bitfields directly from capability.h
  * and no longer attempt to keep this attribute ondisk compatible
- * with IRIX.  Since this attribute is only set on exectuables,
+ * with IRIX.  Since this attribute is only set on executables,
  * it just doesn't make much sense to try.  We do use a different
  * named attribute though, to avoid confusion.
  */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 4bae3a76c67..8988b905117 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -840,7 +840,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 	/*
 	 * Check for the degenerate case of the block being empty.
 	 * If the block is empty, we'll simply delete it, no need to
-	 * coalesce it with a sibling block.  We choose (aribtrarily)
+	 * coalesce it with a sibling block.  We choose (arbitrarily)
 	 * to merge with the forward block unless it is NULL.
 	 */
 	if (count == 0) {
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index bd5cee6aa51..972ded59547 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -533,7 +533,7 @@ xfs_dir2_block_getdents(
 
 	/*
 	 * Reached the end of the block.
-	 * Set the offset to a nonexistent block 1 and return.
+	 * Set the offset to a non-existent block 1 and return.
 	 */
 	*eofp = 1;
 
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 08648b18265..0f5e2f2ce6e 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -515,7 +515,7 @@ xfs_dir2_leaf_addname(
 			ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
 			       XFS_DIR2_NULL_DATAPTR);
 			/*
-			 * Copy entries down to copver the stale entry
+			 * Copy entries down to cover the stale entry
 			 * and make room for the new entry.
 			 */
 			if (highstale - index > 0)
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index af556f16a0c..ac511ab9c52 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -830,7 +830,7 @@ xfs_dir2_leafn_rebalance(
 		state->inleaf = 1;
 		blk2->index = 0;
 		cmn_err(CE_ALERT,
-			"xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting orignal leaf: "
+			"xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting original leaf: "
 			"blk1->index %d\n",
 			blk1->index);
 	}
diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
index ee88751c3be..6d711869262 100644
--- a/fs/xfs/xfs_dir_leaf.c
+++ b/fs/xfs/xfs_dir_leaf.c
@@ -1341,7 +1341,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
 	/*
 	 * Check for the degenerate case of the block being empty.
 	 * If the block is empty, we'll simply delete it, no need to
-	 * coalesce it with a sibling block.  We choose (aribtrarily)
+	 * coalesce it with a sibling block.  We choose (arbitrarily)
 	 * to merge with the forward block unless it is NULL.
 	 */
 	if (count == 0) {
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 56caa88713a..dfa3527b20a 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -477,7 +477,7 @@ xfs_fs_counts(
  *
  * xfs_reserve_blocks is called to set m_resblks
  * in the in-core mount table. The number of unused reserved blocks
- * is kept in m_resbls_avail.
+ * is kept in m_resblks_avail.
  *
  * Reserve the requested number of blocks if available. Otherwise return
  * as many as possible to satisfy the request. The actual number
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 0024892841a..20c39a6e6a0 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1023,7 +1023,7 @@ xfs_difree(
 	rec.ir_freecount++;
 
 	/*
-	 * When an inode cluster is free, it becomes elgible for removal
+	 * When an inode cluster is free, it becomes eligible for removal
 	 */
 	if ((mp->m_flags & XFS_MOUNT_IDELETE) &&
 	    (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 3ce35a6f700..bb33113eef9 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -509,7 +509,7 @@ retry:
 		} else {
 			/*
 			 * If the inode is not fully constructed due to
-			 * filehandle mistmatches wait for the inode to go
+			 * filehandle mismatches wait for the inode to go
 			 * away and try again.
 			 *
 			 * iget_locked will call __wait_on_freeing_inode
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 88a517fad07..48146bdc6bd 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -160,7 +160,7 @@ xfs_inotobp(
 	xfs_dinode_t	*dip;
 
 	/*
-	 * Call the space managment code to find the location of the
+	 * Call the space management code to find the location of the
 	 * inode on disk.
 	 */
 	imap.im_blkno = 0;
@@ -837,7 +837,7 @@ xfs_dic2xflags(
 
 /*
  * Given a mount structure and an inode number, return a pointer
- * to a newly allocated in-core inode coresponding to the given
+ * to a newly allocated in-core inode corresponding to the given
  * inode number.
  *
  * Initialize the inode's attributes and extent pointers if it
@@ -2723,7 +2723,7 @@ xfs_ipin(
 /*
  * Decrement the pin count of the given inode, and wake up
  * anyone in xfs_iwait_unpin() if the count goes to 0.  The
- * inode must have been previoulsy pinned with a call to xfs_ipin().
+ * inode must have been previously pinned with a call to xfs_ipin().
  */
 void
 xfs_iunpin(
@@ -3690,7 +3690,7 @@ void
 xfs_iext_add(
 	xfs_ifork_t	*ifp,		/* inode fork pointer */
 	xfs_extnum_t	idx,		/* index to begin adding exts */
-	int		ext_diff)	/* nubmer of extents to add */
+	int		ext_diff)	/* number of extents to add */
 {
 	int		byte_diff;	/* new bytes being added */
 	int		new_size;	/* size of extents after adding */
@@ -4038,7 +4038,7 @@ xfs_iext_remove_indirect(
 	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
 	xfs_extnum_t	nex1;		/* number of extents before idx */
 	xfs_extnum_t	nex2;		/* extents after idx + count */
-	int		nlists;		/* entries in indirecton array */
+	int		nlists;		/* entries in indirection array */
 	int		page_idx = idx;	/* index in target extent list */
 
 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
@@ -4291,9 +4291,9 @@ xfs_iext_bno_to_ext(
 	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
 	xfs_bmbt_rec_t	*ep = NULL;	/* pointer to target extent */
 	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
-	int		high;		/* upper boundry in search */
+	int		high;		/* upper boundary in search */
 	xfs_extnum_t	idx = 0;	/* index of target extent */
-	int		low;		/* lower boundry in search */
+	int		low;		/* lower boundary in search */
 	xfs_extnum_t	nextents;	/* number of file extents */
 	xfs_fileoff_t	startoff = 0;	/* start offset of extent */
 
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 36aa1fcb90a..7497a481b2f 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -580,7 +580,7 @@ xfs_inode_item_unpin_remove(
  * been or is in the process of being flushed, then (ideally) we'd like to
  * see if the inode's buffer is still incore, and if so give it a nudge.
  * We delay doing so until the pushbuf routine, though, to avoid holding
- * the AIL lock across a call to the blackhole which is the buffercache.
+ * the AIL lock across a call to the blackhole which is the buffer cache.
  * Also we don't want to sleep in any device strategy routines, which can happen
  * if we do the subsequent bawrite in here.
  */
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 32247b6bfee..94068d014f2 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -272,7 +272,7 @@ xfs_bulkstat(
 	size_t			statstruct_size, /* sizeof struct filling */
 	char			__user *ubuffer, /* buffer with inode stats */
 	int			flags,	/* defined in xfs_itable.h */
-	int			*done)	/* 1 if there're more stats to get */
+	int			*done)	/* 1 if there are more stats to get */
 {
 	xfs_agblock_t		agbno=0;/* allocation group block number */
 	xfs_buf_t		*agbp;	/* agi header buffer */
@@ -676,7 +676,7 @@ xfs_bulkstat_single(
 	xfs_mount_t		*mp,	/* mount point for filesystem */
 	xfs_ino_t		*lastinop, /* inode to return */
 	char			__user *buffer, /* buffer with inode stats */
-	int			*done)	/* 1 if there're more stats to get */
+	int			*done)	/* 1 if there are more stats to get */
 {
 	int			count;	/* count value for bulkstat call */
 	int			error;	/* return value */
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 047d834ed21..11eb4e1b18c 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -60,7 +60,7 @@ xfs_bulkstat(
 	size_t		statstruct_size,/* sizeof struct that we're filling */
 	char		__user *ubuffer,/* buffer with inode stats */
 	int		flags,		/* flag to control access method */
-	int		*done);		/* 1 if there're more stats to get */
+	int		*done);		/* 1 if there are more stats to get */
 
 int
 xfs_bulkstat_single(
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 9176995160e..32e841d2f26 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -59,7 +59,7 @@ STATIC xlog_t *  xlog_alloc_log(xfs_mount_t	*mp,
 				int		num_bblks);
 STATIC int	 xlog_space_left(xlog_t *log, int cycle, int bytes);
 STATIC int	 xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
-STATIC void	 xlog_unalloc_log(xlog_t *log);
+STATIC void	 xlog_dealloc_log(xlog_t *log);
 STATIC int	 xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[],
 			    int nentries, xfs_log_ticket_t tic,
 			    xfs_lsn_t *start_lsn,
@@ -304,7 +304,7 @@ xfs_log_done(xfs_mount_t	*mp,
 	if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 ||
 	    (flags & XFS_LOG_REL_PERM_RESERV)) {
 		/*
-		 * Release ticket if not permanent reservation or a specifc
+		 * Release ticket if not permanent reservation or a specific
 		 * request has been made to release a permanent reservation.
 		 */
 		xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
@@ -511,7 +511,7 @@ xfs_log_mount(xfs_mount_t	*mp,
 			vfsp->vfs_flag |= VFS_RDONLY;
 		if (error) {
 			cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error);
-			xlog_unalloc_log(mp->m_log);
+			xlog_dealloc_log(mp->m_log);
 			return error;
 		}
 	}
@@ -667,7 +667,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 		 *
 		 * Go through the motions of sync'ing and releasing
 		 * the iclog, even though no I/O will actually happen,
-		 * we need to wait for other log I/O's that may already
+		 * we need to wait for other log I/Os that may already
 		 * be in progress.  Do this as a separate section of
 		 * code so we'll know if we ever get stuck here that
 		 * we're in this odd situation of trying to unmount
@@ -704,7 +704,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 void
 xfs_log_unmount_dealloc(xfs_mount_t *mp)
 {
-	xlog_unalloc_log(mp->m_log);
+	xlog_dealloc_log(mp->m_log);
 }
 
 /*
@@ -1492,7 +1492,7 @@ xlog_sync(xlog_t		*log,
 		ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
 		ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
 
-		/* account for internal log which does't start at block #0 */
+		/* account for internal log which doesn't start at block #0 */
 		XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
 		XFS_BUF_WRITE(bp);
 		if ((error = XFS_bwrite(bp))) {
@@ -1506,10 +1506,10 @@ xlog_sync(xlog_t		*log,
 
 
 /*
- * Unallocate a log structure
+ * Deallocate a log structure
  */
 void
-xlog_unalloc_log(xlog_t *log)
+xlog_dealloc_log(xlog_t *log)
 {
 	xlog_in_core_t	*iclog, *next_iclog;
 	xlog_ticket_t	*tic, *next_tic;
@@ -1539,7 +1539,7 @@ xlog_unalloc_log(xlog_t *log)
 	if ((log->l_ticket_cnt != log->l_ticket_tcnt)  &&
 	    !XLOG_FORCED_SHUTDOWN(log)) {
 		xfs_fs_cmn_err(CE_WARN, log->l_mp,
-			"xlog_unalloc_log: (cnt: %d, total: %d)",
+			"xlog_dealloc_log: (cnt: %d, total: %d)",
 			log->l_ticket_cnt, log->l_ticket_tcnt);
 		/* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */
 
@@ -1562,7 +1562,7 @@ xlog_unalloc_log(xlog_t *log)
 #endif
 	log->l_mp->m_log = NULL;
 	kmem_free(log, sizeof(xlog_t));
-}	/* xlog_unalloc_log */
+}	/* xlog_dealloc_log */
 
 /*
  * Update counters atomically now that memcpy is done.
@@ -2829,7 +2829,7 @@ xlog_state_release_iclog(xlog_t		*log,
 
 	/*
 	 * We let the log lock go, so it's possible that we hit a log I/O
-	 * error or someother SHUTDOWN condition that marks the iclog
+	 * error or some other SHUTDOWN condition that marks the iclog
 	 * as XLOG_STATE_IOERROR before the bwrite. However, we know that
 	 * this iclog has consistent data, so we ignore IOERROR
 	 * flags after this point.
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 4b2ac88dbb8..eacb3d4987f 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -27,7 +27,7 @@
 
 #ifdef __KERNEL__
 /*
- * By comparing each compnent, we don't have to worry about extra
+ * By comparing each component, we don't have to worry about extra
  * endian issues in treating two 32 bit numbers as one 64 bit number
  */
 static inline xfs_lsn_t	_lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index add13f507ed..1f0016b0b4e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -583,7 +583,7 @@ xlog_find_head(
 		 *        x | x ... | x - 1 | x
 		 * Another case that fits this picture would be
 		 *        x | x + 1 | x ... | x
-		 * In this case the head really is somwhere at the end of the
+		 * In this case the head really is somewhere at the end of the
 		 * log, as one of the latest writes at the beginning was
 		 * incomplete.
 		 * One more case is
@@ -2799,7 +2799,7 @@ xlog_recover_do_trans(
 		 * we don't need to worry about the block number being
 		 * truncated in > 1 TB buffers because in user-land,
 		 * we're now n32 or 64-bit so xfs_daddr_t is 64-bits so
-		 * the blkno's will get through the user-mode buffer
+		 * the blknos will get through the user-mode buffer
 		 * cache properly.  The only bad case is o32 kernels
 		 * where xfs_daddr_t is 32-bits but mount will warn us
 		 * off a > 1 TB filesystem before we get here.
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 20e8abc16d1..72e7e78bfff 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -393,7 +393,7 @@ xfs_initialize_perag(
 				break;
 			}
 
-			/* This ag is prefered for inodes */
+			/* This ag is preferred for inodes */
 			pag = &mp->m_perag[index];
 			pag->pagi_inodeok = 1;
 			if (index < max_metadata)
@@ -1728,7 +1728,7 @@ xfs_mount_log_sbunit(
  * We cannot use the hotcpu_register() function because it does
  * not allow notifier instances. We need a notifier per filesystem
  * as we need to be able to identify the filesystem to balance
- * the counters out. This is acheived by having a notifier block
+ * the counters out. This is achieved by having a notifier block
  * embedded in the xfs_mount_t and doing pointer magic to get the
  * mount pointer from the notifier block address.
  */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index ebd73960e9d..66cbee79864 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -379,7 +379,7 @@ typedef struct xfs_mount {
 #endif
 	int			m_dalign;	/* stripe unit */
 	int			m_swidth;	/* stripe width */
-	int			m_sinoalign;	/* stripe unit inode alignmnt */
+	int			m_sinoalign;	/* stripe unit inode alignment */
 	int			m_attr_magicpct;/* 37% of the blocksize */
 	int			m_dir_magicpct;	/* 37% of the dir blocksize */
 	__uint8_t		m_mk_sharedro;	/* mark shared ro on unmount */
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 82a08baf437..4f6a034de7f 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -31,7 +31,7 @@
 typedef __uint32_t	xfs_dqid_t;
 
 /*
- * Eventhough users may not have quota limits occupying all 64-bits,
+ * Even though users may not have quota limits occupying all 64-bits,
  * they may need 64-bit accounting. Hence, 64-bit quota-counters,
  * and quota-limits. This is a waste in the common case, but hey ...
  */
@@ -246,7 +246,7 @@ typedef struct xfs_qoff_logformat {
 #ifdef __KERNEL__
 /*
  * This check is done typically without holding the inode lock;
- * that may seem racey, but it is harmless in the context that it is used.
+ * that may seem racy, but it is harmless in the context that it is used.
  * The inode cannot go inactive as long a reference is kept, and
  * therefore if dquot(s) were attached, they'll stay consistent.
  * If, for example, the ownership of the inode changes while
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 2918956553a..8d056cef5d1 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -490,7 +490,7 @@ xfs_trans_mod_sb(
 	case XFS_TRANS_SB_RES_FREXTENTS:
 		/*
 		 * The allocation has already been applied to the
-		 * in-core superblocks's counter.  This should only
+		 * in-core superblock's counter.  This should only
 		 * be applied to the on-disk superblock.
 		 */
 		ASSERT(delta < 0);
@@ -611,7 +611,7 @@ xfs_trans_apply_sb_deltas(
 
 	if (whole)
 		/*
-		 * Log the whole thing, the fields are discontiguous.
+		 * Log the whole thing, the fields are noncontiguous.
 		 */
 		xfs_trans_log_buf(tp, bp, 0, sizeof(xfs_sb_t) - 1);
 	else
@@ -669,7 +669,7 @@ xfs_trans_unreserve_and_mod_sb(
 	/*
 	 * Apply any superblock modifications to the in-core version.
 	 * The t_res_fdblocks_delta and t_res_frextents_delta fields are
-	 * explicity NOT applied to the in-core superblock.
+	 * explicitly NOT applied to the in-core superblock.
 	 * The idea is that that has already been done.
 	 */
 	if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index e48befa4e33..100d9a4b38e 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -354,7 +354,7 @@ typedef struct xfs_trans {
 	xfs_lsn_t		t_commit_lsn;	/* log seq num of end of
 						 * transaction. */
 	struct xfs_mount	*t_mountp;	/* ptr to fs mount struct */
-	struct xfs_dquot_acct   *t_dqinfo;	/* accting info for dquots */
+	struct xfs_dquot_acct   *t_dqinfo;	/* acctg info for dquots */
 	xfs_trans_callback_t	t_callback;	/* transaction callback */
 	void			*t_callarg;	/* callback arg */
 	unsigned int		t_flags;	/* misc flags */
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index e341409172d..7c5894d59f8 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -272,7 +272,7 @@ xfs_trans_log_inode(
 	 * This is to coordinate with the xfs_iflush() and xfs_iflush_done()
 	 * routines in the eventual clearing of the ilf_fields bits.
 	 * See the big comment in xfs_iflush() for an explanation of
-	 * this coorination mechanism.
+	 * this coordination mechanism.
 	 */
 	flags |= ip->i_itemp->ili_last_fields;
 	ip->i_itemp->ili_format.ilf_fields |= flags;
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index d4ec4dfaf19..504d2a80747 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -880,10 +880,10 @@ xfs_statvfs(
  *		       determine if they should be flushed sync, async, or
  *		       delwri.
  *      SYNC_CLOSE   - This flag is passed when the system is being
- *		       unmounted.  We should sync and invalidate everthing.
+ *		       unmounted.  We should sync and invalidate everything.
  *      SYNC_FSDATA  - This indicates that the caller would like to make
  *		       sure the superblock is safe on disk.  We can ensure
- *		       this by simply makeing sure the log gets flushed
+ *		       this by simply making sure the log gets flushed
  *		       if SYNC_BDFLUSH is set, and by actually writing it
  *		       out otherwise.
  *
@@ -908,7 +908,7 @@ xfs_sync(
  *
  * This routine supports all of the flags defined for the generic VFS_SYNC
  * interface as explained above under xfs_sync.  In the interests of not
- * changing interfaces within the 6.5 family, additional internallly-
+ * changing interfaces within the 6.5 family, additional internally-
  * required functions are specified within a separate xflags parameter,
  * only available by calling this routine.
  *
@@ -1090,7 +1090,7 @@ xfs_sync_inodes(
 		 * If this is just vfs_sync() or pflushd() calling
 		 * then we can skip inodes for which it looks like
 		 * there is nothing to do.  Since we don't have the
-		 * inode locked this is racey, but these are periodic
+		 * inode locked this is racy, but these are periodic
 		 * calls so it doesn't matter.  For the others we want
 		 * to know for sure, so we at least try to lock them.
 		 */
@@ -1429,7 +1429,7 @@ xfs_sync_inodes(
  *
  * This routine supports all of the flags defined for the generic VFS_SYNC
  * interface as explained above under xfs_sync.  In the interests of not
- * changing interfaces within the 6.5 family, additional internallly-
+ * changing interfaces within the 6.5 family, additional internally-
  * required functions are specified within a separate xflags parameter,
  * only available by calling this routine.
  *
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 0f0a64e81db..de49601919c 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -848,7 +848,7 @@ xfs_setattr(
 	 * If this is a synchronous mount, make sure that the
 	 * transaction goes to disk before returning to the user.
 	 * This is slightly sub-optimal in that truncates require
-	 * two sync transactions instead of one for wsync filesytems.
+	 * two sync transactions instead of one for wsync filesystems.
 	 * One for the truncate and one for the timestamps since we
 	 * don't want to change the timestamps unless we're sure the
 	 * truncate worked.  Truncates are less than 1% of the laddis
@@ -1170,7 +1170,7 @@ xfs_fsync(
 
 		/*
 		 * If this inode is on the RT dev we need to flush that
-		 * cache aswell.
+		 * cache as well.
 		 */
 		if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)
 			xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
@@ -1380,7 +1380,7 @@ xfs_inactive_symlink_rmt(
 	 */
 	ntp = xfs_trans_dup(tp);
 	/*
-	 * Commit the transaction containing extent freeing and EFD's.
+	 * Commit the transaction containing extent freeing and EFDs.
 	 * If we get an error on the commit here or on the reserve below,
 	 * we need to unlock the inode since the new transaction doesn't
 	 * have the inode attached.
@@ -2023,7 +2023,7 @@ xfs_create(
 	XFS_QM_DQRELE(mp, gdqp);
 
 	/*
-	 * Propogate the fact that the vnode changed after the
+	 * Propagate the fact that the vnode changed after the
 	 * xfs_inode locks have been released.
 	 */
 	VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3);
@@ -2370,7 +2370,7 @@ xfs_remove(
 	 * for a log reservation. Since we'll have to wait for the
 	 * inactive code to complete before returning from xfs_iget,
 	 * we need to make sure that we don't have log space reserved
-	 * when we call xfs_iget.  Instead we get an unlocked referece
+	 * when we call xfs_iget.  Instead we get an unlocked reference
 	 * to the inode before getting our log reservation.
 	 */
 	error = xfs_get_dir_entry(dentry, &ip);
@@ -3020,7 +3020,7 @@ xfs_rmdir(
 	 * for a log reservation.  Since we'll have to wait for the
 	 * inactive code to complete before returning from xfs_iget,
 	 * we need to make sure that we don't have log space reserved
-	 * when we call xfs_iget.  Instead we get an unlocked referece
+	 * when we call xfs_iget.  Instead we get an unlocked reference
 	 * to the inode before getting our log reservation.
 	 */
 	error = xfs_get_dir_entry(dentry, &cdp);
-- 
cgit v1.2.3


From e0edd5962bd83d319aaa50b39580dc30299a7fe3 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Wed, 29 Mar 2006 08:55:47 +1000
Subject: [XFS] Fix compiler warning and small code inconsistencies in compat
 ioctl32 land.

SGI-PV: 904196
SGI-Modid: xfs-linux-melb:xfs-kern:25590a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl32.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index b6321abd9a8..251bfe451a3 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -72,7 +72,7 @@ xfs_ioctl32_flock(
 	    copy_in_user(&p->l_pid,	&p32->l_pid,	sizeof(u32)) ||
 	    copy_in_user(&p->l_pad,	&p32->l_pad,	4*sizeof(u32)))
 		return -EFAULT;
-	
+
 	return (unsigned long)p;
 }
 
@@ -107,11 +107,15 @@ xfs_ioctl32_bulkstat(
 #endif
 
 STATIC long
-xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
+xfs_compat_ioctl(
+	int		mode,
+	struct file	*file,
+	unsigned	cmd,
+	unsigned long	arg)
 {
+	struct inode	*inode = file->f_dentry->d_inode;
+	vnode_t		*vp = vn_from_inode(inode);
 	int		error;
-	struct		inode *inode = f->f_dentry->d_inode;
-	vnode_t		*vp = vn_to_inode(inode);
 
 	switch (cmd) {
 	case XFS_IOC_DIOINFO:
@@ -189,7 +193,7 @@ xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
 		return -ENOIOCTLCMD;
 	}
 
-	VOP_IOCTL(vp, inode, f, mode, cmd, (void __user *)arg, error);
+	VOP_IOCTL(vp, inode, file, mode, cmd, (void __user *)arg, error);
 	VMODIFY(vp);
 
 	return error;
@@ -197,18 +201,18 @@ xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
 
 long
 xfs_file_compat_ioctl(
-	struct file		*f,
+	struct file		*file,
 	unsigned		cmd,
 	unsigned long		arg)
 {
-	return xfs_compat_ioctl(0, f, cmd, arg);
+	return xfs_compat_ioctl(0, file, cmd, arg);
 }
 
 long
 xfs_file_compat_invis_ioctl(
-	struct file		*f,
+	struct file		*file,
 	unsigned		cmd,
 	unsigned long		arg)
 {
-	return xfs_compat_ioctl(IO_INVIS, f, cmd, arg);
+	return xfs_compat_ioctl(IO_INVIS, file, cmd, arg);
 }
-- 
cgit v1.2.3


From 3c674e74238cb2484169e3f84f687c66887086b6 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Wed, 29 Mar 2006 09:26:15 +1000
Subject: Fixes a regression from the recent "remove ->get_blocks() support"
 change.  inode->i_blkbits should be used when making a get_block_t request of
 a filesystem instead of dio->blkbits, as that does not indicate the
 filesystem block size all the time (depends on request alignment - see start
 of __blockdev_direct_IO).

Signed-off-by: Nathan Scott <nathans@sgi.com>
Acked-by: Badari Pulavarty <pbadari@us.ibm.com>
---
 fs/direct-io.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 9d1d2aa73e4..910a8ed74b5 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -524,8 +524,6 @@ static int get_more_blocks(struct dio *dio)
 	 */
 	ret = dio->page_errors;
 	if (ret == 0) {
-		map_bh->b_state = 0;
-		map_bh->b_size = 0;
 		BUG_ON(dio->block_in_file >= dio->final_block_in_request);
 		fs_startblk = dio->block_in_file >> dio->blkfactor;
 		dio_count = dio->final_block_in_request - dio->block_in_file;
@@ -534,6 +532,9 @@ static int get_more_blocks(struct dio *dio)
 		if (dio_count & blkmask)	
 			fs_count++;
 
+		map_bh->b_state = 0;
+		map_bh->b_size = fs_count << dio->inode->i_blkbits;
+
 		create = dio->rw == WRITE;
 		if (dio->lock_type == DIO_LOCKING) {
 			if (dio->block_in_file < (i_size_read(dio->inode) >>
@@ -542,13 +543,13 @@ static int get_more_blocks(struct dio *dio)
 		} else if (dio->lock_type == DIO_NO_LOCKING) {
 			create = 0;
 		}
+
 		/*
 		 * For writes inside i_size we forbid block creations: only
 		 * overwrites are permitted.  We fall back to buffered writes
 		 * at a higher level for inside-i_size block-instantiating
 		 * writes.
 		 */
-		map_bh->b_size = fs_count << dio->blkbits;
 		ret = (*dio->get_block)(dio->inode, fs_startblk,
 						map_bh, create);
 	}
-- 
cgit v1.2.3


From 3ccb8b5f650e80b7cc7ef76289348472e026b6ac Mon Sep 17 00:00:00 2001
From: Glen Overby <overby@sgi.com>
Date: Wed, 29 Mar 2006 09:52:28 +1000
Subject: [XFS] A change to inode chunk allocation to try allocating the new
 chunk contiguous with the most recently allocated chunk.  On a striped
 filesystem, this will fill a stripe unit with inodes before allocating new
 inodes in another stripe unit.

SGI-PV: 951416
SGI-Modid: xfs-linux-melb:xfs-kern:208488a

Signed-off-by: Glen Overby <overby@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_ialloc.c | 108 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 68 insertions(+), 40 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 20c39a6e6a0..4eeb856183b 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -136,7 +136,7 @@ xfs_ialloc_ag_alloc(
 	int		ninodes;	/* num inodes per buf */
 	xfs_agino_t	thisino;	/* current inode number, for loop */
 	int		version;	/* inode version number to use */
-	int		isaligned;	/* inode allocation at stripe unit */
+	int		isaligned = 0;	/* inode allocation at stripe unit */
 					/* boundary */
 
 	args.tp = tp;
@@ -152,47 +152,75 @@ xfs_ialloc_ag_alloc(
 		return XFS_ERROR(ENOSPC);
 	args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
 	/*
-	 * Set the alignment for the allocation.
-	 * If stripe alignment is turned on then align at stripe unit
-	 * boundary.
-	 * If the cluster size is smaller than a filesystem block
-	 * then we're doing I/O for inodes in filesystem block size pieces,
-	 * so don't need alignment anyway.
-	 */
-	isaligned = 0;
-	if (args.mp->m_sinoalign) {
-		ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
-		args.alignment = args.mp->m_dalign;
-		isaligned = 1;
-	} else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
-	    args.mp->m_sb.sb_inoalignmt >=
-	    XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
-		args.alignment = args.mp->m_sb.sb_inoalignmt;
-	else
-		args.alignment = 1;
+	 * First try to allocate inodes contiguous with the last-allocated
+	 * chunk of inodes.  If the filesystem is striped, this will fill
+	 * an entire stripe unit with inodes.
+ 	 */
 	agi = XFS_BUF_TO_AGI(agbp);
-	/*
-	 * Need to figure out where to allocate the inode blocks.
-	 * Ideally they should be spaced out through the a.g.
-	 * For now, just allocate blocks up front.
-	 */
-	args.agbno = be32_to_cpu(agi->agi_root);
-	args.fsbno = XFS_AGB_TO_FSB(args.mp, be32_to_cpu(agi->agi_seqno),
-				    args.agbno);
-	/*
-	 * Allocate a fixed-size extent of inodes.
-	 */
-	args.type = XFS_ALLOCTYPE_NEAR_BNO;
-	args.mod = args.total = args.wasdel = args.isfl = args.userdata =
-		args.minalignslop = 0;
-	args.prod = 1;
-	/*
-	 * Allow space for the inode btree to split.
-	 */
-	args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
-	if ((error = xfs_alloc_vextent(&args)))
-		return error;
+	newino = be32_to_cpu(agi->agi_newino);
+	if(likely(newino != NULLAGINO)) {
+		args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
+				XFS_IALLOC_BLOCKS(args.mp);
+		args.fsbno = XFS_AGB_TO_FSB(args.mp,
+				be32_to_cpu(agi->agi_seqno), args.agbno);
+		args.type = XFS_ALLOCTYPE_THIS_BNO;
+		args.mod = args.total = args.wasdel = args.isfl =
+			args.userdata = args.minalignslop = 0;
+		args.prod = 1;
+		args.alignment = 1;
+		/*
+		 * Allow space for the inode btree to split.
+		 */
+		args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
+		if ((error = xfs_alloc_vextent(&args)))
+			return error;
+	} else
+		args.fsbno = NULLFSBLOCK;
 
+	if (unlikely(args.fsbno == NULLFSBLOCK)) {
+		/*
+		 * Set the alignment for the allocation.
+		 * If stripe alignment is turned on then align at stripe unit
+		 * boundary.
+		 * If the cluster size is smaller than a filesystem block 
+		 * then we're doing I/O for inodes in filesystem block size 
+		 * pieces, so don't need alignment anyway.
+		 */
+		isaligned = 0;
+		if (args.mp->m_sinoalign) {
+			ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
+			args.alignment = args.mp->m_dalign;
+			isaligned = 1;
+		} else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
+			   args.mp->m_sb.sb_inoalignmt >= 
+			   XFS_B_TO_FSBT(args.mp,
+			  	XFS_INODE_CLUSTER_SIZE(args.mp)))
+				args.alignment = args.mp->m_sb.sb_inoalignmt;
+		else
+			args.alignment = 1;
+		/*
+		 * Need to figure out where to allocate the inode blocks.
+		 * Ideally they should be spaced out through the a.g.
+		 * For now, just allocate blocks up front.
+		 */
+		args.agbno = be32_to_cpu(agi->agi_root);
+		args.fsbno = XFS_AGB_TO_FSB(args.mp,
+				be32_to_cpu(agi->agi_seqno), args.agbno);
+		/*
+		 * Allocate a fixed-size extent of inodes.
+		 */
+		args.type = XFS_ALLOCTYPE_NEAR_BNO;
+		args.mod = args.total = args.wasdel = args.isfl =
+			args.userdata = args.minalignslop = 0;
+		args.prod = 1;
+		/*
+		 * Allow space for the inode btree to split.
+		 */
+		args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
+		if ((error = xfs_alloc_vextent(&args)))
+			return error;
+	}
+ 
 	/*
 	 * If stripe alignment is turned on, then try again with cluster
 	 * alignment.
-- 
cgit v1.2.3


From 0b7e56a450a4800c5f48f3a345a5a7de2f38041c Mon Sep 17 00:00:00 2001
From: Mandy Kirkconnell <alkirkco@sgi.com>
Date: Wed, 29 Mar 2006 09:53:03 +1000
Subject: [XFS] Remove unused/obsoleted function: xfs_bmap_do_search_extents()

SGI-PV: 951415
SGI-Modid: xfs-linux-melb:xfs-kern:208490a

Signed-off-by: Mandy Kirkconnell <alkirkco@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_bmap.c | 107 ------------------------------------------------------
 fs/xfs/xfs_bmap.h |   8 ----
 2 files changed, 115 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 2d702e4a74a..d384e489705 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3467,113 +3467,6 @@ done:
 	return error;
 }
 
-xfs_bmbt_rec_t *			/* pointer to found extent entry */
-xfs_bmap_do_search_extents(
-	xfs_bmbt_rec_t	*base,		/* base of extent list */
-	xfs_extnum_t	lastx,		/* last extent index used */
-	xfs_extnum_t	nextents,	/* number of file extents */
-	xfs_fileoff_t	bno,		/* block number searched for */
-	int		*eofp,		/* out: end of file found */
-	xfs_extnum_t	*lastxp,	/* out: last extent index */
-	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
-	xfs_bmbt_irec_t	*prevp)		/* out: previous extent entry found */
-{
-	xfs_bmbt_rec_t	*ep;		/* extent list entry pointer */
-	xfs_bmbt_irec_t	got;		/* extent list entry, decoded */
-	int		high;		/* high index of binary search */
-	int		low;		/* low index of binary search */
-
-	/*
-	 * Initialize the extent entry structure to catch access to
-	 * uninitialized br_startblock field.
-	 */
-	got.br_startoff = 0xffa5a5a5a5a5a5a5LL;
-	got.br_blockcount = 0xa55a5a5a5a5a5a5aLL;
-	got.br_state = XFS_EXT_INVALID;
-
-#if XFS_BIG_BLKNOS
-	got.br_startblock = 0xffffa5a5a5a5a5a5LL;
-#else
-	got.br_startblock = 0xffffa5a5;
-#endif
-
-	if (lastx != NULLEXTNUM && lastx < nextents)
-		ep = base + lastx;
-	else
-		ep = NULL;
-	prevp->br_startoff = NULLFILEOFF;
-	if (ep && bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep)) &&
-	    bno < got.br_startoff +
-		  (got.br_blockcount = xfs_bmbt_get_blockcount(ep)))
-		*eofp = 0;
-	else if (ep && lastx < nextents - 1 &&
-		 bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep + 1)) &&
-		 bno < got.br_startoff +
-		       (got.br_blockcount = xfs_bmbt_get_blockcount(ep + 1))) {
-		lastx++;
-		ep++;
-		*eofp = 0;
-	} else if (nextents == 0)
-		*eofp = 1;
-	else if (bno == 0 &&
-		 (got.br_startoff = xfs_bmbt_get_startoff(base)) == 0) {
-		ep = base;
-		lastx = 0;
-		got.br_blockcount = xfs_bmbt_get_blockcount(ep);
-		*eofp = 0;
-	} else {
-		low = 0;
-		high = nextents - 1;
-		/* binary search the extents array */
-		while (low <= high) {
-			XFS_STATS_INC(xs_cmp_exlist);
-			lastx = (low + high) >> 1;
-			ep = base + lastx;
-			got.br_startoff = xfs_bmbt_get_startoff(ep);
-			got.br_blockcount = xfs_bmbt_get_blockcount(ep);
-			if (bno < got.br_startoff)
-				high = lastx - 1;
-			else if (bno >= got.br_startoff + got.br_blockcount)
-				low = lastx + 1;
-			else {
-				got.br_startblock = xfs_bmbt_get_startblock(ep);
-				got.br_state = xfs_bmbt_get_state(ep);
-				*eofp = 0;
-				*lastxp = lastx;
-				*gotp = got;
-				return ep;
-			}
-		}
-		if (bno >= got.br_startoff + got.br_blockcount) {
-			lastx++;
-			if (lastx == nextents) {
-				*eofp = 1;
-				got.br_startblock = xfs_bmbt_get_startblock(ep);
-				got.br_state = xfs_bmbt_get_state(ep);
-				*prevp = got;
-				ep = NULL;
-			} else {
-				*eofp = 0;
-				xfs_bmbt_get_all(ep, prevp);
-				ep++;
-				got.br_startoff = xfs_bmbt_get_startoff(ep);
-				got.br_blockcount = xfs_bmbt_get_blockcount(ep);
-			}
-		} else {
-			*eofp = 0;
-			if (ep > base)
-				xfs_bmbt_get_all(ep - 1, prevp);
-		}
-	}
-	if (ep) {
-		got.br_startblock = xfs_bmbt_get_startblock(ep);
-		got.br_state = xfs_bmbt_get_state(ep);
-	}
-	*lastxp = lastx;
-	*gotp = got;
-	return ep;
-}
-
 /*
  * Search the extent records for the entry containing block bno.
  * If bno lies in a hole, point to the next entry.  If bno lies
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 011ccaa9a1c..f83399c89ce 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -362,14 +362,6 @@ xfs_bmbt_rec_t *
 xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *,
 			xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
 
-/*
- * Search an extent list for the extent which includes block
- * bno.
- */
-xfs_bmbt_rec_t *xfs_bmap_do_search_extents(xfs_bmbt_rec_t *,
-			xfs_extnum_t, xfs_extnum_t, xfs_fileoff_t, int *,
-			xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
-
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_BMAP_H__ */
-- 
cgit v1.2.3


From c25366680bab32efcbb5eda5f3c202099ba27b81 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Wed, 29 Mar 2006 10:44:40 +1000
Subject: [XFS] Cleanup in XFS after recent get_block_t interface tweaks.

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 53 ++++++++++++++++++++-------------------------
 fs/xfs/linux-2.6/xfs_aops.h |  2 +-
 fs/xfs/linux-2.6/xfs_iops.c |  2 +-
 3 files changed, 26 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 62b4553fb60..6cbbd165c60 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1223,10 +1223,9 @@ free_buffers:
 }
 
 STATIC int
-__xfs_get_block(
+__xfs_get_blocks(
 	struct inode		*inode,
 	sector_t		iblock,
-	unsigned long		blocks,
 	struct buffer_head	*bh_result,
 	int			create,
 	int			direct,
@@ -1236,22 +1235,17 @@ __xfs_get_block(
 	xfs_iomap_t		iomap;
 	xfs_off_t		offset;
 	ssize_t			size;
-	int			retpbbm = 1;
+	int			niomap = 1;
 	int			error;
 
 	offset = (xfs_off_t)iblock << inode->i_blkbits;
-	if (blocks)
-		size = (ssize_t) min_t(xfs_off_t, LONG_MAX,
-					(xfs_off_t)blocks << inode->i_blkbits);
-	else
-		size = 1 << inode->i_blkbits;
-
+	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
+	size = bh_result->b_size;
 	VOP_BMAP(vp, offset, size,
-		create ? flags : BMAPI_READ, &iomap, &retpbbm, error);
+		create ? flags : BMAPI_READ, &iomap, &niomap, error);
 	if (error)
 		return -error;
-
-	if (retpbbm == 0)
+	if (niomap == 0)
 		return 0;
 
 	if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
@@ -1271,12 +1265,16 @@ __xfs_get_block(
 		}
 	}
 
-	/* If this is a realtime file, data might be on a new device */
+	/*
+	 * If this is a realtime file, data may be on a different device.
+	 * to that pointed to from the buffer_head b_bdev currently.
+	 */
 	bh_result->b_bdev = iomap.iomap_target->bt_bdev;
 
-	/* If we previously allocated a block out beyond eof and
-	 * we are now coming back to use it then we will need to
-	 * flag it as new even if it has a disk address.
+	/*
+	 * If we previously allocated a block out beyond eof and we are
+	 * now coming back to use it then we will need to flag it as new
+	 * even if it has a disk address.
 	 */
 	if (create &&
 	    ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
@@ -1292,26 +1290,24 @@ __xfs_get_block(
 		}
 	}
 
-	if (blocks) {
+	if (direct || size > (1 << inode->i_blkbits)) {
 		ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0);
 		offset = min_t(xfs_off_t,
-				iomap.iomap_bsize - iomap.iomap_delta,
-				(xfs_off_t)blocks << inode->i_blkbits);
-		bh_result->b_size = (u32) min_t(xfs_off_t, UINT_MAX, offset);
+				iomap.iomap_bsize - iomap.iomap_delta, size);
+		bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset);
 	}
 
 	return 0;
 }
 
 int
-xfs_get_block(
+xfs_get_blocks(
 	struct inode		*inode,
 	sector_t		iblock,
 	struct buffer_head	*bh_result,
 	int			create)
 {
-	return __xfs_get_block(inode, iblock,
-				bh_result->b_size >> inode->i_blkbits,
+	return __xfs_get_blocks(inode, iblock,
 				bh_result, create, 0, BMAPI_WRITE);
 }
 
@@ -1322,8 +1318,7 @@ xfs_get_blocks_direct(
 	struct buffer_head	*bh_result,
 	int			create)
 {
-	return __xfs_get_block(inode, iblock,
-				bh_result->b_size >> inode->i_blkbits,
+	return __xfs_get_blocks(inode, iblock,
 				bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT);
 }
 
@@ -1405,7 +1400,7 @@ xfs_vm_prepare_write(
 	unsigned int		from,
 	unsigned int		to)
 {
-	return block_prepare_write(page, from, to, xfs_get_block);
+	return block_prepare_write(page, from, to, xfs_get_blocks);
 }
 
 STATIC sector_t
@@ -1422,7 +1417,7 @@ xfs_vm_bmap(
 	VOP_RWLOCK(vp, VRWLOCK_READ);
 	VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error);
 	VOP_RWUNLOCK(vp, VRWLOCK_READ);
-	return generic_block_bmap(mapping, block, xfs_get_block);
+	return generic_block_bmap(mapping, block, xfs_get_blocks);
 }
 
 STATIC int
@@ -1430,7 +1425,7 @@ xfs_vm_readpage(
 	struct file		*unused,
 	struct page		*page)
 {
-	return mpage_readpage(page, xfs_get_block);
+	return mpage_readpage(page, xfs_get_blocks);
 }
 
 STATIC int
@@ -1440,7 +1435,7 @@ xfs_vm_readpages(
 	struct list_head	*pages,
 	unsigned		nr_pages)
 {
-	return mpage_readpages(mapping, pages, nr_pages, xfs_get_block);
+	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
 }
 
 STATIC void
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 795699f121d..60716543c68 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -41,6 +41,6 @@ typedef struct xfs_ioend {
 } xfs_ioend_t;
 
 extern struct address_space_operations xfs_address_space_operations;
-extern int xfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
+extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
 
 #endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index af487437bd7..149237304fb 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -708,7 +708,7 @@ STATIC void
 xfs_vn_truncate(
 	struct inode	*inode)
 {
-	block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_block);
+	block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_blocks);
 }
 
 STATIC int
-- 
cgit v1.2.3


From fef23e7fbb11a0a78cd61935f7056bc2b237995a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 28 Mar 2006 16:10:58 -0800
Subject: [PATCH] exec: allow init to exec from any thread.

After looking at the problem of init calling exec some more I figured out
an easy way to make the code work.

The actual symptom without out this patch is that all threads will die
except pid == 1, and the thread calling exec.  The thread calling exec will
wait forever for pid == 1 to die.

Since pid == 1 does not install a handler for SIGKILL it will never die.

This modifies the tests for init from current->pid == 1 to the equivalent
current == child_reaper.  And then it causes exec in the ugly case to
modify child_reaper.

The only weird symptom is that you wind up with an init process that
doesn't have the oldest start time on the box.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index c7397c46ad6..d0ecea0781f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -660,12 +660,23 @@ static int de_thread(struct task_struct *tsk)
 		struct dentry *proc_dentry1, *proc_dentry2;
 		unsigned long ptrace;
 
+		leader = current->group_leader;
+		/*
+		 * If our leader is the child_reaper become
+		 * the child_reaper and resend SIGKILL signal.
+		 */
+		if (unlikely(leader == child_reaper)) {
+			write_lock(&tasklist_lock);
+			child_reaper = current;
+			zap_other_threads(current);
+			write_unlock(&tasklist_lock);
+		}
+
 		/*
 		 * Wait for the thread group leader to be a zombie.
 		 * It should already be zombie at this point, most
 		 * of the time.
 		 */
-		leader = current->group_leader;
 		while (leader->exit_state != EXIT_ZOMBIE)
 			yield();
 
-- 
cgit v1.2.3


From 1434261c07bcebd5ef8b8a18f919fdee533b84e0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Tue, 28 Mar 2006 16:10:59 -0800
Subject: [PATCH] simplify exec from init's subthread

I think it is enough to take tasklist_lock for reading while changing
child_reaper:

	Reparenting needs write_lock(tasklist_lock)

	Only one thread in a thread group can do exec()

	sighand->siglock garantees that get_signal_to_deliver()
	will not see a stale value of child_reaper.

This means that we can change child_reaper earlier, without calling
zap_other_threads() twice.

"child_reaper = current" is a NOOP when init does exec from main thread, we
don't care.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index d0ecea0781f..dd194923c52 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -616,6 +616,15 @@ static int de_thread(struct task_struct *tsk)
 		kmem_cache_free(sighand_cachep, newsighand);
 		return -EAGAIN;
 	}
+
+	/*
+	 * child_reaper ignores SIGKILL, change it now.
+	 * Reparenting needs write_lock on tasklist_lock,
+	 * so it is safe to do it under read_lock.
+	 */
+	if (unlikely(current->group_leader == child_reaper))
+		child_reaper = current;
+
 	zap_other_threads(current);
 	read_unlock(&tasklist_lock);
 
@@ -660,23 +669,12 @@ static int de_thread(struct task_struct *tsk)
 		struct dentry *proc_dentry1, *proc_dentry2;
 		unsigned long ptrace;
 
-		leader = current->group_leader;
-		/*
-		 * If our leader is the child_reaper become
-		 * the child_reaper and resend SIGKILL signal.
-		 */
-		if (unlikely(leader == child_reaper)) {
-			write_lock(&tasklist_lock);
-			child_reaper = current;
-			zap_other_threads(current);
-			write_unlock(&tasklist_lock);
-		}
-
 		/*
 		 * Wait for the thread group leader to be a zombie.
 		 * It should already be zombie at this point, most
 		 * of the time.
 		 */
+		leader = current->group_leader;
 		while (leader->exit_state != EXIT_ZOMBIE)
 			yield();
 
-- 
cgit v1.2.3


From d73d65293e3e2de7e916a89c8da30be0948afab7 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 28 Mar 2006 16:11:03 -0800
Subject: [PATCH] pidhash: kill switch_exec_pids

switch_exec_pids is only called from de_thread by way of exec, and it is
only called when we are exec'ing from a non thread group leader.

Currently switch_exec_pids gives the leader the pid of the thread and
unhashes and rehashes all of the process groups.  The leader is already in
the EXIT_DEAD state so no one cares about it's pids.  The only concern for
the leader is that __unhash_process called from release_task will function
correctly.  If we don't touch the leader at all we know that
__unhash_process will work fine so there is no need to touch the leader.

For the task becomming the thread group leader, we just need to give it the
pid of the old thread group leader, add it to the task list, and attach it
to the session and the process group of the thread group.

Currently de_thread is also adding the task to the task list which is just
silly.

Currently the only leader of __detach_pid besides detach_pid is
switch_exec_pids because of the ugly extra work that was being
performed.

So this patch removes switch_exec_pids because it is doing too much, it is
creating an unnecessary special case in pid.c, duing work duplicated in
de_thread, and generally obscuring what it is going on.

The necessary work is added to de_thread, and it seems to be a little
clearer there what is going on.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Kirill Korotaev <dev@sw.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index dd194923c52..db0769447d3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -708,7 +708,17 @@ static int de_thread(struct task_struct *tsk)
 		remove_parent(current);
 		remove_parent(leader);
 
-		switch_exec_pids(leader, current);
+
+		/* Become a process group leader with the old leader's pid.
+		 * Note: The old leader also uses thispid until release_task
+		 *       is called.  Odd but simple and correct.
+		 */
+		detach_pid(current, PIDTYPE_PID);
+		current->pid = leader->pid;
+		attach_pid(current, PIDTYPE_PID,  current->pid);
+		attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
+		attach_pid(current, PIDTYPE_SID,  current->signal->session);
+		list_add_tail(&current->tasks, &init_task.tasks);
 
 		current->parent = current->real_parent = leader->real_parent;
 		leader->parent = leader->real_parent = child_reaper;
@@ -722,8 +732,6 @@ static int de_thread(struct task_struct *tsk)
 			__ptrace_link(current, parent);
 		}
 
-		list_del(&current->tasks);
-		list_add_tail(&current->tasks, &init_task.tasks);
 		current->exit_signal = SIGCHLD;
 
 		BUG_ON(leader->exit_state != EXIT_ZOMBIE);
-- 
cgit v1.2.3


From 8fafabd86f1b75ed3cc6a6ffbe6c3e53e3d8457d Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Tue, 28 Mar 2006 16:11:05 -0800
Subject: [PATCH] remove add_parent()'s parent argument

add_parent(p, parent) is always called with parent == p->parent, and it makes
no sense to do it differently.  This patch removes this argument.

No changes in affected .o files.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index db0769447d3..9046ad2b061 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -725,8 +725,8 @@ static int de_thread(struct task_struct *tsk)
 		current->group_leader = current;
 		leader->group_leader = leader;
 
-		add_parent(current, current->parent);
-		add_parent(leader, leader->parent);
+		add_parent(current);
+		add_parent(leader);
 		if (ptrace) {
 			current->ptrace = ptrace;
 			__ptrace_link(current, parent);
-- 
cgit v1.2.3


From aa1757f90bea3f598b6e5d04d922a6a60200f1da Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Tue, 28 Mar 2006 16:11:12 -0800
Subject: [PATCH] convert sighand_cache to use SLAB_DESTROY_BY_RCU

This patch borrows a clever Hugh's 'struct anon_vma' trick.

Without tasklist_lock held we can't trust task->sighand until we locked it
and re-checked that it is still the same.

But this means we don't need to defer 'kmem_cache_free(sighand)'.  We can
return the memory to slab immediately, all we need is to be sure that
sighand->siglock can't dissapear inside rcu protected section.

To do so we need to initialize ->siglock inside ctor function,
SLAB_DESTROY_BY_RCU does the rest.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 9046ad2b061..950ebd43cdc 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -768,7 +768,6 @@ no_thread_group:
 		/*
 		 * Move our state over to newsighand and switch it in.
 		 */
-		spin_lock_init(&newsighand->siglock);
 		atomic_set(&newsighand->count, 1);
 		memcpy(newsighand->action, oldsighand->action,
 		       sizeof(newsighand->action));
@@ -785,7 +784,7 @@ no_thread_group:
 		write_unlock_irq(&tasklist_lock);
 
 		if (atomic_dec_and_test(&oldsighand->count))
-			sighand_free(oldsighand);
+			kmem_cache_free(sighand_cachep, oldsighand);
 	}
 
 	BUG_ON(!thread_group_leader(current));
-- 
cgit v1.2.3


From 5274f052e7b3dbd81935772eb551dfd0325dfa9d Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 30 Mar 2006 15:15:30 +0200
Subject: [PATCH] Introduce sys_splice() system call

This adds support for the sys_splice system call. Using a pipe as a
transport, it can connect to files or sockets (latter as output only).

From the splice.c comments:

   "splice": joining two ropes together by interweaving their strands.

   This is the "extended pipe" functionality, where a pipe is used as
   an arbitrary in-memory buffer. Think of a pipe as a small kernel
   buffer that you can use to transfer data from one end to the other.

   The traditional unix read/write is extended with a "splice()" operation
   that transfers data buffers to or from a pipe buffer.

   Named by Larry McVoy, original implementation from Linus, extended by
   Jens to support splicing to files and fixing the initial implementation
   bugs.

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Makefile        |   2 +-
 fs/ext2/file.c     |   2 +
 fs/ext3/file.c     |   2 +
 fs/pipe.c          |  33 ++-
 fs/reiserfs/file.c |   2 +
 fs/splice.c        | 612 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 647 insertions(+), 6 deletions(-)
 create mode 100644 fs/splice.c

(limited to 'fs')

diff --git a/fs/Makefile b/fs/Makefile
index 080b3867be4..f3a4f707717 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,7 +10,7 @@ obj-y :=	open.o read_write.o file_table.o buffer.o  bio.o super.o \
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-		ioprio.o pnode.o drop_caches.o
+		ioprio.o pnode.o drop_caches.o splice.o
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 509cceca04d..23e2c7ccec1 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -53,6 +53,8 @@ const struct file_operations ext2_file_operations = {
 	.readv		= generic_file_readv,
 	.writev		= generic_file_writev,
 	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= generic_file_splice_write,
 };
 
 #ifdef CONFIG_EXT2_FS_XIP
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 783a796220b..1efefb630ea 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -119,6 +119,8 @@ const struct file_operations ext3_file_operations = {
 	.release	= ext3_release_file,
 	.fsync		= ext3_sync_file,
 	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= generic_file_splice_write,
 };
 
 struct inode_operations ext3_file_inode_operations = {
diff --git a/fs/pipe.c b/fs/pipe.c
index e2f4f1d9ffc..2414bf270db 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -15,6 +15,7 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/uio.h>
 #include <linux/highmem.h>
+#include <linux/pagemap.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -94,11 +95,20 @@ static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buff
 {
 	struct page *page = buf->page;
 
-	if (info->tmp_page) {
-		__free_page(page);
+	/*
+	 * If nobody else uses this page, and we don't already have a
+	 * temporary page, let's keep track of it as a one-deep
+	 * allocation cache
+	 */
+	if (page_count(page) == 1 && !info->tmp_page) {
+		info->tmp_page = page;
 		return;
 	}
-	info->tmp_page = page;
+
+	/*
+	 * Otherwise just release our reference to it
+	 */
+	page_cache_release(page);
 }
 
 static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
@@ -152,6 +162,11 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
 				chars = total_len;
 
 			addr = ops->map(filp, info, buf);
+			if (IS_ERR(addr)) {
+				if (!ret)
+					ret = PTR_ERR(addr);
+				break;
+			}
 			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
 			ops->unmap(info, buf);
 			if (unlikely(error)) {
@@ -254,8 +269,16 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 		struct pipe_buf_operations *ops = buf->ops;
 		int offset = buf->offset + buf->len;
 		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
-			void *addr = ops->map(filp, info, buf);
-			int error = pipe_iov_copy_from_user(offset + addr, iov, chars);
+			void *addr;
+			int error;
+
+			addr = ops->map(filp, info, buf);
+			if (IS_ERR(addr)) {
+				error = PTR_ERR(addr);
+				goto out;
+			}
+			error = pipe_iov_copy_from_user(offset + addr, iov,
+							chars);
 			ops->unmap(info, buf);
 			ret = error;
 			do_wakeup = 1;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 010094d14da..cf6e1cf4035 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1576,6 +1576,8 @@ const struct file_operations reiserfs_file_operations = {
 	.sendfile = generic_file_sendfile,
 	.aio_read = generic_file_aio_read,
 	.aio_write = reiserfs_aio_write,
+	.splice_read = generic_file_splice_read,
+	.splice_write = generic_file_splice_write,
 };
 
 struct inode_operations reiserfs_file_inode_operations = {
diff --git a/fs/splice.c b/fs/splice.c
new file mode 100644
index 00000000000..efa47c1c4e1
--- /dev/null
+++ b/fs/splice.c
@@ -0,0 +1,612 @@
+/*
+ * "splice": joining two ropes together by interweaving their strands.
+ *
+ * This is the "extended pipe" functionality, where a pipe is used as
+ * an arbitrary in-memory buffer. Think of a pipe as a small kernel
+ * buffer that you can use to transfer data from one end to the other.
+ *
+ * The traditional unix read/write is extended with a "splice()" operation
+ * that transfers data buffers to or from a pipe buffer.
+ *
+ * Named by Larry McVoy, original implementation from Linus, extended by
+ * Jens to support splicing to files and fixing the initial implementation
+ * bugs.
+ *
+ * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2005 Linus Torvalds <torvalds@osdl.org>
+ *
+ */
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mm_inline.h>
+
+/*
+ * Passed to the actors
+ */
+struct splice_desc {
+	unsigned int len, total_len;	/* current and remaining length */
+	unsigned int flags;		/* splice flags */
+	struct file *file;		/* file to read/write */
+	loff_t pos;			/* file position */
+};
+
+static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
+					struct pipe_buffer *buf)
+{
+	page_cache_release(buf->page);
+	buf->page = NULL;
+}
+
+static void *page_cache_pipe_buf_map(struct file *file,
+				     struct pipe_inode_info *info,
+				     struct pipe_buffer *buf)
+{
+	struct page *page = buf->page;
+
+	lock_page(page);
+
+	if (!PageUptodate(page)) {
+		unlock_page(page);
+		return ERR_PTR(-EIO);
+	}
+
+	if (!page->mapping) {
+		unlock_page(page);
+		return ERR_PTR(-ENODATA);
+	}
+
+	return kmap(buf->page);
+}
+
+static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
+				      struct pipe_buffer *buf)
+{
+	unlock_page(buf->page);
+	kunmap(buf->page);
+}
+
+static struct pipe_buf_operations page_cache_pipe_buf_ops = {
+	.can_merge = 0,
+	.map = page_cache_pipe_buf_map,
+	.unmap = page_cache_pipe_buf_unmap,
+	.release = page_cache_pipe_buf_release,
+};
+
+static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
+			    int nr_pages, unsigned long offset,
+			    unsigned long len)
+{
+	struct pipe_inode_info *info;
+	int ret, do_wakeup, i;
+
+	ret = 0;
+	do_wakeup = 0;
+	i = 0;
+
+	mutex_lock(PIPE_MUTEX(*inode));
+
+	info = inode->i_pipe;
+	for (;;) {
+		int bufs;
+
+		if (!PIPE_READERS(*inode)) {
+			send_sig(SIGPIPE, current, 0);
+			if (!ret)
+				ret = -EPIPE;
+			break;
+		}
+
+		bufs = info->nrbufs;
+		if (bufs < PIPE_BUFFERS) {
+			int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS - 1);
+			struct pipe_buffer *buf = info->bufs + newbuf;
+			struct page *page = pages[i++];
+			unsigned long this_len;
+
+			this_len = PAGE_CACHE_SIZE - offset;
+			if (this_len > len)
+				this_len = len;
+
+			buf->page = page;
+			buf->offset = offset;
+			buf->len = this_len;
+			buf->ops = &page_cache_pipe_buf_ops;
+			info->nrbufs = ++bufs;
+			do_wakeup = 1;
+
+			ret += this_len;
+			len -= this_len;
+			offset = 0;
+			if (!--nr_pages)
+				break;
+			if (!len)
+				break;
+			if (bufs < PIPE_BUFFERS)
+				continue;
+
+			break;
+		}
+
+		if (signal_pending(current)) {
+			if (!ret)
+				ret = -ERESTARTSYS;
+			break;
+		}
+
+		if (do_wakeup) {
+			wake_up_interruptible_sync(PIPE_WAIT(*inode));
+			kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO,
+				    POLL_IN);
+			do_wakeup = 0;
+		}
+
+		PIPE_WAITING_WRITERS(*inode)++;
+		pipe_wait(inode);
+		PIPE_WAITING_WRITERS(*inode)--;
+	}
+
+	mutex_unlock(PIPE_MUTEX(*inode));
+
+	if (do_wakeup) {
+		wake_up_interruptible(PIPE_WAIT(*inode));
+		kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
+	}
+
+	while (i < nr_pages)
+		page_cache_release(pages[i++]);
+
+	return ret;
+}
+
+static int __generic_file_splice_read(struct file *in, struct inode *pipe,
+				      size_t len)
+{
+	struct address_space *mapping = in->f_mapping;
+	unsigned int offset, nr_pages;
+	struct page *pages[PIPE_BUFFERS], *shadow[PIPE_BUFFERS];
+	struct page *page;
+	pgoff_t index, pidx;
+	int i, j;
+
+	index = in->f_pos >> PAGE_CACHE_SHIFT;
+	offset = in->f_pos & ~PAGE_CACHE_MASK;
+	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+	if (nr_pages > PIPE_BUFFERS)
+		nr_pages = PIPE_BUFFERS;
+
+	/*
+	 * initiate read-ahead on this page range
+	 */
+	do_page_cache_readahead(mapping, in, index, nr_pages);
+
+	/*
+	 * Get as many pages from the page cache as possible..
+	 * Start IO on the page cache entries we create (we
+	 * can assume that any pre-existing ones we find have
+	 * already had IO started on them).
+	 */
+	i = find_get_pages(mapping, index, nr_pages, pages);
+
+	/*
+	 * common case - we found all pages and they are contiguous,
+	 * kick them off
+	 */
+	if (i && (pages[i - 1]->index == index + i - 1))
+		goto splice_them;
+
+	/*
+	 * fill shadow[] with pages at the right locations, so we only
+	 * have to fill holes
+	 */
+	memset(shadow, 0, i * sizeof(struct page *));
+	for (j = 0, pidx = index; j < i; pidx++, j++)
+		shadow[pages[j]->index - pidx] = pages[j];
+
+	/*
+	 * now fill in the holes
+	 */
+	for (i = 0, pidx = index; i < nr_pages; pidx++, i++) {
+		int error;
+
+		if (shadow[i])
+			continue;
+
+		/*
+		 * no page there, look one up / create it
+		 */
+		page = find_or_create_page(mapping, pidx,
+						   mapping_gfp_mask(mapping));
+		if (!page)
+			break;
+
+		if (PageUptodate(page))
+			unlock_page(page);
+		else {
+			error = mapping->a_ops->readpage(in, page);
+
+			if (unlikely(error)) {
+				page_cache_release(page);
+				break;
+			}
+		}
+		shadow[i] = page;
+	}
+
+	if (!i) {
+		for (i = 0; i < nr_pages; i++) {
+			 if (shadow[i])
+				page_cache_release(shadow[i]);
+		}
+		return 0;
+	}
+
+	memcpy(pages, shadow, i * sizeof(struct page *));
+
+	/*
+	 * Now we splice them into the pipe..
+	 */
+splice_them:
+	return move_to_pipe(pipe, pages, i, offset, len);
+}
+
+ssize_t generic_file_splice_read(struct file *in, struct inode *pipe,
+				 size_t len, unsigned int flags)
+{
+	ssize_t spliced;
+	int ret;
+
+	ret = 0;
+	spliced = 0;
+	while (len) {
+		ret = __generic_file_splice_read(in, pipe, len);
+
+		if (ret <= 0)
+			break;
+
+		in->f_pos += ret;
+		len -= ret;
+		spliced += ret;
+	}
+
+	if (spliced)
+		return spliced;
+
+	return ret;
+}
+
+/*
+ * Send 'len' bytes to socket from 'file' at position 'pos' using sendpage().
+ */
+static int pipe_to_sendpage(struct pipe_inode_info *info,
+			    struct pipe_buffer *buf, struct splice_desc *sd)
+{
+	struct file *file = sd->file;
+	loff_t pos = sd->pos;
+	unsigned int offset;
+	ssize_t ret;
+	void *ptr;
+
+	/*
+	 * sub-optimal, but we are limited by the pipe ->map. we don't
+	 * need a kmap'ed buffer here, we just want to make sure we
+	 * have the page pinned if the pipe page originates from the
+	 * page cache
+	 */
+	ptr = buf->ops->map(file, info, buf);
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+
+	offset = pos & ~PAGE_CACHE_MASK;
+
+	ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,
+					sd->len < sd->total_len);
+
+	buf->ops->unmap(info, buf);
+	if (ret == sd->len)
+		return 0;
+
+	return -EIO;
+}
+
+/*
+ * This is a little more tricky than the file -> pipe splicing. There are
+ * basically three cases:
+ *
+ *	- Destination page already exists in the address space and there
+ *	  are users of it. For that case we have no other option that
+ *	  copying the data. Tough luck.
+ *	- Destination page already exists in the address space, but there
+ *	  are no users of it. Make sure it's uptodate, then drop it. Fall
+ *	  through to last case.
+ *	- Destination page does not exist, we can add the pipe page to
+ *	  the page cache and avoid the copy.
+ *
+ * For now we just do the slower thing and always copy pages over, it's
+ * easier than migrating pages from the pipe to the target file. For the
+ * case of doing file | file splicing, the migrate approach had some LRU
+ * nastiness...
+ */
+static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
+			struct splice_desc *sd)
+{
+	struct file *file = sd->file;
+	struct address_space *mapping = file->f_mapping;
+	unsigned int offset;
+	struct page *page;
+	char *src, *dst;
+	pgoff_t index;
+	int ret;
+
+	/*
+	 * after this, page will be locked and unmapped
+	 */
+	src = buf->ops->map(file, info, buf);
+	if (IS_ERR(src))
+		return PTR_ERR(src);
+
+	index = sd->pos >> PAGE_CACHE_SHIFT;
+	offset = sd->pos & ~PAGE_CACHE_MASK;
+
+find_page:
+	ret = -ENOMEM;
+	page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
+	if (!page)
+		goto out;
+
+	/*
+	 * If the page is uptodate, it is also locked. If it isn't
+	 * uptodate, we can mark it uptodate if we are filling the
+	 * full page. Otherwise we need to read it in first...
+	 */
+	if (!PageUptodate(page)) {
+		if (sd->len < PAGE_CACHE_SIZE) {
+			ret = mapping->a_ops->readpage(file, page);
+			if (unlikely(ret))
+				goto out;
+
+			lock_page(page);
+
+			if (!PageUptodate(page)) {
+				/*
+				 * page got invalidated, repeat
+				 */
+				if (!page->mapping) {
+					unlock_page(page);
+					page_cache_release(page);
+					goto find_page;
+				}
+				ret = -EIO;
+				goto out;
+			}
+		} else {
+			WARN_ON(!PageLocked(page));
+			SetPageUptodate(page);
+		}
+	}
+
+	ret = mapping->a_ops->prepare_write(file, page, 0, sd->len);
+	if (ret)
+		goto out;
+
+	dst = kmap_atomic(page, KM_USER0);
+	memcpy(dst + offset, src + buf->offset, sd->len);
+	flush_dcache_page(page);
+	kunmap_atomic(dst, KM_USER0);
+
+	ret = mapping->a_ops->commit_write(file, page, 0, sd->len);
+	if (ret < 0)
+		goto out;
+
+	set_page_dirty(page);
+	ret = write_one_page(page, 0);
+out:
+	if (ret < 0)
+		unlock_page(page);
+	page_cache_release(page);
+	buf->ops->unmap(info, buf);
+	return ret;
+}
+
+typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
+			   struct splice_desc *);
+
+static ssize_t move_from_pipe(struct inode *inode, struct file *out,
+			      size_t len, unsigned int flags,
+			      splice_actor *actor)
+{
+	struct pipe_inode_info *info;
+	int ret, do_wakeup, err;
+	struct splice_desc sd;
+
+	ret = 0;
+	do_wakeup = 0;
+
+	sd.total_len = len;
+	sd.flags = flags;
+	sd.file = out;
+	sd.pos = out->f_pos;
+
+	mutex_lock(PIPE_MUTEX(*inode));
+
+	info = inode->i_pipe;
+	for (;;) {
+		int bufs = info->nrbufs;
+
+		if (bufs) {
+			int curbuf = info->curbuf;
+			struct pipe_buffer *buf = info->bufs + curbuf;
+			struct pipe_buf_operations *ops = buf->ops;
+
+			sd.len = buf->len;
+			if (sd.len > sd.total_len)
+				sd.len = sd.total_len;
+
+			err = actor(info, buf, &sd);
+			if (err) {
+				if (!ret && err != -ENODATA)
+					ret = err;
+
+				break;
+			}
+
+			ret += sd.len;
+			buf->offset += sd.len;
+			buf->len -= sd.len;
+			if (!buf->len) {
+				buf->ops = NULL;
+				ops->release(info, buf);
+				curbuf = (curbuf + 1) & (PIPE_BUFFERS - 1);
+				info->curbuf = curbuf;
+				info->nrbufs = --bufs;
+				do_wakeup = 1;
+			}
+
+			sd.pos += sd.len;
+			sd.total_len -= sd.len;
+			if (!sd.total_len)
+				break;
+		}
+
+		if (bufs)
+			continue;
+		if (!PIPE_WRITERS(*inode))
+			break;
+		if (!PIPE_WAITING_WRITERS(*inode)) {
+			if (ret)
+				break;
+		}
+
+		if (signal_pending(current)) {
+			if (!ret)
+				ret = -ERESTARTSYS;
+			break;
+		}
+
+		if (do_wakeup) {
+			wake_up_interruptible_sync(PIPE_WAIT(*inode));
+			kill_fasync(PIPE_FASYNC_WRITERS(*inode),SIGIO,POLL_OUT);
+			do_wakeup = 0;
+		}
+
+		pipe_wait(inode);
+	}
+
+	mutex_unlock(PIPE_MUTEX(*inode));
+
+	if (do_wakeup) {
+		wake_up_interruptible(PIPE_WAIT(*inode));
+		kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
+	}
+
+	mutex_lock(&out->f_mapping->host->i_mutex);
+	out->f_pos = sd.pos;
+	mutex_unlock(&out->f_mapping->host->i_mutex);
+	return ret;
+
+}
+
+ssize_t generic_file_splice_write(struct inode *inode, struct file *out,
+				  size_t len, unsigned int flags)
+{
+	return move_from_pipe(inode, out, len, flags, pipe_to_file);
+}
+
+ssize_t generic_splice_sendpage(struct inode *inode, struct file *out,
+				size_t len, unsigned int flags)
+{
+	return move_from_pipe(inode, out, len, flags, pipe_to_sendpage);
+}
+
+static long do_splice_from(struct inode *pipe, struct file *out, size_t len,
+			   unsigned int flags)
+{
+	loff_t pos;
+	int ret;
+
+	if (!out->f_op || !out->f_op->splice_write)
+		return -EINVAL;
+
+	if (!(out->f_mode & FMODE_WRITE))
+		return -EBADF;
+
+	pos = out->f_pos;
+	ret = rw_verify_area(WRITE, out, &pos, len);
+	if (unlikely(ret < 0))
+		return ret;
+
+	return out->f_op->splice_write(pipe, out, len, flags);
+}
+
+static long do_splice_to(struct file *in, struct inode *pipe, size_t len,
+			 unsigned int flags)
+{
+	loff_t pos, isize, left;
+	int ret;
+
+	if (!in->f_op || !in->f_op->splice_read)
+		return -EINVAL;
+
+	if (!(in->f_mode & FMODE_READ))
+		return -EBADF;
+
+	pos = in->f_pos;
+	ret = rw_verify_area(READ, in, &pos, len);
+	if (unlikely(ret < 0))
+		return ret;
+
+	isize = i_size_read(in->f_mapping->host);
+	if (unlikely(in->f_pos >= isize))
+		return 0;
+	
+	left = isize - in->f_pos;
+	if (left < len)
+		len = left;
+
+	return in->f_op->splice_read(in, pipe, len, flags);
+}
+
+static long do_splice(struct file *in, struct file *out, size_t len,
+		      unsigned int flags)
+{
+	struct inode *pipe;
+
+	pipe = in->f_dentry->d_inode;
+	if (pipe->i_pipe)
+		return do_splice_from(pipe, out, len, flags);
+
+	pipe = out->f_dentry->d_inode;
+	if (pipe->i_pipe)
+		return do_splice_to(in, pipe, len, flags);
+
+	return -EINVAL;
+}
+
+asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags)
+{
+	long error;
+	struct file *in, *out;
+	int fput_in, fput_out;
+
+	if (unlikely(!len))
+		return 0;
+
+	error = -EBADF;
+	in = fget_light(fdin, &fput_in);
+	if (in) {
+		if (in->f_mode & FMODE_READ) {
+			out = fget_light(fdout, &fput_out);
+			if (out) {
+				if (out->f_mode & FMODE_WRITE)
+					error = do_splice(in, out, len, flags);
+				fput_light(out, fput_out);
+			}
+		}
+
+		fput_light(in, fput_in);
+	}
+
+	return error;
+}
-- 
cgit v1.2.3


From 5abc97aa25b2c41413b3a520faee83f2282d9f18 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 30 Mar 2006 15:16:46 +0200
Subject: [PATCH] splice: add support for SPLICE_F_MOVE flag

This enables the caller to migrate pages from one address space page
cache to another.  In buzz word marketing, you can do zero-copy file
copies!

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/pipe.c   |   8 ++++
 fs/splice.c | 121 +++++++++++++++++++++++++++++++++++++++++-------------------
 2 files changed, 92 insertions(+), 37 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 2414bf270db..109a102c150 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -121,11 +121,19 @@ static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer
 	kunmap(buf->page);
 }
 
+static int anon_pipe_buf_steal(struct pipe_inode_info *info,
+			       struct pipe_buffer *buf)
+{
+	buf->stolen = 1;
+	return 0;
+}
+
 static struct pipe_buf_operations anon_pipe_buf_ops = {
 	.can_merge = 1,
 	.map = anon_pipe_buf_map,
 	.unmap = anon_pipe_buf_unmap,
 	.release = anon_pipe_buf_release,
+	.steal = anon_pipe_buf_steal,
 };
 
 static ssize_t
diff --git a/fs/splice.c b/fs/splice.c
index efa47c1c4e1..4a026f95884 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -21,6 +21,7 @@
 #include <linux/pagemap.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/mm_inline.h>
+#include <linux/swap.h>
 
 /*
  * Passed to the actors
@@ -32,11 +33,37 @@ struct splice_desc {
 	loff_t pos;			/* file position */
 };
 
+static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
+				     struct pipe_buffer *buf)
+{
+	struct page *page = buf->page;
+
+	WARN_ON(!PageLocked(page));
+	WARN_ON(!PageUptodate(page));
+
+	if (!remove_mapping(page_mapping(page), page))
+		return 1;
+
+	if (PageLRU(page)) {
+		struct zone *zone = page_zone(page);
+
+		spin_lock_irq(&zone->lru_lock);
+		BUG_ON(!PageLRU(page));
+		__ClearPageLRU(page);
+		del_page_from_lru(zone, page);
+		spin_unlock_irq(&zone->lru_lock);
+	}
+
+	buf->stolen = 1;
+	return 0;
+}
+
 static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
 					struct pipe_buffer *buf)
 {
 	page_cache_release(buf->page);
 	buf->page = NULL;
+	buf->stolen = 0;
 }
 
 static void *page_cache_pipe_buf_map(struct file *file,
@@ -63,7 +90,8 @@ static void *page_cache_pipe_buf_map(struct file *file,
 static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
 				      struct pipe_buffer *buf)
 {
-	unlock_page(buf->page);
+	if (!buf->stolen)
+		unlock_page(buf->page);
 	kunmap(buf->page);
 }
 
@@ -72,6 +100,7 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = {
 	.map = page_cache_pipe_buf_map,
 	.unmap = page_cache_pipe_buf_unmap,
 	.release = page_cache_pipe_buf_release,
+	.steal = page_cache_pipe_buf_steal,
 };
 
 static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
@@ -336,8 +365,8 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 	struct address_space *mapping = file->f_mapping;
 	unsigned int offset;
 	struct page *page;
-	char *src, *dst;
 	pgoff_t index;
+	char *src;
 	int ret;
 
 	/*
@@ -350,40 +379,54 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 	index = sd->pos >> PAGE_CACHE_SHIFT;
 	offset = sd->pos & ~PAGE_CACHE_MASK;
 
-find_page:
-	ret = -ENOMEM;
-	page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
-	if (!page)
-		goto out;
-
 	/*
-	 * If the page is uptodate, it is also locked. If it isn't
-	 * uptodate, we can mark it uptodate if we are filling the
-	 * full page. Otherwise we need to read it in first...
+	 * reuse buf page, if SPLICE_F_MOVE is set
 	 */
-	if (!PageUptodate(page)) {
-		if (sd->len < PAGE_CACHE_SIZE) {
-			ret = mapping->a_ops->readpage(file, page);
-			if (unlikely(ret))
-				goto out;
-
-			lock_page(page);
-
-			if (!PageUptodate(page)) {
-				/*
-				 * page got invalidated, repeat
-				 */
-				if (!page->mapping) {
-					unlock_page(page);
-					page_cache_release(page);
-					goto find_page;
+	if (sd->flags & SPLICE_F_MOVE) {
+		if (buf->ops->steal(info, buf))
+			goto find_page;
+
+		page = buf->page;
+		if (add_to_page_cache_lru(page, mapping, index,
+						mapping_gfp_mask(mapping)))
+			goto find_page;
+	} else {
+find_page:
+		ret = -ENOMEM;
+		page = find_or_create_page(mapping, index,
+						mapping_gfp_mask(mapping));
+		if (!page)
+			goto out;
+
+		/*
+		 * If the page is uptodate, it is also locked. If it isn't
+		 * uptodate, we can mark it uptodate if we are filling the
+		 * full page. Otherwise we need to read it in first...
+		 */
+		if (!PageUptodate(page)) {
+			if (sd->len < PAGE_CACHE_SIZE) {
+				ret = mapping->a_ops->readpage(file, page);
+				if (unlikely(ret))
+					goto out;
+
+				lock_page(page);
+
+				if (!PageUptodate(page)) {
+					/*
+					 * page got invalidated, repeat
+					 */
+					if (!page->mapping) {
+						unlock_page(page);
+						page_cache_release(page);
+						goto find_page;
+					}
+					ret = -EIO;
+					goto out;
 				}
-				ret = -EIO;
-				goto out;
+			} else {
+				WARN_ON(!PageLocked(page));
+				SetPageUptodate(page);
 			}
-		} else {
-			WARN_ON(!PageLocked(page));
-			SetPageUptodate(page);
 		}
 	}
 
@@ -391,10 +434,13 @@ find_page:
 	if (ret)
 		goto out;
 
-	dst = kmap_atomic(page, KM_USER0);
-	memcpy(dst + offset, src + buf->offset, sd->len);
-	flush_dcache_page(page);
-	kunmap_atomic(dst, KM_USER0);
+	if (!buf->stolen) {
+		char *dst = kmap_atomic(page, KM_USER0);
+
+		memcpy(dst + offset, src + buf->offset, sd->len);
+		flush_dcache_page(page);
+		kunmap_atomic(dst, KM_USER0);
+	}
 
 	ret = mapping->a_ops->commit_write(file, page, 0, sd->len);
 	if (ret < 0)
@@ -405,7 +451,8 @@ find_page:
 out:
 	if (ret < 0)
 		unlock_page(page);
-	page_cache_release(page);
+	if (!buf->stolen)
+		page_cache_release(page);
 	buf->ops->unmap(info, buf);
 	return ret;
 }
-- 
cgit v1.2.3