From 1868f4aa5a4a72bbe0b7db6c1d4ee666824c3895 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 4 May 2005 15:29:35 -0500
Subject: JFS: fix sparse warnings by moving extern declarations to headers

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/file.c           |  9 +++------
 fs/jfs/inode.c          | 11 ++---------
 fs/jfs/jfs_debug.c      | 10 ----------
 fs/jfs/jfs_debug.h      | 15 ++++++++++----
 fs/jfs/jfs_extent.c     |  7 +------
 fs/jfs/jfs_imap.c       |  6 +-----
 fs/jfs/jfs_inode.c      |  1 +
 fs/jfs/jfs_inode.h      | 19 +++++++++++++++++-
 fs/jfs/jfs_logmgr.c     |  9 +--------
 fs/jfs/jfs_logmgr.h     |  2 ++
 fs/jfs/jfs_metapage.c   |  4 ++--
 fs/jfs/jfs_metapage.h   |  6 ++++--
 fs/jfs/jfs_superblock.h | 11 +++++++++++
 fs/jfs/jfs_txnmgr.c     | 40 +------------------------------------
 fs/jfs/jfs_txnmgr.h     | 52 +++++++++++++++++++++----------------------------
 fs/jfs/namei.c          | 28 ++++++--------------------
 fs/jfs/super.c          | 37 +----------------------------------
 fs/jfs/symlink.c        |  3 ++-
 18 files changed, 89 insertions(+), 181 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index a87b06fa8ff..c2c19c9ed9a 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -1,6 +1,6 @@
 /*
- *   Copyright (c) International Business Machines Corp., 2000-2002
- *   Portions Copyright (c) Christoph Hellwig, 2001-2002
+ *   Copyright (C) International Business Machines Corp., 2000-2002
+ *   Portions Copyright (C) Christoph Hellwig, 2001-2002
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -19,16 +19,13 @@
 
 #include <linux/fs.h>
 #include "jfs_incore.h"
+#include "jfs_inode.h"
 #include "jfs_dmap.h"
 #include "jfs_txnmgr.h"
 #include "jfs_xattr.h"
 #include "jfs_acl.h"
 #include "jfs_debug.h"
 
-
-extern int jfs_commit_inode(struct inode *, int);
-extern void jfs_truncate(struct inode *);
-
 int jfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
 	struct inode *inode = dentry->d_inode;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 24a689179af..2137138c59b 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -23,6 +23,7 @@
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
 #include "jfs_incore.h"
+#include "jfs_inode.h"
 #include "jfs_filsys.h"
 #include "jfs_imap.h"
 #include "jfs_extent.h"
@@ -30,14 +31,6 @@
 #include "jfs_debug.h"
 
 
-extern struct inode_operations jfs_dir_inode_operations;
-extern struct inode_operations jfs_file_inode_operations;
-extern struct inode_operations jfs_symlink_inode_operations;
-extern struct file_operations jfs_dir_operations;
-extern struct file_operations jfs_file_operations;
-struct address_space_operations jfs_aops;
-extern int freeZeroLink(struct inode *);
-
 void jfs_read_inode(struct inode *inode)
 {
 	if (diRead(inode)) { 
@@ -136,7 +129,7 @@ void jfs_delete_inode(struct inode *inode)
 	jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
 
 	if (test_cflag(COMMIT_Freewmap, inode))
-		freeZeroLink(inode);
+		jfs_free_zero_link(inode);
 
 	diFree(inode);
 
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 91a0a889ebc..4caea6b43b9 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -58,8 +58,6 @@ void dump_mem(char *label, void *data, int length)
 
 static struct proc_dir_entry *base;
 #ifdef CONFIG_JFS_DEBUG
-extern read_proc_t jfs_txanchor_read;
-
 static int loglevel_read(char *page, char **start, off_t off,
 			 int count, int *eof, void *data)
 {
@@ -97,14 +95,6 @@ static int loglevel_write(struct file *file, const char __user *buffer,
 }
 #endif
 
-
-#ifdef CONFIG_JFS_STATISTICS
-extern read_proc_t jfs_lmstats_read;
-extern read_proc_t jfs_txstats_read;
-extern read_proc_t jfs_xtstat_read;
-extern read_proc_t jfs_mpstat_read;
-#endif
-
 static struct {
 	const char	*name;
 	read_proc_t	*read_fn;
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index a38079ae1e0..ddffbbd4d95 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -1,6 +1,6 @@
 /*
- *   Copyright (c) International Business Machines Corp., 2000-2002
- *   Portions Copyright (c) Christoph Hellwig, 2001-2002
+ *   Copyright (C) International Business Machines Corp., 2000-2002
+ *   Portions Copyright (C) Christoph Hellwig, 2001-2002
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -31,7 +31,9 @@
  * CONFIG_JFS_DEBUG or CONFIG_JFS_STATISTICS is defined
  */
 #if defined(CONFIG_PROC_FS) && (defined(CONFIG_JFS_DEBUG) || defined(CONFIG_JFS_STATISTICS))
-	#define PROC_FS_JFS
+#define PROC_FS_JFS
+extern void jfs_proc_init(void);
+extern void jfs_proc_clean(void);
 #endif
 
 /*
@@ -65,8 +67,8 @@
 
 extern int jfsloglevel;
 
-/* dump memory contents */
 extern void dump_mem(char *label, void *data, int length);
+extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
 
 /* information message: e.g., configuration, major event */
 #define jfs_info(fmt, arg...) do {			\
@@ -110,6 +112,11 @@ extern void dump_mem(char *label, void *data, int length);
  *	----------
  */
 #ifdef	CONFIG_JFS_STATISTICS
+extern int jfs_lmstats_read(char *, char **, off_t, int, int *, void *);
+extern int jfs_txstats_read(char *, char **, off_t, int, int *, void *);
+extern int jfs_mpstat_read(char *, char **, off_t, int, int *, void *);
+extern int jfs_xtstat_read(char *, char **, off_t, int, int *, void *);
+
 #define	INCREMENT(x)		((x)++)
 #define	DECREMENT(x)		((x)--)
 #define	HIGHWATERMARK(x,y)	((x) = max((x), (y)))
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 1953acb7926..4879603daa1 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -19,6 +19,7 @@
 #include <linux/fs.h>
 #include <linux/quotaops.h>
 #include "jfs_incore.h"
+#include "jfs_inode.h"
 #include "jfs_superblock.h"
 #include "jfs_dmap.h"
 #include "jfs_extent.h"
@@ -33,12 +34,6 @@ static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *);
 #endif
 static s64 extRoundDown(s64 nb);
 
-/*
- * external references
- */
-extern int jfs_commit_inode(struct inode *, int);
-
-
 #define DPD(a)          (printk("(a): %d\n",(a)))
 #define DPC(a)          (printk("(a): %c\n",(a)))
 #define DPL1(a)					\
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 7acff2ce3c8..971af2977ef 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -47,6 +47,7 @@
 #include <linux/quotaops.h>
 
 #include "jfs_incore.h"
+#include "jfs_inode.h"
 #include "jfs_filsys.h"
 #include "jfs_dinode.h"
 #include "jfs_dmap.h"
@@ -68,11 +69,6 @@
 #define AG_LOCK(imap,agno)		down(&imap->im_aglock[agno])
 #define AG_UNLOCK(imap,agno)		up(&imap->im_aglock[agno])
 
-/*
- * external references
- */
-extern struct address_space_operations jfs_aops;
-
 /*
  * forward references
  */
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 84f2459b219..2af5efbfd06 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -19,6 +19,7 @@
 #include <linux/fs.h>
 #include <linux/quotaops.h>
 #include "jfs_incore.h"
+#include "jfs_inode.h"
 #include "jfs_filsys.h"
 #include "jfs_imap.h"
 #include "jfs_dinode.h"
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 3df91fbfe78..b54bac576cb 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -1,5 +1,5 @@
 /*
- *   Copyright (c) International Business Machines Corp., 2000-2001
+ *   Copyright (C) International Business Machines Corp., 2000-2001
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -19,5 +19,22 @@
 #define _H_JFS_INODE
 
 extern struct inode *ialloc(struct inode *, umode_t);
+extern int jfs_fsync(struct file *, struct dentry *, int);
+extern void jfs_read_inode(struct inode *);
+extern int jfs_commit_inode(struct inode *, int);
+extern int jfs_write_inode(struct inode*, int);
+extern void jfs_delete_inode(struct inode *);
+extern void jfs_dirty_inode(struct inode *);
+extern void jfs_truncate(struct inode *);
+extern void jfs_truncate_nolock(struct inode *, loff_t);
+extern void jfs_free_zero_link(struct inode *);
+extern struct dentry *jfs_get_parent(struct dentry *dentry);
 
+extern struct address_space_operations jfs_aops;
+extern struct inode_operations jfs_dir_inode_operations;
+extern struct file_operations jfs_dir_operations;
+extern struct inode_operations jfs_file_inode_operations;
+extern struct file_operations jfs_file_operations;
+extern struct inode_operations jfs_symlink_inode_operations;
+extern struct dentry_operations jfs_ci_dentry_operations;
 #endif				/* _H_JFS_INODE */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index dfa1200daa6..c5cc03bcae6 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -71,6 +71,7 @@
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
 #include "jfs_metapage.h"
+#include "jfs_superblock.h"
 #include "jfs_txnmgr.h"
 #include "jfs_debug.h"
 
@@ -166,14 +167,6 @@ static LIST_HEAD(jfs_external_logs);
 static struct jfs_log *dummy_log = NULL;
 static DECLARE_MUTEX(jfs_log_sem);
 
-/*
- * external references
- */
-extern void txLazyUnlock(struct tblock * tblk);
-extern int jfs_stop_threads;
-extern struct completion jfsIOwait;
-extern int jfs_tlocks_low;
-
 /*
  * forward references
  */
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index 51291fbc420..747114cd38b 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -507,6 +507,8 @@ extern int lmLogClose(struct super_block *sb);
 extern int lmLogShutdown(struct jfs_log * log);
 extern int lmLogInit(struct jfs_log * log);
 extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
+extern int lmGroupCommit(struct jfs_log *, struct tblock *);
+extern int jfsIOWait(void *);
 extern void jfs_flush_journal(struct jfs_log * log, int wait);
 extern void jfs_syncpt(struct jfs_log *log);
 
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 41bf078dce0..aa1cc7be3a5 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -726,12 +726,12 @@ void force_metapage(struct metapage *mp)
 	page_cache_release(page);
 }
 
-extern void hold_metapage(struct metapage *mp)
+void hold_metapage(struct metapage *mp)
 {
 	lock_page(mp->page);
 }
 
-extern void put_metapage(struct metapage *mp)
+void put_metapage(struct metapage *mp)
 {
 	if (mp->count || mp->nohomeok) {
 		/* Someone else will release this */
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index 991e9fb84c7..f0b7d3282b0 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -1,6 +1,6 @@
 /*
- *   Copyright (c) International Business Machines Corp., 2000-2002
- *   Portions Copyright (c) Christoph Hellwig, 2001-2002
+ *   Copyright (C) International Business Machines Corp., 2000-2002
+ *   Portions Copyright (C) Christoph Hellwig, 2001-2002
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -58,6 +58,8 @@ struct metapage {
 #define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag)
 
 /* function prototypes */
+extern int metapage_init(void);
+extern void metapage_exit(void);
 extern struct metapage *__get_metapage(struct inode *inode,
 				  unsigned long lblock, unsigned int size,
 				  int absolute, unsigned long new);
diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h
index ab0566f70cf..fcf781bf31c 100644
--- a/fs/jfs/jfs_superblock.h
+++ b/fs/jfs/jfs_superblock.h
@@ -109,5 +109,16 @@ struct jfs_superblock {
 extern int readSuper(struct super_block *, struct buffer_head **);
 extern int updateSuper(struct super_block *, uint);
 extern void jfs_error(struct super_block *, const char *, ...);
+extern int jfs_mount(struct super_block *);
+extern int jfs_mount_rw(struct super_block *, int);
+extern int jfs_umount(struct super_block *);
+extern int jfs_umount_rw(struct super_block *);
+
+extern int jfs_stop_threads;
+extern struct completion jfsIOwait;
+extern wait_queue_head_t jfs_IO_thread_wait;
+extern wait_queue_head_t jfs_commit_thread_wait;
+extern wait_queue_head_t jfs_sync_thread_wait;
+extern int jfs_extendfs(struct super_block *, s64, int);
 
 #endif /*_H_JFS_SUPERBLOCK */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index e93d01aa12c..8cbaaff1d5f 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -42,7 +42,6 @@
  * hold on to mp+lock thru update of maps
  */
 
-
 #include <linux/fs.h>
 #include <linux/vmalloc.h>
 #include <linux/smp_lock.h>
@@ -51,6 +50,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include "jfs_incore.h"
+#include "jfs_inode.h"
 #include "jfs_filsys.h"
 #include "jfs_metapage.h"
 #include "jfs_dinode.h"
@@ -109,7 +109,6 @@ static int TxLockHWM;		/* High water mark for number of txLocks used */
 static int TxLockVHWM;		/* Very High water mark */
 struct tlock *TxLock;           /* transaction lock table */
 
-
 /*
  *      transaction management lock
  */
@@ -149,7 +148,6 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
 
 #define TXN_WAKEUP(event) wake_up_all(event)
 
-
 /*
  *      statistics
  */
@@ -161,16 +159,6 @@ static struct {
 	int waitlock;		/* 4: # of tlock wait */
 } stattx;
 
-
-/*
- * external references
- */
-extern int lmGroupCommit(struct jfs_log *, struct tblock *);
-extern int jfs_commit_inode(struct inode *, int);
-extern int jfs_stop_threads;
-
-extern struct completion jfsIOwait;
-
 /*
  * forward references
  */
@@ -358,7 +346,6 @@ void txExit(void)
 	TxBlock = NULL;
 }
 
-
 /*
  * NAME:        txBegin()
  *
@@ -460,7 +447,6 @@ tid_t txBegin(struct super_block *sb, int flag)
 	return t;
 }
 
-
 /*
  * NAME:        txBeginAnon()
  *
@@ -503,7 +489,6 @@ void txBeginAnon(struct super_block *sb)
 	TXN_UNLOCK();
 }
 
-
 /*
  *      txEnd()
  *
@@ -592,7 +577,6 @@ wakeup:
 	TXN_WAKEUP(&TxAnchor.freewait);
 }
 
-
 /*
  *      txLock()
  *
@@ -868,7 +852,6 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 	return NULL;
 }
 
-
 /*
  * NAME:        txRelease()
  *
@@ -908,7 +891,6 @@ static void txRelease(struct tblock * tblk)
 	TXN_UNLOCK();
 }
 
-
 /*
  * NAME:        txUnlock()
  *
@@ -996,7 +978,6 @@ static void txUnlock(struct tblock * tblk)
 	}
 }
 
-
 /*
  *      txMaplock()
  *
@@ -1069,7 +1050,6 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 	return tlck;
 }
 
-
 /*
  *      txLinelock()
  *
@@ -1103,8 +1083,6 @@ struct linelock *txLinelock(struct linelock * tlock)
 	return linelock;
 }
 
-
-
 /*
  *              transaction commit management
  *              -----------------------------
@@ -1373,7 +1351,6 @@ int txCommit(tid_t tid,		/* transaction identifier */
 	return rc;
 }
 
-
 /*
  * NAME:        txLog()
  *
@@ -1437,7 +1414,6 @@ static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
 	return rc;
 }
 
-
 /*
  *      diLog()
  *
@@ -1465,7 +1441,6 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	if (tlck->type & tlckENTRY) {
 		/* log after-image for logredo(): */
 		lrd->type = cpu_to_le16(LOG_REDOPAGE);
-//              *pxd = mp->cm_pxd;
 		PXDaddress(pxd, mp->index);
 		PXDlength(pxd,
 			  mp->logical_size >> tblk->sb->s_blocksize_bits);
@@ -1552,7 +1527,6 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	return rc;
 }
 
-
 /*
  *      dataLog()
  *
@@ -1599,7 +1573,6 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	return 0;
 }
 
-
 /*
  *      dtLog()
  *
@@ -1639,7 +1612,6 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
 		else
 			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
-//              *pxd = mp->cm_pxd;
 		PXDaddress(pxd, mp->index);
 		PXDlength(pxd,
 			  mp->logical_size >> tblk->sb->s_blocksize_bits);
@@ -1704,7 +1676,6 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	return;
 }
 
-
 /*
  *      xtLog()
  *
@@ -1760,7 +1731,6 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		 * applying the after-image to the meta-data page.
 		 */
 		lrd->type = cpu_to_le16(LOG_REDOPAGE);
-//              *page_pxd = mp->cm_pxd;
 		PXDaddress(page_pxd, mp->index);
 		PXDlength(page_pxd,
 			  mp->logical_size >> tblk->sb->s_blocksize_bits);
@@ -2093,7 +2063,6 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	return;
 }
 
-
 /*
  *      mapLog()
  *
@@ -2180,7 +2149,6 @@ void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	}
 }
 
-
 /*
  *      txEA()
  *
@@ -2233,7 +2201,6 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
 	}
 }
 
-
 /*
  *      txForce()
  *
@@ -2300,7 +2267,6 @@ void txForce(struct tblock * tblk)
 	}
 }
 
-
 /*
  *      txUpdateMap()
  *
@@ -2437,7 +2403,6 @@ static void txUpdateMap(struct tblock * tblk)
 	}
 }
 
-
 /*
  *      txAllocPMap()
  *
@@ -2509,7 +2474,6 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 	}
 }
 
-
 /*
  *      txFreeMap()
  *
@@ -2611,7 +2575,6 @@ void txFreeMap(struct inode *ip,
 	}
 }
 
-
 /*
  *      txFreelock()
  *
@@ -2652,7 +2615,6 @@ void txFreelock(struct inode *ip)
 	TXN_UNLOCK();
 }
 
-
 /*
  *      txAbort()
  *
diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h
index b71b82c2df0..59ad0f6b723 100644
--- a/fs/jfs/jfs_txnmgr.h
+++ b/fs/jfs/jfs_txnmgr.h
@@ -285,34 +285,26 @@ struct commit {
 /*
  * external declarations
  */
-extern struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage *mp,
-			    int flag);
-
-extern struct tlock *txMaplock(tid_t tid, struct inode *ip, int flag);
-
-extern int txCommit(tid_t tid, int nip, struct inode **iplist, int flag);
-
-extern tid_t txBegin(struct super_block *sb, int flag);
-
-extern void txBeginAnon(struct super_block *sb);
-
-extern void txEnd(tid_t tid);
-
-extern void txAbort(tid_t tid, int dirty);
-
-extern struct linelock *txLinelock(struct linelock * tlock);
-
-extern void txFreeMap(struct inode *ip, struct maplock * maplock,
-		      struct tblock * tblk, int maptype);
-
-extern void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea);
-
-extern void txFreelock(struct inode *ip);
-
-extern int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
-		 struct tlock * tlck);
-
-extern void txQuiesce(struct super_block *sb);
-
-extern void txResume(struct super_block *sb);
+extern int jfs_tlocks_low;
+
+extern int txInit(void);
+extern void txExit(void);
+extern struct tlock *txLock(tid_t, struct inode *, struct metapage *, int);
+extern struct tlock *txMaplock(tid_t, struct inode *, int);
+extern int txCommit(tid_t, int, struct inode **, int);
+extern tid_t txBegin(struct super_block *, int);
+extern void txBeginAnon(struct super_block *);
+extern void txEnd(tid_t);
+extern void txAbort(tid_t, int);
+extern struct linelock *txLinelock(struct linelock *);
+extern void txFreeMap(struct inode *, struct maplock *, struct tblock *, int);
+extern void txEA(tid_t, struct inode *, dxd_t *, dxd_t *);
+extern void txFreelock(struct inode *);
+extern int lmLog(struct jfs_log *, struct tblock *, struct lrd *,
+		 struct tlock *);
+extern void txQuiesce(struct super_block *);
+extern void txResume(struct super_block *);
+extern void txLazyUnlock(struct tblock *);
+extern int jfs_lazycommit(void *);
+extern int jfs_sync(void *);
 #endif				/* _H_JFS_TXNMGR */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 8413a368f44..1cae14e741e 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -31,20 +31,9 @@
 #include "jfs_acl.h"
 #include "jfs_debug.h"
 
-extern struct inode_operations jfs_file_inode_operations;
-extern struct inode_operations jfs_symlink_inode_operations;
-extern struct file_operations jfs_file_operations;
-extern struct address_space_operations jfs_aops;
-
-extern int jfs_fsync(struct file *, struct dentry *, int);
-extern void jfs_truncate_nolock(struct inode *, loff_t);
-extern int jfs_init_acl(struct inode *, struct inode *);
-
 /*
  * forward references
  */
-struct inode_operations jfs_dir_inode_operations;
-struct file_operations jfs_dir_operations;
 struct dentry_operations jfs_ci_dentry_operations;
 
 static s64 commitZeroLink(tid_t, struct inode *);
@@ -655,7 +644,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip)
 
 
 /*
- * NAME:	freeZeroLink()
+ * NAME:	jfs_free_zero_link()
  *
  * FUNCTION:    for non-directory, called by iClose(),
  *		free resources of a file from cache and WORKING map 
@@ -663,15 +652,12 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip)
  *		while associated with a pager object,
  *
  * PARAMETER:	ip	- pointer to inode of file.
- *
- * RETURN:	0 -ok
  */
-int freeZeroLink(struct inode *ip)
+void jfs_free_zero_link(struct inode *ip)
 {
-	int rc = 0;
 	int type;
 
-	jfs_info("freeZeroLink: ip = 0x%p", ip);
+	jfs_info("jfs_free_zero_link: ip = 0x%p", ip);
 
 	/* return if not reg or symbolic link or if size is
 	 * already ok.
@@ -684,10 +670,10 @@ int freeZeroLink(struct inode *ip)
 	case S_IFLNK:
 		/* if its contained in inode nothing to do */
 		if (ip->i_size < IDATASIZE)
-			return 0;
+			return;
 		break;
 	default:
-		return 0;
+		return;
 	}
 
 	/*
@@ -737,9 +723,7 @@ int freeZeroLink(struct inode *ip)
 	 * free xtree/data blocks from working block map;
 	 */
 	if (ip->i_size)
-		rc = xtTruncate(0, ip, 0, COMMIT_WMAP);
-
-	return rc;
+		xtTruncate(0, ip, 0, COMMIT_WMAP);
 }
 
 /*
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 5e774ed7fb6..810a3653d8b 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -28,6 +28,7 @@
 
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
+#include "jfs_inode.h"
 #include "jfs_metapage.h"
 #include "jfs_superblock.h"
 #include "jfs_dmap.h"
@@ -62,37 +63,6 @@ module_param(jfsloglevel, int, 0644);
 MODULE_PARM_DESC(jfsloglevel, "Specify JFS loglevel (0, 1 or 2)");
 #endif
 
-/*
- * External declarations
- */
-extern int jfs_mount(struct super_block *);
-extern int jfs_mount_rw(struct super_block *, int);
-extern int jfs_umount(struct super_block *);
-extern int jfs_umount_rw(struct super_block *);
-
-extern int jfsIOWait(void *);
-extern int jfs_lazycommit(void *);
-extern int jfs_sync(void *);
-
-extern void jfs_read_inode(struct inode *inode);
-extern void jfs_dirty_inode(struct inode *inode);
-extern void jfs_delete_inode(struct inode *inode);
-extern int jfs_write_inode(struct inode *inode, int wait);
-
-extern struct dentry *jfs_get_parent(struct dentry *dentry);
-extern int jfs_extendfs(struct super_block *, s64, int);
-
-extern struct dentry_operations jfs_ci_dentry_operations;
-
-#ifdef PROC_FS_JFS		/* see jfs_debug.h */
-extern void jfs_proc_init(void);
-extern void jfs_proc_clean(void);
-#endif
-
-extern wait_queue_head_t jfs_IO_thread_wait;
-extern wait_queue_head_t jfs_commit_thread_wait;
-extern wait_queue_head_t jfs_sync_thread_wait;
-
 static void jfs_handle_error(struct super_block *sb)
 {
 	struct jfs_sb_info *sbi = JFS_SBI(sb);
@@ -593,11 +563,6 @@ static struct file_system_type jfs_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-extern int metapage_init(void);
-extern int txInit(void);
-extern void txExit(void);
-extern void metapage_exit(void);
-
 static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
 {
 	struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index ef4c07ee92b..287d8d6c3cf 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -1,5 +1,5 @@
 /*
- *   Copyright (c) Christoph Hellwig, 2001-2002
+ *   Copyright (C) Christoph Hellwig, 2001-2002
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -19,6 +19,7 @@
 #include <linux/fs.h>
 #include <linux/namei.h>
 #include "jfs_incore.h"
+#include "jfs_inode.h"
 #include "jfs_xattr.h"
 
 static int jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
-- 
cgit v1.2.3


From dcc9871270aa3b1bbe2e61cc9c1d80e9b2e8099d Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 4 May 2005 15:30:51 -0500
Subject: JFS: cleanup - remove unneeded sanity check

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_dmap.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 69007fd546e..cced2fed9d0 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -272,7 +272,6 @@ int dbMount(struct inode *ipbmap)
 int dbUnmount(struct inode *ipbmap, int mounterror)
 {
 	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
-	int i;
 
 	if (!(mounterror || isReadOnly(ipbmap)))
 		dbSync(ipbmap);
@@ -282,14 +281,6 @@ int dbUnmount(struct inode *ipbmap, int mounterror)
 	 */
 	truncate_inode_pages(ipbmap->i_mapping, 0);
 
-	/*
-	 * Sanity Check
-	 */
-	for (i = 0; i < bmp->db_numag; i++)
-		if (atomic_read(&bmp->db_active[i]))
-			printk(KERN_ERR "dbUnmount: db_active[%d] = %d\n",
-			       i, atomic_read(&bmp->db_active[i]));
-
 	/* free the memory for the in-memory bmap. */
 	kfree(bmp);
 
-- 
cgit v1.2.3


From 7a694ca74958b97ae2d437c8a730bddd9e9792c3 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 4 May 2005 15:31:14 -0500
Subject: JFS: Fix sparse warning

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_metapage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index aa1cc7be3a5..6c5485d16c3 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -198,7 +198,7 @@ static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 	}
 }
 
-static inline struct metapage *alloc_metapage(int gfp_mask)
+static inline struct metapage *alloc_metapage(unsigned int gfp_mask)
 {
 	return mempool_alloc(metapage_mempool, gfp_mask);
 }
-- 
cgit v1.2.3


From 259692bd5a2b2c2d351dd90748ba4126bc2a21b9 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <juhl-lkml@dif.dk>
Date: Mon, 9 May 2005 10:47:14 -0500
Subject: JFS: Remove redundant kfree() NULL pointer checks

kfree() can handle a NULL pointer, don't worry about passing it one.

Signed-off-by: Jesper Juhl <juhl-lkml@dif.dk>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/acl.c   | 6 ++----
 fs/jfs/xattr.c | 6 ++----
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 8d2a9ab981d..30a2bf9eeda 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -70,8 +70,7 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 		if (!IS_ERR(acl))
 			*p_acl = posix_acl_dup(acl);
 	}
-	if (value)
-		kfree(value);
+	kfree(value);
 	return acl;
 }
 
@@ -112,8 +111,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	}
 	rc = __jfs_setxattr(inode, ea_name, value, size, 0);
 out:
-	if (value)
-		kfree(value);
+	kfree(value);
 
 	if (!rc) {
 		if (*p_acl && (*p_acl != JFS_ACL_NOT_CACHED))
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 7a9ffd5d03d..6016373701a 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -946,8 +946,7 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
       out:
 	up_write(&JFS_IP(inode)->xattr_sem);
 
-	if (os2name)
-		kfree(os2name);
+	kfree(os2name);
 
 	return rc;
 }
@@ -1042,8 +1041,7 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
       out:
 	up_read(&JFS_IP(inode)->xattr_sem);
 
-	if (os2name)
-		kfree(os2name);
+	kfree(os2name);
 
 	return size;
 }
-- 
cgit v1.2.3


From c2731509cfb538b9b38feaf657fab2334ea45253 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Thu, 2 Jun 2005 12:18:20 -0500
Subject: JFS: kernel BUG at fs/jfs/jfs_txnmgr.c:859

add_missing_indices() must set tlck->type to tlckBTROOT when modifying
a root btree root to avoid a trap in txRelease()

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_dtree.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index ac41f72d6d5..8676aee3ae4 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -2931,6 +2931,9 @@ static void add_missing_indices(struct inode *inode, s64 bn)
 	ASSERT(p->header.flag & BT_LEAF);
 
 	tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY);
+	if (BT_IS_ROOT(mp))
+		tlck->type |= tlckBTROOT;
+
 	dtlck = (struct dt_lock *) &tlck->lock;
 
 	stbl = DT_GETSTBL(p);
-- 
cgit v1.2.3


From 72e3148a6e987974e3e949c5668e5ca812d7c818 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Fri, 3 Jun 2005 14:09:54 -0500
Subject: JFS: Fix compiler warning in jfs_logmgr.c

fs/jfs/jfs_logmgr.c: In function `jfs_flush_journal':
fs/jfs/jfs_logmgr.c:1632: warning: unused variable `mp'

Some debug code in jfs_flush_journal does nothing when CONFIG_JFS_DEBUG
is not defined.  Place the whole code segment within an ifdef to avoid
unnecessary code to be compiled and the warning to be issued.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_logmgr.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index c5cc03bcae6..7c8387ed419 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1617,6 +1617,8 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
 		}
 	}
 	assert(list_empty(&log->cqueue));
+
+#ifdef CONFIG_JFS_DEBUG
 	if (!list_empty(&log->synclist)) {
 		struct logsyncblk *lp;
 
@@ -1631,9 +1633,8 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
 				dump_mem("orphan tblock", lp,
 					 sizeof(struct tblock));
 		}
-//		current->state = TASK_INTERRUPTIBLE;
-//		schedule();
 	}
+#endif
 	//assert(list_empty(&log->synclist));
 	clear_bit(log_FLUSH, &log->flag);
 }
-- 
cgit v1.2.3


From e3a15db2415579d5136b9ba9b52fe27c66da8780 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Tue, 26 Apr 2005 02:31:08 -0500
Subject: [PATCH] sysfs_{create|remove}_link should take const char *

sysfs: make sysfs_{create|remove}_link to take const char * name.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/symlink.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index dfdf7017435..fae57c83a72 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -43,7 +43,7 @@ static void fill_object_path(struct kobject * kobj, char * buffer, int length)
 	}
 }
 
-static int sysfs_add_link(struct dentry * parent, char * name, struct kobject * target)
+static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * target)
 {
 	struct sysfs_dirent * parent_sd = parent->d_fsdata;
 	struct sysfs_symlink * sl;
@@ -79,7 +79,7 @@ exit1:
  *	@target:	object we're pointing to.
  *	@name:		name of the symlink.
  */
-int sysfs_create_link(struct kobject * kobj, struct kobject * target, char * name)
+int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
 {
 	struct dentry * dentry = kobj->dentry;
 	int error = 0;
@@ -99,13 +99,13 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, char * nam
  *	@name:	name of the symlink to remove.
  */
 
-void sysfs_remove_link(struct kobject * kobj, char * name)
+void sysfs_remove_link(struct kobject * kobj, const char * name)
 {
 	sysfs_hash_and_remove(kobj->dentry,name);
 }
 
 static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
-				   char *path)
+				 char *path)
 {
 	char * s;
 	int depth, size;
-- 
cgit v1.2.3


From c76d0abd07a9c9cf72bbb5b641e1e97f92ea8f3e Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Fri, 29 Apr 2005 01:22:00 -0500
Subject: [PATCH] sysfs: if show/store is missing return -EIO

sysfs: if attribute does not implement show or store method
       read/write should return -EIO instead of 0 or -EINVAL.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/bin.c  | 4 ++--
 fs/sysfs/file.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index d4aaa88d021..78899eeab97 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -25,7 +25,7 @@ fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
 	struct kobject * kobj = to_kobj(dentry->d_parent);
 
 	if (!attr->read)
-		return -EINVAL;
+		return -EIO;
 
 	return attr->read(kobj, buffer, off, count);
 }
@@ -71,7 +71,7 @@ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
 	struct kobject *kobj = to_kobj(dentry->d_parent);
 
 	if (!attr->write)
-		return -EINVAL;
+		return -EIO;
 
 	return attr->write(kobj, buffer, offset, count);
 }
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 364208071e1..1481cae7d99 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -23,7 +23,7 @@ subsys_attr_show(struct kobject * kobj, struct attribute * attr, char * page)
 {
 	struct subsystem * s = to_subsys(kobj);
 	struct subsys_attribute * sattr = to_sattr(attr);
-	ssize_t ret = 0;
+	ssize_t ret = -EIO;
 
 	if (sattr->show)
 		ret = sattr->show(s,page);
@@ -36,7 +36,7 @@ subsys_attr_store(struct kobject * kobj, struct attribute * attr,
 {
 	struct subsystem * s = to_subsys(kobj);
 	struct subsys_attribute * sattr = to_sattr(attr);
-	ssize_t ret = 0;
+	ssize_t ret = -EIO;
 
 	if (sattr->store)
 		ret = sattr->store(s,page,count);
-- 
cgit v1.2.3


From 1db560afe629b682c45a7f4ba7edf98b4ee28518 Mon Sep 17 00:00:00 2001
From: "gregkh@suse.de" <gregkh@suse.de>
Date: Wed, 23 Mar 2005 10:02:26 -0800
Subject: [PATCH] class: convert the remaining class_simple users in the kernel
 to usee the new class api

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/coda/psdev.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index ef001a9313e..3d1cce3653b 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -61,7 +61,7 @@ unsigned long coda_timeout = 30; /* .. secs, then signals will dequeue */
 
 
 struct venus_comm coda_comms[MAX_CODADEVS];
-static struct class_simple *coda_psdev_class;
+static struct class *coda_psdev_class;
 
 /*
  * Device operations
@@ -363,14 +363,14 @@ static int init_coda_psdev(void)
 		     CODA_PSDEV_MAJOR);
               return -EIO;
 	}
-	coda_psdev_class = class_simple_create(THIS_MODULE, "coda");
+	coda_psdev_class = class_create(THIS_MODULE, "coda");
 	if (IS_ERR(coda_psdev_class)) {
 		err = PTR_ERR(coda_psdev_class);
 		goto out_chrdev;
 	}		
 	devfs_mk_dir ("coda");
 	for (i = 0; i < MAX_CODADEVS; i++) {
-		class_simple_device_add(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR,i), 
+		class_device_create(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR,i),
 				NULL, "cfs%d", i);
 		err = devfs_mk_cdev(MKDEV(CODA_PSDEV_MAJOR, i),
 				S_IFCHR|S_IRUSR|S_IWUSR, "coda/%d", i);
@@ -382,8 +382,8 @@ static int init_coda_psdev(void)
 
 out_class:
 	for (i = 0; i < MAX_CODADEVS; i++) 
-		class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i));
-	class_simple_destroy(coda_psdev_class);
+		class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i));
+	class_destroy(coda_psdev_class);
 out_chrdev:
 	unregister_chrdev(CODA_PSDEV_MAJOR, "coda");
 out:
@@ -425,10 +425,10 @@ static int __init init_coda(void)
 	return 0;
 out:
 	for (i = 0; i < MAX_CODADEVS; i++) {
-		class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i));
+		class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i));
 		devfs_remove("coda/%d", i);
 	}
-	class_simple_destroy(coda_psdev_class);
+	class_destroy(coda_psdev_class);
 	devfs_remove("coda");
 	unregister_chrdev(CODA_PSDEV_MAJOR, "coda");
 	coda_sysctl_clean();
@@ -447,10 +447,10 @@ static void __exit exit_coda(void)
                 printk("coda: failed to unregister filesystem\n");
         }
 	for (i = 0; i < MAX_CODADEVS; i++) {
-		class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i));
+		class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i));
 		devfs_remove("coda/%d", i);
 	}
-	class_simple_destroy(coda_psdev_class);
+	class_destroy(coda_psdev_class);
 	devfs_remove("coda");
 	unregister_chrdev(CODA_PSDEV_MAJOR, "coda");
 	coda_sysctl_clean();
-- 
cgit v1.2.3


From acaefc25d21f850e47ecc5098d1e0bc442c526be Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 18 May 2005 14:40:59 +0200
Subject: [PATCH] libfs: add simple attribute files

Based on the discussion about spufs attributes, this is my suggestion
for a more generic attribute file support that can be used by both
debugfs and spufs.

Simple attribute files behave similarly to sequential files from
a kernel programmers perspective in that a standard set of file
operations is provided and only an open operation needs to
be written that registers file specific get() and set() functions.

These operations are defined as

void foo_set(void *data, u64 val); and
u64 foo_get(void *data);

where data is the inode->u.generic_ip pointer of the file and the
operations just need to make send of that pointer. The infrastructure
makes sure this works correctly with concurrent access and partial
read calls.

A macro named DEFINE_SIMPLE_ATTRIBUTE is provided to further simplify
using the attributes.

This patch already contains the changes for debugfs to use attributes
for its internal file operations.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/debugfs/file.c |  67 ++++++++++++++++--------------------
 fs/libfs.c        | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 129 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 548556ff250..efc97d9b786 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -45,44 +45,15 @@ struct file_operations debugfs_file_operations = {
 	.open =		default_open,
 };
 
-#define simple_type(type, format, temptype, strtolfn)				\
-static ssize_t read_file_##type(struct file *file, char __user *user_buf,	\
-				size_t count, loff_t *ppos)			\
-{										\
-	char buf[32];								\
-	type *val = file->private_data;						\
-										\
-	snprintf(buf, sizeof(buf), format "\n", *val);				\
-	return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));\
-}										\
-static ssize_t write_file_##type(struct file *file, const char __user *user_buf,\
-				 size_t count, loff_t *ppos)			\
-{										\
-	char *endp;								\
-	char buf[32];								\
-	int buf_size;								\
-	type *val = file->private_data;						\
-	temptype tmp;								\
-										\
-	memset(buf, 0x00, sizeof(buf));						\
-	buf_size = min(count, (sizeof(buf)-1));					\
-	if (copy_from_user(buf, user_buf, buf_size))				\
-		return -EFAULT;							\
-										\
-	tmp = strtolfn(buf, &endp, 0);						\
-	if ((endp == buf) || ((type)tmp != tmp))				\
-		return -EINVAL;							\
-	*val = tmp;								\
-	return count;								\
-}										\
-static struct file_operations fops_##type = {					\
-	.read =		read_file_##type,					\
-	.write =	write_file_##type,					\
-	.open =		default_open,						\
-};
-simple_type(u8, "%c", unsigned long, simple_strtoul);
-simple_type(u16, "%hi", unsigned long, simple_strtoul);
-simple_type(u32, "%i", unsigned long, simple_strtoul);
+static void debugfs_u8_set(void *data, u64 val)
+{
+	*(u8 *)data = val;
+}
+static u64 debugfs_u8_get(void *data)
+{
+	return *(u8 *)data;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n");
 
 /**
  * debugfs_create_u8 - create a file in the debugfs filesystem that is used to read and write a unsigned 8 bit value.
@@ -116,6 +87,16 @@ struct dentry *debugfs_create_u8(const char *name, mode_t mode,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_u8);
 
+static void debugfs_u16_set(void *data, u64 val)
+{
+	*(u16 *)data = val;
+}
+static u64 debugfs_u16_get(void *data)
+{
+	return *(u16 *)data;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n");
+
 /**
  * debugfs_create_u16 - create a file in the debugfs filesystem that is used to read and write a unsigned 8 bit value.
  *
@@ -148,6 +129,16 @@ struct dentry *debugfs_create_u16(const char *name, mode_t mode,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_u16);
 
+static void debugfs_u32_set(void *data, u64 val)
+{
+	*(u32 *)data = val;
+}
+static u64 debugfs_u32_get(void *data)
+{
+	return *(u32 *)data;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n");
+
 /**
  * debugfs_create_u32 - create a file in the debugfs filesystem that is used to read and write a unsigned 8 bit value.
  *
diff --git a/fs/libfs.c b/fs/libfs.c
index f90b2959592..5025563e737 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -519,6 +519,102 @@ int simple_transaction_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+/* Simple attribute files */
+
+struct simple_attr {
+	u64 (*get)(void *);
+	void (*set)(void *, u64);
+	char get_buf[24];	/* enough to store a u64 and "\n\0" */
+	char set_buf[24];
+	void *data;
+	const char *fmt;	/* format for read operation */
+	struct semaphore sem;	/* protects access to these buffers */
+};
+
+/* simple_attr_open is called by an actual attribute open file operation
+ * to set the attribute specific access operations. */
+int simple_attr_open(struct inode *inode, struct file *file,
+		     u64 (*get)(void *), void (*set)(void *, u64),
+		     const char *fmt)
+{
+	struct simple_attr *attr;
+
+	attr = kmalloc(sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return -ENOMEM;
+
+	attr->get = get;
+	attr->set = set;
+	attr->data = inode->u.generic_ip;
+	attr->fmt = fmt;
+	init_MUTEX(&attr->sem);
+
+	file->private_data = attr;
+
+	return nonseekable_open(inode, file);
+}
+
+int simple_attr_close(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+/* read from the buffer that is filled with the get function */
+ssize_t simple_attr_read(struct file *file, char __user *buf,
+			 size_t len, loff_t *ppos)
+{
+	struct simple_attr *attr;
+	size_t size;
+	ssize_t ret;
+
+	attr = file->private_data;
+
+	if (!attr->get)
+		return -EACCES;
+
+	down(&attr->sem);
+	if (*ppos) /* continued read */
+		size = strlen(attr->get_buf);
+	else	  /* first read */
+		size = scnprintf(attr->get_buf, sizeof(attr->get_buf),
+				 attr->fmt,
+				 (unsigned long long)attr->get(attr->data));
+
+	ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
+	up(&attr->sem);
+	return ret;
+}
+
+/* interpret the buffer as a number to call the set function with */
+ssize_t simple_attr_write(struct file *file, const char __user *buf,
+			  size_t len, loff_t *ppos)
+{
+	struct simple_attr *attr;
+	u64 val;
+	size_t size;
+	ssize_t ret;
+
+	attr = file->private_data;
+
+	if (!attr->set)
+		return -EACCES;
+
+	down(&attr->sem);
+	ret = -EFAULT;
+	size = min(sizeof(attr->set_buf) - 1, len);
+	if (copy_from_user(attr->set_buf, buf, size))
+		goto out;
+
+	ret = len; /* claim we got the whole input */
+	attr->set_buf[size] = '\0';
+	val = simple_strtol(attr->set_buf, NULL, 0);
+	attr->set(attr->data, val);
+out:
+	up(&attr->sem);
+	return ret;
+}
+
 EXPORT_SYMBOL(dcache_dir_close);
 EXPORT_SYMBOL(dcache_dir_lseek);
 EXPORT_SYMBOL(dcache_dir_open);
@@ -547,3 +643,7 @@ EXPORT_SYMBOL(simple_read_from_buffer);
 EXPORT_SYMBOL(simple_transaction_get);
 EXPORT_SYMBOL(simple_transaction_read);
 EXPORT_SYMBOL(simple_transaction_release);
+EXPORT_SYMBOL_GPL(simple_attr_open);
+EXPORT_SYMBOL_GPL(simple_attr_close);
+EXPORT_SYMBOL_GPL(simple_attr_read);
+EXPORT_SYMBOL_GPL(simple_attr_write);
-- 
cgit v1.2.3


From 6fa5c828c7fb6beef7035864bd2b18e7386fbdd5 Mon Sep 17 00:00:00 2001
From: Maneesh Soni <maneesh@in.ibm.com>
Date: Tue, 31 May 2005 10:38:12 +0530
Subject: [PATCH] sysfs-iattr: attach sysfs_dirent before new inode

o The following patch makes sure to attach sysfs_dirent to the dentry before
  allocation a new inode through sysfs_create(). This change is done as
  preparatory work for implementing ->i_op->setattr() functionality for
  sysfs objects.

Signed-off-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/dir.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index fe198210bc2..5cf3270014c 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -101,18 +101,19 @@ static int create_dir(struct kobject * k, struct dentry * p,
 	down(&p->d_inode->i_sem);
 	*d = sysfs_get_dentry(p,n);
 	if (!IS_ERR(*d)) {
-		error = sysfs_create(*d, mode, init_dir);
+		error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR);
 		if (!error) {
-			error = sysfs_make_dirent(p->d_fsdata, *d, k, mode,
-						SYSFS_DIR);
+			error = sysfs_create(*d, mode, init_dir);
 			if (!error) {
 				p->d_inode->i_nlink++;
 				(*d)->d_op = &sysfs_dentry_ops;
 				d_rehash(*d);
 			}
 		}
-		if (error && (error != -EEXIST))
+		if (error && (error != -EEXIST)) {
+			sysfs_put((*d)->d_fsdata);
 			d_drop(*d);
+		}
 		dput(*d);
 	} else
 		error = PTR_ERR(*d);
@@ -171,17 +172,19 @@ static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry)
                 init = init_file;
         }
 
+	dentry->d_fsdata = sysfs_get(sd);
+	sd->s_dentry = dentry;
 	error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init);
-	if (error)
+	if (error) {
+		sysfs_put(sd);
 		return error;
+	}
 
         if (bin_attr) {
 		dentry->d_inode->i_size = bin_attr->size;
 		dentry->d_inode->i_fop = &bin_fops;
 	}
 	dentry->d_op = &sysfs_dentry_ops;
-	dentry->d_fsdata = sysfs_get(sd);
-	sd->s_dentry = dentry;
 	d_rehash(dentry);
 
 	return 0;
@@ -191,13 +194,15 @@ static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry)
 {
 	int err = 0;
 
+	dentry->d_fsdata = sysfs_get(sd);
+	sd->s_dentry = dentry;
 	err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink);
 	if (!err) {
 		dentry->d_op = &sysfs_dentry_ops;
-		dentry->d_fsdata = sysfs_get(sd);
-		sd->s_dentry = dentry;
 		d_rehash(dentry);
-	}
+	} else
+		sysfs_put(sd);
+
 	return err;
 }
 
-- 
cgit v1.2.3


From 988d186de5b6966a71a8cc52e6cb4895fd2f7799 Mon Sep 17 00:00:00 2001
From: Maneesh Soni <maneesh@in.ibm.com>
Date: Tue, 31 May 2005 10:39:14 +0530
Subject: [PATCH] sysfs-iattr: add sysfs_setattr

o This adds ->i_op->setattr VFS method for sysfs inodes. The changed
  attribues are saved in the persistent sysfs_dirent structure as a pointer
  to struct iattr. The struct iattr is allocated only for those sysfs_dirent's
  for which default attributes are getting changed. Thanks to Jon Smirl for
  this suggestion.

Signed-off-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/dir.c   |  1 +
 fs/sysfs/inode.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/sysfs/sysfs.h |  2 ++
 3 files changed, 68 insertions(+)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 5cf3270014c..37d7a6875d8 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -233,6 +233,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 
 struct inode_operations sysfs_dir_inode_operations = {
 	.lookup		= sysfs_lookup,
+	.setattr	= sysfs_setattr,
 };
 
 static void remove_dir(struct dentry * d)
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index aff7b2dfa8e..400c90be568 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -26,6 +26,71 @@ static struct backing_dev_info sysfs_backing_dev_info = {
 	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
 };
 
+static struct inode_operations sysfs_inode_operations ={
+	.setattr	= sysfs_setattr,
+};
+
+int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
+{
+	struct inode * inode = dentry->d_inode;
+	struct sysfs_dirent * sd = dentry->d_fsdata;
+	struct iattr * sd_iattr;
+	unsigned int ia_valid = iattr->ia_valid;
+	int error;
+
+	if (!sd)
+		return -EINVAL;
+
+	sd_iattr = sd->s_iattr;
+
+	error = inode_change_ok(inode, iattr);
+	if (error)
+		return error;
+
+	error = inode_setattr(inode, iattr);
+	if (error)
+		return error;
+
+	if (!sd_iattr) {
+		/* setting attributes for the first time, allocate now */
+		sd_iattr = kmalloc(sizeof(struct iattr), GFP_KERNEL);
+		if (!sd_iattr)
+			return -ENOMEM;
+		/* assign default attributes */
+		memset(sd_iattr, 0, sizeof(struct iattr));
+		sd_iattr->ia_mode = sd->s_mode;
+		sd_iattr->ia_uid = 0;
+		sd_iattr->ia_gid = 0;
+		sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
+		sd->s_iattr = sd_iattr;
+	}
+
+	/* attributes were changed atleast once in past */
+
+	if (ia_valid & ATTR_UID)
+		sd_iattr->ia_uid = iattr->ia_uid;
+	if (ia_valid & ATTR_GID)
+		sd_iattr->ia_gid = iattr->ia_gid;
+	if (ia_valid & ATTR_ATIME)
+		sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime,
+						inode->i_sb->s_time_gran);
+	if (ia_valid & ATTR_MTIME)
+		sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime,
+						inode->i_sb->s_time_gran);
+	if (ia_valid & ATTR_CTIME)
+		sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime,
+						inode->i_sb->s_time_gran);
+	if (ia_valid & ATTR_MODE) {
+		umode_t mode = iattr->ia_mode;
+
+		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+			mode &= ~S_ISGID;
+		sd_iattr->ia_mode = mode;
+	}
+
+	return error;
+}
+
 struct inode * sysfs_new_inode(mode_t mode)
 {
 	struct inode * inode = new_inode(sysfs_sb);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index a8a24a0c0b3..e8e9f0c2573 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -17,6 +17,7 @@ extern void sysfs_remove_subdir(struct dentry *);
 
 extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd);
 extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent);
+extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
 
 extern struct rw_semaphore sysfs_rename_sem;
 extern struct super_block * sysfs_sb;
@@ -75,6 +76,7 @@ static inline void release_sysfs_dirent(struct sysfs_dirent * sd)
 		kobject_put(sl->target_kobj);
 		kfree(sl);
 	}
+	kfree(sd->s_iattr);
 	kmem_cache_free(sysfs_dir_cachep, sd);
 }
 
-- 
cgit v1.2.3


From 8215534ce7d073423bfa9c17405c43ab7636ca03 Mon Sep 17 00:00:00 2001
From: Maneesh Soni <maneesh@in.ibm.com>
Date: Tue, 31 May 2005 10:39:52 +0530
Subject: [PATCH] sysfs-iattr: set inode attributes

o Following patch sets the attributes for newly allocated inodes for sysfs
  objects. If the object has non-default attributes, inode attributes are
  set as saved in sysfs_dirent->s_iattr, pointer to struct iattr.

Signed-off-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/inode.c | 37 +++++++++++++++++++++++++++++++------
 fs/sysfs/mount.c |  4 +++-
 fs/sysfs/sysfs.h |  2 +-
 3 files changed, 35 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 400c90be568..565cac1d420 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -91,18 +91,42 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
 	return error;
 }
 
-struct inode * sysfs_new_inode(mode_t mode)
+static inline void set_default_inode_attr(struct inode * inode, mode_t mode)
+{
+	inode->i_mode = mode;
+	inode->i_uid = 0;
+	inode->i_gid = 0;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+}
+
+static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
+{
+	inode->i_mode = iattr->ia_mode;
+	inode->i_uid = iattr->ia_uid;
+	inode->i_gid = iattr->ia_gid;
+	inode->i_atime = iattr->ia_atime;
+	inode->i_mtime = iattr->ia_mtime;
+	inode->i_ctime = iattr->ia_ctime;
+}
+
+struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
 {
 	struct inode * inode = new_inode(sysfs_sb);
 	if (inode) {
-		inode->i_mode = mode;
-		inode->i_uid = 0;
-		inode->i_gid = 0;
 		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
-		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		inode->i_mapping->a_ops = &sysfs_aops;
 		inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
+		inode->i_op = &sysfs_inode_operations;
+
+		if (sd->s_iattr) {
+			/* sysfs_dirent has non-default attributes
+			 * get them for the new inode from persistent copy
+			 * in sysfs_dirent
+			 */
+			set_inode_attr(inode, sd->s_iattr);
+		} else
+			set_default_inode_attr(inode, mode);
 	}
 	return inode;
 }
@@ -113,7 +137,8 @@ int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
 	struct inode * inode = NULL;
 	if (dentry) {
 		if (!dentry->d_inode) {
-			if ((inode = sysfs_new_inode(mode))) {
+			struct sysfs_dirent * sd = dentry->d_fsdata;
+			if ((inode = sysfs_new_inode(mode, sd))) {
 				if (dentry->d_parent && dentry->d_parent->d_inode) {
 					struct inode *p_inode = dentry->d_parent->d_inode;
 					p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 5c805bb1a4b..f1117e885bd 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -28,6 +28,7 @@ static struct sysfs_dirent sysfs_root = {
 	.s_children	= LIST_HEAD_INIT(sysfs_root.s_children),
 	.s_element	= NULL,
 	.s_type		= SYSFS_ROOT,
+	.s_iattr	= NULL,
 };
 
 static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
@@ -42,7 +43,8 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_time_gran = 1;
 	sysfs_sb = sb;
 
-	inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
+	inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
+				 &sysfs_root);
 	if (inode) {
 		inode->i_op = &sysfs_dir_inode_operations;
 		inode->i_fop = &sysfs_dir_operations;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index e8e9f0c2573..29da6f5f07c 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -2,7 +2,7 @@
 extern struct vfsmount * sysfs_mount;
 extern kmem_cache_t *sysfs_dir_cachep;
 
-extern struct inode * sysfs_new_inode(mode_t mode);
+extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
 extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
 
 extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
-- 
cgit v1.2.3


From 9d9d27fb651a7c95a46f276bacb4329db47470a6 Mon Sep 17 00:00:00 2001
From: Jon Smirl <jonsmirl@gmail.com>
Date: Tue, 14 Jun 2005 09:54:54 -0400
Subject: [PATCH] SYSFS: fix PAGE_SIZE check

Without this change I can't set an attribute exactly PAGE_SIZE in
length. There is no need for zero termination because the interface
uses lengths.

From: Jon Smirl <jonsmirl@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1481cae7d99..849aac11546 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -182,7 +182,7 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
 		return -ENOMEM;
 
 	if (count >= PAGE_SIZE)
-		count = PAGE_SIZE - 1;
+		count = PAGE_SIZE;
 	error = copy_from_user(buffer->page,buf,count);
 	buffer->needs_read_fill = 1;
 	return error ? -EFAULT : count;
-- 
cgit v1.2.3


From b74e2159c9849fb97659c6dc47ad706f702b22b9 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 21 Jun 2005 13:21:49 +1000
Subject: [XFS] Add a get/set interface for XFS project identifiers.

SGI-PV: 932952
SGI-Modid: xfs-linux:xfs-kern:21938a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl.c   | 22 ++++++++++++++++++++++
 fs/xfs/linux-2.6/xfs_ioctl32.c |  2 ++
 fs/xfs/linux-2.6/xfs_vnode.h   |  2 +-
 fs/xfs/xfs_fs.h                |  2 ++
 fs/xfs/xfs_types.h             |  2 +-
 5 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 69809eef8a5..df17d93bd09 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -777,6 +777,8 @@ xfs_ioctl(
 	case XFS_IOC_GETVERSION:
 	case XFS_IOC_GETXFLAGS:
 	case XFS_IOC_SETXFLAGS:
+	case XFS_IOC_GETPROJID:
+	case XFS_IOC_SETPROJID:
 	case XFS_IOC_FSGETXATTR:
 	case XFS_IOC_FSSETXATTR:
 	case XFS_IOC_FSGETXATTRA:
@@ -1258,6 +1260,26 @@ xfs_ioc_xattr(
 		return 0;
 	}
 
+	case XFS_IOC_GETPROJID: {
+		va.va_mask = XFS_AT_PROJID;
+		VOP_GETATTR(vp, &va, 0, NULL, error);
+		if (error)
+			return -error;
+		if (copy_to_user(arg, &va.va_projid, sizeof(va.va_projid)))
+			return -XFS_ERROR(EFAULT);
+		return 0;
+	}
+
+	case XFS_IOC_SETPROJID: {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		va.va_mask = XFS_AT_PROJID;
+		if (copy_from_user(&va.va_projid, arg, sizeof(va.va_projid)))
+			return -XFS_ERROR(EFAULT);
+		VOP_SETATTR(vp, &va, 0, NULL, error);
+		return -error;
+	}
+
 	default:
 		return -ENOTTY;
 	}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 0f8f1384eb3..be72aca5944 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -100,6 +100,8 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
 	case XFS_IOC_GETBMAP:
 	case XFS_IOC_GETBMAPA:
 	case XFS_IOC_GETBMAPX:
+	case XFS_IOC_SETPROJID:
+	case XFS_IOC_GETPROJID:
 /* not handled
 	case XFS_IOC_FD_TO_HANDLE:
 	case XFS_IOC_PATH_TO_HANDLE:
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 00466c3194a..a6e57c647be 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -426,7 +426,7 @@ typedef struct vattr {
 	u_long		va_extsize;	/* file extent size */
 	u_long		va_nextents;	/* number of extents in file */
 	u_long		va_anextents;	/* number of attr extents in file */
-	int		va_projid;	/* project id */
+	prid_t		va_projid;	/* project id */
 } vattr_t;
 
 /*
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 6ee8443bf9d..a7bd4687fa5 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -477,6 +477,8 @@ typedef struct xfs_handle {
 /*	XFS_IOC_SETBIOSIZE ---- deprecated 46	   */
 /*	XFS_IOC_GETBIOSIZE ---- deprecated 47	   */
 #define XFS_IOC_GETBMAPX	_IOWR('X', 56, struct getbmap)
+#define XFS_IOC_SETPROJID	_IOWR('X', 57, __uint32_t)
+#define XFS_IOC_GETPROJID	_IOWR('X', 58, __uint32_t)
 
 /*
  * ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index e4bf711e48f..16f5371ce10 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -55,7 +55,7 @@ typedef signed long long int	__int64_t;
 typedef unsigned long long int	__uint64_t;
 
 typedef enum { B_FALSE,B_TRUE }	boolean_t;
-typedef __int64_t		prid_t;		/* project ID */
+typedef __uint32_t		prid_t;		/* project ID */
 typedef __uint32_t		inst_t;		/* an instruction */
 
 typedef __s64			xfs_off_t;	/* <file offset> type */
-- 
cgit v1.2.3


From 6fac0cb46bc4c50d6cbc5998ad206435f39fb00f Mon Sep 17 00:00:00 2001
From: Dean Roehrich <roehrich@sgi.com>
Date: Tue, 21 Jun 2005 14:07:45 +1000
Subject: [XFS] coordinate mmap calls with xfs_dm_punch_hole

SGI-PV: 933551
SGI-Modid: xfs-linux:xfs-kern:190622a

Signed-off-by: Dean Roehrich <roehrich@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_file.c | 27 ++++++++++++++++++++++++---
 fs/xfs/xfs_dmapi.h          |  4 ++++
 2 files changed, 28 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 24fa3b101b9..f1ce4323f56 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -57,7 +57,9 @@
 #include <linux/smp_lock.h>
 
 static struct vm_operations_struct linvfs_file_vm_ops;
-
+#ifdef CONFIG_XFS_DMAPI
+static struct vm_operations_struct linvfs_dmapi_file_vm_ops;
+#endif
 
 STATIC inline ssize_t
 __linvfs_read(
@@ -388,6 +390,14 @@ done:
 	return -error;
 }
 
+#ifdef CONFIG_XFS_DMAPI
+STATIC void
+linvfs_mmap_close(
+	struct vm_area_struct	*vma)
+{
+	xfs_dm_mm_put(vma);
+}
+#endif /* CONFIG_XFS_DMAPI */
 
 STATIC int
 linvfs_file_mmap(
@@ -399,16 +409,19 @@ linvfs_file_mmap(
 	vattr_t		va = { .va_mask = XFS_AT_UPDATIME };
 	int		error;
 
+	vma->vm_ops = &linvfs_file_vm_ops;
+
 	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
 		xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
 
 		error = -XFS_SEND_MMAP(mp, vma, 0);
 		if (error)
 			return error;
+#ifdef CONFIG_XFS_DMAPI
+		vma->vm_ops = &linvfs_dmapi_file_vm_ops;
+#endif
 	}
 
-	vma->vm_ops = &linvfs_file_vm_ops;
-
 	VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error);
 	if (!error)
 		vn_revalidate(vp);	/* update Linux inode flags */
@@ -609,7 +622,15 @@ struct file_operations linvfs_dir_operations = {
 static struct vm_operations_struct linvfs_file_vm_ops = {
 	.nopage		= filemap_nopage,
 	.populate	= filemap_populate,
+};
+
+#ifdef CONFIG_XFS_DMAPI
+static struct vm_operations_struct linvfs_dmapi_file_vm_ops = {
+	.close		= linvfs_mmap_close,
+	.nopage		= filemap_nopage,
+	.populate	= filemap_populate,
 #ifdef HAVE_VMOP_MPROTECT
 	.mprotect	= linvfs_mprotect,
 #endif
 };
+#endif /* CONFIG_XFS_DMAPI */
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index 55ae3e67d24..16cf9f7a478 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -209,4 +209,8 @@ void xfs_dm_exit(struct file_system_type *);
 #define XFS_DM_EXIT(fstype)
 #endif
 
+#define HAVE_XFS_DM_MM
+int xfs_dm_mm_get(struct vm_area_struct *vma);
+void xfs_dm_mm_put(struct vm_area_struct *vma);
+
 #endif  /* __XFS_DMAPI_H__ */
-- 
cgit v1.2.3


From 23ea4032c875fc75c2363a9bcaba87cfb134ff68 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 21 Jun 2005 15:14:01 +1000
Subject: [XFS] rename various pagebuf symbols to xfsbuf

SGI-PV: 908809
SGI-Modid: xfs-linux:xfs-kern:192348a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_buf.c | 143 ++++++++++++++++++++++++---------------------
 1 file changed, 77 insertions(+), 66 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 997963e5362..f5676ed5136 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -61,12 +61,13 @@
  * File wide globals
  */
 
-STATIC kmem_cache_t *pagebuf_cache;
+STATIC kmem_cache_t *pagebuf_zone;
 STATIC kmem_shaker_t pagebuf_shake;
-STATIC int pagebuf_daemon_wakeup(int, unsigned int);
+STATIC int xfsbufd_wakeup(int, unsigned int);
 STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
-STATIC struct workqueue_struct *pagebuf_logio_workqueue;
-STATIC struct workqueue_struct *pagebuf_dataio_workqueue;
+
+STATIC struct workqueue_struct *xfslogd_workqueue;
+STATIC struct workqueue_struct *xfsdatad_workqueue;
 
 /*
  * Pagebuf debugging
@@ -123,9 +124,9 @@ ktrace_t *pagebuf_trace_buf;
 
 
 #define pagebuf_allocate(flags) \
-	kmem_zone_alloc(pagebuf_cache, pb_to_km(flags))
+	kmem_zone_alloc(pagebuf_zone, pb_to_km(flags))
 #define pagebuf_deallocate(pb) \
-	kmem_zone_free(pagebuf_cache, (pb));
+	kmem_zone_free(pagebuf_zone, (pb));
 
 /*
  * Page Region interfaces.
@@ -425,7 +426,7 @@ _pagebuf_lookup_pages(
 					__FUNCTION__, gfp_mask);
 
 			XFS_STATS_INC(pb_page_retries);
-			pagebuf_daemon_wakeup(0, gfp_mask);
+			xfsbufd_wakeup(0, gfp_mask);
 			blk_congestion_wait(WRITE, HZ/50);
 			goto retry;
 		}
@@ -1136,8 +1137,8 @@ pagebuf_iodone(
 	if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {
 		if (schedule) {
 			INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);
-			queue_work(dataio ? pagebuf_dataio_workqueue :
-				pagebuf_logio_workqueue, &pb->pb_iodone_work);
+			queue_work(dataio ? xfsdatad_workqueue :
+				xfslogd_workqueue, &pb->pb_iodone_work);
 		} else {
 			pagebuf_iodone_work(pb);
 		}
@@ -1742,27 +1743,27 @@ pagebuf_runall_queues(
 }
 
 /* Defines for pagebuf daemon */
-STATIC DECLARE_COMPLETION(pagebuf_daemon_done);
-STATIC struct task_struct *pagebuf_daemon_task;
-STATIC int pagebuf_daemon_active;
-STATIC int force_flush;
-STATIC int force_sleep;
+STATIC DECLARE_COMPLETION(xfsbufd_done);
+STATIC struct task_struct *xfsbufd_task;
+STATIC int xfsbufd_active;
+STATIC int xfsbufd_force_flush;
+STATIC int xfsbufd_force_sleep;
 
 STATIC int
-pagebuf_daemon_wakeup(
+xfsbufd_wakeup(
 	int			priority,
 	unsigned int		mask)
 {
-	if (force_sleep)
+	if (xfsbufd_force_sleep)
 		return 0;
-	force_flush = 1;
+	xfsbufd_force_flush = 1;
 	barrier();
-	wake_up_process(pagebuf_daemon_task);
+	wake_up_process(xfsbufd_task);
 	return 0;
 }
 
 STATIC int
-pagebuf_daemon(
+xfsbufd(
 	void			*data)
 {
 	struct list_head	tmp;
@@ -1774,17 +1775,17 @@ pagebuf_daemon(
 	daemonize("xfsbufd");
 	current->flags |= PF_MEMALLOC;
 
-	pagebuf_daemon_task = current;
-	pagebuf_daemon_active = 1;
+	xfsbufd_task = current;
+	xfsbufd_active = 1;
 	barrier();
 
 	INIT_LIST_HEAD(&tmp);
 	do {
 		if (unlikely(current->flags & PF_FREEZE)) {
-			force_sleep = 1;
+			xfsbufd_force_sleep = 1;
 			refrigerator(PF_FREEZE);
 		} else {
-			force_sleep = 0;
+			xfsbufd_force_sleep = 0;
 		}
 
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -1797,7 +1798,7 @@ pagebuf_daemon(
 			ASSERT(pb->pb_flags & PBF_DELWRI);
 
 			if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
-				if (!force_flush &&
+				if (!xfsbufd_force_flush &&
 				    time_before(jiffies,
 						pb->pb_queuetime + age)) {
 					pagebuf_unlock(pb);
@@ -1824,10 +1825,10 @@ pagebuf_daemon(
 		if (as_list_len > 0)
 			purge_addresses();
 
-		force_flush = 0;
-	} while (pagebuf_daemon_active);
+		xfsbufd_force_flush = 0;
+	} while (xfsbufd_active);
 
-	complete_and_exit(&pagebuf_daemon_done, 0);
+	complete_and_exit(&xfsbufd_done, 0);
 }
 
 /*
@@ -1844,8 +1845,8 @@ xfs_flush_buftarg(
 	xfs_buf_t		*pb, *n;
 	int			pincount = 0;
 
-	pagebuf_runall_queues(pagebuf_dataio_workqueue);
-	pagebuf_runall_queues(pagebuf_logio_workqueue);
+	pagebuf_runall_queues(xfsdatad_workqueue);
+	pagebuf_runall_queues(xfslogd_workqueue);
 
 	INIT_LIST_HEAD(&tmp);
 	spin_lock(&pbd_delwrite_lock);
@@ -1898,43 +1899,43 @@ xfs_flush_buftarg(
 }
 
 STATIC int
-pagebuf_daemon_start(void)
+xfs_buf_daemons_start(void)
 {
-	int		rval;
+	int		error = -ENOMEM;
 
-	pagebuf_logio_workqueue = create_workqueue("xfslogd");
-	if (!pagebuf_logio_workqueue)
-		return -ENOMEM;
+	xfslogd_workqueue = create_workqueue("xfslogd");
+	if (!xfslogd_workqueue)
+		goto out;
 
-	pagebuf_dataio_workqueue = create_workqueue("xfsdatad");
-	if (!pagebuf_dataio_workqueue) {
-		destroy_workqueue(pagebuf_logio_workqueue);
-		return -ENOMEM;
-	}
+	xfsdatad_workqueue = create_workqueue("xfsdatad");
+	if (!xfsdatad_workqueue)
+		goto out_destroy_xfslogd_workqueue;
 
-	rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES);
-	if (rval < 0) {
-		destroy_workqueue(pagebuf_logio_workqueue);
-		destroy_workqueue(pagebuf_dataio_workqueue);
-	}
+	error = kernel_thread(xfsbufd, NULL, CLONE_FS|CLONE_FILES);
+	if (error < 0)
+		goto out_destroy_xfsdatad_workqueue;
+	return 0;
 
-	return rval;
+ out_destroy_xfsdatad_workqueue:
+	destroy_workqueue(xfsdatad_workqueue);
+ out_destroy_xfslogd_workqueue:
+	destroy_workqueue(xfslogd_workqueue);
+ out:
+	return error;
 }
 
 /*
- * pagebuf_daemon_stop
- *
  * Note: do not mark as __exit, it is called from pagebuf_terminate.
  */
 STATIC void
-pagebuf_daemon_stop(void)
+xfs_buf_daemons_stop(void)
 {
-	pagebuf_daemon_active = 0;
+	xfsbufd_active = 0;
 	barrier();
-	wait_for_completion(&pagebuf_daemon_done);
+	wait_for_completion(&xfsbufd_done);
 
-	destroy_workqueue(pagebuf_logio_workqueue);
-	destroy_workqueue(pagebuf_dataio_workqueue);
+	destroy_workqueue(xfslogd_workqueue);
+	destroy_workqueue(xfsdatad_workqueue);
 }
 
 /*
@@ -1944,27 +1945,37 @@ pagebuf_daemon_stop(void)
 int __init
 pagebuf_init(void)
 {
-	pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0,
-			SLAB_HWCACHE_ALIGN, NULL, NULL);
-	if (pagebuf_cache == NULL) {
-		printk("XFS: couldn't init xfs_buf_t cache\n");
-		pagebuf_terminate();
-		return -ENOMEM;
-	}
+	int		error = -ENOMEM;
+
+	pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
+	if (!pagebuf_zone)
+		goto out;
 
 #ifdef PAGEBUF_TRACE
 	pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
 #endif
 
-	pagebuf_daemon_start();
+	error = xfs_buf_daemons_start();
+	if (!error)
+		goto out_free_buf_zone;
 
-	pagebuf_shake = kmem_shake_register(pagebuf_daemon_wakeup);
-	if (pagebuf_shake == NULL) {
-		pagebuf_terminate();
-		return -ENOMEM;
+	pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
+	if (!pagebuf_shake) {
+		error = -ENOMEM;
+		goto out_stop_daemons;
 	}
 
 	return 0;
+
+ out_stop_daemons:
+	xfs_buf_daemons_stop();
+ out_free_buf_zone:
+#ifdef PAGEBUF_TRACE
+	ktrace_free(pagebuf_trace_buf);
+#endif
+	kmem_zone_destroy(pagebuf_zone);
+ out:
+	return error;
 }
 
 
@@ -1976,12 +1987,12 @@ pagebuf_init(void)
 void
 pagebuf_terminate(void)
 {
-	pagebuf_daemon_stop();
+	xfs_buf_daemons_stop();
 
 #ifdef PAGEBUF_TRACE
 	ktrace_free(pagebuf_trace_buf);
 #endif
 
-	kmem_zone_destroy(pagebuf_cache);
+	kmem_zone_destroy(pagebuf_zone);
 	kmem_shake_deregister(pagebuf_shake);
 }
-- 
cgit v1.2.3


From 02de1f0abfc60aa4fead65eee4118d05667c93c3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 21 Jun 2005 15:33:48 +1000
Subject: [XFS] fix some more compiler warnings in the vnode tracing code

SGI-PV: 934679
SGI-Modid: xfs-linux:xfs-kern:192570a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_vnode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index a832d165f24..250cad54e89 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -411,13 +411,13 @@ vn_remove(
 /*  0 */		(void *)(__psint_t)(vk),		\
 /*  1 */		(void *)(s),				\
 /*  2 */		(void *)(__psint_t) line,		\
-/*  3 */		(void *)(vn_count(vp)), \
+/*  3 */		(void *)(__psint_t)(vn_count(vp)),	\
 /*  4 */		(void *)(ra),				\
 /*  5 */		(void *)(__psunsigned_t)(vp)->v_flag,	\
 /*  6 */		(void *)(__psint_t)current_cpu(),	\
 /*  7 */		(void *)(__psint_t)current_pid(),	\
 /*  8 */		(void *)__return_address,		\
-/*  9 */		0, 0, 0, 0, 0, 0, 0)
+/*  9 */		NULL, NULL, NULL, NULL, NULL, NULL, NULL)
 
 /*
  * Vnode tracing code.
-- 
cgit v1.2.3


From cf9937c6c6c7edb6650411d1cf3cb57f072b1277 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 21 Jun 2005 15:35:24 +1000
Subject: [XFS] Fix pagebuf slab initialization

SGI-PV: 908809
SGI-Modid: xfs-linux:xfs-kern:192756a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_buf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index f5676ed5136..049f8711617 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1956,7 +1956,7 @@ pagebuf_init(void)
 #endif
 
 	error = xfs_buf_daemons_start();
-	if (!error)
+	if (error)
 		goto out_free_buf_zone;
 
 	pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
-- 
cgit v1.2.3


From 4372d6e10349d4e8b012588f86f15c740c73a7c4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 21 Jun 2005 15:36:00 +1000
Subject: [XFS] Remove dead code.  Patch from Adrian Bunk

SGI-PV: 936255
SGI-Modid: xfs-linux:xfs-kern:192759a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/quota/xfs_dquot.c | 62 ------------------------------------------------
 fs/xfs/quota/xfs_dquot.h |  1 -
 fs/xfs/xfs_bmap_btree.c  | 14 -----------
 fs/xfs/xfs_bmap_btree.h  |  8 -------
 fs/xfs/xfs_fsops.c       | 26 --------------------
 fs/xfs/xfs_inode.c       | 45 -----------------------------------
 fs/xfs/xfs_rename.c      | 18 --------------
 fs/xfs/xfs_trans.c       | 19 ---------------
 fs/xfs/xfs_trans.h       |  3 ---
 fs/xfs/xfs_trans_inode.c | 18 --------------
 10 files changed, 214 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 740d20d3318..c961ef7d847 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -370,68 +370,6 @@ xfs_qm_adjust_dqtimers(
 	}
 }
 
-/*
- * Increment or reset warnings of a given dquot.
- */
-int
-xfs_qm_dqwarn(
-	xfs_disk_dquot_t	*d,
-	uint			flags)
-{
-	int	warned;
-
-	/*
-	 * root's limits are not real limits.
-	 */
-	if (!d->d_id)
-		return (0);
-
-	warned = 0;
-	if (INT_GET(d->d_blk_softlimit, ARCH_CONVERT) &&
-	    (INT_GET(d->d_bcount, ARCH_CONVERT) >=
-	     INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) {
-		if (flags & XFS_QMOPT_DOWARN) {
-			INT_MOD(d->d_bwarns, ARCH_CONVERT, +1);
-			warned++;
-		}
-	} else {
-		if (!d->d_blk_softlimit ||
-		    (INT_GET(d->d_bcount, ARCH_CONVERT) <
-		     INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) {
-			d->d_bwarns = 0;
-		}
-	}
-
-	if (INT_GET(d->d_ino_softlimit, ARCH_CONVERT) > 0 &&
-	    (INT_GET(d->d_icount, ARCH_CONVERT) >=
-	     INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) {
-		if (flags & XFS_QMOPT_DOWARN) {
-			INT_MOD(d->d_iwarns, ARCH_CONVERT, +1);
-			warned++;
-		}
-	} else {
-		if (!d->d_ino_softlimit ||
-		    (INT_GET(d->d_icount, ARCH_CONVERT) <
-		     INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) {
-			d->d_iwarns = 0;
-		}
-	}
-#ifdef QUOTADEBUG
-	if (INT_GET(d->d_iwarns, ARCH_CONVERT))
-		cmn_err(CE_DEBUG,
-			"--------@@Inode warnings running : %Lu >= %Lu",
-			INT_GET(d->d_icount, ARCH_CONVERT),
-			INT_GET(d->d_ino_softlimit, ARCH_CONVERT));
-	if (INT_GET(d->d_bwarns, ARCH_CONVERT))
-		cmn_err(CE_DEBUG,
-			"--------@@Blks warnings running : %Lu >= %Lu",
-			INT_GET(d->d_bcount, ARCH_CONVERT),
-			INT_GET(d->d_blk_softlimit, ARCH_CONVERT));
-#endif
-	return (warned);
-}
-
-
 /*
  * initialize a buffer full of dquots and log the whole thing
  */
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 0c3fe3175ba..35aeeafe479 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -211,7 +211,6 @@ extern void		xfs_qm_adjust_dqtimers(xfs_mount_t *,
 					xfs_disk_dquot_t *);
 extern void		xfs_qm_adjust_dqlimits(xfs_mount_t *,
 					xfs_disk_dquot_t *);
-extern int		xfs_qm_dqwarn(xfs_disk_dquot_t *, uint);
 extern int		xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
 					xfs_dqid_t, uint, uint, xfs_dquot_t **);
 extern void		xfs_qm_dqput(xfs_dquot_t *);
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 163305a79fc..09c413576ba 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -2331,20 +2331,6 @@ xfs_bmbt_lookup_ge(
 	return xfs_bmbt_lookup(cur, XFS_LOOKUP_GE, stat);
 }
 
-int					/* error */
-xfs_bmbt_lookup_le(
-	xfs_btree_cur_t	*cur,
-	xfs_fileoff_t	off,
-	xfs_fsblock_t	bno,
-	xfs_filblks_t	len,
-	int		*stat)		/* success/failure */
-{
-	cur->bc_rec.b.br_startoff = off;
-	cur->bc_rec.b.br_startblock = bno;
-	cur->bc_rec.b.br_blockcount = len;
-	return xfs_bmbt_lookup(cur, XFS_LOOKUP_LE, stat);
-}
-
 /*
  * Give the bmap btree a new root block.  Copy the old broot contents
  * down into a real block and make the broot point to it.
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 843ff12b4bf..0a40cf126c2 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -580,14 +580,6 @@ xfs_bmbt_lookup_ge(
 	xfs_filblks_t,
 	int *);
 
-int
-xfs_bmbt_lookup_le(
-	struct xfs_btree_cur *,
-	xfs_fileoff_t,
-	xfs_fsblock_t,
-	xfs_filblks_t,
-	int *);
-
 /*
  * Give the bmap btree a new root block.  Copy the old broot contents
  * down into a real block and make the broot point to it.
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 21213057c27..ca535d61319 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -559,32 +559,6 @@ xfs_reserve_blocks(
 	return(0);
 }
 
-void
-xfs_fs_log_dummy(xfs_mount_t *mp)
-{
-	xfs_trans_t *tp;
-	xfs_inode_t *ip;
-
-
-	tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
-	atomic_inc(&mp->m_active_trans);
-	if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) {
-		xfs_trans_cancel(tp, 0);
-		return;
-	}
-
-	ip = mp->m_rootip;
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-	xfs_trans_ihold(tp, ip);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	xfs_trans_set_sync(tp);
-	xfs_trans_commit(tp, 0, NULL);
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-}
-
 int
 xfs_fs_goingdown(
 	xfs_mount_t	*mp,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index bc8c8c7f903..e8274d6b723 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -145,51 +145,6 @@ xfs_inobp_check(
 }
 #endif
 
-/*
- * called from bwrite on xfs inode buffers
- */
-void
-xfs_inobp_bwcheck(xfs_buf_t *bp)
-{
-	xfs_mount_t	*mp;
-	int		i;
-	int		j;
-	xfs_dinode_t	*dip;
-
-	ASSERT(XFS_BUF_FSPRIVATE3(bp, void *) != NULL);
-
-	mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
-
-
-	j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
-
-	for (i = 0; i < j; i++)  {
-		dip = (xfs_dinode_t *) xfs_buf_offset(bp,
-						i * mp->m_sb.sb_inodesize);
-		if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) {
-			cmn_err(CE_WARN,
-"Bad magic # 0x%x in XFS inode buffer 0x%Lx, starting blockno %Ld, offset 0x%x",
-				INT_GET(dip->di_core.di_magic, ARCH_CONVERT),
-				(__uint64_t)(__psunsigned_t) bp,
-				(__int64_t) XFS_BUF_ADDR(bp),
-				xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
-			xfs_fs_cmn_err(CE_WARN, mp,
-				"corrupt, unmount and run xfs_repair");
-		}
-		if (!dip->di_next_unlinked)  {
-			cmn_err(CE_WARN,
-"Bad next_unlinked field (0) in XFS inode buffer 0x%p, starting blockno %Ld, offset 0x%x",
-				(__uint64_t)(__psunsigned_t) bp,
-				(__int64_t) XFS_BUF_ADDR(bp),
-				xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
-			xfs_fs_cmn_err(CE_WARN, mp,
-				"corrupt, unmount and run xfs_repair");
-		}
-	}
-
-	return;
-}
-
 /*
  * This routine is called to map an inode number within a file
  * system to the buffer containing the on-disk version of the
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index cb13f9a1d45..23b48ac1cb7 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -234,9 +234,6 @@ xfs_lock_for_rename(
 	return 0;
 }
 
-
-int rename_which_error_return = 0;
-
 /*
  * xfs_rename
  */
@@ -316,7 +313,6 @@ xfs_rename(
 			&num_inodes);
 
 	if (error) {
-		rename_which_error_return = __LINE__;
 		/*
 		 * We have nothing locked, no inode references, and
 		 * no transaction, so just get out.
@@ -332,7 +328,6 @@ xfs_rename(
 		 */
 		if (target_ip == NULL && (src_dp != target_dp) &&
 		    target_dp->i_d.di_nlink >= XFS_MAXLINK) {
-			rename_which_error_return = __LINE__;
 			error = XFS_ERROR(EMLINK);
 			xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
 			goto rele_return;
@@ -359,7 +354,6 @@ xfs_rename(
 				XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT);
 	}
 	if (error) {
-		rename_which_error_return = __LINE__;
 		xfs_trans_cancel(tp, 0);
 		goto rele_return;
 	}
@@ -369,7 +363,6 @@ xfs_rename(
 	 */
 	if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
 		xfs_trans_cancel(tp, cancel_flags);
-		rename_which_error_return = __LINE__;
 		goto rele_return;
 	}
 
@@ -413,7 +406,6 @@ xfs_rename(
 		if (spaceres == 0 &&
 		    (error = XFS_DIR_CANENTER(mp, tp, target_dp, target_name,
 				target_namelen))) {
-			rename_which_error_return = __LINE__;
 			goto error_return;
 		}
 		/*
@@ -425,11 +417,9 @@ xfs_rename(
 					   target_namelen, src_ip->i_ino,
 					   &first_block, &free_list, spaceres);
 		if (error == ENOSPC) {
-			rename_which_error_return = __LINE__;
 			goto error_return;
 		}
 		if (error) {
-			rename_which_error_return = __LINE__;
 			goto abort_return;
 		}
 		xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -437,7 +427,6 @@ xfs_rename(
 		if (new_parent && src_is_directory) {
 			error = xfs_bumplink(tp, target_dp);
 			if (error) {
-				rename_which_error_return = __LINE__;
 				goto abort_return;
 			}
 		}
@@ -455,7 +444,6 @@ xfs_rename(
 			if (!(XFS_DIR_ISEMPTY(target_ip->i_mount, target_ip)) ||
 			    (target_ip->i_d.di_nlink > 2)) {
 				error = XFS_ERROR(EEXIST);
-				rename_which_error_return = __LINE__;
 				goto error_return;
 			}
 		}
@@ -473,7 +461,6 @@ xfs_rename(
 			target_namelen, src_ip->i_ino, &first_block,
 			&free_list, spaceres);
 		if (error) {
-			rename_which_error_return = __LINE__;
 			goto abort_return;
 		}
 		xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -484,7 +471,6 @@ xfs_rename(
 		 */
 		error = xfs_droplink(tp, target_ip);
 		if (error) {
-			rename_which_error_return = __LINE__;
 			goto abort_return;
 		}
 		target_ip_dropped = 1;
@@ -495,7 +481,6 @@ xfs_rename(
 			 */
 			error = xfs_droplink(tp, target_ip);
 			if (error) {
-				rename_which_error_return = __LINE__;
 				goto abort_return;
 			}
 		}
@@ -519,7 +504,6 @@ xfs_rename(
 					&free_list, spaceres);
 		ASSERT(error != EEXIST);
 		if (error) {
-			rename_which_error_return = __LINE__;
 			goto abort_return;
 		}
 		xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -550,7 +534,6 @@ xfs_rename(
 		 */
 		error = xfs_droplink(tp, src_dp);
 		if (error) {
-			rename_which_error_return = __LINE__;
 			goto abort_return;
 		}
 	}
@@ -558,7 +541,6 @@ xfs_rename(
 	error = XFS_DIR_REMOVENAME(mp, tp, src_dp, src_name, src_namelen,
 			src_ip->i_ino, &first_block, &free_list, spaceres);
 	if (error) {
-		rename_which_error_return = __LINE__;
 		goto abort_return;
 	}
 	xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 3db0e220077..a865f603599 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -331,25 +331,6 @@ undo_blocks:
 }
 
 
-/*
- * This is called to set the a callback to be called when the given
- * transaction is committed to disk.  The transaction pointer and the
- * argument pointer will be passed to the callback routine.
- *
- * Only one callback can be associated with any single transaction.
- */
-void
-xfs_trans_callback(
-	xfs_trans_t		*tp,
-	xfs_trans_callback_t	callback,
-	void			*arg)
-{
-	ASSERT(tp->t_callback == NULL);
-	tp->t_callback = callback;
-	tp->t_callarg = arg;
-}
-
-
 /*
  * Record the indicated change to the given field for application
  * to the file system's superblock when the transaction commits.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index bd37ccb85e7..ec541d66fa2 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -987,8 +987,6 @@ xfs_trans_t	*_xfs_trans_alloc(struct xfs_mount *, uint);
 xfs_trans_t	*xfs_trans_dup(xfs_trans_t *);
 int		xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
 				  uint, uint);
-void		xfs_trans_callback(xfs_trans_t *,
-				   void (*)(xfs_trans_t *, void *), void *);
 void		xfs_trans_mod_sb(xfs_trans_t *, uint, long);
 struct xfs_buf	*xfs_trans_get_buf(xfs_trans_t *, struct xfs_buftarg *, xfs_daddr_t,
 				   int, uint);
@@ -1010,7 +1008,6 @@ int		xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
 			       xfs_ino_t , uint, uint, struct xfs_inode **);
 void		xfs_trans_ijoin(xfs_trans_t *, struct xfs_inode *, uint);
 void		xfs_trans_ihold(xfs_trans_t *, struct xfs_inode *);
-void		xfs_trans_ihold_release(xfs_trans_t *, struct xfs_inode *);
 void		xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
 void		xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
 struct xfs_efi_log_item	*xfs_trans_get_efi(xfs_trans_t *, uint);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index e2c3706f453..7e7631ca497 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -253,24 +253,6 @@ xfs_trans_ihold(
 	ip->i_itemp->ili_flags |= XFS_ILI_HOLD;
 }
 
-/*
- * Cancel the previous inode hold request made on this inode
- * for this transaction.
- */
-/*ARGSUSED*/
-void
-xfs_trans_ihold_release(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip)
-{
-	ASSERT(ip->i_transp == tp);
-	ASSERT(ip->i_itemp != NULL);
-	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
-	ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD);
-
-	ip->i_itemp->ili_flags &= ~XFS_ILI_HOLD;
-}
-
 
 /*
  * This is called to mark the fields indicated in fieldmask as needing
-- 
cgit v1.2.3


From ba0f32d46049e2b625dabd33c7964f8ca2cd7651 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 21 Jun 2005 15:36:52 +1000
Subject: [XFS] mark various symbols static  Patch from Adrian Bunk

SGI-PV: 936255
SGI-Modid: xfs-linux:xfs-kern:192760a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/quota/xfs_dquot.c       |  2 +-
 fs/xfs/quota/xfs_dquot_item.c  |  6 +++---
 fs/xfs/quota/xfs_qm.c          | 24 +++++++++++++++---------
 fs/xfs/quota/xfs_qm.h          |  6 ------
 fs/xfs/quota/xfs_qm_bhv.c      |  2 +-
 fs/xfs/quota/xfs_trans_dquot.c |  4 ++--
 fs/xfs/xfs_alloc.c             |  4 ++--
 fs/xfs/xfs_attr.c              | 17 ++++++++++++-----
 fs/xfs/xfs_attr.h              |  7 -------
 fs/xfs/xfs_attr_leaf.c         | 28 ++++++++++++++++++++++------
 fs/xfs/xfs_attr_leaf.h         | 12 ------------
 fs/xfs/xfs_bit.c               |  2 +-
 fs/xfs/xfs_bmap.c              | 15 ++++++++++++++-
 fs/xfs/xfs_bmap.h              | 13 -------------
 fs/xfs/xfs_btree.c             | 12 +++++++++++-
 fs/xfs/xfs_btree.h             | 10 ----------
 fs/xfs/xfs_buf_item.c          | 24 ++++++++++++------------
 fs/xfs/xfs_da_btree.c          |  9 ++++++---
 fs/xfs/xfs_da_btree.h          |  3 ---
 fs/xfs/xfs_dir2_data.c         |  2 +-
 fs/xfs/xfs_dir2_data.h         |  4 ----
 fs/xfs/xfs_dir2_leaf.c         |  8 ++++++--
 fs/xfs/xfs_dir2_leaf.h         |  7 -------
 fs/xfs/xfs_dir_leaf.c          |  6 +++++-
 fs/xfs/xfs_dir_leaf.h          |  2 --
 fs/xfs/xfs_error.c             |  2 +-
 fs/xfs/xfs_error.h             |  3 ---
 fs/xfs/xfs_extfree_item.c      |  4 ++--
 fs/xfs/xfs_inode.c             |  6 +++---
 fs/xfs/xfs_inode.h             |  2 --
 fs/xfs/xfs_inode_item.c        |  2 +-
 fs/xfs/xfs_log.c               |  6 +++---
 fs/xfs/xfs_log_priv.h          |  2 --
 fs/xfs/xfs_log_recover.c       |  6 +++---
 fs/xfs/xfs_mount.c             |  5 +++--
 fs/xfs/xfs_mount.h             |  1 -
 fs/xfs/xfs_trans.c             |  2 +-
 fs/xfs/xfs_vfsops.c            |  4 ++--
 fs/xfs/xfs_vnodeops.c          |  4 ++--
 39 files changed, 135 insertions(+), 143 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index c961ef7d847..9ce471430a0 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -101,7 +101,7 @@ int xfs_dqerror_mod = 33;
  * is the d_id field. The idea is to fill in the entire q_core
  * when we read in the on disk dquot.
  */
-xfs_dquot_t *
+STATIC xfs_dquot_t *
 xfs_qm_dqinit(
 	xfs_mount_t  *mp,
 	xfs_dqid_t   id,
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index a5425ee6e7b..f5271b7b1e8 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -428,7 +428,7 @@ xfs_qm_dquot_logitem_committing(
 /*
  * This is the ops vector for dquots
  */
-struct xfs_item_ops xfs_dquot_item_ops = {
+STATIC struct xfs_item_ops xfs_dquot_item_ops = {
 	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size,
 	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 					xfs_qm_dquot_logitem_format,
@@ -646,7 +646,7 @@ xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
 	return;
 }
 
-struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
+STATIC struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
 	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
 	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 					xfs_qm_qoff_logitem_format,
@@ -669,7 +669,7 @@ struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
 /*
  * This is the ops vector shared by all quotaoff-start log items.
  */
-struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
+STATIC struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
 	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
 	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 					xfs_qm_qoff_logitem_format,
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 89f2cd656eb..41bbc49d535 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -81,12 +81,18 @@ struct xfs_qm	*xfs_Gqm;
 
 kmem_zone_t	*qm_dqzone;
 kmem_zone_t	*qm_dqtrxzone;
-kmem_shaker_t	xfs_qm_shaker;
+STATIC kmem_shaker_t	xfs_qm_shaker;
 
 STATIC void	xfs_qm_list_init(xfs_dqlist_t *, char *, int);
 STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
 
+STATIC void	xfs_qm_freelist_init(xfs_frlist_t *);
+STATIC void	xfs_qm_freelist_destroy(xfs_frlist_t *);
+STATIC int	xfs_qm_mplist_nowait(xfs_mount_t *);
+STATIC int	xfs_qm_dqhashlock_nowait(xfs_dquot_t *);
+
 STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
+STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
 STATIC int	xfs_qm_shake(int, unsigned int);
 
 #ifdef DEBUG
@@ -184,7 +190,7 @@ xfs_Gqm_init(void)
 /*
  * Destroy the global quota manager when its reference count goes to zero.
  */
-void
+STATIC void
 xfs_qm_destroy(
 	struct xfs_qm	*xqm)
 {
@@ -509,7 +515,7 @@ out:
  * Flush all dquots of the given file system to disk. The dquots are
  * _not_ purged from memory here, just their data written to disk.
  */
-int
+STATIC int
 xfs_qm_dqflush_all(
 	xfs_mount_t	*mp,
 	int		flags)
@@ -1149,7 +1155,7 @@ xfs_qm_sync(
  * This initializes all the quota information that's kept in the
  * mount structure
  */
-int
+STATIC int
 xfs_qm_init_quotainfo(
 	xfs_mount_t	*mp)
 {
@@ -2751,7 +2757,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
 }
 
 /* ------------- list stuff -----------------*/
-void
+STATIC void
 xfs_qm_freelist_init(xfs_frlist_t *ql)
 {
 	ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
@@ -2760,7 +2766,7 @@ xfs_qm_freelist_init(xfs_frlist_t *ql)
 	ql->qh_nelems = 0;
 }
 
-void
+STATIC void
 xfs_qm_freelist_destroy(xfs_frlist_t *ql)
 {
 	xfs_dquot_t	*dqp, *nextdqp;
@@ -2786,7 +2792,7 @@ xfs_qm_freelist_destroy(xfs_frlist_t *ql)
 	ASSERT(ql->qh_nelems == 0);
 }
 
-void
+STATIC void
 xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
 {
 	dq->dq_flnext = ql->qh_next;
@@ -2816,7 +2822,7 @@ xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
 	xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
 }
 
-int
+STATIC int
 xfs_qm_dqhashlock_nowait(
 	xfs_dquot_t *dqp)
 {
@@ -2836,7 +2842,7 @@ xfs_qm_freelist_lock_nowait(
 	return (locked);
 }
 
-int
+STATIC int
 xfs_qm_mplist_nowait(
 	xfs_mount_t	*mp)
 {
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index dcf1a7a831d..ae626eca5ac 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -184,7 +184,6 @@ typedef struct xfs_dquot_acct {
 
 extern void		xfs_mount_reset_sbqflags(xfs_mount_t *);
 
-extern int		xfs_qm_init_quotainfo(xfs_mount_t *);
 extern void		xfs_qm_destroy_quotainfo(xfs_mount_t *);
 extern int		xfs_qm_mount_quotas(xfs_mount_t *, int);
 extern void		xfs_qm_mount_quotainit(xfs_mount_t *, uint);
@@ -215,14 +214,9 @@ extern int		xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *,
 					xfs_dquot_t *, xfs_dquot_t *, uint);
 
 /* list stuff */
-extern void		xfs_qm_freelist_init(xfs_frlist_t *);
-extern void		xfs_qm_freelist_destroy(xfs_frlist_t *);
-extern void		xfs_qm_freelist_insert(xfs_frlist_t *, xfs_dquot_t *);
 extern void		xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
 extern void		xfs_qm_freelist_unlink(xfs_dquot_t *);
 extern int		xfs_qm_freelist_lock_nowait(xfs_qm_t *);
-extern int		xfs_qm_mplist_nowait(xfs_mount_t *);
-extern int		xfs_qm_dqhashlock_nowait(xfs_dquot_t *);
 
 /* system call interface */
 extern int		xfs_qm_quotactl(bhv_desc_t *, int, int, xfs_caddr_t);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index be67d9c265f..09b1171dfb8 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -359,7 +359,7 @@ xfs_qm_dqrele_null(
 }
 
 
-struct xfs_qmops xfs_qmcore_xfs = {
+STATIC struct xfs_qmops xfs_qmcore_xfs = {
 	.xfs_qminit		= xfs_qm_newmount,
 	.xfs_qmdone		= xfs_qm_unmount_quotadestroy,
 	.xfs_qmmount		= xfs_qm_endmount,
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 149b2a1fd94..3644ca00cc8 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -187,7 +187,7 @@ xfs_trans_dup_dqinfo(
 /*
  * Wrap around mod_dquot to account for both user and group quotas.
  */
-void
+STATIC void
 xfs_trans_mod_dquot_byino(
 	xfs_trans_t	*tp,
 	xfs_inode_t	*ip,
@@ -368,7 +368,7 @@ xfs_trans_dqlockedjoin(
  * Unreserve just the reservations done by this transaction.
  * dquot is still left locked at exit.
  */
-void
+STATIC void
 xfs_trans_apply_dquot_deltas(
 	xfs_trans_t		*tp)
 {
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 36603db10fe..dcfe1970362 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -59,7 +59,7 @@
 #define	XFSA_FIXUP_BNO_OK	1
 #define	XFSA_FIXUP_CNT_OK	2
 
-int
+STATIC int
 xfs_alloc_search_busy(xfs_trans_t *tp,
 		    xfs_agnumber_t agno,
 		    xfs_agblock_t bno,
@@ -2562,7 +2562,7 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
 /*
  * returns non-zero if any of (agno,bno):len is in a busy list
  */
-int
+STATIC int
 xfs_alloc_search_busy(xfs_trans_t *tp,
 		    xfs_agnumber_t agno,
 		    xfs_agblock_t bno,
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index ee8b5904ec7..a41ad3a5e55 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -71,6 +71,11 @@
  * Provide the external interfaces to manage attribute lists.
  */
 
+#define ATTR_SYSCOUNT	2
+STATIC struct attrnames posix_acl_access;
+STATIC struct attrnames posix_acl_default;
+STATIC struct attrnames *attr_system_names[ATTR_SYSCOUNT];
+
 /*========================================================================
  * Function prototypes for the kernel.
  *========================================================================*/
@@ -83,6 +88,7 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
 /*
  * Internal routines when attribute list is one block.
  */
+STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
 STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
 STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context);
@@ -90,6 +96,7 @@ STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context);
 /*
  * Internal routines when attribute list is more than one block.
  */
+STATIC int xfs_attr_node_get(xfs_da_args_t *args);
 STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
 STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
 STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context);
@@ -1102,7 +1109,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
  * This leaf block cannot have a "remote" value, we only call this routine
  * if bmap_one_block() says there is only one block (ie: no remote blks).
  */
-int
+STATIC int
 xfs_attr_leaf_get(xfs_da_args_t *args)
 {
 	xfs_dabuf_t *bp;
@@ -1707,7 +1714,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
  * block, ie: both true Btree attr lists and for single-leaf-blocks with
  * "remote" values taking up more blocks.
  */
-int
+STATIC int
 xfs_attr_node_get(xfs_da_args_t *args)
 {
 	xfs_da_state_t *state;
@@ -2398,7 +2405,7 @@ posix_acl_default_exists(
 	return xfs_acl_vhasacl_default(vp);
 }
 
-struct attrnames posix_acl_access = {
+STATIC struct attrnames posix_acl_access = {
 	.attr_name	= "posix_acl_access",
 	.attr_namelen	= sizeof("posix_acl_access") - 1,
 	.attr_get	= posix_acl_access_get,
@@ -2407,7 +2414,7 @@ struct attrnames posix_acl_access = {
 	.attr_exists	= posix_acl_access_exists,
 };
 
-struct attrnames posix_acl_default = {
+STATIC struct attrnames posix_acl_default = {
 	.attr_name	= "posix_acl_default",
 	.attr_namelen	= sizeof("posix_acl_default") - 1,
 	.attr_get	= posix_acl_default_get,
@@ -2416,7 +2423,7 @@ struct attrnames posix_acl_default = {
 	.attr_exists	= posix_acl_default_exists,
 };
 
-struct attrnames *attr_system_names[] =
+STATIC struct attrnames *attr_system_names[] =
 	{ &posix_acl_access, &posix_acl_default };
 
 
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 67cd0f5ac1a..45ab1c542ba 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -76,11 +76,6 @@ extern struct attrnames attr_system;
 extern struct attrnames attr_trusted;
 extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT];
 
-#define ATTR_SYSCOUNT	2
-extern struct attrnames posix_acl_access;
-extern struct attrnames posix_acl_default;
-extern struct attrnames *attr_system_names[ATTR_SYSCOUNT];
-
 extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int);
 extern int attr_generic_list(struct vnode *, void *, size_t, int, ssize_t *);
 
@@ -184,8 +179,6 @@ int xfs_attr_list(bhv_desc_t *, char *, int, int,
 			 struct attrlist_cursor_kern *, struct cred *);
 int xfs_attr_inactive(struct xfs_inode *dp);
 
-int xfs_attr_node_get(struct xfs_da_args *);
-int xfs_attr_leaf_get(struct xfs_da_args *);
 int xfs_attr_shortform_getvalue(struct xfs_da_args *);
 int xfs_attr_fetch(struct xfs_inode *, char *, int,
 			char *, int *, int, struct cred *);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index b11256e58bf..1cdd574c63a 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -79,6 +79,8 @@
 /*
  * Routines used for growing the Btree.
  */
+STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t which_block,
+				    xfs_dabuf_t **bpp);
 STATIC int xfs_attr_leaf_add_work(xfs_dabuf_t *leaf_buffer, xfs_da_args_t *args,
 					      int freemap_index);
 STATIC void xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *leaf_buffer);
@@ -91,6 +93,16 @@ STATIC int xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
 					   int *number_entries_in_blk1,
 					   int *number_usedbytes_in_blk1);
 
+/*
+ * Routines used for shrinking the Btree.
+ */
+STATIC int xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
+				  xfs_dabuf_t *bp, int level);
+STATIC int xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
+				  xfs_dabuf_t *bp);
+STATIC int xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
+				   xfs_dablk_t blkno, int blkcnt);
+
 /*
  * Utility routines.
  */
@@ -99,6 +111,10 @@ STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf,
 					 xfs_attr_leafblock_t *dst_leaf,
 					 int dst_start, int move_count,
 					 xfs_mount_t *mp);
+STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
+STATIC int xfs_attr_put_listent(xfs_attr_list_context_t *context,
+			     attrnames_t *, char *name, int namelen,
+			     int valuelen);
 
 
 /*========================================================================
@@ -774,7 +790,7 @@ out:
  * Create the initial contents of a leaf attribute list
  * or a leaf in a node attribute list.
  */
-int
+STATIC int
 xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
 {
 	xfs_attr_leafblock_t *leaf;
@@ -2209,7 +2225,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count)
  * Calculate the number of bytes used to store the indicated attribute
  * (whether local or remote only calculate bytes in this block).
  */
-int
+STATIC int
 xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
 {
 	xfs_attr_leaf_name_local_t *name_loc;
@@ -2380,7 +2396,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
  * we may be reading them directly out of a user buffer.
  */
 /*ARGSUSED*/
-int
+STATIC int
 xfs_attr_put_listent(xfs_attr_list_context_t *context,
 		     attrnames_t *namesp, char *name, int namelen, int valuelen)
 {
@@ -2740,7 +2756,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
  * Recurse (gasp!) through the attribute nodes until we find leaves.
  * We're doing a depth-first traversal in order to invalidate everything.
  */
-int
+STATIC int
 xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
 				   int level)
 {
@@ -2849,7 +2865,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
  * Note that we must release the lock on the buffer so that we are not
  * caught holding something that the logging code wants to flush to disk.
  */
-int
+STATIC int
 xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 {
 	xfs_attr_leafblock_t *leaf;
@@ -2934,7 +2950,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
  * Look at all the extents for this logical region,
  * invalidate any buffers that are incore/in transactions.
  */
-int
+STATIC int
 xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
 				    xfs_dablk_t blkno, int blkcnt)
 {
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index b1480e0b334..0a4cfad6df9 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -261,8 +261,6 @@ int	xfs_attr_leaf_flipflags(xfs_da_args_t *args);
 /*
  * Routines used for growing the Btree.
  */
-int	xfs_attr_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block,
-				    struct xfs_dabuf **bpp);
 int	xfs_attr_leaf_split(struct xfs_da_state *state,
 				   struct xfs_da_state_blk *oldblk,
 				   struct xfs_da_state_blk *newblk);
@@ -284,12 +282,6 @@ void	xfs_attr_leaf_unbalance(struct xfs_da_state *state,
 				       struct xfs_da_state_blk *drop_blk,
 				       struct xfs_da_state_blk *save_blk);
 int	xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);
-int	xfs_attr_node_inactive(struct xfs_trans **trans, struct xfs_inode *dp,
-				      struct xfs_dabuf *bp, int level);
-int	xfs_attr_leaf_inactive(struct xfs_trans **trans, struct xfs_inode *dp,
-				      struct xfs_dabuf *bp);
-int	xfs_attr_leaf_freextent(struct xfs_trans **trans, struct xfs_inode *dp,
-				       xfs_dablk_t blkno, int blkcnt);
 
 /*
  * Utility routines.
@@ -299,10 +291,6 @@ int	xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp,
 				   struct xfs_dabuf *leaf2_bp);
 int	xfs_attr_leaf_newentsize(struct xfs_da_args *args, int blocksize,
 					int *local);
-int	xfs_attr_leaf_entsize(struct xfs_attr_leafblock *leaf, int index);
-int	xfs_attr_put_listent(struct xfs_attr_list_context *context,
-			     struct attrnames *, char *name, int namelen,
-			     int valuelen);
 int	xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp);
 
 #endif	/* __XFS_ATTR_LEAF_H__ */
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index a20a6c3dc13..76c9ad3875e 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -45,7 +45,7 @@
 /*
  * Index of high bit number in byte, -1 for none set, 0..7 otherwise.
  */
-const char xfs_highbit[256] = {
+STATIC const char xfs_highbit[256] = {
        -1, 0, 1, 1, 2, 2, 2, 2,			/* 00 .. 07 */
 	3, 3, 3, 3, 3, 3, 3, 3,			/* 08 .. 0f */
 	4, 4, 4, 4, 4, 4, 4, 4,			/* 10 .. 17 */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index de316241866..f6f5ad35734 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -301,6 +301,19 @@ xfs_bmap_search_extents(
 	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
 	xfs_bmbt_irec_t	*prevp);	/* out: previous extent entry found */
 
+/*
+ * Check the last inode extent to determine whether this allocation will result
+ * in blocks being allocated at the end of the file. When we allocate new data
+ * blocks at the end of the file which do not start at the previous data block,
+ * we will try to align the new blocks at stripe unit boundaries.
+ */
+STATIC int				/* error */
+xfs_bmap_isaeof(
+	xfs_inode_t	*ip,		/* incore inode pointer */
+	xfs_fileoff_t   off,		/* file offset in fsblocks */
+	int             whichfork,	/* data or attribute fork */
+	char		*aeof);		/* return value */
+
 #ifdef XFS_BMAP_TRACE
 /*
  * Add a bmap trace buffer entry.  Base routine for the others.
@@ -5714,7 +5727,7 @@ unlock_and_return:
  * blocks at the end of the file which do not start at the previous data block,
  * we will try to align the new blocks at stripe unit boundaries.
  */
-int					/* error */
+STATIC int				/* error */
 xfs_bmap_isaeof(
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_fileoff_t   off,		/* file offset in fsblocks */
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index f1bc22fb26a..e6d22ec9b2e 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -331,19 +331,6 @@ xfs_getbmap(
 	void			__user *ap,	/* pointer to user's array */
 	int			iflags);	/* interface flags */
 
-/*
- * Check the last inode extent to determine whether this allocation will result
- * in blocks being allocated at the end of the file. When we allocate new data
- * blocks at the end of the file which do not start at the previous data block,
- * we will try to align the new blocks at stripe unit boundaries.
- */
-int
-xfs_bmap_isaeof(
-	struct xfs_inode	*ip,
-	xfs_fileoff_t		off,
-	int			whichfork,
-	char			*aeof);
-
 /*
  * Check if the endoff is outside the last extent. If so the caller will grow
  * the allocation to a stripe unit boundary
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 9dd22dd9548..797f4d96cdf 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -89,6 +89,16 @@ xfs_btree_maxrecs(
  * Internal routines.
  */
 
+/*
+ * Retrieve the block pointer from the cursor at the given level.
+ * This may be a bmap btree root or from a buffer.
+ */
+xfs_btree_block_t *			/* generic btree block pointer */
+xfs_btree_get_block(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level in btree */
+	struct xfs_buf		**bpp);	/* buffer containing the block */
+
 /*
  * Checking routine: return maxrecs for the block.
  */
@@ -497,7 +507,7 @@ xfs_btree_firstrec(
  * Retrieve the block pointer from the cursor at the given level.
  * This may be a bmap btree root or from a buffer.
  */
-xfs_btree_block_t *			/* generic btree block pointer */
+STATIC xfs_btree_block_t *		/* generic btree block pointer */
 xfs_btree_get_block(
 	xfs_btree_cur_t		*cur,	/* btree cursor */
 	int			level,	/* level in btree */
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 93872bba41f..09b4e1532a3 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -324,16 +324,6 @@ xfs_btree_firstrec(
 	xfs_btree_cur_t		*cur,	/* btree cursor */
 	int			level);	/* level to change */
 
-/*
- * Retrieve the block pointer from the cursor at the given level.
- * This may be a bmap btree root or from a buffer.
- */
-xfs_btree_block_t *			/* generic btree block pointer */
-xfs_btree_get_block(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	int			level,	/* level in btree */
-	struct xfs_buf		**bpp);	/* buffer containing the block */
-
 /*
  * Get a buffer for the block, return it with no data read.
  * Long-form addressing.
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 9ab0039f07d..30b8285ad47 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -172,7 +172,7 @@ STATIC void	xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip);
  *
  * If the XFS_BLI_STALE flag has been set, then log nothing.
  */
-uint
+STATIC uint
 xfs_buf_item_size(
 	xfs_buf_log_item_t	*bip)
 {
@@ -240,7 +240,7 @@ xfs_buf_item_size(
  * format structure, and the rest point to contiguous chunks
  * within the buffer.
  */
-void
+STATIC void
 xfs_buf_item_format(
 	xfs_buf_log_item_t	*bip,
 	xfs_log_iovec_t		*log_vector)
@@ -365,7 +365,7 @@ xfs_buf_item_format(
  * item in memory so it cannot be written out.  Simply call bpin()
  * on the buffer to do this.
  */
-void
+STATIC void
 xfs_buf_item_pin(
 	xfs_buf_log_item_t	*bip)
 {
@@ -391,7 +391,7 @@ xfs_buf_item_pin(
  * If the XFS_BLI_STALE flag is set and we are the last reference,
  * then free up the buf log item and unlock the buffer.
  */
-void
+STATIC void
 xfs_buf_item_unpin(
 	xfs_buf_log_item_t	*bip,
 	int			stale)
@@ -446,7 +446,7 @@ xfs_buf_item_unpin(
  * so we need to free the item's descriptor (that points to the item)
  * in the transaction.
  */
-void
+STATIC void
 xfs_buf_item_unpin_remove(
 	xfs_buf_log_item_t	*bip,
 	xfs_trans_t		*tp)
@@ -493,7 +493,7 @@ xfs_buf_item_unpin_remove(
  * the lock right away, return 0.  If we can get the lock, pull the
  * buffer from the free list, mark it busy, and return 1.
  */
-uint
+STATIC uint
 xfs_buf_item_trylock(
 	xfs_buf_log_item_t	*bip)
 {
@@ -537,7 +537,7 @@ xfs_buf_item_trylock(
  * This is for support of xfs_trans_bhold(). Make sure the
  * XFS_BLI_HOLD field is cleared if we don't free the item.
  */
-void
+STATIC void
 xfs_buf_item_unlock(
 	xfs_buf_log_item_t	*bip)
 {
@@ -635,7 +635,7 @@ xfs_buf_item_unlock(
  * by returning the original lsn of that transaction here rather than
  * the current one.
  */
-xfs_lsn_t
+STATIC xfs_lsn_t
 xfs_buf_item_committed(
 	xfs_buf_log_item_t	*bip,
 	xfs_lsn_t		lsn)
@@ -654,7 +654,7 @@ xfs_buf_item_committed(
  * and have aborted this transaction, we'll trap this buffer when it tries to
  * get written out.
  */
-void
+STATIC void
 xfs_buf_item_abort(
 	xfs_buf_log_item_t	*bip)
 {
@@ -674,7 +674,7 @@ xfs_buf_item_abort(
  * B_DELWRI set, then get it going out to disk with a call to bawrite().
  * If not, then just release the buffer.
  */
-void
+STATIC void
 xfs_buf_item_push(
 	xfs_buf_log_item_t	*bip)
 {
@@ -693,7 +693,7 @@ xfs_buf_item_push(
 }
 
 /* ARGSUSED */
-void
+STATIC void
 xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn)
 {
 }
@@ -701,7 +701,7 @@ xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn)
 /*
  * This is the ops vector shared by all buf log items.
  */
-struct xfs_item_ops xfs_buf_item_ops = {
+STATIC struct xfs_item_ops xfs_buf_item_ops = {
 	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_buf_item_size,
 	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 					xfs_buf_item_format,
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index d7fe2886676..8b792ddf216 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -113,7 +113,10 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
 STATIC uint	xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
 STATIC int	xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
 STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
-
+STATIC int	xfs_da_blk_unlink(xfs_da_state_t *state,
+				  xfs_da_state_blk_t *drop_blk,
+				  xfs_da_state_blk_t *save_blk);
+STATIC void	xfs_da_state_kill_altpath(xfs_da_state_t *state);
 
 /*========================================================================
  * Routines used for growing the Btree.
@@ -1424,7 +1427,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
 /*
  * Unlink a block from a doubly linked list of blocks.
  */
-int							/* error */
+STATIC int						/* error */
 xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 				 xfs_da_state_blk_t *save_blk)
 {
@@ -2381,7 +2384,7 @@ xfs_da_state_alloc(void)
 /*
  * Kill the altpath contents of a da-state structure.
  */
-void
+STATIC void
 xfs_da_state_kill_altpath(xfs_da_state_t *state)
 {
 	int	i;
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 9fc699d9699..3a9b9e809c6 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -296,8 +296,6 @@ int	xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 /*
  * Utility routines.
  */
-int	xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
-					 xfs_da_state_blk_t *save_blk);
 int	xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
 				       xfs_da_state_blk_t *new_blk);
 
@@ -320,7 +318,6 @@ uint xfs_da_hashname(uchar_t *name_string, int name_length);
 uint xfs_da_log2_roundup(uint i);
 xfs_da_state_t *xfs_da_state_alloc(void);
 void xfs_da_state_free(xfs_da_state_t *state);
-void xfs_da_state_kill_altpath(xfs_da_state_t *state);
 
 void xfs_da_buf_done(xfs_dabuf_t *dabuf);
 void xfs_da_log_buf(struct xfs_trans *tp, xfs_dabuf_t *dabuf, uint first,
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index db9887a107d..a0aa0e44ff9 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -304,7 +304,7 @@ xfs_dir2_data_freeinsert(
 /*
  * Remove a bestfree entry from the table.
  */
-void
+STATIC void
 xfs_dir2_data_freeremove(
 	xfs_dir2_data_t		*d,		/* data block pointer */
 	xfs_dir2_data_free_t	*dfp,		/* bestfree entry pointer */
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index 3f02294ccff..476cac920bf 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -192,10 +192,6 @@ extern xfs_dir2_data_free_t *
 	xfs_dir2_data_freeinsert(xfs_dir2_data_t *d,
 				 xfs_dir2_data_unused_t *dup, int *loghead);
 
-extern void
-	xfs_dir2_data_freeremove(xfs_dir2_data_t *d,
-				 xfs_dir2_data_free_t *dfp, int *loghead);
-
 extern void
 	xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d,
 			       int *loghead, char *aendp);
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 262d1e86df3..79918df6ffe 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -77,6 +77,10 @@ static void xfs_dir2_leaf_check(xfs_inode_t *dp, xfs_dabuf_t *bp);
 #endif
 static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **lbpp,
 				    int *indexp, xfs_dabuf_t **dbpp);
+static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
+				    int first, int last);
+extern void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
+
 
 /*
  * Convert a block form directory to a leaf form directory.
@@ -1214,7 +1218,7 @@ xfs_dir2_leaf_init(
 /*
  * Log the bests entries indicated from a leaf1 block.
  */
-void
+static void
 xfs_dir2_leaf_log_bests(
 	xfs_trans_t		*tp,		/* transaction pointer */
 	xfs_dabuf_t		*bp,		/* leaf buffer */
@@ -1278,7 +1282,7 @@ xfs_dir2_leaf_log_header(
 /*
  * Log the tail of the leaf1 block.
  */
-void
+STATIC void
 xfs_dir2_leaf_log_tail(
 	xfs_trans_t		*tp,		/* transaction pointer */
 	xfs_dabuf_t		*bp)		/* leaf buffer */
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
index 7f20eee56a5..3303cd6f4c0 100644
--- a/fs/xfs/xfs_dir2_leaf.h
+++ b/fs/xfs/xfs_dir2_leaf.h
@@ -329,16 +329,9 @@ extern void
 	xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
 			       int first, int last);
 
-extern void
-	xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
-				int first, int last);
-
 extern void
 	xfs_dir2_leaf_log_header(struct xfs_trans *tp, struct xfs_dabuf *bp);
 
-extern void
-	xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
-
 extern int
 	xfs_dir2_leaf_lookup(struct xfs_da_args *args);
 
diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
index 617018d6bbd..c2ea6171fb0 100644
--- a/fs/xfs/xfs_dir_leaf.c
+++ b/fs/xfs/xfs_dir_leaf.c
@@ -91,6 +91,10 @@ STATIC int xfs_dir_leaf_figure_balance(xfs_da_state_t *state,
 					  int *number_entries_in_blk1,
 					  int *number_namebytes_in_blk1);
 
+STATIC int xfs_dir_leaf_create(struct xfs_da_args *args,
+				xfs_dablk_t which_block,
+				struct xfs_dabuf **bpp);
+
 /*
  * Utility routines.
  */
@@ -781,7 +785,7 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args)
  * Create the initial contents of a leaf directory
  * or a leaf in a node directory.
  */
-int
+STATIC int
 xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
 {
 	xfs_dir_leafblock_t *leaf;
diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h
index 00d68d33cc7..dd423ce1bc8 100644
--- a/fs/xfs/xfs_dir_leaf.h
+++ b/fs/xfs/xfs_dir_leaf.h
@@ -202,8 +202,6 @@ int xfs_dir_leaf_to_shortform(struct xfs_da_args *args);
 /*
  * Routines used for growing the Btree.
  */
-int	xfs_dir_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block,
-				   struct xfs_dabuf **bpp);
 int	xfs_dir_leaf_split(struct xfs_da_state *state,
 				  struct xfs_da_state_blk *oldblk,
 				  struct xfs_da_state_blk *newblk);
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index bbe1dea11c0..dcd3fdd5c1f 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -280,7 +280,7 @@ xfs_error_report(
 	}
 }
 
-void
+STATIC void
 xfs_hex_dump(void *p, int length)
 {
 	__uint8_t *uip = (__uint8_t*)p;
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 6bc0535c0a6..52ee2b90b5e 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -73,9 +73,6 @@ xfs_corruption_error(
 	int		linenum,
 	inst_t		*ra);
 
-extern void
-xfs_hex_dump(void *p, int length);
-
 #define	XFS_ERROR_REPORT(e, lvl, mp)	\
 	xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
 #define	XFS_CORRUPTION_ERROR(e, lvl, mp, mem)	\
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 5eafd5b6321..4b45ff739e6 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -288,7 +288,7 @@ xfs_efi_item_committing(xfs_efi_log_item_t *efip, xfs_lsn_t lsn)
 /*
  * This is the ops vector shared by all efi log items.
  */
-struct xfs_item_ops xfs_efi_item_ops = {
+STATIC struct xfs_item_ops xfs_efi_item_ops = {
 	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_efi_item_size,
 	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 					xfs_efi_item_format,
@@ -615,7 +615,7 @@ xfs_efd_item_committing(xfs_efd_log_item_t *efip, xfs_lsn_t lsn)
 /*
  * This is the ops vector shared by all efd log items.
  */
-struct xfs_item_ops xfs_efd_item_ops = {
+STATIC struct xfs_item_ops xfs_efd_item_ops = {
 	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_efd_item_size,
 	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 					xfs_efd_item_format,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e8274d6b723..99421638e86 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -158,7 +158,7 @@ xfs_inobp_check(
  * Use xfs_imap() to determine the size and location of the
  * buffer to read from disk.
  */
-int
+STATIC int
 xfs_inotobp(
 	xfs_mount_t	*mp,
 	xfs_trans_t	*tp,
@@ -2111,7 +2111,7 @@ static __inline__ int xfs_inode_clean(xfs_inode_t *ip)
 		(ip->i_update_core == 0));
 }
 
-void
+STATIC void
 xfs_ifree_cluster(
 	xfs_inode_t	*free_ip,
 	xfs_trans_t	*tp,
@@ -2830,7 +2830,7 @@ xfs_iunpin(
  * be subsequently pinned once someone is waiting for it to be
  * unpinned.
  */
-void
+STATIC void
 xfs_iunpin_wait(
 	xfs_inode_t	*ip)
 {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 37e1c316f3b..2c9a7c4dabd 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -487,8 +487,6 @@ int		xfs_finish_reclaim_all(struct xfs_mount *, int);
 /*
  * xfs_inode.c prototypes.
  */
-int		xfs_inotobp(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
-			    xfs_dinode_t **, struct xfs_buf **, int *);
 int		xfs_itobp(struct xfs_mount *, struct xfs_trans *,
 			  xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **,
 			  xfs_daddr_t);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 768cb1816b8..0eed30f5cb1 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -910,7 +910,7 @@ xfs_inode_item_committing(
 /*
  * This is the ops vector shared by all buf log items.
  */
-struct xfs_item_ops xfs_inode_item_ops = {
+STATIC struct xfs_item_ops xfs_inode_item_ops = {
 	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_inode_item_size,
 	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 					xfs_inode_item_format,
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 092d5fb096b..1cd2ac16387 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -134,7 +134,7 @@ STATIC void	xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
 #define xlog_verify_tail_lsn(a,b,c)
 #endif
 
-int		xlog_iclogs_empty(xlog_t *log);
+STATIC int	xlog_iclogs_empty(xlog_t *log);
 
 #ifdef DEBUG
 int xlog_do_error = 0;
@@ -1857,7 +1857,7 @@ xlog_write(xfs_mount_t *	mp,
  *
  * State Change: DIRTY -> ACTIVE
  */
-void
+STATIC void
 xlog_state_clean_log(xlog_t *log)
 {
 	xlog_in_core_t	*iclog;
@@ -3542,7 +3542,7 @@ xfs_log_force_umount(
 	return (retval);
 }
 
-int
+STATIC int
 xlog_iclogs_empty(xlog_t *log)
 {
 	xlog_in_core_t	*iclog;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index c31e3ce3be6..1a1d452f15f 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -535,7 +535,6 @@ typedef struct log {
 
 /* common routines */
 extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
-extern int	 xlog_find_head(xlog_t *log, xfs_daddr_t *head_blk);
 extern int	 xlog_find_tail(xlog_t	*log,
 				xfs_daddr_t *head_blk,
 				xfs_daddr_t *tail_blk,
@@ -548,7 +547,6 @@ extern void	 xlog_recover_process_iunlinks(xlog_t *log);
 extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
 extern void	 xlog_put_bp(struct xfs_buf *);
 extern int	 xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
-extern xfs_caddr_t xlog_align(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
 
 /* iclog tracing */
 #define XLOG_TRACE_GRAB_FLUSH  1
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 9824b5bf0ec..593e597c86b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -148,7 +148,7 @@ xlog_bread(
  * The buffer is kept locked across the write and is returned locked.
  * This can only be used for synchronous log writes.
  */
-int
+STATIC int
 xlog_bwrite(
 	xlog_t		*log,
 	xfs_daddr_t	blk_no,
@@ -179,7 +179,7 @@ xlog_bwrite(
 	return error;
 }
 
-xfs_caddr_t
+STATIC xfs_caddr_t
 xlog_align(
 	xlog_t		*log,
 	xfs_daddr_t	blk_no,
@@ -528,7 +528,7 @@ out:
  *
  * Return: zero if normal, non-zero if error.
  */
-int
+STATIC int
 xlog_find_head(
 	xlog_t 		*log,
 	xfs_daddr_t	*return_head_blk)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2ec967d93e5..f618f6d6381 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -64,6 +64,7 @@
 STATIC void	xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
 STATIC int	xfs_uuid_mount(xfs_mount_t *);
 STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
+STATIC void	xfs_unmountfs_wait(xfs_mount_t *);
 
 static struct {
     short offset;
@@ -555,7 +556,7 @@ xfs_readsb(xfs_mount_t *mp)
  * fields from the superblock associated with the given
  * mount structure
  */
-void
+STATIC void
 xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
 {
 	int	i;
@@ -1146,7 +1147,7 @@ xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
 	xfs_free_buftarg(mp->m_ddev_targp, 0);
 }
 
-void
+STATIC void
 xfs_unmountfs_wait(xfs_mount_t *mp)
 {
 	if (mp->m_logdev_targp != mp->m_ddev_targp)
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 30dd08fb9f5..cd5170ec73a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -544,7 +544,6 @@ extern void	xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
 extern int	xfs_mountfs(struct vfs *, xfs_mount_t *mp, int);
 
 extern int	xfs_unmountfs(xfs_mount_t *, struct cred *);
-extern void	xfs_unmountfs_wait(xfs_mount_t *);
 extern void	xfs_unmountfs_close(xfs_mount_t *, struct cred *);
 extern int	xfs_unmountfs_writesb(xfs_mount_t *);
 extern int	xfs_unmount_flush(xfs_mount_t *, int);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index a865f603599..06dfca531f7 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -532,7 +532,7 @@ xfs_trans_apply_sb_deltas(
  *
  * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
  */
-void
+STATIC void
 xfs_trans_unreserve_and_mod_sb(
 	xfs_trans_t	*tp)
 {
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index b5373665010..fb1ae6cfb1f 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -1681,7 +1681,7 @@ suffix_strtoul(const char *cp, char **endp, unsigned int base)
 	return simple_strtoul(cp, endp, base) << shift_left_factor;
 }
 
-int
+STATIC int
 xfs_parseargs(
 	struct bhv_desc		*bhv,
 	char			*options,
@@ -1867,7 +1867,7 @@ printk("XFS: irixsgid is now a sysctl(2) variable, option is deprecated.\n");
 	return 0;
 }
 
-int
+STATIC int
 xfs_showargs(
 	struct bhv_desc		*bhv,
 	struct seq_file		*m)
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 25a526629b1..5703783991f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -4028,7 +4028,7 @@ xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock)
  *      errno on error
  *
  */
-int
+STATIC int
 xfs_alloc_file_space(
 	xfs_inode_t		*ip,
 	xfs_off_t		offset,
-- 
cgit v1.2.3


From efa8027804b2bd8eb7d4b41a1f5b738c36e44e8c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 21 Jun 2005 15:37:17 +1000
Subject: [XFS] rewrite xfs_iflush_all

SGI-PV: 936890
SGI-Modid: xfs-linux:xfs-kern:193349a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_inode.c | 110 +++++++++++------------------------------------------
 fs/xfs/xfs_inode.h |   7 +---
 fs/xfs/xfs_mount.c |   2 +-
 3 files changed, 25 insertions(+), 94 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 99421638e86..a2c723baff0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3556,107 +3556,43 @@ corrupt_out:
 
 
 /*
- * Flush all inactive inodes in mp.  Return true if no user references
- * were found, false otherwise.
+ * Flush all inactive inodes in mp.
  */
-int
+void
 xfs_iflush_all(
-	xfs_mount_t	*mp,
-	int		flag)
+	xfs_mount_t	*mp)
 {
-	int		busy;
-	int		done;
-	int		purged;
 	xfs_inode_t	*ip;
-	vmap_t		vmap;
 	vnode_t		*vp;
 
-	busy = done = 0;
-	while (!done) {
-		purged = 0;
-		XFS_MOUNT_ILOCK(mp);
-		ip = mp->m_inodes;
-		if (ip == NULL) {
-			break;
-		}
-		do {
-			/* Make sure we skip markers inserted by sync */
-			if (ip->i_mount == NULL) {
-				ip = ip->i_mnext;
-				continue;
-			}
-
-			/*
-			 * It's up to our caller to purge the root
-			 * and quota vnodes later.
-			 */
-			vp = XFS_ITOV_NULL(ip);
-
-			if (!vp) {
-				XFS_MOUNT_IUNLOCK(mp);
-				xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
-				purged = 1;
-				break;
-			}
+ again:
+	XFS_MOUNT_ILOCK(mp);
+	ip = mp->m_inodes;
+	if (ip == NULL)
+		goto out;
 
-			if (vn_count(vp) != 0) {
-				if (vn_count(vp) == 1 &&
-				    (ip == mp->m_rootip ||
-				     (mp->m_quotainfo &&
-				      (ip->i_ino == mp->m_sb.sb_uquotino ||
-				       ip->i_ino == mp->m_sb.sb_gquotino)))) {
+	do {
+		/* Make sure we skip markers inserted by sync */
+		if (ip->i_mount == NULL) {
+			ip = ip->i_mnext;
+			continue;
+		}
 
-					ip = ip->i_mnext;
-					continue;
-				}
-				if (!(flag & XFS_FLUSH_ALL)) {
-					busy = 1;
-					done = 1;
-					break;
-				}
-				/*
-				 * Ignore busy inodes but continue flushing
-				 * others.
-				 */
-				ip = ip->i_mnext;
-				continue;
-			}
-			/*
-			 * Sample vp mapping while holding mp locked on MP
-			 * systems, so we don't purge a reclaimed or
-			 * nonexistent vnode.  We break from the loop
-			 * since we know that we modify
-			 * it by pulling ourselves from it in xfs_reclaim()
-			 * called via vn_purge() below.  Set ip to the next
-			 * entry in the list anyway so we'll know below
-			 * whether we reached the end or not.
-			 */
-			VMAP(vp, vmap);
+		vp = XFS_ITOV_NULL(ip);
+		if (!vp) {
 			XFS_MOUNT_IUNLOCK(mp);
+			xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
+			goto again;
+		}
 
-			vn_purge(vp, &vmap);
+		ASSERT(vn_count(vp) == 0);
 
-			purged = 1;
-			break;
-		} while (ip != mp->m_inodes);
-		/*
-		 * We need to distinguish between when we exit the loop
-		 * after a purge and when we simply hit the end of the
-		 * list.  We can't use the (ip == mp->m_inodes) test,
-		 * because when we purge an inode at the start of the list
-		 * the next inode on the list becomes mp->m_inodes.  That
-		 * would cause such a test to bail out early.  The purged
-		 * variable tells us how we got out of the loop.
-		 */
-		if (!purged) {
-			done = 1;
-		}
-	}
+		ip = ip->i_mnext;
+	} while (ip != mp->m_inodes);
+ out:
 	XFS_MOUNT_IUNLOCK(mp);
-	return !busy;
 }
 
-
 /*
  * xfs_iaccess: check accessibility of inode for mode.
  */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 2c9a7c4dabd..54d9e54c7c9 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -411,11 +411,6 @@ void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n);
 #define	XFS_IFLUSH_ASYNC		4
 #define	XFS_IFLUSH_DELWRI		5
 
-/*
- * Flags for xfs_iflush_all.
- */
-#define	XFS_FLUSH_ALL		0x1
-
 /*
  * Flags for xfs_itruncate_start().
  */
@@ -520,7 +515,7 @@ void		xfs_ipin(xfs_inode_t *);
 void		xfs_iunpin(xfs_inode_t *);
 int		xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int);
 int		xfs_iflush(xfs_inode_t *, uint);
-int		xfs_iflush_all(struct xfs_mount *, int);
+void		xfs_iflush_all(struct xfs_mount *);
 int		xfs_iaccess(xfs_inode_t *, mode_t, cred_t *);
 uint		xfs_iroundup(uint);
 void		xfs_ichgtime(xfs_inode_t *, int);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f618f6d6381..5b363fcf866 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1082,7 +1082,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 	int64_t		fsid;
 #endif
 
-	xfs_iflush_all(mp, XFS_FLUSH_ALL);
+	xfs_iflush_all(mp);
 
 	XFS_QM_DQPURGEALL(mp,
 		XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING);
-- 
cgit v1.2.3


From 66f58d236fd9c2676545678374d58d48206bdbfa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 21 Jun 2005 15:37:43 +1000
Subject: [XFS] simplify XFS_PURGE_INODE

SGI-PV: 936891
SGI-Modid: xfs-linux:xfs-kern:193408a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/quota/xfs_quota_priv.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 414b6004af2..675f03f443d 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -177,13 +177,7 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
 	(!((dqp)->q_core.d_id))
 
 #define XFS_PURGE_INODE(ip)		\
-	{				\
-	  vmap_t dqvmap;		\
-	  vnode_t *dqvp;		\
-	  dqvp = XFS_ITOV(ip);		\
-	  VMAP(dqvp, dqvmap);		\
-	  VN_RELE(dqvp);		\
-	}
+	IRELE(ip);
 
 #define DQFLAGTO_TYPESTR(d)	(((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
 				 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : "???"))
-- 
cgit v1.2.3


From 8401e9631c26dca9ebbc6997ac445fd49b06c79e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 21 Jun 2005 15:38:03 +1000
Subject: [XFS] remove xfs_incore_relse

SGI-PV: 936977
SGI-Modid: xfs-linux:xfs-kern:193409a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_buf.c | 10 ----------
 fs/xfs/linux-2.6/xfs_buf.h |  1 -
 fs/xfs/xfs_mount.c         |  9 ---------
 3 files changed, 20 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 049f8711617..c60e69431e1 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1563,16 +1563,6 @@ xfs_free_buftarg(
 	kmem_free(btp, sizeof(*btp));
 }
 
-void
-xfs_incore_relse(
-	xfs_buftarg_t		*btp,
-	int			delwri_only,
-	int			wait)
-{
-	invalidate_bdev(btp->pbr_bdev, 1);
-	truncate_inode_pages(btp->pbr_mapping, 0LL);
-}
-
 STATIC int
 xfs_setsize_buftarg_flags(
 	xfs_buftarg_t		*btp,
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 74deed8e6d9..3f8f69a66ae 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -576,7 +576,6 @@ extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
 extern void xfs_free_buftarg(xfs_buftarg_t *, int);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
-extern void xfs_incore_relse(xfs_buftarg_t *, int, int);
 extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
 
 #define xfs_getsize_buftarg(buftarg) \
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 5b363fcf866..82e1646e624 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1112,15 +1112,6 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 	 */
 	ASSERT(mp->m_inodes == NULL);
 
-	/*
-	 * We may have bufs that are in the process of getting written still.
-	 * We must wait for the I/O completion of those. The sync flag here
-	 * does a two pass iteration thru the bufcache.
-	 */
-	if (XFS_FORCED_SHUTDOWN(mp)) {
-		xfs_incore_relse(mp->m_ddev_targp, 0, 1); /* synchronous */
-	}
-
 	xfs_unmountfs_close(mp, cr);
 	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
 		xfs_uuid_unmount(mp);
-- 
cgit v1.2.3


From c8ad20ffeb592d66ea869c57f8c525a9d727c67b Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 21 Jun 2005 15:38:48 +1000
Subject: [XFS] Add support for project quota, based on Dan Knappes earlier
 work.

SGI-PV: 932952
SGI-Modid: xfs-linux:xfs-kern:22805a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_linux.h   |   6 +-
 fs/xfs/linux-2.6/xfs_super.c   |   6 +-
 fs/xfs/quota/xfs_dquot.c       |  34 ++++++----
 fs/xfs/quota/xfs_dquot.h       |  29 ++++----
 fs/xfs/quota/xfs_qm.c          | 142 +++++++++++++++++++++++++-------------
 fs/xfs/quota/xfs_qm.h          |   4 +-
 fs/xfs/quota/xfs_qm_bhv.c      |  41 +++++++++--
 fs/xfs/quota/xfs_qm_syscalls.c | 151 +++++++++++++++++++++++++----------------
 fs/xfs/quota/xfs_quota_priv.h  |   6 +-
 fs/xfs/quota/xfs_trans_dquot.c |   6 +-
 fs/xfs/xfs_buf_item.h          |   2 +-
 fs/xfs/xfs_log_recover.c       |  11 ++-
 fs/xfs/xfs_mount.h             |   6 +-
 fs/xfs/xfs_quota.h             |  61 +++++++++++------
 fs/xfs/xfs_trans_buf.c         |   1 +
 fs/xfs/xfs_utils.c             |   2 +-
 fs/xfs/xfs_vfsops.c            |  10 ---
 fs/xfs/xfs_vnodeops.c          |  33 ++++++---
 18 files changed, 348 insertions(+), 203 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 71bb41019a1..44eb313f22b 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -230,8 +230,10 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
  * field (see the QCMD macro in quota.h).  These macros help keep the
  * code portable - they are not visible from the syscall interface.
  */
-#define Q_XSETGQLIM	XQM_CMD(0x8)	/* set groups disk limits */
-#define Q_XGETGQUOTA	XQM_CMD(0x9)	/* get groups disk limits */
+#define Q_XSETGQLIM	XQM_CMD(8)	/* set groups disk limits */
+#define Q_XGETGQUOTA	XQM_CMD(9)	/* get groups disk limits */
+#define Q_XSETPQLIM	XQM_CMD(10)	/* set projects disk limits */
+#define Q_XGETPQUOTA	XQM_CMD(11)	/* get projects disk limits */
 
 /* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */
 /* we may well need to fine-tune this if it ever becomes an issue.  */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 455e2b2fb96..d5f0340ddcd 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -701,7 +701,8 @@ linvfs_getxquota(
 	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
 	int			error, getmode;
 
-	getmode = (type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETQUOTA;
+	getmode = (type == USRQUOTA) ? Q_XGETQUOTA :
+		 ((type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETPQUOTA);
 	VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error);
 	return -error;
 }
@@ -716,7 +717,8 @@ linvfs_setxquota(
 	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
 	int			error, setmode;
 
-	setmode = (type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETQLIM;
+	setmode = (type == USRQUOTA) ? Q_XSETQLIM :
+		 ((type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETPQLIM);
 	VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error);
 	return -error;
 }
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 9ce471430a0..68089f56d5c 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -399,9 +399,9 @@ xfs_qm_init_dquot_blk(
 	for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++)
 		xfs_qm_dqinit_core(curid, type, d);
 	xfs_trans_dquot_buf(tp, bp,
-			    type & XFS_DQ_USER ?
-			    XFS_BLI_UDQUOT_BUF :
-			    XFS_BLI_GDQUOT_BUF);
+			    (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF :
+			    ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF :
+			     XFS_BLI_GDQUOT_BUF)));
 	xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1);
 }
 
@@ -482,8 +482,7 @@ xfs_qm_dqalloc(
 	 * the entire thing.
 	 */
 	xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT),
-			      dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP),
-			      bp);
+			      dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
 
 	if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) {
 		goto error1;
@@ -613,8 +612,7 @@ xfs_qm_dqtobp(
 	/*
 	 * A simple sanity check in case we got a corrupted dquot...
 	 */
-	if (xfs_qm_dqcheck(ddq, id,
-			   dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP),
+	if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
 			   flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
 			   "dqtobp")) {
 		if (!(flags & XFS_QMOPT_DQREPAIR)) {
@@ -891,8 +889,8 @@ int
 xfs_qm_dqget(
 	xfs_mount_t	*mp,
 	xfs_inode_t	*ip,	  /* locked inode (optional) */
-	xfs_dqid_t	id,	  /* gid or uid, depending on type */
-	uint		type,	  /* UDQUOT or GDQUOT */
+	xfs_dqid_t	id,	  /* uid/projid/gid depending on type */
+	uint		type,	  /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
 	uint		flags,	  /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
 	xfs_dquot_t	**O_dqpp) /* OUT : locked incore dquot */
 {
@@ -903,7 +901,9 @@ xfs_qm_dqget(
 
 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 	if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
+	    (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
 	    (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
+printk("XQM: ESRCH1\n");
 		return (ESRCH);
 	}
 	h = XFS_DQ_HASH(mp, id, type);
@@ -921,7 +921,9 @@ xfs_qm_dqget(
  again:
 
 #ifdef DEBUG
-	ASSERT(type == XFS_DQ_USER || type == XFS_DQ_GROUP);
+	ASSERT(type == XFS_DQ_USER ||
+	       type == XFS_DQ_PROJ ||
+	       type == XFS_DQ_GROUP);
 	if (ip) {
 		ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
 		if (type == XFS_DQ_USER)
@@ -979,6 +981,7 @@ xfs_qm_dqget(
 				  &dqp))) {
 		if (ip)
 			xfs_ilock(ip, XFS_ILOCK_EXCL);
+if (error == ESRCH) printk("XQM: ESRCH2\n");
 		return (error);
 	}
 
@@ -1004,6 +1007,7 @@ xfs_qm_dqget(
 		if (! XFS_IS_DQTYPE_ON(mp, type)) {
 			/* inode stays locked on return */
 			xfs_qm_dqdestroy(dqp);
+printk("XQM: ESRCH3\n");
 			return XFS_ERROR(ESRCH);
 		}
 		/*
@@ -1244,8 +1248,8 @@ xfs_qm_dqflush(
 		return (error);
 	}
 
-	if (xfs_qm_dqcheck(&dqp->q_core, INT_GET(ddqp->d_id, ARCH_CONVERT), 0, XFS_QMOPT_DOWARN,
-			   "dqflush (incore copy)")) {
+	if (xfs_qm_dqcheck(&dqp->q_core, INT_GET(ddqp->d_id, ARCH_CONVERT),
+			   0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
 		xfs_force_shutdown(dqp->q_mount, XFS_CORRUPT_INCORE);
 		return XFS_ERROR(EIO);
 	}
@@ -1397,7 +1401,8 @@ xfs_dqlock2(
 {
 	if (d1 && d2) {
 		ASSERT(d1 != d2);
-		if (INT_GET(d1->q_core.d_id, ARCH_CONVERT) > INT_GET(d2->q_core.d_id, ARCH_CONVERT)) {
+		if (INT_GET(d1->q_core.d_id, ARCH_CONVERT) >
+		    INT_GET(d2->q_core.d_id, ARCH_CONVERT)) {
 			xfs_dqlock(d2);
 			xfs_dqlock(d1);
 		} else {
@@ -1520,8 +1525,7 @@ xfs_qm_dqprint(xfs_dquot_t *dqp)
 	cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------");
 	cmn_err(CE_DEBUG, "---- dquotID =  %d",
 		(int)INT_GET(dqp->q_core.d_id, ARCH_CONVERT));
-	cmn_err(CE_DEBUG, "---- type    =  %s",
-		XFS_QM_ISUDQ(dqp) ? "USR" : "GRP");
+	cmn_err(CE_DEBUG, "---- type    =  %s", DQFLAGTO_TYPESTR(dqp));
 	cmn_err(CE_DEBUG, "---- fs      =  0x%p", dqp->q_mount);
 	cmn_err(CE_DEBUG, "---- blkno   =  0x%x", (int) dqp->q_blkno);
 	cmn_err(CE_DEBUG, "---- boffset =  0x%x", (int) dqp->q_bufoffset);
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 35aeeafe479..39175103c8e 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -114,25 +114,18 @@ typedef struct xfs_dquot {
 #define XFS_DQHOLD(dqp)		((dqp)->q_nrefs++)
 
 /*
- * Quota Accounting flags
+ * Quota Accounting/Enforcement flags
  */
-#define XFS_ALL_QUOTA_ACCT	(XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT)
-#define XFS_ALL_QUOTA_ENFD	(XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD)
-#define XFS_ALL_QUOTA_CHKD	(XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD)
-#define XFS_ALL_QUOTA_ACTV	(XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE)
-#define XFS_ALL_QUOTA_ACCT_ENFD (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
-				 XFS_GQUOTA_ACCT|XFS_GQUOTA_ENFD)
+#define XFS_ALL_QUOTA_ACCT	\
+		(XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
+#define XFS_ALL_QUOTA_ENFD	(XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD)
+#define XFS_ALL_QUOTA_CHKD	(XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD)
 
-#define XFS_IS_QUOTA_RUNNING(mp)  ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
-#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT)
-#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT)
-
-/*
- * Quota Limit Enforcement flags
- */
+#define XFS_IS_QUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
 #define XFS_IS_QUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ENFD)
-#define XFS_IS_UQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_UQUOTA_ENFD)
-#define XFS_IS_GQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_GQUOTA_ENFD)
+#define XFS_IS_UQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_UQUOTA_ACCT)
+#define XFS_IS_PQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_PQUOTA_ACCT)
+#define XFS_IS_GQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_GQUOTA_ACCT)
 
 #ifdef DEBUG
 static inline int
@@ -167,6 +160,8 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
 #define XFS_DQ_IS_ON_FREELIST(dqp)  ((dqp)->dq_flnext != (dqp))
 #define XFS_DQ_IS_DIRTY(dqp)	((dqp)->dq_flags & XFS_DQ_DIRTY)
 #define XFS_QM_ISUDQ(dqp)	((dqp)->dq_flags & XFS_DQ_USER)
+#define XFS_QM_ISPDQ(dqp)	((dqp)->dq_flags & XFS_DQ_PROJ)
+#define XFS_QM_ISGDQ(dqp)	((dqp)->dq_flags & XFS_DQ_GROUP)
 #define XFS_DQ_TO_QINF(dqp)	((dqp)->q_mount->m_quotainfo)
 #define XFS_DQ_TO_QIP(dqp)	(XFS_QM_ISUDQ(dqp) ? \
 				 XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
@@ -174,7 +169,7 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
 
 #define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
 				     (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
-				     (XFS_IS_GQUOTA_ON((d)->q_mount))))
+				     (XFS_IS_OQUOTA_ON((d)->q_mount))))
 
 #ifdef XFS_DQUOT_TRACE
 /*
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 41bbc49d535..3ea75972767 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -310,9 +310,9 @@ xfs_qm_mount_quotainit(
 	uint		flags)
 {
 	/*
-	 * User or group quotas has to be on.
+	 * User, projects or group quotas has to be on.
 	 */
-	ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA));
+	ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA));
 
 	/*
 	 * Initialize the flags in the mount structure. From this point
@@ -330,7 +330,11 @@ xfs_qm_mount_quotainit(
 	if (flags & XFSMNT_GQUOTA) {
 		mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
 		if (flags & XFSMNT_GQUOTAENF)
-			mp->m_qflags |= XFS_GQUOTA_ENFD;
+			mp->m_qflags |= XFS_OQUOTA_ENFD;
+	} else if (flags & XFSMNT_PQUOTA) {
+		mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+		if (flags & XFSMNT_PQUOTAENF)
+			mp->m_qflags |= XFS_OQUOTA_ENFD;
 	}
 }
 
@@ -363,11 +367,11 @@ xfs_qm_mount_quotas(
 
 	/*
 	 * If a file system had quotas running earlier, but decided to
-	 * mount without -o quota/uquota/gquota options, revoke the
+	 * mount without -o uquota/pquota/gquota options, revoke the
 	 * quotachecked license, and bail out.
 	 */
 	if (! XFS_IS_QUOTA_ON(mp) &&
-	    (mp->m_sb.sb_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT))) {
+	    (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT)) {
 		mp->m_qflags = 0;
 		goto write_changes;
 	}
@@ -619,7 +623,7 @@ xfs_qm_detach_gdquots(
 STATIC int
 xfs_qm_dqpurge_int(
 	xfs_mount_t	*mp,
-	uint		flags) /* QUOTAOFF/UMOUNTING/UQUOTA/GQUOTA */
+	uint		flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */
 {
 	xfs_dquot_t	*dqp;
 	uint		dqtype;
@@ -631,6 +635,7 @@ xfs_qm_dqpurge_int(
 		return (0);
 
 	dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
+	dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
 	dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
 
 	xfs_qm_mplist_lock(mp);
@@ -740,11 +745,11 @@ xfs_qm_dqattach_one(
 
 	/*
 	 * udqhint is the i_udquot field in inode, and is non-NULL only
-	 * when the type arg is XFS_DQ_GROUP. Its purpose is to save a
+	 * when the type arg is group/project. Its purpose is to save a
 	 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
 	 * the user dquot.
 	 */
-	ASSERT(!udqhint || type == XFS_DQ_GROUP);
+	ASSERT(!udqhint || type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
 	if (udqhint && !dolock)
 		xfs_dqlock(udqhint);
 
@@ -903,8 +908,8 @@ xfs_qm_dqattach_grouphint(
 
 
 /*
- * Given a locked inode, attach dquot(s) to it, taking UQUOTAON / GQUOTAON
- * in to account.
+ * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
+ * into account.
  * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
  * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty
  * much made this code a complete mess, but it has been pretty useful.
@@ -943,8 +948,13 @@ xfs_qm_dqattach(
 		nquotas++;
 	}
 	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
-	if (XFS_IS_GQUOTA_ON(mp)) {
-		error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
+	if (XFS_IS_OQUOTA_ON(mp)) {
+		error = XFS_IS_GQUOTA_ON(mp) ?
+			xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
+						flags & XFS_QMOPT_DQALLOC,
+						flags & XFS_QMOPT_DQLOCK,
+						ip->i_udquot, &ip->i_gdquot) :
+			xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
 						flags & XFS_QMOPT_DQALLOC,
 						flags & XFS_QMOPT_DQLOCK,
 						ip->i_udquot, &ip->i_gdquot);
@@ -995,7 +1005,7 @@ xfs_qm_dqattach(
 		}
 		if (XFS_IS_UQUOTA_ON(mp))
 			ASSERT(ip->i_udquot);
-		if (XFS_IS_GQUOTA_ON(mp))
+		if (XFS_IS_OQUOTA_ON(mp))
 			ASSERT(ip->i_gdquot);
 	}
 #endif
@@ -1024,13 +1034,13 @@ xfs_qm_dqdetach(
 
 	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
 	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
-	if (ip->i_udquot)
-		xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
 	if (ip->i_udquot) {
+		xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
 		xfs_qm_dqrele(ip->i_udquot);
 		ip->i_udquot = NULL;
 	}
 	if (ip->i_gdquot) {
+		xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip);
 		xfs_qm_dqrele(ip->i_gdquot);
 		ip->i_gdquot = NULL;
 	}
@@ -1208,8 +1218,9 @@ xfs_qm_init_quotainfo(
 	 * and group quotas, at least not at this point.
 	 */
 	error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
-			     (XFS_IS_UQUOTA_RUNNING(mp)) ?
-			     XFS_DQ_USER : XFS_DQ_GROUP,
+			     XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
+			     (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
+				XFS_DQ_PROJ),
 			     XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
 			     &dqp);
 	if (! error) {
@@ -1372,13 +1383,20 @@ xfs_qm_dqget_noattach(
 		ASSERT(udqp);
 	}
 
-	if (XFS_IS_GQUOTA_ON(mp)) {
+	if (XFS_IS_OQUOTA_ON(mp)) {
 		ASSERT(ip->i_gdquot == NULL);
 		if (udqp)
 			xfs_dqunlock(udqp);
-		if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_gid, XFS_DQ_GROUP,
-					 XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
-					 &gdqp))) {
+		error = XFS_IS_GQUOTA_ON(mp) ?
+				xfs_qm_dqget(mp, ip,
+					     ip->i_d.di_gid, XFS_DQ_GROUP,
+					     XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
+					     &gdqp) :
+				xfs_qm_dqget(mp, ip,
+					     ip->i_d.di_projid, XFS_DQ_PROJ,
+					     XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
+					     &gdqp);
+		if (error) {
 			if (udqp)
 				xfs_qm_dqrele(udqp);
 			ASSERT(error != ESRCH);
@@ -1547,11 +1565,14 @@ xfs_qm_dqiter_bufs(
 	int		error;
 	int		notcommitted;
 	int		incr;
+	int		type;
 
 	ASSERT(blkcnt > 0);
 	notcommitted = 0;
 	incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
 		XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
+	type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
+		(flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
 	error = 0;
 
 	/*
@@ -1570,9 +1591,7 @@ xfs_qm_dqiter_bufs(
 		if (error)
 			break;
 
-		(void) xfs_qm_reset_dqcounts(mp, bp, firstid,
-					     flags & XFS_QMOPT_UQUOTA ?
-					     XFS_DQ_USER : XFS_DQ_GROUP);
+		(void) xfs_qm_reset_dqcounts(mp, bp, firstid, type);
 		xfs_bdwrite(mp, bp);
 		/*
 		 * goto the next block.
@@ -1584,7 +1603,7 @@ xfs_qm_dqiter_bufs(
 }
 
 /*
- * Iterate over all allocated USR/GRP dquots in the system, calling a
+ * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
  * caller supplied function for every chunk of dquots that we find.
  */
 STATIC int
@@ -1855,7 +1874,7 @@ xfs_qm_dqusage_adjust(
 		xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
 		xfs_qm_dqput(udqp);
 	}
-	if (XFS_IS_GQUOTA_ON(mp)) {
+	if (XFS_IS_OQUOTA_ON(mp)) {
 		ASSERT(gdqp);
 		xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
 		xfs_qm_dqput(gdqp);
@@ -1904,7 +1923,7 @@ xfs_qm_quotacheck(
 	cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
 
 	/*
-	 * First we go thru all the dquots on disk, USR and GRP, and reset
+	 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
 	 * their counters to zero. We need a clean slate.
 	 * We don't log our changes till later.
 	 */
@@ -1915,9 +1934,10 @@ xfs_qm_quotacheck(
 	}
 
 	if ((gip = XFS_QI_GQIP(mp))) {
-		if ((error = xfs_qm_dqiterate(mp, gip, XFS_QMOPT_GQUOTA)))
+		if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
+					XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA)))
 			goto error_return;
-		flags |= XFS_GQUOTA_CHKD;
+		flags |= XFS_OQUOTA_CHKD;
 	}
 
 	do {
@@ -1944,7 +1964,7 @@ xfs_qm_quotacheck(
 	if (error) {
 		xfs_qm_dqpurge_all(mp,
 				   XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA|
-				   XFS_QMOPT_QUOTAOFF);
+				   XFS_QMOPT_PQUOTA|XFS_QMOPT_QUOTAOFF);
 		goto error_return;
 	}
 	/*
@@ -1967,7 +1987,7 @@ xfs_qm_quotacheck(
 	 * quotachecked status, since we won't be doing accounting for
 	 * that type anymore.
 	 */
-	mp->m_qflags &= ~(XFS_GQUOTA_CHKD | XFS_UQUOTA_CHKD);
+	mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
 	mp->m_qflags |= flags;
 
 	XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");
@@ -2019,7 +2039,7 @@ xfs_qm_init_quotainos(
 					     0, 0, &uip, 0)))
 				return XFS_ERROR(error);
 		}
-		if (XFS_IS_GQUOTA_ON(mp) &&
+		if (XFS_IS_OQUOTA_ON(mp) &&
 		    mp->m_sb.sb_gquotino != NULLFSINO) {
 			ASSERT(mp->m_sb.sb_gquotino > 0);
 			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
@@ -2049,10 +2069,12 @@ xfs_qm_init_quotainos(
 
 		flags &= ~XFS_QMOPT_SBVERSION;
 	}
-	if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) {
-		if ((error = xfs_qm_qino_alloc(mp, &gip,
-					      sbflags | XFS_SB_GQUOTINO,
-					      flags | XFS_QMOPT_GQUOTA))) {
+	if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
+		flags |= (XFS_IS_GQUOTA_ON(mp) ?
+				XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
+		error = xfs_qm_qino_alloc(mp, &gip,
+					  sbflags | XFS_SB_GQUOTINO, flags);
+		if (error) {
 			if (uip)
 				VN_RELE(XFS_ITOV(uip));
 
@@ -2458,6 +2480,7 @@ xfs_qm_vop_dqalloc(
 	xfs_inode_t	*ip,
 	uid_t		uid,
 	gid_t		gid,
+	prid_t		prid,
 	uint		flags,
 	xfs_dquot_t	**O_udqpp,
 	xfs_dquot_t	**O_gdqpp)
@@ -2489,8 +2512,7 @@ xfs_qm_vop_dqalloc(
 	}
 
 	uq = gq = NULL;
-	if ((flags & XFS_QMOPT_UQUOTA) &&
-	    XFS_IS_UQUOTA_ON(mp)) {
+	if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
 		if (ip->i_d.di_uid != uid) {
 			/*
 			 * What we need is the dquot that has this uid, and
@@ -2528,8 +2550,7 @@ xfs_qm_vop_dqalloc(
 			xfs_dqunlock(uq);
 		}
 	}
-	if ((flags & XFS_QMOPT_GQUOTA) &&
-	    XFS_IS_GQUOTA_ON(mp)) {
+	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
 		if (ip->i_d.di_gid != gid) {
 			xfs_iunlock(ip, lockflags);
 			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
@@ -2552,6 +2573,29 @@ xfs_qm_vop_dqalloc(
 			XFS_DQHOLD(gq);
 			xfs_dqunlock(gq);
 		}
+	} else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
+		if (ip->i_d.di_projid != prid) {
+			xfs_iunlock(ip, lockflags);
+			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
+						 XFS_DQ_PROJ,
+						 XFS_QMOPT_DQALLOC |
+						 XFS_QMOPT_DOWARN,
+						 &gq))) {
+				if (uq)
+					xfs_qm_dqrele(uq);
+				ASSERT(error != ENOENT);
+				return (error);
+			}
+			xfs_dqunlock(gq);
+			lockflags = XFS_ILOCK_SHARED;
+			xfs_ilock(ip, lockflags);
+		} else {
+			ASSERT(ip->i_gdquot);
+			gq = ip->i_gdquot;
+			xfs_dqlock(gq);
+			XFS_DQHOLD(gq);
+			xfs_dqunlock(gq);
+		}
 	}
 	if (uq)
 		xfs_dqtrace_entry_ino(uq, "DQALLOC", ip);
@@ -2617,7 +2661,7 @@ xfs_qm_vop_chown(
 }
 
 /*
- * Quota reservations for setattr(AT_UID|AT_GID).
+ * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
  */
 int
 xfs_qm_vop_chown_reserve(
@@ -2652,12 +2696,16 @@ xfs_qm_vop_chown_reserve(
 			unresudq = ip->i_udquot;
 		}
 	}
-	if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp &&
-	    ip->i_d.di_gid != INT_GET(gdqp->q_core.d_id, ARCH_CONVERT)) {
-		delblksgdq = gdqp;
-		if (delblks) {
-			ASSERT(ip->i_gdquot);
-			unresgdq = ip->i_gdquot;
+	if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
+		if ((XFS_IS_GQUOTA_ON(ip->i_mount) && ip->i_d.di_gid !=
+				INT_GET(gdqp->q_core.d_id, ARCH_CONVERT)) ||
+		    (XFS_IS_PQUOTA_ON(ip->i_mount) && ip->i_d.di_projid !=
+				INT_GET(gdqp->q_core.d_id, ARCH_CONVERT))) {
+			delblksgdq = gdqp;
+			if (delblks) {
+				ASSERT(ip->i_gdquot);
+				unresgdq = ip->i_gdquot;
+			}
 		}
 	}
 
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index ae626eca5ac..78196877954 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -202,7 +202,7 @@ extern void		xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
 
 /* vop stuff */
 extern int		xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *,
-					uid_t, gid_t, uint,
+					uid_t, gid_t, prid_t, uint,
 					xfs_dquot_t **, xfs_dquot_t **);
 extern void		xfs_qm_vop_dqattach_and_dqmod_newinode(
 					xfs_trans_t *, xfs_inode_t *,
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 09b1171dfb8..dc3c37a1e15 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -71,10 +71,13 @@
 #define MNTOPT_NOQUOTA	"noquota"	/* no quotas */
 #define MNTOPT_USRQUOTA	"usrquota"	/* user quota enabled */
 #define MNTOPT_GRPQUOTA	"grpquota"	/* group quota enabled */
+#define MNTOPT_PRJQUOTA	"prjquota"	/* project quota enabled */
 #define MNTOPT_UQUOTA	"uquota"	/* user quota (IRIX variant) */
 #define MNTOPT_GQUOTA	"gquota"	/* group quota (IRIX variant) */
+#define MNTOPT_PQUOTA	"pquota"	/* project quota (IRIX variant) */
 #define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
 #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
+#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
 #define MNTOPT_QUOTANOENF  "qnoenforce"	/* same as uqnoenforce */
 
 STATIC int
@@ -109,6 +112,14 @@ xfs_qm_parseargs(
 			args->flags |= XFSMNT_UQUOTA;
 			args->flags &= ~XFSMNT_UQUOTAENF;
 			referenced = 1;
+		} else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
+			   !strcmp(this_char, MNTOPT_PRJQUOTA)) {
+			args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF;
+			referenced = 1;
+		} else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
+			args->flags |= XFSMNT_PQUOTA;
+			args->flags &= ~XFSMNT_PQUOTAENF;
+			referenced = 1;
 		} else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
 			   !strcmp(this_char, MNTOPT_GRPQUOTA)) {
 			args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF;
@@ -127,6 +138,12 @@ xfs_qm_parseargs(
 			*this_char++ = ',';
 	}
 
+	if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) {
+		cmn_err(CE_WARN,
+			"XFS: cannot mount with both project and group quota");
+		return XFS_ERROR(EINVAL);
+	}
+
 	PVFS_PARSEARGS(BHV_NEXT(bhv), options, args, update, error);
 	if (!error && !referenced)
 		bhv_remove_vfsops(bhvtovfs(bhv), VFS_POSITION_QM);
@@ -148,13 +165,19 @@ xfs_qm_showargs(
 			seq_puts(m, "," MNTOPT_UQUOTANOENF);
 	}
 
+	if (mp->m_qflags & XFS_PQUOTA_ACCT) {
+		(mp->m_qflags & XFS_OQUOTA_ENFD) ?
+			seq_puts(m, "," MNTOPT_PRJQUOTA) :
+			seq_puts(m, "," MNTOPT_PQUOTANOENF);
+	}
+
 	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
-		(mp->m_qflags & XFS_GQUOTA_ENFD) ?
+		(mp->m_qflags & XFS_OQUOTA_ENFD) ?
 			seq_puts(m, "," MNTOPT_GRPQUOTA) :
 			seq_puts(m, "," MNTOPT_GQUOTANOENF);
 	}
 
-	if (!(mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT)))
+	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
 		seq_puts(m, "," MNTOPT_NOQUOTA);
 
 	PVFS_SHOWARGS(BHV_NEXT(bhv), m, error);
@@ -171,7 +194,7 @@ xfs_qm_mount(
 	struct xfs_mount	*mp = XFS_VFSTOM(vfsp);
 	int			error;
 
-	if (args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA))
+	if (args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA | XFSMNT_PQUOTA))
 		xfs_qm_mount_quotainit(mp, args->flags);
 	PVFS_MOUNT(BHV_NEXT(bhv), args, cr, error);
 	return error;
@@ -255,16 +278,17 @@ xfs_qm_newmount(
 	uint		*quotaflags)
 {
 	uint		quotaondisk;
-	uint		uquotaondisk = 0, gquotaondisk = 0;
+	uint		uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
 
 	*quotaflags = 0;
 	*needquotamount = B_FALSE;
 
 	quotaondisk = XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
-		mp->m_sb.sb_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT);
+				(mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
 
 	if (quotaondisk) {
 		uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
+		pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT;
 		gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
 	}
 
@@ -277,13 +301,16 @@ xfs_qm_newmount(
 
 	if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
 	    (!uquotaondisk &&  XFS_IS_UQUOTA_ON(mp)) ||
+	     (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) ||
+	    (!pquotaondisk &&  XFS_IS_PQUOTA_ON(mp)) ||
 	     (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
-	    (!gquotaondisk &&  XFS_IS_GQUOTA_ON(mp)))  &&
+	    (!gquotaondisk &&  XFS_IS_OQUOTA_ON(mp)))  &&
 	    xfs_dev_is_read_only(mp, "changing quota state")) {
 		cmn_err(CE_WARN,
-			"XFS: please mount with%s%s%s.",
+			"XFS: please mount with%s%s%s%s.",
 			(!quotaondisk ? "out quota" : ""),
 			(uquotaondisk ? " usrquota" : ""),
+			(pquotaondisk ? " prjquota" : ""),
 			(gquotaondisk ? " grpquota" : ""));
 		return XFS_ERROR(EPERM);
 	}
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 229f5b5a2d2..365a054f02d 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -118,40 +118,41 @@ xfs_qm_quotactl(
 	 * The following commands are valid even when quotaoff.
 	 */
 	switch (cmd) {
+	case Q_XQUOTARM:
 		/*
-		 * truncate quota files. quota must be off.
+		 * Truncate quota files. quota must be off.
 		 */
-	      case Q_XQUOTARM:
 		if (XFS_IS_QUOTA_ON(mp) || addr == NULL)
 			return XFS_ERROR(EINVAL);
 		if (vfsp->vfs_flag & VFS_RDONLY)
 			return XFS_ERROR(EROFS);
 		return (xfs_qm_scall_trunc_qfiles(mp,
 			       xfs_qm_import_qtype_flags(*(uint *)addr)));
+
+	case Q_XGETQSTAT:
 		/*
 		 * Get quota status information.
 		 */
-	      case Q_XGETQSTAT:
 		return (xfs_qm_scall_getqstat(mp, (fs_quota_stat_t *)addr));
 
+	case Q_XQUOTAON:
 		/*
-		 * QUOTAON for root f/s and quota enforcement on others..
-		 * Quota accounting for non-root f/s's must be turned on
-		 * at mount time.
+		 * QUOTAON - enabling quota enforcement.
+		 * Quota accounting must be turned on at mount time.
 		 */
-	      case Q_XQUOTAON:
 		if (addr == NULL)
 			return XFS_ERROR(EINVAL);
 		if (vfsp->vfs_flag & VFS_RDONLY)
 			return XFS_ERROR(EROFS);
 		return (xfs_qm_scall_quotaon(mp,
 					  xfs_qm_import_flags(*(uint *)addr)));
-	      case Q_XQUOTAOFF:
+
+	case Q_XQUOTAOFF:
 		if (vfsp->vfs_flag & VFS_RDONLY)
 			return XFS_ERROR(EROFS);
 		break;
 
-	      default:
+	default:
 		break;
 	}
 
@@ -159,7 +160,7 @@ xfs_qm_quotactl(
 		return XFS_ERROR(ESRCH);
 
 	switch (cmd) {
-	      case Q_XQUOTAOFF:
+	case Q_XQUOTAOFF:
 		if (vfsp->vfs_flag & VFS_RDONLY)
 			return XFS_ERROR(EROFS);
 		error = xfs_qm_scall_quotaoff(mp,
@@ -167,42 +168,39 @@ xfs_qm_quotactl(
 					    B_FALSE);
 		break;
 
-		/*
-		 * Defaults to XFS_GETUQUOTA.
-		 */
-	      case Q_XGETQUOTA:
+	case Q_XGETQUOTA:
 		error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_USER,
 					(fs_disk_quota_t *)addr);
 		break;
-		/*
-		 * Set limits, both hard and soft. Defaults to Q_SETUQLIM.
-		 */
-	      case Q_XSETQLIM:
+	case Q_XGETGQUOTA:
+		error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
+					(fs_disk_quota_t *)addr);
+		break;
+	case Q_XGETPQUOTA:
+		error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_PROJ,
+					(fs_disk_quota_t *)addr);
+		break;
+
+	case Q_XSETQLIM:
 		if (vfsp->vfs_flag & VFS_RDONLY)
 			return XFS_ERROR(EROFS);
 		error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_USER,
 					     (fs_disk_quota_t *)addr);
 		break;
-
-	       case Q_XSETGQLIM:
+	case Q_XSETGQLIM:
 		if (vfsp->vfs_flag & VFS_RDONLY)
 			return XFS_ERROR(EROFS);
 		error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
 					     (fs_disk_quota_t *)addr);
 		break;
-
-
-	      case Q_XGETGQUOTA:
-		error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
-					(fs_disk_quota_t *)addr);
+	case Q_XSETPQLIM:
+		if (vfsp->vfs_flag & VFS_RDONLY)
+			return XFS_ERROR(EROFS);
+		error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_PROJ,
+					     (fs_disk_quota_t *)addr);
 		break;
 
-		/*
-		 * Quotas are entirely undefined after quotaoff in XFS quotas.
-		 * For instance, there's no way to set limits when quotaoff.
-		 */
-
-	      default:
+	default:
 		error = XFS_ERROR(EINVAL);
 		break;
 	}
@@ -286,8 +284,12 @@ xfs_qm_scall_quotaoff(
 	}
 	if (flags & XFS_GQUOTA_ACCT) {
 		dqtype |= XFS_QMOPT_GQUOTA;
-		flags |= (XFS_GQUOTA_CHKD | XFS_GQUOTA_ENFD);
+		flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
 		inactivate_flags |= XFS_GQUOTA_ACTIVE;
+	} else if (flags & XFS_PQUOTA_ACCT) {
+		dqtype |= XFS_QMOPT_PQUOTA;
+		flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
+		inactivate_flags |= XFS_PQUOTA_ACTIVE;
 	}
 
 	/*
@@ -364,7 +366,8 @@ xfs_qm_scall_quotaoff(
 	/*
 	 * If quotas is completely disabled, close shop.
 	 */
-	if ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_ALL) {
+	if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
+	    ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
 		mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
 		xfs_qm_destroy_quotainfo(mp);
 		return (0);
@@ -378,7 +381,7 @@ xfs_qm_scall_quotaoff(
 		XFS_PURGE_INODE(XFS_QI_UQIP(mp));
 		XFS_QI_UQIP(mp) = NULL;
 	}
-	if ((dqtype & XFS_QMOPT_GQUOTA) && XFS_QI_GQIP(mp)) {
+	if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) {
 		XFS_PURGE_INODE(XFS_QI_GQIP(mp));
 		XFS_QI_GQIP(mp) = NULL;
 	}
@@ -411,7 +414,8 @@ xfs_qm_scall_trunc_qfiles(
 		}
 	}
 
-	if ((flags & XFS_DQ_GROUP) && mp->m_sb.sb_gquotino != NULLFSINO) {
+	if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
+	    mp->m_sb.sb_gquotino != NULLFSINO) {
 		error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0);
 		if (! error) {
 			(void) xfs_truncate_file(mp, qip);
@@ -434,7 +438,7 @@ xfs_qm_scall_quotaon(
 	uint		flags)
 {
 	int		error;
-	unsigned long s;
+	unsigned long	s;
 	uint		qf;
 	uint		accflags;
 	__int64_t	sbflags;
@@ -468,9 +472,13 @@ xfs_qm_scall_quotaon(
 	    (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
 	    (flags & XFS_UQUOTA_ENFD))
 	    ||
+	    ((flags & XFS_PQUOTA_ACCT) == 0 &&
+	    (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
+	    (flags & XFS_OQUOTA_ENFD))
+	    ||
 	    ((flags & XFS_GQUOTA_ACCT) == 0 &&
 	    (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
-	    (flags & XFS_GQUOTA_ENFD))) {
+	    (flags & XFS_OQUOTA_ENFD))) {
 		qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n",
 			flags, mp->m_sb.sb_qflags);
 		return XFS_ERROR(EINVAL);
@@ -504,6 +512,10 @@ xfs_qm_scall_quotaon(
 	 */
 	if  (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
 	     (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
+	     ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) !=
+	     (mp->m_qflags & XFS_PQUOTA_ACCT)) ||
+	     ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
+	     (mp->m_qflags & XFS_GQUOTA_ACCT)) ||
 	    (flags & XFS_ALL_QUOTA_ENFD) == 0)
 		return (0);
 
@@ -521,7 +533,6 @@ xfs_qm_scall_quotaon(
 }
 
 
-
 /*
  * Return quota status information, such as uquota-off, enforcements, etc.
  */
@@ -776,9 +787,9 @@ xfs_qm_log_quotaoff_end(
 	xfs_qoff_logitem_t	*startqoff,
 	uint			flags)
 {
-	xfs_trans_t	       *tp;
+	xfs_trans_t		*tp;
 	int			error;
-	xfs_qoff_logitem_t     *qoffi;
+	xfs_qoff_logitem_t	*qoffi;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
 
@@ -928,18 +939,26 @@ xfs_qm_export_dquot(
 
 STATIC uint
 xfs_qm_import_qtype_flags(
-	uint uflags)
+	uint		uflags)
 {
+	uint		oflags = 0;
+
 	/*
-	 * Can't be both at the same time.
+	 * Can't be more than one, or none.
 	 */
 	if (((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ==
-	     (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ||
-	    ((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) == 0))
+			(XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ||
+	    ((uflags & (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) ==
+			(XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) ||
+	    ((uflags & (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) ==
+			(XFS_USER_QUOTA | XFS_PROJ_QUOTA)) ||
+	    ((uflags & (XFS_GROUP_QUOTA|XFS_USER_QUOTA|XFS_PROJ_QUOTA)) == 0))
 		return (0);
 
-	return (uflags & XFS_USER_QUOTA) ?
-		XFS_DQ_USER : XFS_DQ_GROUP;
+	oflags |= (uflags & XFS_USER_QUOTA) ? XFS_DQ_USER : 0;
+	oflags |= (uflags & XFS_PROJ_QUOTA) ? XFS_DQ_PROJ : 0;
+	oflags |= (uflags & XFS_GROUP_QUOTA) ? XFS_DQ_GROUP: 0;
+	return oflags;
 }
 
 STATIC uint
@@ -947,14 +966,19 @@ xfs_qm_export_qtype_flags(
 	uint flags)
 {
 	/*
-	 * Can't be both at the same time.
+	 * Can't be more than one, or none.
 	 */
-	ASSERT((flags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) !=
-		(XFS_GROUP_QUOTA | XFS_USER_QUOTA));
-	ASSERT((flags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) != 0);
+	ASSERT((flags & (XFS_PROJ_QUOTA | XFS_USER_QUOTA)) !=
+		(XFS_PROJ_QUOTA | XFS_USER_QUOTA));
+	ASSERT((flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)) !=
+		(XFS_PROJ_QUOTA | XFS_GROUP_QUOTA));
+	ASSERT((flags & (XFS_USER_QUOTA | XFS_GROUP_QUOTA)) !=
+		(XFS_USER_QUOTA | XFS_GROUP_QUOTA));
+	ASSERT((flags & (XFS_PROJ_QUOTA|XFS_USER_QUOTA|XFS_GROUP_QUOTA)) != 0);
 
 	return (flags & XFS_DQ_USER) ?
-		XFS_USER_QUOTA : XFS_GROUP_QUOTA;
+		XFS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
+			XFS_PROJ_QUOTA : XFS_GROUP_QUOTA;
 }
 
 STATIC uint
@@ -965,12 +989,14 @@ xfs_qm_import_flags(
 
 	if (uflags & XFS_QUOTA_UDQ_ACCT)
 		flags |= XFS_UQUOTA_ACCT;
+	if (uflags & XFS_QUOTA_PDQ_ACCT)
+		flags |= XFS_PQUOTA_ACCT;
 	if (uflags & XFS_QUOTA_GDQ_ACCT)
 		flags |= XFS_GQUOTA_ACCT;
 	if (uflags & XFS_QUOTA_UDQ_ENFD)
 		flags |= XFS_UQUOTA_ENFD;
-	if (uflags & XFS_QUOTA_GDQ_ENFD)
-		flags |= XFS_GQUOTA_ENFD;
+	if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD))
+		flags |= XFS_OQUOTA_ENFD;
 	return (flags);
 }
 
@@ -984,12 +1010,16 @@ xfs_qm_export_flags(
 	uflags = 0;
 	if (flags & XFS_UQUOTA_ACCT)
 		uflags |= XFS_QUOTA_UDQ_ACCT;
+	if (flags & XFS_PQUOTA_ACCT)
+		uflags |= XFS_QUOTA_PDQ_ACCT;
 	if (flags & XFS_GQUOTA_ACCT)
 		uflags |= XFS_QUOTA_GDQ_ACCT;
 	if (flags & XFS_UQUOTA_ENFD)
 		uflags |= XFS_QUOTA_UDQ_ENFD;
-	if (flags & XFS_GQUOTA_ENFD)
-		uflags |= XFS_QUOTA_GDQ_ENFD;
+	if (flags & (XFS_OQUOTA_ENFD)) {
+		uflags |= (flags & XFS_GQUOTA_ACCT) ?
+			XFS_QUOTA_GDQ_ENFD : XFS_QUOTA_PDQ_ENFD;
+	}
 	return (uflags);
 }
 
@@ -1070,7 +1100,7 @@ again:
 			xfs_qm_dqrele(ip->i_udquot);
 			ip->i_udquot = NULL;
 		}
-		if ((flags & XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+		if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
 			xfs_qm_dqrele(ip->i_gdquot);
 			ip->i_gdquot = NULL;
 		}
@@ -1160,7 +1190,6 @@ xfs_qm_dqtest_print(
 {
 	cmn_err(CE_DEBUG, "-----------DQTEST DQUOT----------------");
 	cmn_err(CE_DEBUG, "---- dquot ID = %d", d->d_id);
-	cmn_err(CE_DEBUG, "---- type     = %s", XFS_QM_ISUDQ(d)? "USR" : "GRP");
 	cmn_err(CE_DEBUG, "---- fs       = 0x%p", d->q_mount);
 	cmn_err(CE_DEBUG, "---- bcount   = %Lu (0x%x)",
 		d->d_bcount, (int)d->d_bcount);
@@ -1231,7 +1260,7 @@ xfs_dqtest_cmp2(
 #ifdef QUOTADEBUG
 	if (!err) {
 		cmn_err(CE_DEBUG, "%d [%s] [0x%p] qchecked",
-			d->d_id, XFS_QM_ISUDQ(d) ? "USR" : "GRP", d->q_mount);
+			d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount);
 	}
 #endif
 	return (err);
@@ -1287,6 +1316,7 @@ STATIC void
 xfs_qm_internalqcheck_get_dquots(
 	xfs_mount_t	*mp,
 	xfs_dqid_t	uid,
+	xfs_dqid_t	projid,
 	xfs_dqid_t	gid,
 	xfs_dqtest_t	**ud,
 	xfs_dqtest_t	**gd)
@@ -1295,6 +1325,8 @@ xfs_qm_internalqcheck_get_dquots(
 		xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud);
 	if (XFS_IS_GQUOTA_ON(mp))
 		xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd);
+	else if (XFS_IS_PQUOTA_ON(mp))
+		xfs_qm_internalqcheck_dqget(mp, projid, XFS_DQ_PROJ, gd);
 }
 
 
@@ -1362,13 +1394,14 @@ xfs_qm_internalqcheck_adjust(
 	}
 	xfs_qm_internalqcheck_get_dquots(mp,
 					(xfs_dqid_t) ip->i_d.di_uid,
+					(xfs_dqid_t) ip->i_d.di_projid,
 					(xfs_dqid_t) ip->i_d.di_gid,
 					&ud, &gd);
 	if (XFS_IS_UQUOTA_ON(mp)) {
 		ASSERT(ud);
 		xfs_qm_internalqcheck_dqadjust(ip, ud);
 	}
-	if (XFS_IS_GQUOTA_ON(mp)) {
+	if (XFS_IS_OQUOTA_ON(mp)) {
 		ASSERT(gd);
 		xfs_qm_internalqcheck_dqadjust(ip, gd);
 	}
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 675f03f443d..472afd3570c 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -102,7 +102,8 @@ static inline int XQMISLCKD(struct xfs_dqhash *h)
 				     (xfs_Gqm->qm_grp_dqhtable + \
 				      XFS_DQ_HASHVAL(mp, id)))
 #define XFS_IS_DQTYPE_ON(mp, type)   (type == XFS_DQ_USER ? \
-				      XFS_IS_UQUOTA_ON(mp):XFS_IS_GQUOTA_ON(mp))
+					XFS_IS_UQUOTA_ON(mp) : \
+					XFS_IS_OQUOTA_ON(mp))
 #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
 	!dqp->q_core.d_blk_hardlimit && \
 	!dqp->q_core.d_blk_softlimit && \
@@ -180,7 +181,8 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
 	IRELE(ip);
 
 #define DQFLAGTO_TYPESTR(d)	(((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
-				 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : "???"))
+				 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
+				 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
 #define DQFLAGTO_DIRTYSTR(d)	(XFS_DQ_IS_DIRTY(d) ? "DIRTY" : "NOTDIRTY")
 
 #endif	/* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 3644ca00cc8..565efb73c23 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -207,12 +207,10 @@ xfs_trans_mod_dquot_byino(
 	if (tp->t_dqinfo == NULL)
 		xfs_trans_alloc_dqinfo(tp);
 
-	if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) {
+	if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
 		(void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
-	}
-	if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) {
+	if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot)
 		(void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
-	}
 }
 
 STATIC xfs_dqtrx_t *
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 5f1b0c9308f..01aed5f2d57 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -80,7 +80,7 @@ typedef struct xfs_buf_log_format_t {
  * user or group dquots and may require special recovery handling.
  */
 #define	XFS_BLI_UDQUOT_BUF	0x4
-/* #define XFS_BLI_PDQUOT_BUF	0x8 */
+#define XFS_BLI_PDQUOT_BUF	0x8
 #define	XFS_BLI_GDQUOT_BUF	0x10
 
 #define	XFS_BLI_CHUNK		128
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 593e597c86b..91d764a5a9b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1964,7 +1964,8 @@ xlog_recover_do_reg_buffer(
 		 * probably a good thing to do for other buf types also.
 		 */
 		error = 0;
-		if (buf_f->blf_flags & (XFS_BLI_UDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
+		if (buf_f->blf_flags &
+		   (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
 			error = xfs_qm_dqcheck((xfs_disk_dquot_t *)
 					       item->ri_buf[i].i_addr,
 					       -1, 0, XFS_QMOPT_DOWARN,
@@ -2030,6 +2031,7 @@ xfs_qm_dqcheck(
 	}
 
 	if (INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_USER &&
+	    INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_PROJ &&
 	    INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_GROUP) {
 		if (flags & XFS_QMOPT_DOWARN)
 			cmn_err(CE_ALERT,
@@ -2135,6 +2137,8 @@ xlog_recover_do_dquot_buffer(
 	type = 0;
 	if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF)
 		type |= XFS_DQ_USER;
+	if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF)
+		type |= XFS_DQ_PROJ;
 	if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF)
 		type |= XFS_DQ_GROUP;
 	/*
@@ -2247,7 +2251,8 @@ xlog_recover_do_buffer_trans(
 	error = 0;
 	if (flags & XFS_BLI_INODE_BUF) {
 		error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
-	} else if (flags & (XFS_BLI_UDQUOT_BUF | XFS_BLI_GDQUOT_BUF)) {
+	} else if (flags &
+		  (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
 		xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
 	} else {
 		xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
@@ -2619,7 +2624,7 @@ xlog_recover_do_dquot_trans(
 	 * This type of quotas was turned off, so ignore this record.
 	 */
 	type = INT_GET(recddq->d_flags, ARCH_CONVERT) &
-			(XFS_DQ_USER | XFS_DQ_GROUP);
+			(XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
 	ASSERT(type);
 	if (log->l_quotaoffs_flag & type)
 		return (0);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index cd5170ec73a..5affba38a57 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -141,7 +141,7 @@ typedef int	(*xfs_dqattach_t)(struct xfs_inode *, uint);
 typedef void	(*xfs_dqdetach_t)(struct xfs_inode *);
 typedef int	(*xfs_dqpurgeall_t)(struct xfs_mount *, uint);
 typedef int	(*xfs_dqvopalloc_t)(struct xfs_mount *,
-			struct xfs_inode *, uid_t, gid_t, uint,
+			struct xfs_inode *, uid_t, gid_t, prid_t, uint,
 			struct xfs_dquot **, struct xfs_dquot **);
 typedef void	(*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *,
 			struct xfs_dquot *, struct xfs_dquot *);
@@ -185,8 +185,8 @@ typedef struct xfs_qmops {
 	(*(mp)->m_qm_ops.xfs_dqdetach)(ip)
 #define XFS_QM_DQPURGEALL(mp, fl) \
 	(*(mp)->m_qm_ops.xfs_dqpurgeall)(mp, fl)
-#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, fl, dq1, dq2) \
-	(*(mp)->m_qm_ops.xfs_dqvopalloc)(mp, ip, uid, gid, fl, dq1, dq2)
+#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, prid, fl, dq1, dq2) \
+	(*(mp)->m_qm_ops.xfs_dqvopalloc)(mp, ip, uid, gid, prid, fl, dq1, dq2)
 #define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \
 	(*(mp)->m_qm_ops.xfs_dqvopcreate)(tp, ip, dq1, dq2)
 #define XFS_QM_DQVOPRENAME(mp, ip) \
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 703ec4efcb4..341cb4604c6 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -96,7 +96,7 @@ typedef struct xfs_dqblk {
  * flags for q_flags field in the dquot.
  */
 #define XFS_DQ_USER		0x0001		/* a user quota */
-/* #define XFS_DQ_PROJ		0x0002		-- project quota (IRIX) */
+#define XFS_DQ_PROJ		0x0002		/* project quota */
 #define XFS_DQ_GROUP		0x0004		/* a group quota */
 #define XFS_DQ_FLOCKED		0x0008		/* flush lock taken */
 #define XFS_DQ_DIRTY		0x0010		/* dquot is dirty */
@@ -104,6 +104,8 @@ typedef struct xfs_dqblk {
 #define XFS_DQ_INACTIVE		0x0040		/* dq off mplist & hashlist */
 #define XFS_DQ_MARKER		0x0080		/* sentinel */
 
+#define XFS_DQ_ALLTYPES		(XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
+
 /*
  * In the worst case, when both user and group quotas are on,
  * we can have a max of three dquots changing in a single transaction.
@@ -124,7 +126,7 @@ typedef struct xfs_dqblk {
 typedef struct xfs_dq_logformat {
 	__uint16_t		qlf_type;      /* dquot log item type */
 	__uint16_t		qlf_size;      /* size of this item */
-	xfs_dqid_t		qlf_id;	       /* usr/grp id number : 32 bits */
+	xfs_dqid_t		qlf_id;	       /* usr/grp/proj id : 32 bits */
 	__int64_t		qlf_blkno;     /* blkno of dquot buffer */
 	__int32_t		qlf_len;       /* len of dquot buffer */
 	__uint32_t		qlf_boffset;   /* off of dquot in buffer */
@@ -152,9 +154,9 @@ typedef struct xfs_qoff_logformat {
 #define XFS_UQUOTA_ACCT	0x0001  /* user quota accounting ON */
 #define XFS_UQUOTA_ENFD	0x0002  /* user quota limits enforced */
 #define XFS_UQUOTA_CHKD	0x0004  /* quotacheck run on usr quotas */
-#define XFS_PQUOTA_ACCT	0x0008  /* (IRIX) project quota accounting ON */
-#define XFS_GQUOTA_ENFD	0x0010  /* group quota limits enforced */
-#define XFS_GQUOTA_CHKD	0x0020  /* quotacheck run on grp quotas */
+#define XFS_PQUOTA_ACCT	0x0008  /* project quota accounting ON */
+#define XFS_OQUOTA_ENFD	0x0010  /* other (grp/prj) quota limits enforced */
+#define XFS_OQUOTA_CHKD	0x0020  /* quotacheck run on other (grp/prj) quotas */
 #define XFS_GQUOTA_ACCT	0x0040  /* group quota accounting ON */
 
 /*
@@ -162,17 +164,22 @@ typedef struct xfs_qoff_logformat {
  * are in the process of getting turned off. These flags are in m_qflags but
  * never in sb_qflags.
  */
-#define XFS_UQUOTA_ACTIVE	0x0080  /* uquotas are being turned off */
-#define XFS_GQUOTA_ACTIVE	0x0100  /* gquotas are being turned off */
+#define XFS_UQUOTA_ACTIVE	0x0100  /* uquotas are being turned off */
+#define XFS_PQUOTA_ACTIVE	0x0200  /* pquotas are being turned off */
+#define XFS_GQUOTA_ACTIVE	0x0400  /* gquotas are being turned off */
 
 /*
  * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
  * quota will be not be switched off as long as that inode lock is held.
  */
 #define XFS_IS_QUOTA_ON(mp)	((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \
-						   XFS_GQUOTA_ACTIVE))
+						   XFS_GQUOTA_ACTIVE | \
+						   XFS_PQUOTA_ACTIVE))
+#define XFS_IS_OQUOTA_ON(mp)	((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \
+						   XFS_PQUOTA_ACTIVE))
 #define XFS_IS_UQUOTA_ON(mp)	((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
 #define XFS_IS_GQUOTA_ON(mp)	((mp)->m_qflags & XFS_GQUOTA_ACTIVE)
+#define XFS_IS_PQUOTA_ON(mp)	((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
 
 /*
  * Flags to tell various functions what to do. Not all of these are meaningful
@@ -182,7 +189,7 @@ typedef struct xfs_qoff_logformat {
 #define XFS_QMOPT_DQLOCK	0x0000001 /* dqlock */
 #define XFS_QMOPT_DQALLOC	0x0000002 /* alloc dquot ondisk if needed */
 #define XFS_QMOPT_UQUOTA	0x0000004 /* user dquot requested */
-#define XFS_QMOPT_GQUOTA	0x0000008 /* group dquot requested */
+#define XFS_QMOPT_PQUOTA	0x0000008 /* project dquot requested */
 #define XFS_QMOPT_FORCE_RES	0x0000010 /* ignore quota limits */
 #define XFS_QMOPT_DQSUSER	0x0000020 /* don't cache super users dquot */
 #define XFS_QMOPT_SBVERSION	0x0000040 /* change superblock version num */
@@ -192,6 +199,7 @@ typedef struct xfs_qoff_logformat {
 #define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if necessary */
 #define XFS_QMOPT_ILOCKED	0x0000800 /* inode is already locked (excl) */
 #define XFS_QMOPT_DQREPAIR	0x0001000 /* repair dquot, if damaged. */
+#define XFS_QMOPT_GQUOTA	0x0002000 /* group dquot requested */
 
 /*
  * flags to xfs_trans_mod_dquot to indicate which field needs to be
@@ -231,7 +239,8 @@ typedef struct xfs_qoff_logformat {
 #define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT
 
 
-#define XFS_QMOPT_QUOTALL	(XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA)
+#define XFS_QMOPT_QUOTALL	\
+		(XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA)
 #define XFS_QMOPT_RESBLK_MASK	(XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
 
 #ifdef __KERNEL__
@@ -246,21 +255,33 @@ typedef struct xfs_qoff_logformat {
  */
 #define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\
 				     (ip)->i_udquot == NULL) || \
-				    (XFS_IS_GQUOTA_ON(mp) && \
+				    (XFS_IS_OQUOTA_ON(mp) && \
 				     (ip)->i_gdquot == NULL))
 
-#define XFS_QM_NEED_QUOTACHECK(mp) ((XFS_IS_UQUOTA_ON(mp) && \
-				     (mp->m_sb.sb_qflags & \
-				      XFS_UQUOTA_CHKD) == 0) || \
-				    (XFS_IS_GQUOTA_ON(mp) && \
-				     (mp->m_sb.sb_qflags & \
-				      XFS_GQUOTA_CHKD) == 0))
+#define XFS_QM_NEED_QUOTACHECK(mp) \
+	((XFS_IS_UQUOTA_ON(mp) && \
+		(mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \
+	 (XFS_IS_GQUOTA_ON(mp) && \
+		((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \
+		 (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT))) || \
+	 (XFS_IS_PQUOTA_ON(mp) && \
+		((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \
+		 (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT))))
+
+#define XFS_MOUNT_QUOTA_SET1	(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
+				 XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\
+				 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD)
+
+#define XFS_MOUNT_QUOTA_SET2	(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
+				 XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\
+				 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD)
 
 #define XFS_MOUNT_QUOTA_ALL	(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
-				 XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\
-				 XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD)
+				 XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\
+				 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\
+				 XFS_GQUOTA_ACCT)
 #define XFS_MOUNT_QUOTA_MASK	(XFS_MOUNT_QUOTA_ALL | XFS_UQUOTA_ACTIVE | \
-				 XFS_GQUOTA_ACTIVE)
+				 XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE)
 
 
 /*
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index a9682b9510c..144da7a8546 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -976,6 +976,7 @@ xfs_trans_dquot_buf(
 	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
 	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
 	ASSERT(type == XFS_BLI_UDQUOT_BUF ||
+	       type == XFS_BLI_PDQUOT_BUF ||
 	       type == XFS_BLI_GDQUOT_BUF);
 
 	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index d1f8146a06e..11351f08d43 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -428,7 +428,7 @@ xfs_truncate_file(
 		if (ip->i_ino != mp->m_sb.sb_uquotino)
 			ASSERT(ip->i_udquot);
 	}
-	if (XFS_IS_GQUOTA_ON(mp)) {
+	if (XFS_IS_OQUOTA_ON(mp)) {
 		if (ip->i_ino != mp->m_sb.sb_gquotino)
 			ASSERT(ip->i_gdquot);
 	}
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index fb1ae6cfb1f..10350049834 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -367,16 +367,6 @@ xfs_finish_flags(
 		return XFS_ERROR(EROFS);
 	}
 
-	/*
-	 * disallow mount attempts with (IRIX) project quota enabled
-	 */
-	if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
-	    (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT)) {
-		cmn_err(CE_WARN,
-	"XFS: cannot mount a filesystem with IRIX project quota enabled");
-		return XFS_ERROR(ENOSYS);
-	}
-
 	/*
 	 * check for shared mount.
 	 */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 5703783991f..695978b2749 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -351,21 +351,28 @@ xfs_setattr(
 	 * If the IDs do change before we take the ilock, we're covered
 	 * because the i_*dquot fields will get updated anyway.
 	 */
-	if (XFS_IS_QUOTA_ON(mp) && (mask & (XFS_AT_UID|XFS_AT_GID))) {
+	if (XFS_IS_QUOTA_ON(mp) &&
+	    (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) {
 		uint	qflags = 0;
 
-		if (mask & XFS_AT_UID) {
+		if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) {
 			uid = vap->va_uid;
 			qflags |= XFS_QMOPT_UQUOTA;
 		} else {
 			uid = ip->i_d.di_uid;
 		}
-		if (mask & XFS_AT_GID) {
+		if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) {
 			gid = vap->va_gid;
 			qflags |= XFS_QMOPT_GQUOTA;
 		}  else {
 			gid = ip->i_d.di_gid;
 		}
+		if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) {
+			projid = vap->va_projid;
+			qflags |= XFS_QMOPT_PQUOTA;
+		}  else {
+			projid = ip->i_d.di_projid;
+		}
 		/*
 		 * We take a reference when we initialize udqp and gdqp,
 		 * so it is important that we never blindly double trip on
@@ -373,7 +380,8 @@ xfs_setattr(
 		 */
 		ASSERT(udqp == NULL);
 		ASSERT(gdqp == NULL);
-		code = XFS_QM_DQVOPALLOC(mp, ip, uid,gid, qflags, &udqp, &gdqp);
+		code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags,
+					 &udqp, &gdqp);
 		if (code)
 			return (code);
 	}
@@ -510,10 +518,11 @@ xfs_setattr(
 			goto error_return;
 		}
 		/*
-		 * Do a quota reservation only if uid or gid is actually
+		 * Do a quota reservation only if uid/projid/gid is actually
 		 * going to change.
 		 */
 		if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+		    (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) ||
 		    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
 			ASSERT(tp);
 			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
@@ -774,6 +783,7 @@ xfs_setattr(
 		}
 		if (igid != gid) {
 			if (XFS_IS_GQUOTA_ON(mp)) {
+				ASSERT(!XFS_IS_PQUOTA_ON(mp));
 				ASSERT(mask & XFS_AT_GID);
 				ASSERT(gdqp);
 				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
@@ -782,6 +792,13 @@ xfs_setattr(
 			ip->i_d.di_gid = gid;
 		}
 		if (iprojid != projid) {
+			if (XFS_IS_PQUOTA_ON(mp)) {
+				ASSERT(!XFS_IS_GQUOTA_ON(mp));
+				ASSERT(mask & XFS_AT_PROJID);
+				ASSERT(gdqp);
+				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+							&ip->i_gdquot, gdqp);
+			}
 			ip->i_d.di_projid = projid;
 			/*
 			 * We may have to rev the inode as well as
@@ -1907,7 +1924,7 @@ xfs_create(
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
 	error = XFS_QM_DQVOPALLOC(mp, dp,
-			current_fsuid(credp), current_fsgid(credp),
+			current_fsuid(credp), current_fsgid(credp), prid,
 			XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -2812,7 +2829,7 @@ xfs_mkdir(
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
 	error = XFS_QM_DQVOPALLOC(mp, dp,
-			current_fsuid(credp), current_fsgid(credp),
+			current_fsuid(credp), current_fsgid(credp), prid,
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -3366,7 +3383,7 @@ xfs_symlink(
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
 	error = XFS_QM_DQVOPALLOC(mp, dp,
-			current_fsuid(credp), current_fsgid(credp),
+			current_fsuid(credp), current_fsgid(credp), prid,
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
-- 
cgit v1.2.3


From 365ca83d509f77f2095976edb8d10ca6e9d86d58 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 21 Jun 2005 15:39:12 +1000
Subject: [XFS] Add support for project quota inheritance, a merge of Glens
 changes.

SGI-PV: 932952
SGI-Modid: xfs-linux:xfs-kern:22806a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_inode.c    | 24 +++++++++++++++---------
 fs/xfs/xfs_vnodeops.c | 37 ++++++++++++++++++++++---------------
 2 files changed, 37 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a2c723baff0..34bdf590968 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1202,26 +1202,32 @@ xfs_ialloc(
 	case S_IFREG:
 	case S_IFDIR:
 		if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
-			if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
-				if ((mode & S_IFMT) == S_IFDIR) {
-					ip->i_d.di_flags |= XFS_DIFLAG_RTINHERIT;
-				} else {
-					ip->i_d.di_flags |= XFS_DIFLAG_REALTIME;
+			uint	di_flags = 0;
+
+			if ((mode & S_IFMT) == S_IFDIR) {
+				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
+					di_flags |= XFS_DIFLAG_RTINHERIT;
+			} else {
+				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
+					di_flags |= XFS_DIFLAG_REALTIME;
 					ip->i_iocore.io_flags |= XFS_IOCORE_RT;
 				}
 			}
 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
 			    xfs_inherit_noatime)
-				ip->i_d.di_flags |= XFS_DIFLAG_NOATIME;
+				di_flags |= XFS_DIFLAG_NOATIME;
 			if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
 			    xfs_inherit_nodump)
-				ip->i_d.di_flags |= XFS_DIFLAG_NODUMP;
+				di_flags |= XFS_DIFLAG_NODUMP;
 			if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
 			    xfs_inherit_sync)
-				ip->i_d.di_flags |= XFS_DIFLAG_SYNC;
+				di_flags |= XFS_DIFLAG_SYNC;
 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
 			    xfs_inherit_nosymlinks)
-				ip->i_d.di_flags |= XFS_DIFLAG_NOSYMLINKS;
+				di_flags |= XFS_DIFLAG_NOSYMLINKS;
+			if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+				di_flags |= XFS_DIFLAG_PROJINHERIT;
+			ip->i_d.di_flags |= di_flags;
 		}
 		/* FALLTHROUGH */
 	case S_IFLNK:
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 695978b2749..96dc18b73cf 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -507,8 +507,6 @@ xfs_setattr(
 		 * that the group ID supplied to the chown() function
 		 * shall be equal to either the group ID or one of the
 		 * supplementary group IDs of the calling process.
-		 *
-		 * XXX: How does restricted_chown affect projid?
 		 */
 		if (restricted_chown &&
 		    (iuid != uid || (igid != gid &&
@@ -860,6 +858,8 @@ xfs_setattr(
 				di_flags |= XFS_DIFLAG_NOATIME;
 			if (vap->va_xflags & XFS_XFLAG_NODUMP)
 				di_flags |= XFS_DIFLAG_NODUMP;
+			if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
+				di_flags |= XFS_DIFLAG_PROJINHERIT;
 			if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
 				if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
 					di_flags |= XFS_DIFLAG_RTINHERIT;
@@ -1915,7 +1915,9 @@ xfs_create(
 	/* Return through std_return after this point. */
 
 	udqp = gdqp = NULL;
-	if (vap->va_mask & XFS_AT_PROJID)
+	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+		prid = dp->i_d.di_projid;
+	else if (vap->va_mask & XFS_AT_PROJID)
 		prid = (xfs_prid_t)vap->va_projid;
 	else
 		prid = (xfs_prid_t)dfltprid;
@@ -2621,17 +2623,7 @@ xfs_link(
 	if (src_vp->v_type == VDIR)
 		return XFS_ERROR(EPERM);
 
-	/*
-	 * For now, manually find the XFS behavior descriptor for
-	 * the source vnode.  If it doesn't exist then something
-	 * is wrong and we should just return an error.
-	 * Eventually we need to figure out how link is going to
-	 * work in the face of stacked vnodes.
-	 */
 	src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops);
-	if (src_bdp == NULL) {
-		return XFS_ERROR(EXDEV);
-	}
 	sip = XFS_BHVTOI(src_bdp);
 	tdp = XFS_BHVTOI(target_dir_bdp);
 	mp = tdp->i_mount;
@@ -2698,6 +2690,17 @@ xfs_link(
 		goto error_return;
 	}
 
+	/*
+	 * If we are using project inheritance, we only allow hard link
+	 * creation in our tree when the project IDs are the same; else
+	 * the tree quota mechanism could be circumvented.
+	 */
+	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
+		     (tdp->i_d.di_projid != sip->i_d.di_projid))) {
+		error = XFS_ERROR(EPERM);
+		goto error_return;
+	}
+
 	if (resblks == 0 &&
 	    (error = XFS_DIR_CANENTER(mp, tp, tdp, target_name,
 			target_namelen)))
@@ -2820,7 +2823,9 @@ xfs_mkdir(
 
 	mp = dp->i_mount;
 	udqp = gdqp = NULL;
-	if (vap->va_mask & XFS_AT_PROJID)
+	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+		prid = dp->i_d.di_projid;
+	else if (vap->va_mask & XFS_AT_PROJID)
 		prid = (xfs_prid_t)vap->va_projid;
 	else
 		prid = (xfs_prid_t)dfltprid;
@@ -3374,7 +3379,9 @@ xfs_symlink(
 	/* Return through std_return after this point. */
 
 	udqp = gdqp = NULL;
-	if (vap->va_mask & XFS_AT_PROJID)
+	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+		prid = dp->i_d.di_projid;
+	else if (vap->va_mask & XFS_AT_PROJID)
 		prid = (xfs_prid_t)vap->va_projid;
 	else
 		prid = (xfs_prid_t)dfltprid;
-- 
cgit v1.2.3


From 6add2c4288801bd7fd0a4cc3277de7688f1f6714 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Tue, 21 Jun 2005 15:39:44 +1000
Subject: [XFS] Fix up some warning fallout from functions made static

SGI-PV: 936255
SGI-Modid: xfs-linux:xfs-kern:193691a

Signed-off-by: Eric Sandeen <sandeen@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_btree.c     | 2 +-
 fs/xfs/xfs_dir2_leaf.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 797f4d96cdf..0cc63d657a1 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -93,7 +93,7 @@ xfs_btree_maxrecs(
  * Retrieve the block pointer from the cursor at the given level.
  * This may be a bmap btree root or from a buffer.
  */
-xfs_btree_block_t *			/* generic btree block pointer */
+STATIC xfs_btree_block_t *			/* generic btree block pointer */
 xfs_btree_get_block(
 	xfs_btree_cur_t		*cur,	/* btree cursor */
 	int			level,	/* level in btree */
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 79918df6ffe..056f5283904 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -79,7 +79,7 @@ static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **lbpp,
 				    int *indexp, xfs_dabuf_t **dbpp);
 static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
 				    int first, int last);
-extern void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
+static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
 
 
 /*
-- 
cgit v1.2.3


From 48fab6bf5f8baf0d16b20a35e537719d14b66275 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 21 Jun 2005 15:40:20 +1000
Subject: [XFS] add XFS_INOBT_IS_FREE_DISK

SGI-PV: 928382
SGI-Modid: xfs-linux:xfs-kern:193778a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_ialloc_btree.h | 8 ++++++--
 fs/xfs/xfs_macros.c       | 5 +++++
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index 803c4d17a05..44be188674a 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -100,9 +100,13 @@ xfs_inofree_t xfs_inobt_mask(int i);
 #endif
 #if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_IS_FREE)
 int xfs_inobt_is_free(xfs_inobt_rec_t *rp, int i);
-#define	XFS_INOBT_IS_FREE(rp,i)	xfs_inobt_is_free(rp,i)
+#define	XFS_INOBT_IS_FREE(rp,i)		xfs_inobt_is_free(rp,i)
+#define	XFS_INOBT_IS_FREE_DISK(rp,i)	xfs_inobt_is_free_disk(rp,i)
 #else
-#define	XFS_INOBT_IS_FREE(rp,i)	(((rp)->ir_free & XFS_INOBT_MASK(i)) != 0)
+#define	XFS_INOBT_IS_FREE(rp,i)	\
+	(((rp)->ir_free & XFS_INOBT_MASK(i)) != 0)
+#define XFS_INOBT_IS_FREE_DISK(rp,i) \
+	((INT_GET((rp)->ir_free, ARCH_CONVERT) & XFS_INOBT_MASK(i)) != 0)
 #endif
 #if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_SET_FREE)
 void xfs_inobt_set_free(xfs_inobt_rec_t *rp, int i);
diff --git a/fs/xfs/xfs_macros.c b/fs/xfs/xfs_macros.c
index ce4f46c6b3a..698c2cd6285 100644
--- a/fs/xfs/xfs_macros.c
+++ b/fs/xfs/xfs_macros.c
@@ -1658,6 +1658,11 @@ xfs_inobt_is_free(xfs_inobt_rec_t *rp, int i)
 {
 	return XFS_INOBT_IS_FREE(rp, i);
 }
+int
+xfs_inobt_is_free_disk(xfs_inobt_rec_t *rp, int i)
+{
+	return XFS_INOBT_IS_FREE_DISK(rp, i);
+}
 #endif
 
 #if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_IS_LAST_REC)
-- 
cgit v1.2.3


From f898d6c09caa40d82203acd72e9fda3cd5aeae74 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 21 Jun 2005 15:40:48 +1000
Subject: [XFS] quiesce the filesystem proper when freezing

SGI-PV: 936977
SGI-Modid: xfs-linux:xfs-kern:193840a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c |  6 +++--
 fs/xfs/linux-2.6/xfs_vfs.h   |  1 +
 fs/xfs/xfs_vfsops.c          | 60 ++++++++++++++++++++++++++------------------
 3 files changed, 40 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index d5f0340ddcd..5fe9af38aa2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -590,8 +590,10 @@ linvfs_sync_super(
 	int		error;
 	int		flags = SYNC_FSDATA;
 
-	if (wait)
-		flags |= SYNC_WAIT;
+	if (unlikely(sb->s_frozen == SB_FREEZE_WRITE))
+		flags = SYNC_QUIESCE;
+	else
+		flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0);
 
 	VFS_SYNC(vfsp, flags, NULL, error);
 	sb->s_dirt = 0;
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 76493991578..7ee1f714e9b 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -107,6 +107,7 @@ typedef enum {
 #define SYNC_FSDATA		0x0020	/* flush fs data (e.g. superblocks) */
 #define SYNC_REFCACHE		0x0040  /* prune some of the nfs ref cache */
 #define SYNC_REMOUNT		0x0080  /* remount readonly, no dummy LRs */
+#define SYNC_QUIESCE		0x0100  /* quiesce fileystem for a snapshot */
 
 typedef int	(*vfs_mount_t)(bhv_desc_t *,
 				struct xfs_mount_args *, struct cred *);
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 10350049834..42bcc021520 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -612,7 +612,34 @@ out:
 	return XFS_ERROR(error);
 }
 
-#define REMOUNT_READONLY_FLAGS	(SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT)
+STATIC int
+xfs_quiesce_fs(
+	xfs_mount_t		*mp)
+{
+	int			count = 0, pincount;
+		
+	xfs_refcache_purge_mp(mp);
+	xfs_flush_buftarg(mp->m_ddev_targp, 0);
+	xfs_finish_reclaim_all(mp, 0);
+
+	/* This loop must run at least twice.
+	 * The first instance of the loop will flush
+	 * most meta data but that will generate more
+	 * meta data (typically directory updates).
+	 * Which then must be flushed and logged before
+	 * we can write the unmount record.
+	 */ 
+	do {
+		xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, 0, NULL);
+		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
+		if (!pincount) {
+			delay(50);
+			count++;
+		}
+	} while (count < 2);
+
+	return 0;
+}
 
 STATIC int
 xfs_mntupdate(
@@ -622,8 +649,7 @@ xfs_mntupdate(
 {
 	struct vfs	*vfsp = bhvtovfs(bdp);
 	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
-	int		pincount, error;
-	int		count = 0;
+	int		error;
 
 	if (args->flags & XFSMNT_NOATIME)
 		mp->m_flags |= XFS_MOUNT_NOATIME;
@@ -635,25 +661,7 @@ xfs_mntupdate(
 	}
 
 	if (*flags & MS_RDONLY) {
-		xfs_refcache_purge_mp(mp);
-		xfs_flush_buftarg(mp->m_ddev_targp, 0);
-		xfs_finish_reclaim_all(mp, 0);
-
-		/* This loop must run at least twice.
-		 * The first instance of the loop will flush
-		 * most meta data but that will generate more
-		 * meta data (typically directory updates).
-		 * Which then must be flushed and logged before
-		 * we can write the unmount record.
-		 */ 
-		do {
-			VFS_SYNC(vfsp, REMOUNT_READONLY_FLAGS, NULL, error);
-			pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
-			if (!pincount) {
-				delay(50);
-				count++;
-			}
-		} while (count < 2);
+		xfs_quiesce_fs(mp);
 
 		/* Ok now write out an unmount record */
 		xfs_log_unmount_write(mp);
@@ -869,10 +877,12 @@ xfs_sync(
 	int		flags,
 	cred_t		*credp)
 {
-	xfs_mount_t	*mp;
+	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
 
-	mp = XFS_BHVTOM(bdp);
-	return (xfs_syncsub(mp, flags, 0, NULL));
+	if (unlikely(flags == SYNC_QUIESCE))
+		return xfs_quiesce_fs(mp);
+	else
+		return xfs_syncsub(mp, flags, 0, NULL);
 }
 
 /*
-- 
cgit v1.2.3


From 7d795ca3442c7a562c45aeb7a7a808c79992a589 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 21 Jun 2005 15:41:19 +1000
Subject: [XFS] consolidate extent item freeing

SGI-PV: 938062
SGI-Modid: xfs-linux:xfs-kern:194415a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_extfree_item.c | 122 ++++++++++++----------------------------------
 fs/xfs/xfs_extfree_item.h |   2 +
 fs/xfs/xfs_log_recover.c  |  14 ++----
 3 files changed, 35 insertions(+), 103 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 4b45ff739e6..db7cbd1bc85 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -59,6 +59,18 @@ STATIC void	xfs_efi_item_abort(xfs_efi_log_item_t *);
 STATIC void	xfs_efd_item_abort(xfs_efd_log_item_t *);
 
 
+void
+xfs_efi_item_free(xfs_efi_log_item_t *efip)
+{
+	int nexts = efip->efi_format.efi_nextents;
+
+	if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
+		kmem_free(efip, sizeof(xfs_efi_log_item_t) +
+				(nexts - 1) * sizeof(xfs_extent_t));
+	} else {
+		kmem_zone_free(xfs_efi_zone, efip);
+	}
+}
 
 /*
  * This returns the number of iovecs needed to log the given efi item.
@@ -120,8 +132,6 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip)
 STATIC void
 xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
 {
-	int		nexts;
-	int		size;
 	xfs_mount_t	*mp;
 	SPLDECL(s);
 
@@ -132,21 +142,11 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
 		 * xfs_trans_delete_ail() drops the AIL lock.
 		 */
 		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
-
-		nexts = efip->efi_format.efi_nextents;
-		if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
-			size = sizeof(xfs_efi_log_item_t);
-			size += (nexts - 1) * sizeof(xfs_extent_t);
-			kmem_free(efip, size);
-		} else {
-			kmem_zone_free(xfs_efi_zone, efip);
-		}
+		xfs_efi_item_free(efip);
 	} else {
 		efip->efi_flags |= XFS_EFI_COMMITTED;
 		AIL_UNLOCK(mp, s);
 	}
-
-	return;
 }
 
 /*
@@ -159,8 +159,6 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
 STATIC void
 xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
 {
-	int		nexts;
-	int		size;
 	xfs_mount_t	*mp;
 	xfs_log_item_desc_t	*lidp;
 	SPLDECL(s);
@@ -178,23 +176,11 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
 		 * xfs_trans_delete_ail() drops the AIL lock.
 		 */
 		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
-		/*
-		 * now free the item itself
-		 */
-		nexts = efip->efi_format.efi_nextents;
-		if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
-			size = sizeof(xfs_efi_log_item_t);
-			size += (nexts - 1) * sizeof(xfs_extent_t);
-			kmem_free(efip, size);
-		} else {
-			kmem_zone_free(xfs_efi_zone, efip);
-		}
+		xfs_efi_item_free(efip);
 	} else {
 		efip->efi_flags |= XFS_EFI_COMMITTED;
 		AIL_UNLOCK(mp, s);
 	}
-
-	return;
 }
 
 /*
@@ -245,18 +231,7 @@ xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn)
 STATIC void
 xfs_efi_item_abort(xfs_efi_log_item_t *efip)
 {
-	int	nexts;
-	int	size;
-
-	nexts = efip->efi_format.efi_nextents;
-	if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
-		size = sizeof(xfs_efi_log_item_t);
-		size += (nexts - 1) * sizeof(xfs_extent_t);
-		kmem_free(efip, size);
-	} else {
-		kmem_zone_free(xfs_efi_zone, efip);
-	}
-	return;
+	xfs_efi_item_free(efip);
 }
 
 /*
@@ -355,8 +330,6 @@ xfs_efi_release(xfs_efi_log_item_t	*efip,
 {
 	xfs_mount_t	*mp;
 	int		extents_left;
-	uint		size;
-	int		nexts;
 	SPLDECL(s);
 
 	mp = efip->efi_item.li_mountp;
@@ -372,20 +345,10 @@ xfs_efi_release(xfs_efi_log_item_t	*efip,
 		 * xfs_trans_delete_ail() drops the AIL lock.
 		 */
 		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
+		xfs_efi_item_free(efip);
 	} else {
 		AIL_UNLOCK(mp, s);
 	}
-
-	if (extents_left == 0) {
-		nexts = efip->efi_format.efi_nextents;
-		if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
-			size = sizeof(xfs_efi_log_item_t);
-			size += (nexts - 1) * sizeof(xfs_extent_t);
-			kmem_free(efip, size);
-		} else {
-			kmem_zone_free(xfs_efi_zone, efip);
-		}
-	}
 }
 
 /*
@@ -398,8 +361,6 @@ STATIC void
 xfs_efi_cancel(
 	xfs_efi_log_item_t	*efip)
 {
-	int		nexts;
-	int		size;
 	xfs_mount_t	*mp;
 	SPLDECL(s);
 
@@ -410,26 +371,25 @@ xfs_efi_cancel(
 		 * xfs_trans_delete_ail() drops the AIL lock.
 		 */
 		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
-
-		nexts = efip->efi_format.efi_nextents;
-		if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
-			size = sizeof(xfs_efi_log_item_t);
-			size += (nexts - 1) * sizeof(xfs_extent_t);
-			kmem_free(efip, size);
-		} else {
-			kmem_zone_free(xfs_efi_zone, efip);
-		}
+		xfs_efi_item_free(efip);
 	} else {
 		efip->efi_flags |= XFS_EFI_CANCELED;
 		AIL_UNLOCK(mp, s);
 	}
-
-	return;
 }
 
+STATIC void
+xfs_efd_item_free(xfs_efd_log_item_t *efdp)
+{
+	int nexts = efdp->efd_format.efd_nextents;
 
-
-
+	if (nexts > XFS_EFD_MAX_FAST_EXTENTS) {
+		kmem_free(efdp, sizeof(xfs_efd_log_item_t) +
+				(nexts - 1) * sizeof(xfs_extent_t));
+	} else {
+		kmem_zone_free(xfs_efd_zone, efdp);
+	}
+}
 
 /*
  * This returns the number of iovecs needed to log the given efd item.
@@ -533,9 +493,6 @@ xfs_efd_item_unlock(xfs_efd_log_item_t *efdp)
 STATIC xfs_lsn_t
 xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn)
 {
-	uint	size;
-	int	nexts;
-
 	/*
 	 * If we got a log I/O error, it's always the case that the LR with the
 	 * EFI got unpinned and freed before the EFD got aborted.
@@ -543,15 +500,7 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn)
 	if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0)
 		xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents);
 
-	nexts = efdp->efd_format.efd_nextents;
-	if (nexts > XFS_EFD_MAX_FAST_EXTENTS) {
-		size = sizeof(xfs_efd_log_item_t);
-		size += (nexts - 1) * sizeof(xfs_extent_t);
-		kmem_free(efdp, size);
-	} else {
-		kmem_zone_free(xfs_efd_zone, efdp);
-	}
-
+	xfs_efd_item_free(efdp);
 	return (xfs_lsn_t)-1;
 }
 
@@ -565,9 +514,6 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn)
 STATIC void
 xfs_efd_item_abort(xfs_efd_log_item_t *efdp)
 {
-	int	nexts;
-	int	size;
-
 	/*
 	 * If we got a log I/O error, it's always the case that the LR with the
 	 * EFI got unpinned and freed before the EFD got aborted. So don't
@@ -576,15 +522,7 @@ xfs_efd_item_abort(xfs_efd_log_item_t *efdp)
 	if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0)
 		xfs_efi_cancel(efdp->efd_efip);
 
-	nexts = efdp->efd_format.efd_nextents;
-	if (nexts > XFS_EFD_MAX_FAST_EXTENTS) {
-		size = sizeof(xfs_efd_log_item_t);
-		size += (nexts - 1) * sizeof(xfs_extent_t);
-		kmem_free(efdp, size);
-	} else {
-		kmem_zone_free(xfs_efd_zone, efdp);
-	}
-	return;
+	xfs_efd_item_free(efdp);
 }
 
 /*
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 7122d6101d1..d433bac9f59 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -118,6 +118,8 @@ xfs_efi_log_item_t	*xfs_efi_init(struct xfs_mount *, uint);
 xfs_efd_log_item_t	*xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *,
 				      uint);
 
+void			xfs_efi_item_free(xfs_efi_log_item_t *);
+
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_EXTFREE_ITEM_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 91d764a5a9b..0aac28ddb81 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2747,7 +2747,6 @@ xlog_recover_do_efd_trans(
 	xfs_efi_log_item_t	*efip = NULL;
 	xfs_log_item_t		*lip;
 	int			gen;
-	int			nexts;
 	__uint64_t		efi_id;
 	SPLDECL(s);
 
@@ -2782,22 +2781,15 @@ xlog_recover_do_efd_trans(
 		}
 		lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
 	}
-	if (lip == NULL) {
-		AIL_UNLOCK(mp, s);
-	}
 
 	/*
 	 * If we found it, then free it up.  If it wasn't there, it
 	 * must have been overwritten in the log.  Oh well.
 	 */
 	if (lip != NULL) {
-		nexts = efip->efi_format.efi_nextents;
-		if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
-			kmem_free(lip, sizeof(xfs_efi_log_item_t) +
-				  ((nexts - 1) * sizeof(xfs_extent_t)));
-		} else {
-			kmem_zone_free(xfs_efi_zone, efip);
-		}
+		xfs_efi_item_free(efip);
+	} else {
+		AIL_UNLOCK(mp, s);
 	}
 }
 
-- 
cgit v1.2.3


From d130c14c0310edac5ea0c6327bef7e3715f9a083 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 21 Jun 2005 15:43:22 +1000
Subject: [XFS] simplify ASSERT

SGI-PV: 938063
SGI-Modid: xfs-linux:xfs-kern:194416a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/support/debug.c | 1 -
 fs/xfs/support/debug.h | 7 +------
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 7d6e1f37df1..4ed7b6928cd 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -36,7 +36,6 @@
 #include <linux/sched.h>
 #include <linux/kernel.h>
 
-int			doass = 1;
 static char		message[256];	/* keep it off the stack */
 static DEFINE_SPINLOCK(xfs_err_lock);
 
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 40b0f4c54d9..c5b9365a7e2 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -50,16 +50,11 @@ extern void cmn_err(int, char *, ...);
 #endif
 
 #ifdef DEBUG
-# ifdef lint
-#  define ASSERT(EX)	((void)0) /* avoid "constant in conditional" babble */
-# else
-#  define ASSERT(EX) ((!doass||(EX))?((void)0):assfail(#EX, __FILE__, __LINE__))
-# endif	/* lint */
+# define ASSERT(EX)	((EX) ? ((void)0) : assfail(#EX, __FILE__, __LINE__))
 #else
 # define ASSERT(x)	((void)0)
 #endif
 
-extern int doass;		/* dynamically turn off asserts */
 extern void assfail(char *, char *, int);
 #ifdef DEBUG
 extern unsigned long random(void);
-- 
cgit v1.2.3


From bd5a876ac4c130e8e1986dcdbb21839ae4cd91c0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@sgi.com>
Date: Tue, 21 Jun 2005 15:47:39 +1000
Subject: [XFS] (mostly) remove xfs_inval_cached_pages  Since the last round of
 direct I/O locking changes it is just a wrapper around VOP_FLUSHINVAL_PAGES,
 so it's not nessecary anymore.  Keep a simplified version for kernels <
 2.4.22, as these don't have the changed direct I/O locking.

SGI-PV: 938064
SGI-Modid: xfs-linux:xfs-kern:194420a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_lrw.c | 24 ------------------------
 fs/xfs/linux-2.6/xfs_lrw.h |  2 --
 fs/xfs/xfs_dfrag.c         |  7 ++++---
 fs/xfs/xfs_vnodeops.c      | 16 +++++++++++++---
 4 files changed, 17 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index aa9daaea6c3..46b61a859af 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -209,30 +209,6 @@ unlock:
 	return (-status);
 }
 
-/*
- * xfs_inval_cached_pages
- * 
- * This routine is responsible for keeping direct I/O and buffered I/O
- * somewhat coherent.  From here we make sure that we're at least
- * temporarily holding the inode I/O lock exclusively and then call
- * the page cache to flush and invalidate any cached pages.  If there
- * are no cached pages this routine will be very quick.
- */
-void
-xfs_inval_cached_pages(
-	vnode_t		*vp,
-	xfs_iocore_t	*io,
-	xfs_off_t	offset,
-	int		write,
-	int		relock)
-{
-	if (VN_CACHED(vp)) {
-		xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1);
-		VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED);
-	}
-
-}
-
 ssize_t			/* bytes read, or (-)  error */
 xfs_read(
 	bhv_desc_t		*bdp,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index d723e35254a..f197a720e39 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -94,8 +94,6 @@ extern int xfs_bdstrat_cb(struct xfs_buf *);
 
 extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t,
 				xfs_fsize_t, xfs_fsize_t);
-extern void xfs_inval_cached_pages(struct vnode	*, struct xfs_iocore *,
-				xfs_off_t, int, int);
 extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
 				const struct iovec *, unsigned int,
 				loff_t *, int, struct cred *);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 63abdc2ac7f..681be5c93af 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -180,9 +180,10 @@ xfs_swapext(
 		goto error0;
 	}
 
-	if (VN_CACHED(tvp) != 0)
-		xfs_inval_cached_pages(XFS_ITOV(tip), &(tip->i_iocore),
-						(xfs_off_t)0, 0, 0);
+	if (VN_CACHED(tvp) != 0) {
+		xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1);
+		VOP_FLUSHINVAL_PAGES(tvp, 0, -1, FI_REMAPF_LOCKED);
+	}
 
 	/* Verify O_DIRECT for ftmp */
 	if (VN_CACHED(tvp) != 0) {
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 96dc18b73cf..d64ebcfa0b6 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -4329,6 +4329,7 @@ xfs_free_file_space(
 	xfs_off_t		len,
 	int			attr_flags)
 {
+	vnode_t			*vp;
 	int			committed;
 	int			done;
 	xfs_off_t		end_dmi_offset;
@@ -4349,9 +4350,11 @@ xfs_free_file_space(
 	xfs_trans_t		*tp;
 	int			need_iolock = 1;
 
-	vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
+	vp = XFS_ITOV(ip);
 	mp = ip->i_mount;
 
+	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+
 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return error;
 
@@ -4368,7 +4371,7 @@ xfs_free_file_space(
 	    DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) {
 		if (end_dmi_offset > ip->i_d.di_size)
 			end_dmi_offset = ip->i_d.di_size;
-		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip),
+		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp,
 				offset, end_dmi_offset - offset,
 				AT_DELAY_FLAG(attr_flags), NULL);
 		if (error)
@@ -4387,7 +4390,14 @@ xfs_free_file_space(
 	ioffset = offset & ~(rounding - 1);
 	if (ilen & (rounding - 1))
 		ilen = (ilen + rounding) & ~(rounding - 1);
-	xfs_inval_cached_pages(XFS_ITOV(ip), &(ip->i_iocore), ioffset, 0, 0);
+
+	if (VN_CACHED(vp) != 0) {
+		xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1,
+				ctooff(offtoct(ioffset)), -1);
+		VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(ioffset)),
+				-1, FI_REMAPF_LOCKED);
+	}
+
 	/*
 	 * Need to zero the stuff we're not freeing, on disk.
 	 * If its a realtime file & can't use unwritten extents then we
-- 
cgit v1.2.3


From 77bc5beb5977a166e41b87c9d55d8e9cf2b3a04f Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 21 Jun 2005 15:48:04 +1000
Subject: [XFS] Makes more sense to use the fsxattr interface instead of adding
 new ioctls for project IDs.

SGI-PV: 938145
SGI-Modid: xfs-linux:xfs-kern:22899a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl.c   | 33 ++++++++-------------------------
 fs/xfs/linux-2.6/xfs_ioctl32.c |  2 --
 fs/xfs/xfs_fs.h                |  5 ++---
 3 files changed, 10 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index df17d93bd09..05a447e51cc 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -777,8 +777,6 @@ xfs_ioctl(
 	case XFS_IOC_GETVERSION:
 	case XFS_IOC_GETXFLAGS:
 	case XFS_IOC_SETXFLAGS:
-	case XFS_IOC_GETPROJID:
-	case XFS_IOC_SETPROJID:
 	case XFS_IOC_FSGETXATTR:
 	case XFS_IOC_FSSETXATTR:
 	case XFS_IOC_FSGETXATTRA:
@@ -1176,7 +1174,8 @@ xfs_ioc_xattr(
 
 	switch (cmd) {
 	case XFS_IOC_FSGETXATTR: {
-		va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS;
+		va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \
+			     XFS_AT_NEXTENTS | XFS_AT_PROJID;
 		VOP_GETATTR(vp, &va, 0, NULL, error);
 		if (error)
 			return -error;
@@ -1184,6 +1183,7 @@ xfs_ioc_xattr(
 		fa.fsx_xflags	= va.va_xflags;
 		fa.fsx_extsize	= va.va_extsize;
 		fa.fsx_nextents = va.va_nextents;
+		fa.fsx_projid	= va.va_projid;
 
 		if (copy_to_user(arg, &fa, sizeof(fa)))
 			return -XFS_ERROR(EFAULT);
@@ -1198,9 +1198,10 @@ xfs_ioc_xattr(
 		if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
 			attr_flags |= ATTR_NONBLOCK;
 
-		va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE;
+		va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
 		va.va_xflags  = fa.fsx_xflags;
 		va.va_extsize = fa.fsx_extsize;
+		va.va_projid  = fa.fsx_projid;
 
 		VOP_SETATTR(vp, &va, attr_flags, NULL, error);
 		if (!error)
@@ -1209,7 +1210,8 @@ xfs_ioc_xattr(
 	}
 
 	case XFS_IOC_FSGETXATTRA: {
-		va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_ANEXTENTS;
+		va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \
+			     XFS_AT_ANEXTENTS | XFS_AT_PROJID;
 		VOP_GETATTR(vp, &va, 0, NULL, error);
 		if (error)
 			return -error;
@@ -1217,6 +1219,7 @@ xfs_ioc_xattr(
 		fa.fsx_xflags	= va.va_xflags;
 		fa.fsx_extsize	= va.va_extsize;
 		fa.fsx_nextents = va.va_anextents;
+		fa.fsx_projid	= va.va_projid;
 
 		if (copy_to_user(arg, &fa, sizeof(fa)))
 			return -XFS_ERROR(EFAULT);
@@ -1260,26 +1263,6 @@ xfs_ioc_xattr(
 		return 0;
 	}
 
-	case XFS_IOC_GETPROJID: {
-		va.va_mask = XFS_AT_PROJID;
-		VOP_GETATTR(vp, &va, 0, NULL, error);
-		if (error)
-			return -error;
-		if (copy_to_user(arg, &va.va_projid, sizeof(va.va_projid)))
-			return -XFS_ERROR(EFAULT);
-		return 0;
-	}
-
-	case XFS_IOC_SETPROJID: {
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-		va.va_mask = XFS_AT_PROJID;
-		if (copy_from_user(&va.va_projid, arg, sizeof(va.va_projid)))
-			return -XFS_ERROR(EFAULT);
-		VOP_SETATTR(vp, &va, 0, NULL, error);
-		return -error;
-	}
-
 	default:
 		return -ENOTTY;
 	}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index be72aca5944..0f8f1384eb3 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -100,8 +100,6 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
 	case XFS_IOC_GETBMAP:
 	case XFS_IOC_GETBMAPA:
 	case XFS_IOC_GETBMAPX:
-	case XFS_IOC_SETPROJID:
-	case XFS_IOC_GETPROJID:
 /* not handled
 	case XFS_IOC_FD_TO_HANDLE:
 	case XFS_IOC_PATH_TO_HANDLE:
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index a7bd4687fa5..095af0a5cff 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -60,7 +60,8 @@ struct fsxattr {
 	__u32		fsx_xflags;	/* xflags field value (get/set) */
 	__u32		fsx_extsize;	/* extsize field value (get/set)*/
 	__u32		fsx_nextents;	/* nextents field value (get)	*/
-	unsigned char	fsx_pad[16];
+	__u32		fsx_projid;	/* project identifier (get/set) */
+	unsigned char	fsx_pad[12];
 };
 #endif
 
@@ -477,8 +478,6 @@ typedef struct xfs_handle {
 /*	XFS_IOC_SETBIOSIZE ---- deprecated 46	   */
 /*	XFS_IOC_GETBIOSIZE ---- deprecated 47	   */
 #define XFS_IOC_GETBMAPX	_IOWR('X', 56, struct getbmap)
-#define XFS_IOC_SETPROJID	_IOWR('X', 57, __uint32_t)
-#define XFS_IOC_GETPROJID	_IOWR('X', 58, __uint32_t)
 
 /*
  * ioctl commands that replace IRIX syssgi()'s
-- 
cgit v1.2.3


From 06d10dd9ca70ff1318ff2b871ff5f61a94223d9f Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 21 Jun 2005 15:48:47 +1000
Subject: [XFS] Merge fixes into realtime quota code, since one/two reported,
 still not enabled though.

SGI-PV: 938145
SGI-Modid: xfs-linux:xfs-kern:22900a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/quota/xfs_qm.c          |  34 +++++++------
 fs/xfs/quota/xfs_qm.h          |   6 ++-
 fs/xfs/quota/xfs_quota_priv.h  |   1 +
 fs/xfs/quota/xfs_trans_dquot.c |  50 +++++++++---------
 fs/xfs/xfs_bmap.c              | 112 +++++++++++++++++++++++------------------
 fs/xfs/xfs_iomap.c             |  65 ++++++++++--------------
 fs/xfs/xfs_quota.h             |  11 +---
 fs/xfs/xfs_vnodeops.c          |   5 +-
 8 files changed, 143 insertions(+), 141 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 3ea75972767..3254cb7b87f 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1251,6 +1251,10 @@ xfs_qm_init_quotainfo(
 				INT_GET(ddqp->d_iwarns, ARCH_CONVERT) ?
 				INT_GET(ddqp->d_iwarns, ARCH_CONVERT) :
 				XFS_QM_IWARNLIMIT;
+		qinf->qi_rtbwarnlimit =
+				INT_GET(ddqp->d_rtbwarns, ARCH_CONVERT) ?
+				INT_GET(ddqp->d_rtbwarns, ARCH_CONVERT) :
+				XFS_QM_RTBWARNLIMIT;
 		qinf->qi_bhardlimit =
 				INT_GET(ddqp->d_blk_hardlimit, ARCH_CONVERT);
 		qinf->qi_bsoftlimit =
@@ -1276,6 +1280,7 @@ xfs_qm_init_quotainfo(
 		qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
 		qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
 		qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
+		qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
 	}
 
 	return (0);
@@ -2624,6 +2629,9 @@ xfs_qm_vop_chown(
 	xfs_dquot_t	*newdq)
 {
 	xfs_dquot_t	*prevdq;
+	uint		bfield = XFS_IS_REALTIME_INODE(ip) ?
+				 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
+
 	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
 	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
 
@@ -2632,20 +2640,12 @@ xfs_qm_vop_chown(
 	ASSERT(prevdq);
 	ASSERT(prevdq != newdq);
 
-	xfs_trans_mod_dquot(tp, prevdq,
-			    XFS_TRANS_DQ_BCOUNT,
-			    -(ip->i_d.di_nblocks));
-	xfs_trans_mod_dquot(tp, prevdq,
-			    XFS_TRANS_DQ_ICOUNT,
-			    -1);
+	xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
+	xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
 
 	/* the sparkling new dquot */
-	xfs_trans_mod_dquot(tp, newdq,
-			    XFS_TRANS_DQ_BCOUNT,
-			    ip->i_d.di_nblocks);
-	xfs_trans_mod_dquot(tp, newdq,
-			    XFS_TRANS_DQ_ICOUNT,
-			    1);
+	xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
+	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
 
 	/*
 	 * Take an extra reference, because the inode
@@ -2673,7 +2673,7 @@ xfs_qm_vop_chown_reserve(
 {
 	int		error;
 	xfs_mount_t	*mp;
-	uint		delblks;
+	uint		delblks, blkflags;
 	xfs_dquot_t	*unresudq, *unresgdq, *delblksudq, *delblksgdq;
 
 	ASSERT(XFS_ISLOCKED_INODE(ip));
@@ -2682,6 +2682,8 @@ xfs_qm_vop_chown_reserve(
 
 	delblks = ip->i_delayed_blks;
 	delblksudq = delblksgdq = unresudq = unresgdq = NULL;
+	blkflags = XFS_IS_REALTIME_INODE(ip) ?
+			XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
 
 	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
 	    ip->i_d.di_uid != (uid_t)INT_GET(udqp->q_core.d_id, ARCH_CONVERT)) {
@@ -2711,7 +2713,7 @@ xfs_qm_vop_chown_reserve(
 
 	if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
 				delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
-				flags | XFS_QMOPT_RES_REGBLKS)))
+				flags | blkflags)))
 		return (error);
 
 	/*
@@ -2728,11 +2730,11 @@ xfs_qm_vop_chown_reserve(
 		ASSERT(unresudq || unresgdq);
 		if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
 				delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
-				flags | XFS_QMOPT_RES_REGBLKS)))
+				flags | blkflags)))
 			return (error);
 		xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
 				unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
-				XFS_QMOPT_RES_REGBLKS);
+				blkflags);
 	}
 
 	return (0);
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 78196877954..b03eecf3b6c 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -133,8 +133,9 @@ typedef struct xfs_quotainfo {
 	time_t		 qi_btimelimit;	 /* limit for blks timer */
 	time_t		 qi_itimelimit;	 /* limit for inodes timer */
 	time_t		 qi_rtbtimelimit;/* limit for rt blks timer */
-	xfs_qwarncnt_t	 qi_bwarnlimit;	 /* limit for num warnings */
-	xfs_qwarncnt_t	 qi_iwarnlimit;	 /* limit for num warnings */
+	xfs_qwarncnt_t	 qi_bwarnlimit;	 /* limit for blks warnings */
+	xfs_qwarncnt_t	 qi_iwarnlimit;	 /* limit for inodes warnings */
+	xfs_qwarncnt_t	 qi_rtbwarnlimit;/* limit for rt blks warnings */
 	mutex_t		 qi_quotaofflock;/* to serialize quotaoff */
 	xfs_filblks_t	 qi_dqchunklen;	 /* # BBs in a chunk of dqs */
 	uint		 qi_dqperchunk;	 /* # ondisk dqs in above chunk */
@@ -176,6 +177,7 @@ typedef struct xfs_dquot_acct {
 
 #define XFS_QM_BWARNLIMIT	5
 #define XFS_QM_IWARNLIMIT	5
+#define XFS_QM_RTBWARNLIMIT	5
 
 #define XFS_QM_LOCK(xqm)	(mutex_lock(&xqm##_lock, PINOD))
 #define XFS_QM_UNLOCK(xqm)	(mutex_unlock(&xqm##_lock))
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 472afd3570c..bf413e70ec0 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -56,6 +56,7 @@
 #define XFS_QI_RTBTIMELIMIT(mp) ((mp)->m_quotainfo->qi_rtbtimelimit)
 #define XFS_QI_ITIMELIMIT(mp)	((mp)->m_quotainfo->qi_itimelimit)
 #define XFS_QI_BWARNLIMIT(mp)	((mp)->m_quotainfo->qi_bwarnlimit)
+#define XFS_QI_RTBWARNLIMIT(mp)	((mp)->m_quotainfo->qi_rtbwarnlimit)
 #define XFS_QI_IWARNLIMIT(mp)	((mp)->m_quotainfo->qi_iwarnlimit)
 #define XFS_QI_QOFFLOCK(mp)	((mp)->m_quotainfo->qi_quotaofflock)
 
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 565efb73c23..3b99daf8a64 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -497,7 +497,7 @@ xfs_trans_apply_dquot_deltas(
 			 * Adjust the RT reservation.
 			 */
 			if (qtrx->qt_rtblk_res != 0) {
-				if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
+				if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
 					if (qtrx->qt_rtblk_res >
 					    qtrx->qt_rtblk_res_used)
 					       dqp->q_res_rtbcount -= (xfs_qcnt_t)
@@ -530,12 +530,6 @@ xfs_trans_apply_dquot_deltas(
 					    (xfs_qcnt_t)qtrx->qt_icount_delta;
 			}
 
-
-#ifdef QUOTADEBUG
-			if (qtrx->qt_rtblk_res != 0)
-				cmn_err(CE_DEBUG, "RT res %d for 0x%p\n",
-					(int) qtrx->qt_rtblk_res, dqp);
-#endif
 			ASSERT(dqp->q_res_bcount >=
 				INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT));
 			ASSERT(dqp->q_res_icount >=
@@ -636,7 +630,10 @@ xfs_trans_dqresv(
 	int		error;
 	xfs_qcnt_t	hardlimit;
 	xfs_qcnt_t	softlimit;
-	time_t		btimer;
+	time_t		timer;
+	xfs_qwarncnt_t	warns;
+	xfs_qwarncnt_t	warnlimit;
+	xfs_qcnt_t	count;
 	xfs_qcnt_t	*resbcountp;
 	xfs_quotainfo_t	*q = mp->m_quotainfo;
 
@@ -651,7 +648,9 @@ xfs_trans_dqresv(
 		softlimit = INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT);
 		if (!softlimit)
 			softlimit = q->qi_bsoftlimit;
-		btimer = INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT);
+		timer = INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT);
+		warns = INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT);
+		warnlimit = XFS_QI_BWARNLIMIT(dqp->q_mount);
 		resbcountp = &dqp->q_res_bcount;
 	} else {
 		ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
@@ -661,7 +660,9 @@ xfs_trans_dqresv(
 		softlimit = INT_GET(dqp->q_core.d_rtb_softlimit, ARCH_CONVERT);
 		if (!softlimit)
 			softlimit = q->qi_rtbsoftlimit;
-		btimer = INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT);
+		timer = INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT);
+		warns = INT_GET(dqp->q_core.d_rtbwarns, ARCH_CONVERT);
+		warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount);
 		resbcountp = &dqp->q_res_rtbcount;
 	}
 	error = 0;
@@ -691,37 +692,36 @@ xfs_trans_dqresv(
 				 * If timer or warnings has expired,
 				 * return EDQUOT
 				 */
-				if ((btimer != 0 && get_seconds() > btimer) ||
-				    (dqp->q_core.d_bwarns &&
-				     INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT) >=
-				     XFS_QI_BWARNLIMIT(dqp->q_mount))) {
+				if ((timer != 0 && get_seconds() > timer) ||
+				    (warns != 0 && warns >= warnlimit)) {
 					error = EDQUOT;
 					goto error_return;
 				}
 			}
 		}
 		if (ninos > 0) {
-			hardlimit = INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT);
+			count = INT_GET(dqp->q_core.d_icount, ARCH_CONVERT);
+			timer = INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT);
+			warns = INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT);
+			warnlimit = XFS_QI_IWARNLIMIT(dqp->q_mount);
+			hardlimit = INT_GET(dqp->q_core.d_ino_hardlimit,
+						ARCH_CONVERT);
 			if (!hardlimit)
 				hardlimit = q->qi_ihardlimit;
-			softlimit = INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT);
+			softlimit = INT_GET(dqp->q_core.d_ino_softlimit,
+						ARCH_CONVERT);
 			if (!softlimit)
 				softlimit = q->qi_isoftlimit;
-			if (hardlimit > 0ULL &&
-			    INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >= hardlimit) {
+			if (hardlimit > 0ULL && count >= hardlimit) {
 				error = EDQUOT;
 				goto error_return;
-			} else if (softlimit > 0ULL &&
-				   INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >= softlimit) {
+			} else if (softlimit > 0ULL && count >= softlimit) {
 				/*
 				 * If timer or warnings has expired,
 				 * return EDQUOT
 				 */
-				if ((dqp->q_core.d_itimer &&
-				     get_seconds() > INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT)) ||
-				    (dqp->q_core.d_iwarns &&
-				     INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT) >=
-				     XFS_QI_IWARNLIMIT(dqp->q_mount))) {
+				if ((timer != 0 && get_seconds() > timer) ||
+				     (warns != 0 && warns >= warnlimit)) {
 					error = EDQUOT;
 					goto error_return;
 				}
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index f6f5ad35734..6f5d283888a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4545,18 +4545,17 @@ xfs_bmapi(
 	xfs_extlen_t	alen;		/* allocated extent length */
 	xfs_fileoff_t	aoff;		/* allocated file offset */
 	xfs_bmalloca_t	bma;		/* args for xfs_bmap_alloc */
-	char		contig;		/* allocation must be one extent */
 	xfs_btree_cur_t	*cur;		/* bmap btree cursor */
-	char		delay;		/* this request is for delayed alloc */
 	xfs_fileoff_t	end;		/* end of mapped file region */
 	int		eof;		/* we've hit the end of extent list */
+	char		contig;		/* allocation must be one extent */
+	char		delay;		/* this request is for delayed alloc */
+	char		exact;		/* don't do all of wasdelayed extent */
 	xfs_bmbt_rec_t	*ep;		/* extent list entry pointer */
 	int		error;		/* error return */
-	char		exact;		/* don't do all of wasdelayed extent */
 	xfs_bmbt_irec_t	got;		/* current extent list record */
 	xfs_ifork_t	*ifp;		/* inode fork pointer */
 	xfs_extlen_t	indlen;		/* indirect blocks length */
-	char		inhole;		/* current location is hole in file */
 	xfs_extnum_t	lastx;		/* last useful extent number */
 	int		logflags;	/* flags for transaction logging */
 	xfs_extlen_t	minleft;	/* min blocks left after allocation */
@@ -4567,13 +4566,15 @@ xfs_bmapi(
 	xfs_extnum_t	nextents;	/* number of extents in file */
 	xfs_fileoff_t	obno;		/* old block number (offset) */
 	xfs_bmbt_irec_t	prev;		/* previous extent list record */
-	char		stateless;	/* ignore state flag set */
 	int		tmp_logflags;	/* temp flags holder */
+	int		whichfork;	/* data or attr fork */
+	char		inhole;		/* current location is hole in file */
+	char		stateless;	/* ignore state flag set */
 	char		trim;		/* output trimmed to match range */
 	char		userdata;	/* allocating non-metadata */
 	char		wasdelay;	/* old extent was delayed */
-	int		whichfork;	/* data or attr fork */
 	char		wr;		/* this is a write request */
+	char		rt;		/* this is a realtime file */
 	char		rsvd;		/* OK to allocate reserved blocks */
 #ifdef DEBUG
 	xfs_fileoff_t	orig_bno;	/* original block number value */
@@ -4603,6 +4604,7 @@ xfs_bmapi(
 	}
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
+	rt = XFS_IS_REALTIME_INODE(ip);
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	ASSERT(ifp->if_ext_max ==
 	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
@@ -4707,9 +4709,16 @@ xfs_bmapi(
 			}
 			minlen = contig ? alen : 1;
 			if (delay) {
-				indlen = (xfs_extlen_t)
-					xfs_bmap_worst_indlen(ip, alen);
-				ASSERT(indlen > 0);
+				xfs_extlen_t	extsz = 0;
+
+				/* Figure out the extent size, adjust alen */
+				if (rt) {
+					if (!(extsz = ip->i_d.di_extsize))
+						extsz = mp->m_sb.sb_rextsize;
+					alen = roundup(alen, extsz);
+					extsz = alen / mp->m_sb.sb_rextsize;
+				}
+
 				/*
 				 * Make a transaction-less quota reservation for
 				 * delayed allocation blocks. This number gets
@@ -4717,8 +4726,10 @@ xfs_bmapi(
 				 * We return EDQUOT if we haven't allocated
 				 * blks already inside this loop;
 				 */
-				if (XFS_TRANS_RESERVE_BLKQUOTA(
-						mp, NULL, ip, (long)alen)) {
+				if (XFS_TRANS_RESERVE_QUOTA_NBLKS(
+						mp, NULL, ip, (long)alen, 0,
+						rt ? XFS_QMOPT_RES_RTBLKS :
+						     XFS_QMOPT_RES_REGBLKS)) {
 					if (n == 0) {
 						*nmap = 0;
 						ASSERT(cur == NULL);
@@ -4731,40 +4742,34 @@ xfs_bmapi(
 				 * Split changing sb for alen and indlen since
 				 * they could be coming from different places.
 				 */
-				if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
-					xfs_extlen_t	extsz;
-					xfs_extlen_t	ralen;
-					if (!(extsz = ip->i_d.di_extsize))
-						extsz = mp->m_sb.sb_rextsize;
-					ralen = roundup(alen, extsz);
-					ralen = ralen / mp->m_sb.sb_rextsize;
-					if (xfs_mod_incore_sb(mp,
-						XFS_SBS_FREXTENTS,
-						-(ralen), rsvd)) {
-						if (XFS_IS_QUOTA_ON(ip->i_mount))
-							XFS_TRANS_UNRESERVE_BLKQUOTA(
-						     		mp, NULL, ip,
-								(long)alen);
-						break;
-					}
-				} else {
-					if (xfs_mod_incore_sb(mp,
-							      XFS_SBS_FDBLOCKS,
-							      -(alen), rsvd)) {
-						if (XFS_IS_QUOTA_ON(ip->i_mount))
-							XFS_TRANS_UNRESERVE_BLKQUOTA(
-								mp, NULL, ip,
-								(long)alen);
-						break;
-					}
-				}
+				indlen = (xfs_extlen_t)
+					xfs_bmap_worst_indlen(ip, alen);
+				ASSERT(indlen > 0);
 
-				if (xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
-						-(indlen), rsvd)) {
-					XFS_TRANS_UNRESERVE_BLKQUOTA(
-						mp, NULL, ip, (long)alen);
+				if (rt)
+					error = xfs_mod_incore_sb(mp,
+							XFS_SBS_FREXTENTS,
+							-(extsz), rsvd);
+				else
+					error = xfs_mod_incore_sb(mp,
+							XFS_SBS_FDBLOCKS,
+							-(alen), rsvd);
+				if (!error)
+					error = xfs_mod_incore_sb(mp,
+							XFS_SBS_FDBLOCKS,
+							-(indlen), rsvd);
+
+				if (error) {
+					if (XFS_IS_QUOTA_ON(ip->i_mount))
+						/* unreserve the blocks now */
+						XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
+							mp, NULL, ip,
+							(long)alen, 0, rt ?
+							XFS_QMOPT_RES_RTBLKS :
+							XFS_QMOPT_RES_REGBLKS);
 					break;
 				}
+
 				ip->i_delayed_blks += alen;
 				abno = NULLSTARTBLOCK(indlen);
 			} else {
@@ -5389,13 +5394,24 @@ xfs_bunmapi(
 		}
 		if (wasdel) {
 			ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
-			xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
-				(int)del.br_blockcount, rsvd);
-			/* Unreserve our quota space */
-			XFS_TRANS_RESERVE_QUOTA_NBLKS(
-				mp, NULL, ip, -((long)del.br_blockcount), 0,
-				isrt ?	XFS_QMOPT_RES_RTBLKS :
+			/* Update realtim/data freespace, unreserve quota */
+			if (isrt) {
+				xfs_filblks_t rtexts;
+
+				rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
+				do_div(rtexts, mp->m_sb.sb_rextsize);
+				xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
+						(int)rtexts, rsvd);
+				XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
+					-((long)del.br_blockcount), 0,
+					XFS_QMOPT_RES_RTBLKS);
+			} else {
+				xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
+						(int)del.br_blockcount, rsvd);
+				XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
+					-((long)del.br_blockcount), 0,
 					XFS_QMOPT_RES_REGBLKS);
+			}
 			ip->i_delayed_blks -= del.br_blockcount;
 			if (cur)
 				cur->bc_private.b.flags |=
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 469e1a7939d..2edd6769e5d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -385,15 +385,15 @@ xfs_iomap_write_direct(
 	int		nimaps, maps;
 	int		error;
 	int		bmapi_flag;
+	int		quota_flag;
 	int		rt;
 	xfs_trans_t	*tp;
 	xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp;
 	xfs_bmap_free_t free_list;
 	int		aeof;
-	xfs_filblks_t	datablocks;
+	xfs_filblks_t	datablocks, qblocks, resblks;
 	int		committed;
 	int		numrtextents;
-	uint		resblks;
 
 	/*
 	 * Make sure that the dquots are there. This doesn't hold
@@ -419,7 +419,6 @@ xfs_iomap_write_direct(
 		xfs_fileoff_t	map_last_fsb;
 
 		map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff;
-
 		if (map_last_fsb < last_fsb) {
 			last_fsb = map_last_fsb;
 			count_fsb = last_fsb - offset_fsb;
@@ -428,56 +427,47 @@ xfs_iomap_write_direct(
 	}
 
 	/*
-	 * determine if reserving space on
-	 * the data or realtime partition.
+	 * Determine if reserving space on the data or realtime partition.
 	 */
 	if ((rt = XFS_IS_REALTIME_INODE(ip))) {
-		int	sbrtextsize, iprtextsize;
+		xfs_extlen_t	extsz;
 
-		sbrtextsize = mp->m_sb.sb_rextsize;
-		iprtextsize =
-			ip->i_d.di_extsize ? ip->i_d.di_extsize : sbrtextsize;
-		numrtextents = (count_fsb + iprtextsize - 1);
-		do_div(numrtextents, sbrtextsize);
+		if (!(extsz = ip->i_d.di_extsize))
+			extsz = mp->m_sb.sb_rextsize;
+		numrtextents = qblocks = (count_fsb + extsz - 1);
+		do_div(numrtextents, mp->m_sb.sb_rextsize);
+		quota_flag = XFS_QMOPT_RES_RTBLKS;
 		datablocks = 0;
 	} else {
-		datablocks = count_fsb;
+		datablocks = qblocks = count_fsb;
+		quota_flag = XFS_QMOPT_RES_REGBLKS;
 		numrtextents = 0;
 	}
 
 	/*
-	 * allocate and setup the transaction
+	 * Allocate and setup the transaction
 	 */
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-
 	resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
-
 	error = xfs_trans_reserve(tp, resblks,
 			XFS_WRITE_LOG_RES(mp), numrtextents,
 			XFS_TRANS_PERM_LOG_RES,
 			XFS_WRITE_LOG_COUNT);
 
 	/*
-	 * check for running out of space
+	 * Check for running out of space, note: need lock to return
 	 */
 	if (error)
-		/*
-		 * Free the transaction structure.
-		 */
 		xfs_trans_cancel(tp, 0);
-
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
 	if (error)
-		goto error_out; /* Don't return in above if .. trans ..,
-					need lock to return */
+		goto error_out;
 
-	if (XFS_TRANS_RESERVE_BLKQUOTA(mp, tp, ip, resblks)) {
+	if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) {
 		error = (EDQUOT);
 		goto error1;
 	}
-	nimaps = 1;
 
 	bmapi_flag = XFS_BMAPI_WRITE;
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
@@ -487,31 +477,29 @@ xfs_iomap_write_direct(
 		bmapi_flag |= XFS_BMAPI_PREALLOC;
 
 	/*
-	 * issue the bmapi() call to allocate the blocks
+	 * Issue the bmapi() call to allocate the blocks
 	 */
 	XFS_BMAP_INIT(&free_list, &firstfsb);
+	nimaps = 1;
 	imapp = &imap[0];
 	error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
 		bmapi_flag, &firstfsb, 0, imapp, &nimaps, &free_list);
-	if (error) {
+	if (error)
 		goto error0;
-	}
 
 	/*
-	 * complete the transaction
+	 * Complete the transaction
 	 */
-
 	error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
-	if (error) {
+	if (error)
 		goto error0;
-	}
-
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
-	if (error) {
+	if (error)
 		goto error_out;
-	}
 
-	/* copy any maps to caller's array and return any error. */
+	/*
+	 * Copy any maps to caller's array and return any error.
+	 */
 	if (nimaps == 0) {
 		error = (ENOSPC);
 		goto error_out;
@@ -530,10 +518,11 @@ xfs_iomap_write_direct(
         }
 	return 0;
 
- error0:	/* Cancel bmap, unlock inode, and cancel trans */
+error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
 	xfs_bmap_cancel(&free_list);
+	XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
 
- error1:	/* Just cancel transaction */
+error1:	/* Just cancel transaction */
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
 	*nmaps = 0;	/* nothing set-up here */
 
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 341cb4604c6..7134576ae7f 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -352,15 +352,8 @@ typedef struct xfs_dqtrxops {
 #define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \
 	XFS_DQTRXOP_VOID(mp, tp, qo_unreserve_and_mod_dquots)
 
-#define XFS_TRANS_RESERVE_BLKQUOTA(mp, tp, ip, nblks) \
-	XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, 0, \
-				XFS_QMOPT_RES_REGBLKS)
-#define XFS_TRANS_RESERVE_BLKQUOTA_FORCE(mp, tp, ip, nblks) \
-	XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, 0, \
-				XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES)
-#define XFS_TRANS_UNRESERVE_BLKQUOTA(mp, tp, ip, nblks) \
-	XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), 0, \
-				XFS_QMOPT_RES_REGBLKS)
+#define XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, flags) \
+	XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), -(ninos), flags)
 #define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \
 	XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \
 				f | XFS_QMOPT_RES_REGBLKS)
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index d64ebcfa0b6..1377c868f3f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -4175,9 +4175,8 @@ retry:
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp,
-				ip->i_udquot, ip->i_gdquot, resblks, 0, rt ?
-				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+		error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
+				ip->i_udquot, ip->i_gdquot, resblks, 0, 0);
 		if (error)
 			goto error1;
 
-- 
cgit v1.2.3


From 754002b4fb1b553bd8f978bb6f5aca7af46fde67 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 21 Jun 2005 15:49:06 +1000
Subject: [XFS] Merge a few minor fixes to the quota warning code.

SGI-PV: 938145
SGI-Modid: xfs-linux:xfs-kern:22901a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/quota/xfs_dquot.c       | 10 +++++++++-
 fs/xfs/quota/xfs_qm.c          |  2 ++
 fs/xfs/quota/xfs_qm_syscalls.c | 24 +++++++++++++++++++++---
 3 files changed, 32 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 68089f56d5c..32ec7f4467d 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -286,7 +286,9 @@ xfs_qm_adjust_dqlimits(
  * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
  * enforcement's off.
  * In contrast, warnings are a little different in that they don't
- * 'automatically' get started when limits get exceeded.
+ * 'automatically' get started when limits get exceeded.  They do
+ * get reset to zero, however, when we find the count to be under
+ * the soft limit (they are only ever set non-zero via userspace).
  */
 void
 xfs_qm_adjust_dqtimers(
@@ -315,6 +317,8 @@ xfs_qm_adjust_dqtimers(
 				INT_GET(d->d_blk_hardlimit, ARCH_CONVERT)))) {
 			INT_SET(d->d_btimer, ARCH_CONVERT,
 				get_seconds() + XFS_QI_BTIMELIMIT(mp));
+		} else {
+			d->d_bwarns = 0;
 		}
 	} else {
 		if ((!d->d_blk_softlimit ||
@@ -336,6 +340,8 @@ xfs_qm_adjust_dqtimers(
 				INT_GET(d->d_ino_hardlimit, ARCH_CONVERT)))) {
 			INT_SET(d->d_itimer, ARCH_CONVERT,
 				get_seconds() + XFS_QI_ITIMELIMIT(mp));
+		} else {
+			d->d_iwarns = 0;
 		}
 	} else {
 		if ((!d->d_ino_softlimit ||
@@ -357,6 +363,8 @@ xfs_qm_adjust_dqtimers(
 				INT_GET(d->d_rtb_hardlimit, ARCH_CONVERT)))) {
 			INT_SET(d->d_rtbtimer, ARCH_CONVERT,
 				get_seconds() + XFS_QI_RTBTIMELIMIT(mp));
+		} else {
+			d->d_rtbwarns = 0;
 		}
 	} else {
 		if ((!d->d_rtb_softlimit ||
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 3254cb7b87f..f665ca8f9e9 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1550,8 +1550,10 @@ xfs_qm_reset_dqcounts(
 		INT_SET(ddq->d_rtbcount, ARCH_CONVERT, 0ULL);
 		INT_SET(ddq->d_btimer, ARCH_CONVERT, (time_t)0);
 		INT_SET(ddq->d_itimer, ARCH_CONVERT, (time_t)0);
+		INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, (time_t)0);
 		INT_SET(ddq->d_bwarns, ARCH_CONVERT, 0UL);
 		INT_SET(ddq->d_iwarns, ARCH_CONVERT, 0UL);
+		INT_SET(ddq->d_rtbwarns, ARCH_CONVERT, 0UL);
 		ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
 	}
 
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 365a054f02d..68e98962dbe 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -617,7 +617,8 @@ xfs_qm_scall_setqlim(
 	if (!capable(CAP_SYS_ADMIN))
 		return XFS_ERROR(EPERM);
 
-	if ((newlim->d_fieldmask & (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK)) == 0)
+	if ((newlim->d_fieldmask &
+	    (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0)
 		return (0);
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
@@ -702,12 +703,23 @@ xfs_qm_scall_setqlim(
 		qdprintk("ihard %Ld < isoft %Ld\n", hard, soft);
 	}
 
+	/*
+	 * Update warnings counter(s) if requested
+	 */
+	if (newlim->d_fieldmask & FS_DQ_BWARNS)
+		INT_SET(ddq->d_bwarns, ARCH_CONVERT, newlim->d_bwarns);
+	if (newlim->d_fieldmask & FS_DQ_IWARNS)
+		INT_SET(ddq->d_iwarns, ARCH_CONVERT, newlim->d_iwarns);
+	if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
+		INT_SET(ddq->d_rtbwarns, ARCH_CONVERT, newlim->d_rtbwarns);
+
 	if (id == 0) {
 		/*
 		 * Timelimits for the super user set the relative time
 		 * the other users can be over quota for this file system.
 		 * If it is zero a default is used.  Ditto for the default
-		 * soft and hard limit values (already done, above).
+		 * soft and hard limit values (already done, above), and
+		 * for warnings.
 		 */
 		if (newlim->d_fieldmask & FS_DQ_BTIMER) {
 			mp->m_quotainfo->qi_btimelimit = newlim->d_btimer;
@@ -721,7 +733,13 @@ xfs_qm_scall_setqlim(
 			mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer;
 			INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, newlim->d_rtbtimer);
 		}
-	} else /* if (XFS_IS_QUOTA_ENFORCED(mp)) */ {
+		if (newlim->d_fieldmask & FS_DQ_BWARNS)
+			mp->m_quotainfo->qi_bwarnlimit = newlim->d_bwarns;
+		if (newlim->d_fieldmask & FS_DQ_IWARNS)
+			mp->m_quotainfo->qi_iwarnlimit = newlim->d_iwarns;
+		if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
+			mp->m_quotainfo->qi_rtbwarnlimit = newlim->d_rtbwarns;
+	} else {
 		/*
 		 * If the user is now over quota, start the timelimit.
 		 * The user will not be 'warned'.
-- 
cgit v1.2.3


From ad89d0212e32c5cf27dfcbad67b91a32b9878529 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Tue, 21 Jun 2005 15:57:57 +1000
Subject: [XFS] Remove some debugging code from quota syscalls.

SGI-PV: 932952
SGI-Modid: xfs-linux-melb:xfs-kern:22929a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/quota/xfs_dquot.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 32ec7f4467d..46ce1e3ce1d 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -911,7 +911,6 @@ xfs_qm_dqget(
 	if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
 	    (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
 	    (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
-printk("XQM: ESRCH1\n");
 		return (ESRCH);
 	}
 	h = XFS_DQ_HASH(mp, id, type);
@@ -989,7 +988,6 @@ printk("XQM: ESRCH1\n");
 				  &dqp))) {
 		if (ip)
 			xfs_ilock(ip, XFS_ILOCK_EXCL);
-if (error == ESRCH) printk("XQM: ESRCH2\n");
 		return (error);
 	}
 
@@ -1015,7 +1013,6 @@ if (error == ESRCH) printk("XQM: ESRCH2\n");
 		if (! XFS_IS_DQTYPE_ON(mp, type)) {
 			/* inode stays locked on return */
 			xfs_qm_dqdestroy(dqp);
-printk("XQM: ESRCH3\n");
 			return XFS_ERROR(ESRCH);
 		}
 		/*
-- 
cgit v1.2.3


From 2c6e5a839f92591a4bc6cac4a575d42151645af3 Mon Sep 17 00:00:00 2001
From: Greg KH <gregkh@suse.de>
Date: Tue, 21 Jun 2005 15:24:19 -0700
Subject: [PATCH] devfs: remove devfs from Kconfig preventing it from being
 built

Here's a much smaller patch to simply disable devfs from the build.  If
this goes well, and there are no complaints for a few weeks, I'll resend
my big "devfs-die-die-die" series of patches that rip the whole thing
out of the kernel tree.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Kconfig | 50 --------------------------------------------------
 1 file changed, 50 deletions(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 6a4ad4bb7a5..178e27494b7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -741,56 +741,6 @@ config SYSFS
 
 	Designers of embedded systems may wish to say N here to conserve space.
 
-config DEVFS_FS
-	bool "/dev file system support (OBSOLETE)"
-	depends on EXPERIMENTAL
-	help
-	  This is support for devfs, a virtual file system (like /proc) which
-	  provides the file system interface to device drivers, normally found
-	  in /dev. Devfs does not depend on major and minor number
-	  allocations. Device drivers register entries in /dev which then
-	  appear automatically, which means that the system administrator does
-	  not have to create character and block special device files in the
-	  /dev directory using the mknod command (or MAKEDEV script) anymore.
-
-	  This is work in progress. If you want to use this, you *must* read
-	  the material in <file:Documentation/filesystems/devfs/>, especially
-	  the file README there.
-
-	  Note that devfs no longer manages /dev/pts!  If you are using UNIX98
-	  ptys, you will also need to mount the /dev/pts filesystem (devpts).
-
-	  Note that devfs has been obsoleted by udev,
-	  <http://www.kernel.org/pub/linux/utils/kernel/hotplug/>.
-	  It has been stripped down to a bare minimum and is only provided for
-	  legacy installations that use its naming scheme which is
-	  unfortunately different from the names normal Linux installations
-	  use.
-
-	  If unsure, say N.
-
-config DEVFS_MOUNT
-	bool "Automatically mount at boot"
-	depends on DEVFS_FS
-	help
-	  This option appears if you have CONFIG_DEVFS_FS enabled. Setting
-	  this to 'Y' will make the kernel automatically mount devfs onto /dev
-	  when the system is booted, before the init thread is started.
-	  You can override this with the "devfs=nomount" boot option.
-
-	  If unsure, say N.
-
-config DEVFS_DEBUG
-	bool "Debug devfs"
-	depends on DEVFS_FS
-	help
-	  If you say Y here, then the /dev file system code will generate
-	  debugging messages. See the file
-	  <file:Documentation/filesystems/devfs/boot-options> for more
-	  details.
-
-	  If unsure, say N.
-
 config DEVPTS_FS_XATTR
 	bool "/dev/pts Extended Attributes"
 	depends on UNIX98_PTYS
-- 
cgit v1.2.3


From e1a40fa907498030b6e432c0dbcb06d7a9f14ee3 Mon Sep 17 00:00:00 2001
From: Dean Roehrich <roehrich@sgi.com>
Date: Wed, 22 Jun 2005 10:20:44 +1000
Subject: [XFS] Handle inode semaphores properly for dmapi queues

SGI-PV: 931572
SGI-Modid: xfs-linux-melb:xfs-kern:189560a

Signed-off-by: Dean Roehrich <roehrich@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_lrw.c | 10 ++++++++--
 fs/xfs/xfs_dmapi.h         | 23 ++++++++++++-----------
 2 files changed, 20 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 46b61a859af..acab58c4804 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -280,10 +280,11 @@ xfs_read(
 	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
 	    !(ioflags & IO_INVIS)) {
 		vrwlock_t locktype = VRWLOCK_READ;
+		int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
 
 		ret = -XFS_SEND_DATA(mp, DM_EVENT_READ,
 					BHV_TO_VNODE(bdp), *offset, size,
-					FILP_DELAY_FLAG(file), &locktype);
+					dmflags, &locktype);
 		if (ret) {
 			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 			goto unlock_isem;
@@ -843,11 +844,15 @@ retry:
 	    !(ioflags & IO_INVIS)) {
 
 		xfs_rwunlock(bdp, locktype);
+		if (need_isem)
+			up(&inode->i_sem);
 		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
 				DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
 				0, 0, 0); /* Delay flag intentionally  unused */
 		if (error)
-			goto out_unlock_isem;
+			goto out_nounlocks;
+		if (need_isem)
+			down(&inode->i_sem);
 		xfs_rwlock(bdp, locktype);
 		pos = xip->i_d.di_size;
 		ret = 0;
@@ -962,6 +967,7 @@ retry:
  out_unlock_isem:
 	if (need_isem)
 		up(&inode->i_sem);
+ out_nounlocks:
 	return -error;
 }
 
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index 16cf9f7a478..55c17adaaa3 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -166,27 +166,32 @@ typedef enum {
 #define DM_FLAGS_NDELAY		0x001	/* return EAGAIN after dm_pending() */
 #define DM_FLAGS_UNWANTED	0x002	/* event not in fsys dm_eventset_t */
 #define DM_FLAGS_ISEM		0x004	/* thread holds i_sem */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,4,21)
-/* i_alloc_sem was added in 2.4.22-pre1 */
 #define DM_FLAGS_IALLOCSEM_RD	0x010	/* thread holds i_alloc_sem rd */
 #define DM_FLAGS_IALLOCSEM_WR	0x020	/* thread holds i_alloc_sem wr */
-#endif
-#endif
 
 /*
  *	Based on IO_ISDIRECT, decide which i_ flag is set.
  */
-#ifdef DM_FLAGS_IALLOCSEM_RD
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0)
+#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
+			      DM_FLAGS_ISEM : 0)
+#define DM_SEM_FLAG_WR	(DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM)
+#endif
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
+    (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22))
 #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
 			      DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_ISEM)
 #define DM_SEM_FLAG_WR	(DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM)
-#else
+#endif
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21)
 #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
 			      0 : DM_FLAGS_ISEM)
 #define DM_SEM_FLAG_WR	(DM_FLAGS_ISEM)
 #endif
 
+
 /*
  *	Macros to turn caller specified delay/block flags into
  *	dm_send_xxxx_event flag DM_FLAGS_NDELAY.
@@ -209,8 +214,4 @@ void xfs_dm_exit(struct file_system_type *);
 #define XFS_DM_EXIT(fstype)
 #endif
 
-#define HAVE_XFS_DM_MM
-int xfs_dm_mm_get(struct vm_area_struct *vma);
-void xfs_dm_mm_put(struct vm_area_struct *vma);
-
 #endif  /* __XFS_DMAPI_H__ */
-- 
cgit v1.2.3


From 39c715b71740c4a78ba4769fb54826929bac03cb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 21 Jun 2005 17:14:34 -0700
Subject: [PATCH] smp_processor_id() cleanup

This patch implements a number of smp_processor_id() cleanup ideas that
Arjan van de Ven and I came up with.

The previous __smp_processor_id/_smp_processor_id/smp_processor_id API
spaghetti was hard to follow both on the implementational and on the
usage side.

Some of the complexity arose from picking wrong names, some of the
complexity comes from the fact that not all architectures defined
__smp_processor_id.

In the new code, there are two externally visible symbols:

 - smp_processor_id(): debug variant.

 - raw_smp_processor_id(): nondebug variant. Replaces all existing
   uses of _smp_processor_id() and __smp_processor_id(). Defined
   by every SMP architecture in include/asm-*/smp.h.

There is one new internal symbol, dependent on DEBUG_PREEMPT:

 - debug_smp_processor_id(): internal debug variant, mapped to
                             smp_processor_id().

Also, i moved debug_smp_processor_id() from lib/kernel_lock.c into a new
lib/smp_processor_id.c file.  All related comments got updated and/or
clarified.

I have build/boot tested the following 8 .config combinations on x86:

 {SMP,UP} x {PREEMPT,!PREEMPT} x {DEBUG_PREEMPT,!DEBUG_PREEMPT}

I have also build/boot tested x64 on UP/PREEMPT/DEBUG_PREEMPT.  (Other
architectures are untested, but should work just fine.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/xfs/linux-2.6/xfs_linux.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 71bb41019a1..7d7c8788ea7 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -145,10 +145,10 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
 #define xfs_inherit_nosymlinks	xfs_params.inherit_nosym.val
 #define xfs_rotorstep		xfs_params.rotorstep.val
 
-#ifndef __smp_processor_id
-#define __smp_processor_id()	smp_processor_id()
+#ifndef raw_smp_processor_id
+#define raw_smp_processor_id()	smp_processor_id()
 #endif
-#define current_cpu()		__smp_processor_id()
+#define current_cpu()		raw_smp_processor_id()
 #define current_pid()		(current->pid)
 #define current_fsuid(cred)	(current->fsuid)
 #define current_fsgid(cred)	(current->fsgid)
-- 
cgit v1.2.3


From 295ab93497ec703f7d6eaf0787dd9768b83035fe Mon Sep 17 00:00:00 2001
From: Nikita Danilov <nikita@clusterfs.com>
Date: Tue, 21 Jun 2005 17:14:38 -0700
Subject: [PATCH] mm: add /proc/zoneinfo

Add /proc/zoneinfo file to display information about memory zones.  Useful
to analyze VM behaviour.

Signed-off-by: Nikita Danilov <nikita@clusterfs.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/proc_misc.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index a60a3b3d8a7..63a9fbf1ac5 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -219,6 +219,19 @@ static struct file_operations fragmentation_file_operations = {
 	.release	= seq_release,
 };
 
+extern struct seq_operations zoneinfo_op;
+static int zoneinfo_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &zoneinfo_op);
+}
+
+static struct file_operations proc_zoneinfo_file_operations = {
+	.open		= zoneinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
 static int version_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
@@ -589,6 +602,7 @@ void __init proc_misc_init(void)
 	create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
 	create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
 	create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
+	create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
 	create_seq_entry("diskstats", 0, &proc_diskstats_operations);
 #ifdef CONFIG_MODULES
 	create_seq_entry("modules", 0, &proc_modules_operations);
-- 
cgit v1.2.3


From 1363c3cd8603a913a27e2995dccbd70d5312d8e6 Mon Sep 17 00:00:00 2001
From: Wolfgang Wander <wwc@rentec.com>
Date: Tue, 21 Jun 2005 17:14:49 -0700
Subject: [PATCH] Avoiding mmap fragmentation

Ingo recently introduced a great speedup for allocating new mmaps using the
free_area_cache pointer which boosts the specweb SSL benchmark by 4-5% and
causes huge performance increases in thread creation.

The downside of this patch is that it does lead to fragmentation in the
mmap-ed areas (visible via /proc/self/maps), such that some applications
that work fine under 2.4 kernels quickly run out of memory on any 2.6
kernel.

The problem is twofold:

  1) the free_area_cache is used to continue a search for memory where
     the last search ended.  Before the change new areas were always
     searched from the base address on.

     So now new small areas are cluttering holes of all sizes
     throughout the whole mmap-able region whereas before small holes
     tended to close holes near the base leaving holes far from the base
     large and available for larger requests.

  2) the free_area_cache also is set to the location of the last
     munmap-ed area so in scenarios where we allocate e.g.  five regions of
     1K each, then free regions 4 2 3 in this order the next request for 1K
     will be placed in the position of the old region 3, whereas before we
     appended it to the still active region 1, placing it at the location
     of the old region 2.  Before we had 1 free region of 2K, now we only
     get two free regions of 1K -> fragmentation.

The patch addresses thes issues by introducing yet another cache descriptor
cached_hole_size that contains the largest known hole size below the
current free_area_cache.  If a new request comes in the size is compared
against the cached_hole_size and if the request can be filled with a hole
below free_area_cache the search is started from the base instead.

The results look promising: Whereas 2.6.12-rc4 fragments quickly and my
(earlier posted) leakme.c test program terminates after 50000+ iterations
with 96 distinct and fragmented maps in /proc/self/maps it performs nicely
(as expected) with thread creation, Ingo's test_str02 with 20000 threads
requires 0.7s system time.

Taking out Ingo's patch (un-patch available per request) by basically
deleting all mentions of free_area_cache from the kernel and starting the
search for new memory always at the respective bases we observe: leakme
terminates successfully with 11 distinctive hardly fragmented areas in
/proc/self/maps but thread creating is gringdingly slow: 30+s(!) system
time for Ingo's test_str02 with 20000 threads.

Now - drumroll ;-) the appended patch works fine with leakme: it ends with
only 7 distinct areas in /proc/self/maps and also thread creation seems
sufficiently fast with 0.71s for 20000 threads.

Signed-off-by: Wolfgang Wander <wwc@rentec.com>
Credit-to: "Richard Purdie" <rpurdie@rpsys.net>
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu> (partly)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_aout.c     | 1 +
 fs/binfmt_elf.c      | 1 +
 fs/hugetlbfs/inode.c | 3 +++
 3 files changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 009b8920c1f..dd9baabaf01 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -316,6 +316,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	current->mm->brk = ex.a_bss +
 		(current->mm->start_brk = N_BSSADDR(ex));
 	current->mm->free_area_cache = current->mm->mmap_base;
+	current->mm->cached_hole_size = 0;
 
 	set_mm_counter(current->mm, rss, 0);
 	current->mm->mmap = NULL;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f8f6b6b7617..7976a238f0a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -775,6 +775,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	   change some of these later */
 	set_mm_counter(current->mm, rss, 0);
 	current->mm->free_area_cache = current->mm->mmap_base;
+	current->mm->cached_hole_size = 0;
 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 				 executable_stack);
 	if (retval < 0) {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 2af3338f891..3a9b6d179cb 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -122,6 +122,9 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 
 	start_addr = mm->free_area_cache;
 
+	if (len <= mm->cached_hole_size)
+		start_addr = TASK_UNMAPPED_BASE;
+
 full_search:
 	addr = ALIGN(start_addr, HPAGE_SIZE);
 
-- 
cgit v1.2.3


From 1ad539b2bd89bf2e129123eb24d5bcc4484a35de Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Tue, 21 Jun 2005 17:14:53 -0700
Subject: [PATCH] vm: try_to_free_pages unused argument

try_to_free_pages accepts a third argument, order, but hasn't used it since
before 2.6.0.  The following patch removes the argument and updates all the
calls to try_to_free_pages.

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 7e9e409feaa..0befa724ab9 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -528,7 +528,7 @@ static void free_more_memory(void)
 	for_each_pgdat(pgdat) {
 		zones = pgdat->node_zonelists[GFP_NOFS&GFP_ZONEMASK].zones;
 		if (*zones)
-			try_to_free_pages(zones, GFP_NOFS, 0);
+			try_to_free_pages(zones, GFP_NOFS);
 	}
 }
 
-- 
cgit v1.2.3


From 8680e22f296e75e5497edb660c59c6b4dcfbbd32 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <geraldsc@de.ibm.com>
Date: Tue, 21 Jun 2005 17:15:16 -0700
Subject: [PATCH] VFS: memory leak in do_kern_mount()

There is a memory leak during mount when CONFIG_SECURITY is enabled and
mount options are specified.

Signed-off-by: Gerald Schaefer <geraldsc@de.ibm.com>
Acked-by: James Morris <jmorris@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/super.c b/fs/super.c
index 3a1b8ca04ba..573bcc81bb8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -835,6 +835,7 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
 	mnt->mnt_parent = mnt;
 	mnt->mnt_namespace = current->namespace;
 	up_write(&sb->s_umount);
+	free_secdata(secdata);
 	put_filesystem(type);
 	return mnt;
 out_sb:
-- 
cgit v1.2.3


From 9b1e3afd6d56937ced3914971621d0f053ea9178 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Tue, 21 Jun 2005 17:16:38 -0700
Subject: [PATCH] autofs4: avoid panic on bind mount of autofs owned directory

While this is not a solution to bind and move mounts on autofs owned
directories it is necessary to fix the trady error handling.

At least it avoids the kernel panic I observed checking out bug #4589.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h | 13 +++++++++++++
 fs/autofs4/expire.c   |  5 +----
 fs/autofs4/root.c     |  6 +++++-
 3 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c7b2b889018..9c09641ce90 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -185,6 +185,19 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
 int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
 void autofs4_catatonic_mode(struct autofs_sb_info *);
 
+static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+{
+	int res = 0;
+
+	while (d_mountpoint(*dentry)) {
+		int followed = follow_down(mnt, dentry);
+		if (!followed)
+			break;
+		res = 1;
+	}
+	return res;
+}
+
 static inline int simple_positive(struct dentry *dentry)
 {
 	return dentry->d_inode && !d_unhashed(dentry);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 500425e24fb..feb6ac427d0 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -56,12 +56,9 @@ static int autofs4_check_mount(struct vfsmount *mnt, struct dentry *dentry)
 	mntget(mnt);
 	dget(dentry);
 
-	if (!follow_down(&mnt, &dentry))
+	if (!autofs4_follow_mount(&mnt, &dentry))
 		goto done;
 
-	while (d_mountpoint(dentry) && follow_down(&mnt, &dentry))
-		;
-
 	/* This is an autofs submount, we can't expire it */
 	if (is_autofs4_dentry(dentry))
 		goto done;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 3765c047f15..e137acf0543 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -205,7 +205,11 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 		struct vfsmount *fp_mnt = mntget(mnt);
 		struct dentry *fp_dentry = dget(dentry);
 
-		while (follow_down(&fp_mnt, &fp_dentry) && d_mountpoint(fp_dentry));
+		if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) {
+			dput(fp_dentry);
+			mntput(fp_mnt);
+			return -ENOENT;
+		}
 
 		fp = dentry_open(fp_dentry, fp_mnt, file->f_flags);
 		status = PTR_ERR(fp);
-- 
cgit v1.2.3


From cc9acc885819696c0ed00f4f0f0cda0c7583f116 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Tue, 21 Jun 2005 17:16:39 -0700
Subject: [PATCH] autofs4: post expire race fix

At the tail end of an expire it's possible for a process to enter
autofs4_wait, with a waitq type of NFY_NONE but find that the expire is
finished.  In this cause autofs4_wait will try to create a new wait but not
notify the daemon leading to a hang.  As the wait type is meant to delay mount
requests from revalidate or lookup during an expire and the expire is done all
we need to do is check if the dentry is a mountpoint.  If it's not then we're
done.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/waitq.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 5a40d36e5a5..fa2348dcd67 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -191,6 +191,13 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	}
 
 	if ( !wq ) {
+		/* Can't wait for an expire if there's no mount */
+		if (notify == NFY_NONE && !d_mountpoint(dentry)) {
+			kfree(name);
+			up(&sbi->wq_sem);
+			return -ENOENT;
+		}
+
 		/* Create a new wait queue */
 		wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
 		if ( !wq ) {
-- 
cgit v1.2.3


From 1684b2bba6972749bc9acee57585acd6c78050b2 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Tue, 21 Jun 2005 17:16:41 -0700
Subject: [PATCH] autofs4: bad lookup fix

For browsable autofs maps, a mount request that arrives at the same time an
expire is happening can fail to perform the needed mount.

This happens becuase the directory exists and so the revalidate succeeds when
we need it to fail so that lookup is called on the same dentry to do the
mount.  Instead lookup is called on the next path component which should be
whithin the mount, but the parent isn't mounted.

The solution is to allow the revalidate to continue and perform the mount as
no directory creation (at mount time) is needed for browsable mount entries.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/root.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e137acf0543..2a771ec6695 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -306,7 +306,14 @@ static int try_to_fill_dentry(struct dentry *dentry,
 		
 		DPRINTK("expire done status=%d", status);
 		
-		return 0;
+		/*
+		 * If the directory still exists the mount request must
+		 * continue otherwise it can't be followed at the right
+		 * time during the walk.
+		 */
+		status = d_invalidate(dentry);
+		if (status != -EBUSY)
+			return 0;
 	}
 
 	DPRINTK("dentry=%p %.*s ino=%p",
-- 
cgit v1.2.3


From 1d372116383f79e42a3eb010c7d6ec3dd28767b3 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 21 Jun 2005 17:16:42 -0700
Subject: [PATCH] rock: lindent it

Trying to turn rock.c into something which humans can read so we can fix some
bugs.

Start out by feeding it through scripts/Lindent.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/rock.c | 647 +++++++++++++++++++++++++++++++-------------------------
 1 file changed, 356 insertions(+), 291 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 089e79c6558..bbc348f0189 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -20,8 +20,7 @@
  * returns a symbolic link name, and a fourth one returns the extent number
  * for the file. */
 
-#define SIG(A,B) ((A) | ((B) << 8)) /* isonum_721() */
-
+#define SIG(A,B) ((A) | ((B) << 8))	/* isonum_721() */
 
 /* This is a way of ensuring that we have something in the system
    use fields that is compatible with Rock Ridge */
@@ -54,7 +53,7 @@
      CHR+=ISOFS_SB(inode->i_sb)->s_rock_offset;                \
      if (LEN<0) LEN=0;                                          \
   }                                                             \
-}                                     
+}
 
 #define MAYBE_CONTINUE(LABEL,DEV) \
   {if (buffer) { kfree(buffer); buffer = NULL; } \
@@ -85,280 +84,348 @@
   }}
 
 /* return length of name field; 0: not found, -1: to be ignored */
-int get_rock_ridge_filename(struct iso_directory_record * de,
-			    char * retname, struct inode * inode)
+int get_rock_ridge_filename(struct iso_directory_record *de,
+			    char *retname, struct inode *inode)
 {
-  int len;
-  unsigned char * chr;
-  CONTINUE_DECLS;
-  int retnamlen = 0, truncate=0;
- 
-  if (!ISOFS_SB(inode->i_sb)->s_rock) return 0;
-  *retname = 0;
-
-  SETUP_ROCK_RIDGE(de, chr, len);
- repeat:
-  {
-    struct rock_ridge * rr;
-    int sig;
-    
-    while (len > 2){ /* There may be one byte for padding somewhere */
-      rr = (struct rock_ridge *) chr;
-      if (rr->len < 3) goto out; /* Something got screwed up here */
-      sig = isonum_721(chr);
-      chr += rr->len; 
-      len -= rr->len;
-      if (len < 0) goto out;	/* corrupted isofs */
-
-      switch(sig){
-      case SIG('R','R'):
-	if((rr->u.RR.flags[0] & RR_NM) == 0) goto out;
-	break;
-      case SIG('S','P'):
-	CHECK_SP(goto out);
-	break;
-      case SIG('C','E'):
-	CHECK_CE;
-	break;
-      case SIG('N','M'):
-	if (truncate) break;
-	if (rr->len < 5) break;
-        /*
-	 * If the flags are 2 or 4, this indicates '.' or '..'.
-	 * We don't want to do anything with this, because it
-	 * screws up the code that calls us.  We don't really
-	 * care anyways, since we can just use the non-RR
-	 * name.
-	 */
-	if (rr->u.NM.flags & 6) {
-	  break;
-	}
+	int len;
+	unsigned char *chr;
+	CONTINUE_DECLS;
+	int retnamlen = 0, truncate = 0;
 
-	if (rr->u.NM.flags & ~1) {
-	  printk("Unsupported NM flag settings (%d)\n",rr->u.NM.flags);
-	  break;
-	}
-	if((strlen(retname) + rr->len - 5) >= 254) {
-	  truncate = 1;
-	  break;
+	if (!ISOFS_SB(inode->i_sb)->s_rock)
+		return 0;
+	*retname = 0;
+
+	SETUP_ROCK_RIDGE(de, chr, len);
+      repeat:
+	{
+		struct rock_ridge *rr;
+		int sig;
+
+		while (len > 2) {	/* There may be one byte for padding somewhere */
+			rr = (struct rock_ridge *)chr;
+			if (rr->len < 3)
+				goto out;	/* Something got screwed up here */
+			sig = isonum_721(chr);
+			chr += rr->len;
+			len -= rr->len;
+			if (len < 0)
+				goto out;	/* corrupted isofs */
+
+			switch (sig) {
+			case SIG('R', 'R'):
+				if ((rr->u.RR.flags[0] & RR_NM) == 0)
+					goto out;
+				break;
+			case SIG('S', 'P'):
+				CHECK_SP(goto out);
+				break;
+			case SIG('C', 'E'):
+				CHECK_CE;
+				break;
+			case SIG('N', 'M'):
+				if (truncate)
+					break;
+				if (rr->len < 5)
+					break;
+				/*
+				 * If the flags are 2 or 4, this indicates '.' or '..'.
+				 * We don't want to do anything with this, because it
+				 * screws up the code that calls us.  We don't really
+				 * care anyways, since we can just use the non-RR
+				 * name.
+				 */
+				if (rr->u.NM.flags & 6) {
+					break;
+				}
+
+				if (rr->u.NM.flags & ~1) {
+					printk
+					    ("Unsupported NM flag settings (%d)\n",
+					     rr->u.NM.flags);
+					break;
+				}
+				if ((strlen(retname) + rr->len - 5) >= 254) {
+					truncate = 1;
+					break;
+				}
+				strncat(retname, rr->u.NM.name, rr->len - 5);
+				retnamlen += rr->len - 5;
+				break;
+			case SIG('R', 'E'):
+				if (buffer)
+					kfree(buffer);
+				return -1;
+			default:
+				break;
+			}
+		}
 	}
-	strncat(retname, rr->u.NM.name, rr->len - 5);
-	retnamlen += rr->len - 5;
-	break;
-      case SIG('R','E'):
-	if (buffer) kfree(buffer);
-	return -1;
-      default:
-	break;
-      }
-    }
-  }
-  MAYBE_CONTINUE(repeat,inode);
-  if (buffer) kfree(buffer);
-  return retnamlen; /* If 0, this file did not have a NM field */
- out:
-  if(buffer) kfree(buffer);
-  return 0;
+	MAYBE_CONTINUE(repeat, inode);
+	if (buffer)
+		kfree(buffer);
+	return retnamlen;	/* If 0, this file did not have a NM field */
+      out:
+	if (buffer)
+		kfree(buffer);
+	return 0;
 }
 
 static int
 parse_rock_ridge_inode_internal(struct iso_directory_record *de,
 				struct inode *inode, int regard_xa)
 {
-  int len;
-  unsigned char * chr;
-  int symlink_len = 0;
-  CONTINUE_DECLS;
-
-  if (!ISOFS_SB(inode->i_sb)->s_rock) return 0;
-
-  SETUP_ROCK_RIDGE(de, chr, len);
-  if (regard_xa)
-   {
-     chr+=14;
-     len-=14;
-     if (len<0) len=0;
-   }
-   
- repeat:
-  {
-    int cnt, sig;
-    struct inode * reloc;
-    struct rock_ridge * rr;
-    int rootflag;
-    
-    while (len > 2){ /* There may be one byte for padding somewhere */
-      rr = (struct rock_ridge *) chr;
-      if (rr->len < 3) goto out; /* Something got screwed up here */
-      sig = isonum_721(chr);
-      chr += rr->len; 
-      len -= rr->len;
-      if (len < 0) goto out;	/* corrupted isofs */
-      
-      switch(sig){
+	int len;
+	unsigned char *chr;
+	int symlink_len = 0;
+	CONTINUE_DECLS;
+
+	if (!ISOFS_SB(inode->i_sb)->s_rock)
+		return 0;
+
+	SETUP_ROCK_RIDGE(de, chr, len);
+	if (regard_xa) {
+		chr += 14;
+		len -= 14;
+		if (len < 0)
+			len = 0;
+	}
+
+      repeat:
+	{
+		int cnt, sig;
+		struct inode *reloc;
+		struct rock_ridge *rr;
+		int rootflag;
+
+		while (len > 2) {	/* There may be one byte for padding somewhere */
+			rr = (struct rock_ridge *)chr;
+			if (rr->len < 3)
+				goto out;	/* Something got screwed up here */
+			sig = isonum_721(chr);
+			chr += rr->len;
+			len -= rr->len;
+			if (len < 0)
+				goto out;	/* corrupted isofs */
+
+			switch (sig) {
 #ifndef CONFIG_ZISOFS		/* No flag for SF or ZF */
-      case SIG('R','R'):
-	if((rr->u.RR.flags[0] & 
- 	    (RR_PX | RR_TF | RR_SL | RR_CL)) == 0) goto out;
-	break;
+			case SIG('R', 'R'):
+				if ((rr->u.RR.flags[0] &
+				     (RR_PX | RR_TF | RR_SL | RR_CL)) == 0)
+					goto out;
+				break;
 #endif
-      case SIG('S','P'):
-	CHECK_SP(goto out);
-	break;
-      case SIG('C','E'):
-	CHECK_CE;
-	break;
-      case SIG('E','R'):
-	ISOFS_SB(inode->i_sb)->s_rock = 1;
-	printk(KERN_DEBUG "ISO 9660 Extensions: ");
-	{ int p;
-	  for(p=0;p<rr->u.ER.len_id;p++) printk("%c",rr->u.ER.data[p]);
-	}
-	  printk("\n");
-	break;
-      case SIG('P','X'):
-	inode->i_mode  = isonum_733(rr->u.PX.mode);
-	inode->i_nlink = isonum_733(rr->u.PX.n_links);
-	inode->i_uid   = isonum_733(rr->u.PX.uid);
-	inode->i_gid   = isonum_733(rr->u.PX.gid);
-	break;
-      case SIG('P','N'):
-	{ int high, low;
-	  high = isonum_733(rr->u.PN.dev_high);
-	  low = isonum_733(rr->u.PN.dev_low);
-	  /*
-	   * The Rock Ridge standard specifies that if sizeof(dev_t) <= 4,
-	   * then the high field is unused, and the device number is completely
-	   * stored in the low field.  Some writers may ignore this subtlety,
-	   * and as a result we test to see if the entire device number is
-	   * stored in the low field, and use that.
-	   */
-	  if((low & ~0xff) && high == 0) {
-	    inode->i_rdev = MKDEV(low >> 8, low & 0xff);
-	  } else {
-	    inode->i_rdev = MKDEV(high, low);
-	  }
-	}
-	break;
-      case SIG('T','F'):
-	/* Some RRIP writers incorrectly place ctime in the TF_CREATE field.
-	   Try to handle this correctly for either case. */
-	cnt = 0; /* Rock ridge never appears on a High Sierra disk */
-	if(rr->u.TF.flags & TF_CREATE) { 
-	  inode->i_ctime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0);
-	  inode->i_ctime.tv_nsec = 0;
-	}
-	if(rr->u.TF.flags & TF_MODIFY) {
-	  inode->i_mtime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0);
-	  inode->i_mtime.tv_nsec = 0;
-	}
-	if(rr->u.TF.flags & TF_ACCESS) {
-	  inode->i_atime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0);
-	  inode->i_atime.tv_nsec = 0;
-	}
-	if(rr->u.TF.flags & TF_ATTRIBUTES) { 
-	  inode->i_ctime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0);
-	  inode->i_ctime.tv_nsec = 0;
-	} 
-	break;
-      case SIG('S','L'):
-	{int slen;
-	 struct SL_component * slp;
-	 struct SL_component * oldslp;
-	 slen = rr->len - 5;
-	 slp = &rr->u.SL.link;
-	 inode->i_size = symlink_len;
-	 while (slen > 1){
-	   rootflag = 0;
-	   switch(slp->flags &~1){
-	   case 0:
-	     inode->i_size += slp->len;
-	     break;
-	   case 2:
-	     inode->i_size += 1;
-	     break;
-	   case 4:
-	     inode->i_size += 2;
-	     break;
-	   case 8:
-	     rootflag = 1;
-	     inode->i_size += 1;
-	     break;
-	   default:
-	     printk("Symlink component flag not implemented\n");
-	   }
-	   slen -= slp->len + 2;
-	   oldslp = slp;
-	   slp = (struct SL_component *) (((char *) slp) + slp->len + 2);
-
-	   if(slen < 2) {
-	     if(    ((rr->u.SL.flags & 1) != 0) 
-		    && ((oldslp->flags & 1) == 0) ) inode->i_size += 1;
-	     break;
-	   }
-
-	   /*
-	    * If this component record isn't continued, then append a '/'.
-	    */
-	   if (!rootflag && (oldslp->flags & 1) == 0)
-		   inode->i_size += 1;
-	 }
-	}
-	symlink_len = inode->i_size;
-	break;
-      case SIG('R','E'):
-	printk(KERN_WARNING "Attempt to read inode for relocated directory\n");
-	goto out;
-      case SIG('C','L'):
-	ISOFS_I(inode)->i_first_extent = isonum_733(rr->u.CL.location);
-	reloc = isofs_iget(inode->i_sb, ISOFS_I(inode)->i_first_extent, 0);
-	if (!reloc)
-		goto out;
-	inode->i_mode = reloc->i_mode;
-	inode->i_nlink = reloc->i_nlink;
-	inode->i_uid = reloc->i_uid;
-	inode->i_gid = reloc->i_gid;
-	inode->i_rdev = reloc->i_rdev;
-	inode->i_size = reloc->i_size;
-	inode->i_blocks = reloc->i_blocks;
-	inode->i_atime = reloc->i_atime;
-	inode->i_ctime = reloc->i_ctime;
-	inode->i_mtime = reloc->i_mtime;
-	iput(reloc);
-	break;
+			case SIG('S', 'P'):
+				CHECK_SP(goto out);
+				break;
+			case SIG('C', 'E'):
+				CHECK_CE;
+				break;
+			case SIG('E', 'R'):
+				ISOFS_SB(inode->i_sb)->s_rock = 1;
+				printk(KERN_DEBUG "ISO 9660 Extensions: ");
+				{
+					int p;
+					for (p = 0; p < rr->u.ER.len_id; p++)
+						printk("%c", rr->u.ER.data[p]);
+				}
+				printk("\n");
+				break;
+			case SIG('P', 'X'):
+				inode->i_mode = isonum_733(rr->u.PX.mode);
+				inode->i_nlink = isonum_733(rr->u.PX.n_links);
+				inode->i_uid = isonum_733(rr->u.PX.uid);
+				inode->i_gid = isonum_733(rr->u.PX.gid);
+				break;
+			case SIG('P', 'N'):
+				{
+					int high, low;
+					high = isonum_733(rr->u.PN.dev_high);
+					low = isonum_733(rr->u.PN.dev_low);
+					/*
+					 * The Rock Ridge standard specifies that if sizeof(dev_t) <= 4,
+					 * then the high field is unused, and the device number is completely
+					 * stored in the low field.  Some writers may ignore this subtlety,
+					 * and as a result we test to see if the entire device number is
+					 * stored in the low field, and use that.
+					 */
+					if ((low & ~0xff) && high == 0) {
+						inode->i_rdev =
+						    MKDEV(low >> 8, low & 0xff);
+					} else {
+						inode->i_rdev =
+						    MKDEV(high, low);
+					}
+				}
+				break;
+			case SIG('T', 'F'):
+				/* Some RRIP writers incorrectly place ctime in the TF_CREATE field.
+				   Try to handle this correctly for either case. */
+				cnt = 0;	/* Rock ridge never appears on a High Sierra disk */
+				if (rr->u.TF.flags & TF_CREATE) {
+					inode->i_ctime.tv_sec =
+					    iso_date(rr->u.TF.times[cnt++].time,
+						     0);
+					inode->i_ctime.tv_nsec = 0;
+				}
+				if (rr->u.TF.flags & TF_MODIFY) {
+					inode->i_mtime.tv_sec =
+					    iso_date(rr->u.TF.times[cnt++].time,
+						     0);
+					inode->i_mtime.tv_nsec = 0;
+				}
+				if (rr->u.TF.flags & TF_ACCESS) {
+					inode->i_atime.tv_sec =
+					    iso_date(rr->u.TF.times[cnt++].time,
+						     0);
+					inode->i_atime.tv_nsec = 0;
+				}
+				if (rr->u.TF.flags & TF_ATTRIBUTES) {
+					inode->i_ctime.tv_sec =
+					    iso_date(rr->u.TF.times[cnt++].time,
+						     0);
+					inode->i_ctime.tv_nsec = 0;
+				}
+				break;
+			case SIG('S', 'L'):
+				{
+					int slen;
+					struct SL_component *slp;
+					struct SL_component *oldslp;
+					slen = rr->len - 5;
+					slp = &rr->u.SL.link;
+					inode->i_size = symlink_len;
+					while (slen > 1) {
+						rootflag = 0;
+						switch (slp->flags & ~1) {
+						case 0:
+							inode->i_size +=
+							    slp->len;
+							break;
+						case 2:
+							inode->i_size += 1;
+							break;
+						case 4:
+							inode->i_size += 2;
+							break;
+						case 8:
+							rootflag = 1;
+							inode->i_size += 1;
+							break;
+						default:
+							printk
+							    ("Symlink component flag not implemented\n");
+						}
+						slen -= slp->len + 2;
+						oldslp = slp;
+						slp =
+						    (struct SL_component
+						     *)(((char *)slp) +
+							slp->len + 2);
+
+						if (slen < 2) {
+							if (((rr->u.SL.
+							      flags & 1) != 0)
+							    &&
+							    ((oldslp->
+							      flags & 1) == 0))
+								inode->i_size +=
+								    1;
+							break;
+						}
+
+						/*
+						 * If this component record isn't continued, then append a '/'.
+						 */
+						if (!rootflag
+						    && (oldslp->flags & 1) == 0)
+							inode->i_size += 1;
+					}
+				}
+				symlink_len = inode->i_size;
+				break;
+			case SIG('R', 'E'):
+				printk(KERN_WARNING
+				       "Attempt to read inode for relocated directory\n");
+				goto out;
+			case SIG('C', 'L'):
+				ISOFS_I(inode)->i_first_extent =
+				    isonum_733(rr->u.CL.location);
+				reloc =
+				    isofs_iget(inode->i_sb,
+					       ISOFS_I(inode)->i_first_extent,
+					       0);
+				if (!reloc)
+					goto out;
+				inode->i_mode = reloc->i_mode;
+				inode->i_nlink = reloc->i_nlink;
+				inode->i_uid = reloc->i_uid;
+				inode->i_gid = reloc->i_gid;
+				inode->i_rdev = reloc->i_rdev;
+				inode->i_size = reloc->i_size;
+				inode->i_blocks = reloc->i_blocks;
+				inode->i_atime = reloc->i_atime;
+				inode->i_ctime = reloc->i_ctime;
+				inode->i_mtime = reloc->i_mtime;
+				iput(reloc);
+				break;
 #ifdef CONFIG_ZISOFS
-      case SIG('Z','F'):
-	      if ( !ISOFS_SB(inode->i_sb)->s_nocompress ) {
-		      int algo;
-		      algo = isonum_721(rr->u.ZF.algorithm);
-		      if ( algo == SIG('p','z') ) {
-			      int block_shift = isonum_711(&rr->u.ZF.parms[1]);
-			      if ( block_shift < PAGE_CACHE_SHIFT || block_shift > 17 ) {
-				      printk(KERN_WARNING "isofs: Can't handle ZF block size of 2^%d\n", block_shift);
-			      } else {
-				/* Note: we don't change i_blocks here */
-				      ISOFS_I(inode)->i_file_format = isofs_file_compressed;
-				/* Parameters to compression algorithm (header size, block size) */
-				      ISOFS_I(inode)->i_format_parm[0] = isonum_711(&rr->u.ZF.parms[0]);
-				      ISOFS_I(inode)->i_format_parm[1] = isonum_711(&rr->u.ZF.parms[1]);
-				      inode->i_size = isonum_733(rr->u.ZF.real_size);
-			      }
-		      } else {
-			      printk(KERN_WARNING "isofs: Unknown ZF compression algorithm: %c%c\n",
-				     rr->u.ZF.algorithm[0], rr->u.ZF.algorithm[1]);
-		      }
-	      }
-	      break;
+			case SIG('Z', 'F'):
+				if (!ISOFS_SB(inode->i_sb)->s_nocompress) {
+					int algo;
+					algo = isonum_721(rr->u.ZF.algorithm);
+					if (algo == SIG('p', 'z')) {
+						int block_shift =
+						    isonum_711(&rr->u.ZF.
+							       parms[1]);
+						if (block_shift <
+						    PAGE_CACHE_SHIFT
+						    || block_shift > 17) {
+							printk(KERN_WARNING
+							       "isofs: Can't handle ZF block size of 2^%d\n",
+							       block_shift);
+						} else {
+							/* Note: we don't change i_blocks here */
+							ISOFS_I(inode)->
+							    i_file_format =
+							    isofs_file_compressed;
+							/* Parameters to compression algorithm (header size, block size) */
+							ISOFS_I(inode)->
+							    i_format_parm[0] =
+							    isonum_711(&rr->u.
+								       ZF.
+								       parms
+								       [0]);
+							ISOFS_I(inode)->
+							    i_format_parm[1] =
+							    isonum_711(&rr->u.
+								       ZF.
+								       parms
+								       [1]);
+							inode->i_size =
+							    isonum_733(rr->u.ZF.
+								       real_size);
+						}
+					} else {
+						printk(KERN_WARNING
+						       "isofs: Unknown ZF compression algorithm: %c%c\n",
+						       rr->u.ZF.algorithm[0],
+						       rr->u.ZF.algorithm[1]);
+					}
+				}
+				break;
 #endif
-      default:
-	break;
-      }
-    }
-  }
-  MAYBE_CONTINUE(repeat,inode);
- out:
-  if(buffer) kfree(buffer);
-  return 0;
+			default:
+				break;
+			}
+		}
+	}
+	MAYBE_CONTINUE(repeat, inode);
+      out:
+	if (buffer)
+		kfree(buffer);
+	return 0;
 }
 
 static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
@@ -376,32 +443,32 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
 			if (slp->len > plimit - rpnt)
 				return NULL;
 			memcpy(rpnt, slp->text, slp->len);
-			rpnt+=slp->len;
+			rpnt += slp->len;
 			break;
 		case 2:
 			if (rpnt >= plimit)
 				return NULL;
-			*rpnt++='.';
+			*rpnt++ = '.';
 			break;
 		case 4:
 			if (2 > plimit - rpnt)
 				return NULL;
-			*rpnt++='.';
-			*rpnt++='.';
+			*rpnt++ = '.';
+			*rpnt++ = '.';
 			break;
 		case 8:
 			if (rpnt >= plimit)
 				return NULL;
 			rootflag = 1;
-			*rpnt++='/';
+			*rpnt++ = '/';
 			break;
 		default:
 			printk("Symlink component flag not implemented (%d)\n",
-			     slp->flags);
+			       slp->flags);
 		}
 		slen -= slp->len + 2;
 		oldslp = slp;
-		slp = (struct SL_component *) ((char *) slp + slp->len + 2);
+		slp = (struct SL_component *)((char *)slp + slp->len + 2);
 
 		if (slen < 2) {
 			/*
@@ -412,7 +479,7 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
 			    !(oldslp->flags & 1)) {
 				if (rpnt >= plimit)
 					return NULL;
-				*rpnt++='/';
+				*rpnt++ = '/';
 			}
 			break;
 		}
@@ -423,24 +490,22 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
 		if (!rootflag && !(oldslp->flags & 1)) {
 			if (rpnt >= plimit)
 				return NULL;
-			*rpnt++='/';
+			*rpnt++ = '/';
 		}
 	}
 	return rpnt;
 }
 
-int parse_rock_ridge_inode(struct iso_directory_record * de,
-			   struct inode * inode)
+int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
 {
-   int result=parse_rock_ridge_inode_internal(de,inode,0);
-   /* if rockridge flag was reset and we didn't look for attributes
-    * behind eventual XA attributes, have a look there */
-   if ((ISOFS_SB(inode->i_sb)->s_rock_offset==-1)
-       &&(ISOFS_SB(inode->i_sb)->s_rock==2))
-     {
-	result=parse_rock_ridge_inode_internal(de,inode,14);
-     }
-   return result;
+	int result = parse_rock_ridge_inode_internal(de, inode, 0);
+	/* if rockridge flag was reset and we didn't look for attributes
+	 * behind eventual XA attributes, have a look there */
+	if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1)
+	    && (ISOFS_SB(inode->i_sb)->s_rock == 2)) {
+		result = parse_rock_ridge_inode_internal(de, inode, 14);
+	}
+	return result;
 }
 
 /* readpage() for symlinks: reads symlink contents into the page and either
@@ -449,7 +514,7 @@ int parse_rock_ridge_inode(struct iso_directory_record * de,
 static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 {
 	struct inode *inode = page->mapping->host;
-        struct iso_inode_info *ei = ISOFS_I(inode);
+	struct iso_inode_info *ei = ISOFS_I(inode);
 	char *link = kmap(page);
 	unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
 	struct buffer_head *bh;
@@ -472,10 +537,10 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 	if (!bh)
 		goto out_noread;
 
-        offset = ei->i_iget5_offset;
-	pnt = (unsigned char *) bh->b_data + offset;
+	offset = ei->i_iget5_offset;
+	pnt = (unsigned char *)bh->b_data + offset;
 
-	raw_inode = (struct iso_directory_record *) pnt;
+	raw_inode = (struct iso_directory_record *)pnt;
 
 	/*
 	 * If we go past the end of the buffer, there is some sort of error.
@@ -489,8 +554,8 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 	SETUP_ROCK_RIDGE(raw_inode, chr, len);
 
       repeat:
-	while (len > 2) { /* There may be one byte for padding somewhere */
-		rr = (struct rock_ridge *) chr;
+	while (len > 2) {	/* There may be one byte for padding somewhere */
+		rr = (struct rock_ridge *)chr;
 		if (rr->len < 3)
 			goto out;	/* Something got screwed up here */
 		sig = isonum_721(chr);
@@ -555,5 +620,5 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 }
 
 struct address_space_operations isofs_symlink_aops = {
-	.readpage	= rock_ridge_symlink_readpage
+	.readpage = rock_ridge_symlink_readpage
 };
-- 
cgit v1.2.3


From 7fa393a1d3d9485e428a3c74b5599190c14b13db Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 21 Jun 2005 17:16:43 -0700
Subject: [PATCH] rock: manual tidies

Fix stuff which Lindent got wrong, rework a few deeply-nested blocks.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/rock.c | 571 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 285 insertions(+), 286 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index bbc348f0189..e981c040a49 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -90,82 +90,78 @@ int get_rock_ridge_filename(struct iso_directory_record *de,
 	int len;
 	unsigned char *chr;
 	CONTINUE_DECLS;
-	int retnamlen = 0, truncate = 0;
+	struct rock_ridge *rr;
+	int sig;
+	int retnamlen = 0;
+	int truncate = 0;
 
 	if (!ISOFS_SB(inode->i_sb)->s_rock)
 		return 0;
 	*retname = 0;
 
 	SETUP_ROCK_RIDGE(de, chr, len);
-      repeat:
-	{
-		struct rock_ridge *rr;
-		int sig;
-
-		while (len > 2) {	/* There may be one byte for padding somewhere */
-			rr = (struct rock_ridge *)chr;
-			if (rr->len < 3)
-				goto out;	/* Something got screwed up here */
-			sig = isonum_721(chr);
-			chr += rr->len;
-			len -= rr->len;
-			if (len < 0)
-				goto out;	/* corrupted isofs */
-
-			switch (sig) {
-			case SIG('R', 'R'):
-				if ((rr->u.RR.flags[0] & RR_NM) == 0)
-					goto out;
+repeat:
+
+	while (len > 2) { /* There may be one byte for padding somewhere */
+		rr = (struct rock_ridge *)chr;
+		if (rr->len < 3)
+			goto out;	/* Something got screwed up here */
+		sig = isonum_721(chr);
+		chr += rr->len;
+		len -= rr->len;
+		if (len < 0)
+			goto out;	/* corrupted isofs */
+
+		switch (sig) {
+		case SIG('R', 'R'):
+			if ((rr->u.RR.flags[0] & RR_NM) == 0)
+				goto out;
+			break;
+		case SIG('S', 'P'):
+			CHECK_SP(goto out);
+			break;
+		case SIG('C', 'E'):
+			CHECK_CE;
+			break;
+		case SIG('N', 'M'):
+			if (truncate)
 				break;
-			case SIG('S', 'P'):
-				CHECK_SP(goto out);
+			if (rr->len < 5)
 				break;
-			case SIG('C', 'E'):
-				CHECK_CE;
+			/*
+			 * If the flags are 2 or 4, this indicates '.' or '..'.
+			 * We don't want to do anything with this, because it
+			 * screws up the code that calls us.  We don't really
+			 * care anyways, since we can just use the non-RR
+			 * name.
+			 */
+			if (rr->u.NM.flags & 6)
 				break;
-			case SIG('N', 'M'):
-				if (truncate)
-					break;
-				if (rr->len < 5)
-					break;
-				/*
-				 * If the flags are 2 or 4, this indicates '.' or '..'.
-				 * We don't want to do anything with this, because it
-				 * screws up the code that calls us.  We don't really
-				 * care anyways, since we can just use the non-RR
-				 * name.
-				 */
-				if (rr->u.NM.flags & 6) {
-					break;
-				}
 
-				if (rr->u.NM.flags & ~1) {
-					printk
-					    ("Unsupported NM flag settings (%d)\n",
-					     rr->u.NM.flags);
-					break;
-				}
-				if ((strlen(retname) + rr->len - 5) >= 254) {
-					truncate = 1;
-					break;
-				}
-				strncat(retname, rr->u.NM.name, rr->len - 5);
-				retnamlen += rr->len - 5;
+			if (rr->u.NM.flags & ~1) {
+				printk("Unsupported NM flag settings (%d)\n",
+					rr->u.NM.flags);
 				break;
-			case SIG('R', 'E'):
-				if (buffer)
-					kfree(buffer);
-				return -1;
-			default:
+			}
+			if ((strlen(retname) + rr->len - 5) >= 254) {
+				truncate = 1;
 				break;
 			}
+			strncat(retname, rr->u.NM.name, rr->len - 5);
+			retnamlen += rr->len - 5;
+			break;
+		case SIG('R', 'E'):
+			if (buffer)
+				kfree(buffer);
+			return -1;
+		default:
+			break;
 		}
 	}
 	MAYBE_CONTINUE(repeat, inode);
-	if (buffer)
-		kfree(buffer);
+	kfree(buffer);
 	return retnamlen;	/* If 0, this file did not have a NM field */
-      out:
+out:
 	if (buffer)
 		kfree(buffer);
 	return 0;
@@ -178,6 +174,11 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
 	int len;
 	unsigned char *chr;
 	int symlink_len = 0;
+	int cnt, sig;
+	struct inode *reloc;
+	struct rock_ridge *rr;
+	int rootflag;
+
 	CONTINUE_DECLS;
 
 	if (!ISOFS_SB(inode->i_sb)->s_rock)
@@ -191,238 +192,237 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
 			len = 0;
 	}
 
-      repeat:
-	{
-		int cnt, sig;
-		struct inode *reloc;
-		struct rock_ridge *rr;
-		int rootflag;
-
-		while (len > 2) {	/* There may be one byte for padding somewhere */
-			rr = (struct rock_ridge *)chr;
-			if (rr->len < 3)
-				goto out;	/* Something got screwed up here */
-			sig = isonum_721(chr);
-			chr += rr->len;
-			len -= rr->len;
-			if (len < 0)
-				goto out;	/* corrupted isofs */
-
-			switch (sig) {
+repeat:
+	while (len > 2) { /* There may be one byte for padding somewhere */
+		rr = (struct rock_ridge *)chr;
+		if (rr->len < 3)
+			goto out;	/* Something got screwed up here */
+		sig = isonum_721(chr);
+		chr += rr->len;
+		len -= rr->len;
+		if (len < 0)
+			goto out;	/* corrupted isofs */
+
+		switch (sig) {
 #ifndef CONFIG_ZISOFS		/* No flag for SF or ZF */
-			case SIG('R', 'R'):
-				if ((rr->u.RR.flags[0] &
-				     (RR_PX | RR_TF | RR_SL | RR_CL)) == 0)
-					goto out;
-				break;
+		case SIG('R', 'R'):
+			if ((rr->u.RR.flags[0] &
+			     (RR_PX | RR_TF | RR_SL | RR_CL)) == 0)
+				goto out;
+			break;
 #endif
-			case SIG('S', 'P'):
-				CHECK_SP(goto out);
-				break;
-			case SIG('C', 'E'):
-				CHECK_CE;
-				break;
-			case SIG('E', 'R'):
-				ISOFS_SB(inode->i_sb)->s_rock = 1;
-				printk(KERN_DEBUG "ISO 9660 Extensions: ");
-				{
-					int p;
-					for (p = 0; p < rr->u.ER.len_id; p++)
-						printk("%c", rr->u.ER.data[p]);
+		case SIG('S', 'P'):
+			CHECK_SP(goto out);
+			break;
+		case SIG('C', 'E'):
+			CHECK_CE;
+			break;
+		case SIG('E', 'R'):
+			ISOFS_SB(inode->i_sb)->s_rock = 1;
+			printk(KERN_DEBUG "ISO 9660 Extensions: ");
+			{
+				int p;
+				for (p = 0; p < rr->u.ER.len_id; p++)
+					printk("%c", rr->u.ER.data[p]);
+			}
+			printk("\n");
+			break;
+		case SIG('P', 'X'):
+			inode->i_mode = isonum_733(rr->u.PX.mode);
+			inode->i_nlink = isonum_733(rr->u.PX.n_links);
+			inode->i_uid = isonum_733(rr->u.PX.uid);
+			inode->i_gid = isonum_733(rr->u.PX.gid);
+			break;
+		case SIG('P', 'N'):
+			{
+				int high, low;
+				high = isonum_733(rr->u.PN.dev_high);
+				low = isonum_733(rr->u.PN.dev_low);
+				/*
+				 * The Rock Ridge standard specifies that if
+				 * sizeof(dev_t) <= 4, then the high field is
+				 * unused, and the device number is completely
+				 * stored in the low field.  Some writers may
+				 * ignore this subtlety,
+				 * and as a result we test to see if the entire
+				 * device number is
+				 * stored in the low field, and use that.
+				 */
+				if ((low & ~0xff) && high == 0) {
+					inode->i_rdev =
+					    MKDEV(low >> 8, low & 0xff);
+				} else {
+					inode->i_rdev =
+					    MKDEV(high, low);
 				}
-				printk("\n");
-				break;
-			case SIG('P', 'X'):
-				inode->i_mode = isonum_733(rr->u.PX.mode);
-				inode->i_nlink = isonum_733(rr->u.PX.n_links);
-				inode->i_uid = isonum_733(rr->u.PX.uid);
-				inode->i_gid = isonum_733(rr->u.PX.gid);
-				break;
-			case SIG('P', 'N'):
-				{
-					int high, low;
-					high = isonum_733(rr->u.PN.dev_high);
-					low = isonum_733(rr->u.PN.dev_low);
-					/*
-					 * The Rock Ridge standard specifies that if sizeof(dev_t) <= 4,
-					 * then the high field is unused, and the device number is completely
-					 * stored in the low field.  Some writers may ignore this subtlety,
-					 * and as a result we test to see if the entire device number is
-					 * stored in the low field, and use that.
-					 */
-					if ((low & ~0xff) && high == 0) {
-						inode->i_rdev =
-						    MKDEV(low >> 8, low & 0xff);
-					} else {
-						inode->i_rdev =
-						    MKDEV(high, low);
+			}
+			break;
+		case SIG('T', 'F'):
+			/*
+			 * Some RRIP writers incorrectly place ctime in the
+			 * TF_CREATE field. Try to handle this correctly for
+			 * either case.
+			 */
+			/* Rock ridge never appears on a High Sierra disk */
+			cnt = 0;
+			if (rr->u.TF.flags & TF_CREATE) {
+				inode->i_ctime.tv_sec =
+				    iso_date(rr->u.TF.times[cnt++].time,
+					     0);
+				inode->i_ctime.tv_nsec = 0;
+			}
+			if (rr->u.TF.flags & TF_MODIFY) {
+				inode->i_mtime.tv_sec =
+				    iso_date(rr->u.TF.times[cnt++].time,
+					     0);
+				inode->i_mtime.tv_nsec = 0;
+			}
+			if (rr->u.TF.flags & TF_ACCESS) {
+				inode->i_atime.tv_sec =
+				    iso_date(rr->u.TF.times[cnt++].time,
+					     0);
+				inode->i_atime.tv_nsec = 0;
+			}
+			if (rr->u.TF.flags & TF_ATTRIBUTES) {
+				inode->i_ctime.tv_sec =
+				    iso_date(rr->u.TF.times[cnt++].time,
+					     0);
+				inode->i_ctime.tv_nsec = 0;
+			}
+			break;
+		case SIG('S', 'L'):
+			{
+				int slen;
+				struct SL_component *slp;
+				struct SL_component *oldslp;
+				slen = rr->len - 5;
+				slp = &rr->u.SL.link;
+				inode->i_size = symlink_len;
+				while (slen > 1) {
+					rootflag = 0;
+					switch (slp->flags & ~1) {
+					case 0:
+						inode->i_size +=
+						    slp->len;
+						break;
+					case 2:
+						inode->i_size += 1;
+						break;
+					case 4:
+						inode->i_size += 2;
+						break;
+					case 8:
+						rootflag = 1;
+						inode->i_size += 1;
+						break;
+					default:
+						printk("Symlink component flag "
+							"not implemented\n");
 					}
-				}
-				break;
-			case SIG('T', 'F'):
-				/* Some RRIP writers incorrectly place ctime in the TF_CREATE field.
-				   Try to handle this correctly for either case. */
-				cnt = 0;	/* Rock ridge never appears on a High Sierra disk */
-				if (rr->u.TF.flags & TF_CREATE) {
-					inode->i_ctime.tv_sec =
-					    iso_date(rr->u.TF.times[cnt++].time,
-						     0);
-					inode->i_ctime.tv_nsec = 0;
-				}
-				if (rr->u.TF.flags & TF_MODIFY) {
-					inode->i_mtime.tv_sec =
-					    iso_date(rr->u.TF.times[cnt++].time,
-						     0);
-					inode->i_mtime.tv_nsec = 0;
-				}
-				if (rr->u.TF.flags & TF_ACCESS) {
-					inode->i_atime.tv_sec =
-					    iso_date(rr->u.TF.times[cnt++].time,
-						     0);
-					inode->i_atime.tv_nsec = 0;
-				}
-				if (rr->u.TF.flags & TF_ATTRIBUTES) {
-					inode->i_ctime.tv_sec =
-					    iso_date(rr->u.TF.times[cnt++].time,
-						     0);
-					inode->i_ctime.tv_nsec = 0;
-				}
-				break;
-			case SIG('S', 'L'):
-				{
-					int slen;
-					struct SL_component *slp;
-					struct SL_component *oldslp;
-					slen = rr->len - 5;
-					slp = &rr->u.SL.link;
-					inode->i_size = symlink_len;
-					while (slen > 1) {
-						rootflag = 0;
-						switch (slp->flags & ~1) {
-						case 0:
+					slen -= slp->len + 2;
+					oldslp = slp;
+					slp = (struct SL_component *)
+						(((char *)slp) + slp->len + 2);
+
+					if (slen < 2) {
+						if (((rr->u.SL.
+						      flags & 1) != 0)
+						    &&
+						    ((oldslp->
+						      flags & 1) == 0))
 							inode->i_size +=
-							    slp->len;
-							break;
-						case 2:
-							inode->i_size += 1;
-							break;
-						case 4:
-							inode->i_size += 2;
-							break;
-						case 8:
-							rootflag = 1;
-							inode->i_size += 1;
-							break;
-						default:
-							printk
-							    ("Symlink component flag not implemented\n");
-						}
-						slen -= slp->len + 2;
-						oldslp = slp;
-						slp =
-						    (struct SL_component
-						     *)(((char *)slp) +
-							slp->len + 2);
-
-						if (slen < 2) {
-							if (((rr->u.SL.
-							      flags & 1) != 0)
-							    &&
-							    ((oldslp->
-							      flags & 1) == 0))
-								inode->i_size +=
-								    1;
-							break;
-						}
-
-						/*
-						 * If this component record isn't continued, then append a '/'.
-						 */
-						if (!rootflag
-						    && (oldslp->flags & 1) == 0)
-							inode->i_size += 1;
+							    1;
+						break;
 					}
+
+					/*
+					 * If this component record isn't
+					 * continued, then append a '/'.
+					 */
+					if (!rootflag
+					    && (oldslp->flags & 1) == 0)
+						inode->i_size += 1;
 				}
-				symlink_len = inode->i_size;
-				break;
-			case SIG('R', 'E'):
-				printk(KERN_WARNING
-				       "Attempt to read inode for relocated directory\n");
+			}
+			symlink_len = inode->i_size;
+			break;
+		case SIG('R', 'E'):
+			printk(KERN_WARNING "Attempt to read inode for "
+					"relocated directory\n");
+			goto out;
+		case SIG('C', 'L'):
+			ISOFS_I(inode)->i_first_extent =
+			    isonum_733(rr->u.CL.location);
+			reloc =
+			    isofs_iget(inode->i_sb,
+				       ISOFS_I(inode)->i_first_extent,
+				       0);
+			if (!reloc)
 				goto out;
-			case SIG('C', 'L'):
-				ISOFS_I(inode)->i_first_extent =
-				    isonum_733(rr->u.CL.location);
-				reloc =
-				    isofs_iget(inode->i_sb,
-					       ISOFS_I(inode)->i_first_extent,
-					       0);
-				if (!reloc)
-					goto out;
-				inode->i_mode = reloc->i_mode;
-				inode->i_nlink = reloc->i_nlink;
-				inode->i_uid = reloc->i_uid;
-				inode->i_gid = reloc->i_gid;
-				inode->i_rdev = reloc->i_rdev;
-				inode->i_size = reloc->i_size;
-				inode->i_blocks = reloc->i_blocks;
-				inode->i_atime = reloc->i_atime;
-				inode->i_ctime = reloc->i_ctime;
-				inode->i_mtime = reloc->i_mtime;
-				iput(reloc);
-				break;
+			inode->i_mode = reloc->i_mode;
+			inode->i_nlink = reloc->i_nlink;
+			inode->i_uid = reloc->i_uid;
+			inode->i_gid = reloc->i_gid;
+			inode->i_rdev = reloc->i_rdev;
+			inode->i_size = reloc->i_size;
+			inode->i_blocks = reloc->i_blocks;
+			inode->i_atime = reloc->i_atime;
+			inode->i_ctime = reloc->i_ctime;
+			inode->i_mtime = reloc->i_mtime;
+			iput(reloc);
+			break;
 #ifdef CONFIG_ZISOFS
-			case SIG('Z', 'F'):
-				if (!ISOFS_SB(inode->i_sb)->s_nocompress) {
-					int algo;
-					algo = isonum_721(rr->u.ZF.algorithm);
-					if (algo == SIG('p', 'z')) {
-						int block_shift =
-						    isonum_711(&rr->u.ZF.
-							       parms[1]);
-						if (block_shift <
-						    PAGE_CACHE_SHIFT
-						    || block_shift > 17) {
-							printk(KERN_WARNING
-							       "isofs: Can't handle ZF block size of 2^%d\n",
-							       block_shift);
-						} else {
-							/* Note: we don't change i_blocks here */
-							ISOFS_I(inode)->
-							    i_file_format =
-							    isofs_file_compressed;
-							/* Parameters to compression algorithm (header size, block size) */
-							ISOFS_I(inode)->
-							    i_format_parm[0] =
-							    isonum_711(&rr->u.
-								       ZF.
-								       parms
-								       [0]);
-							ISOFS_I(inode)->
-							    i_format_parm[1] =
-							    isonum_711(&rr->u.
-								       ZF.
-								       parms
-								       [1]);
-							inode->i_size =
-							    isonum_733(rr->u.ZF.
-								       real_size);
-						}
-					} else {
-						printk(KERN_WARNING
-						       "isofs: Unknown ZF compression algorithm: %c%c\n",
-						       rr->u.ZF.algorithm[0],
-						       rr->u.ZF.algorithm[1]);
-					}
-				}
-				break;
-#endif
-			default:
+		case SIG('Z', 'F'): {
+			int algo;
+
+			if (ISOFS_SB(inode->i_sb)->s_nocompress)
 				break;
+			algo = isonum_721(rr->u.ZF.algorithm);
+			if (algo == SIG('p', 'z')) {
+				int block_shift =
+					isonum_711(&rr->u.ZF.parms[1]);
+				if (block_shift < PAGE_CACHE_SHIFT
+						|| block_shift > 17) {
+					printk(KERN_WARNING "isofs: "
+						"Can't handle ZF block "
+						"size of 2^%d\n",
+						block_shift);
+				} else {
+					/*
+					 * Note: we don't change
+					 * i_blocks here
+					 */
+					ISOFS_I(inode)->i_file_format =
+						isofs_file_compressed;
+					/*
+					 * Parameters to compression
+					 * algorithm (header size,
+					 * block size)
+					 */
+					ISOFS_I(inode)->i_format_parm[0] =
+						isonum_711(&rr->u.ZF.parms[0]);
+					ISOFS_I(inode)->i_format_parm[1] =
+						isonum_711(&rr->u.ZF.parms[1]);
+					inode->i_size =
+					    isonum_733(rr->u.ZF.
+						       real_size);
+				}
+			} else {
+				printk(KERN_WARNING
+				       "isofs: Unknown ZF compression "
+						"algorithm: %c%c\n",
+				       rr->u.ZF.algorithm[0],
+				       rr->u.ZF.algorithm[1]);
 			}
+			break;
+		}
+#endif
+		default:
+			break;
 		}
 	}
 	MAYBE_CONTINUE(repeat, inode);
-      out:
+out:
 	if (buffer)
 		kfree(buffer);
 	return 0;
@@ -553,8 +553,8 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 
 	SETUP_ROCK_RIDGE(raw_inode, chr, len);
 
-      repeat:
-	while (len > 2) {	/* There may be one byte for padding somewhere */
+repeat:
+	while (len > 2) { /* There may be one byte for padding somewhere */
 		rr = (struct rock_ridge *)chr;
 		if (rr->len < 3)
 			goto out;	/* Something got screwed up here */
@@ -586,8 +586,7 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 		}
 	}
 	MAYBE_CONTINUE(repeat, inode);
-	if (buffer)
-		kfree(buffer);
+	kfree(buffer);
 
 	if (rpnt == link)
 		goto fail;
@@ -600,19 +599,19 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 	return 0;
 
 	/* error exit from macro */
-      out:
+out:
 	if (buffer)
 		kfree(buffer);
 	goto fail;
-      out_noread:
+out_noread:
 	printk("unable to read i-node block");
 	goto fail;
-      out_bad_span:
+out_bad_span:
 	printk("symlink spans iso9660 blocks\n");
-      fail:
+fail:
 	brelse(bh);
 	unlock_kernel();
-      error:
+error:
 	SetPageError(page);
 	kunmap(page);
 	unlock_page(page);
-- 
cgit v1.2.3


From 12121714fbf36023d5892034d0c97df54a451543 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 21 Jun 2005 17:16:44 -0700
Subject: [PATCH] rock: remove CHECK_SP

Remove the CHECK_SP macro.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/rock.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index e981c040a49..8497c6bd567 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -22,12 +22,21 @@
 
 #define SIG(A,B) ((A) | ((B) << 8))	/* isonum_721() */
 
-/* This is a way of ensuring that we have something in the system
-   use fields that is compatible with Rock Ridge */
-#define CHECK_SP(FAIL)	       			\
-      if(rr->u.SP.magic[0] != 0xbe) FAIL;	\
-      if(rr->u.SP.magic[1] != 0xef) FAIL;       \
-      ISOFS_SB(inode->i_sb)->s_rock_offset=rr->u.SP.skip;
+/*
+ * This is a way of ensuring that we have something in the system
+ *  use fields that is compatible with Rock Ridge.  Return zero on success.
+ */
+
+static int check_sp(struct rock_ridge *rr, struct inode *inode)
+{
+	if (rr->u.SP.magic[0] != 0xbe)
+		return -1;
+	if (rr->u.SP.magic[1] != 0xef)
+		return -1;
+	ISOFS_SB(inode->i_sb)->s_rock_offset = rr->u.SP.skip;
+	return 0;
+}
+
 /* We define a series of macros because each function must do exactly the
    same thing in certain places.  We use the macros to ensure that everything
    is done correctly */
@@ -118,7 +127,8 @@ repeat:
 				goto out;
 			break;
 		case SIG('S', 'P'):
-			CHECK_SP(goto out);
+			if (check_sp(rr, inode))
+				goto out;
 			break;
 		case SIG('C', 'E'):
 			CHECK_CE;
@@ -212,7 +222,8 @@ repeat:
 			break;
 #endif
 		case SIG('S', 'P'):
-			CHECK_SP(goto out);
+			if (check_sp(rr, inode))
+				goto out;
 			break;
 		case SIG('C', 'E'):
 			CHECK_CE;
@@ -570,7 +581,8 @@ repeat:
 				goto out;
 			break;
 		case SIG('S', 'P'):
-			CHECK_SP(goto out);
+			if (check_sp(rr, inode))
+				goto out;
 			break;
 		case SIG('S', 'L'):
 			rpnt = get_symlink_chunk(rpnt, rr,
-- 
cgit v1.2.3


From a40ea8f22e59c038ffdf219251a67311b9f6e362 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 21 Jun 2005 17:16:44 -0700
Subject: [PATCH] rock: remove CONTINUE_DECLS

Remove the CONTINUE_DECLS macro.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/rock.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 8497c6bd567..9f2aaa27f8f 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -37,14 +37,6 @@ static int check_sp(struct rock_ridge *rr, struct inode *inode)
 	return 0;
 }
 
-/* We define a series of macros because each function must do exactly the
-   same thing in certain places.  We use the macros to ensure that everything
-   is done correctly */
-
-#define CONTINUE_DECLS \
-  int cont_extent = 0, cont_offset = 0, cont_size = 0;   \
-  void *buffer = NULL
-
 #define CHECK_CE	       			\
       {cont_extent = isonum_733(rr->u.CE.extent); \
       cont_offset = isonum_733(rr->u.CE.offset); \
@@ -98,7 +90,10 @@ int get_rock_ridge_filename(struct iso_directory_record *de,
 {
 	int len;
 	unsigned char *chr;
-	CONTINUE_DECLS;
+	int cont_extent = 0;
+	int cont_offset = 0;
+	int cont_size = 0;
+	void *buffer = NULL;
 	struct rock_ridge *rr;
 	int sig;
 	int retnamlen = 0;
@@ -188,8 +183,10 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
 	struct inode *reloc;
 	struct rock_ridge *rr;
 	int rootflag;
-
-	CONTINUE_DECLS;
+	int cont_extent = 0;
+	int cont_offset = 0;
+	int cont_size = 0;
+	void *buffer = NULL;
 
 	if (!ISOFS_SB(inode->i_sb)->s_rock)
 		return 0;
@@ -532,7 +529,10 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 	char *rpnt = link;
 	unsigned char *pnt;
 	struct iso_directory_record *raw_inode;
-	CONTINUE_DECLS;
+	int cont_extent = 0;
+	int cont_offset = 0;
+	int cont_size = 0;
+	void *buffer = NULL;
 	unsigned long block, offset;
 	int sig;
 	int len;
-- 
cgit v1.2.3


From 04f7aa9c7dc615c690cede9a80c83625ad2efef7 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 21 Jun 2005 17:16:45 -0700
Subject: [PATCH] rock: remove CHECK_CE

Remove the CHECK_CE macro

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/rock.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 9f2aaa27f8f..283e92197bb 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -37,11 +37,6 @@ static int check_sp(struct rock_ridge *rr, struct inode *inode)
 	return 0;
 }
 
-#define CHECK_CE	       			\
-      {cont_extent = isonum_733(rr->u.CE.extent); \
-      cont_offset = isonum_733(rr->u.CE.offset); \
-      cont_size = isonum_733(rr->u.CE.size);}
-
 #define SETUP_ROCK_RIDGE(DE,CHR,LEN)	      		      	\
   {LEN= sizeof(struct iso_directory_record) + DE->name_len[0];	\
   if(LEN & 1) LEN++;						\
@@ -126,7 +121,9 @@ repeat:
 				goto out;
 			break;
 		case SIG('C', 'E'):
-			CHECK_CE;
+			cont_extent = isonum_733(rr->u.CE.extent);
+			cont_offset = isonum_733(rr->u.CE.offset);
+			cont_size = isonum_733(rr->u.CE.size);
 			break;
 		case SIG('N', 'M'):
 			if (truncate)
@@ -223,7 +220,9 @@ repeat:
 				goto out;
 			break;
 		case SIG('C', 'E'):
-			CHECK_CE;
+			cont_extent = isonum_733(rr->u.CE.extent);
+			cont_offset = isonum_733(rr->u.CE.offset);
+			cont_size = isonum_733(rr->u.CE.size);
 			break;
 		case SIG('E', 'R'):
 			ISOFS_SB(inode->i_sb)->s_rock = 1;
@@ -592,7 +591,9 @@ repeat:
 			break;
 		case SIG('C', 'E'):
 			/* This tells is if there is a continuation record */
-			CHECK_CE;
+			cont_extent = isonum_733(rr->u.CE.extent);
+			cont_offset = isonum_733(rr->u.CE.offset);
+			cont_size = isonum_733(rr->u.CE.size);
 		default:
 			break;
 		}
-- 
cgit v1.2.3


From 76ab07ebc3ca69e2f14ccbed0de3a9dda9adc6da Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 21 Jun 2005 17:16:46 -0700
Subject: [PATCH] rock: remove SETUP_ROCK_RIDGE

- Remove the SETUP_ROCK_RIDGE macro.

- In rock_ridge_symlink_readpage(), rename raw_inode to raw_de.  It points
  at a directory entry, not an inode.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/rock.c | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 283e92197bb..aafe356ec1b 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -37,18 +37,23 @@ static int check_sp(struct rock_ridge *rr, struct inode *inode)
 	return 0;
 }
 
-#define SETUP_ROCK_RIDGE(DE,CHR,LEN)	      		      	\
-  {LEN= sizeof(struct iso_directory_record) + DE->name_len[0];	\
-  if(LEN & 1) LEN++;						\
-  CHR = ((unsigned char *) DE) + LEN;				\
-  LEN = *((unsigned char *) DE) - LEN;                          \
-  if (LEN<0) LEN=0;                                             \
-  if (ISOFS_SB(inode->i_sb)->s_rock_offset!=-1)                \
-  {                                                             \
-     LEN-=ISOFS_SB(inode->i_sb)->s_rock_offset;                \
-     CHR+=ISOFS_SB(inode->i_sb)->s_rock_offset;                \
-     if (LEN<0) LEN=0;                                          \
-  }                                                             \
+static void setup_rock_ridge(struct iso_directory_record *de,
+			struct inode *inode, unsigned char **chr, int *len)
+{
+	*len = sizeof(struct iso_directory_record) + de->name_len[0];
+	if (*len & 1)
+		(*len)++;
+	*chr = (unsigned char *)de + *len;
+	*len = *((unsigned char *)de) - *len;
+	if (*len < 0)
+		*len = 0;
+
+	if (ISOFS_SB(inode->i_sb)->s_rock_offset != -1) {
+		*len -= ISOFS_SB(inode->i_sb)->s_rock_offset;
+		*chr += ISOFS_SB(inode->i_sb)->s_rock_offset;
+		if (*len < 0)
+			*len = 0;
+	}
 }
 
 #define MAYBE_CONTINUE(LABEL,DEV) \
@@ -98,7 +103,7 @@ int get_rock_ridge_filename(struct iso_directory_record *de,
 		return 0;
 	*retname = 0;
 
-	SETUP_ROCK_RIDGE(de, chr, len);
+	setup_rock_ridge(de, inode, &chr, &len);
 repeat:
 
 	while (len > 2) { /* There may be one byte for padding somewhere */
@@ -188,7 +193,7 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
 	if (!ISOFS_SB(inode->i_sb)->s_rock)
 		return 0;
 
-	SETUP_ROCK_RIDGE(de, chr, len);
+	setup_rock_ridge(de, inode, &chr, &len);
 	if (regard_xa) {
 		chr += 14;
 		len -= 14;
@@ -527,7 +532,7 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 	struct buffer_head *bh;
 	char *rpnt = link;
 	unsigned char *pnt;
-	struct iso_directory_record *raw_inode;
+	struct iso_directory_record *raw_de;
 	int cont_extent = 0;
 	int cont_offset = 0;
 	int cont_size = 0;
@@ -550,7 +555,7 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 	offset = ei->i_iget5_offset;
 	pnt = (unsigned char *)bh->b_data + offset;
 
-	raw_inode = (struct iso_directory_record *)pnt;
+	raw_de = (struct iso_directory_record *)pnt;
 
 	/*
 	 * If we go past the end of the buffer, there is some sort of error.
@@ -561,7 +566,7 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 	/* Now test for possible Rock Ridge extensions which will override
 	   some of these numbers in the inode structure. */
 
-	SETUP_ROCK_RIDGE(raw_inode, chr, len);
+	setup_rock_ridge(raw_de, inode, &chr, &len);
 
 repeat:
 	while (len > 2) { /* There may be one byte for padding somewhere */
-- 
cgit v1.2.3


From ba40aaf04314ec5efd090e69518033fc55f450fa Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 21 Jun 2005 17:16:46 -0700
Subject: [PATCH] rock: remove MAYBE_CONTINUE

- remove the MAYBE_CONTINUE macro

- kfree(NULL) is OK.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/rock.c | 237 +++++++++++++++++++++++++++++++-------------------------
 1 file changed, 130 insertions(+), 107 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index aafe356ec1b..efefbcce4ce 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -22,6 +22,16 @@
 
 #define SIG(A,B) ((A) | ((B) << 8))	/* isonum_721() */
 
+struct rock_state {
+	void *buffer;
+	unsigned char *chr;
+	int len;
+	int cont_size;
+	int cont_extent;
+	int cont_offset;
+	struct inode *inode;
+};
+
 /*
  * This is a way of ensuring that we have something in the system
  *  use fields that is compatible with Rock Ridge.  Return zero on success.
@@ -38,82 +48,96 @@ static int check_sp(struct rock_ridge *rr, struct inode *inode)
 }
 
 static void setup_rock_ridge(struct iso_directory_record *de,
-			struct inode *inode, unsigned char **chr, int *len)
+			struct inode *inode, struct rock_state *rs)
 {
-	*len = sizeof(struct iso_directory_record) + de->name_len[0];
-	if (*len & 1)
-		(*len)++;
-	*chr = (unsigned char *)de + *len;
-	*len = *((unsigned char *)de) - *len;
-	if (*len < 0)
-		*len = 0;
+	rs->len = sizeof(struct iso_directory_record) + de->name_len[0];
+	if (rs->len & 1)
+		(rs->len)++;
+	rs->chr = (unsigned char *)de + rs->len;
+	rs->len = *((unsigned char *)de) - rs->len;
+	if (rs->len < 0)
+		rs->len = 0;
 
 	if (ISOFS_SB(inode->i_sb)->s_rock_offset != -1) {
-		*len -= ISOFS_SB(inode->i_sb)->s_rock_offset;
-		*chr += ISOFS_SB(inode->i_sb)->s_rock_offset;
-		if (*len < 0)
-			*len = 0;
+		rs->len -= ISOFS_SB(inode->i_sb)->s_rock_offset;
+		rs->chr += ISOFS_SB(inode->i_sb)->s_rock_offset;
+		if (rs->len < 0)
+			rs->len = 0;
 	}
 }
 
-#define MAYBE_CONTINUE(LABEL,DEV) \
-  {if (buffer) { kfree(buffer); buffer = NULL; } \
-  if (cont_extent){ \
-    int block, offset, offset1; \
-    struct buffer_head * pbh; \
-    buffer = kmalloc(cont_size,GFP_KERNEL); \
-    if (!buffer) goto out; \
-    block = cont_extent; \
-    offset = cont_offset; \
-    offset1 = 0; \
-    pbh = sb_bread(DEV->i_sb, block); \
-    if(pbh){       \
-      if (offset > pbh->b_size || offset + cont_size > pbh->b_size){	\
-	brelse(pbh); \
-	goto out; \
-      } \
-      memcpy(buffer + offset1, pbh->b_data + offset, cont_size - offset1); \
-      brelse(pbh); \
-      chr = (unsigned char *) buffer; \
-      len = cont_size; \
-      cont_extent = 0; \
-      cont_size = 0; \
-      cont_offset = 0; \
-      goto LABEL; \
-    }    \
-    printk("Unable to read rock-ridge attributes\n");    \
-  }}
+static void init_rock_state(struct rock_state *rs, struct inode *inode)
+{
+	memset(rs, 0, sizeof(*rs));
+	rs->inode = inode;
+}
+
+/*
+ * Returns 0 if the caller should continue scanning, 1 if the scan must end
+ * and -ve on error.
+ */
+static int rock_continue(struct rock_state *rs)
+{
+	int ret = 1;
+
+	kfree(rs->buffer);
+	rs->buffer = NULL;
+	if (rs->cont_extent) {
+		struct buffer_head *bh;
+
+		rs->buffer = kmalloc(rs->cont_size, GFP_KERNEL);
+		if (!rs->buffer) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		ret = -EIO;
+		bh = sb_bread(rs->inode->i_sb, rs->cont_extent);
+		if (bh) {
+			memcpy(rs->buffer, bh->b_data + rs->cont_offset,
+					rs->cont_size);
+			put_bh(bh);
+			rs->chr = rs->buffer;
+			rs->len = rs->cont_size;
+			rs->cont_extent = 0;
+			rs->cont_size = 0;
+			rs->cont_offset = 0;
+			return 0;
+		}
+		printk("Unable to read rock-ridge attributes\n");
+	}
+out:
+	kfree(rs->buffer);
+	rs->buffer = NULL;
+	return ret;
+}
 
 /* return length of name field; 0: not found, -1: to be ignored */
 int get_rock_ridge_filename(struct iso_directory_record *de,
 			    char *retname, struct inode *inode)
 {
-	int len;
-	unsigned char *chr;
-	int cont_extent = 0;
-	int cont_offset = 0;
-	int cont_size = 0;
-	void *buffer = NULL;
+	struct rock_state rs;
 	struct rock_ridge *rr;
 	int sig;
 	int retnamlen = 0;
 	int truncate = 0;
+	int ret = 0;
 
 	if (!ISOFS_SB(inode->i_sb)->s_rock)
 		return 0;
 	*retname = 0;
 
-	setup_rock_ridge(de, inode, &chr, &len);
+	init_rock_state(&rs, inode);
+	setup_rock_ridge(de, inode, &rs);
 repeat:
 
-	while (len > 2) { /* There may be one byte for padding somewhere */
-		rr = (struct rock_ridge *)chr;
+	while (rs.len > 2) { /* There may be one byte for padding somewhere */
+		rr = (struct rock_ridge *)rs.chr;
 		if (rr->len < 3)
 			goto out;	/* Something got screwed up here */
-		sig = isonum_721(chr);
-		chr += rr->len;
-		len -= rr->len;
-		if (len < 0)
+		sig = isonum_721(rs.chr);
+		rs.chr += rr->len;
+		rs.len -= rr->len;
+		if (rs.len < 0)
 			goto out;	/* corrupted isofs */
 
 		switch (sig) {
@@ -126,9 +150,9 @@ repeat:
 				goto out;
 			break;
 		case SIG('C', 'E'):
-			cont_extent = isonum_733(rr->u.CE.extent);
-			cont_offset = isonum_733(rr->u.CE.offset);
-			cont_size = isonum_733(rr->u.CE.size);
+			rs.cont_extent = isonum_733(rr->u.CE.extent);
+			rs.cont_offset = isonum_733(rr->u.CE.offset);
+			rs.cont_size = isonum_733(rr->u.CE.size);
 			break;
 		case SIG('N', 'M'):
 			if (truncate)
@@ -158,58 +182,55 @@ repeat:
 			retnamlen += rr->len - 5;
 			break;
 		case SIG('R', 'E'):
-			if (buffer)
-				kfree(buffer);
+			kfree(rs.buffer);
 			return -1;
 		default:
 			break;
 		}
 	}
-	MAYBE_CONTINUE(repeat, inode);
-	kfree(buffer);
-	return retnamlen;	/* If 0, this file did not have a NM field */
+	ret = rock_continue(&rs);
+	if (ret == 0)
+		goto repeat;
+	if (ret == 1)
+		return retnamlen; /* If 0, this file did not have a NM field */
 out:
-	if (buffer)
-		kfree(buffer);
-	return 0;
+	kfree(rs.buffer);
+	return ret;
 }
 
 static int
 parse_rock_ridge_inode_internal(struct iso_directory_record *de,
 				struct inode *inode, int regard_xa)
 {
-	int len;
-	unsigned char *chr;
 	int symlink_len = 0;
 	int cnt, sig;
 	struct inode *reloc;
 	struct rock_ridge *rr;
 	int rootflag;
-	int cont_extent = 0;
-	int cont_offset = 0;
-	int cont_size = 0;
-	void *buffer = NULL;
+	struct rock_state rs;
+	int ret = 0;
 
 	if (!ISOFS_SB(inode->i_sb)->s_rock)
 		return 0;
 
-	setup_rock_ridge(de, inode, &chr, &len);
+	init_rock_state(&rs, inode);
+	setup_rock_ridge(de, inode, &rs);
 	if (regard_xa) {
-		chr += 14;
-		len -= 14;
-		if (len < 0)
-			len = 0;
+		rs.chr += 14;
+		rs.len -= 14;
+		if (rs.len < 0)
+			rs.len = 0;
 	}
 
 repeat:
-	while (len > 2) { /* There may be one byte for padding somewhere */
-		rr = (struct rock_ridge *)chr;
+	while (rs.len > 2) { /* There may be one byte for padding somewhere */
+		rr = (struct rock_ridge *)rs.chr;
 		if (rr->len < 3)
 			goto out;	/* Something got screwed up here */
-		sig = isonum_721(chr);
-		chr += rr->len;
-		len -= rr->len;
-		if (len < 0)
+		sig = isonum_721(rs.chr);
+		rs.chr += rr->len;
+		rs.len -= rr->len;
+		if (rs.len < 0)
 			goto out;	/* corrupted isofs */
 
 		switch (sig) {
@@ -225,9 +246,9 @@ repeat:
 				goto out;
 			break;
 		case SIG('C', 'E'):
-			cont_extent = isonum_733(rr->u.CE.extent);
-			cont_offset = isonum_733(rr->u.CE.offset);
-			cont_size = isonum_733(rr->u.CE.size);
+			rs.cont_extent = isonum_733(rr->u.CE.extent);
+			rs.cont_offset = isonum_733(rr->u.CE.offset);
+			rs.cont_size = isonum_733(rr->u.CE.size);
 			break;
 		case SIG('E', 'R'):
 			ISOFS_SB(inode->i_sb)->s_rock = 1;
@@ -433,11 +454,14 @@ repeat:
 			break;
 		}
 	}
-	MAYBE_CONTINUE(repeat, inode);
+	ret = rock_continue(&rs);
+	if (ret == 0)
+		goto repeat;
+	if (ret == 1)
+		ret = 0;
 out:
-	if (buffer)
-		kfree(buffer);
-	return 0;
+	kfree(rs.buffer);
+	return ret;
 }
 
 static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
@@ -533,19 +557,16 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 	char *rpnt = link;
 	unsigned char *pnt;
 	struct iso_directory_record *raw_de;
-	int cont_extent = 0;
-	int cont_offset = 0;
-	int cont_size = 0;
-	void *buffer = NULL;
 	unsigned long block, offset;
 	int sig;
-	int len;
-	unsigned char *chr;
 	struct rock_ridge *rr;
+	struct rock_state rs;
+	int ret;
 
 	if (!ISOFS_SB(inode->i_sb)->s_rock)
 		goto error;
 
+	init_rock_state(&rs, inode);
 	block = ei->i_iget5_block;
 	lock_kernel();
 	bh = sb_bread(inode->i_sb, block);
@@ -566,17 +587,17 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 	/* Now test for possible Rock Ridge extensions which will override
 	   some of these numbers in the inode structure. */
 
-	setup_rock_ridge(raw_de, inode, &chr, &len);
+	setup_rock_ridge(raw_de, inode, &rs);
 
 repeat:
-	while (len > 2) { /* There may be one byte for padding somewhere */
-		rr = (struct rock_ridge *)chr;
+	while (rs.len > 2) { /* There may be one byte for padding somewhere */
+		rr = (struct rock_ridge *)rs.chr;
 		if (rr->len < 3)
 			goto out;	/* Something got screwed up here */
-		sig = isonum_721(chr);
-		chr += rr->len;
-		len -= rr->len;
-		if (len < 0)
+		sig = isonum_721(rs.chr);
+		rs.chr += rr->len;
+		rs.len -= rr->len;
+		if (rs.len < 0)
 			goto out;	/* corrupted isofs */
 
 		switch (sig) {
@@ -596,15 +617,18 @@ repeat:
 			break;
 		case SIG('C', 'E'):
 			/* This tells is if there is a continuation record */
-			cont_extent = isonum_733(rr->u.CE.extent);
-			cont_offset = isonum_733(rr->u.CE.offset);
-			cont_size = isonum_733(rr->u.CE.size);
+			rs.cont_extent = isonum_733(rr->u.CE.extent);
+			rs.cont_offset = isonum_733(rr->u.CE.offset);
+			rs.cont_size = isonum_733(rr->u.CE.size);
 		default:
 			break;
 		}
 	}
-	MAYBE_CONTINUE(repeat, inode);
-	kfree(buffer);
+	ret = rock_continue(&rs);
+	if (ret == 0)
+		goto repeat;
+	if (ret < 0)
+		goto fail;
 
 	if (rpnt == link)
 		goto fail;
@@ -618,8 +642,7 @@ repeat:
 
 	/* error exit from macro */
 out:
-	if (buffer)
-		kfree(buffer);
+	kfree(rs.buffer);
 	goto fail;
 out_noread:
 	printk("unable to read i-node block");
-- 
cgit v1.2.3


From 7373909de403d229979842081c63917452e39402 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 21 Jun 2005 17:16:47 -0700
Subject: [PATCH] rock: comment tidies

Be a bit more standard in comment layout.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/rock.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index efefbcce4ce..977dd7009c0 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -13,12 +13,14 @@
 #include "isofs.h"
 #include "rock.h"
 
-/* These functions are designed to read the system areas of a directory record
+/*
+ * These functions are designed to read the system areas of a directory record
  * and extract relevant information.  There are different functions provided
  * depending upon what information we need at the time.  One function fills
  * out an inode structure, a second one extracts a filename, a third one
  * returns a symbolic link name, and a fourth one returns the extent number
- * for the file. */
+ * for the file.
+ */
 
 #define SIG(A,B) ((A) | ((B) << 8))	/* isonum_721() */
 
@@ -34,7 +36,7 @@ struct rock_state {
 
 /*
  * This is a way of ensuring that we have something in the system
- *  use fields that is compatible with Rock Ridge.  Return zero on success.
+ * use fields that is compatible with Rock Ridge.  Return zero on success.
  */
 
 static int check_sp(struct rock_ridge *rr, struct inode *inode)
@@ -111,7 +113,9 @@ out:
 	return ret;
 }
 
-/* return length of name field; 0: not found, -1: to be ignored */
+/*
+ * return length of name field; 0: not found, -1: to be ignored
+ */
 int get_rock_ridge_filename(struct iso_directory_record *de,
 			    char *retname, struct inode *inode)
 {
@@ -535,8 +539,11 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
 int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
 {
 	int result = parse_rock_ridge_inode_internal(de, inode, 0);
-	/* if rockridge flag was reset and we didn't look for attributes
-	 * behind eventual XA attributes, have a look there */
+
+	/*
+	 * if rockridge flag was reset and we didn't look for attributes
+	 * behind eventual XA attributes, have a look there
+	 */
 	if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1)
 	    && (ISOFS_SB(inode->i_sb)->s_rock == 2)) {
 		result = parse_rock_ridge_inode_internal(de, inode, 14);
@@ -544,9 +551,10 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
 	return result;
 }
 
-/* readpage() for symlinks: reads symlink contents into the page and either
-   makes it uptodate and returns 0 or returns error (-EIO) */
-
+/*
+ * readpage() for symlinks: reads symlink contents into the page and either
+ * makes it uptodate and returns 0 or returns error (-EIO)
+ */
 static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 {
 	struct inode *inode = page->mapping->host;
@@ -584,8 +592,10 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 	if (offset + *pnt > bufsize)
 		goto out_bad_span;
 
-	/* Now test for possible Rock Ridge extensions which will override
-	   some of these numbers in the inode structure. */
+	/*
+	 * Now test for possible Rock Ridge extensions which will override
+	 * some of these numbers in the inode structure.
+	 */
 
 	setup_rock_ridge(raw_de, inode, &rs);
 
-- 
cgit v1.2.3


From a089221c5e8a5ae8d74a919c8c7a4d2f68bd59e5 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 21 Jun 2005 17:16:48 -0700
Subject: [PATCH] rock: lindent rock.h

So we have a couple of rock-ridge bugs.  First up, rotoroot the poor thing
into something which it is possible to work on.

Feed rock.h through Lindent, tidy a couple of things by hand.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/rock.h | 183 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 93 insertions(+), 90 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/rock.h b/fs/isofs/rock.h
index deaf5c8e8b4..9d3cbc76238 100644
--- a/fs/isofs/rock.h
+++ b/fs/isofs/rock.h
@@ -1,85 +1,88 @@
-/* These structs are used by the system-use-sharing protocol, in which the
-   Rock Ridge extensions are embedded.  It is quite possible that other
-   extensions are present on the disk, and this is fine as long as they
-   all use SUSP */
-
-struct SU_SP{
-  unsigned char magic[2];
-  unsigned char skip;
-} __attribute__((packed));
-
-struct SU_CE{
-  char extent[8];
-  char offset[8];
-  char size[8];
+/*
+ * These structs are used by the system-use-sharing protocol, in which the
+ * Rock Ridge extensions are embedded.  It is quite possible that other
+ * extensions are present on the disk, and this is fine as long as they
+ * all use SUSP
+ */
+
+struct SU_SP {
+	unsigned char magic[2];
+	unsigned char skip;
+} __attribute__ ((packed));
+
+struct SU_CE {
+	char extent[8];
+	char offset[8];
+	char size[8];
 };
 
-struct SU_ER{
-  unsigned char len_id;
-  unsigned char len_des;
-  unsigned char len_src;
-  unsigned char ext_ver;
-  char data[0];
-} __attribute__((packed));
-
-struct RR_RR{
-  char flags[1];
-} __attribute__((packed));
-
-struct RR_PX{
-  char mode[8];
-  char n_links[8];
-  char uid[8];
-  char gid[8];
+struct SU_ER {
+	unsigned char len_id;
+	unsigned char len_des;
+	unsigned char len_src;
+	unsigned char ext_ver;
+	char data[0];
+} __attribute__ ((packed));
+
+struct RR_RR {
+	char flags[1];
+} __attribute__ ((packed));
+
+struct RR_PX {
+	char mode[8];
+	char n_links[8];
+	char uid[8];
+	char gid[8];
 };
 
-struct RR_PN{
-  char dev_high[8];
-  char dev_low[8];
+struct RR_PN {
+	char dev_high[8];
+	char dev_low[8];
 };
 
+struct SL_component {
+	unsigned char flags;
+	unsigned char len;
+	char text[0];
+} __attribute__ ((packed));
 
-struct SL_component{
-  unsigned char flags;
-  unsigned char len;
-  char text[0];
-} __attribute__((packed));
+struct RR_SL {
+	unsigned char flags;
+	struct SL_component link;
+} __attribute__ ((packed));
 
-struct RR_SL{
-  unsigned char flags;
-  struct SL_component link;
-} __attribute__((packed));
+struct RR_NM {
+	unsigned char flags;
+	char name[0];
+} __attribute__ ((packed));
 
-struct RR_NM{
-  unsigned char flags;
-  char name[0];
-} __attribute__((packed));
-
-struct RR_CL{
-  char location[8];
+struct RR_CL {
+	char location[8];
 };
 
-struct RR_PL{
-  char location[8];
+struct RR_PL {
+	char location[8];
 };
 
-struct stamp{
-  char time[7];
-} __attribute__((packed));
+struct stamp {
+	char time[7];
+} __attribute__ ((packed));
 
-struct RR_TF{
-  char flags;
-  struct stamp times[0];  /* Variable number of these beasts */
-} __attribute__((packed));
+struct RR_TF {
+	char flags;
+	struct stamp times[0];	/* Variable number of these beasts */
+} __attribute__ ((packed));
 
 /* Linux-specific extension for transparent decompression */
-struct RR_ZF{
-  char algorithm[2];
-  char parms[2];
-  char real_size[8];
+struct RR_ZF {
+	char algorithm[2];
+	char parms[2];
+	char real_size[8];
 };
 
-/* These are the bits and their meanings for flags in the TF structure. */
+/*
+ * These are the bits and their meanings for flags in the TF structure.
+ */
 #define TF_CREATE 1
 #define TF_MODIFY 2
 #define TF_ACCESS 4
@@ -89,31 +92,31 @@ struct RR_ZF{
 #define TF_EFFECTIVE 64
 #define TF_LONG_FORM 128
 
-struct rock_ridge{
-  char signature[2];
-  unsigned char len;
-  unsigned char version;
-  union{
-    struct SU_SP SP;
-    struct SU_CE CE;
-    struct SU_ER ER;
-    struct RR_RR RR;
-    struct RR_PX PX;
-    struct RR_PN PN;
-    struct RR_SL SL;
-    struct RR_NM NM;
-    struct RR_CL CL;
-    struct RR_PL PL;
-    struct RR_TF TF;
-    struct RR_ZF ZF;
-  } u;
+struct rock_ridge {
+	char signature[2];
+	unsigned char len;
+	unsigned char version;
+	union {
+		struct SU_SP SP;
+		struct SU_CE CE;
+		struct SU_ER ER;
+		struct RR_RR RR;
+		struct RR_PX PX;
+		struct RR_PN PN;
+		struct RR_SL SL;
+		struct RR_NM NM;
+		struct RR_CL CL;
+		struct RR_PL PL;
+		struct RR_TF TF;
+		struct RR_ZF ZF;
+	} u;
 };
 
-#define RR_PX 1   /* POSIX attributes */
-#define RR_PN 2   /* POSIX devices */
-#define RR_SL 4   /* Symbolic link */
-#define RR_NM 8   /* Alternate Name */
-#define RR_CL 16  /* Child link */
-#define RR_PL 32  /* Parent link */
-#define RR_RE 64  /* Relocation directory */
-#define RR_TF 128 /* Timestamps */
+#define RR_PX 1			/* POSIX attributes */
+#define RR_PN 2			/* POSIX devices */
+#define RR_SL 4			/* Symbolic link */
+#define RR_NM 8			/* Alternate Name */
+#define RR_CL 16		/* Child link */
+#define RR_PL 32		/* Parent link */
+#define RR_RE 64		/* Relocation directory */
+#define RR_TF 128		/* Timestamps */
-- 
cgit v1.2.3


From 9eb7f2c67c41d2cd730aedcd23e5baca09211d03 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 21 Jun 2005 17:16:49 -0700
Subject: [PATCH] isofs: remove debug stuff

isofs/inode.c:

- Remove some crufty leak detection code

- coding style cleanups

- kfree(NULL) is permitted.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/inode.c | 109 ++++++++++++++++---------------------------------------
 1 file changed, 32 insertions(+), 77 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index abd7b12eeca..72cc9727dc0 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -28,11 +28,6 @@
 
 #define BEQUIET
 
-#ifdef LEAK_CHECK
-static int check_malloc;
-static int check_bread;
-#endif
-
 static int isofs_hashi(struct dentry *parent, struct qstr *qstr);
 static int isofs_hash(struct dentry *parent, struct qstr *qstr);
 static int isofs_dentry_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b);
@@ -55,11 +50,6 @@ static void isofs_put_super(struct super_block *sb)
 	}
 #endif
 
-#ifdef LEAK_CHECK
-	printk("Outstanding mallocs:%d, outstanding buffers: %d\n",
-	       check_malloc, check_bread);
-#endif
-
 	kfree(sbi);
 	sb->s_fs_info = NULL;
 	return;
@@ -73,7 +63,7 @@ static kmem_cache_t *isofs_inode_cachep;
 static struct inode *isofs_alloc_inode(struct super_block *sb)
 {
 	struct iso_inode_info *ei;
-	ei = (struct iso_inode_info *)kmem_cache_alloc(isofs_inode_cachep, SLAB_KERNEL);
+	ei = kmem_cache_alloc(isofs_inode_cachep, SLAB_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
@@ -84,9 +74,9 @@ static void isofs_destroy_inode(struct inode *inode)
 	kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
 {
-	struct iso_inode_info *ei = (struct iso_inode_info *) foo;
+	struct iso_inode_info *ei = foo;
 
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 	    SLAB_CTOR_CONSTRUCTOR)
@@ -107,7 +97,8 @@ static int init_inodecache(void)
 static void destroy_inodecache(void)
 {
 	if (kmem_cache_destroy(isofs_inode_cachep))
-		printk(KERN_INFO "iso_inode_cache: not all structures were freed\n");
+		printk(KERN_INFO "iso_inode_cache: not all structures were "
+					"freed\n");
 }
 
 static int isofs_remount(struct super_block *sb, int *flags, char *data)
@@ -144,7 +135,7 @@ static struct dentry_operations isofs_dentry_ops[] = {
 	{
 		.d_hash		= isofs_hashi_ms,
 		.d_compare	= isofs_dentry_cmpi_ms,
-	}
+	},
 #endif
 };
 
@@ -219,8 +210,8 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms)
 /*
  * Case insensitive compare of two isofs names.
  */
-static int
-isofs_dentry_cmpi_common(struct dentry *dentry,struct qstr *a,struct qstr *b,int ms)
+static int isofs_dentry_cmpi_common(struct dentry *dentry, struct qstr *a,
+				struct qstr *b, int ms)
 {
 	int alen, blen;
 
@@ -243,8 +234,8 @@ isofs_dentry_cmpi_common(struct dentry *dentry,struct qstr *a,struct qstr *b,int
 /*
  * Case sensitive compare of two isofs names.
  */
-static int
-isofs_dentry_cmp_common(struct dentry *dentry,struct qstr *a,struct qstr *b,int ms)
+static int isofs_dentry_cmp_common(struct dentry *dentry, struct qstr *a,
+					struct qstr *b, int ms)
 {
 	int alen, blen;
 
@@ -356,7 +347,7 @@ static match_table_t tokens = {
 	{Opt_err, NULL}
 };
 
-static int parse_options(char *options, struct iso9660_options * popt)
+static int parse_options(char *options, struct iso9660_options *popt)
 {
 	char *p;
 	int option;
@@ -493,7 +484,7 @@ static int parse_options(char *options, struct iso9660_options * popt)
  */
 #define WE_OBEY_THE_WRITTEN_STANDARDS 1
 
-static unsigned int isofs_get_last_session(struct super_block *sb,s32 session )
+static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
 {
 	struct cdrom_multisession ms_info;
 	unsigned int vol_desc_start;
@@ -518,7 +509,8 @@ static unsigned int isofs_get_last_session(struct super_block *sb,s32 session )
 		printk(KERN_ERR "Invalid session number or type of track\n");
 	}
 	i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info);
-	if(session > 0) printk(KERN_ERR "Invalid session number\n");
+	if (session > 0)
+		printk(KERN_ERR "Invalid session number\n");
 #if 0
 	printk("isofs.inode: CDROMMULTISESSION: rc=%d\n",i);
 	if (i==0) {
@@ -557,13 +549,13 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
 	struct iso9660_options		opt;
 	struct isofs_sb_info	      * sbi;
 
-	sbi = kmalloc(sizeof(struct isofs_sb_info), GFP_KERNEL);
+	sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
 	if (!sbi)
 		return -ENOMEM;
 	s->s_fs_info = sbi;
-	memset(sbi, 0, sizeof(struct isofs_sb_info));
+	memset(sbi, 0, sizeof(*sbi));
 
-	if (!parse_options((char *) data, &opt))
+	if (!parse_options((char *)data, &opt))
 		goto out_freesbi;
 
 	/*
@@ -1002,7 +994,6 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
 		rv++;
 	}
 
-
 abort:
 	unlock_kernel();
 	return rv;
@@ -1014,7 +1005,7 @@ abort:
 static int isofs_get_block(struct inode *inode, sector_t iblock,
 		    struct buffer_head *bh_result, int create)
 {
-	if ( create ) {
+	if (create) {
 		printk("isofs_get_block: Kernel tries to allocate a block\n");
 		return -EROFS;
 	}
@@ -1061,19 +1052,17 @@ static struct address_space_operations isofs_aops = {
 
 static inline void test_and_set_uid(uid_t *p, uid_t value)
 {
-	if(value) {
+	if (value)
 		*p = value;
-	}
 }
 
 static inline void test_and_set_gid(gid_t *p, gid_t value)
 {
-        if(value) {
+        if (value)
                 *p = value;
-        }
 }
 
-static int isofs_read_level3_size(struct inode * inode)
+static int isofs_read_level3_size(struct inode *inode)
 {
 	unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
 	int high_sierra = ISOFS_SB(inode->i_sb)->s_high_sierra;
@@ -1136,7 +1125,7 @@ static int isofs_read_level3_size(struct inode * inode)
 				bh = sb_bread(inode->i_sb, block);
 				if (!bh)
 					goto out_noread;
-				memcpy((void *) tmpde + slop, bh->b_data, offset);
+				memcpy((void *)tmpde+slop, bh->b_data, offset);
 			}
 			de = tmpde;
 		}
@@ -1150,12 +1139,11 @@ static int isofs_read_level3_size(struct inode * inode)
 		more_entries = de->flags[-high_sierra] & 0x80;
 
 		i++;
-		if(i > 100)
+		if (i > 100)
 			goto out_toomany;
-	} while(more_entries);
+	} while (more_entries);
 out:
-	if (tmpde)
-		kfree(tmpde);
+	kfree(tmpde);
 	if (bh)
 		brelse(bh);
 	return 0;
@@ -1179,7 +1167,7 @@ out_toomany:
 	goto out;
 }
 
-static void isofs_read_inode(struct inode * inode)
+static void isofs_read_inode(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
 	struct isofs_sb_info *sbi = ISOFS_SB(sb);
@@ -1249,7 +1237,7 @@ static void isofs_read_inode(struct inode * inode)
 	ei->i_format_parm[2] = 0;
 
 	ei->i_section_size = isonum_733 (de->size);
-	if(de->flags[-high_sierra] & 0x80) {
+	if (de->flags[-high_sierra] & 0x80) {
 		if(isofs_read_level3_size(inode)) goto fail;
 	} else {
 		ei->i_next_section_block = 0;
@@ -1336,16 +1324,16 @@ static void isofs_read_inode(struct inode * inode)
 		/* XXX - parse_rock_ridge_inode() had already set i_rdev. */
 		init_special_inode(inode, inode->i_mode, inode->i_rdev);
 
- out:
+out:
 	if (tmpde)
 		kfree(tmpde);
 	if (bh)
 		brelse(bh);
 	return;
 
- out_badread:
+out_badread:
 	printk(KERN_WARNING "ISOFS: unable to read i-node block\n");
- fail:
+fail:
 	make_bad_inode(inode);
 	goto out;
 }
@@ -1394,11 +1382,8 @@ struct inode *isofs_iget(struct super_block *sb,
 
 	hashval = (block << sb->s_blocksize_bits) | offset;
 
-	inode = iget5_locked(sb,
-			     hashval,
-			     &isofs_iget5_test,
-			     &isofs_iget5_set,
-			     &data);
+	inode = iget5_locked(sb, hashval, &isofs_iget5_test,
+			     &isofs_iget5_set, &data);
 
 	if (inode && (inode->i_state & I_NEW)) {
 		sb->s_op->read_inode(inode);
@@ -1408,36 +1393,6 @@ struct inode *isofs_iget(struct super_block *sb,
 	return inode;
 }
 
-#ifdef LEAK_CHECK
-#undef malloc
-#undef free_s
-#undef sb_bread
-#undef brelse
-
-void * leak_check_malloc(unsigned int size){
-  void * tmp;
-  check_malloc++;
-  tmp = kmalloc(size, GFP_KERNEL);
-  return tmp;
-}
-
-void leak_check_free_s(void * obj, int size){
-  check_malloc--;
-  return kfree(obj);
-}
-
-struct buffer_head * leak_check_bread(struct super_block *sb, int block){
-  check_bread++;
-  return sb_bread(sb, block);
-}
-
-void leak_check_brelse(struct buffer_head * bh){
-  check_bread--;
-  return brelse(bh);
-}
-
-#endif
-
 static struct super_block *isofs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
-- 
cgit v1.2.3


From e595447e177b39aa6c96baaa57b30cde2d8b9df7 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 21 Jun 2005 17:16:50 -0700
Subject: [PATCH] rock.c: handle corrupted directories

The bug in rock.c is that it's totally trusting of the contents of the
directories.  If the directory says there's a continuation 10000 bytes into
this 4k block then we cheerily poke around in memory we don't own and oops.

So change rock_continue() to apply various sanity checks, at least ensuring
that the offset+length remain within the bounds for the header part of a
struct rock_ridge directory entry.

Note that the kernel can still overindex the buffer due to the variable size
of the rock-ridge directory entries.  We cannot check that in rock_continue()
unless we go parse the directory entry's signature and work out its size.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/rock.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'fs')

diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 977dd7009c0..9a81830abff 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -81,9 +81,22 @@ static void init_rock_state(struct rock_state *rs, struct inode *inode)
 static int rock_continue(struct rock_state *rs)
 {
 	int ret = 1;
+	int blocksize = 1 << rs->inode->i_blkbits;
+	const int min_de_size = offsetof(struct rock_ridge, u);
 
 	kfree(rs->buffer);
 	rs->buffer = NULL;
+
+	if ((unsigned)rs->cont_offset > blocksize - min_de_size ||
+	    (unsigned)rs->cont_size > blocksize ||
+	    (unsigned)(rs->cont_offset + rs->cont_size) > blocksize) {
+		printk(KERN_NOTICE "rock: corrupted directory entry. "
+			"extent=%d, offset=%d, size=%d\n",
+			rs->cont_extent, rs->cont_offset, rs->cont_size);
+		ret = -EIO;
+		goto out;
+	}
+
 	if (rs->cont_extent) {
 		struct buffer_head *bh;
 
-- 
cgit v1.2.3


From 642217c17b9a56c7e4cf48f6a13dfd5e4a4c2e10 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 21 Jun 2005 17:16:51 -0700
Subject: [PATCH] rock: rename union members

The silly thing does:

	struct foo { ... };
	...
	#define foo 42

so you can no longer refer to `struct foo' in C code.

Rename the structures.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/rock.h | 48 ++++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/rock.h b/fs/isofs/rock.h
index 9d3cbc76238..ed09e2b0863 100644
--- a/fs/isofs/rock.h
+++ b/fs/isofs/rock.h
@@ -5,18 +5,18 @@
  * all use SUSP
  */
 
-struct SU_SP {
+struct SU_SP_s {
 	unsigned char magic[2];
 	unsigned char skip;
 } __attribute__ ((packed));
 
-struct SU_CE {
+struct SU_CE_s {
 	char extent[8];
 	char offset[8];
 	char size[8];
 };
 
-struct SU_ER {
+struct SU_ER_s {
 	unsigned char len_id;
 	unsigned char len_des;
 	unsigned char len_src;
@@ -24,18 +24,18 @@ struct SU_ER {
 	char data[0];
 } __attribute__ ((packed));
 
-struct RR_RR {
+struct RR_RR_s {
 	char flags[1];
 } __attribute__ ((packed));
 
-struct RR_PX {
+struct RR_PX_s {
 	char mode[8];
 	char n_links[8];
 	char uid[8];
 	char gid[8];
 };
 
-struct RR_PN {
+struct RR_PN_s {
 	char dev_high[8];
 	char dev_low[8];
 };
@@ -46,21 +46,21 @@ struct SL_component {
 	char text[0];
 } __attribute__ ((packed));
 
-struct RR_SL {
+struct RR_SL_s {
 	unsigned char flags;
 	struct SL_component link;
 } __attribute__ ((packed));
 
-struct RR_NM {
+struct RR_NM_s {
 	unsigned char flags;
 	char name[0];
 } __attribute__ ((packed));
 
-struct RR_CL {
+struct RR_CL_s {
 	char location[8];
 };
 
-struct RR_PL {
+struct RR_PL_s {
 	char location[8];
 };
 
@@ -68,13 +68,13 @@ struct stamp {
 	char time[7];
 } __attribute__ ((packed));
 
-struct RR_TF {
+struct RR_TF_s {
 	char flags;
 	struct stamp times[0];	/* Variable number of these beasts */
 } __attribute__ ((packed));
 
 /* Linux-specific extension for transparent decompression */
-struct RR_ZF {
+struct RR_ZF_s {
 	char algorithm[2];
 	char parms[2];
 	char real_size[8];
@@ -97,18 +97,18 @@ struct rock_ridge {
 	unsigned char len;
 	unsigned char version;
 	union {
-		struct SU_SP SP;
-		struct SU_CE CE;
-		struct SU_ER ER;
-		struct RR_RR RR;
-		struct RR_PX PX;
-		struct RR_PN PN;
-		struct RR_SL SL;
-		struct RR_NM NM;
-		struct RR_CL CL;
-		struct RR_PL PL;
-		struct RR_TF TF;
-		struct RR_ZF ZF;
+		struct SU_SP_s SP;
+		struct SU_CE_s CE;
+		struct SU_ER_s ER;
+		struct RR_RR_s RR;
+		struct RR_PX_s PX;
+		struct RR_PN_s PN;
+		struct RR_SL_s SL;
+		struct RR_NM_s NM;
+		struct RR_CL_s CL;
+		struct RR_PL_s PL;
+		struct RR_TF_s TF;
+		struct RR_ZF_s ZF;
 	} u;
 };
 
-- 
cgit v1.2.3


From f2966632a134e865db3c819346a1dc7d96e05309 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 21 Jun 2005 17:16:51 -0700
Subject: [PATCH] rock: handle directory overflows

Handle the case where the variable-sized part of a rock-ridge directory entry
overhangs the end of the buffer which we allocated for it.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/rock.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 74 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 9a81830abff..4326cb47f8f 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -126,6 +126,66 @@ out:
 	return ret;
 }
 
+/*
+ * We think there's a record of type `sig' at rs->chr.  Parse the signature
+ * and make sure that there's really room for a record of that type.
+ */
+static int rock_check_overflow(struct rock_state *rs, int sig)
+{
+	int len;
+
+	switch (sig) {
+	case SIG('S', 'P'):
+		len = sizeof(struct SU_SP_s);
+		break;
+	case SIG('C', 'E'):
+		len = sizeof(struct SU_CE_s);
+		break;
+	case SIG('E', 'R'):
+		len = sizeof(struct SU_ER_s);
+		break;
+	case SIG('R', 'R'):
+		len = sizeof(struct RR_RR_s);
+		break;
+	case SIG('P', 'X'):
+		len = sizeof(struct RR_PX_s);
+		break;
+	case SIG('P', 'N'):
+		len = sizeof(struct RR_PN_s);
+		break;
+	case SIG('S', 'L'):
+		len = sizeof(struct RR_SL_s);
+		break;
+	case SIG('N', 'M'):
+		len = sizeof(struct RR_NM_s);
+		break;
+	case SIG('C', 'L'):
+		len = sizeof(struct RR_CL_s);
+		break;
+	case SIG('P', 'L'):
+		len = sizeof(struct RR_PL_s);
+		break;
+	case SIG('T', 'F'):
+		len = sizeof(struct RR_TF_s);
+		break;
+	case SIG('Z', 'F'):
+		len = sizeof(struct RR_ZF_s);
+		break;
+	default:
+		len = 0;
+		break;
+	}
+	len += offsetof(struct rock_ridge, u);
+	if (len > rs->len) {
+		printk(KERN_NOTICE "rock: directory entry would overflow "
+				"storage\n");
+		printk(KERN_NOTICE "rock: sig=0x%02x, size=%d, remaining=%d\n",
+				sig, len, rs->len);
+		return -EIO;
+	}
+	return 0;
+}
+
 /*
  * return length of name field; 0: not found, -1: to be ignored
  */
@@ -152,10 +212,12 @@ repeat:
 		if (rr->len < 3)
 			goto out;	/* Something got screwed up here */
 		sig = isonum_721(rs.chr);
+		if (rock_check_overflow(&rs, sig))
+			goto eio;
 		rs.chr += rr->len;
 		rs.len -= rr->len;
 		if (rs.len < 0)
-			goto out;	/* corrupted isofs */
+			goto eio;	/* corrupted isofs */
 
 		switch (sig) {
 		case SIG('R', 'R'):
@@ -213,6 +275,9 @@ repeat:
 out:
 	kfree(rs.buffer);
 	return ret;
+eio:
+	ret = -EIO;
+	goto out;
 }
 
 static int
@@ -245,10 +310,12 @@ repeat:
 		if (rr->len < 3)
 			goto out;	/* Something got screwed up here */
 		sig = isonum_721(rs.chr);
+		if (rock_check_overflow(&rs, sig))
+			goto eio;
 		rs.chr += rr->len;
 		rs.len -= rr->len;
 		if (rs.len < 0)
-			goto out;	/* corrupted isofs */
+			goto eio;	/* corrupted isofs */
 
 		switch (sig) {
 #ifndef CONFIG_ZISOFS		/* No flag for SF or ZF */
@@ -479,6 +546,9 @@ repeat:
 out:
 	kfree(rs.buffer);
 	return ret;
+eio:
+	ret = -EIO;
+	goto out;
 }
 
 static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
@@ -618,6 +688,8 @@ repeat:
 		if (rr->len < 3)
 			goto out;	/* Something got screwed up here */
 		sig = isonum_721(rs.chr);
+		if (rock_check_overflow(&rs, sig))
+			goto out;
 		rs.chr += rr->len;
 		rs.len -= rr->len;
 		if (rs.len < 0)
-- 
cgit v1.2.3


From 9769f4eb3fad2dd53a5d24c81ee5f7f05450742b Mon Sep 17 00:00:00 2001
From: Jeremy White <jwhite@codeweavers.com>
Date: Tue, 21 Jun 2005 17:16:53 -0700
Subject: [PATCH] isofs: show hidden files, add granularity for assoc/hidden
 files flags

The current isofs treatment of hidden files is flawed in two ways.  First,
it does not provide sufficient granularity; it hides both 'hidden' files
and 'associated' files (resource fork for Mac files).  Second, the default
behavior to completely strip hidden files, while an admirable
implementation of the spec, is a poor choice given the real world use of
hidden files as a poor mans copy protection scheme for MSDOS and Windows
based systems.  A longer description of this is available here:

   http://www.uwsg.iu.edu/hypermail/linux/kernel/0205.3/0267.html

This patch was originally built after a few private conversations with Alan
Cox; I shamefully failed to persist in seeing it go forward, I hope to make
amends now.

This patch introduces granularity by allowing explicit control for both
hidden and associated files.  It also reverses the default so that by
default, hidden files are treated as regular files on the iso9660 file
system.

This allow Wine to process Windows CDs, including those that are hybrid
Mac/Windows CDs properly and completely, without our having to go muck up
peoples fstabs as we do now.  (I have tested this with such a hybrid +
hidden CD and have verified that this patch works as claimed).

Signed-off-by: Jeremy White <jwhite@codeweavers.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/dir.c   | 17 +++++++++++------
 fs/isofs/inode.c | 19 ++++++++++++++-----
 fs/isofs/isofs.h |  2 ++
 fs/isofs/namei.c | 16 +++++++++-------
 4 files changed, 36 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 6030956b894..7901ac9f97a 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -193,12 +193,17 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
 
 		/* Handle everything else.  Do name translation if there
 		   is no Rock Ridge NM field. */
-		if (sbi->s_unhide == 'n') {
-			/* Do not report hidden or associated files */
-			if (de->flags[-sbi->s_high_sierra] & 5) {
-				filp->f_pos += de_len;
-				continue;
-			}
+
+		/*
+		 * Do not report hidden files if so instructed, or associated
+		 * files unless instructed to do so
+		 */
+		if ((sbi->s_hide == 'y' &&
+				(de->flags[-sbi->s_high_sierra] & 1)) ||
+		      (sbi->s_showassoc =='n' &&
+				(de->flags[-sbi->s_high_sierra] & 4))) {
+			filp->f_pos += de_len;
+			continue;
 		}
 
 		map = 1;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 72cc9727dc0..1652de1b6cb 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -144,7 +144,8 @@ struct iso9660_options{
 	char rock;
 	char joliet;
 	char cruft;
-	char unhide;
+	char hide;
+	char showassoc;
 	char nocompress;
 	unsigned char check;
 	unsigned int blocksize;
@@ -309,13 +310,15 @@ enum {
 	Opt_block, Opt_check_r, Opt_check_s, Opt_cruft, Opt_gid, Opt_ignore,
 	Opt_iocharset, Opt_map_a, Opt_map_n, Opt_map_o, Opt_mode, Opt_nojoliet,
 	Opt_norock, Opt_sb, Opt_session, Opt_uid, Opt_unhide, Opt_utf8, Opt_err,
-	Opt_nocompress,
+	Opt_nocompress, Opt_hide, Opt_showassoc,
 };
 
 static match_table_t tokens = {
 	{Opt_norock, "norock"},
 	{Opt_nojoliet, "nojoliet"},
 	{Opt_unhide, "unhide"},
+	{Opt_hide, "hide"},
+	{Opt_showassoc, "showassoc"},
 	{Opt_cruft, "cruft"},
 	{Opt_utf8, "utf8"},
 	{Opt_iocharset, "iocharset=%s"},
@@ -356,7 +359,8 @@ static int parse_options(char *options, struct iso9660_options *popt)
 	popt->rock = 'y';
 	popt->joliet = 'y';
 	popt->cruft = 'n';
-	popt->unhide = 'n';
+	popt->hide = 'n';
+	popt->showassoc = 'n';
 	popt->check = 'u';		/* unset */
 	popt->nocompress = 0;
 	popt->blocksize = 1024;
@@ -389,8 +393,12 @@ static int parse_options(char *options, struct iso9660_options *popt)
 		case Opt_nojoliet:
 			popt->joliet = 'n';
 			break;
+		case Opt_hide:
+			popt->hide = 'y';
+			break;
 		case Opt_unhide:
-			popt->unhide = 'y';
+		case Opt_showassoc:
+			popt->showassoc = 'y';
 			break;
 		case Opt_cruft:
 			popt->cruft = 'y';
@@ -784,7 +792,8 @@ root_found:
 	sbi->s_rock = (opt.rock == 'y' ? 2 : 0);
 	sbi->s_rock_offset = -1; /* initial offset, will guess until SP is found*/
 	sbi->s_cruft = opt.cruft;
-	sbi->s_unhide = opt.unhide;
+	sbi->s_hide = opt.hide;
+	sbi->s_showassoc = opt.showassoc;
 	sbi->s_uid = opt.uid;
 	sbi->s_gid = opt.gid;
 	sbi->s_utf8 = opt.utf8;
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 9ce7b51fb61..38c75151fc6 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -47,6 +47,8 @@ struct isofs_sb_info {
 	unsigned char s_nosuid;
 	unsigned char s_nodev;
 	unsigned char s_nocompress;
+	unsigned char s_hide;
+	unsigned char s_showassoc;
 
 	mode_t s_mode;
 	gid_t s_gid;
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 690edf37173..e37e82b7cbf 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -131,14 +131,16 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
 		}
 
 		/*
-		 * Skip hidden or associated files unless unhide is set 
+		 * Skip hidden or associated files unless hide or showassoc,
+		 * respectively, is set
 		 */
 		match = 0;
 		if (dlen > 0 &&
-		    (!(de->flags[-sbi->s_high_sierra] & 5)
-		     || sbi->s_unhide == 'y'))
-		{
-			match = (isofs_cmp(dentry,dpnt,dlen) == 0);
+			(sbi->s_hide =='n' ||
+				(!(de->flags[-sbi->s_high_sierra] & 1))) &&
+			(sbi->s_showassoc =='y' ||
+				(!(de->flags[-sbi->s_high_sierra] & 4)))) {
+			match = (isofs_cmp(dentry, dpnt, dlen) == 0);
 		}
 		if (match) {
 			isofs_normalize_block_and_offset(de,
@@ -146,11 +148,11 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
 							 &offset_saved);
                         *block_rv = block_saved;
                         *offset_rv = offset_saved;
-			if (bh) brelse(bh);
+			brelse(bh);
 			return 1;
 		}
 	}
-	if (bh) brelse(bh);
+	brelse(bh);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 5b616f5d596c0b056129f8aeafbc08409b3cd050 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:20 +0000
Subject: [PATCH] RPC: Make rpc_create_client() destroy the transport on
 failure.

 This saves us a couple of lines of cleanup code for each call.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/host.c        | 4 +---
 fs/lockd/mon.c         | 5 ++---
 fs/nfs/inode.c         | 2 --
 fs/nfs/mount_clnt.c    | 4 +---
 fs/nfsd/nfs4callback.c | 4 +---
 5 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 52707c5ad6e..90a62f27914 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -193,10 +193,8 @@ nlm_bind_host(struct nlm_host *host)
 		/* Existing NLM servers accept AUTH_UNIX only */
 		clnt = rpc_create_client(xprt, host->h_name, &nlm_program,
 					host->h_version, RPC_AUTH_UNIX);
-		if (IS_ERR(clnt)) {
-			xprt_destroy(xprt);
+		if (IS_ERR(clnt))
 			goto forgetit;
-		}
 		clnt->cl_autobind = 1;	/* turn on pmap queries */
 		xprt->nocong = 1;	/* No congestion control for NLM */
 		xprt->resvport = 1;	/* NLM requires a reserved port */
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 6fc1bebeec1..81b5e7778d7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -120,15 +120,14 @@ nsm_create(void)
 				&nsm_program, SM_VERSION,
 				RPC_AUTH_NULL);
 	if (IS_ERR(clnt))
-		goto out_destroy;
+		goto out_err;
 	clnt->cl_softrtry = 1;
 	clnt->cl_chatty   = 1;
 	clnt->cl_oneshot  = 1;
 	xprt->resvport = 1;	/* NSM requires a reserved port */
 	return clnt;
 
-out_destroy:
-	xprt_destroy(xprt);
+out_err:
 	return clnt;
 }
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f2317f3e29f..ea784969fb8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -383,7 +383,6 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	return clnt;
 
 out_fail:
-	xprt_destroy(xprt);
 	return clnt;
 }
 
@@ -1623,7 +1622,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 		if (IS_ERR(clnt)) {
 			up_write(&clp->cl_sem);
 			printk(KERN_WARNING "NFS: cannot create RPC client.\n");
-			xprt_destroy(xprt);
 			err = PTR_ERR(clnt);
 			goto out_fail;
 		}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 9d3ddad96d9..0e82617f2de 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -80,9 +80,7 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
 	clnt = rpc_create_client(xprt, hostname,
 				&mnt_program, version,
 				RPC_AUTH_UNIX);
-	if (IS_ERR(clnt)) {
-		xprt_destroy(xprt);
-	} else {
+	if (!IS_ERR(clnt)) {
 		clnt->cl_softrtry = 1;
 		clnt->cl_chatty   = 1;
 		clnt->cl_oneshot  = 1;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 1a55dfcb74b..634465e9cfc 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -430,7 +430,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
 	if (IS_ERR(clnt)) {
 		dprintk("NFSD: couldn't create callback client\n");
-		goto out_xprt;
+		goto out_err;
 	}
 	clnt->cl_intr = 0;
 	clnt->cl_softrtry = 1;
@@ -465,8 +465,6 @@ out_rpciod:
 out_clnt:
 	rpc_shutdown_client(clnt);
 	goto out_err;
-out_xprt:
-	xprt_destroy(xprt);
 out_err:
 	dprintk("NFSD: warning: no callback path to client %.*s\n",
 		(int)clp->cl_name.len, clp->cl_name.data);
-- 
cgit v1.2.3


From 5ee0ed7d3ab620a764740fb018f469d45f561931 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:20 +0000
Subject: [PATCH] RPC: Make rpc_create_client() probe server for RPC
 program+version support

 Ensure that we don't create an RPC client without checking that the server
 does indeed support the RPC program + version that we are trying to set up.

 This enables us to immediately return an error to "mount" if it turns out
 that the server is only supporting NFSv2, when we requested NFSv3 or NFSv4.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/host.c | 4 ++--
 fs/lockd/mon.c  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 90a62f27914..82c77df81c5 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -189,6 +189,8 @@ nlm_bind_host(struct nlm_host *host)
 			goto forgetit;
 
 		xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout);
+		xprt->nocong = 1;	/* No congestion control for NLM */
+		xprt->resvport = 1;	/* NLM requires a reserved port */
 
 		/* Existing NLM servers accept AUTH_UNIX only */
 		clnt = rpc_create_client(xprt, host->h_name, &nlm_program,
@@ -196,8 +198,6 @@ nlm_bind_host(struct nlm_host *host)
 		if (IS_ERR(clnt))
 			goto forgetit;
 		clnt->cl_autobind = 1;	/* turn on pmap queries */
-		xprt->nocong = 1;	/* No congestion control for NLM */
-		xprt->resvport = 1;	/* NLM requires a reserved port */
 
 		host->h_rpcclnt = clnt;
 	}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 81b5e7778d7..2d144abe84a 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -115,6 +115,7 @@ nsm_create(void)
 	xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL);
 	if (IS_ERR(xprt))
 		return (struct rpc_clnt *)xprt;
+	xprt->resvport = 1;	/* NSM requires a reserved port */
 
 	clnt = rpc_create_client(xprt, "localhost",
 				&nsm_program, SM_VERSION,
@@ -124,7 +125,6 @@ nsm_create(void)
 	clnt->cl_softrtry = 1;
 	clnt->cl_chatty   = 1;
 	clnt->cl_oneshot  = 1;
-	xprt->resvport = 1;	/* NSM requires a reserved port */
 	return clnt;
 
 out_err:
-- 
cgit v1.2.3


From 9085bbcb76421a90bea28f4d3d03fa9977319c49 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:20 +0000
Subject: [PATCH] NFS: Kill annoying mount version mismatch printks

 Ensure that we fix up the missing fields in the nfs_mount_data with
 sane defaults for older versions of mount, and return errors in the
 cases where we cannot.

 Convert a bunch of annoying warnings into dprintks()

 Return -EPROTONOSUPPORT rather than EIO if mount() tries to set NFSv3
 without it actually being compiled in.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 179 +++++++++++++++++++++++++++++++++------------------------
 1 file changed, 105 insertions(+), 74 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ea784969fb8..32ddcf69e9a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -366,13 +366,15 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP,
 				 &server->addr, &timeparms);
 	if (IS_ERR(xprt)) {
-		printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
+		dprintk("%s: cannot create RPC transport. Error = %ld\n",
+				__FUNCTION__, PTR_ERR(xprt));
 		return (struct rpc_clnt *)xprt;
 	}
 	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
 				 server->rpc_ops->version, data->pseudoflavor);
 	if (IS_ERR(clnt)) {
-		printk(KERN_WARNING "NFS: cannot create RPC client.\n");
+		dprintk("%s: cannot create RPC client. Error = %ld\n",
+				__FUNCTION__, PTR_ERR(xprt));
 		goto out_fail;
 	}
 
@@ -426,21 +428,16 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 
 	/* Check NFS protocol revision and initialize RPC op vector
 	 * and file handle pool. */
-	if (server->flags & NFS_MOUNT_VER3) {
 #ifdef CONFIG_NFS_V3
+	if (server->flags & NFS_MOUNT_VER3) {
 		server->rpc_ops = &nfs_v3_clientops;
 		server->caps |= NFS_CAP_READDIRPLUS;
-		if (data->version < 4) {
-			printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n");
-			return -EIO;
-		}
-#else
-		printk(KERN_NOTICE "NFS: NFSv3 not supported.\n");
-		return -EIO;
-#endif
 	} else {
 		server->rpc_ops = &nfs_v2_clientops;
 	}
+#else
+	server->rpc_ops = &nfs_v2_clientops;
+#endif
 
 	/* Fill in pseudoflavor for mount version < 5 */
 	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
@@ -1384,74 +1381,94 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data)
 {
 	int error;
-	struct nfs_server *server;
+	struct nfs_server *server = NULL;
 	struct super_block *s;
 	struct nfs_fh *root;
 	struct nfs_mount_data *data = raw_data;
 
-	if (!data) {
-		printk("nfs_read_super: missing data argument\n");
-		return ERR_PTR(-EINVAL);
+	s = ERR_PTR(-EINVAL);
+	if (data == NULL) {
+		dprintk("%s: missing data argument\n", __FUNCTION__);
+		goto out_err;
+	}
+	if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
+		dprintk("%s: bad mount version\n", __FUNCTION__);
+		goto out_err;
 	}
+	switch (data->version) {
+		case 1:
+			data->namlen = 0;
+		case 2:
+			data->bsize  = 0;
+		case 3:
+			if (data->flags & NFS_MOUNT_VER3) {
+				dprintk("%s: mount structure version %d does not support NFSv3\n",
+						__FUNCTION__,
+						data->version);
+				goto out_err;
+			}
+			data->root.size = NFS2_FHSIZE;
+			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+		case 4:
+			if (data->flags & NFS_MOUNT_SECFLAVOUR) {
+				dprintk("%s: mount structure version %d does not support strong security\n",
+						__FUNCTION__,
+						data->version);
+				goto out_err;
+			}
+		case 5:
+			memset(data->context, 0, sizeof(data->context));
+	}
+#ifndef CONFIG_NFS_V3
+	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
+	s = ERR_PTR(-EPROTONOSUPPORT);
+	if (data->flags & NFS_MOUNT_VER3) {
+		dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
+		goto out_err;
+	}
+#endif /* CONFIG_NFS_V3 */
 
+	s = ERR_PTR(-ENOMEM);
 	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
 	if (!server)
-		return ERR_PTR(-ENOMEM);
+		goto out_err;
 	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
 
-	if (data->version != NFS_MOUNT_VERSION) {
-		printk("nfs warning: mount version %s than kernel\n",
-			data->version < NFS_MOUNT_VERSION ? "older" : "newer");
-		if (data->version < 2)
-			data->namlen = 0;
-		if (data->version < 3)
-			data->bsize  = 0;
-		if (data->version < 4) {
-			data->flags &= ~NFS_MOUNT_VER3;
-			data->root.size = NFS2_FHSIZE;
-			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
-		}
-		if (data->version < 5)
-			data->flags &= ~NFS_MOUNT_SECFLAVOUR;
-	}
-
 	root = &server->fh;
 	if (data->flags & NFS_MOUNT_VER3)
 		root->size = data->root.size;
 	else
 		root->size = NFS2_FHSIZE;
+	s = ERR_PTR(-EINVAL);
 	if (root->size > sizeof(root->data)) {
-		printk("nfs_get_sb: invalid root filehandle\n");
-		kfree(server);
-		return ERR_PTR(-EINVAL);
+		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
+		goto out_err;
 	}
 	memcpy(root->data, data->root.data, root->size);
 
 	/* We now require that the mount process passes the remote address */
 	memcpy(&server->addr, &data->addr, sizeof(server->addr));
 	if (server->addr.sin_addr.s_addr == INADDR_ANY) {
-		printk("NFS: mount program didn't pass remote address!\n");
-		kfree(server);
-		return ERR_PTR(-EINVAL);
+		dprintk("%s: mount program didn't pass remote address!\n",
+				__FUNCTION__);
+		goto out_err;
 	}
 
-	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
-
-	if (IS_ERR(s) || s->s_root) {
-		kfree(server);
-		return s;
+	/* Fire up rpciod if not yet running */
+	s = ERR_PTR(rpciod_up());
+	if (IS_ERR(s)) {
+		dprintk("%s: couldn't start rpciod! Error = %ld\n",
+				__FUNCTION__, PTR_ERR(s));
+		goto out_err;
 	}
 
-	s->s_flags = flags;
+	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s) || s->s_root)
+		goto out_rpciod_down;
 
-	/* Fire up rpciod if not yet running */
-	if (rpciod_up() != 0) {
-		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
-		kfree(server);
-		return ERR_PTR(-EIO);
-	}
+	s->s_flags = flags;
 
 	error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
 	if (error) {
@@ -1461,6 +1478,11 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 	}
 	s->s_flags |= MS_ACTIVE;
 	return s;
+out_rpciod_down:
+	rpciod_down();
+out_err:
+	kfree(server);
+	return s;
 }
 
 static void nfs_kill_super(struct super_block *s)
@@ -1593,15 +1615,19 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 
 	clp = nfs4_get_client(&server->addr.sin_addr);
 	if (!clp) {
-		printk(KERN_WARNING "NFS: failed to create NFS4 client.\n");
+		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
 		return -EIO;
 	}
 
 	/* Now create transport and client */
 	authflavour = RPC_AUTH_UNIX;
 	if (data->auth_flavourlen != 0) {
-		if (data->auth_flavourlen > 1)
-			printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n");
+		if (data->auth_flavourlen != 1) {
+			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
+					__FUNCTION__, data->auth_flavourlen);
+			err = -EINVAL;
+			goto out_fail;
+		}
 		if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
 			err = -EFAULT;
 			goto out_fail;
@@ -1613,16 +1639,18 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 		xprt = xprt_create_proto(proto, &server->addr, &timeparms);
 		if (IS_ERR(xprt)) {
 			up_write(&clp->cl_sem);
-			printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
 			err = PTR_ERR(xprt);
+			dprintk("%s: cannot create RPC transport. Error = %d\n",
+					__FUNCTION__, err);
 			goto out_fail;
 		}
 		clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
 				server->rpc_ops->version, authflavour);
 		if (IS_ERR(clnt)) {
 			up_write(&clp->cl_sem);
-			printk(KERN_WARNING "NFS: cannot create RPC client.\n");
 			err = PTR_ERR(clnt);
+			dprintk("%s: cannot create RPC client. Error = %d\n",
+					__FUNCTION__, err);
 			goto out_fail;
 		}
 		clnt->cl_intr     = 1;
@@ -1654,20 +1682,22 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	clp = NULL;
 
 	if (IS_ERR(clnt)) {
-		printk(KERN_WARNING "NFS: cannot create RPC client.\n");
-		return PTR_ERR(clnt);
+		err = PTR_ERR(clnt);
+		dprintk("%s: cannot create RPC client. Error = %d\n",
+				__FUNCTION__, err);
+		return err;
 	}
 
 	server->client    = clnt;
 
 	if (server->nfs4_state->cl_idmap == NULL) {
-		printk(KERN_WARNING "NFS: failed to create idmapper.\n");
+		dprintk("%s: failed to create idmapper.\n", __FUNCTION__);
 		return -ENOMEM;
 	}
 
 	if (clnt->cl_auth->au_flavor != authflavour) {
 		if (rpcauth_create(authflavour, clnt) == NULL) {
-			printk(KERN_WARNING "NFS: couldn't create credcache!\n");
+			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
 			return -ENOMEM;
 		}
 	}
@@ -1728,8 +1758,12 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 	struct nfs4_mount_data *data = raw_data;
 	void *p;
 
-	if (!data) {
-		printk("nfs_read_super: missing data argument\n");
+	if (data == NULL) {
+		dprintk("%s: missing data argument\n", __FUNCTION__);
+		return ERR_PTR(-EINVAL);
+	}
+	if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) {
+		dprintk("%s: bad mount version\n", __FUNCTION__);
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -1740,11 +1774,6 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
 
-	if (data->version != NFS4_MOUNT_VERSION) {
-		printk("nfs warning: mount version %s than kernel\n",
-			data->version < NFS4_MOUNT_VERSION ? "older" : "newer");
-	}
-
 	p = nfs_copy_user_string(NULL, &data->hostname, 256);
 	if (IS_ERR(p))
 		goto out_err;
@@ -1771,11 +1800,20 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 	}
 	if (server->addr.sin_family != AF_INET ||
 	    server->addr.sin_addr.s_addr == INADDR_ANY) {
-		printk("NFS: mount program didn't pass remote IP address!\n");
+		dprintk("%s: mount program didn't pass remote IP address!\n",
+				__FUNCTION__);
 		s = ERR_PTR(-EINVAL);
 		goto out_free;
 	}
 
+	/* Fire up rpciod if not yet running */
+	s = ERR_PTR(rpciod_up());
+	if (IS_ERR(s)) {
+		dprintk("%s: couldn't start rpciod! Error = %ld\n",
+				__FUNCTION__, PTR_ERR(s));
+		goto out_free;
+	}
+
 	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
 
 	if (IS_ERR(s) || s->s_root)
@@ -1783,13 +1821,6 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 
 	s->s_flags = flags;
 
-	/* Fire up rpciod if not yet running */
-	if (rpciod_up() != 0) {
-		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
-		s = ERR_PTR(-EIO);
-		goto out_free;
-	}
-
 	error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
 	if (error) {
 		up_write(&s->s_umount);
-- 
cgit v1.2.3


From 4ce79717ce32a9f88c1ddce4b9658556cb59d37a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:21 +0000
Subject: [PATCH] NFS: Header file cleanup...

 - Move NFSv4 state definitions into a private header file.
 - Clean up gunk in nfs_fs.h

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c      |   1 +
 fs/nfs/callback_proc.c |   1 +
 fs/nfs/callback_xdr.c  |   1 +
 fs/nfs/delegation.c    |   1 +
 fs/nfs/dir.c           |   1 +
 fs/nfs/idmap.c         |   1 +
 fs/nfs/inode.c         |   1 +
 fs/nfs/nfs4_fs.h       | 250 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4proc.c      |   5 +-
 fs/nfs/nfs4renewd.c    |   1 +
 fs/nfs/nfs4state.c     |  12 +--
 fs/nfs/nfs4xdr.c       |   8 +-
 12 files changed, 264 insertions(+), 19 deletions(-)
 create mode 100644 fs/nfs/nfs4_fs.h

(limited to 'fs')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 560d6175dd5..f2ca782aba3 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -14,6 +14,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
 #include "callback.h"
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index ece27e42b93..65f1e19e4d1 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -8,6 +8,7 @@
 #include <linux/config.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index d271df9df2b..c99677ec58f 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -10,6 +10,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
 #include "callback.h"
 
 #define CB_OP_TAGLEN_MAXSZ	(512)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5b9c60f9779..d7f7eb669d0 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -16,6 +16,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/nfs_xdr.h>
 
+#include "nfs4_fs.h"
 #include "delegation.h"
 
 static struct nfs_delegation *nfs_alloc_delegation(void)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ff6155f5e8d..9ccb15e8696 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -32,6 +32,7 @@
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
 
+#include "nfs4_fs.h"
 #include "delegation.h"
 
 #define NFS_PARANOIA 1
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 87f4f9aeac8..ffb8df91dc3 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -50,6 +50,7 @@
 #include <linux/nfs_fs.h>
 
 #include <linux/nfs_idmap.h>
+#include "nfs4_fs.h"
 
 #define IDMAP_HASH_SZ          128
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 32ddcf69e9a..c80a81ff59c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -39,6 +39,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
+#include "nfs4_fs.h"
 #include "delegation.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
new file mode 100644
index 00000000000..85cf3bd3692
--- /dev/null
+++ b/fs/nfs/nfs4_fs.h
@@ -0,0 +1,250 @@
+/*
+ * linux/fs/nfs/nfs4_fs.h
+ *
+ * Copyright (C) 2005 Trond Myklebust
+ *
+ * NFSv4-specific filesystem definitions and declarations
+ */
+
+#ifndef __LINUX_FS_NFS_NFS4_FS_H
+#define __LINUX_FS_NFS_NFS4_FS_H
+
+#ifdef CONFIG_NFS_V4
+
+struct idmap;
+
+/*
+ * In a seqid-mutating op, this macro controls which error return
+ * values trigger incrementation of the seqid.
+ *
+ * from rfc 3010:
+ * The client MUST monotonically increment the sequence number for the
+ * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE
+ * operations.  This is true even in the event that the previous
+ * operation that used the sequence number received an error.  The only
+ * exception to this rule is if the previous operation received one of
+ * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID,
+ * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR,
+ * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE.
+ *
+ */
+#define seqid_mutating_err(err)       \
+(((err) != NFSERR_STALE_CLIENTID) &&  \
+ ((err) != NFSERR_STALE_STATEID)  &&  \
+ ((err) != NFSERR_BAD_STATEID)    &&  \
+ ((err) != NFSERR_BAD_SEQID)      &&  \
+ ((err) != NFSERR_BAD_XDR)        &&  \
+ ((err) != NFSERR_RESOURCE)       &&  \
+ ((err) != NFSERR_NOFILEHANDLE))
+
+enum nfs4_client_state {
+	NFS4CLNT_OK  = 0,
+};
+
+/*
+ * The nfs4_client identifies our client state to the server.
+ */
+struct nfs4_client {
+	struct list_head	cl_servers;	/* Global list of servers */
+	struct in_addr		cl_addr;	/* Server identifier */
+	u64			cl_clientid;	/* constant */
+	nfs4_verifier		cl_confirm;
+	unsigned long		cl_state;
+
+	u32			cl_lockowner_id;
+
+	/*
+	 * The following rwsem ensures exclusive access to the server
+	 * while we recover the state following a lease expiration.
+	 */
+	struct rw_semaphore	cl_sem;
+
+	struct list_head	cl_delegations;
+	struct list_head	cl_state_owners;
+	struct list_head	cl_unused;
+	int			cl_nunused;
+	spinlock_t		cl_lock;
+	atomic_t		cl_count;
+
+	struct rpc_clnt *	cl_rpcclient;
+	struct rpc_cred *	cl_cred;
+
+	struct list_head	cl_superblocks;	/* List of nfs_server structs */
+
+	unsigned long		cl_lease_time;
+	unsigned long		cl_last_renewal;
+	struct work_struct	cl_renewd;
+	struct work_struct	cl_recoverd;
+
+	wait_queue_head_t	cl_waitq;
+	struct rpc_wait_queue	cl_rpcwaitq;
+
+	/* used for the setclientid verifier */
+	struct timespec		cl_boot_time;
+
+	/* idmapper */
+	struct idmap *		cl_idmap;
+
+	/* Our own IP address, as a null-terminated string.
+	 * This is used to generate the clientid, and the callback address.
+	 */
+	char			cl_ipaddr[16];
+	unsigned char		cl_id_uniquifier;
+};
+
+/*
+ * NFS4 state_owners and lock_owners are simply labels for ordered
+ * sequences of RPC calls. Their sole purpose is to provide once-only
+ * semantics by allowing the server to identify replayed requests.
+ *
+ * The ->so_sema is held during all state_owner seqid-mutating operations:
+ * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize
+ * so_seqid.
+ */
+struct nfs4_state_owner {
+	struct list_head     so_list;	 /* per-clientid list of state_owners */
+	struct nfs4_client   *so_client;
+	u32                  so_id;      /* 32-bit identifier, unique */
+	struct semaphore     so_sema;
+	u32                  so_seqid;   /* protected by so_sema */
+	atomic_t	     so_count;
+
+	struct rpc_cred	     *so_cred;	 /* Associated cred */
+	struct list_head     so_states;
+	struct list_head     so_delegations;
+};
+
+/*
+ * struct nfs4_state maintains the client-side state for a given
+ * (state_owner,inode) tuple (OPEN) or state_owner (LOCK).
+ *
+ * OPEN:
+ * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server,
+ * we need to know how many files are open for reading or writing on a
+ * given inode. This information too is stored here.
+ *
+ * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
+ */
+
+struct nfs4_lock_state {
+	struct list_head	ls_locks;	/* Other lock stateids */
+	fl_owner_t		ls_owner;	/* POSIX lock owner */
+#define NFS_LOCK_INITIALIZED 1
+	int			ls_flags;
+	u32			ls_seqid;
+	u32			ls_id;
+	nfs4_stateid		ls_stateid;
+	atomic_t		ls_count;
+};
+
+/* bits for nfs4_state->flags */
+enum {
+	LK_STATE_IN_USE,
+	NFS_DELEGATED_STATE,
+};
+
+struct nfs4_state {
+	struct list_head open_states;	/* List of states for the same state_owner */
+	struct list_head inode_states;	/* List of states for the same inode */
+	struct list_head lock_states;	/* List of subservient lock stateids */
+
+	struct nfs4_state_owner *owner;	/* Pointer to the open owner */
+	struct inode *inode;		/* Pointer to the inode */
+
+	unsigned long flags;		/* Do we hold any locks? */
+	struct semaphore lock_sema;	/* Serializes file locking operations */
+	rwlock_t state_lock;		/* Protects the lock_states list */
+
+	nfs4_stateid stateid;
+
+	unsigned int nreaders;
+	unsigned int nwriters;
+	int state;			/* State on the server (R,W, or RW) */
+	atomic_t count;
+};
+
+
+struct nfs4_exception {
+	long timeout;
+	int retry;
+};
+
+struct nfs4_state_recovery_ops {
+	int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *);
+	int (*recover_lock)(struct nfs4_state *, struct file_lock *);
+};
+
+extern struct dentry_operations nfs4_dentry_operations;
+extern struct inode_operations nfs4_dir_inode_operations;
+
+/* nfs4proc.c */
+extern int nfs4_map_errors(int err);
+extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
+extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
+extern int nfs4_proc_async_renew(struct nfs4_client *);
+extern int nfs4_proc_renew(struct nfs4_client *);
+extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
+extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
+extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
+
+extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
+extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops;
+
+extern const u32 nfs4_fattr_bitmap[2];
+extern const u32 nfs4_statfs_bitmap[2];
+extern const u32 nfs4_pathconf_bitmap[2];
+extern const u32 nfs4_fsinfo_bitmap[2];
+
+/* nfs4renewd.c */
+extern void nfs4_schedule_state_renewal(struct nfs4_client *);
+extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
+extern void nfs4_kill_renewd(struct nfs4_client *);
+extern void nfs4_renew_state(void *);
+
+/* nfs4state.c */
+extern void init_nfsv4_state(struct nfs_server *);
+extern void destroy_nfsv4_state(struct nfs_server *);
+extern struct nfs4_client *nfs4_get_client(struct in_addr *);
+extern void nfs4_put_client(struct nfs4_client *clp);
+extern int nfs4_init_client(struct nfs4_client *clp);
+extern struct nfs4_client *nfs4_find_client(struct in_addr *);
+extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
+
+extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
+extern void nfs4_put_state_owner(struct nfs4_state_owner *);
+extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
+extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
+extern void nfs4_put_open_state(struct nfs4_state *);
+extern void nfs4_close_state(struct nfs4_state *, mode_t);
+extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
+extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
+extern void nfs4_schedule_state_recovery(struct nfs4_client *);
+extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t);
+extern struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t);
+extern void nfs4_put_lock_state(struct nfs4_lock_state *state);
+extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls);
+extern void nfs4_notify_setlk(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *);
+extern void nfs4_notify_unlck(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *);
+extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
+
+extern const nfs4_stateid zero_stateid;
+
+/* nfs4xdr.c */
+extern uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus);
+extern struct rpc_procinfo nfs4_procedures[];
+
+struct nfs4_mount_data;
+
+/* callback_xdr.c */
+extern struct svc_version nfs4_callback_version1;
+
+#else
+
+#define init_nfsv4_state(server)  do { } while (0)
+#define destroy_nfsv4_state(server)       do { } while (0)
+#define nfs4_put_state_owner(inode, owner) do { } while (0)
+#define nfs4_put_open_state(state) do { } while (0)
+#define nfs4_close_state(a, b) do { } while (0)
+
+#endif /* CONFIG_NFS_V4 */
+#endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1d5cb3e80c3..a69c02b206c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -48,6 +48,7 @@
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
 
+#include "nfs4_fs.h"
 #include "delegation.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
@@ -62,8 +63,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
 extern struct rpc_procinfo nfs4_procedures[];
 
-extern nfs4_stateid zero_stateid;
-
 /* Prevent leaks of NFSv4 errors into userland */
 int nfs4_map_errors(int err)
 {
@@ -104,7 +103,7 @@ const u32 nfs4_statfs_bitmap[2] = {
 	| FATTR4_WORD1_SPACE_TOTAL
 };
 
-u32 nfs4_pathconf_bitmap[2] = {
+const u32 nfs4_pathconf_bitmap[2] = {
 	FATTR4_WORD0_MAXLINK
 	| FATTR4_WORD0_MAXNAME,
 	0
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 667e06f1c64..a3001628ad3 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -53,6 +53,7 @@
 #include <linux/nfs.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
 
 #define NFSDBG_FACILITY	NFSDBG_PROC
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 231cebce3c8..17b187f2d77 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -46,24 +46,18 @@
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 
+#include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
 
 #define OPENOWNER_POOL_SIZE	8
 
-static DEFINE_SPINLOCK(state_spinlock);
-
-nfs4_stateid zero_stateid;
-
-#if 0
-nfs4_stateid one_stateid =
-	{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-#endif
+const nfs4_stateid zero_stateid;
 
+static DEFINE_SPINLOCK(state_spinlock);
 static LIST_HEAD(nfs4_clientid_list);
 
 static void nfs4_recover_state(void *);
-extern void nfs4_renew_state(void *);
 
 void
 init_nfsv4_state(struct nfs_server *server)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 5f4de05763c..e86406eff0e 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -51,6 +51,7 @@
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_idmap.h>
+#include "nfs4_fs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_XDR
 
@@ -660,8 +661,6 @@ static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1
 
 static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask)
 {
-	extern u32 nfs4_fattr_bitmap[];
-
 	return encode_getattr_two(xdr,
 			bitmask[0] & nfs4_fattr_bitmap[0],
 			bitmask[1] & nfs4_fattr_bitmap[1]);
@@ -669,8 +668,6 @@ static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask)
 
 static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask)
 {
-	extern u32 nfs4_fsinfo_bitmap[];
-
 	return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0],
 			bitmask[1] & nfs4_fsinfo_bitmap[1]);
 }
@@ -969,7 +966,6 @@ static int encode_putrootfh(struct xdr_stream *xdr)
 
 static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
 {
-	extern nfs4_stateid zero_stateid;
 	nfs4_stateid stateid;
 	uint32_t *p;
 
@@ -1697,7 +1693,6 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs
  */
 static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args)
 {
-	extern u32 nfs4_pathconf_bitmap[2];
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops = 2,
@@ -1718,7 +1713,6 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct
  */
 static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args)
 {
-	extern u32 nfs4_statfs_bitmap[];
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops = 2,
-- 
cgit v1.2.3


From a656db998785324a818005bcf71bae6dcbbb3cf5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:21 +0000
Subject: [PATCH] NFS: Remove unused NFS inode field readdir_timestamp.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9ccb15e8696..dffa21abd3e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -165,12 +165,10 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 	NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
 	/* Ensure consistent page alignment of the data.
 	 * Note: assumes we have exclusive access to this mapping either
-	 *	 throught inode->i_sem or some other mechanism.
+	 *	 through inode->i_sem or some other mechanism.
 	 */
-	if (page->index == 0) {
-		invalidate_inode_pages(inode->i_mapping);
-		NFS_I(inode)->readdir_timestamp = timestamp;
-	}
+	if (page->index == 0)
+		invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1);
 	unlock_page(page);
 	return 0;
  error:
-- 
cgit v1.2.3


From 464a98bd70bae8c559cfc82af799faf44824ce64 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:21 +0000
Subject: [PATCH] NFS: cleanup: shrink struct nfs_open_context

 Remove the wait queue, and replace the functions that depended on it
 with wait_on_bit().

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c    |  1 -
 fs/nfs/pagelist.c | 35 ++++++++++++++++++++++++++++-------
 2 files changed, 28 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c80a81ff59c..a38d4b22d1f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -848,7 +848,6 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp
 		ctx->state = NULL;
 		ctx->lockowner = current->files;
 		ctx->error = 0;
-		init_waitqueue_head(&ctx->waitq);
 	}
 	return ctx;
 }
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 4f1ba723848..80777f99a58 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -107,7 +107,7 @@ void nfs_unlock_request(struct nfs_page *req)
 	smp_mb__before_clear_bit();
 	clear_bit(PG_BUSY, &req->wb_flags);
 	smp_mb__after_clear_bit();
-	wake_up_all(&req->wb_context->waitq);
+	wake_up_bit(&req->wb_flags, PG_BUSY);
 	nfs_release_request(req);
 }
 
@@ -180,6 +180,17 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head)
 	req->wb_list_head = head;
 }
 
+static int nfs_wait_bit_interruptible(void *word)
+{
+	int ret = 0;
+
+	if (signal_pending(current))
+		ret = -ERESTARTSYS;
+	else
+		schedule();
+	return ret;
+}
+
 /**
  * nfs_wait_on_request - Wait for a request to complete.
  * @req: request to wait upon.
@@ -190,12 +201,22 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head)
 int
 nfs_wait_on_request(struct nfs_page *req)
 {
-	struct inode	*inode = req->wb_context->dentry->d_inode;
-        struct rpc_clnt	*clnt = NFS_CLIENT(inode);
-
-	if (!NFS_WBACK_BUSY(req))
-		return 0;
-	return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req));
+        struct rpc_clnt	*clnt = NFS_CLIENT(req->wb_context->dentry->d_inode);
+	sigset_t oldmask;
+	int ret = 0;
+
+	if (!test_bit(PG_BUSY, &req->wb_flags))
+		goto out;
+	/*
+	 * Note: the call to rpc_clnt_sigmask() suffices to ensure that we
+	 *	 are not interrupted if intr flag is not set
+	 */
+	rpc_clnt_sigmask(clnt, &oldmask);
+	ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY,
+			nfs_wait_bit_interruptible, TASK_INTERRUPTIBLE);
+	rpc_clnt_sigunmask(clnt, &oldmask);
+out:
+	return ret;
 }
 
 /**
-- 
cgit v1.2.3


From 92cfc62cb8412c9563860b1bf70cd4701f03092e Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFS: Allow NFS versions to support different sets of inode
 operations.

 ACL support will require supporting additional inode operations in v4
 (getxattr, setxattr, listxattr).  This patch allows different protocol versions
 to support different inode operations by adding a file_inode_ops to the
 nfs_rpc_ops (to match the existing dir_inode_ops).

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c    | 2 +-
 fs/nfs/nfs3proc.c | 1 +
 fs/nfs/nfs4proc.c | 1 +
 fs/nfs/proc.c     | 1 +
 4 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a38d4b22d1f..a82f0340744 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -686,7 +686,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		/* Why so? Because we want revalidate for devices/FIFOs, and
 		 * that's precisely what we have in nfs_file_inode_operations.
 		 */
-		inode->i_op = &nfs_file_inode_operations;
+		inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops;
 		if (S_ISREG(inode->i_mode)) {
 			inode->i_fop = &nfs_file_operations;
 			inode->i_data.a_ops = &nfs_file_aops;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 3878494dfc2..53953a77571 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -826,6 +826,7 @@ struct nfs_rpc_ops	nfs_v3_clientops = {
 	.version	= 3,			/* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs_dir_inode_operations,
+	.file_inode_ops	= &nfs_file_inode_operations,
 	.getroot	= nfs3_proc_get_root,
 	.getattr	= nfs3_proc_getattr,
 	.setattr	= nfs3_proc_setattr,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a69c02b206c..a5a8cb3159a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2746,6 +2746,7 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.version	= 4,			/* protocol version */
 	.dentry_ops	= &nfs4_dentry_operations,
 	.dir_inode_ops	= &nfs4_dir_inode_operations,
+	.file_inode_ops	= &nfs_file_inode_operations,
 	.getroot	= nfs4_proc_get_root,
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index d31b4d6e5a5..cedf636bcf3 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -622,6 +622,7 @@ struct nfs_rpc_ops	nfs_v2_clientops = {
 	.version	= 2,		       /* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs_dir_inode_operations,
+	.file_inode_ops	= &nfs_file_inode_operations,
 	.getroot	= nfs_proc_get_root,
 	.getattr	= nfs_proc_getattr,
 	.setattr	= nfs_proc_setattr,
-- 
cgit v1.2.3


From ada70d9425bcc5e376fef8591e4e76e204c0834c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFS: Add hooks to allow common NFS attribute code to clear
 cached acls

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a82f0340744..c45bd52cc1d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -64,6 +64,7 @@ static void nfs_clear_inode(struct inode *);
 static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct super_block *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
+static void nfs_zap_acl_cache(struct inode *);
 
 static struct rpc_program	nfs_program;
 
@@ -153,6 +154,7 @@ nfs_clear_inode(struct inode *inode)
 
 	nfs_wb_all(inode);
 	BUG_ON (!list_empty(&nfsi->open_files));
+	nfs_zap_acl_cache(inode);
 	cred = nfsi->cache_access.cred;
 	if (cred)
 		put_rpccred(cred);
@@ -587,9 +589,19 @@ nfs_zap_caches(struct inode *inode)
 
 	memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
-		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
+		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 	else
-		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
+		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+}
+
+static void nfs_zap_acl_cache(struct inode *inode)
+{
+	void (*clear_acl_cache)(struct inode *);
+
+	clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache;
+	if (clear_acl_cache != NULL)
+		clear_acl_cache(inode);
+	NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL;
 }
 
 /*
@@ -789,7 +801,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 		}
 	}
 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
-		NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
+		NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 	nfs_end_data_update(inode);
 	unlock_kernel();
 	return error;
@@ -1033,6 +1045,8 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 		/* This ensures we revalidate dentries */
 		nfsi->cache_change_attribute++;
 	}
+	if (flags & NFS_INO_INVALID_ACL)
+		nfs_zap_acl_cache(inode);
 	dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
 		inode->i_sb->s_id,
 		(long long)NFS_FILEID(inode));
@@ -1183,7 +1197,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
 			|| inode->i_uid != fattr->uid
 			|| inode->i_gid != fattr->gid)
-		nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
+		nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
 
 	/* Has the link count changed? */
 	if (inode->i_nlink != fattr->nlink)
@@ -1292,16 +1306,21 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 #endif
 		nfsi->change_attr = fattr->change_attr;
 		if (!data_unstable)
-			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 	}
 
-	memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+	/* If ctime has changed we should definitely clear access+acl caches */
+	if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
+		if (!data_unstable)
+			invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+		memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+	}
 	memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
 
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
 	    inode->i_uid != fattr->uid ||
 	    inode->i_gid != fattr->gid)
-		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
+		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 
 	inode->i_mode = fattr->mode;
 	inode->i_nlink = fattr->nlink;
-- 
cgit v1.2.3


From 6b3b5496d7b261d6c9202008dc528e52dbd11e57 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFSv4: Add {get,set,list}xattr methods for nfs4

 Add {get,set,list}xattr methods for nfs4.  The new methods are no-ops, to be
 used by subsequent ACL patch.

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c      |  3 +++
 fs/nfs/nfs4_fs.h  |  7 +++++++
 fs/nfs/nfs4proc.c | 39 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 48 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index dffa21abd3e..5720537bffd 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -91,6 +91,9 @@ struct inode_operations nfs4_dir_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+	.getxattr       = nfs4_getxattr,
+	.setxattr       = nfs4_setxattr,
+	.listxattr      = nfs4_listxattr,
 };
 
 #endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 85cf3bd3692..d71f416bd9e 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -176,6 +176,13 @@ struct nfs4_state_recovery_ops {
 
 extern struct dentry_operations nfs4_dentry_operations;
 extern struct inode_operations nfs4_dir_inode_operations;
+extern struct inode_operations nfs4_file_inode_operations;
+
+/* inode.c */
+extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
+extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int);
+extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
+
 
 /* nfs4proc.c */
 extern int nfs4_map_errors(int err);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a5a8cb3159a..1b14d17ae9a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2732,6 +2732,34 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
 	return status;
 }
 
+
+int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
+		size_t buflen, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+/* The getxattr man page suggests returning -ENODATA for unknown attributes,
+ * and that's what we'll do for e.g. user attributes that haven't been set.
+ * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported
+ * attributes in kernel-managed attribute namespaces. */
+ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf,
+		size_t buflen)
+{
+	return -EOPNOTSUPP;
+}
+
+ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
+{
+	ssize_t len = 0;
+
+	if (buf && buflen < len)
+		return -ERANGE;
+	if (buf)
+		memcpy(buf, "", 0);
+	return 0;
+}
+
 struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
 	.recover_open	= nfs4_open_reclaim,
 	.recover_lock	= nfs4_lock_reclaim,
@@ -2742,11 +2770,20 @@ struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = {
 	.recover_lock	= nfs4_lock_expired,
 };
 
+static struct inode_operations nfs4_file_inode_operations = {
+	.permission	= nfs_permission,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+	.getxattr	= nfs4_getxattr,
+	.setxattr	= nfs4_setxattr,
+	.listxattr	= nfs4_listxattr,
+};
+
 struct nfs_rpc_ops	nfs_v4_clientops = {
 	.version	= 4,			/* protocol version */
 	.dentry_ops	= &nfs4_dentry_operations,
 	.dir_inode_ops	= &nfs4_dir_inode_operations,
-	.file_inode_ops	= &nfs_file_inode_operations,
+	.file_inode_ops	= &nfs4_file_inode_operations,
 	.getroot	= nfs4_proc_get_root,
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
-- 
cgit v1.2.3


From 96928206961be05f22c3839f0097b610cc485b5d Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFSv4: fix fattr size calculations

 Make nfs4 fattr size calculations more explicit, revising them downward a
 bit in the process.

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e86406eff0e..8204926bb46 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -83,12 +83,16 @@ static int nfs_stat_to_errno(int);
 #define encode_getfh_maxsz      (op_encode_hdr_maxsz)
 #define decode_getfh_maxsz      (op_decode_hdr_maxsz + 1 + \
 				((3+NFS4_FHSIZE) >> 2))
-#define encode_getattr_maxsz    (op_encode_hdr_maxsz + 3)
+#define nfs4_fattr_bitmap_maxsz 3
+#define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
 #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
 #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
-#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz)
-#define decode_getattr_maxsz    (op_decode_hdr_maxsz + 3 + \
-                                nfs4_fattr_bitmap_maxsz)
+/* This is based on getfattr, which uses the most attributes: */
+#define nfs4_fattr_value_maxsz	(1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
+				3 + 3 + 3 + 2 * nfs4_name_maxsz))
+#define nfs4_fattr_maxsz	(nfs4_fattr_bitmap_maxsz + \
+				nfs4_fattr_value_maxsz)
+#define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
 #define encode_savefh_maxsz     (op_encode_hdr_maxsz)
 #define decode_savefh_maxsz     (op_decode_hdr_maxsz)
 #define encode_fsinfo_maxsz	(op_encode_hdr_maxsz + 2)
@@ -123,11 +127,11 @@ static int nfs_stat_to_errno(int);
 #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
 				1 + nfs4_name_maxsz + \
 				nfs4_path_maxsz + \
-				nfs4_fattr_bitmap_maxsz)
+				nfs4_fattr_maxsz)
 #define decode_symlink_maxsz	(op_decode_hdr_maxsz + 8)
 #define encode_create_maxsz	(op_encode_hdr_maxsz + \
 				2 + nfs4_name_maxsz + \
-				nfs4_fattr_bitmap_maxsz)
+				nfs4_fattr_maxsz)
 #define decode_create_maxsz	(op_decode_hdr_maxsz + 8)
 #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
 #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
@@ -206,7 +210,7 @@ static int nfs_stat_to_errno(int);
 #define NFS4_enc_setattr_sz     (compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
                                 op_encode_hdr_maxsz + 4 + \
-                                nfs4_fattr_bitmap_maxsz + \
+                                nfs4_fattr_maxsz + \
                                 encode_getattr_maxsz)
 #define NFS4_dec_setattr_sz     (compound_decode_hdr_maxsz + \
                                 decode_putfh_maxsz + \
-- 
cgit v1.2.3


From 029d105e66e5a90850d5a09dad76815d0bcfcaa3 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFSv4: Client-side xdr for reading NFSv4 acls

 Client-side support for NFSv4 acls: xdr encoding and decoding routines for
 reading acls

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8204926bb46..6f1c003ee33 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -365,6 +365,13 @@ static int nfs_stat_to_errno(int);
 				encode_delegreturn_maxsz)
 #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
 				decode_delegreturn_maxsz)
+#define NFS4_enc_getacl_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_getattr_maxsz)
+#define NFS4_dec_getacl_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + \
+				nfs4_fattr_bitmap_maxsz + 1)
 
 static struct {
 	unsigned int	mode;
@@ -1631,6 +1638,34 @@ out:
         return status;
 }
 
+/*
+ * Encode a GETACL request
+ */
+static int
+nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p,
+		struct nfs_getaclargs *args)
+{
+	struct xdr_stream xdr;
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct compound_hdr hdr = {
+		.nops   = 2,
+	};
+	int replen, status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if (status)
+		goto out;
+	status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0);
+	/* set up reply buffer: */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen,
+		args->acl_pages, args->acl_pgbase, args->acl_len);
+out:
+	return status;
+}
+
 /*
  * Encode a WRITE request
  */
@@ -3125,6 +3160,47 @@ static int decode_renew(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_RENEW);
 }
 
+static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
+		size_t *acl_len)
+{
+	uint32_t *savep;
+	uint32_t attrlen,
+		 bitmap[2] = {0};
+	struct kvec *iov = req->rq_rcv_buf.head;
+	int status;
+
+	*acl_len = 0;
+	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+		goto out;
+	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+		goto out;
+	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+		goto out;
+
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
+		int hdrlen, recvd;
+
+		/* We ignore &savep and don't do consistency checks on
+		 * the attr length.  Let userspace figure it out.... */
+		hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
+		recvd = req->rq_rcv_buf.len - hdrlen;
+		if (attrlen > recvd) {
+			printk(KERN_WARNING "NFS: server cheating in getattr"
+					" acl reply: attrlen %u > recvd %u\n",
+					attrlen, recvd);
+			return -EINVAL;
+		}
+		if (attrlen <= *acl_len)
+			xdr_read_pages(xdr, attrlen);
+		*acl_len = attrlen;
+	}
+
+out:
+	return status;
+}
+
 static int
 decode_savefh(struct xdr_stream *xdr)
 {
@@ -3417,6 +3493,29 @@ out:
 }
 
 
+/*
+ * Decode GETACL response
+ */
+static int
+nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, size_t *acl_len)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_getacl(&xdr, rqstp, acl_len);
+
+out:
+	return status;
+}
+
 /*
  * Decode CLOSE response
  */
@@ -4017,6 +4116,7 @@ struct rpc_procinfo	nfs4_procedures[] = {
   PROC(READDIR,		enc_readdir,	dec_readdir),
   PROC(SERVER_CAPS,	enc_server_caps, dec_server_caps),
   PROC(DELEGRETURN,	enc_delegreturn, dec_delegreturn),
+  PROC(GETACL,		enc_getacl,	dec_getacl),
 };
 
 struct rpc_version		nfs_version4 = {
-- 
cgit v1.2.3


From aa1870af92d8f6d6db0883696516a83ff2b695a6 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFSv4: ACL support for the NFSv4 client: read

 Client-side support for NFSv4 ACLs.  Exports the raw xdr code via the
 system.nfs4_acl extended attribute.  It is up to userspace to decode the acl
 (and to provide correctly xdr'd acls on setxattr), and to convert to/from
 POSIX ACLs if desired.

 This patch provides only the read support.

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 65 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1b14d17ae9a..c91c09938a5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2162,6 +2162,60 @@ nfs4_proc_file_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static inline int nfs4_server_supports_acls(struct nfs_server *server)
+{
+	return (server->caps & NFS_CAP_ACLS)
+		&& (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
+		&& (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL);
+}
+
+/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that
+ * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on
+ * the stack.
+ */
+#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT)
+
+static void buf_to_pages(const void *buf, size_t buflen,
+		struct page **pages, unsigned int *pgbase)
+{
+	const void *p = buf;
+
+	*pgbase = offset_in_page(buf);
+	p -= *pgbase;
+	while (p < buf + buflen) {
+		*(pages++) = virt_to_page(p);
+		p += PAGE_CACHE_SIZE;
+	}
+}
+
+static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct page *pages[NFS4ACL_MAXPAGES];
+	struct nfs_getaclargs args = {
+		.fh = NFS_FH(inode),
+		.acl_pages = pages,
+		.acl_len = buflen,
+	};
+	size_t resp_len = buflen;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
+		.rpc_argp = &args,
+		.rpc_resp = &resp_len,
+	};
+	int ret;
+
+	if (!nfs4_server_supports_acls(server))
+		return -EOPNOTSUPP;
+	buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
+	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	if (buflen && resp_len > buflen)
+		return -ERANGE;
+	if (ret == 0)
+		ret = resp_len;
+	return ret;
+}
+
 static int
 nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
 {
@@ -2733,6 +2787,8 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
 }
 
 
+#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
+
 int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
 		size_t buflen, int flags)
 {
@@ -2746,18 +2802,23 @@ int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
 ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf,
 		size_t buflen)
 {
-	return -EOPNOTSUPP;
+	struct inode *inode = dentry->d_inode;
+
+	if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
+		return -EOPNOTSUPP;
+
+	return nfs4_proc_get_acl(inode, buf, buflen);
 }
 
 ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
 {
-	ssize_t len = 0;
+	size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
 
 	if (buf && buflen < len)
 		return -ERANGE;
 	if (buf)
-		memcpy(buf, "", 0);
-	return 0;
+		memcpy(buf, XATTR_NAME_NFSV4_ACL, len);
+	return len;
 }
 
 struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
-- 
cgit v1.2.3


From 23ec6965c20db96bc8ea7af0ec178f074dd31c40 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFSv4: Client-side xdr for writing NFSv4 acls

 Client-side support for NFSv4 acls: xdr encoding and decoding routines for
 writing acls

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6f1c003ee33..325cd6d4f23 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -372,6 +372,13 @@ static int nfs_stat_to_errno(int);
 				decode_putfh_maxsz + \
 				op_decode_hdr_maxsz + \
 				nfs4_fattr_bitmap_maxsz + 1)
+#define NFS4_enc_setacl_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				op_encode_hdr_maxsz + 4 + \
+				nfs4_fattr_bitmap_maxsz + 1)
+#define NFS4_dec_setacl_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
 
 static struct {
 	unsigned int	mode;
@@ -471,7 +478,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
 	 * In the worst-case, this would be
 	 *   12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime)
 	 *          = 36 bytes, plus any contribution from variable-length fields
-	 *            such as owner/group/acl's.
+	 *            such as owner/group.
 	 */
 	len = 16;
 
@@ -1095,6 +1102,25 @@ static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client
 	return 0;
 }
 
+static int
+encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4+sizeof(zero_stateid.data));
+	WRITE32(OP_SETATTR);
+	WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data));
+	RESERVE_SPACE(2*4);
+	WRITE32(1);
+	WRITE32(FATTR4_WORD0_ACL);
+	if (arg->acl_len % 4)
+		return -EINVAL;
+	RESERVE_SPACE(4);
+	WRITE32(arg->acl_len);
+	xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
+	return 0;
+}
+
 static int
 encode_savefh(struct xdr_stream *xdr)
 {
@@ -3492,6 +3518,48 @@ out:
 
 }
 
+/*
+ * Encode an SETACL request
+ */
+static int
+nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args)
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr = {
+                .nops   = 2,
+        };
+        int status;
+
+        xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+        encode_compound_hdr(&xdr, &hdr);
+        status = encode_putfh(&xdr, args->fh);
+        if (status)
+                goto out;
+        status = encode_setacl(&xdr, args);
+out:
+        return status;
+}
+/*
+ * Decode SETACL response
+ */
+static int
+nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_setattr(&xdr, res);
+out:
+	return status;
+}
 
 /*
  * Decode GETACL response
@@ -4117,6 +4185,7 @@ struct rpc_procinfo	nfs4_procedures[] = {
   PROC(SERVER_CAPS,	enc_server_caps, dec_server_caps),
   PROC(DELEGRETURN,	enc_delegreturn, dec_delegreturn),
   PROC(GETACL,		enc_getacl,	dec_getacl),
+  PROC(SETACL,		enc_setacl,	dec_setacl),
 };
 
 struct rpc_version		nfs_version4 = {
-- 
cgit v1.2.3


From 4b580ee3dc00f9828a9a7aad2724f448fdc94075 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:23 +0000
Subject: [PATCH] NFSv4: ACL support for the NFSv4 client: write

 Client-side write support for NFSv4 ACLs.

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c91c09938a5..d969dd13e7d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2216,6 +2216,29 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
 	return ret;
 }
 
+static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct page *pages[NFS4ACL_MAXPAGES];
+	struct nfs_setaclargs arg = {
+		.fh		= NFS_FH(inode),
+		.acl_pages	= pages,
+		.acl_len	= buflen,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_SETACL],
+		.rpc_argp	= &arg,
+		.rpc_resp	= NULL,
+	};
+	int ret;
+
+	if (!nfs4_server_supports_acls(server))
+		return -EOPNOTSUPP;
+	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
+	ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
+	return ret;
+}
+
 static int
 nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
 {
@@ -2792,7 +2815,16 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
 int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
 		size_t buflen, int flags)
 {
-	return -EOPNOTSUPP;
+	struct inode *inode = dentry->d_inode;
+
+	if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
+		return -EOPNOTSUPP;
+
+	if (!S_ISREG(inode->i_mode) &&
+	    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
+		return -EPERM;
+
+	return nfs4_proc_set_acl(inode, buf, buflen);
 }
 
 /* The getxattr man page suggests returning -ENODATA for unknown attributes,
-- 
cgit v1.2.3


From e50a1c2e1f816c81eed6a589019052cb44189267 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:23 +0000
Subject: [PATCH] NFSv4: client-side caching NFSv4 ACLs

 Add nfs4_acl field to the nfs_inode, and use it to cache acls.  Only cache
 acls of size up to a page.  Also prepare for up to a page of acl data even
 when the user doesn't pass in a buffer, as when they want to get the acl
 length to decide what size buffer to allocate.

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c    |   7 ++-
 fs/nfs/nfs4proc.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 123 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c45bd52cc1d..350c48c1263 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -142,10 +142,6 @@ nfs_delete_inode(struct inode * inode)
 	clear_inode(inode);
 }
 
-/*
- * For the moment, the only task for the NFS clear_inode method is to
- * release the mmap credential
- */
 static void
 nfs_clear_inode(struct inode *inode)
 {
@@ -1923,6 +1919,9 @@ static struct inode *nfs_alloc_inode(struct super_block *sb)
 	if (!nfsi)
 		return NULL;
 	nfsi->flags = 0;
+#ifdef CONFIG_NFS_V4
+	nfsi->nfs4_acl = NULL;
+#endif /* CONFIG_NFS_V4 */
 	return &nfsi->vfs_inode;
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d969dd13e7d..128d01cfea1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2188,9 +2188,75 @@ static void buf_to_pages(const void *buf, size_t buflen,
 	}
 }
 
-static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
+struct nfs4_cached_acl {
+	int cached;
+	size_t len;
+	char data[];
+};
+
+static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	spin_lock(&inode->i_lock);
+	kfree(nfsi->nfs4_acl);
+	nfsi->nfs4_acl = acl;
+	spin_unlock(&inode->i_lock);
+}
+
+static void nfs4_zap_acl_attr(struct inode *inode)
+{
+	nfs4_set_cached_acl(inode, NULL);
+}
+
+static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs4_cached_acl *acl;
+	int ret = -ENOENT;
+
+	spin_lock(&inode->i_lock);
+	acl = nfsi->nfs4_acl;
+	if (acl == NULL)
+		goto out;
+	if (buf == NULL) /* user is just asking for length */
+		goto out_len;
+	if (acl->cached == 0)
+		goto out;
+	ret = -ERANGE; /* see getxattr(2) man page */
+	if (acl->len > buflen)
+		goto out;
+	memcpy(buf, acl->data, acl->len);
+out_len:
+	ret = acl->len;
+out:
+	spin_unlock(&inode->i_lock);
+	return ret;
+}
+
+static void nfs4_write_cached_acl(struct inode *inode, const char *buf, size_t acl_len)
+{
+	struct nfs4_cached_acl *acl;
+
+	if (buf && acl_len <= PAGE_SIZE) {
+		acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL);
+		if (acl == NULL)
+			goto out;
+		acl->cached = 1;
+		memcpy(acl->data, buf, acl_len);
+	} else {
+		acl = kmalloc(sizeof(*acl), GFP_KERNEL);
+		if (acl == NULL)
+			goto out;
+		acl->cached = 0;
+	}
+	acl->len = acl_len;
+out:
+	nfs4_set_cached_acl(inode, acl);
+}
+
+static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
 	struct page *pages[NFS4ACL_MAXPAGES];
 	struct nfs_getaclargs args = {
 		.fh = NFS_FH(inode),
@@ -2198,24 +2264,66 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
 		.acl_len = buflen,
 	};
 	size_t resp_len = buflen;
+	void *resp_buf;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
 		.rpc_argp = &args,
 		.rpc_resp = &resp_len,
 	};
+	struct page *localpage = NULL;
 	int ret;
 
-	if (!nfs4_server_supports_acls(server))
-		return -EOPNOTSUPP;
-	buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
+	if (buflen < PAGE_SIZE) {
+		/* As long as we're doing a round trip to the server anyway,
+		 * let's be prepared for a page of acl data. */
+		localpage = alloc_page(GFP_KERNEL);
+		resp_buf = page_address(localpage);
+		if (localpage == NULL)
+			return -ENOMEM;
+		args.acl_pages[0] = localpage;
+		args.acl_pgbase = 0;
+		args.acl_len = PAGE_SIZE;
+	} else {
+		resp_buf = buf;
+		buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
+	}
 	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
-	if (buflen && resp_len > buflen)
-		return -ERANGE;
-	if (ret == 0)
-		ret = resp_len;
+	if (ret)
+		goto out_free;
+	if (resp_len > args.acl_len)
+		nfs4_write_cached_acl(inode, NULL, resp_len);
+	else
+		nfs4_write_cached_acl(inode, resp_buf, resp_len);
+	if (buf) {
+		ret = -ERANGE;
+		if (resp_len > buflen)
+			goto out_free;
+		if (localpage)
+			memcpy(buf, resp_buf, resp_len);
+	}
+	ret = resp_len;
+out_free:
+	if (localpage)
+		__free_page(localpage);
 	return ret;
 }
 
+static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	int ret;
+
+	if (!nfs4_server_supports_acls(server))
+		return -EOPNOTSUPP;
+	ret = nfs_revalidate_inode(server, inode);
+	if (ret < 0)
+		return ret;
+	ret = nfs4_read_cached_acl(inode, buf, buflen);
+	if (ret != -ENOENT)
+		return ret;
+	return nfs4_get_acl_uncached(inode, buf, buflen);
+}
+
 static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
@@ -2236,6 +2344,8 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 		return -EOPNOTSUPP;
 	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
 	ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
+	if (ret == 0)
+		nfs4_write_cached_acl(inode, buf, buflen);
 	return ret;
 }
 
@@ -2907,6 +3017,7 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.file_open      = nfs4_proc_file_open,
 	.file_release   = nfs4_proc_file_release,
 	.lock		= nfs4_proc_lock,
+	.clear_acl_cache = nfs4_zap_acl_attr,
 };
 
 /*
-- 
cgit v1.2.3


From 6a19275ada9137435da58990c8f8d3f58e170bf1 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:23 +0000
Subject: [PATCH] RPC: [PATCH] improve rpcauthauth_create error returns

 Currently we return -ENOMEM for every single failure to create a new auth.
 This is actually accurate for auth_null and auth_unix, but for auth_gss it's a
 bit confusing.

 Allow rpcauth_create (and the ->create methods) to return errors.  With this
 patch, the user may sometimes see an EINVAL instead.  Whee.

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c     | 27 +++++++++++++++++----------
 fs/nfs/nfs4state.c |  3 ++-
 2 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 350c48c1263..97b3fe7ece6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -160,11 +160,10 @@ nfs_clear_inode(struct inode *inode)
 void
 nfs_umount_begin(struct super_block *sb)
 {
-	struct nfs_server *server = NFS_SB(sb);
-	struct rpc_clnt	*rpc;
+	struct rpc_clnt	*rpc = NFS_SB(sb)->client;
 
 	/* -EIO all pending I/O */
-	if ((rpc = server->client) != NULL)
+	if (!IS_ERR(rpc))
 		rpc_killall_tasks(rpc);
 }
 
@@ -450,11 +449,14 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 		return PTR_ERR(server->client);
 	/* RFC 2623, sec 2.3.2 */
 	if (authflavor != RPC_AUTH_UNIX) {
+		struct rpc_auth *auth;
+
 		server->client_sys = rpc_clone_client(server->client);
 		if (IS_ERR(server->client_sys))
 			return PTR_ERR(server->client_sys);
-		if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys))
-			return -ENOMEM;
+		auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys);
+		if (IS_ERR(auth))
+			return PTR_ERR(auth);
 	} else {
 		atomic_inc(&server->client->cl_count);
 		server->client_sys = server->client;
@@ -1450,6 +1452,7 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
+	server->client = server->client_sys = ERR_PTR(-EINVAL);
 
 	root = &server->fh;
 	if (data->flags & NFS_MOUNT_VER3)
@@ -1506,9 +1509,9 @@ static void nfs_kill_super(struct super_block *s)
 
 	kill_anon_super(s);
 
-	if (server->client != NULL && !IS_ERR(server->client))
+	if (!IS_ERR(server->client))
 		rpc_shutdown_client(server->client);
-	if (server->client_sys != NULL && !IS_ERR(server->client_sys))
+	if (!IS_ERR(server->client_sys))
 		rpc_shutdown_client(server->client_sys);
 
 	if (!(server->flags & NFS_MOUNT_NONLM))
@@ -1650,7 +1653,7 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	}
 
 	down_write(&clp->cl_sem);
-	if (clp->cl_rpcclient == NULL) {
+	if (IS_ERR(clp->cl_rpcclient)) {
 		xprt = xprt_create_proto(proto, &server->addr, &timeparms);
 		if (IS_ERR(xprt)) {
 			up_write(&clp->cl_sem);
@@ -1711,9 +1714,12 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	}
 
 	if (clnt->cl_auth->au_flavor != authflavour) {
-		if (rpcauth_create(authflavour, clnt) == NULL) {
+		struct rpc_auth *auth;
+
+		auth = rpcauth_create(authflavour, clnt);
+		if (IS_ERR(auth)) {
 			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
-			return -ENOMEM;
+			return PTR_ERR(auth);
 		}
 	}
 
@@ -1788,6 +1794,7 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
+	server->client = server->client_sys = ERR_PTR(-EINVAL);
 
 	p = nfs_copy_user_string(NULL, &data->hostname, 256);
 	if (IS_ERR(p))
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 17b187f2d77..591ad1d5188 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -110,6 +110,7 @@ nfs4_alloc_client(struct in_addr *addr)
 	INIT_LIST_HEAD(&clp->cl_superblocks);
 	init_waitqueue_head(&clp->cl_waitq);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
+	clp->cl_rpcclient = ERR_PTR(-EINVAL);
 	clp->cl_boot_time = CURRENT_TIME;
 	clp->cl_state = 1 << NFS4CLNT_OK;
 	return clp;
@@ -131,7 +132,7 @@ nfs4_free_client(struct nfs4_client *clp)
 	if (clp->cl_cred)
 		put_rpccred(clp->cl_cred);
 	nfs_idmap_delete(clp);
-	if (clp->cl_rpcclient)
+	if (!IS_ERR(clp->cl_rpcclient))
 		rpc_shutdown_client(clp->cl_rpcclient);
 	kfree(clp);
 	nfs_callback_down();
-- 
cgit v1.2.3


From a838cc49d9a7d5652262a6d1b628628cadffa877 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:24 +0000
Subject: [PATCH] NFSD: Add NFS3ERR_NOTSUPP to the nfsd error mapping table

 Add the missing NFS3ERR_NOTSUPP error code (defined in NFSv3) to the
 system-to-protocol-error table in nfsd.  The nfsacl extension uses this error
 code.

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Signed-off-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfsd/nfsproc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 757f9d20803..0aa1b9603d7 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -591,6 +591,7 @@ nfserrno (int errno)
 		{ nfserr_dropit, -ENOMEM },
 		{ nfserr_badname, -ESRCH },
 		{ nfserr_io, -ETXTBSY },
+		{ nfserr_notsupp, -EOPNOTSUPP },
 		{ -1, -EIO }
 	};
 	int	i;
-- 
cgit v1.2.3


From a257cdd0e2179630d3201c32ba14d7fcb3c3a055 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:26 +0000
Subject: [PATCH] NFSD: Add server support for NFSv3 ACLs.

 This adds functions for encoding and decoding POSIX ACLs for the NFSACL
 protocol extension, and the GETACL and SETACL RPCs.  The implementation is
 compatible with NFSACL in Solaris.

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/Kconfig             |  24 ++++
 fs/Makefile            |   1 +
 fs/nfs_common/Makefile |   7 ++
 fs/nfs_common/nfsacl.c | 257 +++++++++++++++++++++++++++++++++++++
 fs/nfsd/Makefile       |   2 +
 fs/nfsd/nfs2acl.c      | 336 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfs3acl.c      | 267 +++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfs3xdr.c      |  13 ++
 fs/nfsd/nfssvc.c       |  27 ++++
 fs/nfsd/nfsxdr.c       |  11 ++
 fs/nfsd/vfs.c          | 107 +++++++++++++++-
 11 files changed, 1051 insertions(+), 1 deletion(-)
 create mode 100644 fs/nfs_common/Makefile
 create mode 100644 fs/nfs_common/nfsacl.c
 create mode 100644 fs/nfsd/nfs2acl.c
 create mode 100644 fs/nfsd/nfs3acl.c

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 178e27494b7..d44b04d9b0a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1353,6 +1353,7 @@ config NFSD
 	select LOCKD
 	select SUNRPC
 	select EXPORTFS
+	select NFS_ACL_SUPPORT if NFSD_V3_ACL || NFSD_V2_ACL
 	help
 	  If you want your Linux box to act as an NFS *server*, so that other
 	  computers on your local network which support NFS can access certain
@@ -1376,6 +1377,10 @@ config NFSD
 	  To compile the NFS server support as a module, choose M here: the
 	  module will be called nfsd.  If unsure, say N.
 
+config NFSD_V2_ACL
+	bool
+	depends on NFSD
+
 config NFSD_V3
 	bool "Provide NFSv3 server support"
 	depends on NFSD
@@ -1383,6 +1388,16 @@ config NFSD_V3
 	  If you would like to include the NFSv3 server as well as the NFSv2
 	  server, say Y here.  If unsure, say Y.
 
+config NFSD_V3_ACL
+	bool "Provide server support for the NFSv3 ACL protocol extension"
+	depends on NFSD_V3
+	select NFSD_V2_ACL
+	help
+	  Implement the NFSv3 ACL protocol extension for manipulating POSIX
+	  Access Control Lists on exported file systems. NFS clients should
+	  be compiled with the NFSv3 ACL protocol extension; see the
+	  CONFIG_NFS_V3_ACL option.  If unsure, say N.
+
 config NFSD_V4
 	bool "Provide NFSv4 server support (EXPERIMENTAL)"
 	depends on NFSD_V3 && EXPERIMENTAL
@@ -1427,6 +1442,15 @@ config LOCKD_V4
 config EXPORTFS
 	tristate
 
+config NFS_ACL_SUPPORT
+	tristate
+	select FS_POSIX_ACL
+
+config NFS_COMMON
+	bool
+	depends on NFSD || NFS_FS
+	default y
+
 config SUNRPC
 	tristate
 
diff --git a/fs/Makefile b/fs/Makefile
index 443f2bc56cc..fc92e59e9fa 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_BINFMT_FLAT)	+= binfmt_flat.o
 
 obj-$(CONFIG_FS_MBCACHE)	+= mbcache.o
 obj-$(CONFIG_FS_POSIX_ACL)	+= posix_acl.o xattr_acl.o
+obj-$(CONFIG_NFS_COMMON)	+= nfs_common/
 
 obj-$(CONFIG_QUOTA)		+= dquot.o
 obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile
new file mode 100644
index 00000000000..f689ed82af3
--- /dev/null
+++ b/fs/nfs_common/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for Linux filesystem routines that are shared by client and server.
+#
+
+obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o
+
+nfs_acl-objs := nfsacl.o
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
new file mode 100644
index 00000000000..18c58c32e32
--- /dev/null
+++ b/fs/nfs_common/nfsacl.c
@@ -0,0 +1,257 @@
+/*
+ * fs/nfs_common/nfsacl.c
+ *
+ *  Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
+ */
+
+/*
+ * The Solaris nfsacl protocol represents some ACLs slightly differently
+ * than POSIX 1003.1e draft 17 does (and we do):
+ *
+ *  - Minimal ACLs always have an ACL_MASK entry, so they have
+ *    four instead of three entries.
+ *  - The ACL_MASK entry in such minimal ACLs always has the same
+ *    permissions as the ACL_GROUP_OBJ entry. (In extended ACLs
+ *    the ACL_MASK and ACL_GROUP_OBJ entries may differ.)
+ *  - The identifier fields of the ACL_USER_OBJ and ACL_GROUP_OBJ
+ *    entries contain the identifiers of the owner and owning group.
+ *    (In POSIX ACLs we always set them to ACL_UNDEFINED_ID).
+ *  - ACL entries in the kernel are kept sorted in ascending order
+ *    of (e_tag, e_id). Solaris ACLs are unsorted.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/nfsacl.h>
+#include <linux/nfs3.h>
+#include <linux/sort.h>
+
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL(nfsacl_encode);
+EXPORT_SYMBOL(nfsacl_decode);
+
+struct nfsacl_encode_desc {
+	struct xdr_array2_desc desc;
+	unsigned int count;
+	struct posix_acl *acl;
+	int typeflag;
+	uid_t uid;
+	gid_t gid;
+};
+
+static int
+xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
+{
+	struct nfsacl_encode_desc *nfsacl_desc =
+		(struct nfsacl_encode_desc *) desc;
+	u32 *p = (u32 *) elem;
+
+	if (nfsacl_desc->count < nfsacl_desc->acl->a_count) {
+		struct posix_acl_entry *entry =
+			&nfsacl_desc->acl->a_entries[nfsacl_desc->count++];
+
+		*p++ = htonl(entry->e_tag | nfsacl_desc->typeflag);
+		switch(entry->e_tag) {
+			case ACL_USER_OBJ:
+				*p++ = htonl(nfsacl_desc->uid);
+				break;
+			case ACL_GROUP_OBJ:
+				*p++ = htonl(nfsacl_desc->gid);
+				break;
+			case ACL_USER:
+			case ACL_GROUP:
+				*p++ = htonl(entry->e_id);
+				break;
+			default:  /* Solaris depends on that! */
+				*p++ = 0;
+				break;
+		}
+		*p++ = htonl(entry->e_perm & S_IRWXO);
+	} else {
+		const struct posix_acl_entry *pa, *pe;
+		int group_obj_perm = ACL_READ|ACL_WRITE|ACL_EXECUTE;
+
+		FOREACH_ACL_ENTRY(pa, nfsacl_desc->acl, pe) {
+			if (pa->e_tag == ACL_GROUP_OBJ) {
+				group_obj_perm = pa->e_perm & S_IRWXO;
+				break;
+			}
+		}
+		/* fake up ACL_MASK entry */
+		*p++ = htonl(ACL_MASK | nfsacl_desc->typeflag);
+		*p++ = htonl(0);
+		*p++ = htonl(group_obj_perm);
+	}
+
+	return 0;
+}
+
+unsigned int
+nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
+	      struct posix_acl *acl, int encode_entries, int typeflag)
+{
+	int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0;
+	struct nfsacl_encode_desc nfsacl_desc = {
+		.desc = {
+			.elem_size = 12,
+			.array_len = encode_entries ? entries : 0,
+			.xcode = xdr_nfsace_encode,
+		},
+		.acl = acl,
+		.typeflag = typeflag,
+		.uid = inode->i_uid,
+		.gid = inode->i_gid,
+	};
+	int err;
+
+	if (entries > NFS_ACL_MAX_ENTRIES ||
+	    xdr_encode_word(buf, base, entries))
+		return -EINVAL;
+	err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc);
+	if (!err)
+		err = 8 + nfsacl_desc.desc.elem_size *
+			  nfsacl_desc.desc.array_len;
+	return err;
+}
+
+struct nfsacl_decode_desc {
+	struct xdr_array2_desc desc;
+	unsigned int count;
+	struct posix_acl *acl;
+};
+
+static int
+xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem)
+{
+	struct nfsacl_decode_desc *nfsacl_desc =
+		(struct nfsacl_decode_desc *) desc;
+	u32 *p = (u32 *) elem;
+	struct posix_acl_entry *entry;
+
+	if (!nfsacl_desc->acl) {
+		if (desc->array_len > NFS_ACL_MAX_ENTRIES)
+			return -EINVAL;
+		nfsacl_desc->acl = posix_acl_alloc(desc->array_len, GFP_KERNEL);
+		if (!nfsacl_desc->acl)
+			return -ENOMEM;
+		nfsacl_desc->count = 0;
+	}
+
+	entry = &nfsacl_desc->acl->a_entries[nfsacl_desc->count++];
+	entry->e_tag = ntohl(*p++) & ~NFS_ACL_DEFAULT;
+	entry->e_id = ntohl(*p++);
+	entry->e_perm = ntohl(*p++);
+
+	switch(entry->e_tag) {
+		case ACL_USER_OBJ:
+		case ACL_USER:
+		case ACL_GROUP_OBJ:
+		case ACL_GROUP:
+		case ACL_OTHER:
+			if (entry->e_perm & ~S_IRWXO)
+				return -EINVAL;
+			break;
+		case ACL_MASK:
+			/* Solaris sometimes sets additonal bits in the mask */
+			entry->e_perm &= S_IRWXO;
+			break;
+		default:
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+cmp_acl_entry(const void *x, const void *y)
+{
+	const struct posix_acl_entry *a = x, *b = y;
+
+	if (a->e_tag != b->e_tag)
+		return a->e_tag - b->e_tag;
+	else if (a->e_id > b->e_id)
+		return 1;
+	else if (a->e_id < b->e_id)
+		return -1;
+	else
+		return 0;
+}
+
+/*
+ * Convert from a Solaris ACL to a POSIX 1003.1e draft 17 ACL.
+ */
+static int
+posix_acl_from_nfsacl(struct posix_acl *acl)
+{
+	struct posix_acl_entry *pa, *pe,
+	       *group_obj = NULL, *mask = NULL;
+
+	if (!acl)
+		return 0;
+
+	sort(acl->a_entries, acl->a_count, sizeof(struct posix_acl_entry),
+	     cmp_acl_entry, NULL);
+
+	/* Clear undefined identifier fields and find the ACL_GROUP_OBJ
+	   and ACL_MASK entries. */
+	FOREACH_ACL_ENTRY(pa, acl, pe) {
+		switch(pa->e_tag) {
+			case ACL_USER_OBJ:
+				pa->e_id = ACL_UNDEFINED_ID;
+				break;
+			case ACL_GROUP_OBJ:
+				pa->e_id = ACL_UNDEFINED_ID;
+				group_obj = pa;
+				break;
+			case ACL_MASK:
+				mask = pa;
+				/* fall through */
+			case ACL_OTHER:
+				pa->e_id = ACL_UNDEFINED_ID;
+				break;
+		}
+	}
+	if (acl->a_count == 4 && group_obj && mask &&
+	    mask->e_perm == group_obj->e_perm) {
+		/* remove bogus ACL_MASK entry */
+		memmove(mask, mask+1, (3 - (mask - acl->a_entries)) *
+				      sizeof(struct posix_acl_entry));
+		acl->a_count = 3;
+	}
+	return 0;
+}
+
+unsigned int
+nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
+	      struct posix_acl **pacl)
+{
+	struct nfsacl_decode_desc nfsacl_desc = {
+		.desc = {
+			.elem_size = 12,
+			.xcode = pacl ? xdr_nfsace_decode : NULL,
+		},
+	};
+	u32 entries;
+	int err;
+
+	if (xdr_decode_word(buf, base, &entries) ||
+	    entries > NFS_ACL_MAX_ENTRIES)
+		return -EINVAL;
+	err = xdr_decode_array2(buf, base + 4, &nfsacl_desc.desc);
+	if (err)
+		return err;
+	if (pacl) {
+		if (entries != nfsacl_desc.desc.array_len ||
+		    posix_acl_from_nfsacl(nfsacl_desc.acl) != 0) {
+			posix_acl_release(nfsacl_desc.acl);
+			return -EINVAL;
+		}
+		*pacl = nfsacl_desc.acl;
+	}
+	if (aclcnt)
+		*aclcnt = entries;
+	return 8 + nfsacl_desc.desc.elem_size *
+		   nfsacl_desc.desc.array_len;
+}
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index b8680a247f8..9f043f44c92 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -6,7 +6,9 @@ obj-$(CONFIG_NFSD)	+= nfsd.o
 
 nfsd-y 			:= nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
 			   export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o
+nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
 nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
 			   nfs4acl.o nfs4callback.o
 nfsd-objs		:= $(nfsd-y)
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
new file mode 100644
index 00000000000..7cbf0682b2f
--- /dev/null
+++ b/fs/nfsd/nfs2acl.c
@@ -0,0 +1,336 @@
+/*
+ * linux/fs/nfsd/nfsacl.c
+ *
+ * Process version 2 NFSACL requests.
+ *
+ * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
+ */
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfs.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr.h>
+#include <linux/nfsd/xdr3.h>
+#include <linux/posix_acl.h>
+#include <linux/nfsacl.h>
+
+#define NFSDDBG_FACILITY		NFSDDBG_PROC
+#define RETURN_STATUS(st)	{ resp->status = (st); return (st); }
+
+/*
+ * NULL call.
+ */
+static int
+nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	return nfs_ok;
+}
+
+/*
+ * Get the Access and/or Default ACL of a file.
+ */
+static int nfsacld_proc_getacl(struct svc_rqst * rqstp,
+		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+{
+	svc_fh *fh;
+	struct posix_acl *acl;
+	int nfserr = 0;
+
+	dprintk("nfsd: GETACL(2acl)   %s\n", SVCFH_fmt(&argp->fh));
+
+	fh = fh_copy(&resp->fh, &argp->fh);
+	if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
+		RETURN_STATUS(nfserr_inval);
+
+	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+		RETURN_STATUS(nfserr_inval);
+	resp->mask = argp->mask;
+
+	if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		if (acl == NULL) {
+			/* Solaris returns the inode's minimum ACL. */
+
+			struct inode *inode = fh->fh_dentry->d_inode;
+			acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+		}
+		resp->acl_access = acl;
+	}
+	if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
+		/* Check how Solaris handles requests for the Default ACL
+		   of a non-directory! */
+
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		resp->acl_default = acl;
+	}
+
+	/* resp->acl_{access,default} are released in nfssvc_release_getacl. */
+	RETURN_STATUS(0);
+
+fail:
+	posix_acl_release(resp->acl_access);
+	posix_acl_release(resp->acl_default);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Set the Access and/or Default ACL of a file.
+ */
+static int nfsacld_proc_setacl(struct svc_rqst * rqstp,
+		struct nfsd3_setaclargs *argp,
+		struct nfsd_attrstat *resp)
+{
+	svc_fh *fh;
+	int nfserr = 0;
+
+	dprintk("nfsd: SETACL(2acl)   %s\n", SVCFH_fmt(&argp->fh));
+
+	fh = fh_copy(&resp->fh, &argp->fh);
+	nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+
+	if (!nfserr) {
+		nfserr = nfserrno( nfsd_set_posix_acl(
+			fh, ACL_TYPE_ACCESS, argp->acl_access) );
+	}
+	if (!nfserr) {
+		nfserr = nfserrno( nfsd_set_posix_acl(
+			fh, ACL_TYPE_DEFAULT, argp->acl_default) );
+	}
+
+	/* argp->acl_{access,default} may have been allocated in
+	   nfssvc_decode_setaclargs. */
+	posix_acl_release(argp->acl_access);
+	posix_acl_release(argp->acl_default);
+	return nfserr;
+}
+
+/*
+ * Check file attributes
+ */
+static int nfsacld_proc_getattr(struct svc_rqst * rqstp,
+		struct nfsd_fhandle *argp, struct nfsd_attrstat *resp)
+{
+	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
+
+	fh_copy(&resp->fh, &argp->fh);
+	return fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+}
+
+/*
+ * Check file access
+ */
+static int nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
+		struct nfsd3_accessres *resp)
+{
+	int nfserr;
+
+	dprintk("nfsd: ACCESS(2acl)   %s 0x%x\n",
+			SVCFH_fmt(&argp->fh),
+			argp->access);
+
+	fh_copy(&resp->fh, &argp->fh);
+	resp->access = argp->access;
+	nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL);
+	return nfserr;
+}
+
+/*
+ * XDR decode functions
+ */
+static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclargs *argp)
+{
+	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+		return 0;
+	argp->mask = ntohl(*p); p++;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+
+static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_setaclargs *argp)
+{
+	struct kvec *head = rqstp->rq_arg.head;
+	unsigned int base;
+	int n;
+
+	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+		return 0;
+	argp->mask = ntohl(*p++);
+	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
+	    !xdr_argsize_check(rqstp, p))
+		return 0;
+
+	base = (char *)p - (char *)head->iov_base;
+	n = nfsacl_decode(&rqstp->rq_arg, base, NULL,
+			  (argp->mask & NFS_ACL) ?
+			  &argp->acl_access : NULL);
+	if (n > 0)
+		n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL,
+				  (argp->mask & NFS_DFACL) ?
+				  &argp->acl_default : NULL);
+	return (n > 0);
+}
+
+static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd_fhandle *argp)
+{
+	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+		return 0;
+	return xdr_argsize_check(rqstp, p);
+}
+
+static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_accessargs *argp)
+{
+	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+		return 0;
+	argp->access = ntohl(*p++);
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * XDR encode functions
+ */
+
+/* GETACL */
+static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclres *resp)
+{
+	struct dentry *dentry = resp->fh.fh_dentry;
+	struct inode *inode = dentry->d_inode;
+	int w = nfsacl_size(
+		(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
+		(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
+	struct kvec *head = rqstp->rq_res.head;
+	unsigned int base;
+	int n;
+
+	if (dentry == NULL || dentry->d_inode == NULL)
+		return 0;
+	inode = dentry->d_inode;
+
+	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh);
+	*p++ = htonl(resp->mask);
+	if (!xdr_ressize_check(rqstp, p))
+		return 0;
+	base = (char *)p - (char *)head->iov_base;
+
+	rqstp->rq_res.page_len = w;
+	while (w > 0) {
+		if (!svc_take_res_page(rqstp))
+			return 0;
+		w -= PAGE_SIZE;
+	}
+
+	n = nfsacl_encode(&rqstp->rq_res, base, inode,
+			  resp->acl_access,
+			  resp->mask & NFS_ACL, 0);
+	if (n > 0)
+		n = nfsacl_encode(&rqstp->rq_res, base + n, inode,
+				  resp->acl_default,
+				  resp->mask & NFS_DFACL,
+				  NFS_ACL_DEFAULT);
+	if (n <= 0)
+		return 0;
+	return 1;
+}
+
+static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd_attrstat *resp)
+{
+	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* ACCESS */
+static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_accessres *resp)
+{
+	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh);
+	*p++ = htonl(resp->access);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * XDR release functions
+ */
+static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclres *resp)
+{
+	fh_put(&resp->fh);
+	posix_acl_release(resp->acl_access);
+	posix_acl_release(resp->acl_default);
+	return 1;
+}
+
+static int nfsaclsvc_release_fhandle(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd_fhandle *resp)
+{
+	fh_put(&resp->fh);
+	return 1;
+}
+
+#define nfsaclsvc_decode_voidargs	NULL
+#define nfsaclsvc_encode_voidres	NULL
+#define nfsaclsvc_release_void		NULL
+#define nfsd3_fhandleargs	nfsd_fhandle
+#define nfsd3_attrstatres	nfsd_attrstat
+#define nfsd3_voidres		nfsd3_voidargs
+struct nfsd3_voidargs { int dummy; };
+
+#define PROC(name, argt, rest, relt, cache, respsize)	\
+ { (svc_procfunc) nfsacld_proc_##name,		\
+   (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
+   (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
+   (kxdrproc_t) nfsaclsvc_release_##relt,		\
+   sizeof(struct nfsd3_##argt##args),		\
+   sizeof(struct nfsd3_##rest##res),		\
+   0,						\
+   cache,					\
+   respsize,					\
+ }
+
+#define ST 1		/* status*/
+#define AT 21		/* attributes */
+#define pAT (1+AT)	/* post attributes - conditional */
+#define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
+
+static struct svc_procedure		nfsd_acl_procedures2[] = {
+  PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
+  PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
+  PROC(setacl,	setacl,		attrstat,	fhandle,  RC_NOCACHE, ST+AT),
+  PROC(getattr, fhandle,	attrstat,	fhandle,  RC_NOCACHE, ST+AT),
+  PROC(access,	access,		access,		fhandle,  RC_NOCACHE, ST+AT+1),
+};
+
+struct svc_version	nfsd_acl_version2 = {
+		.vs_vers	= 2,
+		.vs_nproc	= 5,
+		.vs_proc	= nfsd_acl_procedures2,
+		.vs_dispatch	= nfsd_dispatch,
+		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
new file mode 100644
index 00000000000..64ba40572fe
--- /dev/null
+++ b/fs/nfsd/nfs3acl.c
@@ -0,0 +1,267 @@
+/*
+ * linux/fs/nfsd/nfs3acl.c
+ *
+ * Process version 3 NFSACL requests.
+ *
+ * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
+ */
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfs3.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr3.h>
+#include <linux/posix_acl.h>
+#include <linux/nfsacl.h>
+
+#define RETURN_STATUS(st)	{ resp->status = (st); return (st); }
+
+/*
+ * NULL call.
+ */
+static int
+nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	return nfs_ok;
+}
+
+/*
+ * Get the Access and/or Default ACL of a file.
+ */
+static int nfsd3_proc_getacl(struct svc_rqst * rqstp,
+		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+{
+	svc_fh *fh;
+	struct posix_acl *acl;
+	int nfserr = 0;
+
+	fh = fh_copy(&resp->fh, &argp->fh);
+	if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
+		RETURN_STATUS(nfserr_inval);
+
+	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+		RETURN_STATUS(nfserr_inval);
+	resp->mask = argp->mask;
+
+	if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		if (acl == NULL) {
+			/* Solaris returns the inode's minimum ACL. */
+
+			struct inode *inode = fh->fh_dentry->d_inode;
+			acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+		}
+		resp->acl_access = acl;
+	}
+	if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
+		/* Check how Solaris handles requests for the Default ACL
+		   of a non-directory! */
+
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		resp->acl_default = acl;
+	}
+
+	/* resp->acl_{access,default} are released in nfs3svc_release_getacl. */
+	RETURN_STATUS(0);
+
+fail:
+	posix_acl_release(resp->acl_access);
+	posix_acl_release(resp->acl_default);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Set the Access and/or Default ACL of a file.
+ */
+static int nfsd3_proc_setacl(struct svc_rqst * rqstp,
+		struct nfsd3_setaclargs *argp,
+		struct nfsd3_attrstat *resp)
+{
+	svc_fh *fh;
+	int nfserr = 0;
+
+	fh = fh_copy(&resp->fh, &argp->fh);
+	nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+
+	if (!nfserr) {
+		nfserr = nfserrno( nfsd_set_posix_acl(
+			fh, ACL_TYPE_ACCESS, argp->acl_access) );
+	}
+	if (!nfserr) {
+		nfserr = nfserrno( nfsd_set_posix_acl(
+			fh, ACL_TYPE_DEFAULT, argp->acl_default) );
+	}
+
+	/* argp->acl_{access,default} may have been allocated in
+	   nfs3svc_decode_setaclargs. */
+	posix_acl_release(argp->acl_access);
+	posix_acl_release(argp->acl_default);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * XDR decode functions
+ */
+static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclargs *args)
+{
+	if (!(p = nfs3svc_decode_fh(p, &args->fh)))
+		return 0;
+	args->mask = ntohl(*p); p++;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+
+static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_setaclargs *args)
+{
+	struct kvec *head = rqstp->rq_arg.head;
+	unsigned int base;
+	int n;
+
+	if (!(p = nfs3svc_decode_fh(p, &args->fh)))
+		return 0;
+	args->mask = ntohl(*p++);
+	if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
+	    !xdr_argsize_check(rqstp, p))
+		return 0;
+
+	base = (char *)p - (char *)head->iov_base;
+	n = nfsacl_decode(&rqstp->rq_arg, base, NULL,
+			  (args->mask & NFS_ACL) ?
+			  &args->acl_access : NULL);
+	if (n > 0)
+		n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL,
+				  (args->mask & NFS_DFACL) ?
+				  &args->acl_default : NULL);
+	return (n > 0);
+}
+
+/*
+ * XDR encode functions
+ */
+
+/* GETACL */
+static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclres *resp)
+{
+	struct dentry *dentry = resp->fh.fh_dentry;
+
+	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
+	if (resp->status == 0 && dentry && dentry->d_inode) {
+		struct inode *inode = dentry->d_inode;
+		int w = nfsacl_size(
+			(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
+			(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
+		struct kvec *head = rqstp->rq_res.head;
+		unsigned int base;
+		int n;
+
+		*p++ = htonl(resp->mask);
+		if (!xdr_ressize_check(rqstp, p))
+			return 0;
+		base = (char *)p - (char *)head->iov_base;
+
+		rqstp->rq_res.page_len = w;
+		while (w > 0) {
+			if (!svc_take_res_page(rqstp))
+				return 0;
+			w -= PAGE_SIZE;
+		}
+
+		n = nfsacl_encode(&rqstp->rq_res, base, inode,
+				  resp->acl_access,
+				  resp->mask & NFS_ACL, 0);
+		if (n > 0)
+			n = nfsacl_encode(&rqstp->rq_res, base + n, inode,
+					  resp->acl_default,
+					  resp->mask & NFS_DFACL,
+					  NFS_ACL_DEFAULT);
+		if (n <= 0)
+			return 0;
+	} else
+		if (!xdr_ressize_check(rqstp, p))
+			return 0;
+
+	return 1;
+}
+
+/* SETACL */
+static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_attrstat *resp)
+{
+	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
+
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * XDR release functions
+ */
+static int nfs3svc_release_getacl(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclres *resp)
+{
+	fh_put(&resp->fh);
+	posix_acl_release(resp->acl_access);
+	posix_acl_release(resp->acl_default);
+	return 1;
+}
+
+#define nfs3svc_decode_voidargs		NULL
+#define nfs3svc_release_void		NULL
+#define nfsd3_setaclres			nfsd3_attrstat
+#define nfsd3_voidres			nfsd3_voidargs
+struct nfsd3_voidargs { int dummy; };
+
+#define PROC(name, argt, rest, relt, cache, respsize)	\
+ { (svc_procfunc) nfsd3_proc_##name,		\
+   (kxdrproc_t) nfs3svc_decode_##argt##args,	\
+   (kxdrproc_t) nfs3svc_encode_##rest##res,	\
+   (kxdrproc_t) nfs3svc_release_##relt,		\
+   sizeof(struct nfsd3_##argt##args),		\
+   sizeof(struct nfsd3_##rest##res),		\
+   0,						\
+   cache,					\
+   respsize,					\
+ }
+
+#define ST 1		/* status*/
+#define AT 21		/* attributes */
+#define pAT (1+AT)	/* post attributes - conditional */
+#define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
+
+static struct svc_procedure		nfsd_acl_procedures3[] = {
+  PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
+  PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
+  PROC(setacl,	setacl,		setacl,		fhandle,  RC_NOCACHE, ST+pAT),
+};
+
+struct svc_version	nfsd_acl_version3 = {
+		.vs_vers	= 3,
+		.vs_nproc	= 3,
+		.vs_proc	= nfsd_acl_procedures3,
+		.vs_dispatch	= nfsd_dispatch,
+		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+};
+
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 11f806835c5..e0e134d6bab 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -71,6 +71,12 @@ decode_fh(u32 *p, struct svc_fh *fhp)
 	return p + XDR_QUADLEN(size);
 }
 
+/* Helper function for NFSv3 ACL code */
+u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp)
+{
+	return decode_fh(p, fhp);
+}
+
 static inline u32 *
 encode_fh(u32 *p, struct svc_fh *fhp)
 {
@@ -233,6 +239,13 @@ encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 	return p;
 }
 
+/* Helper for NFSv3 ACLs */
+u32 *
+nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+	return encode_post_op_attr(rqstp, p, fhp);
+}
+
 /*
  * Enocde weak cache consistency data
  */
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 02ded7cfbdc..79b25b19fec 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -31,6 +31,7 @@
 #include <linux/nfsd/stats.h>
 #include <linux/nfsd/cache.h>
 #include <linux/lockd/bind.h>
+#include <linux/nfsacl.h>
 
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 
@@ -362,6 +363,31 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
 	return 1;
 }
 
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+static struct svc_stat	nfsd_acl_svcstats;
+static struct svc_version *	nfsd_acl_version[] = {
+	[2] = &nfsd_acl_version2,
+	[3] = &nfsd_acl_version3,
+};
+
+#define NFSD_ACL_NRVERS		(sizeof(nfsd_acl_version)/sizeof(nfsd_acl_version[0]))
+static struct svc_program	nfsd_acl_program = {
+	.pg_prog		= NFS_ACL_PROGRAM,
+	.pg_nvers		= NFSD_ACL_NRVERS,
+	.pg_vers		= nfsd_acl_version,
+	.pg_name		= "nfsd",
+	.pg_stats		= &nfsd_acl_svcstats,
+};
+
+static struct svc_stat	nfsd_acl_svcstats = {
+	.program	= &nfsd_acl_program,
+};
+
+#define nfsd_acl_program_p	&nfsd_acl_program
+#else
+#define nfsd_acl_program_p	NULL
+#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
+
 extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4;
 
 static struct svc_version *	nfsd_version[] = {
@@ -376,6 +402,7 @@ static struct svc_version *	nfsd_version[] = {
 
 #define NFSD_NRVERS		(sizeof(nfsd_version)/sizeof(nfsd_version[0]))
 struct svc_program		nfsd_program = {
+	.pg_next		= nfsd_acl_program_p,
 	.pg_prog		= NFS_PROGRAM,		/* program number */
 	.pg_nvers		= NFSD_NRVERS,		/* nr of entries in nfsd_version */
 	.pg_vers		= nfsd_version,		/* version table */
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 948b08287c9..b45999ff33e 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -49,6 +49,12 @@ decode_fh(u32 *p, struct svc_fh *fhp)
 	return p + (NFS_FHSIZE >> 2);
 }
 
+/* Helper function for NFSv2 ACL code */
+u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp)
+{
+	return decode_fh(p, fhp);
+}
+
 static inline u32 *
 encode_fh(u32 *p, struct svc_fh *fhp)
 {
@@ -190,6 +196,11 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 	return p;
 }
 
+/* Helper function for NFSv2 ACL code */
+u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+	return encode_fattr(rqstp, p, fhp);
+}
 
 /*
  * XDR decode functions
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index e3e9d217236..ae3940dc85c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -46,8 +46,9 @@
 #include <linux/nfsd/nfsfh.h>
 #include <linux/quotaops.h>
 #include <linux/dnotify.h>
-#ifdef CONFIG_NFSD_V4
+#include <linux/xattr_acl.h>
 #include <linux/posix_acl.h>
+#ifdef CONFIG_NFSD_V4
 #include <linux/posix_acl_xattr.h>
 #include <linux/xattr_acl.h>
 #include <linux/xattr.h>
@@ -1857,3 +1858,107 @@ nfsd_racache_init(int cache_size)
 	nfsdstats.ra_size = cache_size;
 	return 0;
 }
+
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+struct posix_acl *
+nfsd_get_posix_acl(struct svc_fh *fhp, int type)
+{
+	struct inode *inode = fhp->fh_dentry->d_inode;
+	char *name;
+	void *value = NULL;
+	ssize_t size;
+	struct posix_acl *acl;
+
+	if (!IS_POSIXACL(inode) || !inode->i_op || !inode->i_op->getxattr)
+		return ERR_PTR(-EOPNOTSUPP);
+	switch(type) {
+		case ACL_TYPE_ACCESS:
+			name = XATTR_NAME_ACL_ACCESS;
+			break;
+		case ACL_TYPE_DEFAULT:
+			name = XATTR_NAME_ACL_DEFAULT;
+			break;
+		default:
+			return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	size = inode->i_op->getxattr(fhp->fh_dentry, name, NULL, 0);
+
+	if (size < 0) {
+		acl = ERR_PTR(size);
+		goto getout;
+	} else if (size > 0) {
+		value = kmalloc(size, GFP_KERNEL);
+		if (!value) {
+			acl = ERR_PTR(-ENOMEM);
+			goto getout;
+		}
+		size = inode->i_op->getxattr(fhp->fh_dentry, name, value, size);
+		if (size < 0) {
+			acl = ERR_PTR(size);
+			goto getout;
+		}
+	}
+	acl = posix_acl_from_xattr(value, size);
+
+getout:
+	kfree(value);
+	return acl;
+}
+
+int
+nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
+{
+	struct inode *inode = fhp->fh_dentry->d_inode;
+	char *name;
+	void *value = NULL;
+	size_t size;
+	int error;
+
+	if (!IS_POSIXACL(inode) || !inode->i_op ||
+	    !inode->i_op->setxattr || !inode->i_op->removexattr)
+		return -EOPNOTSUPP;
+	switch(type) {
+		case ACL_TYPE_ACCESS:
+			name = XATTR_NAME_ACL_ACCESS;
+			break;
+		case ACL_TYPE_DEFAULT:
+			name = XATTR_NAME_ACL_DEFAULT;
+			break;
+		default:
+			return -EOPNOTSUPP;
+	}
+
+	if (acl && acl->a_count) {
+		size = xattr_acl_size(acl->a_count);
+		value = kmalloc(size, GFP_KERNEL);
+		if (!value)
+			return -ENOMEM;
+		size = posix_acl_to_xattr(acl, value, size);
+		if (size < 0) {
+			error = size;
+			goto getout;
+		}
+	} else
+		size = 0;
+
+	if (!fhp->fh_locked)
+		fh_lock(fhp);  /* unlocking is done automatically */
+	if (size)
+		error = inode->i_op->setxattr(fhp->fh_dentry, name,
+					      value, size, 0);
+	else {
+		if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT)
+			error = 0;
+		else {
+			error = inode->i_op->removexattr(fhp->fh_dentry, name);
+			if (error == -ENODATA)
+				error = 0;
+		}
+	}
+
+getout:
+	kfree(value);
+	return error;
+}
+#endif  /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
-- 
cgit v1.2.3


From b7fa0554cf1ba6d6895cd0a5b02989a26e0bc704 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:27 +0000
Subject: [PATCH] NFS: Add support for NFSv3 ACLs

 This adds acl support fo nfs clients via the NFSACL protocol extension, by
 implementing the getxattr, listxattr, setxattr, and removexattr iops for the
 system.posix_acl_access and system.posix_acl_default attributes.  This patch
 implements a dumb version that uses no caching (and thus adds some overhead).
 (Another patch in this patchset adds caching as well.)

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/Kconfig        |  11 ++
 fs/nfs/Makefile   |   1 +
 fs/nfs/dir.c      |  21 ++++
 fs/nfs/file.c     |  12 +++
 fs/nfs/inode.c    |  36 ++++++-
 fs/nfs/nfs3acl.c  | 303 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs3proc.c |   7 +-
 fs/nfs/nfs3xdr.c  | 147 ++++++++++++++++++++++++++
 fs/nfs/nfsroot.c  |   9 ++
 9 files changed, 541 insertions(+), 6 deletions(-)
 create mode 100644 fs/nfs/nfs3acl.c

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index d44b04d9b0a..a7c0cc3203c 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1268,6 +1268,7 @@ config NFS_FS
 	depends on INET
 	select LOCKD
 	select SUNRPC
+	select NFS_ACL_SUPPORT if NFS_V3_ACL
 	help
 	  If you are connected to some other (usually local) Unix computer
 	  (using SLIP, PLIP, PPP or Ethernet) and want to mount files residing
@@ -1310,6 +1311,16 @@ config NFS_V3
 
 	  If unsure, say Y.
 
+config NFS_V3_ACL
+	bool "Provide client support for the NFSv3 ACL protocol extension"
+	depends on NFS_V3
+	help
+	  Implement the NFSv3 ACL protocol extension for manipulating POSIX
+	  Access Control Lists.  The server should also be compiled with
+	  the NFSv3 ACL protocol extension; see the CONFIG_NFSD_V3_ACL option.
+
+	  If unsure, say N.
+
 config NFS_V4
 	bool "Provide NFSv4 client support (EXPERIMENTAL)"
 	depends on NFS_FS && EXPERIMENTAL
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index b4baa031edf..8b3bb715d17 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -8,6 +8,7 @@ nfs-y 			:= dir.o file.o inode.o nfs2xdr.o pagelist.o \
 			   proc.o read.o symlink.o unlink.o write.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
+nfs-$(CONFIG_NFS_V3_ACL)	+= nfs3acl.o
 nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
 			   delegation.o idmap.o \
 			   callback.o callback_xdr.o callback_proc.o
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5720537bffd..2c6a9594568 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -75,6 +75,27 @@ struct inode_operations nfs_dir_inode_operations = {
 	.setattr	= nfs_setattr,
 };
 
+#ifdef CONFIG_NFS_V3
+struct inode_operations nfs3_dir_inode_operations = {
+	.create		= nfs_create,
+	.lookup		= nfs_lookup,
+	.link		= nfs_link,
+	.unlink		= nfs_unlink,
+	.symlink	= nfs_symlink,
+	.mkdir		= nfs_mkdir,
+	.rmdir		= nfs_rmdir,
+	.mknod		= nfs_mknod,
+	.rename		= nfs_rename,
+	.permission	= nfs_permission,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+	.listxattr	= nfs3_listxattr,
+	.getxattr	= nfs3_getxattr,
+	.setxattr	= nfs3_setxattr,
+	.removexattr	= nfs3_removexattr,
+};
+#endif  /* CONFIG_NFS_V3 */
+
 #ifdef CONFIG_NFS_V4
 
 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 55c90759249..a606708264e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -71,6 +71,18 @@ struct inode_operations nfs_file_inode_operations = {
 	.setattr	= nfs_setattr,
 };
 
+#ifdef CONFIG_NFS_V3
+struct inode_operations nfs3_file_inode_operations = {
+	.permission	= nfs_permission,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+	.listxattr	= nfs3_listxattr,
+	.getxattr	= nfs3_getxattr,
+	.setxattr	= nfs3_setxattr,
+	.removexattr	= nfs3_removexattr,
+};
+#endif  /* CONFIG_NFS_v3 */
+
 /* Hack for future NFS swap support */
 #ifndef IS_SWAPFILE
 # define IS_SWAPFILE(inode)	(0)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 97b3fe7ece6..440b9cbb6f8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -108,6 +108,21 @@ static struct rpc_program	nfs_program = {
 	.pipe_dir_name		= "/nfs",
 };
 
+#ifdef CONFIG_NFS_V3_ACL
+static struct rpc_stat		nfsacl_rpcstat = { &nfsacl_program };
+static struct rpc_version *	nfsacl_version[] = {
+	[3]			= &nfsacl_version3,
+};
+
+struct rpc_program		nfsacl_program = {
+	.name =			"nfsacl",
+	.number =		NFS_ACL_PROGRAM,
+	.nrvers =		sizeof(nfsacl_version) / sizeof(nfsacl_version[0]),
+	.version =		nfsacl_version,
+	.stats =		&nfsacl_rpcstat,
+};
+#endif  /* CONFIG_NFS_V3_ACL */
+
 static inline unsigned long
 nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
 {
@@ -165,6 +180,9 @@ nfs_umount_begin(struct super_block *sb)
 	/* -EIO all pending I/O */
 	if (!IS_ERR(rpc))
 		rpc_killall_tasks(rpc);
+	rpc = NFS_SB(sb)->client_acl;
+	if (!IS_ERR(rpc))
+		rpc_killall_tasks(rpc);
 }
 
 
@@ -461,8 +479,17 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 		atomic_inc(&server->client->cl_count);
 		server->client_sys = server->client;
 	}
-
 	if (server->flags & NFS_MOUNT_VER3) {
+#ifdef CONFIG_NFS_V3_ACL
+		if (!(server->flags & NFS_MOUNT_NOACL)) {
+			server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
+			/* No errors! Assume that Sun nfsacls are supported */
+			if (!IS_ERR(server->client_acl))
+				server->caps |= NFS_CAP_ACLS;
+		}
+#else
+		server->flags &= ~NFS_MOUNT_NOACL;
+#endif /* CONFIG_NFS_V3_ACL */
 		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
 			server->namelen = NFS3_MAXNAMLEN;
 		sb->s_time_gran = 1;
@@ -546,6 +573,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 		{ NFS_MOUNT_NOCTO, ",nocto", "" },
 		{ NFS_MOUNT_NOAC, ",noac", "" },
 		{ NFS_MOUNT_NONLM, ",nolock", ",lock" },
+		{ NFS_MOUNT_NOACL, ",noacl", "" },
 		{ 0, NULL, NULL }
 	};
 	struct proc_nfs_info *nfs_infop;
@@ -1452,7 +1480,7 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
-	server->client = server->client_sys = ERR_PTR(-EINVAL);
+	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
 
 	root = &server->fh;
 	if (data->flags & NFS_MOUNT_VER3)
@@ -1513,6 +1541,8 @@ static void nfs_kill_super(struct super_block *s)
 		rpc_shutdown_client(server->client);
 	if (!IS_ERR(server->client_sys))
 		rpc_shutdown_client(server->client_sys);
+	if (!IS_ERR(server->client_acl))
+		rpc_shutdown_client(server->client_acl);
 
 	if (!(server->flags & NFS_MOUNT_NONLM))
 		lockd_down();	/* release rpc.lockd */
@@ -1794,7 +1824,7 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
-	server->client = server->client_sys = ERR_PTR(-EINVAL);
+	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
 
 	p = nfs_copy_user_string(NULL, &data->hostname, 256);
 	if (IS_ERR(p))
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
new file mode 100644
index 00000000000..393ba79fc14
--- /dev/null
+++ b/fs/nfs/nfs3acl.c
@@ -0,0 +1,303 @@
+#include <linux/fs.h>
+#include <linux/nfs.h>
+#include <linux/nfs3.h>
+#include <linux/nfs_fs.h>
+#include <linux/xattr_acl.h>
+#include <linux/nfsacl.h>
+
+#define NFSDBG_FACILITY	NFSDBG_PROC
+
+ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+	struct inode *inode = dentry->d_inode;
+	struct posix_acl *acl;
+	int pos=0, len=0;
+
+#	define output(s) do {						\
+			if (pos + sizeof(s) <= size) {			\
+				memcpy(buffer + pos, s, sizeof(s));	\
+				pos += sizeof(s);			\
+			}						\
+			len += sizeof(s);				\
+		} while(0)
+
+	acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl) {
+		output("system.posix_acl_access");
+		posix_acl_release(acl);
+	}
+
+	if (S_ISDIR(inode->i_mode)) {
+		acl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+		if (acl) {
+			output("system.posix_acl_default");
+			posix_acl_release(acl);
+		}
+	}
+
+#	undef output
+
+	if (!buffer || len <= size)
+		return len;
+	return -ERANGE;
+}
+
+ssize_t nfs3_getxattr(struct dentry *dentry, const char *name,
+		void *buffer, size_t size)
+{
+	struct inode *inode = dentry->d_inode;
+	struct posix_acl *acl;
+	int type, error = 0;
+
+	if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0)
+		type = ACL_TYPE_ACCESS;
+	else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0)
+		type = ACL_TYPE_DEFAULT;
+	else
+		return -EOPNOTSUPP;
+
+	acl = nfs3_proc_getacl(inode, type);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	else if (acl) {
+		if (type == ACL_TYPE_ACCESS && acl->a_count == 0)
+			error = -ENODATA;
+		else
+			error = posix_acl_to_xattr(acl, buffer, size);
+		posix_acl_release(acl);
+	} else
+		error = -ENODATA;
+
+	return error;
+}
+
+int nfs3_setxattr(struct dentry *dentry, const char *name,
+	     const void *value, size_t size, int flags)
+{
+	struct inode *inode = dentry->d_inode;
+	struct posix_acl *acl;
+	int type, error;
+
+	if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0)
+		type = ACL_TYPE_ACCESS;
+	else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0)
+		type = ACL_TYPE_DEFAULT;
+	else
+		return -EOPNOTSUPP;
+
+	acl = posix_acl_from_xattr(value, size);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	error = nfs3_proc_setacl(inode, type, acl);
+	posix_acl_release(acl);
+
+	return error;
+}
+
+int nfs3_removexattr(struct dentry *dentry, const char *name)
+{
+	struct inode *inode = dentry->d_inode;
+	int type;
+
+	if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0)
+		type = ACL_TYPE_ACCESS;
+	else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0)
+		type = ACL_TYPE_DEFAULT;
+	else
+		return -EOPNOTSUPP;
+
+	return nfs3_proc_setacl(inode, type, NULL);
+}
+
+struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_fattr fattr;
+	struct page *pages[NFSACL_MAXPAGES] = { };
+	struct nfs3_getaclargs args = {
+		.fh = NFS_FH(inode),
+		/* The xdr layer may allocate pages here. */
+		.pages = pages,
+	};
+	struct nfs3_getaclres res = {
+		.fattr =	&fattr,
+	};
+	struct posix_acl *acl = NULL;
+	int status, count;
+
+	if (!nfs_server_capable(inode, NFS_CAP_ACLS))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	switch (type) {
+		case ACL_TYPE_ACCESS:
+			args.mask = NFS_ACLCNT|NFS_ACL;
+			break;
+
+		case ACL_TYPE_DEFAULT:
+			if (!S_ISDIR(inode->i_mode))
+				return NULL;
+			args.mask = NFS_DFACLCNT|NFS_DFACL;
+			break;
+
+		default:
+			return ERR_PTR(-EINVAL);
+	}
+
+	dprintk("NFS call getacl\n");
+	status = rpc_call(server->client_acl, ACLPROC3_GETACL,
+			  &args, &res, 0);
+	dprintk("NFS reply getacl: %d\n", status);
+
+	/* pages may have been allocated at the xdr layer. */
+	for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++)
+		__free_page(args.pages[count]);
+
+	switch (status) {
+		case 0:
+			status = nfs_refresh_inode(inode, &fattr);
+			break;
+		case -EPFNOSUPPORT:
+		case -EPROTONOSUPPORT:
+			dprintk("NFS_V3_ACL extension not supported; disabling\n");
+			server->caps &= ~NFS_CAP_ACLS;
+		case -ENOTSUPP:
+			status = -EOPNOTSUPP;
+		default:
+			goto getout;
+	}
+	if ((args.mask & res.mask) != args.mask) {
+		status = -EIO;
+		goto getout;
+	}
+
+	if (res.acl_access != NULL) {
+		if (posix_acl_equiv_mode(res.acl_access, NULL) == 0) {
+			posix_acl_release(res.acl_access);
+			res.acl_access = NULL;
+		}
+	}
+
+	switch(type) {
+		case ACL_TYPE_ACCESS:
+			acl = res.acl_access;
+			res.acl_access = NULL;
+			break;
+
+		case ACL_TYPE_DEFAULT:
+			acl = res.acl_default;
+			res.acl_default = NULL;
+	}
+
+getout:
+	posix_acl_release(res.acl_access);
+	posix_acl_release(res.acl_default);
+
+	if (status != 0) {
+		posix_acl_release(acl);
+		acl = ERR_PTR(status);
+	}
+	return acl;
+}
+
+static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
+		  struct posix_acl *dfacl)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_fattr fattr;
+	struct page *pages[NFSACL_MAXPAGES] = { };
+	struct nfs3_setaclargs args = {
+		.inode = inode,
+		.mask = NFS_ACL,
+		.acl_access = acl,
+		.pages = pages,
+	};
+	int status, count;
+
+	status = -EOPNOTSUPP;
+	if (!nfs_server_capable(inode, NFS_CAP_ACLS))
+		goto out;
+
+	/* We are doing this here, because XDR marshalling can only
+	   return -ENOMEM. */
+	status = -ENOSPC;
+	if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES)
+		goto out;
+	if (dfacl != NULL && dfacl->a_count > NFS_ACL_MAX_ENTRIES)
+		goto out;
+	if (S_ISDIR(inode->i_mode)) {
+		args.mask |= NFS_DFACL;
+		args.acl_default = dfacl;
+	}
+
+	dprintk("NFS call setacl\n");
+	nfs_begin_data_update(inode);
+	status = rpc_call(server->client_acl, ACLPROC3_SETACL,
+			  &args, &fattr, 0);
+	NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
+	nfs_end_data_update(inode);
+	dprintk("NFS reply setacl: %d\n", status);
+
+	/* pages may have been allocated at the xdr layer. */
+	for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++)
+		__free_page(args.pages[count]);
+
+	switch (status) {
+		case 0:
+			status = nfs_refresh_inode(inode, &fattr);
+			break;
+		case -EPFNOSUPPORT:
+		case -EPROTONOSUPPORT:
+			dprintk("NFS_V3_ACL SETACL RPC not supported"
+					"(will not retry)\n");
+			server->caps &= ~NFS_CAP_ACLS;
+		case -ENOTSUPP:
+			status = -EOPNOTSUPP;
+	}
+out:
+	return status;
+}
+
+int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl)
+{
+	struct posix_acl *alloc = NULL, *dfacl = NULL;
+	int status;
+
+	if (S_ISDIR(inode->i_mode)) {
+		switch(type) {
+			case ACL_TYPE_ACCESS:
+				alloc = dfacl = nfs3_proc_getacl(inode,
+						ACL_TYPE_DEFAULT);
+				if (IS_ERR(alloc))
+					goto fail;
+				break;
+
+			case ACL_TYPE_DEFAULT:
+				dfacl = acl;
+				alloc = acl = nfs3_proc_getacl(inode,
+						ACL_TYPE_ACCESS);
+				if (IS_ERR(alloc))
+					goto fail;
+				break;
+
+			default:
+				return -EINVAL;
+		}
+	} else if (type != ACL_TYPE_ACCESS)
+			return -EINVAL;
+
+	if (acl == NULL) {
+		alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+		if (IS_ERR(alloc))
+			goto fail;
+	}
+	status = nfs3_proc_setacls(inode, acl, dfacl);
+	posix_acl_release(alloc);
+	return status;
+
+fail:
+	return PTR_ERR(alloc);
+}
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 53953a77571..d03bac0cc42 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -17,6 +17,7 @@
 #include <linux/nfs_page.h>
 #include <linux/lockd/bind.h>
 #include <linux/smp_lock.h>
+#include <linux/nfs_mount.h>
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
@@ -45,7 +46,7 @@ static inline int
 nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
 {
 	struct rpc_message msg = {
-		.rpc_proc	= &nfs3_procedures[proc],
+		.rpc_proc	= &clnt->cl_procinfo[proc],
 		.rpc_argp	= argp,
 		.rpc_resp	= resp,
 	};
@@ -825,8 +826,8 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 struct nfs_rpc_ops	nfs_v3_clientops = {
 	.version	= 3,			/* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
-	.dir_inode_ops	= &nfs_dir_inode_operations,
-	.file_inode_ops	= &nfs_file_inode_operations,
+	.dir_inode_ops	= &nfs3_dir_inode_operations,
+	.file_inode_ops	= &nfs3_file_inode_operations,
 	.getroot	= nfs3_proc_get_root,
 	.getattr	= nfs3_proc_getattr,
 	.setattr	= nfs3_proc_setattr,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index a3593d47e5a..a4437fb177f 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -21,6 +21,7 @@
 #include <linux/nfs.h>
 #include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfsacl.h>
 
 #define NFSDBG_FACILITY		NFSDBG_XDR
 
@@ -79,6 +80,11 @@ extern int			nfs_stat_to_errno(int);
 #define NFS3_pathconfres_sz	(1+NFS3_post_op_attr_sz+6)
 #define NFS3_commitres_sz	(1+NFS3_wcc_data_sz+2)
 
+#define ACL3_getaclargs_sz	(NFS3_fh_sz+1)
+#define ACL3_setaclargs_sz	(NFS3_fh_sz+1+2*(2+5*3))
+#define ACL3_getaclres_sz	(1+NFS3_post_op_attr_sz+1+2*(2+5*3))
+#define ACL3_setaclres_sz	(1+NFS3_post_op_attr_sz)
+
 /*
  * Map file type to S_IFMT bits
  */
@@ -627,6 +633,74 @@ nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
 	return 0;
 }
 
+#ifdef CONFIG_NFS_V3_ACL
+/*
+ * Encode GETACL arguments
+ */
+static int
+nfs3_xdr_getaclargs(struct rpc_rqst *req, u32 *p,
+		    struct nfs3_getaclargs *args)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	unsigned int replen;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	*p++ = htonl(args->mask);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	if (args->mask & (NFS_ACL | NFS_DFACL)) {
+		/* Inline the page array */
+		replen = (RPC_REPHDRSIZE + auth->au_rslack +
+			  ACL3_getaclres_sz) << 2;
+		xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0,
+				 NFSACL_MAXPAGES << PAGE_SHIFT);
+	}
+	return 0;
+}
+
+/*
+ * Encode SETACL arguments
+ */
+static int
+nfs3_xdr_setaclargs(struct rpc_rqst *req, u32 *p,
+                   struct nfs3_setaclargs *args)
+{
+	struct xdr_buf *buf = &req->rq_snd_buf;
+	unsigned int base, len_in_head, len = nfsacl_size(
+		(args->mask & NFS_ACL)   ? args->acl_access  : NULL,
+		(args->mask & NFS_DFACL) ? args->acl_default : NULL);
+	int count, err;
+
+	p = xdr_encode_fhandle(p, NFS_FH(args->inode));
+	*p++ = htonl(args->mask);
+	base = (char *)p - (char *)buf->head->iov_base;
+	/* put as much of the acls into head as possible. */
+	len_in_head = min_t(unsigned int, buf->head->iov_len - base, len);
+	len -= len_in_head;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + len_in_head);
+
+	for (count = 0; (count << PAGE_SHIFT) < len; count++) {
+		args->pages[count] = alloc_page(GFP_KERNEL);
+		if (!args->pages[count]) {
+			while (count)
+				__free_page(args->pages[--count]);
+			return -ENOMEM;
+		}
+	}
+	xdr_encode_pages(buf, args->pages, 0, len);
+
+	err = nfsacl_encode(buf, base, args->inode,
+			    (args->mask & NFS_ACL) ?
+			    args->acl_access : NULL, 1, 0);
+	if (err > 0)
+		err = nfsacl_encode(buf, base + err, args->inode,
+				    (args->mask & NFS_DFACL) ?
+				    args->acl_default : NULL, 1,
+				    NFS_ACL_DEFAULT);
+	return (err > 0) ? 0 : err;
+}
+#endif  /* CONFIG_NFS_V3_ACL */
+
 /*
  * NFS XDR decode functions
  */
@@ -978,6 +1052,54 @@ nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
 	return 0;
 }
 
+#ifdef CONFIG_NFS_V3_ACL
+/*
+ * Decode GETACL reply
+ */
+static int
+nfs3_xdr_getaclres(struct rpc_rqst *req, u32 *p,
+		   struct nfs3_getaclres *res)
+{
+	struct xdr_buf *buf = &req->rq_rcv_buf;
+	int status = ntohl(*p++);
+	struct posix_acl **acl;
+	unsigned int *aclcnt;
+	int err, base;
+
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+	p = xdr_decode_post_op_attr(p, res->fattr);
+	res->mask = ntohl(*p++);
+	if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+		return -EINVAL;
+	base = (char *)p - (char *)req->rq_rcv_buf.head->iov_base;
+
+	acl = (res->mask & NFS_ACL) ? &res->acl_access : NULL;
+	aclcnt = (res->mask & NFS_ACLCNT) ? &res->acl_access_count : NULL;
+	err = nfsacl_decode(buf, base, aclcnt, acl);
+
+	acl = (res->mask & NFS_DFACL) ? &res->acl_default : NULL;
+	aclcnt = (res->mask & NFS_DFACLCNT) ? &res->acl_default_count : NULL;
+	if (err > 0)
+		err = nfsacl_decode(buf, base + err, aclcnt, acl);
+	return (err > 0) ? 0 : err;
+}
+
+/*
+ * Decode setacl reply.
+ */
+static int
+nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+{
+	int status = ntohl(*p++);
+
+	if (status)
+		return -nfs_stat_to_errno(status);
+	xdr_decode_post_op_attr(p, fattr);
+	return 0;
+}
+#endif  /* CONFIG_NFS_V3_ACL */
+
 #ifndef MAX
 # define MAX(a, b)	(((a) > (b))? (a) : (b))
 #endif
@@ -1021,3 +1143,28 @@ struct rpc_version		nfs_version3 = {
 	.procs			= nfs3_procedures
 };
 
+#ifdef CONFIG_NFS_V3_ACL
+static struct rpc_procinfo	nfs3_acl_procedures[] = {
+	[ACLPROC3_GETACL] = {
+		.p_proc = ACLPROC3_GETACL,
+		.p_encode = (kxdrproc_t) nfs3_xdr_getaclargs,
+		.p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
+		.p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2,
+		.p_timer = 1,
+	},
+	[ACLPROC3_SETACL] = {
+		.p_proc = ACLPROC3_SETACL,
+		.p_encode = (kxdrproc_t) nfs3_xdr_setaclargs,
+		.p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
+		.p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2,
+		.p_timer = 0,
+	},
+};
+
+struct rpc_version		nfsacl_version3 = {
+	.number			= 3,
+	.nrprocs		= sizeof(nfs3_acl_procedures)/
+				  sizeof(nfs3_acl_procedures[0]),
+	.procs			= nfs3_acl_procedures,
+};
+#endif  /* CONFIG_NFS_V3_ACL */
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index fd5bc596fe8..1b272a135a3 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -124,6 +124,7 @@ enum {
 	Opt_soft, Opt_hard, Opt_intr,
 	Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, 
 	Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
+	Opt_acl, Opt_noacl,
 	/* Error token */
 	Opt_err
 };
@@ -158,6 +159,8 @@ static match_table_t __initdata tokens = {
 	{Opt_udp, "udp"},
 	{Opt_tcp, "proto=tcp"},
 	{Opt_tcp, "tcp"},
+	{Opt_acl, "acl"},
+	{Opt_noacl, "noacl"},
 	{Opt_err, NULL}
 	
 };
@@ -266,6 +269,12 @@ static int __init root_nfs_parse(char *name, char *buf)
 			case Opt_tcp:
 				nfs_data.flags |= NFS_MOUNT_TCP;
 				break;
+			case Opt_acl:
+				nfs_data.flags &= ~NFS_MOUNT_NOACL;
+				break;
+			case Opt_noacl:
+				nfs_data.flags |= NFS_MOUNT_NOACL;
+				break;
 			default : 
 				return 0;
 		}
-- 
cgit v1.2.3


From 055ffbea0596942579b0dae71d5dab78de8135f6 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:27 +0000
Subject: [PATCH] NFS: Fix handling of the umask when an NFSv3 default acl is
 present.

 NFSv3 has no concept of a umask on the server side: The client applies
 the umask locally, and sends the effective permissions to the server.
 This behavior is wrong when files are created in a directory that has a
 default ACL.  In this case, the umask is supposed to be ignored, and
 only the default ACL determines the file's effective permissions.

 Usually its the server's task to conditionally apply the umask.  But
 since the server knows nothing about the umask, we have to do it on the
 client side.  This patch tries to fetch the parent directory's default
 ACL before creating a new file, computes the appropriate create mode to
 send to the server, and finally sets the new file's access and default
 acl appropriately.

 Many thanks to Buck Huppmann <buchk@pobox.com> for sending the initial
 version of this patch, as well as for arguing why we need this change.

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c    |  5 +++++
 fs/nfs/nfs3acl.c  | 29 +++++++++++++++++++++++++++++
 fs/nfs/nfs3proc.c | 36 ++++++++++++++++++++++++++++++------
 3 files changed, 64 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 440b9cbb6f8..50a03f1504a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -490,6 +490,11 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 #else
 		server->flags &= ~NFS_MOUNT_NOACL;
 #endif /* CONFIG_NFS_V3_ACL */
+		/*
+		 * The VFS shouldn't apply the umask to mode bits. We will
+		 * do so ourselves when necessary.
+		 */
+		sb->s_flags |= MS_POSIXACL;
 		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
 			server->namelen = NFS3_MAXNAMLEN;
 		sb->s_time_gran = 1;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 393ba79fc14..89b6468700e 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -301,3 +301,32 @@ int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl)
 fail:
 	return PTR_ERR(alloc);
 }
+
+int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
+		mode_t mode)
+{
+	struct posix_acl *dfacl, *acl;
+	int error = 0;
+
+	dfacl = nfs3_proc_getacl(dir, ACL_TYPE_DEFAULT);
+	if (IS_ERR(dfacl)) {
+		error = PTR_ERR(dfacl);
+		return (error == -EOPNOTSUPP) ? 0 : error;
+	}
+	if (!dfacl)
+		return 0;
+	acl = posix_acl_clone(dfacl, GFP_KERNEL);
+	error = -ENOMEM;
+	if (!acl)
+		goto out_release_dfacl;
+	error = posix_acl_create_masq(acl, &mode);
+	if (error < 0)
+		goto out_release_acl;
+	error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ?
+						      dfacl : NULL);
+out_release_acl:
+	posix_acl_release(acl);
+out_release_dfacl:
+	posix_acl_release(dfacl);
+	return error;
+}
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d03bac0cc42..a9ddc196224 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -314,7 +314,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
-	int			status;
+	mode_t mode = sattr->ia_mode;
+	int status;
 
 	dprintk("NFS call  create %s\n", dentry->d_name.name);
 	arg.createmode = NFS3_CREATE_UNCHECKED;
@@ -324,6 +325,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		arg.verifier[1] = current->pid;
 	}
 
+	sattr->ia_mode &= ~current->fs->umask;
+
 again:
 	dir_attr.valid = 0;
 	fattr.valid = 0;
@@ -370,6 +373,9 @@ again:
 		nfs_refresh_inode(dentry->d_inode, &fattr);
 		dprintk("NFS reply setattr (post-create): %d\n", status);
 	}
+	if (status != 0)
+		goto out;
+	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
 out:
 	dprintk("NFS reply create: %d\n", status);
 	return status;
@@ -539,15 +545,24 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
-	int			status;
+	int mode = sattr->ia_mode;
+	int status;
 
 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
 	dir_attr.valid = 0;
 	fattr.valid = 0;
+
+	sattr->ia_mode &= ~current->fs->umask;
+
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
 	nfs_refresh_inode(dir, &dir_attr);
-	if (status == 0)
-		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	if (status != 0)
+		goto out;
+	status = nfs_instantiate(dentry, &fhandle, &fattr);
+	if (status != 0)
+		goto out;
+	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
+out:
 	dprintk("NFS reply mkdir: %d\n", status);
 	return status;
 }
@@ -642,6 +657,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		.fh		= &fh,
 		.fattr		= &fattr
 	};
+	mode_t mode = sattr->ia_mode;
 	int status;
 
 	switch (sattr->ia_mode & S_IFMT) {
@@ -654,12 +670,20 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 
 	dprintk("NFS call  mknod %s %u:%u\n", dentry->d_name.name,
 			MAJOR(rdev), MINOR(rdev));
+
+	sattr->ia_mode &= ~current->fs->umask;
+
 	dir_attr.valid = 0;
 	fattr.valid = 0;
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
 	nfs_refresh_inode(dir, &dir_attr);
-	if (status == 0)
-		status = nfs_instantiate(dentry, &fh, &fattr);
+	if (status != 0)
+		goto out;
+	status = nfs_instantiate(dentry, &fh, &fattr);
+	if (status != 0)
+		goto out;
+	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
+out:
 	dprintk("NFS reply mknod: %d\n", status);
 	return status;
 }
-- 
cgit v1.2.3


From 5c6a9f7d92291c832d47e792ed1fafa44acb066e Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:27 +0000
Subject: [PATCH] NFS: Cache the NFSv3 acls.

 Attach acls to inodes in the icache to avoid unnecessary GETACL RPC
 round-trips.  As long as the client doesn't retrieve any acls itself, only the
 default acls of exiting directories and the default and access acls of new
 directories will end up in the cache, which preserves some memory compared to
 always caching the access and default acl of all files.

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs3acl.c  | 100 ++++++++++++++++++++++++++++++++++++++++++++++--------
 fs/nfs/nfs3proc.c |   1 +
 2 files changed, 86 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 89b6468700e..451112ff9aa 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -113,6 +113,69 @@ int nfs3_removexattr(struct dentry *dentry, const char *name)
 	return nfs3_proc_setacl(inode, type, NULL);
 }
 
+static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi)
+{
+	if (nfsi->acl_access != ERR_PTR(-EAGAIN)) {
+		posix_acl_release(nfsi->acl_access);
+		nfsi->acl_access = ERR_PTR(-EAGAIN);
+	}
+	if (nfsi->acl_default != ERR_PTR(-EAGAIN)) {
+		posix_acl_release(nfsi->acl_default);
+		nfsi->acl_default = ERR_PTR(-EAGAIN);
+	}
+}
+
+void nfs3_forget_cached_acls(struct inode *inode)
+{
+	dprintk("NFS: nfs3_forget_cached_acls(%s/%ld)\n", inode->i_sb->s_id,
+		inode->i_ino);
+	spin_lock(&inode->i_lock);
+	__nfs3_forget_cached_acls(NFS_I(inode));
+	spin_unlock(&inode->i_lock);
+}
+
+static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct posix_acl *acl = ERR_PTR(-EAGAIN);
+
+	spin_lock(&inode->i_lock);
+	switch(type) {
+		case ACL_TYPE_ACCESS:
+			acl = nfsi->acl_access;
+			break;
+
+		case ACL_TYPE_DEFAULT:
+			acl = nfsi->acl_default;
+			break;
+
+		default:
+			return ERR_PTR(-EINVAL);
+	}
+	if (acl == ERR_PTR(-EAGAIN))
+		acl = ERR_PTR(-EAGAIN);
+	else
+		acl = posix_acl_dup(acl);
+	spin_unlock(&inode->i_lock);
+	dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id,
+		inode->i_ino, type, acl);
+	return acl;
+}
+
+static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl,
+		    struct posix_acl *dfacl)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	dprintk("nfs3_cache_acls(%s/%ld, %p, %p)\n", inode->i_sb->s_id,
+		inode->i_ino, acl, dfacl);
+	spin_lock(&inode->i_lock);
+	__nfs3_forget_cached_acls(NFS_I(inode));
+	nfsi->acl_access = posix_acl_dup(acl);
+	nfsi->acl_default = posix_acl_dup(dfacl);
+	spin_unlock(&inode->i_lock);
+}
+
 struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
@@ -126,26 +189,32 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
 	struct nfs3_getaclres res = {
 		.fattr =	&fattr,
 	};
-	struct posix_acl *acl = NULL;
+	struct posix_acl *acl;
 	int status, count;
 
 	if (!nfs_server_capable(inode, NFS_CAP_ACLS))
 		return ERR_PTR(-EOPNOTSUPP);
 
-	switch (type) {
-		case ACL_TYPE_ACCESS:
-			args.mask = NFS_ACLCNT|NFS_ACL;
-			break;
-
-		case ACL_TYPE_DEFAULT:
-			if (!S_ISDIR(inode->i_mode))
-				return NULL;
-			args.mask = NFS_DFACLCNT|NFS_DFACL;
-			break;
-
-		default:
-			return ERR_PTR(-EINVAL);
-	}
+	status = nfs_revalidate_inode(server, inode);
+	if (status < 0)
+		return ERR_PTR(status);
+	acl = nfs3_get_cached_acl(inode, type);
+	if (acl != ERR_PTR(-EAGAIN))
+		return acl;
+	acl = NULL;
+
+	/*
+	 * Only get the access acl when explicitly requested: We don't
+	 * need it for access decisions, and only some applications use
+	 * it. Applications which request the access acl first are not
+	 * penalized from this optimization.
+	 */
+	if (type == ACL_TYPE_ACCESS)
+		args.mask |= NFS_ACLCNT|NFS_ACL;
+	if (S_ISDIR(inode->i_mode))
+		args.mask |= NFS_DFACLCNT|NFS_DFACL;
+	if (args.mask == 0)
+		return NULL;
 
 	dprintk("NFS call getacl\n");
 	status = rpc_call(server->client_acl, ACLPROC3_GETACL,
@@ -180,6 +249,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
 			res.acl_access = NULL;
 		}
 	}
+	nfs3_cache_acls(inode, res.acl_access, res.acl_default);
 
 	switch(type) {
 		case ACL_TYPE_ACCESS:
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index a9ddc196224..7851569b31c 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -882,4 +882,5 @@ struct nfs_rpc_ops	nfs_v3_clientops = {
 	.file_open	= nfs_open,
 	.file_release	= nfs_release,
 	.lock		= nfs3_proc_lock,
+	.clear_acl_cache = nfs3_forget_cached_acls,
 };
-- 
cgit v1.2.3


From 458818ed76d3f495f9f32373c936456c9427f759 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:27 +0000
Subject: [PATCH] NFS: Fix up v3 ACL caching code

 Initialize the inode cache values correctly.
 Clean up __nfs3_forget_cached_acls()

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c   |  4 ++++
 fs/nfs/nfs3acl.c | 11 ++++++-----
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 50a03f1504a..8a8d57d9d66 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1961,6 +1961,10 @@ static struct inode *nfs_alloc_inode(struct super_block *sb)
 	if (!nfsi)
 		return NULL;
 	nfsi->flags = 0;
+#ifdef CONFIG_NFS_V3_ACL
+	nfsi->acl_access = ERR_PTR(-EAGAIN);
+	nfsi->acl_default = ERR_PTR(-EAGAIN);
+#endif
 #ifdef CONFIG_NFS_V4
 	nfsi->nfs4_acl = NULL;
 #endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 451112ff9aa..ee3536fc84a 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -115,11 +115,11 @@ int nfs3_removexattr(struct dentry *dentry, const char *name)
 
 static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi)
 {
-	if (nfsi->acl_access != ERR_PTR(-EAGAIN)) {
+	if (!IS_ERR(nfsi->acl_access)) {
 		posix_acl_release(nfsi->acl_access);
 		nfsi->acl_access = ERR_PTR(-EAGAIN);
 	}
-	if (nfsi->acl_default != ERR_PTR(-EAGAIN)) {
+	if (!IS_ERR(nfsi->acl_default)) {
 		posix_acl_release(nfsi->acl_default);
 		nfsi->acl_default = ERR_PTR(-EAGAIN);
 	}
@@ -137,7 +137,7 @@ void nfs3_forget_cached_acls(struct inode *inode)
 static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	struct posix_acl *acl = ERR_PTR(-EAGAIN);
+	struct posix_acl *acl = ERR_PTR(-EINVAL);
 
 	spin_lock(&inode->i_lock);
 	switch(type) {
@@ -150,12 +150,13 @@ static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type)
 			break;
 
 		default:
-			return ERR_PTR(-EINVAL);
+			goto out;
 	}
-	if (acl == ERR_PTR(-EAGAIN))
+	if (IS_ERR(acl))
 		acl = ERR_PTR(-EAGAIN);
 	else
 		acl = posix_acl_dup(acl);
+out:
 	spin_unlock(&inode->i_lock);
 	dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id,
 		inode->i_ino, type, acl);
-- 
cgit v1.2.3


From 213484254c65e3c39c59df454132748b1367f816 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:28 +0000
Subject: [PATCH] fix nfsacl pointer arithmetic and pg_class initialization
 bugs

* Pointer arithmetic bug: p is in word units. This fixes a memory
  corruption with big acls.
* Initialize pg_class to prevent a NULL pointer access.

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs3xdr.c | 2 +-
 fs/nfsd/nfssvc.c | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index a4437fb177f..db4a904810a 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -677,7 +677,7 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, u32 *p,
 	/* put as much of the acls into head as possible. */
 	len_in_head = min_t(unsigned int, buf->head->iov_len - base, len);
 	len -= len_in_head;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + len_in_head);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + (len_in_head >> 2));
 
 	for (count = 0; (count << PAGE_SHIFT) < len; count++) {
 		args->pages[count] = alloc_page(GFP_KERNEL);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 79b25b19fec..904df604e86 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -376,6 +376,7 @@ static struct svc_program	nfsd_acl_program = {
 	.pg_nvers		= NFSD_ACL_NRVERS,
 	.pg_vers		= nfsd_acl_version,
 	.pg_name		= "nfsd",
+	.pg_class		= "nfsd",
 	.pg_stats		= &nfsd_acl_svcstats,
 };
 
-- 
cgit v1.2.3


From b7ef19560f496fd3942e41e728950e5b5c9a461b Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 22 Jun 2005 17:16:28 +0000
Subject: [PATCH] NFSv4: fs/nfs/nfs4proc.c: small simplification

 The Coverity checker noticed that such a simplification was possible.

 Signed-off-by: Adrian Bunk <bunk@stusta.de>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 128d01cfea1..7ff23707256 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -123,7 +123,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
 
 	BUG_ON(readdir->count < 80);
 	if (cookie > 2) {
-		readdir->cookie = (cookie > 2) ? cookie : 0;
+		readdir->cookie = cookie;
 		memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier));
 		return;
 	}
-- 
cgit v1.2.3


From 3e9d41543b16e6117267edc0ca058c40f888d81a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 22 Jun 2005 17:16:28 +0000
Subject: [PATCH] NFSv4: empty array fix

 Older gcc's don't like this.

 fs/nfs/nfs4proc.c:2194: field `data' has incomplete type

 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7ff23707256..3f281a857e3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2191,7 +2191,7 @@ static void buf_to_pages(const void *buf, size_t buflen,
 struct nfs4_cached_acl {
 	int cached;
 	size_t len;
-	char data[];
+	char data[0];
 };
 
 static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl)
-- 
cgit v1.2.3


From c56c2750229154f6a1cfee533e0a911da3923b5a Mon Sep 17 00:00:00 2001
From: Reuben Farrelly <reuben-lkml@reub.net>
Date: Wed, 22 Jun 2005 17:16:28 +0000
Subject: [PATCH] NFSv4: Fix build warning

 From: Reuben Farrelly <reuben-lkml@reub.net>

 With gcc-4.0:

 fs/nfs/nfs4proc.c:2976: error: static declaration of
 'nfs4_file_inode_operations' follows non-static declaration
 fs/nfs/nfs4_fs.h:179: error: previous declaration of
 'nfs4_file_inode_operations' was here

 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index d71f416bd9e..7c6f1d668fb 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -176,7 +176,6 @@ struct nfs4_state_recovery_ops {
 
 extern struct dentry_operations nfs4_dentry_operations;
 extern struct inode_operations nfs4_dir_inode_operations;
-extern struct inode_operations nfs4_file_inode_operations;
 
 /* inode.c */
 extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
-- 
cgit v1.2.3


From 455a396710b71a743b28da2ed2185e5a9b38e26f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:28 +0000
Subject: [PATCH] NFSv4: Fix an Oops in the callback code.

 The changeset "trond.myklebust@fys.uio.no|ChangeSet|20050322152404|16979"
 (RPC: Ensure XDR iovec length is initialized correctly in call_header)
 causes the NFSv4 callback code to BUG() due to an incorrectly initialized
 scratch buffer.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback_xdr.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c99677ec58f..7c33b9a81a9 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -411,7 +411,6 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp
 	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
 
 	p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
-	rqstp->rq_res.head[0].iov_len = PAGE_SIZE;
 	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
 
 	decode_compound_hdr_arg(&xdr_in, &hdr_arg);
-- 
cgit v1.2.3


From 00a926422765064cb28e218d4837411c88bf6a3e Mon Sep 17 00:00:00 2001
From: Olivier Galibert <galibert@pobox.com>
Date: Wed, 22 Jun 2005 17:16:29 +0000
Subject: [PATCH] NFS: Hide NFS server-generated readdir cookies from userland

 NFSv3 currently returns the unsigned 64-bit cookie directly to
 userspace. The following patch causes the kernel to generate
 loff_t offsets for the benefit of userland.
 The current server-generated READDIR cookie is cached in the
 nfs_open_context instead of in filp->f_pos, so we still end up work
 correctly under directory insertions/deletion.

 Signed-off-by: Olivier Galibert <galibert@pobox.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c   | 114 +++++++++++++++++++++++++++++++++++++++++++++------------
 fs/nfs/inode.c |   2 +
 2 files changed, 92 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2c6a9594568..fceef29c65a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -141,7 +141,9 @@ typedef struct {
 	struct page	*page;
 	unsigned long	page_index;
 	u32		*ptr;
-	u64		target;
+	u64		target_cookie;
+	int		target_index;
+	int		current_index;
 	struct nfs_entry *entry;
 	decode_dirent_t	decode;
 	int		plus;
@@ -225,14 +227,14 @@ void dir_page_release(nfs_readdir_descriptor_t *desc)
 
 /*
  * Given a pointer to a buffer that has already been filled by a call
- * to readdir, find the next entry.
+ * to readdir, find the next entry with cookie 'desc->target_cookie'.
  *
  * If the end of the buffer has been reached, return -EAGAIN, if not,
  * return the offset within the buffer of the next entry to be
  * read.
  */
 static inline
-int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page)
+int find_dirent(nfs_readdir_descriptor_t *desc)
 {
 	struct nfs_entry *entry = desc->entry;
 	int		loop_count = 0,
@@ -240,7 +242,7 @@ int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page)
 
 	while((status = dir_decode(desc)) == 0) {
 		dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie);
-		if (entry->prev_cookie == desc->target)
+		if (entry->prev_cookie == desc->target_cookie)
 			break;
 		if (loop_count++ > 200) {
 			loop_count = 0;
@@ -252,8 +254,44 @@ int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page)
 }
 
 /*
- * Find the given page, and call find_dirent() in order to try to
- * return the next entry.
+ * Given a pointer to a buffer that has already been filled by a call
+ * to readdir, find the entry at offset 'desc->target_index'.
+ *
+ * If the end of the buffer has been reached, return -EAGAIN, if not,
+ * return the offset within the buffer of the next entry to be
+ * read.
+ */
+static inline
+int find_dirent_index(nfs_readdir_descriptor_t *desc)
+{
+	struct nfs_entry *entry = desc->entry;
+	int		loop_count = 0,
+			status;
+
+	for(;;) {
+		status = dir_decode(desc);
+		if (status)
+			break;
+
+		dfprintk(VFS, "NFS: found cookie %Lu at index %d\n", (long long)entry->cookie, desc->current_index);
+
+		if (desc->target_index == desc->current_index) {
+			desc->target_cookie = entry->cookie;
+			break;
+		}
+		desc->current_index++;
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
+		}
+	}
+	dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status);
+	return status;
+}
+
+/*
+ * Find the given page, and call find_dirent() or find_dirent_index in
+ * order to try to return the next entry.
  */
 static inline
 int find_dirent_page(nfs_readdir_descriptor_t *desc)
@@ -276,7 +314,10 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
 	/* NOTE: Someone else may have changed the READDIRPLUS flag */
 	desc->page = page;
 	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
-	status = find_dirent(desc, page);
+	if (desc->target_cookie)
+		status = find_dirent(desc);
+	else
+		status = find_dirent_index(desc);
 	if (status < 0)
 		dir_page_release(desc);
  out:
@@ -291,7 +332,8 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
  * Recurse through the page cache pages, and return a
  * filled nfs_entry structure of the next directory entry if possible.
  *
- * The target for the search is 'desc->target'.
+ * The target for the search is 'desc->target_cookie' if non-0,
+ * 'desc->target_index' otherwise
  */
 static inline
 int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
@@ -299,7 +341,19 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 	int		loop_count = 0;
 	int		res;
 
-	dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target);
+	if (desc->target_cookie)
+		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target_cookie);
+	else
+		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie number %d\n", desc->target_index);
+
+	/* Always search-by-index from the beginning of the cache */
+	if (!(desc->target_cookie)) {
+		desc->page_index = 0;
+		desc->entry->cookie = desc->entry->prev_cookie = 0;
+		desc->entry->eof = 0;
+		desc->current_index = 0;
+	}
+
 	for (;;) {
 		res = find_dirent_page(desc);
 		if (res != -EAGAIN)
@@ -332,11 +386,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 	struct file	*file = desc->file;
 	struct nfs_entry *entry = desc->entry;
 	struct dentry	*dentry = NULL;
+	struct nfs_open_context *ctx = file->private_data;
 	unsigned long	fileid;
 	int		loop_count = 0,
 			res;
 
-	dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target);
+	dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie);
 
 	for(;;) {
 		unsigned d_type = DT_UNKNOWN;
@@ -356,10 +411,11 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 		}
 
 		res = filldir(dirent, entry->name, entry->len, 
-			      entry->prev_cookie, fileid, d_type);
+			      file->f_pos, fileid, d_type);
 		if (res < 0)
 			break;
-		file->f_pos = desc->target = entry->cookie;
+		file->f_pos++;
+		desc->target_cookie = entry->cookie;
 		if (dir_decode(desc) != 0) {
 			desc->page_index ++;
 			break;
@@ -369,10 +425,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 			schedule();
 		}
 	}
+	ctx->dir_pos        = file->f_pos;
+	ctx->dir_cookie     = desc->target_cookie;
 	dir_page_release(desc);
 	if (dentry != NULL)
 		dput(dentry);
-	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res);
+	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target_cookie, res);
 	return res;
 }
 
@@ -398,14 +456,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	struct page	*page = NULL;
 	int		status;
 
-	dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target);
+	dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target_cookie);
 
 	page = alloc_page(GFP_HIGHUSER);
 	if (!page) {
 		status = -ENOMEM;
 		goto out;
 	}
-	desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target,
+	desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target_cookie,
 						page,
 						NFS_SERVER(inode)->dtsize,
 						desc->plus);
@@ -414,7 +472,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
 	if (desc->error >= 0) {
 		if ((status = dir_decode(desc)) == 0)
-			desc->entry->prev_cookie = desc->target;
+			desc->entry->prev_cookie = desc->target_cookie;
 	} else
 		status = -EIO;
 	if (status < 0)
@@ -435,13 +493,15 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	goto out;
 }
 
-/* The file offset position is now represented as a true offset into the
- * page cache as is the case in most of the other filesystems.
+/* The file offset position represents the dirent entry number.  A
+   last cookie cache takes care of the common case of reading the
+   whole directory.
  */
 static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	struct dentry	*dentry = filp->f_dentry;
 	struct inode	*inode = dentry->d_inode;
+	struct nfs_open_context *ctx = filp->private_data;
 	nfs_readdir_descriptor_t my_desc,
 			*desc = &my_desc;
 	struct nfs_entry my_entry;
@@ -458,17 +518,22 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	}
 
 	/*
-	 * filp->f_pos points to the file offset in the page cache.
-	 * but if the cache has meanwhile been zapped, we need to
-	 * read from the last dirent to revalidate f_pos
-	 * itself.
+	 * filp->f_pos points to the dirent entry number.
+	 * ctx->dir_pos has the number of the cached cookie.  We have
+	 * to either find the entry with the appropriate number or
+	 * revalidate the cookie.
 	 */
 	memset(desc, 0, sizeof(*desc));
 
 	desc->file = filp;
-	desc->target = filp->f_pos;
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
 	desc->plus = NFS_USE_READDIRPLUS(inode);
+	desc->target_index = filp->f_pos;
+
+	if (filp->f_pos == ctx->dir_pos)
+		desc->target_cookie = ctx->dir_cookie;
+	else
+		desc->target_cookie = 0;
 
 	my_entry.cookie = my_entry.prev_cookie = 0;
 	my_entry.eof = 0;
@@ -478,9 +543,10 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
 	while(!desc->entry->eof) {
 		res = readdir_search_pagecache(desc);
+
 		if (res == -EBADCOOKIE) {
 			/* This means either end of directory */
-			if (desc->entry->cookie != desc->target) {
+			if (desc->target_cookie && desc->entry->cookie != desc->target_cookie) {
 				/* Or that the server has 'lost' a cookie */
 				res = uncached_readdir(desc, dirent, filldir);
 				if (res >= 0)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8a8d57d9d66..9fa02e7984a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -891,6 +891,8 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp
 		ctx->state = NULL;
 		ctx->lockowner = current->files;
 		ctx->error = 0;
+		ctx->dir_pos = 0;
+		ctx->dir_cookie = 0;
 	}
 	return ctx;
 }
-- 
cgit v1.2.3


From f0dd2136da6d2070e12bfa6d199b136318e666c7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:29 +0000
Subject: [PATCH] NFS: Clean up readdir changes.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c   | 85 +++++++++++++++++++++++++++++++++-------------------------
 fs/nfs/inode.c |  1 -
 2 files changed, 48 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index fceef29c65a..b38a57e78a6 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -51,8 +51,10 @@ static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
 static int nfs_rename(struct inode *, struct dentry *,
 		      struct inode *, struct dentry *);
 static int nfs_fsync_dir(struct file *, struct dentry *, int);
+static loff_t nfs_llseek_dir(struct file *, loff_t, int);
 
 struct file_operations nfs_dir_operations = {
+	.llseek		= nfs_llseek_dir,
 	.read		= generic_read_dir,
 	.readdir	= nfs_readdir,
 	.open		= nfs_opendir,
@@ -141,9 +143,8 @@ typedef struct {
 	struct page	*page;
 	unsigned long	page_index;
 	u32		*ptr;
-	u64		target_cookie;
-	int		target_index;
-	int		current_index;
+	u64		*dir_cookie;
+	loff_t		current_index;
 	struct nfs_entry *entry;
 	decode_dirent_t	decode;
 	int		plus;
@@ -227,7 +228,7 @@ void dir_page_release(nfs_readdir_descriptor_t *desc)
 
 /*
  * Given a pointer to a buffer that has already been filled by a call
- * to readdir, find the next entry with cookie 'desc->target_cookie'.
+ * to readdir, find the next entry with cookie '*desc->dir_cookie'.
  *
  * If the end of the buffer has been reached, return -EAGAIN, if not,
  * return the offset within the buffer of the next entry to be
@@ -241,8 +242,8 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
 			status;
 
 	while((status = dir_decode(desc)) == 0) {
-		dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie);
-		if (entry->prev_cookie == desc->target_cookie)
+		dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie);
+		if (entry->prev_cookie == *desc->dir_cookie)
 			break;
 		if (loop_count++ > 200) {
 			loop_count = 0;
@@ -255,7 +256,7 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
 
 /*
  * Given a pointer to a buffer that has already been filled by a call
- * to readdir, find the entry at offset 'desc->target_index'.
+ * to readdir, find the entry at offset 'desc->file->f_pos'.
  *
  * If the end of the buffer has been reached, return -EAGAIN, if not,
  * return the offset within the buffer of the next entry to be
@@ -273,10 +274,10 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
 		if (status)
 			break;
 
-		dfprintk(VFS, "NFS: found cookie %Lu at index %d\n", (long long)entry->cookie, desc->current_index);
+		dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index);
 
-		if (desc->target_index == desc->current_index) {
-			desc->target_cookie = entry->cookie;
+		if (desc->file->f_pos == desc->current_index) {
+			*desc->dir_cookie = entry->cookie;
 			break;
 		}
 		desc->current_index++;
@@ -314,7 +315,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
 	/* NOTE: Someone else may have changed the READDIRPLUS flag */
 	desc->page = page;
 	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
-	if (desc->target_cookie)
+	if (*desc->dir_cookie != 0)
 		status = find_dirent(desc);
 	else
 		status = find_dirent_index(desc);
@@ -332,8 +333,8 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
  * Recurse through the page cache pages, and return a
  * filled nfs_entry structure of the next directory entry if possible.
  *
- * The target for the search is 'desc->target_cookie' if non-0,
- * 'desc->target_index' otherwise
+ * The target for the search is '*desc->dir_cookie' if non-0,
+ * 'desc->file->f_pos' otherwise
  */
 static inline
 int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
@@ -341,18 +342,15 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 	int		loop_count = 0;
 	int		res;
 
-	if (desc->target_cookie)
-		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target_cookie);
-	else
-		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie number %d\n", desc->target_index);
-
 	/* Always search-by-index from the beginning of the cache */
-	if (!(desc->target_cookie)) {
+	if (*desc->dir_cookie == 0) {
+		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos);
 		desc->page_index = 0;
 		desc->entry->cookie = desc->entry->prev_cookie = 0;
 		desc->entry->eof = 0;
 		desc->current_index = 0;
-	}
+	} else
+		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
 
 	for (;;) {
 		res = find_dirent_page(desc);
@@ -386,7 +384,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 	struct file	*file = desc->file;
 	struct nfs_entry *entry = desc->entry;
 	struct dentry	*dentry = NULL;
-	struct nfs_open_context *ctx = file->private_data;
 	unsigned long	fileid;
 	int		loop_count = 0,
 			res;
@@ -415,7 +412,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 		if (res < 0)
 			break;
 		file->f_pos++;
-		desc->target_cookie = entry->cookie;
+		*desc->dir_cookie = entry->cookie;
 		if (dir_decode(desc) != 0) {
 			desc->page_index ++;
 			break;
@@ -425,12 +422,10 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 			schedule();
 		}
 	}
-	ctx->dir_pos        = file->f_pos;
-	ctx->dir_cookie     = desc->target_cookie;
 	dir_page_release(desc);
 	if (dentry != NULL)
 		dput(dentry);
-	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target_cookie, res);
+	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res);
 	return res;
 }
 
@@ -456,14 +451,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	struct page	*page = NULL;
 	int		status;
 
-	dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target_cookie);
+	dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
 
 	page = alloc_page(GFP_HIGHUSER);
 	if (!page) {
 		status = -ENOMEM;
 		goto out;
 	}
-	desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target_cookie,
+	desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, *desc->dir_cookie,
 						page,
 						NFS_SERVER(inode)->dtsize,
 						desc->plus);
@@ -472,7 +467,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
 	if (desc->error >= 0) {
 		if ((status = dir_decode(desc)) == 0)
-			desc->entry->prev_cookie = desc->target_cookie;
+			desc->entry->prev_cookie = *desc->dir_cookie;
 	} else
 		status = -EIO;
 	if (status < 0)
@@ -501,7 +496,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	struct dentry	*dentry = filp->f_dentry;
 	struct inode	*inode = dentry->d_inode;
-	struct nfs_open_context *ctx = filp->private_data;
 	nfs_readdir_descriptor_t my_desc,
 			*desc = &my_desc;
 	struct nfs_entry my_entry;
@@ -519,21 +513,16 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
 	/*
 	 * filp->f_pos points to the dirent entry number.
-	 * ctx->dir_pos has the number of the cached cookie.  We have
+	 * *desc->dir_cookie has the cookie for the next entry. We have
 	 * to either find the entry with the appropriate number or
 	 * revalidate the cookie.
 	 */
 	memset(desc, 0, sizeof(*desc));
 
 	desc->file = filp;
+	desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie;
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
 	desc->plus = NFS_USE_READDIRPLUS(inode);
-	desc->target_index = filp->f_pos;
-
-	if (filp->f_pos == ctx->dir_pos)
-		desc->target_cookie = ctx->dir_cookie;
-	else
-		desc->target_cookie = 0;
 
 	my_entry.cookie = my_entry.prev_cookie = 0;
 	my_entry.eof = 0;
@@ -546,7 +535,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
 		if (res == -EBADCOOKIE) {
 			/* This means either end of directory */
-			if (desc->target_cookie && desc->entry->cookie != desc->target_cookie) {
+			if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) {
 				/* Or that the server has 'lost' a cookie */
 				res = uncached_readdir(desc, dirent, filldir);
 				if (res >= 0)
@@ -579,6 +568,28 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	return 0;
 }
 
+loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
+{
+	down(&filp->f_dentry->d_inode->i_sem);
+	switch (origin) {
+		case 1:
+			offset += filp->f_pos;
+		case 0:
+			if (offset >= 0)
+				break;
+		default:
+			offset = -EINVAL;
+			goto out;
+	}
+	if (offset != filp->f_pos) {
+		filp->f_pos = offset;
+		((struct nfs_open_context *)filp->private_data)->dir_cookie = 0;
+	}
+out:
+	up(&filp->f_dentry->d_inode->i_sem);
+	return offset;
+}
+
 /*
  * All directory operations under NFS are synchronous, so fsync()
  * is a dummy operation.
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9fa02e7984a..6300e05e946 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -891,7 +891,6 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp
 		ctx->state = NULL;
 		ctx->lockowner = current->files;
 		ctx->error = 0;
-		ctx->dir_pos = 0;
 		ctx->dir_cookie = 0;
 	}
 	return ctx;
-- 
cgit v1.2.3


From 202b50dc127cf4714ffdcc6a64f1648373f9414f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:29 +0000
Subject: [PATCH] NFSv4: Ensure that propagate NFSv4 state errors to the
 reclaim code

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 51 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 40 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3f281a857e3..91e7fe867d5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -269,14 +269,9 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
 	int err;
 	do {
 		err = _nfs4_open_reclaim(sp, state);
-		switch (err) {
-			case 0:
-			case -NFS4ERR_STALE_CLIENTID:
-			case -NFS4ERR_STALE_STATEID:
-			case -NFS4ERR_EXPIRED:
-				return err;
-		}
-		err = nfs4_handle_exception(server, err, &exception);
+		if (err != -NFS4ERR_DELAY)
+			break;
+		nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
 	return err;
 }
@@ -508,6 +503,20 @@ out_stale:
 	goto out_nodeleg;
 }
 
+static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+{
+	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = _nfs4_open_expired(sp, state, dentry);
+		if (err == -NFS4ERR_DELAY)
+			nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+	return err;
+}
+
 static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
 {
 	struct nfs_inode *nfsi = NFS_I(state->inode);
@@ -520,7 +529,7 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
 			continue;
 		get_nfs_open_context(ctx);
 		spin_unlock(&state->inode->i_lock);
-		status = _nfs4_open_expired(sp, state, ctx->dentry);
+		status = nfs4_do_open_expired(sp, state, ctx->dentry);
 		put_nfs_open_context(ctx);
 		return status;
 	}
@@ -2842,12 +2851,32 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 
 static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request)
 {
-	return _nfs4_do_setlk(state, F_SETLK, request, 1);
+	struct nfs_server *server = NFS_SERVER(state->inode);
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = _nfs4_do_setlk(state, F_SETLK, request, 1);
+		if (err != -NFS4ERR_DELAY)
+			break;
+		nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+	return err;
 }
 
 static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request)
 {
-	return _nfs4_do_setlk(state, F_SETLK, request, 0);
+	struct nfs_server *server = NFS_SERVER(state->inode);
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = _nfs4_do_setlk(state, F_SETLK, request, 0);
+		if (err != -NFS4ERR_DELAY)
+			break;
+		nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+	return err;
 }
 
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
-- 
cgit v1.2.3


From 08e9eac42edab63bce14b5c8419771f3c92aa3f4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:29 +0000
Subject: [PATCH] NFSv4: Fix up races in nfs4_proc_setattr()

 If we do not hold a valid stateid that is open for writes, there is little
 point in doing an extra open of the file, as the RFC does not appear to
 mandate this...

 Make setattr use the correct stateid if we're holding mandatory byte
 range locks.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 55 +++++++++++++++++++------------------------------------
 1 file changed, 19 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 91e7fe867d5..af80b598148 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -756,11 +756,10 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
 
         fattr->valid = 0;
 
-	if (state != NULL)
+	if (state != NULL) {
 		msg.rpc_cred = state->owner->so_cred;
-	if (sattr->ia_valid & ATTR_SIZE)
-		nfs4_copy_stateid(&arg.stateid, state, NULL);
-	else
+		nfs4_copy_stateid(&arg.stateid, state, current->files);
+	} else
 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
 
 	return rpc_call_sync(server->client, &msg, 0);
@@ -1124,47 +1123,31 @@ static int
 nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 		  struct iattr *sattr)
 {
-	struct inode *		inode = dentry->d_inode;
-	int			size_change = sattr->ia_valid & ATTR_SIZE;
-	struct nfs4_state	*state = NULL;
-	int need_iput = 0;
+	struct rpc_cred *cred;
+	struct inode *inode = dentry->d_inode;
+	struct nfs4_state *state;
 	int status;
 
 	fattr->valid = 0;
 	
-	if (size_change) {
-		struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
-		if (IS_ERR(cred))
-			return PTR_ERR(cred);
+	cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
+	if (IS_ERR(cred))
+		return PTR_ERR(cred);
+	/* Search for an existing WRITE delegation first */
+	state = nfs4_open_delegated(inode, FMODE_WRITE, cred);
+	if (!IS_ERR(state)) {
+		/* NB: nfs4_open_delegated() bumps the inode->i_count */
+		iput(inode);
+	} else {
+		/* Search for an existing open(O_WRITE) stateid */
 		state = nfs4_find_state(inode, cred, FMODE_WRITE);
-		if (state == NULL) {
-			state = nfs4_open_delegated(dentry->d_inode,
-					FMODE_WRITE, cred);
-			if (IS_ERR(state))
-				state = nfs4_do_open(dentry->d_parent->d_inode,
-						dentry, FMODE_WRITE,
-						NULL, cred);
-			need_iput = 1;
-		}
-		put_rpccred(cred);
-		if (IS_ERR(state))
-			return PTR_ERR(state);
-
-		if (state->inode != inode) {
-			printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode);
-			status = -EIO;
-			goto out;
-		}
 	}
+
 	status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
 			NFS_FH(inode), sattr, state);
-out:
-	if (state) {
-		inode = state->inode;
+	if (state != NULL)
 		nfs4_close_state(state, FMODE_WRITE);
-		if (need_iput)
-			iput(inode);
-	}
+	put_rpccred(cred);
 	return status;
 }
 
-- 
cgit v1.2.3


From 951a143b3fcf15cfa9d38250b7462f821db241db Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:30 +0000
Subject: [PATCH] NFS: Fix the file size revalidation

 Instead of looking at whether or not the file is open for writes before
 we accept to update the length using the server value, we should rather
 be looking at whether or not we are currently caching any writes.

 Failure to do so means in particular that we're not updating the file
 length correctly after obtaining a POSIX or BSD lock.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c |  2 +-
 fs/nfs/inode.c  | 69 +++++++++++++++------------------------------------------
 fs/nfs/write.c  |  4 ++--
 3 files changed, 21 insertions(+), 54 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 68df803f27c..d6a30c844de 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -517,7 +517,7 @@ retry:
 	result = tot_bytes;
 
 out:
-	nfs_end_data_update_defer(inode);
+	nfs_end_data_update(inode);
 	nfs_writedata_free(wdata);
 	return result;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6300e05e946..b2d16758ced 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1147,27 +1147,6 @@ void nfs_end_data_update(struct inode *inode)
 	atomic_dec(&nfsi->data_updates);
 }
 
-/**
- * nfs_end_data_update_defer
- * @inode - pointer to inode
- * Declare end of the operations that will update file data
- * This will defer marking the inode as needing revalidation
- * unless there are no other pending updates.
- */
-void nfs_end_data_update_defer(struct inode *inode)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-
-	if (atomic_dec_and_test(&nfsi->data_updates)) {
-		/* Mark the attribute cache for revalidation */
-		nfsi->flags |= NFS_INO_INVALID_ATTR;
-		/* Directories and symlinks: invalidate page cache too */
-		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-			nfsi->flags |= NFS_INO_INVALID_DATA;
-		nfsi->cache_change_attribute ++;
-	}
-}
-
 /**
  * nfs_refresh_inode - verify consistency of the inode attribute cache
  * @inode - pointer to inode
@@ -1222,8 +1201,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 		if (!timespec_equal(&inode->i_mtime, &fattr->mtime)
 				|| cur_size != new_isize)
 			nfsi->flags |= NFS_INO_INVALID_ATTR;
-	} else if (S_ISREG(inode->i_mode) && new_isize > cur_size)
-			nfsi->flags |= NFS_INO_INVALID_ATTR;
+	} else if (new_isize != cur_size && nfsi->npages == 0)
+		nfsi->flags |= NFS_INO_INVALID_ATTR;
 
 	/* Have any file permissions changed? */
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
@@ -1257,10 +1236,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	__u64		new_size;
-	loff_t		new_isize;
+	loff_t cur_isize, new_isize;
 	unsigned int	invalid = 0;
-	loff_t		cur_isize;
 	int data_unstable;
 
 	dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
@@ -1293,49 +1270,39 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 	/* Are we racing with known updates of the metadata on the server? */
 	data_unstable = ! nfs_verify_change_attribute(inode, verifier);
 
-	/* Check if the file size agrees */
-	new_size = fattr->size;
+	/* Check if our cached file size is stale */
  	new_isize = nfs_size_to_loff_t(fattr->size);
 	cur_isize = i_size_read(inode);
-	if (cur_isize != new_size) {
-#ifdef NFS_DEBUG_VERBOSE
-		printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino);
-#endif
-		/*
-		 * If we have pending writebacks, things can get
-		 * messy.
-		 */
-		if (S_ISREG(inode->i_mode) && data_unstable) {
-			if (new_isize > cur_isize) {
+	if (new_isize != cur_isize) {
+		/* Do we perhaps have any outstanding writes? */
+		if (nfsi->npages == 0) {
+			/* No, but did we race with nfs_end_data_update()? */
+			if (verifier  ==  nfsi->cache_change_attribute) {
 				inode->i_size = new_isize;
-				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+				invalid |= NFS_INO_INVALID_DATA;
 			}
-		} else {
+			invalid |= NFS_INO_INVALID_ATTR;
+		} else if (new_isize > cur_isize) {
 			inode->i_size = new_isize;
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 		}
+		dprintk("NFS: isize change on server for file %s/%ld\n",
+				inode->i_sb->s_id, inode->i_ino);
 	}
 
-	/*
-	 * Note: we don't check inode->i_mtime since pipes etc.
-	 *       can change this value in VFS without requiring a
-	 *	 cache revalidation.
-	 */
+	/* Check if the mtime agrees */
 	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
 		memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
-#ifdef NFS_DEBUG_VERBOSE
-		printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino);
-#endif
+		dprintk("NFS: mtime change on server for file %s/%ld\n",
+				inode->i_sb->s_id, inode->i_ino);
 		if (!data_unstable)
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 	}
 
 	if ((fattr->valid & NFS_ATTR_FATTR_V4)
 	    && nfsi->change_attr != fattr->change_attr) {
-#ifdef NFS_DEBUG_VERBOSE
-		printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n",
+		dprintk("NFS: change_attr change on server for file %s/%ld\n",
 		       inode->i_sb->s_id, inode->i_ino);
-#endif
 		nfsi->change_attr = fattr->change_attr;
 		if (!data_unstable)
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6f7a4af3bc4..c574d551f02 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -220,7 +220,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
 		ClearPageError(page);
 
 io_error:
-	nfs_end_data_update_defer(inode);
+	nfs_end_data_update(inode);
 	nfs_writedata_free(wdata);
 	return written ? written : result;
 }
@@ -401,7 +401,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 	nfsi->npages--;
 	if (!nfsi->npages) {
 		spin_unlock(&nfsi->req_lock);
-		nfs_end_data_update_defer(inode);
+		nfs_end_data_update(inode);
 		iput(inode);
 	} else
 		spin_unlock(&nfsi->req_lock);
-- 
cgit v1.2.3


From 7d52e86274e09fce8ac8f963e3605a84d0a305a7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:30 +0000
Subject: [PATCH] NFS: Cleanup of caching code, and slight optimization of
 writes.

 Unless we're doing O_APPEND writes, we really don't care about revalidating
 the file length. Just make sure that we catch any page cache invalidations.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c  | 12 +++++++++---
 fs/nfs/inode.c | 44 +++++++++++++++++++++++++++++---------------
 2 files changed, 38 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index a606708264e..40436857ed4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -333,9 +333,15 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
 	result = -EBUSY;
 	if (IS_SWAPFILE(inode))
 		goto out_swapfile;
-	result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
-	if (result)
-		goto out;
+	/*
+	 * O_APPEND implies that we must revalidate the file length.
+	 */
+	if (iocb->ki_filp->f_flags & O_APPEND) {
+		result = nfs_revalidate_file_size(inode, iocb->ki_filp);
+		if (result)
+			goto out;
+	} else
+		nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 
 	result = count;
 	if (!count)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b2d16758ced..a3922f4cc0a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1062,21 +1062,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	if (verifier == nfsi->cache_change_attribute)
 		nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
 	/* Do the page cache invalidation */
-	if (flags & NFS_INO_INVALID_DATA) {
-		if (S_ISREG(inode->i_mode)) {
-			if (filemap_fdatawrite(inode->i_mapping) == 0)
-				filemap_fdatawait(inode->i_mapping);
-			nfs_wb_all(inode);
-		}
-		nfsi->flags &= ~NFS_INO_INVALID_DATA;
-		invalidate_inode_pages2(inode->i_mapping);
-		memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
-		dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
-				inode->i_sb->s_id,
-				(long long)NFS_FILEID(inode));
-		/* This ensures we revalidate dentries */
-		nfsi->cache_change_attribute++;
-	}
+	nfs_revalidate_mapping(inode, inode->i_mapping);
 	if (flags & NFS_INO_INVALID_ACL)
 		nfs_zap_acl_cache(inode);
 	dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
@@ -1115,6 +1101,34 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	return __nfs_revalidate_inode(server, inode);
 }
 
+/**
+ * nfs_revalidate_mapping - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ */
+void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	if (nfsi->flags & NFS_INO_INVALID_DATA) {
+		if (S_ISREG(inode->i_mode)) {
+			if (filemap_fdatawrite(mapping) == 0)
+				filemap_fdatawait(mapping);
+			nfs_wb_all(inode);
+		}
+		invalidate_inode_pages2(mapping);
+		nfsi->flags &= ~NFS_INO_INVALID_DATA;
+		if (S_ISDIR(inode->i_mode)) {
+			memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+			/* This ensures we revalidate child dentries */
+			nfsi->cache_change_attribute++;
+		}
+		dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
+				inode->i_sb->s_id,
+				(long long)NFS_FILEID(inode));
+	}
+}
+
 /**
  * nfs_begin_data_update
  * @inode - pointer to inode
-- 
cgit v1.2.3


From fe51beecc55d0b0dce289e4758e7c529a642f63e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:30 +0000
Subject: [PATCH] NFS: Ensure that fstat() always returns the correct mtime

 Even if the file is open for writes.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c  | 28 ++++++++++++++++++++++------
 fs/nfs/inode.c | 24 ++++++++++++++++--------
 2 files changed, 38 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 40436857ed4..5621ba9885f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -127,6 +127,21 @@ nfs_file_release(struct inode *inode, struct file *filp)
 	return NFS_PROTO(inode)->file_release(inode, filp);
 }
 
+/**
+ * nfs_revalidate_file - Revalidate the page cache & related metadata
+ * @inode - pointer to inode struct
+ * @file - pointer to file
+ */
+static int nfs_revalidate_file(struct inode *inode, struct file *filp)
+{
+	int retval = 0;
+
+	if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode))
+		retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	nfs_revalidate_mapping(inode, filp->f_mapping);
+	return 0;
+}
+
 /**
  * nfs_revalidate_size - Revalidate the file size
  * @inode - pointer to inode struct
@@ -149,7 +164,8 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
 		goto force_reval;
 	if (nfsi->npages != 0)
 		return 0;
-	return nfs_revalidate_inode(server, inode);
+	if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode))
+		return 0;
 force_reval:
 	return __nfs_revalidate_inode(server, inode);
 }
@@ -210,7 +226,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		(unsigned long) count, (unsigned long) pos);
 
-	result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	result = nfs_revalidate_file(inode, iocb->ki_filp);
 	if (!result)
 		result = generic_file_aio_read(iocb, buf, count, pos);
 	return result;
@@ -228,7 +244,7 @@ nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		(unsigned long) count, (unsigned long long) *ppos);
 
-	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	res = nfs_revalidate_file(inode, filp);
 	if (!res)
 		res = generic_file_sendfile(filp, ppos, count, actor, target);
 	return res;
@@ -244,7 +260,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
 	dfprintk(VFS, "nfs: mmap(%s/%s)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
-	status = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	status = nfs_revalidate_file(inode, file);
 	if (!status)
 		status = generic_file_mmap(file, vma);
 	return status;
@@ -340,8 +356,8 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
 		result = nfs_revalidate_file_size(inode, iocb->ki_filp);
 		if (result)
 			goto out;
-	} else
-		nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
+	}
+	nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 
 	result = count;
 	if (!count)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a3922f4cc0a..4f545f382ba 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -620,9 +620,9 @@ nfs_zap_caches(struct inode *inode)
 
 	memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
-		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
 	else
-		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
 }
 
 static void nfs_zap_acl_cache(struct inode *inode)
@@ -1055,6 +1055,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 		goto out;
 	}
 	flags = nfsi->flags;
+	nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE;
 	/*
 	 * We may need to keep the attributes marked as invalid if
 	 * we raced with nfs_end_attr_update().
@@ -1187,8 +1188,11 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 		if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0
 				&& nfsi->change_attr == fattr->pre_change_attr)
 			nfsi->change_attr = fattr->change_attr;
-		if (!data_unstable && nfsi->change_attr != fattr->change_attr)
+		if (nfsi->change_attr != fattr->change_attr) {
 			nfsi->flags |= NFS_INO_INVALID_ATTR;
+			if (!data_unstable)
+				nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+		}
 	}
 
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
@@ -1211,12 +1215,16 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 	}
 
 	/* Verify a few of the more important attributes */
-	if (!data_unstable) {
-		if (!timespec_equal(&inode->i_mtime, &fattr->mtime)
-				|| cur_size != new_isize)
-			nfsi->flags |= NFS_INO_INVALID_ATTR;
-	} else if (new_isize != cur_size && nfsi->npages == 0)
+	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
 		nfsi->flags |= NFS_INO_INVALID_ATTR;
+		if (!data_unstable)
+			nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+	}
+	if (cur_size != new_isize) {
+		nfsi->flags |= NFS_INO_INVALID_ATTR;
+		if (nfsi->npages == 0)
+			nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+	}
 
 	/* Have any file permissions changed? */
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
-- 
cgit v1.2.3


From ab0a3dbedc51037f3d2e22ef67717a987b3d15e2 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:30 +0000
Subject: [PATCH] NFS: Write optimization for short files and small O_SYNC
 writes.

 Use stable writes if we can see that we are only going to put a single
 write on the wire.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c574d551f02..79b621a545b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -750,7 +750,7 @@ int nfs_updatepage(struct file *file, struct page *page,
 	 * is entirely in cache, it may be more efficient to avoid
 	 * fragmenting write requests.
 	 */
-	if (PageUptodate(page) && inode->i_flock == NULL) {
+	if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) {
 		loff_t end_offs = i_size_read(inode) - 1;
 		unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT;
 
@@ -1342,8 +1342,16 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
 	spin_lock(&nfsi->req_lock);
 	res = nfs_scan_dirty(inode, &head, idx_start, npages);
 	spin_unlock(&nfsi->req_lock);
-	if (res)
-		error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how);
+	if (res) {
+		struct nfs_server *server = NFS_SERVER(inode);
+
+		/* For single writes, FLUSH_STABLE is more efficient */
+		if (res == nfsi->npages && nfsi->npages <= server->wpages) {
+			if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize)
+				how |= FLUSH_STABLE;
+		}
+		error = nfs_flush_list(&head, server->wpages, how);
+	}
 	if (error < 0)
 		return error;
 	return res;
-- 
cgit v1.2.3


From c6a556b88adfacd2af90be84357c8165d716c27d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:30 +0000
Subject: [PATCH] NFS: Make searching and waiting on busy writeback requests
 more efficient.

 Basically copies the VFS's method for tracking writebacks and applies
 it to the struct nfs_page.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/pagelist.c | 29 ++++++++++++++++++++++++++++-
 fs/nfs/read.c     |  3 ---
 fs/nfs/write.c    | 19 +++++++++----------
 3 files changed, 37 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 80777f99a58..356a33bb38a 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -111,6 +111,33 @@ void nfs_unlock_request(struct nfs_page *req)
 	nfs_release_request(req);
 }
 
+/**
+ * nfs_set_page_writeback_locked - Lock a request for writeback
+ * @req:
+ */
+int nfs_set_page_writeback_locked(struct nfs_page *req)
+{
+	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+
+	if (!nfs_lock_request(req))
+		return 0;
+	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+	return 1;
+}
+
+/**
+ * nfs_clear_page_writeback - Unlock request and wake up sleepers
+ */
+void nfs_clear_page_writeback(struct nfs_page *req)
+{
+	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+
+	spin_lock(&nfsi->req_lock);
+	radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+	spin_unlock(&nfsi->req_lock);
+	nfs_unlock_request(req);
+}
+
 /**
  * nfs_clear_request - Free up all resources allocated to the request
  * @req:
@@ -301,7 +328,7 @@ nfs_scan_list(struct list_head *head, struct list_head *dst,
 		if (req->wb_index > idx_end)
 			break;
 
-		if (!nfs_lock_request(req))
+		if (!nfs_set_page_writeback_locked(req))
 			continue;
 		nfs_list_remove_request(req);
 		nfs_list_add_request(req, dst);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index a0042fb5863..6f866b8aa2d 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -173,7 +173,6 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	if (len < PAGE_CACHE_SIZE)
 		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
 
-	nfs_lock_request(new);
 	nfs_list_add_request(new, &one_request);
 	nfs_pagein_one(&one_request, inode);
 	return 0;
@@ -185,7 +184,6 @@ static void nfs_readpage_release(struct nfs_page *req)
 
 	nfs_clear_request(req);
 	nfs_release_request(req);
-	nfs_unlock_request(req);
 
 	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
 			req->wb_context->dentry->d_inode->i_sb->s_id,
@@ -553,7 +551,6 @@ readpage_async_filler(void *data, struct page *page)
 	}
 	if (len < PAGE_CACHE_SIZE)
 		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
-	nfs_lock_request(new);
 	nfs_list_add_request(new, desc->head);
 	return 0;
 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 79b621a545b..58a39b0486a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -503,13 +503,12 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
 
 	spin_lock(&nfsi->req_lock);
 	next = idx_start;
-	while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) {
+	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
 		if (req->wb_index > idx_end)
 			break;
 
 		next = req->wb_index + 1;
-		if (!NFS_WBACK_BUSY(req))
-			continue;
+		BUG_ON(!NFS_WBACK_BUSY(req));
 
 		atomic_inc(&req->wb_count);
 		spin_unlock(&nfsi->req_lock);
@@ -821,7 +820,7 @@ out:
 #else
 	nfs_inode_remove_request(req);
 #endif
-	nfs_unlock_request(req);
+	nfs_clear_page_writeback(req);
 }
 
 static inline int flush_task_priority(int how)
@@ -952,7 +951,7 @@ out_bad:
 		nfs_writedata_free(data);
 	}
 	nfs_mark_request_dirty(req);
-	nfs_unlock_request(req);
+	nfs_clear_page_writeback(req);
 	return -ENOMEM;
 }
 
@@ -1002,7 +1001,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
 		struct nfs_page *req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_mark_request_dirty(req);
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 	}
 	return -ENOMEM;
 }
@@ -1029,7 +1028,7 @@ nfs_flush_list(struct list_head *head, int wpages, int how)
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_mark_request_dirty(req);
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 	}
 	return error;
 }
@@ -1121,7 +1120,7 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
 		nfs_inode_remove_request(req);
 #endif
 	next:
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 	}
 }
 
@@ -1278,7 +1277,7 @@ nfs_commit_list(struct list_head *head, int how)
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_mark_request_commit(req);
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 	}
 	return -ENOMEM;
 }
@@ -1324,7 +1323,7 @@ nfs_commit_done(struct rpc_task *task)
 		dprintk(" mismatch\n");
 		nfs_mark_request_dirty(req);
 	next:
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 		res++;
 	}
 	sub_page_state(nr_unstable,res);
-- 
cgit v1.2.3


From 3da28eb1c6545fe73263a24eba0996217490e1eb Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:31 +0000
Subject: [PATCH] NFS: Replace nfs_page insertion sort with a radix sort

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c    |  2 +-
 fs/nfs/pagelist.c | 86 ++++++++++++++++++++++++++++++++++++-------------------
 fs/nfs/write.c    | 71 +++++++++++++++++++++------------------------
 3 files changed, 89 insertions(+), 70 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 4f545f382ba..4845911f1c6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -135,7 +135,7 @@ nfs_write_inode(struct inode *inode, int sync)
 	int flags = sync ? FLUSH_WAIT : 0;
 	int ret;
 
-	ret = nfs_commit_inode(inode, 0, 0, flags);
+	ret = nfs_commit_inode(inode, flags);
 	if (ret < 0)
 		return ret;
 	return 0;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 356a33bb38a..d53857b148e 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -177,36 +177,6 @@ nfs_release_request(struct nfs_page *req)
 	nfs_page_free(req);
 }
 
-/**
- * nfs_list_add_request - Insert a request into a sorted list
- * @req: request
- * @head: head of list into which to insert the request.
- *
- * Note that the wb_list is sorted by page index in order to facilitate
- * coalescing of requests.
- * We use an insertion sort that is optimized for the case of appended
- * writes.
- */
-void
-nfs_list_add_request(struct nfs_page *req, struct list_head *head)
-{
-	struct list_head *pos;
-
-#ifdef NFS_PARANOIA
-	if (!list_empty(&req->wb_list)) {
-		printk(KERN_ERR "NFS: Add to list failed!\n");
-		BUG();
-	}
-#endif
-	list_for_each_prev(pos, head) {
-		struct nfs_page	*p = nfs_list_entry(pos);
-		if (p->wb_index < req->wb_index)
-			break;
-	}
-	list_add(&req->wb_list, pos);
-	req->wb_list_head = head;
-}
-
 static int nfs_wait_bit_interruptible(void *word)
 {
 	int ret = 0;
@@ -291,6 +261,62 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
 	return npages;
 }
 
+#define NFS_SCAN_MAXENTRIES 16
+/**
+ * nfs_scan_lock_dirty - Scan the radix tree for dirty requests
+ * @nfsi: NFS inode
+ * @dst: Destination list
+ * @idx_start: lower bound of page->index to scan
+ * @npages: idx_start + npages sets the upper bound to scan.
+ *
+ * Moves elements from one of the inode request lists.
+ * If the number of requests is set to 0, the entire address_space
+ * starting at index idx_start, is scanned.
+ * The requests are *not* checked to ensure that they form a contiguous set.
+ * You must be holding the inode's req_lock when calling this function
+ */
+int
+nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst,
+	      unsigned long idx_start, unsigned int npages)
+{
+	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
+	struct nfs_page *req;
+	unsigned long idx_end;
+	int found, i;
+	int res;
+
+	res = 0;
+	if (npages == 0)
+		idx_end = ~0;
+	else
+		idx_end = idx_start + npages - 1;
+
+	for (;;) {
+		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
+				(void **)&pgvec[0], idx_start, NFS_SCAN_MAXENTRIES,
+				NFS_PAGE_TAG_DIRTY);
+		if (found <= 0)
+			break;
+		for (i = 0; i < found; i++) {
+			req = pgvec[i];
+			if (req->wb_index > idx_end)
+				goto out;
+
+			idx_start = req->wb_index + 1;
+
+			if (nfs_set_page_writeback_locked(req)) {
+				radix_tree_tag_clear(&nfsi->nfs_page_tree,
+						req->wb_index, NFS_PAGE_TAG_DIRTY);
+				nfs_list_remove_request(req);
+				nfs_list_add_request(req, dst);
+				res++;
+			}
+		}
+	}
+out:
+	return res;
+}
+
 /**
  * nfs_scan_list - Scan a list for matching requests
  * @head: One of the NFS inode request lists
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 58a39b0486a..5130eda231d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -352,7 +352,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 		if (err < 0)
 			goto out;
 	}
-	err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc));
+	err = nfs_commit_inode(inode, wb_priority(wbc));
 	if (err > 0) {
 		wbc->nr_to_write -= err;
 		err = 0;
@@ -446,6 +446,8 @@ nfs_mark_request_dirty(struct nfs_page *req)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	spin_lock(&nfsi->req_lock);
+	radix_tree_tag_set(&nfsi->nfs_page_tree,
+			req->wb_index, NFS_PAGE_TAG_DIRTY);
 	nfs_list_add_request(req, &nfsi->dirty);
 	nfsi->ndirty++;
 	spin_unlock(&nfsi->req_lock);
@@ -537,12 +539,15 @@ static int
 nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	int	res;
-	res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages);
-	nfsi->ndirty -= res;
-	sub_page_state(nr_dirty,res);
-	if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
-		printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
+	int res = 0;
+
+	if (nfsi->ndirty != 0) {
+		res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages);
+		nfsi->ndirty -= res;
+		sub_page_state(nr_dirty,res);
+		if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
+			printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
+	}
 	return res;
 }
 
@@ -561,11 +566,14 @@ static int
 nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	int	res;
-	res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages);
-	nfsi->ncommit -= res;
-	if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
-		printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+	int res = 0;
+
+	if (nfsi->ncommit != 0) {
+		res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages);
+		nfsi->ncommit -= res;
+		if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
+			printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+	}
 	return res;
 }
 #endif
@@ -1209,36 +1217,24 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 		struct nfs_write_data *data, int how)
 {
 	struct rpc_task		*task = &data->task;
-	struct nfs_page		*first, *last;
+	struct nfs_page		*first;
 	struct inode		*inode;
-	loff_t			start, end, len;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
 
 	list_splice_init(head, &data->pages);
 	first = nfs_list_entry(data->pages.next);
-	last = nfs_list_entry(data->pages.prev);
 	inode = first->wb_context->dentry->d_inode;
 
-	/*
-	 * Determine the offset range of requests in the COMMIT call.
-	 * We rely on the fact that data->pages is an ordered list...
-	 */
-	start = req_offset(first);
-	end = req_offset(last) + last->wb_bytes;
-	len = end - start;
-	/* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
-	if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1))
-		len = 0;
-
 	data->inode	  = inode;
 	data->cred	  = first->wb_context->cred;
 
 	data->args.fh     = NFS_FH(data->inode);
-	data->args.offset = start;
-	data->args.count  = len;
-	data->res.count   = len;
+	/* Note: we always request a commit of the entire inode */
+	data->args.offset = 0;
+	data->args.count  = 0;
+	data->res.count   = 0;
 	data->res.fattr   = &data->fattr;
 	data->res.verf    = &data->verf;
 	
@@ -1357,8 +1353,7 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
 }
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-int nfs_commit_inode(struct inode *inode, unsigned long idx_start,
-		    unsigned int npages, int how)
+int nfs_commit_inode(struct inode *inode, int how)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	LIST_HEAD(head);
@@ -1366,15 +1361,13 @@ int nfs_commit_inode(struct inode *inode, unsigned long idx_start,
 				error = 0;
 
 	spin_lock(&nfsi->req_lock);
-	res = nfs_scan_commit(inode, &head, idx_start, npages);
+	res = nfs_scan_commit(inode, &head, 0, 0);
+	spin_unlock(&nfsi->req_lock);
 	if (res) {
-		res += nfs_scan_commit(inode, &head, 0, 0);
-		spin_unlock(&nfsi->req_lock);
 		error = nfs_commit_list(&head, how);
-	} else
-		spin_unlock(&nfsi->req_lock);
-	if (error < 0)
-		return error;
+		if (error < 0)
+			return error;
+	}
 	return res;
 }
 #endif
@@ -1396,7 +1389,7 @@ int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
 			error = nfs_flush_inode(inode, idx_start, npages, how);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 		if (error == 0)
-			error = nfs_commit_inode(inode, idx_start, npages, how);
+			error = nfs_commit_inode(inode, how);
 #endif
 	} while (error > 0);
 	return error;
-- 
cgit v1.2.3


From 80fec4c62e2cf544ac26e53f3e0d2f73df6820b9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:31 +0000
Subject: [PATCH] VFS: Ensure that all the on-stack struct file_lock call
 fl_release_private

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/locks.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/locks.c b/fs/locks.c
index 3fa6a7ce57a..a0bc03495bd 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1548,6 +1548,8 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
 
 	if (filp->f_op && filp->f_op->lock) {
 		error = filp->f_op->lock(filp, F_GETLK, &file_lock);
+		if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
+			file_lock.fl_ops->fl_release_private(&file_lock);
 		if (error < 0)
 			goto out;
 		else
@@ -1690,6 +1692,8 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
 
 	if (filp->f_op && filp->f_op->lock) {
 		error = filp->f_op->lock(filp, F_GETLK, &file_lock);
+		if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
+			file_lock.fl_ops->fl_release_private(&file_lock);
 		if (error < 0)
 			goto out;
 		else
@@ -1873,6 +1877,8 @@ void locks_remove_flock(struct file *filp)
 			.fl_end = OFFSET_MAX,
 		};
 		filp->f_op->flock(filp, F_SETLKW, &fl);
+		if (fl.fl_ops && fl.fl_ops->fl_release_private)
+			fl.fl_ops->fl_release_private(&fl);
 	}
 
 	lock_kernel();
-- 
cgit v1.2.3


From 4f15e2b1f4f3a56e46201714b39436c32218d547 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:31 +0000
Subject: [PATCH] NLM: cleanup for blocked locks.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntlock.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index ef7103b8c5b..44adb84183b 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -31,7 +31,7 @@ static int			reclaimer(void *ptr);
  * This is the representation of a blocked client lock.
  */
 struct nlm_wait {
-	struct nlm_wait *	b_next;		/* linked list */
+	struct list_head	b_list;		/* linked list */
 	wait_queue_head_t	b_wait;		/* where to wait on */
 	struct nlm_host *	b_host;
 	struct file_lock *	b_lock;		/* local file lock */
@@ -39,7 +39,7 @@ struct nlm_wait {
 	u32			b_status;	/* grant callback status */
 };
 
-static struct nlm_wait *	nlm_blocked;
+static LIST_HEAD(nlm_blocked);
 
 /*
  * Block on a lock
@@ -55,8 +55,7 @@ nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp)
 	block.b_lock   = fl;
 	init_waitqueue_head(&block.b_wait);
 	block.b_status = NLM_LCK_BLOCKED;
-	block.b_next   = nlm_blocked;
-	nlm_blocked    = &block;
+	list_add(&block.b_list, &nlm_blocked);
 
 	/* Remember pseudo nsm state */
 	pstate = host->h_state;
@@ -71,12 +70,7 @@ nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp)
 	 */
 	sleep_on_timeout(&block.b_wait, 30*HZ);
 
-	for (head = &nlm_blocked; *head; head = &(*head)->b_next) {
-		if (*head == &block) {
-			*head = block.b_next;
-			break;
-		}
-	}
+	list_del(&block.b_list);
 
 	if (!signalled()) {
 		*statp = block.b_status;
@@ -105,7 +99,7 @@ nlmclnt_grant(struct nlm_lock *lock)
 	 * Look up blocked request based on arguments. 
 	 * Warning: must not use cookie to match it!
 	 */
-	for (block = nlm_blocked; block; block = block->b_next) {
+	list_for_each_entry(block, &nlm_blocked, b_list) {
 		if (nlm_compare_locks(block->b_lock, &lock->fl))
 			break;
 	}
@@ -230,7 +224,7 @@ restart:
 	host->h_reclaiming = 0;
 
 	/* Now, wake up all processes that sleep on a blocked lock */
-	for (block = nlm_blocked; block; block = block->b_next) {
+	list_for_each_entry(block, &nlm_blocked, b_list) {
 		if (block->b_host == host) {
 			block->b_status = NLM_LCK_DENIED_GRACE_PERIOD;
 			wake_up(&block->b_wait);
-- 
cgit v1.2.3


From ecdbf769b2cb8903e07cd482334c714d89fd1146 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:31 +0000
Subject: [PATCH] NLM: fix a client-side race on blocking locks.

 If the lock blocks, the server may send us a GRANTED message that
 races with the reply to our LOCK request. Make sure that we catch
 the GRANTED by queueing up our request on the nlm_blocked list
 before we send off the first LOCK rpc call.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntlock.c | 99 +++++++++++++++++++++++++++++++----------------------
 fs/lockd/clntproc.c | 40 +++++++++++++++++-----
 2 files changed, 90 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 44adb84183b..006bb9e1457 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -42,23 +42,51 @@ struct nlm_wait {
 static LIST_HEAD(nlm_blocked);
 
 /*
- * Block on a lock
+ * Queue up a lock for blocking so that the GRANTED request can see it
  */
-int
-nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp)
+int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl)
+{
+	struct nlm_wait *block;
+
+	BUG_ON(req->a_block != NULL);
+	block = kmalloc(sizeof(*block), GFP_KERNEL);
+	if (block == NULL)
+		return -ENOMEM;
+	block->b_host = host;
+	block->b_lock = fl;
+	init_waitqueue_head(&block->b_wait);
+	block->b_status = NLM_LCK_BLOCKED;
+
+	list_add(&block->b_list, &nlm_blocked);
+	req->a_block = block;
+
+	return 0;
+}
+
+void nlmclnt_finish_block(struct nlm_rqst *req)
 {
-	struct nlm_wait	block, **head;
-	int		err;
-	u32		pstate;
+	struct nlm_wait *block = req->a_block;
 
-	block.b_host   = host;
-	block.b_lock   = fl;
-	init_waitqueue_head(&block.b_wait);
-	block.b_status = NLM_LCK_BLOCKED;
-	list_add(&block.b_list, &nlm_blocked);
+	if (block == NULL)
+		return;
+	req->a_block = NULL;
+	list_del(&block->b_list);
+	kfree(block);
+}
 
-	/* Remember pseudo nsm state */
-	pstate = host->h_state;
+/*
+ * Block on a lock
+ */
+long nlmclnt_block(struct nlm_rqst *req, long timeout)
+{
+	struct nlm_wait	*block = req->a_block;
+	long ret;
+
+	/* A borken server might ask us to block even if we didn't
+	 * request it. Just say no!
+	 */
+	if (!req->a_args.block)
+		return -EAGAIN;
 
 	/* Go to sleep waiting for GRANT callback. Some servers seem
 	 * to lose callbacks, however, so we're going to poll from
@@ -68,23 +96,16 @@ nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp)
 	 * a 1 minute timeout would do. See the comment before
 	 * nlmclnt_lock for an explanation.
 	 */
-	sleep_on_timeout(&block.b_wait, 30*HZ);
+	ret = wait_event_interruptible_timeout(block->b_wait,
+			block->b_status != NLM_LCK_BLOCKED,
+			timeout);
 
-	list_del(&block.b_list);
-
-	if (!signalled()) {
-		*statp = block.b_status;
-		return 0;
+	if (block->b_status != NLM_LCK_BLOCKED) {
+		req->a_res.status = block->b_status;
+		block->b_status = NLM_LCK_BLOCKED;
 	}
 
-	/* Okay, we were interrupted. Cancel the pending request
-	 * unless the server has rebooted.
-	 */
-	if (pstate == host->h_state && (err = nlmclnt_cancel(host, fl)) < 0)
-		printk(KERN_NOTICE
-			"lockd: CANCEL call failed (errno %d)\n", -err);
-
-	return -ERESTARTSYS;
+	return ret;
 }
 
 /*
@@ -94,27 +115,23 @@ u32
 nlmclnt_grant(struct nlm_lock *lock)
 {
 	struct nlm_wait	*block;
+	u32 res = nlm_lck_denied;
 
 	/*
 	 * Look up blocked request based on arguments. 
 	 * Warning: must not use cookie to match it!
 	 */
 	list_for_each_entry(block, &nlm_blocked, b_list) {
-		if (nlm_compare_locks(block->b_lock, &lock->fl))
-			break;
+		if (nlm_compare_locks(block->b_lock, &lock->fl)) {
+			/* Alright, we found a lock. Set the return status
+			 * and wake up the caller
+			 */
+			block->b_status = NLM_LCK_GRANTED;
+			wake_up(&block->b_wait);
+			res = nlm_granted;
+		}
 	}
-
-	/* Ooops, no blocked request found. */
-	if (block == NULL)
-		return nlm_lck_denied;
-
-	/* Alright, we found the lock. Set the return status and
-	 * wake up the caller.
-	 */
-	block->b_status = NLM_LCK_GRANTED;
-	wake_up(&block->b_wait);
-
-	return nlm_granted;
+	return res;
 }
 
 /*
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index a4407619b1f..fd77ed1d710 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -21,6 +21,7 @@
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 #define NLMCLNT_GRACE_WAIT	(5*HZ)
+#define NLMCLNT_POLL_TIMEOUT	(30*HZ)
 
 static int	nlmclnt_test(struct nlm_rqst *, struct file_lock *);
 static int	nlmclnt_lock(struct nlm_rqst *, struct file_lock *);
@@ -553,7 +554,8 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 {
 	struct nlm_host	*host = req->a_host;
 	struct nlm_res	*resp = &req->a_res;
-	int		status;
+	long timeout;
+	int status;
 
 	if (!host->h_monitored && nsm_monitor(host) < 0) {
 		printk(KERN_NOTICE "lockd: failed to monitor %s\n",
@@ -562,15 +564,32 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 		goto out;
 	}
 
-	do {
-		if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0) {
-			if (resp->status != NLM_LCK_BLOCKED)
-				break;
-			status = nlmclnt_block(host, fl, &resp->status);
-		}
+	if (req->a_args.block) {
+		status = nlmclnt_prepare_block(req, host, fl);
 		if (status < 0)
 			goto out;
-	} while (resp->status == NLM_LCK_BLOCKED && req->a_args.block);
+	}
+	for(;;) {
+		status = nlmclnt_call(req, NLMPROC_LOCK);
+		if (status < 0)
+			goto out_unblock;
+		if (resp->status != NLM_LCK_BLOCKED)
+			break;
+		/* Wait on an NLM blocking lock */
+		timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT);
+		/* Did a reclaimer thread notify us of a server reboot? */
+		if (resp->status ==  NLM_LCK_DENIED_GRACE_PERIOD)
+			continue;
+		if (resp->status != NLM_LCK_BLOCKED)
+			break;
+		if (timeout >= 0)
+			continue;
+		/* We were interrupted. Send a CANCEL request to the server
+		 * and exit
+		 */
+		status = (int)timeout;
+		goto out_unblock;
+	}
 
 	if (resp->status == NLM_LCK_GRANTED) {
 		fl->fl_u.nfs_fl.state = host->h_state;
@@ -579,6 +598,11 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 		do_vfs_lock(fl);
 	}
 	status = nlm_stat_to_errno(resp->status);
+out_unblock:
+	nlmclnt_finish_block(req);
+	/* Cancel the blocked request if it is still pending */
+	if (resp->status == NLM_LCK_BLOCKED)
+		nlmclnt_cancel(host, fl);
 out:
 	nlmclnt_release_lockargs(req);
 	return status;
-- 
cgit v1.2.3


From 8d0a8a9d0ec790086c64d210af413ac351d89e35 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:32 +0000
Subject: [PATCH] NFSv4: Clean up nfs4 lock state accounting

 Ensure that lock owner structures are not released prematurely.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h   |   9 +--
 fs/nfs/nfs4proc.c  |  69 +++++++++------------
 fs/nfs/nfs4state.c | 178 ++++++++++++++++++++++++-----------------------------
 3 files changed, 112 insertions(+), 144 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7c6f1d668fb..ec1a22d7b87 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -128,6 +128,7 @@ struct nfs4_state_owner {
 
 struct nfs4_lock_state {
 	struct list_head	ls_locks;	/* Other lock stateids */
+	struct nfs4_state *	ls_state;	/* Pointer to open state */
 	fl_owner_t		ls_owner;	/* POSIX lock owner */
 #define NFS_LOCK_INITIALIZED 1
 	int			ls_flags;
@@ -153,7 +154,7 @@ struct nfs4_state {
 
 	unsigned long flags;		/* Do we hold any locks? */
 	struct semaphore lock_sema;	/* Serializes file locking operations */
-	rwlock_t state_lock;		/* Protects the lock_states list */
+	spinlock_t state_lock;		/* Protects the lock_states list */
 
 	nfs4_stateid stateid;
 
@@ -225,12 +226,8 @@ extern void nfs4_close_state(struct nfs4_state *, mode_t);
 extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
 extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
 extern void nfs4_schedule_state_recovery(struct nfs4_client *);
-extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t);
-extern struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t);
-extern void nfs4_put_lock_state(struct nfs4_lock_state *state);
+extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls);
-extern void nfs4_notify_setlk(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *);
-extern void nfs4_notify_unlck(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
 
 extern const nfs4_stateid zero_stateid;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index af80b598148..0ddc20102d4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2626,14 +2626,11 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 	down_read(&clp->cl_sem);
 	nlo.clientid = clp->cl_clientid;
 	down(&state->lock_sema);
-	lsp = nfs4_find_lock_state(state, request->fl_owner);
-	if (lsp)
-		nlo.id = lsp->ls_id; 
-	else {
-		spin_lock(&clp->cl_lock);
-		nlo.id = nfs4_alloc_lockowner_id(clp);
-		spin_unlock(&clp->cl_lock);
-	}
+	status = nfs4_set_lock_state(state, request);
+	if (status != 0)
+		goto out;
+	lsp = request->fl_u.nfs4_fl.owner;
+	nlo.id = lsp->ls_id; 
 	arg.u.lockt = &nlo;
 	status = rpc_call_sync(server->client, &msg, 0);
 	if (!status) {
@@ -2654,8 +2651,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 		request->fl_pid = 0;
 		status = 0;
 	}
-	if (lsp)
-		nfs4_put_lock_state(lsp);
+out:
 	up(&state->lock_sema);
 	up_read(&clp->cl_sem);
 	return status;
@@ -2715,28 +2711,26 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock
 	};
 	struct nfs4_lock_state *lsp;
 	struct nfs_locku_opargs luargs;
-	int status = 0;
+	int status;
 			
 	down_read(&clp->cl_sem);
 	down(&state->lock_sema);
-	lsp = nfs4_find_lock_state(state, request->fl_owner);
-	if (!lsp)
+	status = nfs4_set_lock_state(state, request);
+	if (status != 0)
 		goto out;
+	lsp = request->fl_u.nfs4_fl.owner;
 	/* We might have lost the locks! */
-	if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) {
-		luargs.seqid = lsp->ls_seqid;
-		memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid));
-		arg.u.locku = &luargs;
-		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-		nfs4_increment_lock_seqid(status, lsp);
-	}
+	if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0)
+		goto out;
+	luargs.seqid = lsp->ls_seqid;
+	memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid));
+	arg.u.locku = &luargs;
+	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+	nfs4_increment_lock_seqid(status, lsp);
 
-	if (status == 0) {
+	if (status == 0)
 		memcpy(&lsp->ls_stateid,  &res.u.stateid, 
 				sizeof(lsp->ls_stateid));
-		nfs4_notify_unlck(state, request, lsp);
-	}
-	nfs4_put_lock_state(lsp);
 out:
 	up(&state->lock_sema);
 	if (status == 0)
@@ -2762,7 +2756,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 {
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_lock_state *lsp;
+	struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
 	struct nfs_lockargs arg = {
 		.fh = NFS_FH(inode),
 		.type = nfs4_lck_type(cmd, request),
@@ -2784,9 +2778,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 	};
 	int status;
 
-	lsp = nfs4_get_lock_state(state, request->fl_owner);
-	if (lsp == NULL)
-		return -ENOMEM;
 	if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) {
 		struct nfs4_state_owner *owner = state->owner;
 		struct nfs_open_to_lock otl = {
@@ -2808,27 +2799,26 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 		* seqid mutating errors */
 		nfs4_increment_seqid(status, owner);
 		up(&owner->so_sema);
+		if (status == 0) {
+			lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+			lsp->ls_seqid++;
+		}
 	} else {
 		struct nfs_exist_lock el = {
 			.seqid = lsp->ls_seqid,
 		};
 		memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid));
 		largs.u.exist_lock = &el;
-		largs.new_lock_owner = 0;
 		arg.u.lock = &largs;
 		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+		/* increment seqid on success, and * seqid mutating errors*/
+		nfs4_increment_lock_seqid(status, lsp);
 	}
-	/* increment seqid on success, and * seqid mutating errors*/
-	nfs4_increment_lock_seqid(status, lsp);
 	/* save the returned stateid. */
-	if (status == 0) {
+	if (status == 0)
 		memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid));
-		lsp->ls_flags |= NFS_LOCK_INITIALIZED;
-		if (!reclaim)
-			nfs4_notify_setlk(state, request, lsp);
-	} else if (status == -NFS4ERR_DENIED)
+	else if (status == -NFS4ERR_DENIED)
 		status = -EAGAIN;
-	nfs4_put_lock_state(lsp);
 	return status;
 }
 
@@ -2869,7 +2859,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 
 	down_read(&clp->cl_sem);
 	down(&state->lock_sema);
-	status = _nfs4_do_setlk(state, cmd, request, 0);
+	status = nfs4_set_lock_state(state, request);
+	if (status == 0)
+		status = _nfs4_do_setlk(state, cmd, request, 0);
 	up(&state->lock_sema);
 	if (status == 0) {
 		/* Note: we always want to sleep here! */
@@ -2927,7 +2919,6 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
 		if (signalled())
 			break;
 	} while(status < 0);
-
 	return status;
 }
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 591ad1d5188..afe587d82f1 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -360,7 +360,7 @@ nfs4_alloc_open_state(void)
 	atomic_set(&state->count, 1);
 	INIT_LIST_HEAD(&state->lock_states);
 	init_MUTEX(&state->lock_sema);
-	rwlock_init(&state->state_lock);
+	spin_lock_init(&state->state_lock);
 	return state;
 }
 
@@ -542,16 +542,6 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 	return NULL;
 }
 
-struct nfs4_lock_state *
-nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
-{
-	struct nfs4_lock_state *lsp;
-	read_lock(&state->state_lock);
-	lsp = __nfs4_find_lock_state(state, fl_owner);
-	read_unlock(&state->state_lock);
-	return lsp;
-}
-
 /*
  * Return a compatible lock_state. If no initialized lock_state structure
  * exists, return an uninitialized one.
@@ -568,14 +558,13 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 		return NULL;
 	lsp->ls_flags = 0;
 	lsp->ls_seqid = 0;	/* arbitrary */
-	lsp->ls_id = -1; 
 	memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data));
 	atomic_set(&lsp->ls_count, 1);
 	lsp->ls_owner = fl_owner;
-	INIT_LIST_HEAD(&lsp->ls_locks);
 	spin_lock(&clp->cl_lock);
 	lsp->ls_id = nfs4_alloc_lockowner_id(clp);
 	spin_unlock(&clp->cl_lock);
+	INIT_LIST_HEAD(&lsp->ls_locks);
 	return lsp;
 }
 
@@ -585,121 +574,112 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
  *
  * The caller must be holding state->lock_sema and clp->cl_sem
  */
-struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
+static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
 {
-	struct nfs4_lock_state * lsp;
+	struct nfs4_lock_state *lsp, *new = NULL;
 	
-	lsp = nfs4_find_lock_state(state, owner);
-	if (lsp == NULL)
-		lsp = nfs4_alloc_lock_state(state, owner);
+	for(;;) {
+		spin_lock(&state->state_lock);
+		lsp = __nfs4_find_lock_state(state, owner);
+		if (lsp != NULL)
+			break;
+		if (new != NULL) {
+			new->ls_state = state;
+			list_add(&new->ls_locks, &state->lock_states);
+			set_bit(LK_STATE_IN_USE, &state->flags);
+			lsp = new;
+			new = NULL;
+			break;
+		}
+		spin_unlock(&state->state_lock);
+		new = nfs4_alloc_lock_state(state, owner);
+		if (new == NULL)
+			return NULL;
+	}
+	spin_unlock(&state->state_lock);
+	kfree(new);
 	return lsp;
 }
 
 /*
- * Byte-range lock aware utility to initialize the stateid of read/write
- * requests.
+ * Release reference to lock_state, and free it if we see that
+ * it is no longer in use
  */
-void
-nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
+static void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
 {
-	if (test_bit(LK_STATE_IN_USE, &state->flags)) {
-		struct nfs4_lock_state *lsp;
+	struct nfs4_state *state;
 
-		lsp = nfs4_find_lock_state(state, fl_owner);
-		if (lsp) {
-			memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
-			nfs4_put_lock_state(lsp);
-			return;
-		}
-	}
-	memcpy(dst, &state->stateid, sizeof(*dst));
+	if (lsp == NULL)
+		return;
+	state = lsp->ls_state;
+	if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock))
+		return;
+	list_del(&lsp->ls_locks);
+	if (list_empty(&state->lock_states))
+		clear_bit(LK_STATE_IN_USE, &state->flags);
+	spin_unlock(&state->state_lock);
+	kfree(lsp);
 }
 
-/*
-* Called with state->lock_sema and clp->cl_sem held.
-*/
-void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
+static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
 {
-	if (status == NFS_OK || seqid_mutating_err(-status))
-		lsp->ls_seqid++;
-}
+	struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner;
 
-/* 
-* Check to see if the request lock (type FL_UNLK) effects the fl lock.
-*
-* fl and request must have the same posix owner
-*
-* return: 
-* 0 -> fl not effected by request
-* 1 -> fl consumed by request
-*/
+	dst->fl_u.nfs4_fl.owner = lsp;
+	atomic_inc(&lsp->ls_count);
+}
 
-static int
-nfs4_check_unlock(struct file_lock *fl, struct file_lock *request)
+static void nfs4_fl_release_lock(struct file_lock *fl)
 {
-	if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end)
-		return 1;
-	return 0;
+	nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner);
 }
 
-/*
- * Post an initialized lock_state on the state->lock_states list.
- */
-void nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
+static struct file_lock_operations nfs4_fl_lock_ops = {
+	.fl_copy_lock = nfs4_fl_copy_lock,
+	.fl_release_private = nfs4_fl_release_lock,
+};
+
+int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
 {
-	if (!list_empty(&lsp->ls_locks))
-		return;
-	atomic_inc(&lsp->ls_count);
-	write_lock(&state->state_lock);
-	list_add(&lsp->ls_locks, &state->lock_states);
-	set_bit(LK_STATE_IN_USE, &state->flags);
-	write_unlock(&state->state_lock);
+	struct nfs4_lock_state *lsp;
+
+	if (fl->fl_ops != NULL)
+		return 0;
+	lsp = nfs4_get_lock_state(state, fl->fl_owner);
+	if (lsp == NULL)
+		return -ENOMEM;
+	fl->fl_u.nfs4_fl.owner = lsp;
+	fl->fl_ops = &nfs4_fl_lock_ops;
+	return 0;
 }
 
-/* 
- * to decide to 'reap' lock state:
- * 1) search i_flock for file_locks with fl.lock_state = to ls.
- * 2) determine if unlock will consume found lock. 
- * 	if so, reap
- *
- * 	else, don't reap.
- *
+/*
+ * Byte-range lock aware utility to initialize the stateid of read/write
+ * requests.
  */
-void
-nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
+void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
 {
-	struct inode *inode = state->inode;
-	struct file_lock *fl;
+	struct nfs4_lock_state *lsp;
 
-	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
-		if (!(fl->fl_flags & FL_POSIX))
-			continue;
-		if (fl->fl_owner != lsp->ls_owner)
-			continue;
-		/* Exit if we find at least one lock which is not consumed */
-		if (nfs4_check_unlock(fl,request) == 0)
-			return;
-	}
+	memcpy(dst, &state->stateid, sizeof(*dst));
+	if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
+		return;
 
-	write_lock(&state->state_lock);
-	list_del_init(&lsp->ls_locks);
-	if (list_empty(&state->lock_states))
-		clear_bit(LK_STATE_IN_USE, &state->flags);
-	write_unlock(&state->state_lock);
+	spin_lock(&state->state_lock);
+	lsp = __nfs4_find_lock_state(state, fl_owner);
+	if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+		memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
+	spin_unlock(&state->state_lock);
 	nfs4_put_lock_state(lsp);
 }
 
 /*
- * Release reference to lock_state, and free it if we see that
- * it is no longer in use
- */
-void
-nfs4_put_lock_state(struct nfs4_lock_state *lsp)
+* Called with state->lock_sema and clp->cl_sem held.
+*/
+void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
 {
-	if (!atomic_dec_and_test(&lsp->ls_count))
-		return;
-	BUG_ON (!list_empty(&lsp->ls_locks));
-	kfree(lsp);
+	if (status == NFS_OK || seqid_mutating_err(-status))
+		lsp->ls_seqid++;
 }
 
 /*
-- 
cgit v1.2.3


From 97d312d037e63e7c8ac004ffe3072f82a6d45495 Mon Sep 17 00:00:00 2001
From: Manoj Naik <manoj@almaden.ibm.com>
Date: Wed, 22 Jun 2005 17:16:39 +0000
Subject: [PATCH] NFSv4: add support for rdattr_error in NFSv4 readdir
 requests.

 Request RDATTR_ERROR as an attribute in readdir to distinguish between a
 directory being within an absent filesystem or one (or more) of its entries.

 Signed-off-by: Manoj Naik <manoj@almaden.ibm.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 325cd6d4f23..4d655d252c6 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1014,6 +1014,10 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
 static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
 {
 	struct rpc_auth *auth = req->rq_task->tk_auth;
+	uint32_t attrs[2] = {
+		FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
+		FATTR4_WORD1_MOUNTED_ON_FILEID,
+	};
 	int replen;
 	uint32_t *p;
 
@@ -1024,13 +1028,13 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
 	WRITE32(readdir->count >> 1);  /* We're not doing readdirplus */
 	WRITE32(readdir->count);
 	WRITE32(2);
-	if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) {
-		WRITE32(0);
-		WRITE32(FATTR4_WORD1_MOUNTED_ON_FILEID);
-	} else {
-		WRITE32(FATTR4_WORD0_FILEID);
-		WRITE32(0);
-	}
+	/* Switch to mounted_on_fileid if the server supports it */
+	if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
+		attrs[0] &= ~FATTR4_WORD0_FILEID;
+	else
+		attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
+	WRITE32(attrs[0] & readdir->bitmask[0]);
+	WRITE32(attrs[1] & readdir->bitmask[1]);
 
 	/* set up reply kvec
 	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
@@ -4060,6 +4064,12 @@ uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus)
 	}
 	len = XDR_QUADLEN(ntohl(*p++));	/* attribute buffer length */
 	if (len > 0) {
+		if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) {
+			bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
+			/* Ignore the return value of rdattr_error for now */
+			p++;
+			len--;
+		}
 		if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID)
 			xdr_decode_hyper(p, &entry->ino);
 		else if (bitmap[0] == FATTR4_WORD0_FILEID)
-- 
cgit v1.2.3


From 6ebf3656fd18430d90fbb3199b31d08178c37134 Mon Sep 17 00:00:00 2001
From: Manoj Naik <manoj@almaden.ibm.com>
Date: Wed, 22 Jun 2005 17:16:39 +0000
Subject: [PATCH] NFSv4: Map a couple of NFSv4 errors to EINVAL.

 This shows up on running tar over NFSv4.

 Signed-off-by: Manoj Naik <manoj@almaden.ibm.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4d655d252c6..577b4429c8f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4109,6 +4109,8 @@ static struct {
 	{ NFS4ERR_DQUOT,	EDQUOT		},
 	{ NFS4ERR_STALE,	ESTALE		},
 	{ NFS4ERR_BADHANDLE,	EBADHANDLE	},
+	{ NFS4ERR_BADOWNER,	EINVAL		},
+	{ NFS4ERR_BADNAME,	EINVAL		},
 	{ NFS4ERR_BAD_COOKIE,	EBADCOOKIE	},
 	{ NFS4ERR_NOTSUPP,	ENOTSUPP	},
 	{ NFS4ERR_TOOSMALL,	ETOOSMALL	},
-- 
cgit v1.2.3


From eadf4598e7ec37a234e70e965bd335860e58bda4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:39 +0000
Subject: [PATCH] NFS: Add debugging code to NFSv4 readdir

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c |  5 +++++
 fs/nfs/nfs4xdr.c  | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0ddc20102d4..1b76f80aedb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1722,6 +1722,10 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	};
 	int			status;
 
+	dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __FUNCTION__,
+			dentry->d_parent->d_name.name,
+			dentry->d_name.name,
+			(unsigned long long)cookie);
 	lock_kernel();
 	nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
 	res.pgbase = args.pgbase;
@@ -1729,6 +1733,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	if (status == 0)
 		memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
 	unlock_kernel();
+	dprintk("%s: returns %d\n", __FUNCTION__, status);
 	return status;
 }
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 577b4429c8f..6c564ef9489 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1035,6 +1035,13 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
 		attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
 	WRITE32(attrs[0] & readdir->bitmask[0]);
 	WRITE32(attrs[1] & readdir->bitmask[1]);
+	dprintk("%s: cookie = %Lu, verifier = 0x%x%x, bitmap = 0x%x%x\n",
+			__FUNCTION__,
+			(unsigned long long)readdir->cookie,
+			((u32 *)readdir->verifier.data)[0],
+			((u32 *)readdir->verifier.data)[1],
+			attrs[0] & readdir->bitmask[0],
+			attrs[1] & readdir->bitmask[1]);
 
 	/* set up reply kvec
 	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
@@ -1043,6 +1050,9 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
 	replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2;
 	xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages,
 			 readdir->pgbase, readdir->count);
+	dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
+			__FUNCTION__, replen, readdir->pages,
+			readdir->pgbase, readdir->count);
 
 	return 0;
 }
@@ -3066,6 +3076,11 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 		return status;
 	READ_BUF(8);
 	COPYMEM(readdir->verifier.data, 8);
+	dprintk("%s: verifier = 0x%x%x\n",
+			__FUNCTION__,
+			((u32 *)readdir->verifier.data)[0],
+			((u32 *)readdir->verifier.data)[1]);
+
 
 	hdrlen = (char *) p - (char *) iov->iov_base;
 	recvd = rcvbuf->len - hdrlen;
@@ -3080,12 +3095,14 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 	for (nr = 0; *p++; nr++) {
 		if (p + 3 > end)
 			goto short_pkt;
+		dprintk("cookie = %Lu, ", *((unsigned long long *)p));
 		p += 2;			/* cookie */
 		len = ntohl(*p++);	/* filename length */
 		if (len > NFS4_MAXNAMLEN) {
 			printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
 			goto err_unmap;
 		}
+		dprintk("filename = %*s\n", len, (char *)p);
 		p += XDR_QUADLEN(len);
 		if (p + 1 > end)
 			goto short_pkt;
@@ -3105,6 +3122,7 @@ out:
 	kunmap_atomic(kaddr, KM_USER0);
 	return 0;
 short_pkt:
+	dprintk("%s: short packet at entry %d\n", __FUNCTION__, nr);
 	entry[0] = entry[1] = 0;
 	/* truncate listing ? */
 	if (!nr) {
-- 
cgit v1.2.3


From 991114c6fa6a21d1fa4d544abe78592352860c82 Mon Sep 17 00:00:00 2001
From: Alexander Viro <aviro@redhat.com>
Date: Thu, 23 Jun 2005 00:09:01 -0700
Subject: [PATCH] fix for prune_icache()/forced final iput() races

Based on analysis and a patch from Russ Weight <rweight@us.ibm.com>

There is a race condition that can occur if an inode is allocated and then
released (using iput) during the ->fill_super functions.  The race
condition is between kswapd and mount.

For most filesystems this can only happen in an error path when kswapd is
running concurrently.  For isofs, however, the error can occur in a more
common code path (which is how the bug was found).

The logic here is "we want final iput() to free inode *now* instead of
letting it sit in cache if fs is going down or had not quite come up".  The
problem is with kswapd seeing such inodes in the middle of being killed and
happily taking over.

The clean solution would be to tell kswapd to leave those inodes alone and
let our final iput deal with them.  I.e.  add a new flag
(I_FORCED_FREEING), set it before write_inode_now() there and make
prune_icache() leave those alone.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inode.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 801fe7f3628..1f9a3a2b89b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -500,7 +500,7 @@ repeat:
 			continue;
 		if (!test(inode, data))
 			continue;
-		if (inode->i_state & (I_FREEING|I_CLEAR)) {
+		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
@@ -525,7 +525,7 @@ repeat:
 			continue;
 		if (inode->i_sb != sb)
 			continue;
-		if (inode->i_state & (I_FREEING|I_CLEAR)) {
+		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
@@ -727,7 +727,7 @@ EXPORT_SYMBOL(iunique);
 struct inode *igrab(struct inode *inode)
 {
 	spin_lock(&inode_lock);
-	if (!(inode->i_state & I_FREEING))
+	if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
 		__iget(inode);
 	else
 		/*
@@ -1024,17 +1024,21 @@ static void generic_forget_inode(struct inode *inode)
 		if (!(inode->i_state & (I_DIRTY|I_LOCK)))
 			list_move(&inode->i_list, &inode_unused);
 		inodes_stat.nr_unused++;
-		spin_unlock(&inode_lock);
-		if (!sb || (sb->s_flags & MS_ACTIVE))
+		if (!sb || (sb->s_flags & MS_ACTIVE)) {
+			spin_unlock(&inode_lock);
 			return;
+		}
+		inode->i_state |= I_WILL_FREE;
+		spin_unlock(&inode_lock);
 		write_inode_now(inode, 1);
 		spin_lock(&inode_lock);
+		inode->i_state &= ~I_WILL_FREE;
 		inodes_stat.nr_unused--;
 		hlist_del_init(&inode->i_hash);
 	}
 	list_del_init(&inode->i_list);
 	list_del_init(&inode->i_sb_list);
-	inode->i_state|=I_FREEING;
+	inode->i_state |= I_FREEING;
 	inodes_stat.nr_inodes--;
 	spin_unlock(&inode_lock);
 	if (inode->i_data.nrpages)
-- 
cgit v1.2.3


From ac20427ef6aa63da663bdc88b71d16f7394f5e23 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@redhat.com>
Date: Thu, 23 Jun 2005 00:09:11 -0700
Subject: [PATCH] add check to /proc/devices read routines

Patch to add check to get_chrdev_list and get_blkdev_list to prevent reads
of /proc/devices from spilling over the provided page if more than 4096
bytes of string data are generated from all the registered character and
block devices in a system

Signed-off-by: Neil Horman <nhorman@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: <viro@parcelfarce.linux.theplanet.co.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/char_dev.c       | 13 ++++++++++++-
 fs/proc/proc_misc.c |  2 +-
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/char_dev.c b/fs/char_dev.c
index c1e3537909f..e82aac9cc2f 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -56,10 +56,21 @@ int get_chrdev_list(char *page)
 
 	down(&chrdevs_lock);
 	for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) {
-		for (cd = chrdevs[i]; cd; cd = cd->next)
+		for (cd = chrdevs[i]; cd; cd = cd->next) {
+			/*
+			 * if the current name, plus the 5 extra characters
+			 * in the device line for this entry
+			 * would run us off the page, we're done
+			 */
+			if ((len+strlen(cd->name) + 5) >= PAGE_SIZE)
+				goto page_full;
+
+
 			len += sprintf(page+len, "%3d %s\n",
 				       cd->major, cd->name);
+		}
 	}
+page_full:
 	up(&chrdevs_lock);
 
 	return len;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 63a9fbf1ac5..94b570ad037 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -451,7 +451,7 @@ static int devices_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
 	int len = get_chrdev_list(page);
-	len += get_blkdev_list(page+len);
+	len += get_blkdev_list(page+len, len);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
-- 
cgit v1.2.3


From 5f45f1a78fbac3cc859ec10c5366e97d20d40fa2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 00:09:12 -0700
Subject: [PATCH] remove duplicate get_dentry functions in various places

Various filesystem drivers have grown a get_dentry() function that's a
duplicate of lookup_one_len, except that it doesn't take a maximum length
argument and doesn't check for \0 or / in the passed in filename.

Switch all these places to use lookup_one_len.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Greg KH <greg@kroah.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/debugfs/inode.c | 12 +-----------
 fs/sysfs/dir.c     |  5 +++--
 fs/sysfs/file.c    |  5 +++--
 fs/sysfs/group.c   |  4 +++-
 fs/sysfs/inode.c   | 10 ----------
 fs/sysfs/sysfs.h   |  1 -
 6 files changed, 10 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b529786699e..a86ac4aeaed 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -110,16 +110,6 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
 	return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
 }
 
-static struct dentry * get_dentry(struct dentry *parent, const char *name)
-{               
-	struct qstr qstr;
-
-	qstr.name = name;
-	qstr.len = strlen(name);
-	qstr.hash = full_name_hash(name,qstr.len);
-	return lookup_hash(&qstr,parent);
-}               
-
 static struct super_block *debug_get_sb(struct file_system_type *fs_type,
 				        int flags, const char *dev_name,
 					void *data)
@@ -157,7 +147,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
 
 	*dentry = NULL;
 	down(&parent->d_inode->i_sem);
-	*dentry = get_dentry (parent, name);
+	*dentry = lookup_one_len(name, parent, strlen(name));
 	if (!IS_ERR(dentry)) {
 		if ((mode & S_IFMT) == S_IFDIR)
 			error = debugfs_mkdir(parent->d_inode, *dentry, mode);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 37d7a6875d8..59734ba1ee6 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -8,6 +8,7 @@
 #include <linux/mount.h>
 #include <linux/module.h>
 #include <linux/kobject.h>
+#include <linux/namei.h>
 #include "sysfs.h"
 
 DECLARE_RWSEM(sysfs_rename_sem);
@@ -99,7 +100,7 @@ static int create_dir(struct kobject * k, struct dentry * p,
 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
 
 	down(&p->d_inode->i_sem);
-	*d = sysfs_get_dentry(p,n);
+	*d = lookup_one_len(n, p, strlen(n));
 	if (!IS_ERR(*d)) {
 		error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR);
 		if (!error) {
@@ -315,7 +316,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
 
 	down(&parent->d_inode->i_sem);
 
-	new_dentry = sysfs_get_dentry(parent, new_name);
+	new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
 	if (!IS_ERR(new_dentry)) {
   		if (!new_dentry->d_inode) {
 			error = kobject_set_name(kobj, "%s", new_name);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 849aac11546..e9cfa39f409 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -5,6 +5,7 @@
 #include <linux/module.h>
 #include <linux/dnotify.h>
 #include <linux/kobject.h>
+#include <linux/namei.h>
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
 
@@ -400,7 +401,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
 	int res = -ENOENT;
 
 	down(&dir->d_inode->i_sem);
-	victim = sysfs_get_dentry(dir, attr->name);
+	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
 	if (!IS_ERR(victim)) {
 		/* make sure dentry is really there */
 		if (victim->d_inode && 
@@ -443,7 +444,7 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
 	int res = -ENOENT;
 
 	down(&dir->d_inode->i_sem);
-	victim = sysfs_get_dentry(dir, attr->name);
+	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
 	if (!IS_ERR(victim)) {
 		if (victim->d_inode &&
 		    (victim->d_parent->d_inode == dir->d_inode)) {
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index f11ac5ea702..122145b0895 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -11,6 +11,7 @@
 #include <linux/kobject.h>
 #include <linux/module.h>
 #include <linux/dcache.h>
+#include <linux/namei.h>
 #include <linux/err.h>
 #include "sysfs.h"
 
@@ -68,7 +69,8 @@ void sysfs_remove_group(struct kobject * kobj,
 	struct dentry * dir;
 
 	if (grp->name)
-		dir = sysfs_get_dentry(kobj->dentry,grp->name);
+		dir = lookup_one_len(grp->name, kobj->dentry,
+				strlen(grp->name));
 	else
 		dir = dget(kobj->dentry);
 
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 565cac1d420..8de13bafaa7 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -166,16 +166,6 @@ int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
 	return error;
 }
 
-struct dentry * sysfs_get_dentry(struct dentry * parent, const char * name)
-{
-	struct qstr qstr;
-
-	qstr.name = name;
-	qstr.len = strlen(name);
-	qstr.hash = full_name_hash(name,qstr.len);
-	return lookup_hash(&qstr,parent);
-}
-
 /*
  * Get the name for corresponding element represented by the given sysfs_dirent
  */
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 29da6f5f07c..3f8953e0e5d 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -7,7 +7,6 @@ extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
 
 extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
 				umode_t, int);
-extern struct dentry * sysfs_get_dentry(struct dentry *, const char *);
 
 extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
 extern void sysfs_hash_and_remove(struct dentry * dir, const char * name);
-- 
cgit v1.2.3


From 84de856ed30c568c2bb7b9ac0679772bd2737d9b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 00:09:16 -0700
Subject: [PATCH] quota: consolidate code surrounding vfs_quota_on_mount

Move some code duplicated in both callers into vfs_quota_on_mount

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jan Kara <jack@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dquot.c          | 23 +++++++++++++++++++----
 fs/ext3/super.c     | 18 ++----------------
 fs/reiserfs/super.c | 21 +++------------------
 3 files changed, 24 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/dquot.c b/fs/dquot.c
index 3995ce7907c..343c0365561 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1519,14 +1519,29 @@ out_path:
  * This function is used when filesystem needs to initialize quotas
  * during mount time.
  */
-int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry)
+int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
+		int format_id, int type)
 {
+	struct qstr name = {.name = qf_name, .len = 0, .len = strlen(qf_name)};
+	struct dentry *dentry;
 	int error;
 
+	dentry = lookup_hash(&name, sb->s_root);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
 	error = security_quota_on(dentry);
-	if (error)
-		return error;
-	return vfs_quota_on_inode(dentry->d_inode, type, format_id);
+	if (!error)
+		error = vfs_quota_on_inode(dentry->d_inode, type, format_id);
+
+	/*
+	 * Now invalidate and put the dentry - quota got its own reference
+	 * to inode and dentry has at least wrong hash so we had better
+	 * throw it away.
+	 */
+	d_invalidate(dentry);
+	dput(dentry);
+	return error;
 }
 
 /* Generic routine for getting common part of quota structure */
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 981ccb233ef..9630fbfdc24 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2348,22 +2348,8 @@ static int ext3_write_info(struct super_block *sb, int type)
  */
 static int ext3_quota_on_mount(struct super_block *sb, int type)
 {
-	int err;
-	struct dentry *dentry;
-	struct qstr name = { .name = EXT3_SB(sb)->s_qf_names[type],
-			     .hash = 0,
-			     .len = strlen(EXT3_SB(sb)->s_qf_names[type])};
-
-	dentry = lookup_hash(&name, sb->s_root);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
-	err = vfs_quota_on_mount(type, EXT3_SB(sb)->s_jquota_fmt, dentry);
-	/* Now invalidate and put the dentry - quota got its own reference
-	 * to inode and dentry has at least wrong hash so we had better
-	 * throw it away */
-	d_invalidate(dentry);
-	dput(dentry);
-	return err;
+	return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
+			EXT3_SB(sb)->s_jquota_fmt, type);
 }
 
 /*
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b35b8774498..aae0779ed5b 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1932,27 +1932,12 @@ static int reiserfs_write_info(struct super_block *sb, int type)
 }
 
 /*
- * Turn on quotas during mount time - we need to find
- * the quota file and such...
+ * Turn on quotas during mount time - we need to find the quota file and such...
  */
 static int reiserfs_quota_on_mount(struct super_block *sb, int type)
 {
-    int err;
-    struct dentry *dentry;
-    struct qstr name = { .name = REISERFS_SB(sb)->s_qf_names[type],
-                         .hash = 0,
-                         .len = strlen(REISERFS_SB(sb)->s_qf_names[type])};
-
-    dentry = lookup_hash(&name, sb->s_root);
-    if (IS_ERR(dentry))
-            return PTR_ERR(dentry);
-    err = vfs_quota_on_mount(type, REISERFS_SB(sb)->s_jquota_fmt, dentry);
-    /* Now invalidate and put the dentry - quota got its own reference
-     * to inode and dentry has at least wrong hash so we had better
-     * throw it away */
-    d_invalidate(dentry);
-    dput(dentry);
-    return err;
+	return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type],
+			REISERFS_SB(sb)->s_jquota_fmt, type);
 }
 
 /*
-- 
cgit v1.2.3


From 2fa389c5eb8c97d621653184d2adf5fdbd4a3167 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 00:09:16 -0700
Subject: [PATCH] quota: sanitize dentry handling in vfs_quota_on_mount

Use lookup_one_len instead of opencoding a simplified lookup using
lookup_hash with a fake hash.

Also there's no need anymore for the d_invalidate as we have a completely
valid dentry now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dquot.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/dquot.c b/fs/dquot.c
index 343c0365561..37212b039a4 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1522,11 +1522,10 @@ out_path:
 int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
 		int format_id, int type)
 {
-	struct qstr name = {.name = qf_name, .len = 0, .len = strlen(qf_name)};
 	struct dentry *dentry;
 	int error;
 
-	dentry = lookup_hash(&name, sb->s_root);
+	dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
@@ -1534,12 +1533,6 @@ int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
 	if (!error)
 		error = vfs_quota_on_inode(dentry->d_inode, type, format_id);
 
-	/*
-	 * Now invalidate and put the dentry - quota got its own reference
-	 * to inode and dentry has at least wrong hash so we had better
-	 * throw it away.
-	 */
-	d_invalidate(dentry);
 	dput(dentry);
 	return error;
 }
-- 
cgit v1.2.3


From d6e711448137ca3301512cec41a2c2ce852b3d0a Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 23 Jun 2005 00:09:43 -0700
Subject: [PATCH] setuid core dump

Add a new `suid_dumpable' sysctl:

This value can be used to query and set the core dump mode for setuid
or otherwise protected/tainted binaries. The modes are

0 - (default) - traditional behaviour.  Any process which has changed
    privilege levels or is execute only will not be dumped

1 - (debug) - all processes dump core when possible.  The core dump is
    owned by the current user and no security is applied.  This is intended
    for system debugging situations only.  Ptrace is unchecked.

2 - (suidsafe) - any binary which normally would not be dumped is dumped
    readable by root only.  This allows the end user to remove such a dump but
    not access it directly.  For security reasons core dumps in this mode will
    not overwrite one another or other files.  This mode is appropriate when
    adminstrators are attempting to debug problems in a normal environment.

(akpm:

> > +EXPORT_SYMBOL(suid_dumpable);
>
> EXPORT_SYMBOL_GPL?

No problem to me.

> >  	if (current->euid == current->uid && current->egid == current->gid)
> >  		current->mm->dumpable = 1;
>
> Should this be SUID_DUMP_USER?

Actually the feedback I had from last time was that the SUID_ defines
should go because its clearer to follow the numbers. They can go
everywhere (and there are lots of places where dumpable is tested/used
as a bool in untouched code)

> Maybe this should be renamed to `dump_policy' or something.  Doing that
> would help us catch any code which isn't using the #defines, too.

Fair comment. The patch was designed to be easy to maintain for Red Hat
rather than for merging. Changing that field would create a gigantic
diff because it is used all over the place.

)

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c      | 23 +++++++++++++++++++++--
 fs/proc/base.c |  6 ++++--
 2 files changed, 25 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 3a4b35a14c0..48871917d36 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -58,6 +58,9 @@
 
 int core_uses_pid;
 char core_pattern[65] = "core";
+int suid_dumpable = 0;
+
+EXPORT_SYMBOL(suid_dumpable);
 /* The maximal length of core_pattern is also specified in sysctl.c */
 
 static struct linux_binfmt *formats;
@@ -864,6 +867,9 @@ int flush_old_exec(struct linux_binprm * bprm)
 
 	if (current->euid == current->uid && current->egid == current->gid)
 		current->mm->dumpable = 1;
+	else
+		current->mm->dumpable = suid_dumpable;
+
 	name = bprm->filename;
 
 	/* Copies the binary name from after last slash */
@@ -884,7 +890,7 @@ int flush_old_exec(struct linux_binprm * bprm)
 	    permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) ||
 	    (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
 		suid_keys(current);
-		current->mm->dumpable = 0;
+		current->mm->dumpable = suid_dumpable;
 	}
 
 	/* An exec changes our domain. We are no longer part of the thread
@@ -1432,6 +1438,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	struct inode * inode;
 	struct file * file;
 	int retval = 0;
+	int fsuid = current->fsuid;
+	int flag = 0;
 
 	binfmt = current->binfmt;
 	if (!binfmt || !binfmt->core_dump)
@@ -1441,6 +1449,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 		up_write(&mm->mmap_sem);
 		goto fail;
 	}
+
+	/*
+	 *	We cannot trust fsuid as being the "true" uid of the
+	 *	process nor do we know its entire history. We only know it
+	 *	was tainted so we dump it as root in mode 2.
+	 */
+	if (mm->dumpable == 2) {	/* Setuid core dump mode */
+		flag = O_EXCL;		/* Stop rewrite attacks */
+		current->fsuid = 0;	/* Dump root private */
+	}
 	mm->dumpable = 0;
 	init_completion(&mm->core_done);
 	spin_lock_irq(&current->sighand->siglock);
@@ -1466,7 +1484,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
  	lock_kernel();
 	format_corename(corename, core_pattern, signr);
 	unlock_kernel();
-	file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE, 0600);
+	file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600);
 	if (IS_ERR(file))
 		goto fail_unlock;
 	inode = file->f_dentry->d_inode;
@@ -1491,6 +1509,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 close_fail:
 	filp_close(file, NULL);
 fail_unlock:
+	current->fsuid = fsuid;
 	complete_all(&mm->core_done);
 fail:
 	return retval;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e31903aadd9..ace151fa487 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -314,7 +314,7 @@ static int may_ptrace_attach(struct task_struct *task)
 	     (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
 		goto out;
 	rmb();
-	if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
+	if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE))
 		goto out;
 	if (security_ptrace(current, task))
 		goto out;
@@ -1113,7 +1113,9 @@ static int task_dumpable(struct task_struct *task)
 	if (mm)
 		dumpable = mm->dumpable;
 	task_unlock(task);
-	return dumpable;
+	if(dumpable == 1)
+		return 1;
+	return 0;
 }
 
 
-- 
cgit v1.2.3


From acfa1823d33859b0db77701726c9ca5ccc6e6f25 Mon Sep 17 00:00:00 2001
From: Andreas Dilger <adilger@clusterfs.com>
Date: Thu, 23 Jun 2005 00:09:45 -0700
Subject: [PATCH] Support for dx directories in ext3_get_parent (NFSD)

Henrik Grubbstrom noted:

The 2.6.10 ext3_get_parent attempts to use ext3_find_entry to look up the
entry "..", which fails for dx directories since ".." is not present in the
directory hash table.  The patch below solves this by looking up the dotdot
entry in the dx_root block.

Typical symptoms of the above bug are intermittent claims by nfsd that
files or directories are missing on exported ext3 filesystems.

cf https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=3D150759 and
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=3D144556

ext3_get_parent() is IMHO the wrong place to fix this bug as it introduces
a lot of internals from htree into that function.  Instead, I think this
should be fixed in ext3_find_entry() as in the below patch.  This has the
added advantage that it works for any callers of ext3_find_entry() and not
just ext3_lookup_parent().

Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Henrik Grubbstrom <grubba@grubba.org>
Cc: <ext2-devel@lists.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/namei.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 79742d824a0..60e44e6dd7a 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -932,8 +932,16 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
 	struct inode *dir = dentry->d_parent->d_inode;
 
 	sb = dir->i_sb;
-	if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
-		return NULL;
+	/* NFS may look up ".." - look at dx_root directory block */
+	if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
+		if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
+			return NULL;
+	} else {
+		frame = frames;
+		frame->bh = NULL;			/* for dx_release() */
+		frame->at = (struct dx_entry *)frames;	/* hack for zero entry*/
+		dx_set_block(frame->at, 0);		/* dx_root block is 0 */
+	}
 	hash = hinfo.hash;
 	do {
 		block = dx_get_block(frame->at);
-- 
cgit v1.2.3


From c663e5d80ebec426916ad2aa5400c7ec99aa572e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 00:09:49 -0700
Subject: [PATCH] add some comments to lookup_create()

In a duplicate of lookup_create in the af_unix code Al commented what's
going on nicely, so let's bring that over to lookup_create before the copy
is going away (I'll send a patch soon)

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namei.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index a7f7f44119b..fa8df81ce8c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1577,19 +1577,35 @@ do_link:
  *
  * Simple function to lookup and return a dentry and create it
  * if it doesn't exist.  Is SMP-safe.
+ *
+ * Returns with nd->dentry->d_inode->i_sem locked.
  */
 struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 {
-	struct dentry *dentry;
+	struct dentry *dentry = ERR_PTR(-EEXIST);
 
 	down(&nd->dentry->d_inode->i_sem);
-	dentry = ERR_PTR(-EEXIST);
+	/*
+	 * Yucky last component or no last component at all?
+	 * (foo/., foo/.., /////)
+	 */
 	if (nd->last_type != LAST_NORM)
 		goto fail;
 	nd->flags &= ~LOOKUP_PARENT;
+
+	/*
+	 * Do the final lookup.
+	 */
 	dentry = lookup_hash(&nd->last, nd->dentry);
 	if (IS_ERR(dentry))
 		goto fail;
+
+	/*
+	 * Special case - lookup gave negative, but... we had foo/bar/
+	 * From the vfs_mknod() POV we just have a negative dentry -
+	 * all is fine. Let's be bastards - you had / on the end, you've
+	 * been asking for (non-existent) directory. -ENOENT for you.
+	 */
 	if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
 		goto enoent;
 	return dentry;
-- 
cgit v1.2.3


From af4d2ecbf007b7df3db7a41eedccdc05b8006d0b Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@sw.ru>
Date: Thu, 23 Jun 2005 00:09:50 -0700
Subject: [PATCH] Fix of bogus file max limit messages

This patch fixes incorrect and bogus kernel messages that file-max limit
reached when the allocation fails

Signed-Off-By: Kirill Korotaev <dev@sw.ru>
Signed-Off-By: Denis Lunev <den@sw.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/file_table.c | 57 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 30 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/file_table.c b/fs/file_table.c
index 03d83cb686b..fa7849fae13 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -63,42 +63,45 @@ static inline void file_free(struct file *f)
  */
 struct file *get_empty_filp(void)
 {
-static int old_max;
+	static int old_max;
 	struct file * f;
 
 	/*
 	 * Privileged users can go above max_files
 	 */
-	if (files_stat.nr_files < files_stat.max_files ||
-				capable(CAP_SYS_ADMIN)) {
-		f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
-		if (f) {
-			memset(f, 0, sizeof(*f));
-			if (security_file_alloc(f)) {
-				file_free(f);
-				goto fail;
-			}
-			eventpoll_init_file(f);
-			atomic_set(&f->f_count, 1);
-			f->f_uid = current->fsuid;
-			f->f_gid = current->fsgid;
-			rwlock_init(&f->f_owner.lock);
-			/* f->f_version: 0 */
-			INIT_LIST_HEAD(&f->f_list);
-			f->f_maxcount = INT_MAX;
-			return f;
-		}
-	}
-
+	if (files_stat.nr_files >= files_stat.max_files &&
+				!capable(CAP_SYS_ADMIN))
+		goto over;
+
+	f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
+	if (f == NULL)
+		goto fail;
+
+	memset(f, 0, sizeof(*f));
+	if (security_file_alloc(f))
+		goto fail_sec;
+
+	eventpoll_init_file(f);
+	atomic_set(&f->f_count, 1);
+	f->f_uid = current->fsuid;
+	f->f_gid = current->fsgid;
+	rwlock_init(&f->f_owner.lock);
+	/* f->f_version: 0 */
+	INIT_LIST_HEAD(&f->f_list);
+	f->f_maxcount = INT_MAX;
+	return f;
+
+over:
 	/* Ran out of filps - report that */
-	if (files_stat.max_files >= old_max) {
+	if (files_stat.nr_files > old_max) {
 		printk(KERN_INFO "VFS: file-max limit %d reached\n",
 					files_stat.max_files);
-		old_max = files_stat.max_files;
-	} else {
-		/* Big problems... */
-		printk(KERN_WARNING "VFS: filp allocation failed\n");
+		old_max = files_stat.nr_files;
 	}
+	goto fail;
+
+fail_sec:
+	file_free(f);
 fail:
 	return NULL;
 }
-- 
cgit v1.2.3


From 618f06362ae3f60f95d7b0e666de25ee6ae35679 Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@sw.ru>
Date: Thu, 23 Jun 2005 00:09:54 -0700
Subject: [PATCH] O(1) sb list traversing on syncs

This patch removes O(n^2) super block loops in sync_inodes(),
sync_filesystems() etc.  in favour of using __put_super_and_need_restart()
which I introduced earlier.  We faced a noticably long freezes on sb
syncing when there are thousands of super blocks in the system.

Signed-Off-By: Kirill Korotaev <dev@sw.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fs-writeback.c | 64 ++++++++++++++++++------------------------
 fs/quota.c        | 60 ++++++++++++++++------------------------
 fs/super.c        | 83 ++++++++++++++++++++++++++++++-------------------------
 3 files changed, 96 insertions(+), 111 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8e050fa5821..e94ab398b71 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -485,32 +485,6 @@ static void set_sb_syncing(int val)
 	spin_unlock(&sb_lock);
 }
 
-/*
- * Find a superblock with inodes that need to be synced
- */
-static struct super_block *get_super_to_sync(void)
-{
-	struct super_block *sb;
-restart:
-	spin_lock(&sb_lock);
-	sb = sb_entry(super_blocks.prev);
-	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
-		if (sb->s_syncing)
-			continue;
-		sb->s_syncing = 1;
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (!sb->s_root) {
-			drop_super(sb);
-			goto restart;
-		}
-		return sb;
-	}
-	spin_unlock(&sb_lock);
-	return NULL;
-}
-
 /**
  * sync_inodes - writes all inodes to disk
  * @wait: wait for completion
@@ -530,23 +504,39 @@ restart:
  * outstanding dirty inodes, the writeback goes block-at-a-time within the
  * filesystem's write_inode().  This is extremely slow.
  */
-void sync_inodes(int wait)
+static void __sync_inodes(int wait)
 {
 	struct super_block *sb;
 
-	set_sb_syncing(0);
-	while ((sb = get_super_to_sync()) != NULL) {
-		sync_inodes_sb(sb, 0);
-		sync_blockdev(sb->s_bdev);
-		drop_super(sb);
+	spin_lock(&sb_lock);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_syncing)
+			continue;
+		sb->s_syncing = 1;
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		if (sb->s_root) {
+			sync_inodes_sb(sb, wait);
+			sync_blockdev(sb->s_bdev);
+		}
+		up_read(&sb->s_umount);
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
 	}
+	spin_unlock(&sb_lock);
+}
+
+void sync_inodes(int wait)
+{
+	set_sb_syncing(0);
+	__sync_inodes(0);
+
 	if (wait) {
 		set_sb_syncing(0);
-		while ((sb = get_super_to_sync()) != NULL) {
-			sync_inodes_sb(sb, 1);
-			sync_blockdev(sb->s_bdev);
-			drop_super(sb);
-		}
+		__sync_inodes(1);
 	}
 }
 
diff --git a/fs/quota.c b/fs/quota.c
index 3f0333a51a2..f5d1cff5519 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -149,36 +149,6 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t
 	return error;
 }
 
-static struct super_block *get_super_to_sync(int type)
-{
-	struct list_head *head;
-	int cnt, dirty;
-
-restart:
-	spin_lock(&sb_lock);
-	list_for_each(head, &super_blocks) {
-		struct super_block *sb = list_entry(head, struct super_block, s_list);
-
-		/* This test just improves performance so it needn't be reliable... */
-		for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
-			if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
-			    && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
-				dirty = 1;
-		if (!dirty)
-			continue;
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (!sb->s_root) {
-			drop_super(sb);
-			goto restart;
-		}
-		return sb;
-	}
-	spin_unlock(&sb_lock);
-	return NULL;
-}
-
 static void quota_sync_sb(struct super_block *sb, int type)
 {
 	int cnt;
@@ -219,17 +189,35 @@ static void quota_sync_sb(struct super_block *sb, int type)
 
 void sync_dquots(struct super_block *sb, int type)
 {
+	int cnt, dirty;
+
 	if (sb) {
 		if (sb->s_qcop->quota_sync)
 			quota_sync_sb(sb, type);
+		return;
 	}
-	else {
-		while ((sb = get_super_to_sync(type)) != NULL) {
-			if (sb->s_qcop->quota_sync)
-				quota_sync_sb(sb, type);
-			drop_super(sb);
-		}
+
+	spin_lock(&sb_lock);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		/* This test just improves performance so it needn't be reliable... */
+		for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
+			if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
+			    && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
+				dirty = 1;
+		if (!dirty)
+			continue;
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		if (sb->s_root && sb->s_qcop->quota_sync)
+			quota_sync_sb(sb, type);
+		up_read(&sb->s_umount);
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
 	}
+	spin_unlock(&sb_lock);
 }
 
 /* Copy parameters and call proper function */
diff --git a/fs/super.c b/fs/super.c
index 573bcc81bb8..25bc1ec6bc5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -341,20 +341,22 @@ static inline void write_super(struct super_block *sb)
  */
 void sync_supers(void)
 {
-	struct super_block * sb;
-restart:
+	struct super_block *sb;
+
 	spin_lock(&sb_lock);
-	sb = sb_entry(super_blocks.next);
-	while (sb != sb_entry(&super_blocks))
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (sb->s_dirt) {
 			sb->s_count++;
 			spin_unlock(&sb_lock);
 			down_read(&sb->s_umount);
 			write_super(sb);
-			drop_super(sb);
-			goto restart;
-		} else
-			sb = sb_entry(sb->s_list.next);
+			up_read(&sb->s_umount);
+			spin_lock(&sb_lock);
+			if (__put_super_and_need_restart(sb))
+				goto restart;
+		}
+	}
 	spin_unlock(&sb_lock);
 }
 
@@ -381,20 +383,16 @@ void sync_filesystems(int wait)
 
 	down(&mutex);		/* Could be down_interruptible */
 	spin_lock(&sb_lock);
-	for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
-			sb = sb_entry(sb->s_list.next)) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (!sb->s_op->sync_fs)
 			continue;
 		if (sb->s_flags & MS_RDONLY)
 			continue;
 		sb->s_need_sync_fs = 1;
 	}
-	spin_unlock(&sb_lock);
 
 restart:
-	spin_lock(&sb_lock);
-	for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
-			sb = sb_entry(sb->s_list.next)) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (!sb->s_need_sync_fs)
 			continue;
 		sb->s_need_sync_fs = 0;
@@ -405,8 +403,11 @@ restart:
 		down_read(&sb->s_umount);
 		if (sb->s_root && (wait || sb->s_dirt))
 			sb->s_op->sync_fs(sb, wait);
-		drop_super(sb);
-		goto restart;
+		up_read(&sb->s_umount);
+		/* restart only when sb is no longer on the list */
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
 	}
 	spin_unlock(&sb_lock);
 	up(&mutex);
@@ -422,21 +423,25 @@ restart:
 
 struct super_block * get_super(struct block_device *bdev)
 {
-	struct list_head *p;
+	struct super_block *sb;
+
 	if (!bdev)
 		return NULL;
-rescan:
+
 	spin_lock(&sb_lock);
-	list_for_each(p, &super_blocks) {
-		struct super_block *s = sb_entry(p);
-		if (s->s_bdev == bdev) {
-			s->s_count++;
+rescan:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_bdev == bdev) {
+			sb->s_count++;
 			spin_unlock(&sb_lock);
-			down_read(&s->s_umount);
-			if (s->s_root)
-				return s;
-			drop_super(s);
-			goto rescan;
+			down_read(&sb->s_umount);
+			if (sb->s_root)
+				return sb;
+			up_read(&sb->s_umount);
+			/* restart only when sb is no longer on the list */
+			spin_lock(&sb_lock);
+			if (__put_super_and_need_restart(sb))
+				goto rescan;
 		}
 	}
 	spin_unlock(&sb_lock);
@@ -447,20 +452,22 @@ EXPORT_SYMBOL(get_super);
  
 struct super_block * user_get_super(dev_t dev)
 {
-	struct list_head *p;
+	struct super_block *sb;
 
-rescan:
 	spin_lock(&sb_lock);
-	list_for_each(p, &super_blocks) {
-		struct super_block *s = sb_entry(p);
-		if (s->s_dev ==  dev) {
-			s->s_count++;
+rescan:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_dev ==  dev) {
+			sb->s_count++;
 			spin_unlock(&sb_lock);
-			down_read(&s->s_umount);
-			if (s->s_root)
-				return s;
-			drop_super(s);
-			goto rescan;
+			down_read(&sb->s_umount);
+			if (sb->s_root)
+				return sb;
+			up_read(&sb->s_umount);
+			/* restart only when sb is no longer on the list */
+			spin_lock(&sb_lock);
+			if (__put_super_and_need_restart(sb))
+				goto rescan;
 		}
 	}
 	spin_unlock(&sb_lock);
-- 
cgit v1.2.3


From ef3daeda7b58f046f94b26637d500354038d39f4 Mon Sep 17 00:00:00 2001
From: Yoav Zach <yoav_zach@yahoo.com>
Date: Thu, 23 Jun 2005 00:09:58 -0700
Subject: [PATCH] Don't force O_LARGEFILE for 32 bit processes on ia64

In ia64 kernel, the O_LARGEFILE flag is forced when opening a file.  This
is problematic for execution of 32 bit processes, which are not largefile
aware, either by SW emulation or by HW execution.

For such processes, the problem is two-fold:

1) When trying to open a file that is larger than 4G
   the operation should fail, but it's not
2) Writing to offset larger than 4G should fail, but
   it's not

The proposed patch takes advantage of the way 32 bit processes are
identified in ia64 systems.  Such processes have PER_LINUX32 for their
personality.  With the patch, the ia64 kernel will not enforce the
O_LARGEFILE flag if the current process has PER_LINUX32 set.  The behavior
for all other architectures remains unchanged.

Signed-off-by: Yoav Zach <yoav.zach@intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/open.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 963bd81a44c..2ebb72c1a87 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -21,6 +21,7 @@
 #include <linux/vfs.h>
 #include <asm/uaccess.h>
 #include <linux/fs.h>
+#include <linux/personality.h>
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
 
@@ -935,9 +936,9 @@ asmlinkage long sys_open(const char __user * filename, int flags, int mode)
 	char * tmp;
 	int fd, error;
 
-#if BITS_PER_LONG != 32
-	flags |= O_LARGEFILE;
-#endif
+	if (force_o_largefile())
+		flags |= O_LARGEFILE;
+
 	tmp = getname(filename);
 	fd = PTR_ERR(tmp);
 	if (!IS_ERR(tmp)) {
-- 
cgit v1.2.3


From dfb388bf8a328f206bba33933dd97230f412238b Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Thu, 23 Jun 2005 00:10:02 -0700
Subject: [PATCH] factor out common code in sys_fsync/sys_fdatasync

This patch consolidates sys_fsync and sys_fdatasync.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 45 ++++++++++-----------------------------------
 1 file changed, 10 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 0befa724ab9..12bdb279112 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -331,7 +331,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
 	return ret;
 }
 
-asmlinkage long sys_fsync(unsigned int fd)
+static long do_fsync(unsigned int fd, int datasync)
 {
 	struct file * file;
 	struct address_space *mapping;
@@ -342,14 +342,14 @@ asmlinkage long sys_fsync(unsigned int fd)
 	if (!file)
 		goto out;
 
-	mapping = file->f_mapping;
-
 	ret = -EINVAL;
 	if (!file->f_op || !file->f_op->fsync) {
 		/* Why?  We can still call filemap_fdatawrite */
 		goto out_putf;
 	}
 
+	mapping = file->f_mapping;
+
 	current->flags |= PF_SYNCWRITE;
 	ret = filemap_fdatawrite(mapping);
 
@@ -358,7 +358,7 @@ asmlinkage long sys_fsync(unsigned int fd)
 	 * which could cause livelocks in fsync_buffers_list
 	 */
 	down(&mapping->host->i_sem);
-	err = file->f_op->fsync(file, file->f_dentry, 0);
+	err = file->f_op->fsync(file, file->f_dentry, datasync);
 	if (!ret)
 		ret = err;
 	up(&mapping->host->i_sem);
@@ -373,39 +373,14 @@ out:
 	return ret;
 }
 
-asmlinkage long sys_fdatasync(unsigned int fd)
+asmlinkage long sys_fsync(unsigned int fd)
 {
-	struct file * file;
-	struct address_space *mapping;
-	int ret, err;
-
-	ret = -EBADF;
-	file = fget(fd);
-	if (!file)
-		goto out;
-
-	ret = -EINVAL;
-	if (!file->f_op || !file->f_op->fsync)
-		goto out_putf;
-
-	mapping = file->f_mapping;
-
-	current->flags |= PF_SYNCWRITE;
-	ret = filemap_fdatawrite(mapping);
-	down(&mapping->host->i_sem);
-	err = file->f_op->fsync(file, file->f_dentry, 1);
-	if (!ret)
-		ret = err;
-	up(&mapping->host->i_sem);
-	err = filemap_fdatawait(mapping);
-	if (!ret)
-		ret = err;
-	current->flags &= ~PF_SYNCWRITE;
+	return do_fsync(fd, 0);
+}
 
-out_putf:
-	fput(file);
-out:
-	return ret;
+asmlinkage long sys_fdatasync(unsigned int fd)
+{
+	return do_fsync(fd, 1);
 }
 
 /*
-- 
cgit v1.2.3


From b030a4dd609e167da7f73c2d1fa5af864a0aea17 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 23 Jun 2005 00:10:03 -0700
Subject: [PATCH] Remove eventpoll macro obfuscation

This patch gets rid of some macro obfuscation from fs/eventpoll.c by
removing slab allocator wrappers and converting macros to static inline
functions.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/eventpoll.c | 195 ++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 110 insertions(+), 85 deletions(-)

(limited to 'fs')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9900e333655..6ab1dd0ca90 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -101,57 +101,6 @@
 /* Maximum number of poll wake up nests we are allowing */
 #define EP_MAX_POLLWAKE_NESTS 4
 
-/* Macro to allocate a "struct epitem" from the slab cache */
-#define EPI_MEM_ALLOC()	(struct epitem *) kmem_cache_alloc(epi_cache, SLAB_KERNEL)
-
-/* Macro to free a "struct epitem" to the slab cache */
-#define EPI_MEM_FREE(p) kmem_cache_free(epi_cache, p)
-
-/* Macro to allocate a "struct eppoll_entry" from the slab cache */
-#define PWQ_MEM_ALLOC()	(struct eppoll_entry *) kmem_cache_alloc(pwq_cache, SLAB_KERNEL)
-
-/* Macro to free a "struct eppoll_entry" to the slab cache */
-#define PWQ_MEM_FREE(p) kmem_cache_free(pwq_cache, p)
-
-/* Fast test to see if the file is an evenpoll file */
-#define IS_FILE_EPOLL(f) ((f)->f_op == &eventpoll_fops)
-
-/* Setup the structure that is used as key for the rb-tree */
-#define EP_SET_FFD(p, f, d) do { (p)->file = (f); (p)->fd = (d); } while (0)
-
-/* Compare rb-tree keys */
-#define EP_CMP_FFD(p1, p2) ((p1)->file > (p2)->file ? +1: \
-			    ((p1)->file < (p2)->file ? -1: (p1)->fd - (p2)->fd))
-
-/* Special initialization for the rb-tree node to detect linkage */
-#define EP_RB_INITNODE(n) (n)->rb_parent = (n)
-
-/* Removes a node from the rb-tree and marks it for a fast is-linked check */
-#define EP_RB_ERASE(n, r) do { rb_erase(n, r); (n)->rb_parent = (n); } while (0)
-
-/* Fast check to verify that the item is linked to the main rb-tree */
-#define EP_RB_LINKED(n) ((n)->rb_parent != (n))
-
-/*
- * Remove the item from the list and perform its initialization.
- * This is useful for us because we can test if the item is linked
- * using "EP_IS_LINKED(p)".
- */
-#define EP_LIST_DEL(p) do { list_del(p); INIT_LIST_HEAD(p); } while (0)
-
-/* Tells us if the item is currently linked */
-#define EP_IS_LINKED(p) (!list_empty(p))
-
-/* Get the "struct epitem" from a wait queue pointer */
-#define EP_ITEM_FROM_WAIT(p) ((struct epitem *) container_of(p, struct eppoll_entry, wait)->base)
-
-/* Get the "struct epitem" from an epoll queue wrapper */
-#define EP_ITEM_FROM_EPQUEUE(p) (container_of(p, struct ep_pqueue, pt)->epi)
-
-/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
-#define EP_OP_HASH_EVENT(op) ((op) != EPOLL_CTL_DEL)
-
-
 struct epoll_filefd {
 	struct file *file;
 	int fd;
@@ -357,6 +306,82 @@ static struct dentry_operations eventpollfs_dentry_operations = {
 
 
+/* Fast test to see if the file is an evenpoll file */
+static inline int is_file_epoll(struct file *f)
+{
+	return f->f_op == &eventpoll_fops;
+}
+
+/* Setup the structure that is used as key for the rb-tree */
+static inline void ep_set_ffd(struct epoll_filefd *ffd,
+			      struct file *file, int fd)
+{
+	ffd->file = file;
+	ffd->fd = fd;
+}
+
+/* Compare rb-tree keys */
+static inline int ep_cmp_ffd(struct epoll_filefd *p1,
+			     struct epoll_filefd *p2)
+{
+	return (p1->file > p2->file ? +1:
+	        (p1->file < p2->file ? -1 : p1->fd - p2->fd));
+}
+
+/* Special initialization for the rb-tree node to detect linkage */
+static inline void ep_rb_initnode(struct rb_node *n)
+{
+	n->rb_parent = n;
+}
+
+/* Removes a node from the rb-tree and marks it for a fast is-linked check */
+static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
+{
+	rb_erase(n, r);
+	n->rb_parent = n;
+}
+
+/* Fast check to verify that the item is linked to the main rb-tree */
+static inline int ep_rb_linked(struct rb_node *n)
+{
+	return n->rb_parent != n;
+}
+
+/*
+ * Remove the item from the list and perform its initialization.
+ * This is useful for us because we can test if the item is linked
+ * using "ep_is_linked(p)".
+ */
+static inline void ep_list_del(struct list_head *p)
+{
+	list_del(p);
+	INIT_LIST_HEAD(p);
+}
+
+/* Tells us if the item is currently linked */
+static inline int ep_is_linked(struct list_head *p)
+{
+	return !list_empty(p);
+}
+
+/* Get the "struct epitem" from a wait queue pointer */
+static inline struct epitem * ep_item_from_wait(wait_queue_t *p)
+{
+	return container_of(p, struct eppoll_entry, wait)->base;
+}
+
+/* Get the "struct epitem" from an epoll queue wrapper */
+static inline struct epitem * ep_item_from_epqueue(poll_table *p)
+{
+	return container_of(p, struct ep_pqueue, pt)->epi;
+}
+
+/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
+static inline int ep_op_hash_event(int op)
+{
+	return op != EPOLL_CTL_DEL;
+}
+
 /* Initialize the poll safe wake up structure */
 static void ep_poll_safewake_init(struct poll_safewake *psw)
 {
@@ -456,7 +481,7 @@ void eventpoll_release_file(struct file *file)
 		epi = list_entry(lsthead->next, struct epitem, fllink);
 
 		ep = epi->ep;
-		EP_LIST_DEL(&epi->fllink);
+		ep_list_del(&epi->fllink);
 		down_write(&ep->sem);
 		ep_remove(ep, epi);
 		up_write(&ep->sem);
@@ -534,7 +559,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
 		     current, epfd, op, fd, event));
 
 	error = -EFAULT;
-	if (EP_OP_HASH_EVENT(op) &&
+	if (ep_op_hash_event(op) &&
 	    copy_from_user(&epds, event, sizeof(struct epoll_event)))
 		goto eexit_1;
 
@@ -560,7 +585,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
 	 * adding an epoll file descriptor inside itself.
 	 */
 	error = -EINVAL;
-	if (file == tfile || !IS_FILE_EPOLL(file))
+	if (file == tfile || !is_file_epoll(file))
 		goto eexit_3;
 
 	/*
@@ -656,7 +681,7 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
 	 * the user passed to us _is_ an eventpoll file.
 	 */
 	error = -EINVAL;
-	if (!IS_FILE_EPOLL(file))
+	if (!is_file_epoll(file))
 		goto eexit_2;
 
 	/*
@@ -831,11 +856,11 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
 	struct epitem *epi, *epir = NULL;
 	struct epoll_filefd ffd;
 
-	EP_SET_FFD(&ffd, file, fd);
+	ep_set_ffd(&ffd, file, fd);
 	read_lock_irqsave(&ep->lock, flags);
 	for (rbp = ep->rbr.rb_node; rbp; ) {
 		epi = rb_entry(rbp, struct epitem, rbn);
-		kcmp = EP_CMP_FFD(&ffd, &epi->ffd);
+		kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
 		if (kcmp > 0)
 			rbp = rbp->rb_right;
 		else if (kcmp < 0)
@@ -875,7 +900,7 @@ static void ep_release_epitem(struct epitem *epi)
 {
 
 	if (atomic_dec_and_test(&epi->usecnt))
-		EPI_MEM_FREE(epi);
+		kmem_cache_free(epi_cache, epi);
 }
 
 
@@ -886,10 +911,10 @@ static void ep_release_epitem(struct epitem *epi)
 static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
 				 poll_table *pt)
 {
-	struct epitem *epi = EP_ITEM_FROM_EPQUEUE(pt);
+	struct epitem *epi = ep_item_from_epqueue(pt);
 	struct eppoll_entry *pwq;
 
-	if (epi->nwait >= 0 && (pwq = PWQ_MEM_ALLOC())) {
+	if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, SLAB_KERNEL))) {
 		init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
 		pwq->whead = whead;
 		pwq->base = epi;
@@ -912,7 +937,7 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
 	while (*p) {
 		parent = *p;
 		epic = rb_entry(parent, struct epitem, rbn);
-		kcmp = EP_CMP_FFD(&epi->ffd, &epic->ffd);
+		kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd);
 		if (kcmp > 0)
 			p = &parent->rb_right;
 		else
@@ -932,17 +957,17 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	struct ep_pqueue epq;
 
 	error = -ENOMEM;
-	if (!(epi = EPI_MEM_ALLOC()))
+	if (!(epi = kmem_cache_alloc(epi_cache, SLAB_KERNEL)))
 		goto eexit_1;
 
 	/* Item initialization follow here ... */
-	EP_RB_INITNODE(&epi->rbn);
+	ep_rb_initnode(&epi->rbn);
 	INIT_LIST_HEAD(&epi->rdllink);
 	INIT_LIST_HEAD(&epi->fllink);
 	INIT_LIST_HEAD(&epi->txlink);
 	INIT_LIST_HEAD(&epi->pwqlist);
 	epi->ep = ep;
-	EP_SET_FFD(&epi->ffd, tfile, fd);
+	ep_set_ffd(&epi->ffd, tfile, fd);
 	epi->event = *event;
 	atomic_set(&epi->usecnt, 1);
 	epi->nwait = 0;
@@ -978,7 +1003,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	ep_rbtree_insert(ep, epi);
 
 	/* If the file is already "ready" we drop it inside the ready list */
-	if ((revents & event->events) && !EP_IS_LINKED(&epi->rdllink)) {
+	if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
 		list_add_tail(&epi->rdllink, &ep->rdllist);
 
 		/* Notify waiting tasks that events are available */
@@ -1007,11 +1032,11 @@ eexit_2:
 	 * allocated wait queue.
 	 */
 	write_lock_irqsave(&ep->lock, flags);
-	if (EP_IS_LINKED(&epi->rdllink))
-		EP_LIST_DEL(&epi->rdllink);
+	if (ep_is_linked(&epi->rdllink))
+		ep_list_del(&epi->rdllink);
 	write_unlock_irqrestore(&ep->lock, flags);
 
-	EPI_MEM_FREE(epi);
+	kmem_cache_free(epi_cache, epi);
 eexit_1:
 	return error;
 }
@@ -1050,14 +1075,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 	 * If the item is not linked to the hash it means that it's on its
 	 * way toward the removal. Do nothing in this case.
 	 */
-	if (EP_RB_LINKED(&epi->rbn)) {
+	if (ep_rb_linked(&epi->rbn)) {
 		/*
 		 * If the item is "hot" and it is not registered inside the ready
 		 * list, push it inside. If the item is not "hot" and it is currently
 		 * registered inside the ready list, unlink it.
 		 */
 		if (revents & event->events) {
-			if (!EP_IS_LINKED(&epi->rdllink)) {
+			if (!ep_is_linked(&epi->rdllink)) {
 				list_add_tail(&epi->rdllink, &ep->rdllist);
 
 				/* Notify waiting tasks that events are available */
@@ -1097,9 +1122,9 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
 		while (!list_empty(lsthead)) {
 			pwq = list_entry(lsthead->next, struct eppoll_entry, llink);
 
-			EP_LIST_DEL(&pwq->llink);
+			ep_list_del(&pwq->llink);
 			remove_wait_queue(pwq->whead, &pwq->wait);
-			PWQ_MEM_FREE(pwq);
+			kmem_cache_free(pwq_cache, pwq);
 		}
 	}
 }
@@ -1118,7 +1143,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
 	 * The check protect us from doing a double unlink ( crash ).
 	 */
 	error = -ENOENT;
-	if (!EP_RB_LINKED(&epi->rbn))
+	if (!ep_rb_linked(&epi->rbn))
 		goto eexit_1;
 
 	/*
@@ -1133,14 +1158,14 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
 	 * This operation togheter with the above check closes the door to
 	 * double unlinks.
 	 */
-	EP_RB_ERASE(&epi->rbn, &ep->rbr);
+	ep_rb_erase(&epi->rbn, &ep->rbr);
 
 	/*
 	 * If the item we are going to remove is inside the ready file descriptors
 	 * we want to remove it from this list to avoid stale events.
 	 */
-	if (EP_IS_LINKED(&epi->rdllink))
-		EP_LIST_DEL(&epi->rdllink);
+	if (ep_is_linked(&epi->rdllink))
+		ep_list_del(&epi->rdllink);
 
 	error = 0;
 eexit_1:
@@ -1174,8 +1199,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 
 	/* Remove the current item from the list of epoll hooks */
 	spin_lock(&file->f_ep_lock);
-	if (EP_IS_LINKED(&epi->fllink))
-		EP_LIST_DEL(&epi->fllink);
+	if (ep_is_linked(&epi->fllink))
+		ep_list_del(&epi->fllink);
 	spin_unlock(&file->f_ep_lock);
 
 	/* We need to acquire the write IRQ lock before calling ep_unlink() */
@@ -1210,7 +1235,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 {
 	int pwake = 0;
 	unsigned long flags;
-	struct epitem *epi = EP_ITEM_FROM_WAIT(wait);
+	struct epitem *epi = ep_item_from_wait(wait);
 	struct eventpoll *ep = epi->ep;
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
@@ -1228,7 +1253,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 		goto is_disabled;
 
 	/* If this file is already in the ready list we exit soon */
-	if (EP_IS_LINKED(&epi->rdllink))
+	if (ep_is_linked(&epi->rdllink))
 		goto is_linked;
 
 	list_add_tail(&epi->rdllink, &ep->rdllist);
@@ -1307,7 +1332,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist
 		lnk = lnk->next;
 
 		/* If this file is already in the ready list we exit soon */
-		if (!EP_IS_LINKED(&epi->txlink)) {
+		if (!ep_is_linked(&epi->txlink)) {
 			/*
 			 * This is initialized in this way so that the default
 			 * behaviour of the reinjecting code will be to push back
@@ -1322,7 +1347,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist
 			/*
 			 * Unlink the item from the ready list.
 			 */
-			EP_LIST_DEL(&epi->rdllink);
+			ep_list_del(&epi->rdllink);
 		}
 	}
 
@@ -1401,7 +1426,7 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
 		epi = list_entry(txlist->next, struct epitem, txlink);
 
 		/* Unlink the current item from the transfer list */
-		EP_LIST_DEL(&epi->txlink);
+		ep_list_del(&epi->txlink);
 
 		/*
 		 * If the item is no more linked to the interest set, we don't
@@ -1410,8 +1435,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
 		 * item is set to have an Edge Triggered behaviour, we don't have
 		 * to push it back either.
 		 */
-		if (EP_RB_LINKED(&epi->rbn) && !(epi->event.events & EPOLLET) &&
-		    (epi->revents & epi->event.events) && !EP_IS_LINKED(&epi->rdllink)) {
+		if (ep_rb_linked(&epi->rbn) && !(epi->event.events & EPOLLET) &&
+		    (epi->revents & epi->event.events) && !ep_is_linked(&epi->rdllink)) {
 			list_add_tail(&epi->rdllink, &ep->rdllist);
 			ricnt++;
 		}
-- 
cgit v1.2.3


From bb93e3a52f8db7210258a1a2134cced0b78a46e1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 23 Jun 2005 00:10:15 -0700
Subject: [PATCH] block: add unlocked_ioctl support for block devices

This patch allows block device drivers to convert their ioctl functions to
unlocked_ioctl() like character devices and other subsystems.  All
functions that were called with the BKL held before are still used that
way, but I would not be surprised if it could be removed from the ioctl
functions in drivers/block/ioctl.c themselves.

As a side note, I found that compat_blkdev_ioctl() acquires the BKL as
well, which looks like a bug.  I have checked that every user of
disk->fops->compat_ioctl() in the current git tree gets the BKL itself, so
it could easily be removed from compat_blkdev_ioctl().

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index c0cbd1bc1a0..e0df94c37b7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -777,8 +777,7 @@ static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf,
 	return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
 }
 
-static int block_ioctl(struct inode *inode, struct file *file, unsigned cmd,
-			unsigned long arg)
+static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
 	return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
 }
@@ -803,7 +802,7 @@ struct file_operations def_blk_fops = {
   	.aio_write	= blkdev_file_aio_write, 
 	.mmap		= generic_file_mmap,
 	.fsync		= block_fsync,
-	.ioctl		= block_ioctl,
+	.unlocked_ioctl	= block_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= compat_blkdev_ioctl,
 #endif
-- 
cgit v1.2.3


From 45778ca819accab1a4a3378b3566cab0f189164f Mon Sep 17 00:00:00 2001
From: Christoph Lameter <christoph@graphe.net>
Date: Thu, 23 Jun 2005 00:10:17 -0700
Subject: [PATCH] Remove f_error field from struct file

The following patch removes the f_error field and all checks of f_error.

Trond said:

  f_error was introduced for NFS, and made sense when we were guaranteed
  always to have a file pointer around when write errors occurred.  Since
  then, we have (for various reasons) had to introduce the nfs_open_context in
  order to track the file read/write state, and it made sense to move our
  f_error tracking there too.

Signed-off-by: Christoph Lameter <christoph@lameter.com>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/direct.c |  5 -----
 fs/open.c       | 16 ++++------------
 2 files changed, 4 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d6a30c844de..6537f2c4ae4 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -751,11 +751,6 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
 	retval = -EFAULT;
 	if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
 		goto out;
-        if (file->f_error) {
-                retval = file->f_error;
-                file->f_error = 0;
-                goto out;
-        }
 	retval = -EFBIG;
 	if (limit != RLIM_INFINITY) {
 		if (pos >= limit) {
diff --git a/fs/open.c b/fs/open.c
index 2ebb72c1a87..5dd411b084b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -981,23 +981,15 @@ asmlinkage long sys_creat(const char __user * pathname, int mode)
  */
 int filp_close(struct file *filp, fl_owner_t id)
 {
-	int retval;
-
-	/* Report and clear outstanding errors */
-	retval = filp->f_error;
-	if (retval)
-		filp->f_error = 0;
+	int retval = 0;
 
 	if (!file_count(filp)) {
 		printk(KERN_ERR "VFS: Close: file count is 0\n");
-		return retval;
+		return 0;
 	}
 
-	if (filp->f_op && filp->f_op->flush) {
-		int err = filp->f_op->flush(filp);
-		if (!retval)
-			retval = err;
-	}
+	if (filp->f_op && filp->f_op->flush)
+		retval = filp->f_op->flush(filp);
 
 	dnotify_flush(filp, id);
 	locks_remove_posix(filp, id);
-- 
cgit v1.2.3


From 9a59f452abe11f569e13ec16c51e6d61c54b9838 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 00:10:19 -0700
Subject: [PATCH] remove <linux/xattr_acl.h>

This file duplicates <linux/posix_acl_xattr.h>, using slightly different
names.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/acl.c           | 12 ++++++------
 fs/ext2/acl.h           |  2 +-
 fs/ext3/acl.c           | 12 ++++++------
 fs/ext3/acl.h           |  2 +-
 fs/jfs/acl.c            | 11 ++++++-----
 fs/jfs/jfs_acl.h        |  2 --
 fs/jfs/super.c          |  1 +
 fs/jfs/xattr.c          |  7 ++++---
 fs/nfsd/vfs.c           |  9 ++++-----
 fs/reiserfs/xattr_acl.c | 26 +++++++++++++-------------
 10 files changed, 42 insertions(+), 42 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 25f4a64fd6b..213148c36eb 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -396,12 +396,12 @@ static size_t
 ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
 			   const char *name, size_t name_len)
 {
-	const size_t size = sizeof(XATTR_NAME_ACL_ACCESS);
+	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_size)
-		memcpy(list, XATTR_NAME_ACL_ACCESS, size);
+		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
 	return size;
 }
 
@@ -409,12 +409,12 @@ static size_t
 ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
 			    const char *name, size_t name_len)
 {
-	const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT);
+	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_size)
-		memcpy(list, XATTR_NAME_ACL_DEFAULT, size);
+		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
 	return size;
 }
 
@@ -506,14 +506,14 @@ ext2_xattr_set_acl_default(struct inode *inode, const char *name,
 }
 
 struct xattr_handler ext2_xattr_acl_access_handler = {
-	.prefix	= XATTR_NAME_ACL_ACCESS,
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
 	.list	= ext2_xattr_list_acl_access,
 	.get	= ext2_xattr_get_acl_access,
 	.set	= ext2_xattr_set_acl_access,
 };
 
 struct xattr_handler ext2_xattr_acl_default_handler = {
-	.prefix	= XATTR_NAME_ACL_DEFAULT,
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
 	.list	= ext2_xattr_list_acl_default,
 	.get	= ext2_xattr_get_acl_default,
 	.set	= ext2_xattr_set_acl_default,
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index fed96ae81a7..0bde85bafe3 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -4,7 +4,7 @@
   (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */
 
-#include <linux/xattr_acl.h>
+#include <linux/posix_acl_xattr.h>
 
 #define EXT2_ACL_VERSION	0x0001
 
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 638c13a26c0..133f5aa581b 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -417,12 +417,12 @@ static size_t
 ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
 			   const char *name, size_t name_len)
 {
-	const size_t size = sizeof(XATTR_NAME_ACL_ACCESS);
+	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_len)
-		memcpy(list, XATTR_NAME_ACL_ACCESS, size);
+		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
 	return size;
 }
 
@@ -430,12 +430,12 @@ static size_t
 ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
 			    const char *name, size_t name_len)
 {
-	const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT);
+	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_len)
-		memcpy(list, XATTR_NAME_ACL_DEFAULT, size);
+		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
 	return size;
 }
 
@@ -535,14 +535,14 @@ ext3_xattr_set_acl_default(struct inode *inode, const char *name,
 }
 
 struct xattr_handler ext3_xattr_acl_access_handler = {
-	.prefix	= XATTR_NAME_ACL_ACCESS,
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
 	.list	= ext3_xattr_list_acl_access,
 	.get	= ext3_xattr_get_acl_access,
 	.set	= ext3_xattr_set_acl_access,
 };
 
 struct xattr_handler ext3_xattr_acl_default_handler = {
-	.prefix	= XATTR_NAME_ACL_DEFAULT,
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
 	.list	= ext3_xattr_list_acl_default,
 	.get	= ext3_xattr_get_acl_default,
 	.set	= ext3_xattr_set_acl_default,
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 98af0c0d0ba..92d50b53a93 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -4,7 +4,7 @@
   (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */
 
-#include <linux/xattr_acl.h>
+#include <linux/posix_acl_xattr.h>
 
 #define EXT3_ACL_VERSION	0x0001
 
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 30a2bf9eeda..e892dab40c2 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -21,6 +21,7 @@
 #include <linux/sched.h>
 #include <linux/fs.h>
 #include <linux/quotaops.h>
+#include <linux/posix_acl_xattr.h>
 #include "jfs_incore.h"
 #include "jfs_xattr.h"
 #include "jfs_acl.h"
@@ -36,11 +37,11 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			ea_name = XATTR_NAME_ACL_ACCESS;
+			ea_name = POSIX_ACL_XATTR_ACCESS;
 			p_acl = &ji->i_acl;
 			break;
 		case ACL_TYPE_DEFAULT:
-			ea_name = XATTR_NAME_ACL_DEFAULT;
+			ea_name = POSIX_ACL_XATTR_DEFAULT;
 			p_acl = &ji->i_default_acl;
 			break;
 		default:
@@ -88,11 +89,11 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			ea_name = XATTR_NAME_ACL_ACCESS;
+			ea_name = POSIX_ACL_XATTR_ACCESS;
 			p_acl = &ji->i_acl;
 			break;
 		case ACL_TYPE_DEFAULT:
-			ea_name = XATTR_NAME_ACL_DEFAULT;
+			ea_name = POSIX_ACL_XATTR_DEFAULT;
 			p_acl = &ji->i_default_acl;
 			if (!S_ISDIR(inode->i_mode))
 				return acl ? -EACCES : 0;
@@ -101,7 +102,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 			return -EINVAL;
 	}
 	if (acl) {
-		size = xattr_acl_size(acl->a_count);
+		size = posix_acl_xattr_size(acl->a_count);
 		value = kmalloc(size, GFP_KERNEL);
 		if (!value)
 			return -ENOMEM;
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index d2ae430adec..a3acd3eec05 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,8 +20,6 @@
 
 #ifdef CONFIG_JFS_POSIX_ACL
 
-#include <linux/xattr_acl.h>
-
 int jfs_permission(struct inode *, int, struct nameidata *);
 int jfs_init_acl(struct inode *, struct inode *);
 int jfs_setattr(struct dentry *, struct iattr *);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 810a3653d8b..ee32211288c 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -24,6 +24,7 @@
 #include <linux/completion.h>
 #include <linux/vfs.h>
 #include <linux/moduleparam.h>
+#include <linux/posix_acl.h>
 #include <asm/uaccess.h>
 
 #include "jfs_incore.h"
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 6016373701a..ee438d429d4 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -19,6 +19,7 @@
 
 #include <linux/fs.h>
 #include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
 #include <linux/quotaops.h>
 #include "jfs_incore.h"
 #include "jfs_superblock.h"
@@ -718,9 +719,9 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 		return -EPERM;
 
 	/*
-	 * XATTR_NAME_ACL_ACCESS is tied to i_mode
+	 * POSIX_ACL_XATTR_ACCESS is tied to i_mode
 	 */
-	if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) {
+	if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) {
 		acl = posix_acl_from_xattr(value, value_len);
 		if (IS_ERR(acl)) {
 			rc = PTR_ERR(acl);
@@ -750,7 +751,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 		JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED;
 
 		return 0;
-	} else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) {
+	} else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
 		acl = posix_acl_from_xattr(value, value_len);
 		if (IS_ERR(acl)) {
 			rc = PTR_ERR(acl);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ae3940dc85c..de340ffd33c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -50,7 +50,6 @@
 #include <linux/posix_acl.h>
 #ifdef CONFIG_NFSD_V4
 #include <linux/posix_acl_xattr.h>
-#include <linux/xattr_acl.h>
 #include <linux/xattr.h>
 #include <linux/nfs4.h>
 #include <linux/nfs4_acl.h>
@@ -425,13 +424,13 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		goto out_nfserr;
 
 	if (pacl) {
-		error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS);
+		error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
 		if (error < 0)
 			goto out_nfserr;
 	}
 
 	if (dpacl) {
-		error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT);
+		error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
 		if (error < 0)
 			goto out_nfserr;
 	}
@@ -498,7 +497,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
 	struct posix_acl *pacl = NULL, *dpacl = NULL;
 	unsigned int flags = 0;
 
-	pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS);
+	pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS);
 	if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
 		pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
 	if (IS_ERR(pacl)) {
@@ -508,7 +507,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
 	}
 
 	if (S_ISDIR(inode->i_mode)) {
-		dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT);
+		dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT);
 		if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
 			dpacl = NULL;
 		else if (IS_ERR(dpacl)) {
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index e302071903a..c312881c5f5 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -4,7 +4,7 @@
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
-#include <linux/xattr_acl.h>
+#include <linux/posix_acl_xattr.h>
 #include <linux/reiserfs_xattr.h>
 #include <linux/reiserfs_acl.h>
 #include <asm/uaccess.h>
@@ -192,11 +192,11 @@ reiserfs_get_acl(struct inode *inode, int type)
 
         switch (type) {
             case ACL_TYPE_ACCESS:
-                name = XATTR_NAME_ACL_ACCESS;
+                name = POSIX_ACL_XATTR_ACCESS;
                 p_acl = &reiserfs_i->i_acl_access;
                 break;
             case ACL_TYPE_DEFAULT:
-                name = XATTR_NAME_ACL_DEFAULT;
+                name = POSIX_ACL_XATTR_DEFAULT;
                 p_acl = &reiserfs_i->i_acl_default;
                 break;
             default:
@@ -260,7 +260,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 
         switch (type) {
             case ACL_TYPE_ACCESS:
-                name = XATTR_NAME_ACL_ACCESS;
+                name = POSIX_ACL_XATTR_ACCESS;
                 p_acl = &reiserfs_i->i_acl_access;
                 if (acl) {
                     mode_t mode = inode->i_mode;
@@ -275,7 +275,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
                 }
                 break;
             case ACL_TYPE_DEFAULT:
-                name = XATTR_NAME_ACL_DEFAULT;
+                name = POSIX_ACL_XATTR_DEFAULT;
                 p_acl = &reiserfs_i->i_acl_default;
                 if (!S_ISDIR (inode->i_mode))
                     return acl ? -EACCES : 0;
@@ -468,7 +468,7 @@ static int
 posix_acl_access_get(struct inode *inode, const char *name,
 			  void *buffer, size_t size)
 {
-	if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1)
+	if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
 		return -EINVAL;
 	return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
 }
@@ -477,7 +477,7 @@ static int
 posix_acl_access_set(struct inode *inode, const char *name,
 			  const void *value, size_t size, int flags)
 {
-	if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1)
+	if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
 		return -EINVAL;
 	return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
 }
@@ -487,7 +487,7 @@ posix_acl_access_del (struct inode *inode, const char *name)
 {
     struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
     struct posix_acl **acl = &reiserfs_i->i_acl_access;
-    if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1)
+    if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
 	return -EINVAL;
     if (!IS_ERR (*acl) && *acl) {
         posix_acl_release (*acl);
@@ -510,7 +510,7 @@ posix_acl_access_list (struct inode *inode, const char *name, int namelen, char
 }
 
 struct reiserfs_xattr_handler posix_acl_access_handler = {
-	.prefix = XATTR_NAME_ACL_ACCESS,
+	.prefix = POSIX_ACL_XATTR_ACCESS,
 	.get = posix_acl_access_get,
 	.set = posix_acl_access_set,
 	.del = posix_acl_access_del,
@@ -521,7 +521,7 @@ static int
 posix_acl_default_get (struct inode *inode, const char *name,
 			   void *buffer, size_t size)
 {
-	if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1)
+	if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
 		return -EINVAL;
 	return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
 }
@@ -530,7 +530,7 @@ static int
 posix_acl_default_set(struct inode *inode, const char *name,
 			   const void *value, size_t size, int flags)
 {
-	if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1)
+	if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
 		return -EINVAL;
 	return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
 }
@@ -540,7 +540,7 @@ posix_acl_default_del (struct inode *inode, const char *name)
 {
     struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
     struct posix_acl **acl = &reiserfs_i->i_acl_default;
-    if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1)
+    if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
 	return -EINVAL;
     if (!IS_ERR (*acl) && *acl) {
         posix_acl_release (*acl);
@@ -563,7 +563,7 @@ posix_acl_default_list (struct inode *inode, const char *name, int namelen, char
 }
 
 struct reiserfs_xattr_handler posix_acl_default_handler = {
-	.prefix = XATTR_NAME_ACL_DEFAULT,
+	.prefix = POSIX_ACL_XATTR_DEFAULT,
 	.get = posix_acl_default_get,
 	.set = posix_acl_default_set,
 	.del = posix_acl_default_del,
-- 
cgit v1.2.3


From 152becd26e0563aefdbc4fd1fe491928efe92d1f Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cam.ac.uk>
Date: Thu, 23 Jun 2005 00:10:21 -0700
Subject: [PATCH] Bug in error recovery in fs/buffer.c::__block_prepare_write()

fs/buffer.c::__block_prepare_write() has broken error recovery.  It calls
the get_block() callback with "create = 1" and if that succeeds it
immediately clears buffer_new on the just allocated buffer (which has
buffer_new set).

The bug is that if an error occurs and get_block() returns != 0, we break
from this loop and go into recovery code.  This code has this comment:

/* Error case: */
/*
 * Zero out any newly allocated blocks to avoid exposing stale
 * data.  If BH_New is set, we know that the block was newly
 * allocated in the above loop.
 */

So the intent is obviously good in that it wants to clear just allocated
and hence not zeroed buffers.  However the code recognises allocated
buffers by checking for buffer_new being set.

Unfortunately __block_prepare_write() as discussed above already cleared
buffer_new on all allocated buffers thus no buffers will be cleared during
error recovery and old data will be leaked.

The simplest way I can see to fix this is to make the current recovery code
work by _not_ clearing buffer_new after calling get_block() in
__block_prepare_write().

We cannot safely allow buffer_new buffers to "leak out" of
__block_prepare_write(), thus we simply do a quick loop over the buffers
clearing buffer_new on each of them if it is set just before returning
"success" from __block_prepare_write().

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 12bdb279112..13e5938a64f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1926,7 +1926,6 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 			if (err)
 				break;
 			if (buffer_new(bh)) {
-				clear_buffer_new(bh);
 				unmap_underlying_metadata(bh->b_bdev,
 							bh->b_blocknr);
 				if (PageUptodate(page)) {
@@ -1968,9 +1967,14 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (!buffer_uptodate(*wait_bh))
 			err = -EIO;
 	}
-	if (!err)
-		return err;
-
+	if (!err) {
+		bh = head;
+		do {
+			if (buffer_new(bh))
+				clear_buffer_new(bh);
+		} while ((bh = bh->b_this_page) != head);
+		return 0;
+	}
 	/* Error case: */
 	/*
 	 * Zero out any newly allocated blocks to avoid exposing stale
-- 
cgit v1.2.3


From 63e6880918e75dcb92d60aff218a76e063a471ef Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Thu, 23 Jun 2005 00:10:27 -0700
Subject: [PATCH] aio: fix do_sync_(read|write) to properly handle aio retries

When do_sync_(read|write) encounters an aio method that makes use of the
retry mechanism, they fail to correctly retry the operation.  This fixes
that by adding the appropriate sleep and retry mechanism.

Signed-off-by: Benjamin LaHaise <benjamin.c.lahaise@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/read_write.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/read_write.c b/fs/read_write.c
index c4c2bee373e..9292f5fa4d6 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -203,6 +203,16 @@ Einval:
 	return -EINVAL;
 }
 
+static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
+{
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	if (!kiocbIsKicked(iocb))
+		schedule();
+	else
+		kiocbClearKicked(iocb);
+	__set_current_state(TASK_RUNNING);
+}
+
 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 {
 	struct kiocb kiocb;
@@ -210,7 +220,10 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos);
+	while (-EIOCBRETRY ==
+		(ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos)))
+		wait_on_retry_sync_kiocb(&kiocb);
+
 	if (-EIOCBQUEUED == ret)
 		ret = wait_on_sync_kiocb(&kiocb);
 	*ppos = kiocb.ki_pos;
@@ -258,7 +271,10 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos);
+	while (-EIOCBRETRY ==
+	       (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos)))
+		wait_on_retry_sync_kiocb(&kiocb);
+
 	if (-EIOCBQUEUED == ret)
 		ret = wait_on_sync_kiocb(&kiocb);
 	*ppos = kiocb.ki_pos;
-- 
cgit v1.2.3


From fed2fc18a4567d613cd35115322257c6c6c710e9 Mon Sep 17 00:00:00 2001
From: Telemaque Ndizihiwe <telendiz@eircom.net>
Date: Thu, 23 Jun 2005 00:10:33 -0700
Subject: [PATCH] sys_open() cleanup

Clean up tortured logic in sys_open().

Signed-off-by: Telemaque Ndizihiwe <telendiz@eircom.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/open.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 5dd411b084b..8ec63f73591 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -934,7 +934,7 @@ EXPORT_SYMBOL(fd_install);
 asmlinkage long sys_open(const char __user * filename, int flags, int mode)
 {
 	char * tmp;
-	int fd, error;
+	int fd;
 
 	if (force_o_largefile())
 		flags |= O_LARGEFILE;
@@ -945,20 +945,16 @@ asmlinkage long sys_open(const char __user * filename, int flags, int mode)
 		fd = get_unused_fd();
 		if (fd >= 0) {
 			struct file *f = filp_open(tmp, flags, mode);
-			error = PTR_ERR(f);
-			if (IS_ERR(f))
-				goto out_error;
-			fd_install(fd, f);
+			if (IS_ERR(f)) {
+				put_unused_fd(fd);
+				fd = PTR_ERR(f);
+			} else {
+				fd_install(fd, f);
+			}
 		}
-out:
 		putname(tmp);
 	}
 	return fd;
-
-out_error:
-	put_unused_fd(fd);
-	fd = error;
-	goto out;
 }
 EXPORT_SYMBOL_GPL(sys_open);
 
-- 
cgit v1.2.3


From 92198f7eaa5df3479341dd8fa20c2c81aa3b1e25 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 22:00:59 -0700
Subject: [PATCH] pass iocb to dio_iodone_t

XFS will have to look at iocb->private to fix aio+dio.  No other filesystem
is using the blockdev_direct_IO* end_io callback.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/direct-io.c              | 2 +-
 fs/xfs/linux-2.6/xfs_aops.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1d55e7e6734..0d06097bc99 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -215,7 +215,7 @@ static struct page *dio_get_page(struct dio *dio)
 static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
 {
 	if (dio->end_io && dio->result)
-		dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private);
+		dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);
 	if (dio->lock_type == DIO_LOCKING)
 		up_read(&dio->inode->i_alloc_sem);
 }
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 93ce257cd14..a3a4b5aaf5d 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -149,11 +149,12 @@ linvfs_unwritten_convert(
  */
 STATIC void
 linvfs_unwritten_convert_direct(
-	struct inode	*inode,
+	struct kiocb	*iocb,
 	loff_t		offset,
 	ssize_t		size,
 	void		*private)
 {
+	struct inode	*inode = iocb->ki_filp->f_dentry->d_inode;
 	ASSERT(!private || inode == (struct inode *)private);
 
 	/* private indicates an unwritten extent lay beneath this IO */
-- 
cgit v1.2.3


From bd6a1f16fffdfe010fdc2979fd01f12357816762 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2005 22:01:01 -0700
Subject: [PATCH] reiserfs: add checking of journal_begin() return value

Check return values of journal_begin() and journal_end() in the quota code
for reiserfs.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/inode.c |  6 +++--
 fs/reiserfs/super.c | 66 ++++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 52 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 2711dff1b7b..073425e6e0a 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2798,7 +2798,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
 		    struct reiserfs_transaction_handle th;
 
 		    /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
-		    journal_begin(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+		    error = journal_begin(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+ 		    if (error)
+ 			goto out;
                     error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
 		    if (error) {
 			journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
@@ -2811,7 +2813,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
 		    if (attr->ia_valid & ATTR_GID)
 			inode->i_gid = attr->ia_gid;
 		    mark_inode_dirty(inode);
-		    journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+		    error = journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
 		}
         }
         if (!error)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index aae0779ed5b..031577fb41a 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1841,13 +1841,18 @@ static int reiserfs_statfs (struct super_block * s, struct kstatfs * buf)
 static int reiserfs_dquot_initialize(struct inode *inode, int type)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     /* We may create quota structure so we need to reserve enough blocks */
     reiserfs_write_lock(inode->i_sb);
-    journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    if (ret)
+	goto out;
     ret = dquot_initialize(inode, type);
-    journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(inode->i_sb);
     return ret;
 }
@@ -1855,13 +1860,18 @@ static int reiserfs_dquot_initialize(struct inode *inode, int type)
 static int reiserfs_dquot_drop(struct inode *inode)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     /* We may delete quota structure so we need to reserve enough blocks */
     reiserfs_write_lock(inode->i_sb);
-    journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    if (ret)
+ 	goto out;
     ret = dquot_drop(inode);
-    journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(inode->i_sb);
     return ret;
 }
@@ -1869,12 +1879,17 @@ static int reiserfs_dquot_drop(struct inode *inode)
 static int reiserfs_write_dquot(struct dquot *dquot)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    if (ret)
+	goto out;
     ret = dquot_commit(dquot);
-    journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(dquot->dq_sb);
     return ret;
 }
@@ -1882,12 +1897,17 @@ static int reiserfs_write_dquot(struct dquot *dquot)
 static int reiserfs_acquire_dquot(struct dquot *dquot)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    if (ret)
+	goto out;
     ret = dquot_acquire(dquot);
-    journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(dquot->dq_sb);
     return ret;
 }
@@ -1895,12 +1915,17 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
 static int reiserfs_release_dquot(struct dquot *dquot)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    if (ret)
+ 	goto out;
     ret = dquot_release(dquot);
-    journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(dquot->dq_sb);
     return ret;
 }
@@ -1920,13 +1945,18 @@ static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
 static int reiserfs_write_info(struct super_block *sb, int type)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     /* Data block + inode block */
     reiserfs_write_lock(sb);
-    journal_begin(&th, sb, 2);
+    ret = journal_begin(&th, sb, 2);
+    if (ret)
+	goto out;
     ret = dquot_commit_info(sb, type);
-    journal_end(&th, sb, 2);
+    err = journal_end(&th, sb, 2);
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(sb);
     return ret;
 }
-- 
cgit v1.2.3


From 1f54587bea84a35125c95e19b98c2f464c50871b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2005 22:01:04 -0700
Subject: [PATCH] quota: ext3: Improve quota credit estimates

Use improved credits estimates for quota operations.  Also reserve a space
for a quota operation in a transaction only if filesystem was mounted with
some quota options.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/acl.c   |  5 +++--
 fs/ext3/inode.c |  7 ++++---
 fs/ext3/namei.c | 25 +++++++++++++------------
 fs/ext3/super.c | 37 +++++++++++++++++++++++++++----------
 fs/ext3/xattr.c |  2 +-
 5 files changed, 48 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 133f5aa581b..3ac38266fc9 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -393,7 +393,8 @@ ext3_acl_chmod(struct inode *inode)
 		int retries = 0;
 
 	retry:
-		handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
+		handle = ext3_journal_start(inode,
+				EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			ext3_std_error(inode->i_sb, error);
@@ -503,7 +504,7 @@ ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
 		acl = NULL;
 
 retry:
-	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
+	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	error = ext3_set_acl(handle, inode, type, acl);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 0d5fa73b18d..0b2db4f618c 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -128,7 +128,7 @@ static unsigned long blocks_for_truncate(struct inode *inode)
 	if (needed > EXT3_MAX_TRANS_DATA) 
 		needed = EXT3_MAX_TRANS_DATA;
 
-	return EXT3_DATA_TRANS_BLOCKS + needed;
+	return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
 }
 
 /* 
@@ -2763,7 +2763,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 
 		/* (user+group)*(old+new) structure, inode write (sb,
 		 * inode block, ? - but truncate inode update has it) */
-		handle = ext3_journal_start(inode, 4*EXT3_QUOTA_INIT_BLOCKS+3);
+		handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+
+					EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			goto err_out;
@@ -2861,7 +2862,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
 #ifdef CONFIG_QUOTA
 	/* We know that structure was already allocated during DQUOT_INIT so
 	 * we will be updating only the data blocks + inodes */
-	ret += 2*EXT3_QUOTA_TRANS_BLOCKS;
+	ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
 #endif
 
 	return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 60e44e6dd7a..50378d8ff84 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1645,9 +1645,9 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
 	int err, retries = 0;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS);
+					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -1679,9 +1679,9 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
 		return -EINVAL;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS);
+					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -1715,9 +1715,9 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 		return -EMLINK;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS);
+					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2006,7 +2006,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
 	/* Initialize quotas before so that eventual writes go in
 	 * separate transaction */
 	DQUOT_INIT(dentry->d_inode);
-	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2065,7 +2065,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
 	DQUOT_INIT(dentry->d_inode);
-	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2120,9 +2120,9 @@ static int ext3_symlink (struct inode * dir,
 		return -ENAMETOOLONG;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
-					2*EXT3_QUOTA_INIT_BLOCKS);
+					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2174,7 +2174,7 @@ static int ext3_link (struct dentry * old_dentry,
 		return -EMLINK;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -2216,7 +2216,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
 	 * in separate transaction */
 	if (new_dentry->d_inode)
 		DQUOT_INIT(new_dentry->d_inode);
-	handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(old_dir, 2 *
+					EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
 			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 9630fbfdc24..b4b3e8a3913 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -589,7 +589,7 @@ enum {
 	Opt_commit, Opt_journal_update, Opt_journal_inum,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
-	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
 	Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
 };
 
@@ -634,10 +634,10 @@ static match_table_t tokens = {
 	{Opt_grpjquota, "grpjquota=%s"},
 	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
 	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
-	{Opt_ignore, "grpquota"},
-	{Opt_ignore, "noquota"},
-	{Opt_ignore, "quota"},
-	{Opt_ignore, "usrquota"},
+	{Opt_quota, "grpquota"},
+	{Opt_noquota, "noquota"},
+	{Opt_quota, "quota"},
+	{Opt_quota, "usrquota"},
 	{Opt_barrier, "barrier=%u"},
 	{Opt_err, NULL},
 	{Opt_resize, "resize"},
@@ -876,6 +876,7 @@ set_qf_name:
 				sbi->s_qf_names[qtype] = NULL;
 				return 0;
 			}
+			set_opt(sbi->s_mount_opt, QUOTA);
 			break;
 		case Opt_offusrjquota:
 			qtype = USRQUOTA;
@@ -898,6 +899,17 @@ clear_qf_name:
 		case Opt_jqfmt_vfsv0:
 			sbi->s_jquota_fmt = QFMT_VFS_V0;
 			break;
+		case Opt_quota:
+			set_opt(sbi->s_mount_opt, QUOTA);
+			break;
+		case Opt_noquota:
+			if (sb_any_quota_enabled(sb)) {
+				printk(KERN_ERR "EXT3-fs: Cannot change quota "
+					"options when quota turned on.\n");
+				return 0;
+			}
+			clear_opt(sbi->s_mount_opt, QUOTA);
+			break;
 #else
 		case Opt_usrjquota:
 		case Opt_grpjquota:
@@ -909,6 +921,9 @@ clear_qf_name:
 				"EXT3-fs: journalled quota options not "
 				"supported.\n");
 			break;
+		case Opt_quota:
+		case Opt_noquota:
+			break;
 #endif
 		case Opt_abort:
 			set_opt(sbi->s_mount_opt, ABORT);
@@ -2238,7 +2253,7 @@ static int ext3_dquot_initialize(struct inode *inode, int type)
 	int ret, err;
 
 	/* We may create quota structure so we need to reserve enough blocks */
-	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS);
+	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_initialize(inode, type);
@@ -2254,7 +2269,7 @@ static int ext3_dquot_drop(struct inode *inode)
 	int ret, err;
 
 	/* We may delete quota structure so we need to reserve enough blocks */
-	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS);
+	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_drop(inode);
@@ -2272,7 +2287,7 @@ static int ext3_write_dquot(struct dquot *dquot)
 
 	inode = dquot_to_inode(dquot);
 	handle = ext3_journal_start(inode,
-					EXT3_QUOTA_TRANS_BLOCKS);
+					EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_commit(dquot);
@@ -2288,7 +2303,7 @@ static int ext3_acquire_dquot(struct dquot *dquot)
 	handle_t *handle;
 
 	handle = ext3_journal_start(dquot_to_inode(dquot),
-					EXT3_QUOTA_INIT_BLOCKS);
+					EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_acquire(dquot);
@@ -2304,7 +2319,7 @@ static int ext3_release_dquot(struct dquot *dquot)
 	handle_t *handle;
 
 	handle = ext3_journal_start(dquot_to_inode(dquot),
-					EXT3_QUOTA_INIT_BLOCKS);
+					EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_release(dquot);
@@ -2361,6 +2376,8 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
 	int err;
 	struct nameidata nd;
 
+	if (!test_opt(sb, QUOTA))
+		return -EINVAL;
 	/* Not journalling quota? */
 	if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
 	    !EXT3_SB(sb)->s_qf_names[GRPQUOTA])
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 4cbc6d0212d..3f9dfa643b1 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -1044,7 +1044,7 @@ ext3_xattr_set(struct inode *inode, int name_index, const char *name,
 	int error, retries = 0;
 
 retry:
-	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
+	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle)) {
 		error = PTR_ERR(handle);
 	} else {
-- 
cgit v1.2.3


From 556a2a45bce1740f035befaa7201e4ad836c7257 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2005 22:01:06 -0700
Subject: [PATCH] quota: reiserfs: improve quota credit estimates

Use improved credits estimates for quota operations.  Also reserve space
for a quota operation in a transaction only if filesystem was mounted with
some quota option.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/file.c  |  4 ++--
 fs/reiserfs/inode.c | 11 ++++++-----
 fs/reiserfs/namei.c | 25 ++++++++++++++-----------
 fs/reiserfs/super.c | 35 +++++++++++++++++++++++------------
 4 files changed, 45 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 2230afff187..12e91209544 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -201,7 +201,7 @@ static int reiserfs_allocate_blocks_for_region(
     /* If we came here, it means we absolutely need to open a transaction,
        since we need to allocate some blocks */
     reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that.
-    res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS); // Wish I know if this number enough
+    res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough
     if (res)
         goto error_exit;
     reiserfs_update_inode_transaction(inode) ;
@@ -576,7 +576,7 @@ error_exit:
         int err;
         // update any changes we made to blk count
         reiserfs_update_sd(th, inode);
-        err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS);
+        err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));
         if (err)
             res = err;
     }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 073425e6e0a..0d5817f8197 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -28,7 +28,7 @@ static int reiserfs_prepare_write(struct file *f, struct page *page,
 void reiserfs_delete_inode (struct inode * inode)
 {
     /* We need blocks for transaction + (user+group) quota update (possibly delete) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
     struct reiserfs_transaction_handle th ;
   
     reiserfs_write_lock(inode->i_sb);
@@ -591,7 +591,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
        XXX in practically impossible worst case direct2indirect()
        can incur (much) more than 3 balancings.
        quota update for user, group */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
     int version;
     int dangle = 1;
     loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;
@@ -2796,14 +2796,15 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
 
                 if (!error) {
 		    struct reiserfs_transaction_handle th;
+		    int jbegin_count = 2*(REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)+REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb))+2;
 
 		    /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
-		    error = journal_begin(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+		    error = journal_begin(&th, inode->i_sb, jbegin_count);
  		    if (error)
  			goto out;
                     error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
 		    if (error) {
-			journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+			journal_end(&th, inode->i_sb, jbegin_count);
 			goto out;
 		    }
 		    /* Update corresponding info in inode so that everything is in
@@ -2813,7 +2814,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
 		    if (attr->ia_valid & ATTR_GID)
 			inode->i_gid = attr->ia_gid;
 		    mark_inode_dirty(inode);
-		    error = journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+		    error = journal_end(&th, inode->i_sb, jbegin_count);
 		}
         }
         if (!error)
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 7d4dc5f5aa8..4a333255f27 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -586,7 +586,7 @@ static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode,
     int retval;
     struct inode * inode;
     /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
     struct reiserfs_transaction_handle th ;
     int locked;
 
@@ -653,7 +653,7 @@ static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode,
     struct inode * inode;
     struct reiserfs_transaction_handle th ;
     /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
     int locked;
 
     if (!new_valid_dev(rdev))
@@ -727,7 +727,7 @@ static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode)
     struct inode * inode;
     struct reiserfs_transaction_handle th ;
     /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
     int locked;
 
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
@@ -829,8 +829,10 @@ static int reiserfs_rmdir (struct inode * dir, struct dentry *dentry)
 
 
     /* we will be doing 2 balancings and update 2 stat data, we change quotas
-     * of the owner of the directory and of the owner of the parent directory */
-    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+     * of the owner of the directory and of the owner of the parent directory.
+     * The quota structure is possibly deleted only on last iput => outside
+     * of this transaction */
+    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
     reiserfs_write_lock(dir->i_sb);
     retval = journal_begin(&th, dir->i_sb, jbegin_count) ;
@@ -913,9 +915,10 @@ static int reiserfs_unlink (struct inode * dir, struct dentry *dentry)
     inode = dentry->d_inode;
 
     /* in this transaction we can be doing at max two balancings and update
-       two stat datas, we change quotas of the owner of the directory and of
-       the owner of the parent directory */
-    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+     * two stat datas, we change quotas of the owner of the directory and of
+     * the owner of the parent directory. The quota structure is possibly
+     * deleted only on iput => outside of this transaction */
+    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
     reiserfs_write_lock(dir->i_sb);
     retval = journal_begin(&th, dir->i_sb, jbegin_count) ;
@@ -1000,7 +1003,7 @@ static int reiserfs_symlink (struct inode * parent_dir,
     struct reiserfs_transaction_handle th ;
     int mode = S_IFLNK | S_IRWXUGO;
     /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb));
 
     if (!(inode = new_inode(parent_dir->i_sb))) {
 	return -ENOMEM ;
@@ -1076,7 +1079,7 @@ static int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct
     struct inode *inode = old_dentry->d_inode;
     struct reiserfs_transaction_handle th ;
     /* We need blocks for transaction + update of quotas for the owners of the directory */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
     reiserfs_write_lock(dir->i_sb);
     if (inode->i_nlink >= REISERFS_LINK_MAX) {
@@ -1196,7 +1199,7 @@ static int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry,
        pointed initially and (5) maybe block containing ".." of
        renamed directory
        quota updates: two parent directories */
-    jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS;
+    jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
 
     old_inode = old_dentry->d_inode;
     new_dentry_inode = new_dentry->d_inode;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 031577fb41a..660aefca1fd 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -866,8 +866,9 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
 	{"jdev",	.arg_required = 'j', .values = NULL},
 	{"nolargeio",	.arg_required = 'w', .values = NULL},
 	{"commit",	.arg_required = 'c', .values = NULL},
-	{"usrquota",},
-	{"grpquota",},
+	{"usrquota",	.setmask = 1<<REISERFS_QUOTA},
+	{"grpquota",	.setmask = 1<<REISERFS_QUOTA},
+	{"noquota",	.clrmask = 1<<REISERFS_QUOTA},
 	{"errors", 	.arg_required = 'e', .values = error_actions},
 	{"usrjquota",	.arg_required = 'u'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL},
 	{"grpjquota",	.arg_required = 'g'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL},
@@ -964,6 +965,7 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
 		    return 0;
 		}
 		strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg);
+		*mount_options |= 1<<REISERFS_QUOTA;
 	    }
 	    else {
 		if (REISERFS_SB(s)->s_qf_names[qtype]) {
@@ -995,7 +997,13 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
 	reiserfs_warning(s, "reiserfs_parse_options: journalled quota format not specified.");
 	return 0;
     }
+    /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
+    if (!(*mount_options & (1<<REISERFS_QUOTA)) && sb_any_quota_enabled(s)) {
+	reiserfs_warning(s, "reiserfs_parse_options: quota options must be present when quota is turned on.");
+	return 0;
+    }
 #endif
+
     return 1;
 }
 
@@ -1105,6 +1113,7 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a
   safe_mask |= 1 << REISERFS_ERROR_RO;
   safe_mask |= 1 << REISERFS_ERROR_CONTINUE;
   safe_mask |= 1 << REISERFS_ERROR_PANIC;
+  safe_mask |= 1 << REISERFS_QUOTA;
 
   /* Update the bitmask, taking care to keep
    * the bits we're not allowed to change here */
@@ -1845,11 +1854,11 @@ static int reiserfs_dquot_initialize(struct inode *inode, int type)
 
     /* We may create quota structure so we need to reserve enough blocks */
     reiserfs_write_lock(inode->i_sb);
-    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
     if (ret)
 	goto out;
     ret = dquot_initialize(inode, type);
-    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
     if (!ret && err)
 	ret = err;
 out:
@@ -1864,11 +1873,11 @@ static int reiserfs_dquot_drop(struct inode *inode)
 
     /* We may delete quota structure so we need to reserve enough blocks */
     reiserfs_write_lock(inode->i_sb);
-    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
     if (ret)
  	goto out;
     ret = dquot_drop(inode);
-    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
     if (!ret && err)
 	ret = err;
 out:
@@ -1882,11 +1891,11 @@ static int reiserfs_write_dquot(struct dquot *dquot)
     int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
     if (ret)
 	goto out;
     ret = dquot_commit(dquot);
-    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
     if (!ret && err)
 	ret = err;
 out:
@@ -1900,11 +1909,11 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
     int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
     if (ret)
 	goto out;
     ret = dquot_acquire(dquot);
-    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
     if (!ret && err)
 	ret = err;
 out:
@@ -1918,11 +1927,11 @@ static int reiserfs_release_dquot(struct dquot *dquot)
     int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
     if (ret)
  	goto out;
     ret = dquot_release(dquot);
-    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
     if (!ret && err)
 	ret = err;
 out:
@@ -1978,6 +1987,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, ch
     int err;
     struct nameidata nd;
 
+    if (!(REISERFS_SB(sb)->s_mount_opt & (1<<REISERFS_QUOTA)))
+	return -EINVAL;
     err = path_lookup(path, LOOKUP_FOLLOW, &nd);
     if (err)
         return err;
-- 
cgit v1.2.3


From bdd5b29c6bc835dab71148afd5952f9cd278eef1 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2005 22:01:37 -0700
Subject: [PATCH] Make reiserfs BUG on too big transaction

Make reiserfs BUG() when somebody tries to start a larger transaction than
it's allowed (currently the code just silently deadlocks).

Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Chris Mason <mason@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/journal.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3072cfdee95..7b87707acc3 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2631,6 +2631,8 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct sup
   int retval;
 
   reiserfs_check_lock_depth(p_s_sb, "journal_begin") ;
+  if (nblocks > journal->j_trans_max)
+	BUG();
 
   PROC_INFO_INC( p_s_sb, journal.journal_being );
   /* set here for journal_join */
-- 
cgit v1.2.3


From 52f4fb43063c182f3ef7e257ab336a8be8066bb0 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:02:49 -0700
Subject: [PATCH] nfsd4: find_delegation_file()

Factor out a bit of common code that will be useful elsewhere.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 75e8b137580..bb95275e7ba 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1526,6 +1526,18 @@ out:
 	return status;
 }
 
+static struct nfs4_delegation *
+find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
+{
+	struct nfs4_delegation *dp;
+
+	list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
+		if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
+			return dp;
+	}
+	return NULL;
+}
+
 static int
 nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp)
 {
@@ -2419,24 +2431,15 @@ find_stateid(stateid_t *stid, int flags)
 static struct nfs4_delegation *
 find_delegation_stateid(struct inode *ino, stateid_t *stid)
 {
-	struct nfs4_delegation *dp = NULL;
 	struct nfs4_file *fp = NULL;
-	u32 st_id;
 
 	dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
                     stid->si_boot, stid->si_stateownerid,
                     stid->si_fileid, stid->si_generation);
 
-	st_id = stid->si_stateownerid;
 	fp = find_file(ino);
-	if (fp) {
-		list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
-			if(dp->dl_stateid.si_stateownerid == st_id) {
-				dprintk("NFSD: find_delegation dp %p\n",dp);
-				return dp;
-			}
-		}
-	}
+	if (fp)
+		return find_delegation_file(fp, stid);
 	return NULL;
 }
 
-- 
cgit v1.2.3


From 4a6e43e6d4e43723699879f421d321e39eab5e41 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:02:50 -0700
Subject: [PATCH] nfsd4: nfs4_check_delegmode

Additional minor code reshuffling to prepare for claim_deleg_cur support.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bb95275e7ba..31f7082df49 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1526,6 +1526,15 @@ out:
 	return status;
 }
 
+static inline int
+nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
+{
+	if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
+		return nfserr_openmode;
+	else
+		return nfs_ok;
+}
+
 static struct nfs4_delegation *
 find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
 {
@@ -1960,15 +1969,6 @@ out:
 	return status;
 }
 
-static inline int
-nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
-{
-	if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
-		return nfserr_openmode;
-	else
-		return nfs_ok;
-}
-
 static inline int
 check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
 {
-- 
cgit v1.2.3


From 567d98292e81033182e3da4c33b41ada9c113447 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:02:53 -0700
Subject: [PATCH] nfsd4: don't reopen for delegated client

We don't really need to be doing a separate open for every stateid.  And in
the case of an open from a client that already has a delegation on a file, it
unnecessarily results in a delegation recall.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 44 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 31f7082df49..3791c9d84da 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1547,6 +1547,24 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
 	return NULL;
 }
 
+static void
+nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
+		struct nfs4_delegation **dp)
+{
+	int flags;
+	int status;
+
+	*dp = find_delegation_file(fp, &open->op_delegate_stateid);
+	if (*dp == NULL)
+		return;
+	flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ?
+						RD_STATE : WR_STATE;
+	status = nfs4_check_delegmode(*dp, flags);
+	if (status)
+		*dp = NULL;
+	return;
+}
+
 static int
 nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp)
 {
@@ -1572,21 +1590,28 @@ out:
 
 static int
 nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
+		struct nfs4_delegation *dp,
 		struct svc_fh *cur_fh, int flags)
 {
 	struct nfs4_stateid *stp;
-	int status;
 
 	stp = kmalloc(sizeof(struct nfs4_stateid), GFP_KERNEL);
 	if (stp == NULL)
 		return nfserr_resource;
 
-	status = nfsd_open(rqstp, cur_fh, S_IFREG, flags, &stp->st_vfs_file);
-	if (status) {
-		if (status == nfserr_dropit)
-			status = nfserr_jukebox;
-		kfree(stp);
-		return status;
+	if (dp) {
+		get_file(dp->dl_vfs_file);
+		stp->st_vfs_file = dp->dl_vfs_file;
+	} else {
+		int status;
+		status = nfsd_open(rqstp, cur_fh, S_IFREG, flags,
+				&stp->st_vfs_file);
+		if (status) {
+			if (status == nfserr_dropit)
+				status = nfserr_jukebox;
+			kfree(stp);
+			return status;
+		}
 	}
 	vfsopen++;
 	*stpp = stp;
@@ -1720,6 +1745,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	struct nfs4_file *fp = NULL;
 	struct inode *ino = current_fh->fh_dentry->d_inode;
 	struct nfs4_stateid *stp = NULL;
+	struct nfs4_delegation *dp = NULL;
 	int status;
 
 	status = nfserr_inval;
@@ -1734,6 +1760,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	if (fp) {
 		if ((status = nfs4_check_open(fp, open, &stp)))
 			goto out;
+		nfs4_check_deleg(fp, open, &dp);
 	} else {
 		status = nfserr_resource;
 		fp = alloc_init_file(ino);
@@ -1757,7 +1784,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 			flags = MAY_WRITE;
 		else
 			flags = MAY_READ;
-		if ((status = nfs4_new_open(rqstp, &stp, current_fh, flags)))
+		status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
+		if (status)
 			goto out;
 		init_stateid(stp, fp, open);
 		status = nfsd4_truncate(rqstp, current_fh, open);
-- 
cgit v1.2.3


From c44c5eeb2c022ddac98a8543c08dc8ff820561dc Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:02:54 -0700
Subject: [PATCH] nfsd4: add open state code for CLAIM_DELEGATE_CUR

State logic for OPEN with claim type CLAIM_DELEGATE_CUR, which the NFSv4
client uses to report local OPENs on a delegated file back to the NFSv4
server.

nfs4_check_deleg() performs input delegation stateid lookup and sanity check.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3791c9d84da..8ac0c9abe94 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1547,22 +1547,28 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
 	return NULL;
 }
 
-static void
+static int
 nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
 		struct nfs4_delegation **dp)
 {
 	int flags;
-	int status;
+	int status = nfserr_bad_stateid;
 
 	*dp = find_delegation_file(fp, &open->op_delegate_stateid);
 	if (*dp == NULL)
-		return;
+		goto out;
 	flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ?
 						RD_STATE : WR_STATE;
 	status = nfs4_check_delegmode(*dp, flags);
 	if (status)
 		*dp = NULL;
-	return;
+out:
+	if (open->op_claim_type != NFS4_OPEN_CLAIM_DELEGATE_CUR)
+		return nfs_ok;
+	if (status)
+		return status;
+	open->op_stateowner->so_confirmed = 1;
+	return nfs_ok;
 }
 
 static int
@@ -1760,8 +1766,13 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	if (fp) {
 		if ((status = nfs4_check_open(fp, open, &stp)))
 			goto out;
-		nfs4_check_deleg(fp, open, &dp);
+		status = nfs4_check_deleg(fp, open, &dp);
+		if (status)
+			goto out;
 	} else {
+		status = nfserr_bad_stateid;
+		if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
+			goto out;
 		status = nfserr_resource;
 		fp = alloc_init_file(ino);
 		if (fp == NULL)
-- 
cgit v1.2.3


From 0dd3c19212961453817f219cd6200981c38564bc Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:02:56 -0700
Subject: [PATCH] nfsd4: support CLAIM_DELEGATE_CUR

Add OPEN claim type NFS4_OPEN_CLAIM_DELEGATE_CUR to nfsd4_open().

A delegation stateid and a name are provided.  OPEN with O_CREAT is not legal
with this claim type; otherwise, use the NFS4_OPEN_CLAIM_NULL code path to
lookup the filename to be opened.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e8158741e8b..eb8c1337d9b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -198,6 +198,11 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 	if (status)
 		goto out;
 	switch (open->op_claim_type) {
+		case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+			status = nfserr_inval;
+			if (open->op_create)
+				goto out;
+			/* fall through */
 		case NFS4_OPEN_CLAIM_NULL:
 			/*
 			 * (1) set CURRENT_FH to the file being opened,
@@ -220,7 +225,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 			if (status)
 				goto out;
 			break;
-		case NFS4_OPEN_CLAIM_DELEGATE_CUR:
              	case NFS4_OPEN_CLAIM_DELEGATE_PREV:
 			printk("NFSD: unsupported OPEN claim type %d\n",
 				open->op_claim_type);
-- 
cgit v1.2.3


From 496400014f22c4dbdbc1e89249a2feba46939708 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:02:58 -0700
Subject: [PATCH] nfsd4: fix fh_expire_type

We're returning NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME for the
fh_expire_type attribute.  This is incorrect:
	1. The spec actually only allows NOEXPIRE_WITH_OPEN when
	   VOLATILE_ANY is also set.
	2. Filehandles for open files can expire, if the file is removed
	   and there is a reboot.
	3. Filehandles are only volatile on rename in the nosubtree check
	   case.

Unfortunately, there's no way to indicate that we only expire on remove.  So
our only choice is FH4_VOLATILE_ANY.  Although it's redundant, we also set
FH4_VOL_RENAME in the subtree check case, since subtreecheck does actually
cause problems in practice and it seems possibly useful to give clients some
way to distinguish that case.

Fix a mispelled #define while we're at it.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4xdr.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 36a058a112d..0ae1467c3bc 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1366,7 +1366,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 	if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
 		if ((buflen -= 4) < 0)
 			goto out_resource;
-		WRITE32( NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME );
+		if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
+			WRITE32(NFS4_FH_VOLATILE_ANY);
+		else
+			WRITE32(NFS4_FH_VOLATILE_ANY|NFS4_FH_VOL_RENAME);
 	}
 	if (bmval0 & FATTR4_WORD0_CHANGE) {
 		/*
-- 
cgit v1.2.3


From c815afc73eeef089922449857ca4ed4d2e8950cb Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:00 -0700
Subject: [PATCH] nfsd4: block metadata ops during grace period

We currently return err_grace if a user attempts a non-reclaim open during the
grace period.  But we also need to prevent renames and removes, at least, to
ensure clients have the chance to recover state on files before they are moved
or deleted.

Of course, local users could also do renames and removes during the lease
period, and there's not much we can do about that.  This at least will help
with remote users.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index eb8c1337d9b..ac4878ac221 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -536,6 +536,8 @@ nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_rem
 {
 	int status;
 
+	if (nfs4_in_grace())
+		return nfserr_grace;
 	status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen);
 	if (status == nfserr_symlink)
 		return nfserr_notdir;
@@ -554,6 +556,9 @@ nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh,
 
 	if (!save_fh->fh_dentry)
 		return status;
+	if (nfs4_in_grace() && !(save_fh->fh_export->ex_flags
+					& NFSEXP_NOSUBTREECHECK))
+		return nfserr_grace;
 	status = nfsd_rename(rqstp, save_fh, rename->rn_sname,
 			     rename->rn_snamelen, current_fh,
 			     rename->rn_tname, rename->rn_tnamelen);
-- 
cgit v1.2.3


From e60d4398a7c20fbe9c4a6cc39d7188ef9f65d2f1 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:01 -0700
Subject: [PATCH] nfsd4: slabify nfs4_files

The structures the server uses to keep track of various pieces of nfsv4 state
(open files, outstanding delegations, etc.) are likely to be allocated and
deallocated frequently and seem reasonable candidates for slab caches.

While we're at it, the slab code keeps statistics that help catch leaks and
such, so we may as well take this chance to eliminate some debugging counters
that we've been keeping ourselves.

Start with the struct nfs4_file.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 58 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8ac0c9abe94..260c1cbe25c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -70,8 +70,6 @@ u32 list_add_perfile = 0;
 u32 list_del_perfile = 0;
 u32 add_perclient = 0;
 u32 del_perclient = 0;
-u32 alloc_file = 0;
-u32 free_file = 0;
 u32 vfsopen = 0;
 u32 vfsclose = 0;
 u32 alloc_delegation= 0;
@@ -90,6 +88,9 @@ static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
  */
 static DECLARE_MUTEX(client_sema);
 
+kmem_cache_t *stateowner_slab = NULL;
+kmem_cache_t *file_slab = NULL;
+
 void
 nfs4_lock_state(void)
 {
@@ -961,14 +962,14 @@ alloc_init_file(struct inode *ino)
 	struct nfs4_file *fp;
 	unsigned int hashval = file_hashval(ino);
 
-	if ((fp = kmalloc(sizeof(struct nfs4_file),GFP_KERNEL))) {
+	fp = kmem_cache_alloc(file_slab, GFP_KERNEL);
+	if (fp) {
 		INIT_LIST_HEAD(&fp->fi_hash);
 		INIT_LIST_HEAD(&fp->fi_perfile);
 		INIT_LIST_HEAD(&fp->fi_del_perfile);
 		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
 		fp->fi_inode = igrab(ino);
 		fp->fi_id = current_fileid++;
-		alloc_file++;
 		return fp;
 	}
 	return NULL;
@@ -992,29 +993,41 @@ release_all_files(void)
 	}
 }
 
-kmem_cache_t *stateowner_slab = NULL;
-
-static int
-nfsd4_init_slabs(void)
+static void
+nfsd4_free_slab(kmem_cache_t **slab)
 {
-	stateowner_slab = kmem_cache_create("nfsd4_stateowners",
-			sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL);
-	if (stateowner_slab == NULL) {
-		dprintk("nfsd4: out of memory while initializing nfsv4\n");
-		return -ENOMEM;
-	}
-	return 0;
+	int status;
+
+	if (*slab == NULL)
+		return;
+	status = kmem_cache_destroy(*slab);
+	*slab = NULL;
+	WARN_ON(status);
 }
 
 static void
 nfsd4_free_slabs(void)
 {
-	int status = 0;
+	nfsd4_free_slab(&stateowner_slab);
+	nfsd4_free_slab(&file_slab);
+}
 
-	if (stateowner_slab)
-		status = kmem_cache_destroy(stateowner_slab);
-	stateowner_slab = NULL;
-	BUG_ON(status);
+static int
+nfsd4_init_slabs(void)
+{
+	stateowner_slab = kmem_cache_create("nfsd4_stateowners",
+			sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL);
+	if (stateowner_slab == NULL)
+		goto out_nomem;
+	file_slab = kmem_cache_create("nfsd4_files",
+			sizeof(struct nfs4_file), 0, 0, NULL, NULL);
+	if (file_slab == NULL)
+		goto out_nomem;
+	return 0;
+out_nomem:
+	nfsd4_free_slabs();
+	dprintk("nfsd4: out of memory while initializing nfsv4\n");
+	return -ENOMEM;
 }
 
 void
@@ -1167,10 +1180,9 @@ release_stateid(struct nfs4_stateid *stp, int flags)
 static void
 release_file(struct nfs4_file *fp)
 {
-	free_file++;
 	list_del(&fp->fi_hash);
 	iput(fp->fi_inode);
-	kfree(fp);
+	kmem_cache_free(file_slab, fp);
 }	
 
 void
@@ -3286,8 +3298,6 @@ __nfs4_state_shutdown(void)
 			list_add_perfile, list_del_perfile);
 	dprintk("NFSD: add_perclient %d del_perclient %d\n",
 			add_perclient, del_perclient);
-	dprintk("NFSD: alloc_file %d free_file %d\n",
-			alloc_file, free_file);
 	dprintk("NFSD: vfsopen %d vfsclose %d\n",
 			vfsopen, vfsclose);
 	dprintk("NFSD: alloc_delegation %d free_delegation %d\n",
-- 
cgit v1.2.3


From 5ac049ac66416bbe84923f7c2384f23f6ee4aa88 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:03 -0700
Subject: [PATCH] nfsd4: slabify stateids

Allocate stateid's from a slab cache.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 260c1cbe25c..c5fce309d87 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -90,6 +90,7 @@ static DECLARE_MUTEX(client_sema);
 
 kmem_cache_t *stateowner_slab = NULL;
 kmem_cache_t *file_slab = NULL;
+kmem_cache_t *stateid_slab = NULL;
 
 void
 nfs4_lock_state(void)
@@ -1010,6 +1011,7 @@ nfsd4_free_slabs(void)
 {
 	nfsd4_free_slab(&stateowner_slab);
 	nfsd4_free_slab(&file_slab);
+	nfsd4_free_slab(&stateid_slab);
 }
 
 static int
@@ -1023,6 +1025,10 @@ nfsd4_init_slabs(void)
 			sizeof(struct nfs4_file), 0, 0, NULL, NULL);
 	if (file_slab == NULL)
 		goto out_nomem;
+	stateid_slab = kmem_cache_create("nfsd4_stateids",
+			sizeof(struct nfs4_stateid), 0, 0, NULL, NULL);
+	if (stateid_slab == NULL)
+		goto out_nomem;
 	return 0;
 out_nomem:
 	nfsd4_free_slabs();
@@ -1173,7 +1179,7 @@ release_stateid(struct nfs4_stateid *stp, int flags)
 		vfsclose++;
 	} else if (flags & LOCK_STATE)
 		locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
-	kfree(stp);
+	kmem_cache_free(stateid_slab, stp);
 	stp = NULL;
 }
 
@@ -1606,6 +1612,12 @@ out:
 	return status;
 }
 
+static inline struct nfs4_stateid *
+nfs4_alloc_stateid(void)
+{
+	return kmem_cache_alloc(stateid_slab, GFP_KERNEL);
+}
+
 static int
 nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
 		struct nfs4_delegation *dp,
@@ -1613,7 +1625,7 @@ nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
 {
 	struct nfs4_stateid *stp;
 
-	stp = kmalloc(sizeof(struct nfs4_stateid), GFP_KERNEL);
+	stp = nfs4_alloc_stateid();
 	if (stp == NULL)
 		return nfserr_resource;
 
@@ -1627,7 +1639,7 @@ nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
 		if (status) {
 			if (status == nfserr_dropit)
 				status = nfserr_jukebox;
-			kfree(stp);
+			kmem_cache_free(stateid_slab, stp);
 			return status;
 		}
 	}
@@ -2627,8 +2639,8 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
 	struct nfs4_stateid *stp;
 	unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
 
-	if ((stp = kmalloc(sizeof(struct nfs4_stateid), 
-					GFP_KERNEL)) == NULL)
+	stp = nfs4_alloc_stateid();
+	if (stp == NULL)
 		goto out;
 	INIT_LIST_HEAD(&stp->st_hash);
 	INIT_LIST_HEAD(&stp->st_perfile);
-- 
cgit v1.2.3


From 5b2d21c1965859acc881dd862b6ebbfae67cdc14 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:04 -0700
Subject: [PATCH] nfsd4: slabify delegations

Allocate delegations from a slab cache.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c5fce309d87..927d2007d5a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -72,8 +72,6 @@ u32 add_perclient = 0;
 u32 del_perclient = 0;
 u32 vfsopen = 0;
 u32 vfsclose = 0;
-u32 alloc_delegation= 0;
-u32 free_delegation= 0;
 
 /* forward declarations */
 struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
@@ -91,6 +89,7 @@ static DECLARE_MUTEX(client_sema);
 kmem_cache_t *stateowner_slab = NULL;
 kmem_cache_t *file_slab = NULL;
 kmem_cache_t *stateid_slab = NULL;
+kmem_cache_t *deleg_slab = NULL;
 
 void
 nfs4_lock_state(void)
@@ -138,8 +137,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
 
 	dprintk("NFSD alloc_init_deleg\n");
-	if ((dp = kmalloc(sizeof(struct nfs4_delegation),
-		GFP_KERNEL)) == NULL)
+	dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
+	if (dp == NULL)
 		return dp;
 	INIT_LIST_HEAD(&dp->dl_del_perfile);
 	INIT_LIST_HEAD(&dp->dl_del_perclnt);
@@ -164,7 +163,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	atomic_set(&dp->dl_count, 1);
 	list_add(&dp->dl_del_perfile, &fp->fi_del_perfile);
 	list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt);
-	alloc_delegation++;
 	return dp;
 }
 
@@ -173,8 +171,7 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
 {
 	if (atomic_dec_and_test(&dp->dl_count)) {
 		dprintk("NFSD: freeing dp %p\n",dp);
-		kfree(dp);
-		free_delegation++;
+		kmem_cache_free(deleg_slab, dp);
 	}
 }
 
@@ -1012,6 +1009,7 @@ nfsd4_free_slabs(void)
 	nfsd4_free_slab(&stateowner_slab);
 	nfsd4_free_slab(&file_slab);
 	nfsd4_free_slab(&stateid_slab);
+	nfsd4_free_slab(&deleg_slab);
 }
 
 static int
@@ -1029,6 +1027,10 @@ nfsd4_init_slabs(void)
 			sizeof(struct nfs4_stateid), 0, 0, NULL, NULL);
 	if (stateid_slab == NULL)
 		goto out_nomem;
+	deleg_slab = kmem_cache_create("nfsd4_delegations",
+			sizeof(struct nfs4_delegation), 0, 0, NULL, NULL);
+	if (deleg_slab == NULL)
+		goto out_nomem;
 	return 0;
 out_nomem:
 	nfsd4_free_slabs();
@@ -3312,9 +3314,6 @@ __nfs4_state_shutdown(void)
 			add_perclient, del_perclient);
 	dprintk("NFSD: vfsopen %d vfsclose %d\n",
 			vfsopen, vfsclose);
-	dprintk("NFSD: alloc_delegation %d free_delegation %d\n",
-			alloc_delegation, free_delegation);
-
 }
 
 void
-- 
cgit v1.2.3


From 6fa305ded4cc859deb4727ad9b25df0bbc064e99 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:06 -0700
Subject: [PATCH] nfsd4: remove debugging counters

These remaining debugging counters haven't proved that useful.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 25 +------------------------
 1 file changed, 1 insertion(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 927d2007d5a..f03a4180fa1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -65,14 +65,6 @@ static u32 nfs4_init;
 stateid_t zerostateid;             /* bits all 0 */
 stateid_t onestateid;              /* bits all 1 */
 
-/* debug counters */
-u32 list_add_perfile = 0; 
-u32 list_del_perfile = 0;
-u32 add_perclient = 0;
-u32 del_perclient = 0;
-u32 vfsopen = 0;
-u32 vfsclose = 0;
-
 /* forward declarations */
 struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
 static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
@@ -192,7 +184,6 @@ nfs4_close_delegation(struct nfs4_delegation *dp)
 	if (dp->dl_flock)
 		setlease(filp, F_UNLCK, &dp->dl_flock);
 	nfsd_close(filp);
-	vfsclose++;
 }
 
 /* Called under the state lock. */
@@ -1083,7 +1074,6 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 	list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
 	list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
 	list_add(&sop->so_perclient, &clp->cl_perclient);
-	add_perclient++;
 	sop->so_is_open_owner = 1;
 	sop->so_id = current_ownerid++;
 	sop->so_client = clp;
@@ -1117,10 +1107,8 @@ unhash_stateowner(struct nfs4_stateowner *sop)
 
 	list_del(&sop->so_idhash);
 	list_del(&sop->so_strhash);
-	if (sop->so_is_open_owner) {
+	if (sop->so_is_open_owner)
 		list_del(&sop->so_perclient);
-		del_perclient++;
-	}
 	list_del(&sop->so_perlockowner);
 	while (!list_empty(&sop->so_perfilestate)) {
 		stp = list_entry(sop->so_perfilestate.next, 
@@ -1151,7 +1139,6 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
 	INIT_LIST_HEAD(&stp->st_perfile);
 	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
 	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
-	list_add_perfile++;
 	list_add(&stp->st_perfile, &fp->fi_perfile);
 	stp->st_stateowner = sop;
 	stp->st_file = fp;
@@ -1171,14 +1158,12 @@ release_stateid(struct nfs4_stateid *stp, int flags)
 	struct file *filp = stp->st_vfs_file;
 
 	list_del(&stp->st_hash);
-	list_del_perfile++;
 	list_del(&stp->st_perfile);
 	list_del(&stp->st_perfilestate);
 	if (flags & OPEN_STATE) {
 		release_stateid_lockowners(stp);
 		stp->st_vfs_file = NULL;
 		nfsd_close(filp);
-		vfsclose++;
 	} else if (flags & LOCK_STATE)
 		locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
 	kmem_cache_free(stateid_slab, stp);
@@ -1645,7 +1630,6 @@ nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
 			return status;
 		}
 	}
-	vfsopen++;
 	*stpp = stp;
 	return 0;
 }
@@ -2650,7 +2634,6 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
 	INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */
 	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
 	list_add(&stp->st_perfile, &fp->fi_perfile);
-	list_add_perfile++;
 	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
 	stp->st_stateowner = sop;
 	stp->st_file = fp;
@@ -3308,12 +3291,6 @@ __nfs4_state_shutdown(void)
 	cancel_delayed_work(&laundromat_work);
 	flush_scheduled_work();
 	nfs4_init = 0;
-	dprintk("NFSD: list_add_perfile %d list_del_perfile %d\n",
-			list_add_perfile, list_del_perfile);
-	dprintk("NFSD: add_perclient %d del_perclient %d\n",
-			add_perclient, del_perclient);
-	dprintk("NFSD: vfsopen %d vfsclose %d\n",
-			vfsopen, vfsclose);
 }
 
 void
-- 
cgit v1.2.3


From 8beefa249371f55432394ac96864c83b0b309c28 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:08 -0700
Subject: [PATCH] nfsd4: rename nfs4_file fields

Trivial renaming patch:

I can never remember, while looking at various lists relating the nfsd4 state
structures, which are the "heads" and which are items on other lists, or which
structures are actually on the various lists.  The following convention helps
me: given structures foo and bar, with foo containing the head of a list of
bars, use "bars" for the name of the head of the list contained in the struct
foo, and use "per_foo" for the entries in the struct bars.

Go ahead and do this for struct nfs4_file.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f03a4180fa1..a84a80e8c0c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -153,7 +153,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 		        current_fh->fh_handle.fh_size);
 	dp->dl_time = 0;
 	atomic_set(&dp->dl_count, 1);
-	list_add(&dp->dl_del_perfile, &fp->fi_del_perfile);
+	list_add(&dp->dl_del_perfile, &fp->fi_delegations);
 	list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt);
 	return dp;
 }
@@ -954,8 +954,8 @@ alloc_init_file(struct inode *ino)
 	fp = kmem_cache_alloc(file_slab, GFP_KERNEL);
 	if (fp) {
 		INIT_LIST_HEAD(&fp->fi_hash);
-		INIT_LIST_HEAD(&fp->fi_perfile);
-		INIT_LIST_HEAD(&fp->fi_del_perfile);
+		INIT_LIST_HEAD(&fp->fi_stateids);
+		INIT_LIST_HEAD(&fp->fi_delegations);
 		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
 		fp->fi_inode = igrab(ino);
 		fp->fi_id = current_fileid++;
@@ -974,7 +974,7 @@ release_all_files(void)
 		while (!list_empty(&file_hashtbl[i])) {
 			fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash);
 			/* this should never be more than once... */
-			if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) {
+			if (!list_empty(&fp->fi_stateids) || !list_empty(&fp->fi_delegations)) {
 				printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp);
 			}
 			release_file(fp);
@@ -1139,7 +1139,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
 	INIT_LIST_HEAD(&stp->st_perfile);
 	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
 	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
-	list_add(&stp->st_perfile, &fp->fi_perfile);
+	list_add(&stp->st_perfile, &fp->fi_stateids);
 	stp->st_stateowner = sop;
 	stp->st_file = fp;
 	stp->st_stateid.si_boot = boot_time;
@@ -1204,7 +1204,7 @@ release_state_owner(struct nfs4_stateid *stp, int flag)
 	if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
 		move_to_close_lru(sop);
 	/* unused nfs4_file's are releseed. XXX slab cache? */
-	if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) {
+	if (list_empty(&fp->fi_stateids) && list_empty(&fp->fi_delegations)) {
 		release_file(fp);
 	}
 }
@@ -1294,7 +1294,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 	fp = find_file(ino);
 	if (fp) {
 	/* Search for conflicting share reservations */
-		list_for_each_entry(stp, &fp->fi_perfile, st_perfile) {
+		list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
 			if (test_bit(deny_type, &stp->st_deny_bmap) ||
 			    test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
 				return nfserr_share_denied;
@@ -1545,7 +1545,7 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
 {
 	struct nfs4_delegation *dp;
 
-	list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
+	list_for_each_entry(dp, &fp->fi_delegations, dl_del_perfile) {
 		if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
 			return dp;
 	}
@@ -1583,7 +1583,7 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_state
 	int status = nfserr_share_denied;
 	struct nfs4_stateowner *sop = open->op_stateowner;
 
-	list_for_each_entry(local, &fp->fi_perfile, st_perfile) {
+	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
 		/* ignore lock owners */
 		if (local->st_stateowner->so_is_open_owner == 0)
 			continue;
@@ -1830,7 +1830,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	            stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
 out:
 	/* take the opportunity to clean up unused state */
-	if (fp && list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile))
+	if (fp && list_empty(&fp->fi_stateids) && list_empty(&fp->fi_delegations))
 		release_file(fp);
 
 	/* CLAIM_PREVIOUS has different error returns */
@@ -2633,7 +2633,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
 	INIT_LIST_HEAD(&stp->st_perfilestate);
 	INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */
 	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
-	list_add(&stp->st_perfile, &fp->fi_perfile);
+	list_add(&stp->st_perfile, &fp->fi_stateids);
 	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
 	stp->st_stateowner = sop;
 	stp->st_file = fp;
-- 
cgit v1.2.3


From 13cd21845d6a9729ca95e36ae6e8c669623fbfd4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:10 -0700
Subject: [PATCH] nfsd4: reference count struct nfs4_file

Add a struct kref to each nfs4_file and take a reference to it from each
stateid and delegation that refers to it.  The atomicity guarantees are
overkill given that all this stuff is done under the single nfsd4 state lock,
but a) we'd like finer-grained locking some day, and b) this simplifies the
cleanup of the structures a bit, something that has previously been a bit
complicated and bug-prone.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 100 +++++++++++++++++++++++++++-------------------------
 1 file changed, 51 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a84a80e8c0c..6ba428afa43 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -111,7 +111,6 @@ opaque_hashval(const void *ptr, int nbytes)
 /* forward declarations */
 static void release_stateowner(struct nfs4_stateowner *sop);
 static void release_stateid(struct nfs4_stateid *stp, int flags);
-static void release_file(struct nfs4_file *fp);
 
 /*
  * Delegation state
@@ -121,6 +120,27 @@ static void release_file(struct nfs4_file *fp);
 spinlock_t recall_lock;
 static struct list_head del_recall_lru;
 
+static void
+free_nfs4_file(struct kref *kref)
+{
+	struct nfs4_file *fp = container_of(kref, struct nfs4_file, fi_ref);
+	list_del(&fp->fi_hash);
+	iput(fp->fi_inode);
+	kmem_cache_free(file_slab, fp);
+}
+
+static inline void
+put_nfs4_file(struct nfs4_file *fi)
+{
+	kref_put(&fi->fi_ref, free_nfs4_file);
+}
+
+static inline void
+get_nfs4_file(struct nfs4_file *fi)
+{
+	kref_get(&fi->fi_ref);
+}
+
 static struct nfs4_delegation *
 alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
 {
@@ -136,6 +156,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	INIT_LIST_HEAD(&dp->dl_del_perclnt);
 	INIT_LIST_HEAD(&dp->dl_recall_lru);
 	dp->dl_client = clp;
+	get_nfs4_file(fp);
 	dp->dl_file = fp;
 	dp->dl_flock = NULL;
 	get_file(stp->st_vfs_file);
@@ -163,6 +184,7 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
 {
 	if (atomic_dec_and_test(&dp->dl_count)) {
 		dprintk("NFSD: freeing dp %p\n",dp);
+		put_nfs4_file(dp->dl_file);
 		kmem_cache_free(deleg_slab, dp);
 	}
 }
@@ -953,6 +975,7 @@ alloc_init_file(struct inode *ino)
 
 	fp = kmem_cache_alloc(file_slab, GFP_KERNEL);
 	if (fp) {
+		kref_init(&fp->fi_ref);
 		INIT_LIST_HEAD(&fp->fi_hash);
 		INIT_LIST_HEAD(&fp->fi_stateids);
 		INIT_LIST_HEAD(&fp->fi_delegations);
@@ -964,24 +987,6 @@ alloc_init_file(struct inode *ino)
 	return NULL;
 }
 
-static void
-release_all_files(void)
-{
-	int i;
-	struct nfs4_file *fp;
-
-	for (i=0;i<FILE_HASH_SIZE;i++) {
-		while (!list_empty(&file_hashtbl[i])) {
-			fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash);
-			/* this should never be more than once... */
-			if (!list_empty(&fp->fi_stateids) || !list_empty(&fp->fi_delegations)) {
-				printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp);
-			}
-			release_file(fp);
-		}
-	}
-}
-
 static void
 nfsd4_free_slab(kmem_cache_t **slab)
 {
@@ -1141,6 +1146,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
 	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
 	list_add(&stp->st_perfile, &fp->fi_stateids);
 	stp->st_stateowner = sop;
+	get_nfs4_file(fp);
 	stp->st_file = fp;
 	stp->st_stateid.si_boot = boot_time;
 	stp->st_stateid.si_stateownerid = sop->so_id;
@@ -1166,18 +1172,11 @@ release_stateid(struct nfs4_stateid *stp, int flags)
 		nfsd_close(filp);
 	} else if (flags & LOCK_STATE)
 		locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
+	put_nfs4_file(stp->st_file);
 	kmem_cache_free(stateid_slab, stp);
 	stp = NULL;
 }
 
-static void
-release_file(struct nfs4_file *fp)
-{
-	list_del(&fp->fi_hash);
-	iput(fp->fi_inode);
-	kmem_cache_free(file_slab, fp);
-}	
-
 void
 move_to_close_lru(struct nfs4_stateowner *sop)
 {
@@ -1192,7 +1191,6 @@ void
 release_state_owner(struct nfs4_stateid *stp, int flag)
 {
 	struct nfs4_stateowner *sop = stp->st_stateowner;
-	struct nfs4_file *fp = stp->st_file;
 
 	dprintk("NFSD: release_state_owner\n");
 	release_stateid(stp, flag);
@@ -1203,10 +1201,6 @@ release_state_owner(struct nfs4_stateid *stp, int flag)
 	 */
 	if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
 		move_to_close_lru(sop);
-	/* unused nfs4_file's are releseed. XXX slab cache? */
-	if (list_empty(&fp->fi_stateids) && list_empty(&fp->fi_delegations)) {
-		release_file(fp);
-	}
 }
 
 static int
@@ -1236,8 +1230,10 @@ find_file(struct inode *ino)
 	struct nfs4_file *fp;
 
 	list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
-		if (fp->fi_inode == ino)
+		if (fp->fi_inode == ino) {
+			get_nfs4_file(fp);
 			return fp;
+		}
 	}
 	return NULL;
 }
@@ -1288,19 +1284,24 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 	struct inode *ino = current_fh->fh_dentry->d_inode;
 	struct nfs4_file *fp;
 	struct nfs4_stateid *stp;
+	int ret;
 
 	dprintk("NFSD: nfs4_share_conflict\n");
 
 	fp = find_file(ino);
-	if (fp) {
+	if (!fp)
+		return nfs_ok;
+	ret = nfserr_share_denied;
 	/* Search for conflicting share reservations */
-		list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
-			if (test_bit(deny_type, &stp->st_deny_bmap) ||
-			    test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
-				return nfserr_share_denied;
-		}
+	list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
+		if (test_bit(deny_type, &stp->st_deny_bmap) ||
+		    test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
+			goto out;
 	}
-	return nfs_ok;
+	ret = nfs_ok;
+out:
+	put_nfs4_file(fp);
+	return ret;
 }
 
 static inline void
@@ -1829,10 +1830,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	            stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
 	            stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
 out:
-	/* take the opportunity to clean up unused state */
-	if (fp && list_empty(&fp->fi_stateids) && list_empty(&fp->fi_delegations))
-		release_file(fp);
-
+	if (fp)
+		put_nfs4_file(fp);
 	/* CLAIM_PREVIOUS has different error returns */
 	nfs4_set_claim_prev(open, &status);
 	/*
@@ -2480,16 +2479,19 @@ find_stateid(stateid_t *stid, int flags)
 static struct nfs4_delegation *
 find_delegation_stateid(struct inode *ino, stateid_t *stid)
 {
-	struct nfs4_file *fp = NULL;
+	struct nfs4_file *fp;
+	struct nfs4_delegation *dl;
 
 	dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
                     stid->si_boot, stid->si_stateownerid,
                     stid->si_fileid, stid->si_generation);
 
 	fp = find_file(ino);
-	if (fp)
-		return find_delegation_file(fp, stid);
-	return NULL;
+	if (!fp)
+		return NULL;
+	dl = find_delegation_file(fp, stid);
+	put_nfs4_file(fp);
+	return dl;
 }
 
 /*
@@ -2636,6 +2638,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
 	list_add(&stp->st_perfile, &fp->fi_stateids);
 	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
 	stp->st_stateowner = sop;
+	get_nfs4_file(fp);
 	stp->st_file = fp;
 	stp->st_stateid.si_boot = boot_time;
 	stp->st_stateid.si_stateownerid = sop->so_id;
@@ -3287,7 +3290,6 @@ __nfs4_state_shutdown(void)
 		unhash_delegation(dp);
 	}
 
-	release_all_files();
 	cancel_delayed_work(&laundromat_work);
 	flush_scheduled_work();
 	nfs4_init = 0;
-- 
cgit v1.2.3


From 46be925fa6f4796e732e16a020fa0ef9d48ea7c8 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:11 -0700
Subject: [PATCH] knfsd: lockd: flush signals on shutdown

Silence another annoying "failed to contact portmap (errno -512)" on shutdown.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/lockd/svc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b82e470912e..6e242556b90 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -191,7 +191,9 @@ lockd(struct svc_rqst *rqstp)
 		printk(KERN_DEBUG
 			"lockd: new process, skipping host shutdown\n");
 	wake_up(&lockd_exit);
-		
+
+	flush_signals(current);
+
 	/* Exit the RPC thread */
 	svc_exit_thread(rqstp);
 
-- 
cgit v1.2.3


From 7e06b7f9e9537cb826f3cff95816fc4384b67806 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:13 -0700
Subject: [PATCH] knfsd: nfs4: hold filp while reading or writing

We're trying to read and write from a struct file that we may not hold a
reference to any more (since a close could be processed as soon as we drop the
state lock).

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ac4878ac221..d71f14517b9 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -45,6 +45,7 @@
 #include <linux/param.h>
 #include <linux/major.h>
 #include <linux/slab.h>
+#include <linux/file.h>
 
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
@@ -477,26 +478,27 @@ static inline int
 nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read)
 {
 	int status;
-	struct file *filp = NULL;
 
 	/* no need to check permission - this will be done in nfsd_read() */
 
+	read->rd_filp = NULL;
 	if (read->rd_offset >= OFFSET_MAX)
 		return nfserr_inval;
 
 	nfs4_lock_state();
 	/* check stateid */
 	if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid,
-					CHECK_FH | RD_STATE, &filp))) {
+				CHECK_FH | RD_STATE, &read->rd_filp))) {
 		dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
 		goto out;
 	}
+	if (read->rd_filp)
+		get_file(read->rd_filp);
 	status = nfs_ok;
 out:
 	nfs4_unlock_state();
 	read->rd_rqstp = rqstp;
 	read->rd_fhp = current_fh;
-	read->rd_filp = filp;
 	return status;
 }
 
@@ -633,6 +635,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
 		dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
 		goto out;
 	}
+	if (filp)
+		get_file(filp);
 	nfs4_unlock_state();
 
 	write->wr_bytes_written = write->wr_buflen;
@@ -644,6 +648,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
 	status =  nfsd_write(rqstp, current_fh, filp, write->wr_offset,
 			write->wr_vec, write->wr_vlen, write->wr_buflen,
 			&write->wr_how_written);
+	if (filp)
+		fput(filp);
 
 	if (status == nfserr_symlink)
 		status = nfserr_inval;
@@ -932,6 +938,9 @@ encode_op:
 			nfs4_put_stateowner(replay_owner);
 			replay_owner = NULL;
 		}
+		/* XXX Ugh, we need to get rid of this kind of special case: */
+		if (op->opnum == OP_READ && op->u.read.rd_filp)
+			fput(op->u.read.rd_filp);
 	}
 
 out:
-- 
cgit v1.2.3


From 5ba266d6323e957588712f6a7d31252cd6b797bb Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:15 -0700
Subject: [PATCH] knfsd: nfsd4: fix probe_callback

rpc_create_client was modified recently to do its own (synchronous) NULL ping
of the server.  We'd rather do that on our own, asynchronously, so that we
don't have to block the nfsd thread doing the probe, and so that setclientid
handling (hence, client mounts) can proceed normally whether the callback is
succesful or not.  (We can still function fine without the callback
channel--we just won't be able to give out delegations till it's verified to
work.)

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4callback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 634465e9cfc..38c3e1c47d8 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -427,7 +427,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	 * XXX AUTH_UNIX only - need AUTH_GSS....
 	 */
 	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
-	clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
+	clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
 	if (IS_ERR(clnt)) {
 		dprintk("NFSD: couldn't create callback client\n");
 		goto out_err;
-- 
cgit v1.2.3


From dfc8356570b6fcb4035c7d916ade5bbbe6c3b50a Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:16 -0700
Subject: [PATCH] knfsd: nfsd4: nfs4_check_open_reclaim cleanup

Minor cleanup.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6ba428afa43..2a5f00b0ee9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3155,11 +3155,7 @@ nfs4_find_reclaim_client(clientid_t *clid)
 int
 nfs4_check_open_reclaim(clientid_t *clid)
 {
-	struct nfs4_client_reclaim *crp;
-
-	if ((crp = nfs4_find_reclaim_client(clid)) == NULL)
-		return nfserr_reclaim_bad;
-	return nfs_ok;
+	return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
 }
 
 
-- 
cgit v1.2.3


From 58da282b733cff4caef805c6555c7a3b90772946 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:19 -0700
Subject: [PATCH] knfsd: nfsd4: create separate laundromat workqueue

We're running the laundromat work on the default kevent worker thread.  But
the laundromat takes the nfsv4 state semaphore, which is used for way too much
stuff, and the potential for deadlocks is high.  Better to have this on a
separate workqueue.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2a5f00b0ee9..9bec088e765 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1844,6 +1844,7 @@ out:
 	return status;
 }
 
+static struct workqueue_struct *laundry_wq;
 static struct work_struct laundromat_work;
 static void laundromat_main(void *);
 static DECLARE_WORK(laundromat_work, laundromat_main, NULL);
@@ -1951,7 +1952,7 @@ laundromat_main(void *not_used)
 
 	t = nfs4_laundromat();
 	dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
-	schedule_delayed_work(&laundromat_work, t*HZ);
+	queue_delayed_work(laundry_wq, &laundromat_work, t*HZ);
 }
 
 /* search ownerid_hashtbl[] and close_lru for stateid owner
@@ -3211,7 +3212,8 @@ __nfs4_state_init(void)
 		printk("NFSD: starting %ld-second grace period\n", grace_time);
 	grace_end = boot_time + grace_time;
 	INIT_WORK(&laundromat_work,laundromat_main, NULL);
-	schedule_delayed_work(&laundromat_work, NFSD_LEASE_TIME*HZ);
+	laundry_wq = create_singlethread_workqueue("nfsd4");
+	queue_delayed_work(laundry_wq, &laundromat_work, NFSD_LEASE_TIME*HZ);
 }
 
 int
@@ -3287,7 +3289,8 @@ __nfs4_state_shutdown(void)
 	}
 
 	cancel_delayed_work(&laundromat_work);
-	flush_scheduled_work();
+	flush_workqueue(laundry_wq);
+	destroy_workqueue(laundry_wq);
 	nfs4_init = 0;
 }
 
-- 
cgit v1.2.3


From d99a05adf8490cc171b7709554936b8f3ac9e362 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:21 -0700
Subject: [PATCH] knfsd: nfsd4: simplify lease changing

The only way the protocol gives to change the lease time on the fly is to
simulate a reboot.  We don't have that completely right in the current code;
among other things, we should probably put lockd in grace too while we do
this.

For now, let's just keep this simple, and wait till the next time nfsd starts
to register any changes in lease time.  If the administrator really wants to
change the lease time *now*, they can go ahead and bring nfsd down and then
back up again after changing the lease time.

Also remove the "if (reclaim_str_hashtbl_size == 0)" case, a shortcut which
skips the grace period if we know of no clients in need of recovery.  This
isn't going to work well with nlm.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 63 ++++++++++-------------------------------------------
 1 file changed, 12 insertions(+), 51 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9bec088e765..16c9a43218c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -53,7 +53,7 @@
 
 /* Globals */
 static time_t lease_time = 90;     /* default lease time */
-static time_t old_lease_time = 90; /* past incarnation lease time */
+static time_t user_lease_time = 90;
 static u32 nfs4_reclaim_init = 0;
 time_t boot_time;
 static time_t grace_end = 0;
@@ -3205,11 +3205,9 @@ __nfs4_state_init(void)
 	INIT_LIST_HEAD(&del_recall_lru);
 	spin_lock_init(&recall_lock);
 	boot_time = get_seconds();
-	grace_time = max(old_lease_time, lease_time);
-	if (reclaim_str_hashtbl_size == 0)
-		grace_time = 0;
-	if (grace_time)
-		printk("NFSD: starting %ld-second grace period\n", grace_time);
+	grace_time = max(user_lease_time, lease_time);
+	lease_time = user_lease_time;
+	printk("NFSD: starting %ld-second grace period\n", grace_time);
 	grace_end = boot_time + grace_time;
 	INIT_WORK(&laundromat_work,laundromat_main, NULL);
 	laundry_wq = create_singlethread_workqueue("nfsd4");
@@ -3307,53 +3305,16 @@ nfs4_state_shutdown(void)
 /*
  * Called when leasetime is changed.
  *
- * if nfsd is not started, simply set the global lease.
- *
- * if nfsd(s) are running, lease change requires nfsv4 state to be reset.
- * e.g: boot_time is reset, existing nfs4_client structs are
- * used to fill reclaim_str_hashtbl, then all state (except for the
- * reclaim_str_hashtbl) is re-initialized.
- *
- * if the old lease time is greater than the new lease time, the grace
- * period needs to be set to the old lease time to allow clients to reclaim
- * their state. XXX - we may want to set the grace period == lease time
- * after an initial grace period == old lease time
- *
- * if an error occurs in this process, the new lease is set, but the server
- * will not honor OPEN or LOCK reclaims, and will return nfserr_no_grace
- * which means OPEN/LOCK/READ/WRITE will fail during grace period.
- *
- * clients will attempt to reset all state with SETCLIENTID/CONFIRM, and
- * OPEN and LOCK reclaims.
+ * The only way the protocol gives us to handle on-the-fly lease changes is to
+ * simulate a reboot.  Instead of doing that, we just wait till the next time
+ * we start to register any changes in lease time.  If the administrator
+ * really wants to change the lease time *now*, they can go ahead and bring
+ * nfsd down and then back up again after changing the lease time.
  */
 void
 nfs4_reset_lease(time_t leasetime)
 {
-	struct nfs4_client *clp;
-	int i;
-
-	printk("NFSD: New leasetime %ld\n",leasetime);
-	if (!nfs4_init)
-		return;
-	nfs4_lock_state();
-	old_lease_time = lease_time;
-	lease_time = leasetime;
-
-	nfs4_release_reclaim();
-
-	/* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */
-	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
-		list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) {
-			if (!nfs4_client_to_reclaim(clp->cl_name.data,
-						clp->cl_name.len)) {
-				nfs4_release_reclaim();
-				goto init_state;
-			}
-		}
-	}
-init_state:
-	__nfs4_state_shutdown();
-	__nfs4_state_init();
-	nfs4_unlock_state();
+	lock_kernel();
+	user_lease_time = leasetime;
+	unlock_kernel();
 }
-
-- 
cgit v1.2.3


From 7b190fecfa33d72bcf74c9473134c2ad14ae9545 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:23 -0700
Subject: [PATCH] knfsd: nfsd4: delegation recovery

Allow recovery of delegations after reboot.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 36 ++++++++++++++++++++++++++++--------
 fs/nfsd/nfs4xdr.c   |  2 +-
 2 files changed, 29 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 16c9a43218c..0f6119714c8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1709,14 +1709,30 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 	int status, flag = 0;
 
 	flag = NFS4_OPEN_DELEGATE_NONE;
-	if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL
-	     || !atomic_read(&cb->cb_set) || !sop->so_confirmed)
-		goto out;
-
-	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
-		flag = NFS4_OPEN_DELEGATE_WRITE;
-	else
-		flag = NFS4_OPEN_DELEGATE_READ;
+	open->op_recall = 0;
+	switch (open->op_claim_type) {
+		case NFS4_OPEN_CLAIM_PREVIOUS:
+			if (!atomic_read(&cb->cb_set))
+				open->op_recall = 1;
+			flag = open->op_delegate_type;
+			if (flag == NFS4_OPEN_DELEGATE_NONE)
+				goto out;
+			break;
+		case NFS4_OPEN_CLAIM_NULL:
+			/* Let's not give out any delegations till everyone's
+			 * had the chance to reclaim theirs.... */
+			if (nfs4_in_grace())
+				goto out;
+			if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
+				goto out;
+			if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+				flag = NFS4_OPEN_DELEGATE_WRITE;
+			else
+				flag = NFS4_OPEN_DELEGATE_READ;
+			break;
+		default:
+			goto out;
+	}
 
 	dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
 	if (dp == NULL) {
@@ -1750,6 +1766,10 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 	             dp->dl_stateid.si_fileid,
 	             dp->dl_stateid.si_generation);
 out:
+	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
+			&& flag == NFS4_OPEN_DELEGATE_NONE
+			&& open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
+		printk("NFSD: WARNING: refusing delegation reclaim\n");
 	open->op_delegate_type = flag;
 }
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0ae1467c3bc..cfe978a72ce 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1972,7 +1972,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open
 	case NFS4_OPEN_DELEGATE_READ:
 		RESERVE_SPACE(20 + sizeof(stateid_t));
 		WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
-		WRITE32(0);
+		WRITE32(open->op_recall);
 
 		/*
 		 * TODO: ACE's in delegations
-- 
cgit v1.2.3


From 76a3550ec50ed86885a10a767ebaebb7c9104721 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:26 -0700
Subject: [PATCH] knfsd: nfsd4: rename nfs4_state_init

Somewhat gratuitous rename to simplify following patch.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 6 +++---
 fs/nfsd/nfssvc.c    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0f6119714c8..e00b3472851 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3185,7 +3185,7 @@ nfs4_check_open_reclaim(clientid_t *clid)
  */
 
 static void
-__nfs4_state_init(void)
+__nfs4_state_start(void)
 {
 	int i;
 	time_t grace_time;
@@ -3235,7 +3235,7 @@ __nfs4_state_init(void)
 }
 
 int
-nfs4_state_init(void)
+nfs4_state_start(void)
 {
 	int status;
 
@@ -3244,7 +3244,7 @@ nfs4_state_init(void)
 	status = nfsd4_init_slabs();
 	if (status)
 		return status;
-	__nfs4_state_init();
+	__nfs4_state_start();
 	nfs4_init = 1;
 	return 0;
 }
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 904df604e86..07b9a065e9d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -95,7 +95,7 @@ nfsd_svc(unsigned short port, int nrservs)
 	error =	nfsd_racache_init(2*nrservs);
 	if (error<0)
 		goto out;
-	error = nfs4_state_init();
+	error = nfs4_state_start();
 	if (error<0)
 		goto out;
 	if (!nfsd_serv) {
-- 
cgit v1.2.3


From ac4d8ff2a57179de3ef7834c6ab3fac430b0a05d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:30 -0700
Subject: [PATCH] knfsd: nfsd4: clean up state initialization

Separate out stuff that needs initialization on startup from stuff that only
needs initialization on module init from static data.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 35 +++++++++++++++++------------------
 fs/nfsd/nfsctl.c    |  1 +
 2 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e00b3472851..1f68ce36e72 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -117,7 +117,7 @@ static void release_stateid(struct nfs4_stateid *stp, int flags);
  */
 
 /* recall_lock protects the del_recall_lru */
-spinlock_t recall_lock;
+spinlock_t recall_lock = SPIN_LOCK_UNLOCKED;
 static struct list_head del_recall_lru;
 
 static void
@@ -3179,23 +3179,13 @@ nfs4_check_open_reclaim(clientid_t *clid)
 	return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
 }
 
+/* initialization to perform at module load time: */
 
-/* 
- * Start and stop routines
- */
-
-static void
-__nfs4_state_start(void)
+void
+nfs4_state_init(void)
 {
 	int i;
-	time_t grace_time;
 
-	if (!nfs4_reclaim_init) {
-		for (i = 0; i < CLIENT_HASH_SIZE; i++)
-			INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
-		reclaim_str_hashtbl_size = 0;
-		nfs4_reclaim_init = 1;
-	}
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&conf_id_hashtbl[i]);
 		INIT_LIST_HEAD(&conf_str_hashtbl[i]);
@@ -3217,19 +3207,28 @@ __nfs4_state_start(void)
 		INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]);
 		INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
 	}
-	memset(&zerostateid, 0, sizeof(stateid_t));
 	memset(&onestateid, ~0, sizeof(stateid_t));
-
 	INIT_LIST_HEAD(&close_lru);
 	INIT_LIST_HEAD(&client_lru);
 	INIT_LIST_HEAD(&del_recall_lru);
-	spin_lock_init(&recall_lock);
+	for (i = 0; i < CLIENT_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
+	reclaim_str_hashtbl_size = 0;
+	nfs4_reclaim_init = 1;
+}
+
+/* initialization to perform when the nfsd service is started: */
+
+static void
+__nfs4_state_start(void)
+{
+	time_t grace_time;
+
 	boot_time = get_seconds();
 	grace_time = max(user_lease_time, lease_time);
 	lease_time = user_lease_time;
 	printk("NFSD: starting %ld-second grace period\n", grace_time);
 	grace_end = boot_time + grace_time;
-	INIT_WORK(&laundromat_work,laundromat_main, NULL);
 	laundry_wq = create_singlethread_workqueue("nfsd4");
 	queue_delayed_work(laundry_wq, &laundromat_work, NFSD_LEASE_TIME*HZ);
 }
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 161afdcb8f7..3d56531a7a0 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -397,6 +397,7 @@ static int __init init_nfsd(void)
 	nfsd_cache_init();	/* RPC reply cache */
 	nfsd_export_init();	/* Exports table */
 	nfsd_lockd_init();	/* lockd->nfsd callbacks */
+	nfs4_state_init();	/* NFSv4 locking state */
 #ifdef CONFIG_NFSD_V4
 	nfsd_idmap_init();      /* Name to ID mapping */
 #endif /* CONFIG_NFSD_V4 */
-- 
cgit v1.2.3


From 707d4ab7b3aa6d1f7a7d2cd123fb83ba9a528205 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:32 -0700
Subject: [PATCH] knfsd: nfsd4: remove nfs4_reclaim_init

nfs4_reclaim_init is no longer performing any useful function.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1f68ce36e72..fb9b4eb4302 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -54,7 +54,6 @@
 /* Globals */
 static time_t lease_time = 90;     /* default lease time */
 static time_t user_lease_time = 90;
-static u32 nfs4_reclaim_init = 0;
 time_t boot_time;
 static time_t grace_end = 0;
 static u32 current_clientid = 1;
@@ -3128,7 +3127,6 @@ nfs4_release_reclaim(void)
 	struct nfs4_client_reclaim *crp = NULL;
 	int i;
 
-	BUG_ON(!nfs4_reclaim_init);
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		while (!list_empty(&reclaim_str_hashtbl[i])) {
 			crp = list_entry(reclaim_str_hashtbl[i].next,
@@ -3214,7 +3212,6 @@ nfs4_state_init(void)
 	for (i = 0; i < CLIENT_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
 	reclaim_str_hashtbl_size = 0;
-	nfs4_reclaim_init = 1;
 }
 
 /* initialization to perform when the nfsd service is started: */
-- 
cgit v1.2.3


From bd0b1e954e3ba3e5d2cab941458cf98206471bd2 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:35 -0700
Subject: [PATCH] knfsd: nfsd4: idmap initialization

Adopt standard kernel style by defining a no-op function instead of putting
ifdef's in the code where the function is called.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfsctl.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3d56531a7a0..3da43a3ed32 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -398,9 +398,7 @@ static int __init init_nfsd(void)
 	nfsd_export_init();	/* Exports table */
 	nfsd_lockd_init();	/* lockd->nfsd callbacks */
 	nfs4_state_init();	/* NFSv4 locking state */
-#ifdef CONFIG_NFSD_V4
 	nfsd_idmap_init();      /* Name to ID mapping */
-#endif /* CONFIG_NFSD_V4 */
 	if (proc_mkdir("fs/nfs", NULL)) {
 		struct proc_dir_entry *entry;
 		entry = create_proc_entry("fs/nfs/exports", 0, NULL);
@@ -427,9 +425,7 @@ static void __exit exit_nfsd(void)
 	remove_proc_entry("fs/nfs", NULL);
 	nfsd_stat_shutdown();
 	nfsd_lockd_shutdown();
-#ifdef CONFIG_NFSD_V4
 	nfsd_idmap_shutdown();
-#endif /* CONFIG_NFSD_V4 */
 	unregister_filesystem(&nfsd_fs_type);
 }
 
-- 
cgit v1.2.3


From 7dea9d280c96f90382ec5d5709433e66a0993ec9 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:42 -0700
Subject: [PATCH] knfsd: nfsd4: setclientid simplification

We can be a little more concise here.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fb9b4eb4302..2a59d176e69 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -674,23 +674,13 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		 * or different ip_address
 		 */
 		status = nfserr_clid_inuse;
-		if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)) {
+		if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)
+				|| clp->cl_addr != ip_addr) {
 			printk("NFSD: setclientid: string in use by client"
 			"(clientid %08x/%08x)\n",
 			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
 			goto out;
 		}
-		if (clp->cl_addr != ip_addr) { 
-			printk("NFSD: setclientid: string in use by client"
-			"(clientid %08x/%08x)\n",
-			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-			goto out;
-		}
-
-		/* 
-	 	 * cl_name match from a previous SETCLIENTID operation
-	 	 * XXX check for additional matches?
-		 */
 		conf = clp;
 		break;
 	}
-- 
cgit v1.2.3


From a55370a3c0106106a975c5a09cee800611d0cf50 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:52 -0700
Subject: [PATCH] knfsd: nfsd4: reboot hash

For the purposes of reboot recovery we keep a directory with subdirectories
each having a name that is the ascii hex representation of the md5 sum of a
client identifier for an active client.

This adds the code to calculate that name.  We also use it for the purposes of
comparing clients, so if someone ever manages to find two client names that
are md5 collisions, then we'll return clid_inuse to the second.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Kconfig            |  2 ++
 fs/nfsd/Makefile      |  2 +-
 fs/nfsd/nfs4recover.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfs4state.c   | 80 ++++++++++++++++++++----------------------
 4 files changed, 138 insertions(+), 43 deletions(-)
 create mode 100644 fs/nfsd/nfs4recover.c

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index a7c0cc3203c..5c704d05627 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1413,6 +1413,8 @@ config NFSD_V4
 	bool "Provide NFSv4 server support (EXPERIMENTAL)"
 	depends on NFSD_V3 && EXPERIMENTAL
 	select NFSD_TCP
+	select CRYPTO_MD5
+	select CRYPTO
 	help
 	  If you would like to include the NFSv4 server as well as the NFSv2
 	  and NFSv3 servers, say Y here.  This feature is experimental, and
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9f043f44c92..ce341dc76d5 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -10,5 +10,5 @@ nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o
 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
 nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
-			   nfs4acl.o nfs4callback.o
+			   nfs4acl.o nfs4callback.o nfs4recover.o
 nfsd-objs		:= $(nfsd-y)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
new file mode 100644
index 00000000000..841a305d794
--- /dev/null
+++ b/fs/nfsd/nfs4recover.c
@@ -0,0 +1,97 @@
+/*
+*  linux/fs/nfsd/nfs4recover.c
+*
+*  Copyright (c) 2004 The Regents of the University of Michigan.
+*  All rights reserved.
+*
+*  Andy Adamson <andros@citi.umich.edu>
+*
+*  Redistribution and use in source and binary forms, with or without
+*  modification, are permitted provided that the following conditions
+*  are met:
+*
+*  1. Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*  2. Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in the
+*     documentation and/or other materials provided with the distribution.
+*  3. Neither the name of the University nor the names of its
+*     contributors may be used to endorse or promote products derived
+*     from this software without specific prior written permission.
+*
+*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfs4.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/xdr4.h>
+#include <asm/uaccess.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+
+
+#define NFSDDBG_FACILITY                NFSDDBG_PROC
+
+static void
+md5_to_hex(char *out, char *md5)
+{
+	int i;
+
+	for (i=0; i<16; i++) {
+		unsigned char c = md5[i];
+
+		*out++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
+		*out++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
+	}
+	*out = '\0';
+}
+
+int
+nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
+{
+	struct xdr_netobj cksum;
+	struct crypto_tfm *tfm;
+	struct scatterlist sg[1];
+	int status = nfserr_resource;
+
+	dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
+			clname->len, clname->data);
+	tfm = crypto_alloc_tfm("md5", 0);
+	if (tfm == NULL)
+		goto out;
+	cksum.len = crypto_tfm_alg_digestsize(tfm);
+	cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+	if (cksum.data == NULL)
+ 		goto out;
+	crypto_digest_init(tfm);
+
+	sg[0].page = virt_to_page(clname->data);
+	sg[0].offset = offset_in_page(clname->data);
+	sg[0].length = clname->len;
+
+	crypto_digest_update(tfm, sg, 1);
+	crypto_digest_final(tfm, cksum.data);
+
+	md5_to_hex(dname, cksum.data);
+
+	kfree(cksum.data);
+	status = nfs_ok;
+out:
+	if (tfm)
+		crypto_free_tfm(tfm);
+	return status;
+}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2a59d176e69..0be0b37c84e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -231,8 +231,8 @@ unhash_delegation(struct nfs4_delegation *dp)
 
 #define clientid_hashval(id) \
 	((id) & CLIENT_HASH_MASK)
-#define clientstr_hashval(name, namelen) \
-	(opaque_hashval((name), (namelen)) & CLIENT_HASH_MASK)
+#define clientstr_hashval(name) \
+	(opaque_hashval((name), 8) & CLIENT_HASH_MASK)
 /*
  * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
  * used in reboot/reset lease grace period processing
@@ -366,11 +366,12 @@ expire_client(struct nfs4_client *clp)
 }
 
 static struct nfs4_client *
-create_client(struct xdr_netobj name) {
+create_client(struct xdr_netobj name, char *recdir) {
 	struct nfs4_client *clp;
 
 	if (!(clp = alloc_client(name)))
 		goto out;
+	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
 	atomic_set(&clp->cl_count, 1);
 	atomic_set(&clp->cl_callback.cb_set, 0);
 	clp->cl_callback.cb_parsed = 0;
@@ -403,11 +404,9 @@ copy_cred(struct svc_cred *target, struct svc_cred *source) {
 	get_group_info(target->cr_group_info);
 }
 
-static int
-cmp_name(struct xdr_netobj *n1, struct xdr_netobj *n2) {
-	if (!n1 || !n2)
-		return 0;
-	return((n1->len == n2->len) && !memcmp(n1->data, n2->data, n2->len));
+static inline int
+same_name(const char *n1, const char *n2) {
+	return 0 == memcmp(n1, n2, HEXDIR_LEN);
 }
 
 static int
@@ -479,8 +478,7 @@ move_to_confirmed(struct nfs4_client *clp)
 	list_del_init(&clp->cl_strhash);
 	list_del_init(&clp->cl_idhash);
 	list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
-	strhashval = clientstr_hashval(clp->cl_name.data, 
-			clp->cl_name.len);
+	strhashval = clientstr_hashval(clp->cl_recdir);
 	list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
 	renew_client(clp);
 }
@@ -651,22 +649,27 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 	unsigned int 		strhashval;
 	struct nfs4_client *	conf, * unconf, * new, * clp;
 	int 			status;
+	char                    dname[HEXDIR_LEN];
 	
 	status = nfserr_inval;
 	if (!check_name(clname))
 		goto out;
 
+	status = nfs4_make_rec_clidname(dname, &clname);
+	if (status)
+		goto out;
+
 	/* 
 	 * XXX The Duplicate Request Cache (DRC) has been checked (??)
 	 * We get here on a DRC miss.
 	 */
 
-	strhashval = clientstr_hashval(clname.data, clname.len);
+	strhashval = clientstr_hashval(dname);
 
 	conf = NULL;
 	nfs4_lock_state();
 	list_for_each_entry(clp, &conf_str_hashtbl[strhashval], cl_strhash) {
-		if (!cmp_name(&clp->cl_name, &clname))
+		if (!same_name(clp->cl_recdir, dname))
 			continue;
 		/* 
 		 * CASE 0:
@@ -686,7 +689,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 	}
 	unconf = NULL;
 	list_for_each_entry(clp, &unconf_str_hashtbl[strhashval], cl_strhash) {
-		if (!cmp_name(&clp->cl_name, &clname))
+		if (!same_name(clp->cl_recdir, dname))
 			continue;
 		/* cl_name match from a previous SETCLIENTID operation */
 		unconf = clp;
@@ -700,7 +703,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		 */
 		if (unconf)
 			expire_client(unconf);
-		if (!(new = create_client(clname)))
+		new = create_client(clname, dname);
+		if (new == NULL)
 			goto out;
 		copy_verf(new, &clverifier);
 		new->cl_addr = ip_addr;
@@ -728,7 +732,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		     cmp_clid(&unconf->cl_clientid, &conf->cl_clientid)) {
 				expire_client(unconf);
 		}
-		if (!(new = create_client(clname)))
+		new = create_client(clname, dname);
+		if (new == NULL)
 			goto out;
 		copy_verf(new,&conf->cl_verifier);
 		new->cl_addr = ip_addr;
@@ -746,7 +751,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		 * using input clverifier, clname, and callback info
 		 * and generate a new cl_clientid and cl_confirm.
 		 */
-		if (!(new = create_client(clname)))
+		new = create_client(clname, dname);
+		if (new == NULL)
 			goto out;
 		copy_verf(new,&clverifier);
 		new->cl_addr = ip_addr;
@@ -772,7 +778,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		 * new cl_verifier and a new cl_confirm
 		 */
 		expire_client(unconf);
-		if (!(new = create_client(clname)))
+		new = create_client(clname, dname);
+		if (new == NULL)
 			goto out;
 		copy_verf(new,&clverifier);
 		new->cl_addr = ip_addr;
@@ -856,7 +863,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	if ((conf && unconf) && 
 	    (cmp_verf(&unconf->cl_confirm, &confirm)) &&
 	    (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
-	    (cmp_name(&conf->cl_name,&unconf->cl_name))  &&
+	    (same_name(conf->cl_recdir,unconf->cl_recdir))  &&
 	    (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
 		if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) 
 			status = nfserr_clid_inuse;
@@ -876,7 +883,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	if ((conf && !unconf) || 
 	    ((conf && unconf) && 
 	     (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
-	      !cmp_name(&conf->cl_name, &unconf->cl_name)))) {
+	      !same_name(conf->cl_recdir, unconf->cl_recdir)))) {
 		if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
 			status = nfserr_clid_inuse;
 		} else {
@@ -3074,39 +3081,28 @@ out:
 }
 
 static inline struct nfs4_client_reclaim *
-alloc_reclaim(int namelen)
+alloc_reclaim(void)
 {
-	struct nfs4_client_reclaim *crp = NULL;
-
-	crp = kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
-	if (!crp)
-		return NULL;
-	crp->cr_name.data = kmalloc(namelen, GFP_KERNEL);
-	if (!crp->cr_name.data) {
-		kfree(crp);
-		return NULL;
-	}
-	return crp;
+	return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
 }
 
 /*
  * failure => all reset bets are off, nfserr_no_grace...
  */
 static int
-nfs4_client_to_reclaim(char *name, int namlen)
+nfs4_client_to_reclaim(char *name)
 {
 	unsigned int strhashval;
 	struct nfs4_client_reclaim *crp = NULL;
 
-	dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", namlen, name);
-	crp = alloc_reclaim(namlen);
+	dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
+	crp = alloc_reclaim();
 	if (!crp)
 		return 0;
-	strhashval = clientstr_hashval(name, namlen);
+	strhashval = clientstr_hashval(name);
 	INIT_LIST_HEAD(&crp->cr_strhash);
 	list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
-	memcpy(crp->cr_name.data, name, namlen);
-	crp->cr_name.len = namlen;
+	memcpy(crp->cr_recdir, name, HEXDIR_LEN);
 	reclaim_str_hashtbl_size++;
 	return 1;
 }
@@ -3122,7 +3118,6 @@ nfs4_release_reclaim(void)
 			crp = list_entry(reclaim_str_hashtbl[i].next,
 			                struct nfs4_client_reclaim, cr_strhash);
 			list_del(&crp->cr_strhash);
-			kfree(crp->cr_name.data);
 			kfree(crp);
 			reclaim_str_hashtbl_size--;
 		}
@@ -3145,13 +3140,14 @@ nfs4_find_reclaim_client(clientid_t *clid)
 	if (clp == NULL)
 		return NULL;
 
-	dprintk("NFSD: nfs4_find_reclaim_client for %.*s\n",
-		            clp->cl_name.len, clp->cl_name.data);
+	dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
+		            clp->cl_name.len, clp->cl_name.data,
+			    clp->cl_recdir);
 
 	/* find clp->cl_name in reclaim_str_hashtbl */
-	strhashval = clientstr_hashval(clp->cl_name.data, clp->cl_name.len);
+	strhashval = clientstr_hashval(clp->cl_recdir);
 	list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) {
-		if (cmp_name(&crp->cr_name, &clp->cl_name)) {
+		if (same_name(crp->cr_recdir, clp->cl_recdir)) {
 			return crp;
 		}
 	}
-- 
cgit v1.2.3


From 28ce6054f1de89834e5e9522f9e451cd88a35f85 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:56 -0700
Subject: [PATCH] knfsd: nfsd4: add find_{un}conf_by_str functions to simplify
 setclientid

Minor setclientid cleanup

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 49 +++++++++++++++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0be0b37c84e..72f2b6fdae6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -509,6 +509,30 @@ find_unconfirmed_client(clientid_t *clid)
 	return NULL;
 }
 
+static struct nfs4_client *
+find_confirmed_client_by_str(const char *dname, unsigned int hashval)
+{
+	struct nfs4_client *clp;
+
+	list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) {
+		if (same_name(clp->cl_recdir, dname))
+			return clp;
+	}
+	return NULL;
+}
+
+static struct nfs4_client *
+find_unconfirmed_client_by_str(const char *dname, unsigned int hashval)
+{
+	struct nfs4_client *clp;
+
+	list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) {
+		if (same_name(clp->cl_recdir, dname))
+			return clp;
+	}
+	return NULL;
+}
+
 /* a helper function for parse_callback */
 static int
 parse_octet(unsigned int *lenp, char **addrp)
@@ -647,7 +671,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 	};
 	nfs4_verifier		clverifier = setclid->se_verf;
 	unsigned int 		strhashval;
-	struct nfs4_client *	conf, * unconf, * new, * clp;
+	struct nfs4_client	*conf, *unconf, *new;
 	int 			status;
 	char                    dname[HEXDIR_LEN];
 	
@@ -666,35 +690,24 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 
 	strhashval = clientstr_hashval(dname);
 
-	conf = NULL;
 	nfs4_lock_state();
-	list_for_each_entry(clp, &conf_str_hashtbl[strhashval], cl_strhash) {
-		if (!same_name(clp->cl_recdir, dname))
-			continue;
+	conf = find_confirmed_client_by_str(dname, strhashval);
+	if (conf) {
 		/* 
 		 * CASE 0:
 		 * clname match, confirmed, different principal
 		 * or different ip_address
 		 */
 		status = nfserr_clid_inuse;
-		if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)
-				|| clp->cl_addr != ip_addr) {
+		if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred)
+				|| conf->cl_addr != ip_addr) {
 			printk("NFSD: setclientid: string in use by client"
 			"(clientid %08x/%08x)\n",
-			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+			conf->cl_clientid.cl_boot, conf->cl_clientid.cl_id);
 			goto out;
 		}
-		conf = clp;
-		break;
-	}
-	unconf = NULL;
-	list_for_each_entry(clp, &unconf_str_hashtbl[strhashval], cl_strhash) {
-		if (!same_name(clp->cl_recdir, dname))
-			continue;
-		/* cl_name match from a previous SETCLIENTID operation */
-		unconf = clp;
-		break;
 	}
+	unconf = find_unconfirmed_client_by_str(dname, strhashval);
 	status = nfserr_resource;
 	if (!conf) {
 		/* 
-- 
cgit v1.2.3


From a76b4319ca85b5e3a8098470c623a272d40271cd Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:01 -0700
Subject: [PATCH] knfsd: nfsd4: grace period end

For the purposes of reboot recovery, we want to do some work during the
transition period at the end of the grace period.  Some of that work must be
guaranteed to have a certain relationship with the end of the grace period, so
we want to control the transition there.

Our approach is to modify the in_grace() checks to consult a global variable
instead of checking the time directly, to schedule the first run of the
laundromat thread at the end of the grace period, and to set the global
end-of-grace-period there.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 72f2b6fdae6..1b2f67f5eef 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -55,7 +55,7 @@
 static time_t lease_time = 90;     /* default lease time */
 static time_t user_lease_time = 90;
 time_t boot_time;
-static time_t grace_end = 0;
+static int in_grace = 1;
 static u32 current_clientid = 1;
 static u32 current_ownerid = 1;
 static u32 current_fileid = 1;
@@ -1908,6 +1908,13 @@ out:
 	return status;
 }
 
+static void
+end_grace(void)
+{
+	dprintk("NFSD: end of grace period\n");
+	in_grace = 0;
+}
+
 time_t
 nfs4_laundromat(void)
 {
@@ -1922,6 +1929,8 @@ nfs4_laundromat(void)
 	nfs4_lock_state();
 
 	dprintk("NFSD: laundromat service - starting\n");
+	if (in_grace)
+		end_grace();
 	list_for_each_safe(pos, next, &client_lru) {
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
@@ -3223,10 +3232,10 @@ __nfs4_state_start(void)
 	boot_time = get_seconds();
 	grace_time = max(user_lease_time, lease_time);
 	lease_time = user_lease_time;
+	in_grace = 1;
 	printk("NFSD: starting %ld-second grace period\n", grace_time);
-	grace_end = boot_time + grace_time;
 	laundry_wq = create_singlethread_workqueue("nfsd4");
-	queue_delayed_work(laundry_wq, &laundromat_work, NFSD_LEASE_TIME*HZ);
+	queue_delayed_work(laundry_wq, &laundromat_work, grace_time*HZ);
 }
 
 int
@@ -3247,14 +3256,7 @@ nfs4_state_start(void)
 int
 nfs4_in_grace(void)
 {
-	return get_seconds() < grace_end;
-}
-
-void
-set_no_grace(void)
-{
-	printk("NFSD: ERROR in reboot recovery.  State reclaims will fail.\n");
-	grace_end = get_seconds();
+	return in_grace;
 }
 
 time_t
-- 
cgit v1.2.3


From fd39ca9a808c6026989bc2188868a0574eb37108 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:03 -0700
Subject: [PATCH] knfsd: nfsd4: make needlessly global code static

This patch contains the following possible cleanups:

- make needlessly global code static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4acl.c      |  4 ++--
 fs/nfsd/nfs4callback.c |  7 +++----
 fs/nfsd/nfs4idmap.c    | 12 +++++------
 fs/nfsd/nfs4state.c    | 57 ++++++++++++++++++++++++++------------------------
 fs/nfsd/nfs4xdr.c      |  4 ++--
 5 files changed, 43 insertions(+), 41 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 11ebf6c4aa5..4a2105552ac 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -125,7 +125,7 @@ static short ace2type(struct nfs4_ace *);
 static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int);
 static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int);
 int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t);
-int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *);
+static int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *);
 
 struct nfs4_acl *
 nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl,
@@ -775,7 +775,7 @@ out_err:
 	return pacl;
 }
 
-int
+static int
 nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
 {
 	struct list_head *h, *n;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 38c3e1c47d8..68bb245491f 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -54,7 +54,6 @@
 
 /* declarations */
 static void nfs4_cb_null(struct rpc_task *task);
-extern spinlock_t recall_lock;
 
 /* Index of predefined Linux callback client operations */
 
@@ -329,12 +328,12 @@ out:
         .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,  \
 }
 
-struct rpc_procinfo     nfs4_cb_procedures[] = {
+static struct rpc_procinfo     nfs4_cb_procedures[] = {
     PROC(CB_NULL,      NULL,     enc_cb_null,     dec_cb_null),
     PROC(CB_RECALL,    COMPOUND,   enc_cb_recall,      dec_cb_recall),
 };
 
-struct rpc_version              nfs_cb_version4 = {
+static struct rpc_version       nfs_cb_version4 = {
         .number                 = 1,
         .nrprocs                = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]),
         .procs                  = nfs4_cb_procedures
@@ -348,7 +347,7 @@ static struct rpc_version *	nfs_cb_version[] = {
 /*
  * Use the SETCLIENTID credential
  */
-struct rpc_cred *
+static struct rpc_cred *
 nfsd4_lookupcred(struct nfs4_client *clp, int taskflags)
 {
         struct auth_cred acred;
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 4ba540841cf..5605a26efc5 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -104,7 +104,7 @@ ent_update(struct ent *new, struct ent *itm)
 	ent_init(new, itm);
 }
 
-void
+static void
 ent_put(struct cache_head *ch, struct cache_detail *cd)
 {
 	if (cache_put(ch, cd)) {
@@ -186,7 +186,7 @@ warn_no_idmapd(struct cache_detail *detail)
 static int         idtoname_parse(struct cache_detail *, char *, int);
 static struct ent *idtoname_lookup(struct ent *, int);
 
-struct cache_detail idtoname_cache = {
+static struct cache_detail idtoname_cache = {
 	.hash_size	= ENT_HASHMAX,
 	.hash_table	= idtoname_table,
 	.name		= "nfs4.idtoname",
@@ -277,7 +277,7 @@ nametoid_hash(struct ent *ent)
 	return hash_str(ent->name, ENT_HASHBITS);
 }
 
-void
+static void
 nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
     int *blen)
 {
@@ -317,9 +317,9 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
 }
 
 static struct ent *nametoid_lookup(struct ent *, int);
-int                nametoid_parse(struct cache_detail *, char *, int);
+static int         nametoid_parse(struct cache_detail *, char *, int);
 
-struct cache_detail nametoid_cache = {
+static struct cache_detail nametoid_cache = {
 	.hash_size	= ENT_HASHMAX,
 	.hash_table	= nametoid_table,
 	.name		= "nfs4.nametoid",
@@ -330,7 +330,7 @@ struct cache_detail nametoid_cache = {
 	.warn_no_listener = warn_no_idmapd,
 };
 
-int
+static int
 nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
 {
 	struct ent ent, *res;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1b2f67f5eef..8a5f777b1e9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -54,18 +54,21 @@
 /* Globals */
 static time_t lease_time = 90;     /* default lease time */
 static time_t user_lease_time = 90;
-time_t boot_time;
+static time_t boot_time;
 static int in_grace = 1;
 static u32 current_clientid = 1;
 static u32 current_ownerid = 1;
 static u32 current_fileid = 1;
 static u32 current_delegid = 1;
 static u32 nfs4_init;
-stateid_t zerostateid;             /* bits all 0 */
-stateid_t onestateid;              /* bits all 1 */
+static stateid_t zerostateid;             /* bits all 0 */
+static stateid_t onestateid;              /* bits all 1 */
+
+#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
+#define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
 
 /* forward declarations */
-struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
+static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
 static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
 static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
 
@@ -77,10 +80,10 @@ static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
  */
 static DECLARE_MUTEX(client_sema);
 
-kmem_cache_t *stateowner_slab = NULL;
-kmem_cache_t *file_slab = NULL;
-kmem_cache_t *stateid_slab = NULL;
-kmem_cache_t *deleg_slab = NULL;
+static kmem_cache_t *stateowner_slab = NULL;
+static kmem_cache_t *file_slab = NULL;
+static kmem_cache_t *stateid_slab = NULL;
+static kmem_cache_t *deleg_slab = NULL;
 
 void
 nfs4_lock_state(void)
@@ -116,7 +119,7 @@ static void release_stateid(struct nfs4_stateid *stp, int flags);
  */
 
 /* recall_lock protects the del_recall_lru */
-spinlock_t recall_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t recall_lock = SPIN_LOCK_UNLOCKED;
 static struct list_head del_recall_lru;
 
 static void
@@ -456,7 +459,7 @@ check_name(struct xdr_netobj name) {
 	return 1;
 }
 
-void
+static void
 add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
 {
 	unsigned int idhashval;
@@ -468,7 +471,7 @@ add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
 	clp->cl_time = get_seconds();
 }
 
-void
+static void
 move_to_confirmed(struct nfs4_client *clp)
 {
 	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
@@ -567,7 +570,7 @@ parse_octet(unsigned int *lenp, char **addrp)
 }
 
 /* parse and set the setclientid ipv4 callback address */
-int
+static int
 parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
 {
 	int temp = 0;
@@ -603,7 +606,7 @@ parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigne
 	return 1;
 }
 
-void
+static void
 gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 {
 	struct nfs4_callback *cb = &clp->cl_callback;
@@ -1186,7 +1189,7 @@ release_stateid(struct nfs4_stateid *stp, int flags)
 	stp = NULL;
 }
 
-void
+static void
 move_to_close_lru(struct nfs4_stateowner *sop)
 {
 	dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
@@ -1196,7 +1199,7 @@ move_to_close_lru(struct nfs4_stateowner *sop)
 	sop->so_time = get_seconds();
 }
 
-void
+static void
 release_state_owner(struct nfs4_stateid *stp, int flag)
 {
 	struct nfs4_stateowner *sop = stp->st_stateowner;
@@ -1250,7 +1253,7 @@ find_file(struct inode *ino)
 #define TEST_ACCESS(x) ((x > 0 || x < 4)?1:0)
 #define TEST_DENY(x) ((x >= 0 || x < 5)?1:0)
 
-void
+static void
 set_access(unsigned int *access, unsigned long bmap) {
 	int i;
 
@@ -1261,7 +1264,7 @@ set_access(unsigned int *access, unsigned long bmap) {
 	}
 }
 
-void
+static void
 set_deny(unsigned int *deny, unsigned long bmap) {
 	int i;
 
@@ -1287,7 +1290,7 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
  * Called to check deny when READ with all zero stateid or
  * WRITE with all zero or all one stateid
  */
-int
+static int
 nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 {
 	struct inode *ino = current_fh->fh_dentry->d_inode;
@@ -1442,7 +1445,7 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
 		return -EAGAIN;
 }
 
-struct lock_manager_operations nfsd_lease_mng_ops = {
+static struct lock_manager_operations nfsd_lease_mng_ops = {
 	.fl_break = nfsd_break_deleg_cb,
 	.fl_release_private = nfsd_release_deleg_cb,
 	.fl_copy_lock = nfsd_copy_lock_deleg_cb,
@@ -1915,7 +1918,7 @@ end_grace(void)
 	in_grace = 0;
 }
 
-time_t
+static time_t
 nfs4_laundromat(void)
 {
 	struct nfs4_client *clp;
@@ -1996,7 +1999,7 @@ laundromat_main(void *not_used)
 /* search ownerid_hashtbl[] and close_lru for stateid owner
  * (stateid->si_stateownerid)
  */
-struct nfs4_stateowner *
+static struct nfs4_stateowner *
 find_openstateowner_id(u32 st_id, int flags) {
 	struct nfs4_stateowner *local = NULL;
 
@@ -2170,7 +2173,7 @@ out:
 /* 
  * Checks for sequence id mutating operations. 
  */
-int
+static int
 nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid)
 {
 	int status;
@@ -2486,7 +2489,7 @@ static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE];
 static struct list_head	lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
 static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
 
-struct nfs4_stateid *
+static struct nfs4_stateid *
 find_stateid(stateid_t *stid, int flags)
 {
 	struct nfs4_stateid *local = NULL;
@@ -2550,7 +2553,7 @@ nfs4_transform_lock_offset(struct file_lock *lock)
 		lock->fl_end = OFFSET_MAX;
 }
 
-int
+static int
 nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval)
 {
 	struct nfs4_stateowner *local = NULL;
@@ -2660,7 +2663,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 	return sop;
 }
 
-struct nfs4_stateid *
+static struct nfs4_stateid *
 alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp)
 {
 	struct nfs4_stateid *stp;
@@ -2691,7 +2694,7 @@ out:
 	return stp;
 }
 
-int
+static int
 check_lock_length(u64 offset, u64 length)
 {
 	return ((length == 0)  || ((length != ~(u64)0) &&
@@ -3149,7 +3152,7 @@ nfs4_release_reclaim(void)
 
 /*
  * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
-struct nfs4_client_reclaim *
+static struct nfs4_client_reclaim *
 nfs4_find_reclaim_client(clientid_t *clid)
 {
 	unsigned int strhashval;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index cfe978a72ce..91fb171d2ac 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -136,7 +136,7 @@ xdr_error:					\
 	}					\
 } while (0)
 
-u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
+static u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
 {
 	/* We want more bytes than seem to be available.
 	 * Maybe we need a new page, maybe we have just run out
@@ -190,7 +190,7 @@ defer_free(struct nfsd4_compoundargs *argp,
 	return 0;
 }
 
-char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
+static char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
 {
 	void *new = NULL;
 	if (p == argp->tmp) {
-- 
cgit v1.2.3


From 31f4a6c127f619886bf97f643e546f7788248f3f Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:06 -0700
Subject: [PATCH] knfsd: nfsd4: fix uncomfirmed list

Setclientid code assumes there is only one match in unconfirmed list.
Make sure that assumption holds.

From: Fred Isaman
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8a5f777b1e9..67a038dc0d0 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -743,10 +743,13 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		 * nfs4_client,  but with the new callback info and a 
 		 * new cl_confirm
 		 */
-		if ((unconf) && 
-		    cmp_verf(&unconf->cl_verifier, &conf->cl_verifier) &&
-		     cmp_clid(&unconf->cl_clientid, &conf->cl_clientid)) {
-				expire_client(unconf);
+		if (unconf) {
+			/* Note this is removing unconfirmed {*x***},
+			 * which is stronger than RFC recommended {vxc**}.
+			 * This has the advantage that there is at most
+			 * one {*x***} in either list at any time.
+			 */
+			expire_client(unconf);
 		}
 		new = create_client(clname, dname);
 		if (new == NULL)
-- 
cgit v1.2.3


From 1a69c179a28a9bb9f4d086927b192d5cffe88e50 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:08 -0700
Subject: [PATCH] knfsd: nfsd4: fix setclientid_confirm cases

Setclientid_confirm code confused states 1 and 3 (numbering from the
IMPLEMENTATION section of rfc3530, section 14.2.33).  Fix this.

State 1 allows the client to change the callback channel on the fly.  We don't
implement this currently, so just turn off the callback channel in this case.

From: Fred Isaman
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 67a038dc0d0..997343c2304 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -887,10 +887,14 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) 
 			status = nfserr_clid_inuse;
 		else {
-			expire_client(conf);
-			clp = unconf;
-			move_to_confirmed(unconf);
+			/* XXX: We just turn off callbacks until we can handle
+			  * change request correctly. */
+			clp = conf;
+			clp->cl_callback.cb_parsed = 0;
+			gen_confirm(clp);
+			expire_client(unconf);
 			status = nfs_ok;
+
 		}
 		goto out;
 	} 
@@ -920,9 +924,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
 			status = nfserr_clid_inuse;
 		} else {
-			status = nfs_ok;
+			unsigned int hash =
+				clientstr_hashval(unconf->cl_recdir);
+			conf = find_confirmed_client_by_str(unconf->cl_recdir,
+									hash);
+			if (conf) {
+				expire_client(conf);
+			}
 			clp = unconf;
 			move_to_confirmed(unconf);
+			status = nfs_ok;
 		}
 		goto out;
 	}
-- 
cgit v1.2.3


From 22de4d837439071a0bec897485d3911383b6ffad Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:09 -0700
Subject: [PATCH] knfsd: nfsd4: fix setclientid_confirm error return

NFS4_INVAL is not a valid error for setclientid_confirm, and INUSE is the more
logical error here anyway.

From: Fred Isaman
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 997343c2304..2dc6da74cce 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -849,12 +849,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	nfs4_lock_state();
 	clp = find_confirmed_client(clid);
 	if (clp) {
-		status = nfserr_inval;
-		/* 
-		 * Found a record for this clientid. If the IP addresses
-		 * don't match, return ERR_INVAL just as if the record had
-		 * not been found.
-		 */
+		status = nfserr_clid_inuse;
 		if (clp->cl_addr != ip_addr) { 
 			printk("NFSD: setclientid: string in use by client"
 			"(clientid %08x/%08x)\n",
@@ -865,7 +860,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	}
 	clp = find_unconfirmed_client(clid);
 	if (clp) {
-		status = nfserr_inval;
+		status = nfserr_clid_inuse;
 		if (clp->cl_addr != ip_addr) { 
 			printk("NFSD: setclientid: string in use by client"
 			"(clientid %08x/%08x)\n",
@@ -949,7 +944,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		goto out;
 	}
 	/* check that we have hit one of the cases...*/
-	status = nfserr_inval;
+	status = nfserr_clid_inuse;
 	goto out;
 out:
 	if (!status)
-- 
cgit v1.2.3


From 08e8987c37cd04d2df211c1e019d8f165d44266e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:11 -0700
Subject: [PATCH] knfsd: nfsd4: setclientid_confirm gotoectomy

Change from "goto" to "else if" format in setclientid_confirm.

From: Fred Isaman
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2dc6da74cce..9014dc2a632 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -891,14 +891,13 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 			status = nfs_ok;
 
 		}
-		goto out;
 	} 
 	/* CASE 2:
 	 * conf record that matches input clientid.
 	 * if unconf record that matches input clientid, then unconf->cl_name
 	 * or unconf->cl_verifier don't match the conf record.
 	 */
-	if ((conf && !unconf) || 
+	else if ((conf && !unconf) ||
 	    ((conf && unconf) && 
 	     (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
 	      !same_name(conf->cl_recdir, unconf->cl_recdir)))) {
@@ -908,14 +907,13 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 			clp = conf;
 			status = nfs_ok;
 		}
-		goto out;
 	}
 	/* CASE 3:
 	 * conf record not found.
 	 * unconf record found. 
 	 * unconf->cl_confirm matches input confirm
 	 */ 
-	if (!conf && unconf && cmp_verf(&unconf->cl_confirm, &confirm)) {
+	else if (!conf && unconf && cmp_verf(&unconf->cl_confirm, &confirm)) {
 		if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
 			status = nfserr_clid_inuse;
 		} else {
@@ -930,7 +928,6 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 			move_to_confirmed(unconf);
 			status = nfs_ok;
 		}
-		goto out;
 	}
 	/* CASE 4:
 	 * conf record not found, or if conf, then conf->cl_confirm does not
@@ -938,14 +935,14 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	 * unconf record not found, or if unconf, then unconf->cl_confirm 
 	 * does not match input confirm.
 	 */
-	if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) &&
+	else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) &&
 	    (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm, &confirm)))) {
 		status = nfserr_stale_clientid;
-		goto out;
 	}
-	/* check that we have hit one of the cases...*/
-	status = nfserr_clid_inuse;
-	goto out;
+	else {
+		/* check that we have hit one of the cases...*/
+		status = nfserr_clid_inuse;
+	}
 out:
 	if (!status)
 		nfsd4_probe_callback(clp);
-- 
cgit v1.2.3


From 7c79f7377cd4f2a50d51475f4c7966a3e60596a7 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:13 -0700
Subject: [PATCH] knfsd: nfsd4: setclientid_confirm comments

Trivial whitespace and comment fixes.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 59 ++++++++++++++++++++++++++---------------------------
 1 file changed, 29 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9014dc2a632..f100eeab545 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -869,16 +869,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		}
 		unconf = clp;
 	}
-	/* CASE 1: 
-	* unconf record that matches input clientid and input confirm.
-	* conf record that matches input clientid.
-	* conf  and unconf records match names, verifiers 
-	*/
 	if ((conf && unconf) && 
 	    (cmp_verf(&unconf->cl_confirm, &confirm)) &&
 	    (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
 	    (same_name(conf->cl_recdir,unconf->cl_recdir))  &&
 	    (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
+		/* CASE 1:
+		* unconf record that matches input clientid and input confirm.
+		* conf record that matches input clientid.
+		* conf and unconf records match names, verifiers
+		*/
 		if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) 
 			status = nfserr_clid_inuse;
 		else {
@@ -891,29 +891,29 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 			status = nfs_ok;
 
 		}
-	} 
-	/* CASE 2:
-	 * conf record that matches input clientid.
-	 * if unconf record that matches input clientid, then unconf->cl_name
-	 * or unconf->cl_verifier don't match the conf record.
-	 */
-	else if ((conf && !unconf) ||
+	} else if ((conf && !unconf) ||
 	    ((conf && unconf) && 
 	     (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
 	      !same_name(conf->cl_recdir, unconf->cl_recdir)))) {
+		/* CASE 2:
+		 * conf record that matches input clientid.
+		 * if unconf record matches input clientid, then
+		 * unconf->cl_name or unconf->cl_verifier don't match the
+		 * conf record.
+		 */
 		if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
 			status = nfserr_clid_inuse;
 		} else {
 			clp = conf;
 			status = nfs_ok;
 		}
-	}
-	/* CASE 3:
-	 * conf record not found.
-	 * unconf record found. 
-	 * unconf->cl_confirm matches input confirm
-	 */ 
-	else if (!conf && unconf && cmp_verf(&unconf->cl_confirm, &confirm)) {
+	} else if (!conf && unconf
+			&& cmp_verf(&unconf->cl_confirm, &confirm)) {
+		/* CASE 3:
+		 * conf record not found.
+		 * unconf record found.
+		 * unconf->cl_confirm matches input confirm
+		 */
 		if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
 			status = nfserr_clid_inuse;
 		} else {
@@ -928,18 +928,17 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 			move_to_confirmed(unconf);
 			status = nfs_ok;
 		}
-	}
-	/* CASE 4:
-	 * conf record not found, or if conf, then conf->cl_confirm does not
-	 * match input confirm.
-	 * unconf record not found, or if unconf, then unconf->cl_confirm 
-	 * does not match input confirm.
-	 */
-	else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) &&
-	    (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm, &confirm)))) {
+	} else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm)))
+	    && (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm,
+				    				&confirm)))) {
+		/* CASE 4:
+		 * conf record not found, or if conf, conf->cl_confirm does not
+		 * match input confirm.
+		 * unconf record not found, or if unconf, unconf->cl_confirm
+		 * does not match input confirm.
+		 */
 		status = nfserr_stale_clientid;
-	}
-	else {
+	} else {
 		/* check that we have hit one of the cases...*/
 		status = nfserr_clid_inuse;
 	}
-- 
cgit v1.2.3


From 21ab45a480ec7705d177e959ebf452d62340c004 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:14 -0700
Subject: [PATCH] knfsd: nfsd4: miscellaneous setclientid_confirm cleanup

Minor cleanup, remove some unnecessary printk's.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 49 +++++++++++++++++--------------------------------
 1 file changed, 17 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f100eeab545..88411519811 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -834,7 +834,7 @@ int
 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm)
 {
 	u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
-	struct nfs4_client *clp, *conf = NULL, *unconf = NULL;
+	struct nfs4_client *conf, *unconf;
 	nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
 	clientid_t * clid = &setclientid_confirm->sc_clientid;
 	int status;
@@ -847,28 +847,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	 */
 
 	nfs4_lock_state();
-	clp = find_confirmed_client(clid);
-	if (clp) {
-		status = nfserr_clid_inuse;
-		if (clp->cl_addr != ip_addr) { 
-			printk("NFSD: setclientid: string in use by client"
-			"(clientid %08x/%08x)\n",
-			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-			goto out;
-		}
-		conf = clp;
-	}
-	clp = find_unconfirmed_client(clid);
-	if (clp) {
-		status = nfserr_clid_inuse;
-		if (clp->cl_addr != ip_addr) { 
-			printk("NFSD: setclientid: string in use by client"
-			"(clientid %08x/%08x)\n",
-			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-			goto out;
-		}
-		unconf = clp;
-	}
+
+	conf = find_confirmed_client(clid);
+	unconf = find_unconfirmed_client(clid);
+
+	status = nfserr_clid_inuse;
+	if (conf && conf->cl_addr != ip_addr)
+		goto out;
+	if (unconf && unconf->cl_addr != ip_addr)
+		goto out;
+
 	if ((conf && unconf) && 
 	    (cmp_verf(&unconf->cl_confirm, &confirm)) &&
 	    (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
@@ -884,9 +872,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		else {
 			/* XXX: We just turn off callbacks until we can handle
 			  * change request correctly. */
-			clp = conf;
-			clp->cl_callback.cb_parsed = 0;
-			gen_confirm(clp);
+			conf->cl_callback.cb_parsed = 0;
+			gen_confirm(conf);
 			expire_client(unconf);
 			status = nfs_ok;
 
@@ -901,12 +888,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		 * unconf->cl_name or unconf->cl_verifier don't match the
 		 * conf record.
 		 */
-		if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
+		if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred))
 			status = nfserr_clid_inuse;
-		} else {
-			clp = conf;
+		else
 			status = nfs_ok;
-		}
 	} else if (!conf && unconf
 			&& cmp_verf(&unconf->cl_confirm, &confirm)) {
 		/* CASE 3:
@@ -924,8 +909,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 			if (conf) {
 				expire_client(conf);
 			}
-			clp = unconf;
 			move_to_confirmed(unconf);
+			conf = unconf;
 			status = nfs_ok;
 		}
 	} else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm)))
@@ -944,7 +929,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	}
 out:
 	if (!status)
-		nfsd4_probe_callback(clp);
+		nfsd4_probe_callback(conf);
 	nfs4_unlock_state();
 	return status;
 }
-- 
cgit v1.2.3


From ea1da636e956ad1591a74904f23d98bbc26a644b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:17 -0700
Subject: [PATCH] knfsd: nfsd4: rename state list fields

Trivial renaming patch:

I can never remember, while looking at various lists relating the nfsd4 state
structures, which are the "heads" and which are items on other lists, or which
structures are actually on the various lists.  The following convention helps
me: given structures foo and bar, with foo containing the head of a list of
bars, use "bars" for the name of the head of the list contained in the struct
foo, and use "per_foo" for the entries in the struct bars.

Already done for struct nfs4_file; go ahead and do it for the other nfsd4
state structures.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 78 ++++++++++++++++++++++++++---------------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 88411519811..22e76e3f06a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -154,8 +154,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
 	if (dp == NULL)
 		return dp;
-	INIT_LIST_HEAD(&dp->dl_del_perfile);
-	INIT_LIST_HEAD(&dp->dl_del_perclnt);
+	INIT_LIST_HEAD(&dp->dl_perfile);
+	INIT_LIST_HEAD(&dp->dl_perclnt);
 	INIT_LIST_HEAD(&dp->dl_recall_lru);
 	dp->dl_client = clp;
 	get_nfs4_file(fp);
@@ -176,8 +176,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 		        current_fh->fh_handle.fh_size);
 	dp->dl_time = 0;
 	atomic_set(&dp->dl_count, 1);
-	list_add(&dp->dl_del_perfile, &fp->fi_delegations);
-	list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt);
+	list_add(&dp->dl_perfile, &fp->fi_delegations);
+	list_add(&dp->dl_perclnt, &clp->cl_delegations);
 	return dp;
 }
 
@@ -214,8 +214,8 @@ nfs4_close_delegation(struct nfs4_delegation *dp)
 static void
 unhash_delegation(struct nfs4_delegation *dp)
 {
-	list_del_init(&dp->dl_del_perfile);
-	list_del_init(&dp->dl_del_perclnt);
+	list_del_init(&dp->dl_perfile);
+	list_del_init(&dp->dl_perclnt);
 	spin_lock(&recall_lock);
 	list_del_init(&dp->dl_recall_lru);
 	spin_unlock(&recall_lock);
@@ -345,11 +345,11 @@ expire_client(struct nfs4_client *clp)
 
 	INIT_LIST_HEAD(&reaplist);
 	spin_lock(&recall_lock);
-	while (!list_empty(&clp->cl_del_perclnt)) {
-		dp = list_entry(clp->cl_del_perclnt.next, struct nfs4_delegation, dl_del_perclnt);
+	while (!list_empty(&clp->cl_delegations)) {
+		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
 		dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
 				dp->dl_flock);
-		list_del_init(&dp->dl_del_perclnt);
+		list_del_init(&dp->dl_perclnt);
 		list_move(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&recall_lock);
@@ -361,8 +361,8 @@ expire_client(struct nfs4_client *clp)
 	list_del(&clp->cl_idhash);
 	list_del(&clp->cl_strhash);
 	list_del(&clp->cl_lru);
-	while (!list_empty(&clp->cl_perclient)) {
-		sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient);
+	while (!list_empty(&clp->cl_openowners)) {
+		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
 		release_stateowner(sop);
 	}
 	put_nfs4_client(clp);
@@ -380,8 +380,8 @@ create_client(struct xdr_netobj name, char *recdir) {
 	clp->cl_callback.cb_parsed = 0;
 	INIT_LIST_HEAD(&clp->cl_idhash);
 	INIT_LIST_HEAD(&clp->cl_strhash);
-	INIT_LIST_HEAD(&clp->cl_perclient);
-	INIT_LIST_HEAD(&clp->cl_del_perclnt);
+	INIT_LIST_HEAD(&clp->cl_openowners);
+	INIT_LIST_HEAD(&clp->cl_delegations);
 	INIT_LIST_HEAD(&clp->cl_lru);
 out:
 	return clp;
@@ -1074,13 +1074,13 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 	INIT_LIST_HEAD(&sop->so_idhash);
 	INIT_LIST_HEAD(&sop->so_strhash);
 	INIT_LIST_HEAD(&sop->so_perclient);
-	INIT_LIST_HEAD(&sop->so_perfilestate);
-	INIT_LIST_HEAD(&sop->so_perlockowner);  /* not used */
+	INIT_LIST_HEAD(&sop->so_stateids);
+	INIT_LIST_HEAD(&sop->so_perstateid);  /* not used */
 	INIT_LIST_HEAD(&sop->so_close_lru);
 	sop->so_time = 0;
 	list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
 	list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
-	list_add(&sop->so_perclient, &clp->cl_perclient);
+	list_add(&sop->so_perclient, &clp->cl_openowners);
 	sop->so_is_open_owner = 1;
 	sop->so_id = current_ownerid++;
 	sop->so_client = clp;
@@ -1098,10 +1098,10 @@ release_stateid_lockowners(struct nfs4_stateid *open_stp)
 {
 	struct nfs4_stateowner *lock_sop;
 
-	while (!list_empty(&open_stp->st_perlockowner)) {
-		lock_sop = list_entry(open_stp->st_perlockowner.next,
-				struct nfs4_stateowner, so_perlockowner);
-		/* list_del(&open_stp->st_perlockowner);  */
+	while (!list_empty(&open_stp->st_lockowners)) {
+		lock_sop = list_entry(open_stp->st_lockowners.next,
+				struct nfs4_stateowner, so_perstateid);
+		/* list_del(&open_stp->st_lockowners);  */
 		BUG_ON(lock_sop->so_is_open_owner);
 		release_stateowner(lock_sop);
 	}
@@ -1116,10 +1116,10 @@ unhash_stateowner(struct nfs4_stateowner *sop)
 	list_del(&sop->so_strhash);
 	if (sop->so_is_open_owner)
 		list_del(&sop->so_perclient);
-	list_del(&sop->so_perlockowner);
-	while (!list_empty(&sop->so_perfilestate)) {
-		stp = list_entry(sop->so_perfilestate.next, 
-			struct nfs4_stateid, st_perfilestate);
+	list_del(&sop->so_perstateid);
+	while (!list_empty(&sop->so_stateids)) {
+		stp = list_entry(sop->so_stateids.next,
+			struct nfs4_stateid, st_perstateowner);
 		if (sop->so_is_open_owner)
 			release_stateid(stp, OPEN_STATE);
 		else
@@ -1141,11 +1141,11 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
 	unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
 
 	INIT_LIST_HEAD(&stp->st_hash);
-	INIT_LIST_HEAD(&stp->st_perfilestate);
-	INIT_LIST_HEAD(&stp->st_perlockowner);
+	INIT_LIST_HEAD(&stp->st_perstateowner);
+	INIT_LIST_HEAD(&stp->st_lockowners);
 	INIT_LIST_HEAD(&stp->st_perfile);
 	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
-	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
+	list_add(&stp->st_perstateowner, &sop->so_stateids);
 	list_add(&stp->st_perfile, &fp->fi_stateids);
 	stp->st_stateowner = sop;
 	get_nfs4_file(fp);
@@ -1167,7 +1167,7 @@ release_stateid(struct nfs4_stateid *stp, int flags)
 
 	list_del(&stp->st_hash);
 	list_del(&stp->st_perfile);
-	list_del(&stp->st_perfilestate);
+	list_del(&stp->st_perstateowner);
 	if (flags & OPEN_STATE) {
 		release_stateid_lockowners(stp);
 		stp->st_vfs_file = NULL;
@@ -1201,7 +1201,7 @@ release_state_owner(struct nfs4_stateid *stp, int flag)
 	 * released by the laundromat service after the lease period
 	 * to enable us to handle CLOSE replay
 	 */
-	if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
+	if (sop->so_confirmed && list_empty(&sop->so_stateids))
 		move_to_close_lru(sop);
 }
 
@@ -1548,7 +1548,7 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
 {
 	struct nfs4_delegation *dp;
 
-	list_for_each_entry(dp, &fp->fi_delegations, dl_del_perfile) {
+	list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
 		if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
 			return dp;
 	}
@@ -1892,7 +1892,7 @@ nfsd4_renew(clientid_t *clid)
 	}
 	renew_client(clp);
 	status = nfserr_cb_path_down;
-	if (!list_empty(&clp->cl_del_perclnt)
+	if (!list_empty(&clp->cl_delegations)
 			&& !atomic_read(&clp->cl_callback.cb_set))
 		goto out;
 	status = nfs_ok;
@@ -2634,13 +2634,13 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 	INIT_LIST_HEAD(&sop->so_idhash);
 	INIT_LIST_HEAD(&sop->so_strhash);
 	INIT_LIST_HEAD(&sop->so_perclient);
-	INIT_LIST_HEAD(&sop->so_perfilestate);
-	INIT_LIST_HEAD(&sop->so_perlockowner);
+	INIT_LIST_HEAD(&sop->so_stateids);
+	INIT_LIST_HEAD(&sop->so_perstateid);
 	INIT_LIST_HEAD(&sop->so_close_lru); /* not used */
 	sop->so_time = 0;
 	list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]);
 	list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]);
-	list_add(&sop->so_perlockowner, &open_stp->st_perlockowner);
+	list_add(&sop->so_perstateid, &open_stp->st_lockowners);
 	sop->so_is_open_owner = 0;
 	sop->so_id = current_ownerid++;
 	sop->so_client = clp;
@@ -2664,11 +2664,11 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
 		goto out;
 	INIT_LIST_HEAD(&stp->st_hash);
 	INIT_LIST_HEAD(&stp->st_perfile);
-	INIT_LIST_HEAD(&stp->st_perfilestate);
-	INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */
+	INIT_LIST_HEAD(&stp->st_perstateowner);
+	INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
 	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
 	list_add(&stp->st_perfile, &fp->fi_stateids);
-	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
+	list_add(&stp->st_perstateowner, &sop->so_stateids);
 	stp->st_stateowner = sop;
 	get_nfs4_file(fp);
 	stp->st_file = fp;
@@ -3081,8 +3081,8 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
 		/* check for any locks held by any stateid
 		 * associated with the (lock) stateowner */
 		status = nfserr_locks_held;
-		list_for_each_entry(stp, &local->so_perfilestate,
-				st_perfilestate) {
+		list_for_each_entry(stp, &local->so_stateids,
+				st_perstateowner) {
 			if (check_for_locks(stp->st_vfs_file, local))
 				goto out;
 		}
-- 
cgit v1.2.3


From 3e9e3dbe0fe36c824ce2c5d7b05997c87a64bbdc Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:20 -0700
Subject: [PATCH] knfsd: nfsd4: allow multiple lockowners

>From the language of rfc3530 section 8.1.3 (e.g., the suggestion that a
"process id" might be a reasonable lockowner value) it's conceivable that a
client might want to use the same lockowner string on multiple files, so we may
as well allow that.  We expect each use of open_to_lockowner to create a
distinct seqid stream, though.

For now we're also allowing multiple uses of open_to_lockowner with the same
open, though it seems unlikely clients would actually do that.

Also add a comment reminding myself of some very non-scalable data structures.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 76 +++++++++++++++++++++++------------------------------
 1 file changed, 33 insertions(+), 43 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 22e76e3f06a..26d00465c28 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2583,22 +2583,6 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
 		deny->ld_type = NFS4_WRITE_LT;
 }
 
-static struct nfs4_stateowner *
-find_lockstateowner(struct xdr_netobj *owner, clientid_t *clid)
-{
-	struct nfs4_stateowner *local = NULL;
-	int i;
-
-	for (i = 0; i < LOCK_HASH_SIZE; i++) {
-		list_for_each_entry(local, &lock_ownerid_hashtbl[i], so_idhash) {
-			if (!cmp_owner_str(local, owner, clid))
-				continue;
-			return local;
-		}
-	}
-	return NULL;
-}
-
 static struct nfs4_stateowner *
 find_lockstateowner_str(struct inode *inode, clientid_t *clid,
 		struct xdr_netobj *owner)
@@ -2697,7 +2681,7 @@ check_lock_length(u64 offset, u64 length)
 int
 nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock)
 {
-	struct nfs4_stateowner *lock_sop = NULL, *open_sop = NULL;
+	struct nfs4_stateowner *open_sop = NULL;
 	struct nfs4_stateid *lock_stp;
 	struct file *filp;
 	struct file_lock file_lock;
@@ -2756,16 +2740,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 		strhashval = lock_ownerstr_hashval(fp->fi_inode, 
 				open_sop->so_client->cl_clientid.cl_id, 
 				&lock->v.new.owner);
-		/* 
-		 * If we already have this lock owner, the client is in 
-		 * error (or our bookeeping is wrong!) 
-		 * for asking for a 'new lock'.
-		 */
-		status = nfserr_bad_stateid;
-		lock_sop = find_lockstateowner(&lock->v.new.owner,
-						&lock->v.new.clientid);
-		if (lock_sop)
-			goto out;
+		/* XXX: Do we need to check for duplicate stateowners on
+		 * the same file, or should they just be allowed (and
+		 * create new stateids)? */
 		status = nfserr_resource;
 		if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock)))
 			goto out;
@@ -3056,8 +3033,11 @@ int
 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner)
 {
 	clientid_t *clid = &rlockowner->rl_clientid;
-	struct nfs4_stateowner *local = NULL;
+	struct nfs4_stateowner *sop;
+	struct nfs4_stateid *stp;
 	struct xdr_netobj *owner = &rlockowner->rl_owner;
+	struct list_head matches;
+	int i;
 	int status;
 
 	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
@@ -3073,22 +3053,32 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
 
 	nfs4_lock_state();
 
-	status = nfs_ok;
-	local = find_lockstateowner(owner, clid);
-	if (local) {
-		struct nfs4_stateid *stp;
-
-		/* check for any locks held by any stateid
-		 * associated with the (lock) stateowner */
-		status = nfserr_locks_held;
-		list_for_each_entry(stp, &local->so_stateids,
-				st_perstateowner) {
-			if (check_for_locks(stp->st_vfs_file, local))
-				goto out;
+	status = nfserr_locks_held;
+	/* XXX: we're doing a linear search through all the lockowners.
+	 * Yipes!  For now we'll just hope clients aren't really using
+	 * release_lockowner much, but eventually we have to fix these
+	 * data structures. */
+	INIT_LIST_HEAD(&matches);
+	for (i = 0; i < LOCK_HASH_SIZE; i++) {
+		list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) {
+			if (!cmp_owner_str(sop, owner, clid))
+				continue;
+			list_for_each_entry(stp, &sop->so_stateids,
+					st_perstateowner) {
+				if (check_for_locks(stp->st_vfs_file, sop))
+					goto out;
+				/* Note: so_perclient unused for lockowners,
+				 * so it's OK to fool with here. */
+				list_add(&sop->so_perclient, &matches);
+			}
 		}
-		/* no locks held by (lock) stateowner */
-		status = nfs_ok;
-		release_stateowner(local);
+	}
+	/* Clients probably won't expect us to return with some (but not all)
+	 * of the lockowner state released; so don't release any until all
+	 * have been checked. */
+	status = nfs_ok;
+	list_for_each_entry(sop, &matches, so_perclient) {
+		release_stateowner(sop);
 	}
 out:
 	nfs4_unlock_state();
-- 
cgit v1.2.3


From cb36d6345752fa24827044c68e15f6708a40d9f6 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:23 -0700
Subject: [PATCH] knfsd: nfsd4: remove cb_parsed

The cb_parsed field is only used by probe_callback, to determine whether the
callback information has been filled in by setclientid.  But there is no way
that probe_callback() can be called without that having already happened, so
that check is superfluous, as is cb_parsed.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4callback.c | 4 +---
 fs/nfsd/nfs4state.c    | 5 +----
 2 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 68bb245491f..583c0710e45 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -386,9 +386,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	char                    hostname[32];
 	int status;
 
-	dprintk("NFSD: probe_callback. cb_parsed %d cb_set %d\n",
-			cb->cb_parsed, atomic_read(&cb->cb_set));
-	if (!cb->cb_parsed || atomic_read(&cb->cb_set))
+	if (atomic_read(&cb->cb_set))
 		return;
 
 	/* Initialize address */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 26d00465c28..0b47a97e953 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -377,7 +377,6 @@ create_client(struct xdr_netobj name, char *recdir) {
 	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
 	atomic_set(&clp->cl_count, 1);
 	atomic_set(&clp->cl_callback.cb_set, 0);
-	clp->cl_callback.cb_parsed = 0;
 	INIT_LIST_HEAD(&clp->cl_idhash);
 	INIT_LIST_HEAD(&clp->cl_strhash);
 	INIT_LIST_HEAD(&clp->cl_openowners);
@@ -620,14 +619,12 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 		goto out_err;
 	cb->cb_prog = se->se_callback_prog;
 	cb->cb_ident = se->se_callback_ident;
-	cb->cb_parsed = 1;
 	return;
 out_err:
 	printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
 		"will not receive delegations\n",
 		clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
 
-	cb->cb_parsed = 0;
 	return;
 }
 
@@ -872,7 +869,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		else {
 			/* XXX: We just turn off callbacks until we can handle
 			  * change request correctly. */
-			conf->cl_callback.cb_parsed = 0;
+			atomic_set(&conf->cl_callback.cb_set, 0);
 			gen_confirm(conf);
 			expire_client(unconf);
 			status = nfs_ok;
-- 
cgit v1.2.3


From 190e4fbf96037e5e526ba3210f2bcc2a3b6fe964 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:25 -0700
Subject: [PATCH] knfsd: nfsd4: initialize recovery directory

NFSv4 clients are required to know what state they have on the server so that
they can reclaim it on server reboot.  However, it is possible for
pathalogical combinations of server reboots and network partitions to leave a
client in a state where it cannot know whether it has lost its state on the
server.

For this reason, rfc3530 requires that we store some information about clients
to stable storage.

So we maintain a directory /var/lib/nfs/v4recovery with a subdirectory for
each client with active state.  We leave open the possibility of including
files underneath each such subdirectory with information about the client, but
for now the subdirectories are empty.

We create a client subdirectory whenever a client makes its first non-reclaim
open_confirm.

We remove a client subdirectory whenever either
        a) its lease expires, or
	b) the grace period ends without it reclaiming anything.
When handling reclaims, we allow the reclaim if and only if the client doing
the reclaim has a subdirectory.

This patch adds just the code to scan the recovery directory on nfsd startup.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4recover.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfs4state.c   |  18 +++++-
 2 files changed, 182 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 841a305d794..2dc9851a1d3 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -39,6 +39,9 @@
 #include <linux/nfs4.h>
 #include <linux/nfsd/state.h>
 #include <linux/nfsd/xdr4.h>
+#include <linux/param.h>
+#include <linux/file.h>
+#include <linux/namei.h>
 #include <asm/uaccess.h>
 #include <asm/scatterlist.h>
 #include <linux/crypto.h>
@@ -46,6 +49,27 @@
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
+/* Globals */
+char recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+static struct nameidata rec_dir;
+static int rec_dir_init = 0;
+
+static void
+nfs4_save_user(uid_t *saveuid, gid_t *savegid)
+{
+	*saveuid = current->fsuid;
+	*savegid = current->fsgid;
+	current->fsuid = 0;
+	current->fsgid = 0;
+}
+
+static void
+nfs4_reset_user(uid_t saveuid, gid_t savegid)
+{
+	current->fsuid = saveuid;
+	current->fsgid = savegid;
+}
+
 static void
 md5_to_hex(char *out, char *md5)
 {
@@ -95,3 +119,145 @@ out:
 		crypto_free_tfm(tfm);
 	return status;
 }
+
+typedef int (recdir_func)(struct dentry *, struct dentry *);
+
+struct dentry_list {
+	struct dentry *dentry;
+	struct list_head list;
+};
+
+struct dentry_list_arg {
+	struct list_head dentries;
+	struct dentry *parent;
+};
+
+static int
+nfsd4_build_dentrylist(void *arg, const char *name, int namlen,
+		loff_t offset, ino_t ino, unsigned int d_type)
+{
+	struct dentry_list_arg *dla = arg;
+	struct list_head *dentries = &dla->dentries;
+	struct dentry *parent = dla->parent;
+	struct dentry *dentry;
+	struct dentry_list *child;
+
+	if (name && isdotent(name, namlen))
+		return nfs_ok;
+	dentry = lookup_one_len(name, parent, namlen);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+	child = kmalloc(sizeof(*child), GFP_KERNEL);
+	if (child == NULL)
+		return -ENOMEM;
+	child->dentry = dentry;
+	list_add(&child->list, dentries);
+	return 0;
+}
+
+static int
+nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
+{
+	struct file *filp;
+	struct dentry_list_arg dla = {
+		.parent = dir,
+	};
+	struct list_head *dentries = &dla.dentries;
+	struct dentry_list *child;
+	uid_t uid;
+	gid_t gid;
+	int status;
+
+	if (!rec_dir_init)
+		return 0;
+
+	nfs4_save_user(&uid, &gid);
+
+	filp = dentry_open(dget(dir), mntget(rec_dir.mnt),
+			O_RDWR);
+	status = PTR_ERR(filp);
+	if (IS_ERR(filp))
+		goto out;
+	INIT_LIST_HEAD(dentries);
+	status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla);
+	fput(filp);
+	while (!list_empty(dentries)) {
+		child = list_entry(dentries->next, struct dentry_list, list);
+		status = f(dir, child->dentry);
+		if (status)
+			goto out;
+		list_del(&child->list);
+		dput(child->dentry);
+		kfree(child);
+	}
+out:
+	while (!list_empty(dentries)) {
+		child = list_entry(dentries->next, struct dentry_list, list);
+		list_del(&child->list);
+		dput(child->dentry);
+		kfree(child);
+	}
+	nfs4_reset_user(uid, gid);
+	return status;
+}
+
+static int
+load_recdir(struct dentry *parent, struct dentry *child)
+{
+	if (child->d_name.len != HEXDIR_LEN - 1) {
+		printk("nfsd4: illegal name %s in recovery directory\n",
+				child->d_name.name);
+		/* Keep trying; maybe the others are OK: */
+		return nfs_ok;
+	}
+	nfs4_client_to_reclaim(child->d_name.name);
+	return nfs_ok;
+}
+
+int
+nfsd4_recdir_load(void) {
+	int status;
+
+	status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir);
+	if (status)
+		printk("nfsd4: failed loading clients from recovery"
+			" directory %s\n", rec_dir.dentry->d_name.name);
+	return status;
+}
+
+/*
+ * Hold reference to the recovery directory.
+ */
+
+void
+nfsd4_init_recdir(char *rec_dirname)
+{
+	uid_t			uid = 0;
+	gid_t			gid = 0;
+	int 			status;
+
+	printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
+			rec_dirname);
+
+	BUG_ON(rec_dir_init);
+
+	nfs4_save_user(&uid, &gid);
+
+	status = path_lookup(rec_dirname, LOOKUP_FOLLOW, &rec_dir);
+	if (status == -ENOENT)
+		printk("NFSD: recovery directory %s doesn't exist\n",
+				rec_dirname);
+
+	if (!status)
+		rec_dir_init = 1;
+	nfs4_reset_user(uid, gid);
+}
+
+void
+nfsd4_shutdown_recdir(void)
+{
+	if (!rec_dir_init)
+		return;
+	rec_dir_init = 0;
+	path_release(&rec_dir);
+}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0b47a97e953..6b9d23c39af 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -71,6 +71,7 @@ static stateid_t onestateid;              /* bits all 1 */
 static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
 static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
 static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
+extern char recovery_dirname[];
 
 /* Locking:
  *
@@ -3091,8 +3092,8 @@ alloc_reclaim(void)
 /*
  * failure => all reset bets are off, nfserr_no_grace...
  */
-static int
-nfs4_client_to_reclaim(char *name)
+int
+nfs4_client_to_reclaim(const char *name)
 {
 	unsigned int strhashval;
 	struct nfs4_client_reclaim *crp = NULL;
@@ -3202,6 +3203,17 @@ nfs4_state_init(void)
 	reclaim_str_hashtbl_size = 0;
 }
 
+static void
+nfsd4_load_reboot_recovery_data(void)
+{
+	int status;
+
+	nfsd4_init_recdir(recovery_dirname);
+	status = nfsd4_recdir_load();
+	if (status)
+		printk("NFSD: Failure reading reboot recovery data\n");
+}
+
 /* initialization to perform when the nfsd service is started: */
 
 static void
@@ -3228,6 +3240,7 @@ nfs4_state_start(void)
 	status = nfsd4_init_slabs();
 	if (status)
 		return status;
+	nfsd4_load_reboot_recovery_data();
 	__nfs4_state_start();
 	nfs4_init = 1;
 	return 0;
@@ -3286,6 +3299,7 @@ __nfs4_state_shutdown(void)
 	cancel_delayed_work(&laundromat_work);
 	flush_workqueue(laundry_wq);
 	destroy_workqueue(laundry_wq);
+	nfsd4_shutdown_recdir();
 	nfs4_init = 0;
 }
 
-- 
cgit v1.2.3


From c7b9a45927e74c81d6562153f7fde9d32da00159 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:30 -0700
Subject: [PATCH] knfsd: nfsd4: reboot recovery

This patch adds the code to create and remove client subdirectories from the
recovery directory, as described in the previous patch comment.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4recover.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfs4state.c   |  16 +++++
 2 files changed, 185 insertions(+)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 2dc9851a1d3..2805c5245ea 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -120,6 +120,70 @@ out:
 	return status;
 }
 
+static int
+nfsd4_rec_fsync(struct dentry *dentry)
+{
+	struct file *filp;
+	int status = nfs_ok;
+
+	dprintk("NFSD: nfs4_fsync_rec_dir\n");
+	filp = dentry_open(dget(dentry), mntget(rec_dir.mnt), O_RDWR);
+	if (IS_ERR(filp)) {
+		status = PTR_ERR(filp);
+		goto out;
+	}
+	if (filp->f_op && filp->f_op->fsync)
+		status = filp->f_op->fsync(filp, filp->f_dentry, 0);
+	fput(filp);
+out:
+	if (status)
+		printk("nfsd4: unable to sync recovery directory\n");
+	return status;
+}
+
+int
+nfsd4_create_clid_dir(struct nfs4_client *clp)
+{
+	char *dname = clp->cl_recdir;
+	struct dentry *dentry;
+	uid_t uid;
+	gid_t gid;
+	int status;
+
+	dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
+
+	if (!rec_dir_init || clp->cl_firststate)
+		return 0;
+
+	nfs4_save_user(&uid, &gid);
+
+	/* lock the parent */
+	down(&rec_dir.dentry->d_inode->i_sem);
+
+	dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1);
+	if (IS_ERR(dentry)) {
+		status = PTR_ERR(dentry);
+		goto out_unlock;
+	}
+	status = -EEXIST;
+	if (dentry->d_inode) {
+		dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
+		goto out_put;
+	}
+	status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
+out_put:
+	dput(dentry);
+out_unlock:
+	up(&rec_dir.dentry->d_inode->i_sem);
+	if (status == 0) {
+		clp->cl_firststate = 1;
+		status = nfsd4_rec_fsync(rec_dir.dentry);
+	}
+	nfs4_reset_user(uid, gid);
+	dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
+	return status;
+}
+
 typedef int (recdir_func)(struct dentry *, struct dentry *);
 
 struct dentry_list {
@@ -201,6 +265,111 @@ out:
 	return status;
 }
 
+static int
+nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
+{
+	int status;
+
+	if (!S_ISREG(dir->d_inode->i_mode)) {
+		printk("nfsd4: non-file found in client recovery directory\n");
+		return -EINVAL;
+	}
+	down(&dir->d_inode->i_sem);
+	status = vfs_unlink(dir->d_inode, dentry);
+	up(&dir->d_inode->i_sem);
+	return status;
+}
+
+static int
+nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
+{
+	int status;
+
+	/* For now this directory should already be empty, but we empty it of
+	 * any regular files anyway, just in case the directory was created by
+	 * a kernel from the future.... */
+	nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
+	down(&dir->d_inode->i_sem);
+	status = vfs_rmdir(dir->d_inode, dentry);
+	up(&dir->d_inode->i_sem);
+	return status;
+}
+
+static int
+nfsd4_unlink_clid_dir(char *name, int namlen)
+{
+	struct dentry *dentry;
+	int status;
+
+	dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
+
+	dentry = lookup_one_len(name, rec_dir.dentry, namlen);
+	if (IS_ERR(dentry)) {
+		status = PTR_ERR(dentry);
+		return status;
+	}
+	status = -ENOENT;
+	if (!dentry->d_inode)
+		goto out;
+
+	status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
+out:
+	dput(dentry);
+	return status;
+}
+
+void
+nfsd4_remove_clid_dir(struct nfs4_client *clp)
+{
+	uid_t uid;
+	gid_t gid;
+	int status;
+
+	if (!rec_dir_init || !clp->cl_firststate)
+		return;
+
+	nfs4_save_user(&uid, &gid);
+	status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
+	nfs4_reset_user(uid, gid);
+	if (status == 0)
+		status = nfsd4_rec_fsync(rec_dir.dentry);
+	if (status)
+		printk("NFSD: Failed to remove expired client state directory"
+				" %.*s\n", HEXDIR_LEN, clp->cl_recdir);
+	return;
+}
+
+static int
+purge_old(struct dentry *parent, struct dentry *child)
+{
+	int status;
+
+	if (nfs4_has_reclaimed_state(child->d_name.name))
+		return nfs_ok;
+
+	status = nfsd4_clear_clid_dir(parent, child);
+	if (status)
+		printk("failed to remove client recovery directory %s\n",
+				child->d_name.name);
+	/* Keep trying, success or failure: */
+	return nfs_ok;
+}
+
+void
+nfsd4_recdir_purge_old(void) {
+	int status;
+
+	if (!rec_dir_init)
+		return;
+	status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old);
+	if (status == 0)
+		status = nfsd4_rec_fsync(rec_dir.dentry);
+	if (status)
+		printk("nfsd4: failed to purge old clients from recovery"
+			" directory %s\n", rec_dir.dentry->d_name.name);
+	return;
+}
+
 static int
 load_recdir(struct dentry *parent, struct dentry *child)
 {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6b9d23c39af..6cca358cd65 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -905,6 +905,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 			conf = find_confirmed_client_by_str(unconf->cl_recdir,
 									hash);
 			if (conf) {
+				nfsd4_remove_clid_dir(conf);
 				expire_client(conf);
 			}
 			move_to_confirmed(unconf);
@@ -1691,6 +1692,7 @@ nfs4_set_claim_prev(struct nfsd4_open *open, int *status)
 			*status = nfserr_reclaim_bad;
 		else {
 			open->op_stateowner->so_confirmed = 1;
+			open->op_stateowner->so_client->cl_firststate = 1;
 			open->op_stateowner->so_seqid--;
 		}
 	}
@@ -1903,6 +1905,7 @@ static void
 end_grace(void)
 {
 	dprintk("NFSD: end of grace period\n");
+	nfsd4_recdir_purge_old();
 	in_grace = 0;
 }
 
@@ -1932,6 +1935,7 @@ nfs4_laundromat(void)
 		}
 		dprintk("NFSD: purging unused client (clientid %08x)\n",
 			clp->cl_clientid.cl_id);
+		nfsd4_remove_clid_dir(clp);
 		expire_client(clp);
 	}
 	INIT_LIST_HEAD(&reaplist);
@@ -2320,6 +2324,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
 		         stp->st_stateid.si_stateownerid,
 		         stp->st_stateid.si_fileid,
 		         stp->st_stateid.si_generation);
+
+	nfsd4_create_clid_dir(sop->so_client);
 out:
 	if (oc->oc_stateowner)
 		nfs4_get_stateowner(oc->oc_stateowner);
@@ -3089,6 +3095,16 @@ alloc_reclaim(void)
 	return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
 }
 
+int
+nfs4_has_reclaimed_state(const char *name)
+{
+	unsigned int strhashval = clientstr_hashval(name);
+	struct nfs4_client *clp;
+
+	clp = find_confirmed_client_by_str(name, strhashval);
+	return clp ? 1 : 0;
+}
+
 /*
  * failure => all reset bets are off, nfserr_no_grace...
  */
-- 
cgit v1.2.3


From 0964a3d3f1aa96468091924f6b0c391a46dc6d0b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:32 -0700
Subject: [PATCH] knfsd: nfsd4 reboot dirname fix

Set the recovery directory via /proc/fs/nfsd/nfs4recoverydir.

It may be changed any time, but is used only on startup.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4recover.c |  1 -
 fs/nfsd/nfs4state.c   | 37 +++++++++++++++++++++++++++++++++++--
 fs/nfsd/nfsctl.c      | 23 +++++++++++++++++++++++
 3 files changed, 58 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 2805c5245ea..095f1740f3a 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -50,7 +50,6 @@
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
 /* Globals */
-char recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
 static struct nameidata rec_dir;
 static int rec_dir_init = 0;
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6cca358cd65..89e36526d7f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -48,6 +48,7 @@
 #include <linux/nfs4.h>
 #include <linux/nfsd/state.h>
 #include <linux/nfsd/xdr4.h>
+#include <linux/namei.h>
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -71,7 +72,8 @@ static stateid_t onestateid;              /* bits all 1 */
 static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
 static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
 static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
-extern char recovery_dirname[];
+static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+static void nfs4_set_recdir(char *recdir);
 
 /* Locking:
  *
@@ -3224,8 +3226,10 @@ nfsd4_load_reboot_recovery_data(void)
 {
 	int status;
 
-	nfsd4_init_recdir(recovery_dirname);
+	nfs4_lock_state();
+	nfsd4_init_recdir(user_recovery_dirname);
 	status = nfsd4_recdir_load();
+	nfs4_unlock_state();
 	if (status)
 		printk("NFSD: Failure reading reboot recovery data\n");
 }
@@ -3329,6 +3333,35 @@ nfs4_state_shutdown(void)
 	nfs4_unlock_state();
 }
 
+static void
+nfs4_set_recdir(char *recdir)
+{
+	nfs4_lock_state();
+	strcpy(user_recovery_dirname, recdir);
+	nfs4_unlock_state();
+}
+
+/*
+ * Change the NFSv4 recovery directory to recdir.
+ */
+int
+nfs4_reset_recoverydir(char *recdir)
+{
+	int status;
+	struct nameidata nd;
+
+	status = path_lookup(recdir, LOOKUP_FOLLOW, &nd);
+	if (status)
+		return status;
+	status = -ENOTDIR;
+	if (S_ISDIR(nd.dentry->d_inode->i_mode)) {
+		nfs4_set_recdir(recdir);
+		status = 0;
+	}
+	path_release(&nd);
+	return status;
+}
+
 /*
  * Called when leasetime is changed.
  *
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3da43a3ed32..841c562991e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -51,6 +51,7 @@ enum {
 	NFSD_Fh,
 	NFSD_Threads,
 	NFSD_Leasetime,
+	NFSD_RecoveryDir,
 };
 
 /*
@@ -66,6 +67,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size);
 static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
 static ssize_t write_threads(struct file *file, char *buf, size_t size);
 static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
+static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
 
 static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 	[NFSD_Svc] = write_svc,
@@ -78,6 +80,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 	[NFSD_Fh] = write_filehandle,
 	[NFSD_Threads] = write_threads,
 	[NFSD_Leasetime] = write_leasetime,
+	[NFSD_RecoveryDir] = write_recoverydir,
 };
 
 static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
@@ -349,6 +352,25 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
 	return strlen(buf);
 }
 
+static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
+{
+	char *mesg = buf;
+	char *recdir;
+	int len, status;
+
+	if (size > PATH_MAX || buf[size-1] != '\n')
+		return -EINVAL;
+	buf[size-1] = 0;
+
+	recdir = mesg;
+	len = qword_get(&mesg, recdir, size);
+	if (len <= 0)
+		return -EINVAL;
+
+	status = nfs4_reset_recoverydir(recdir);
+	return strlen(buf);
+}
+
 /*----------------------------------------------------------------------------*/
 /*
  *	populating the filesystem.
@@ -369,6 +391,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
 		[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
 #ifdef CONFIG_NFSD_V4
 		[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
+		[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
 #endif
 		/* last one */ {""}
 	};
-- 
cgit v1.2.3


From 3d41088fa327782b14b5659dbcfff62ec704c23c Mon Sep 17 00:00:00 2001
From: Martin Waitz <tali@admingilde.org>
Date: Thu, 23 Jun 2005 22:05:21 -0700
Subject: [PATCH] DocBook: update comments

This patch updates some comments to match code changes.

Signed-off-by: Martin Waitz <tali@admingilde.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/sysfs/file.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index e9cfa39f409..d72c1ce4855 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -14,7 +14,7 @@
 #define to_subsys(k) container_of(k,struct subsystem,kset.kobj)
 #define to_sattr(a) container_of(a,struct subsys_attribute,attr)
 
-/**
+/*
  * Subsystem file operations.
  * These operations allow subsystems to have files that can be 
  * read/written. 
@@ -192,8 +192,9 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
 
 /**
  *	flush_write_buffer - push buffer to kobject.
- *	@file:		file pointer.
+ *	@dentry:	dentry to the attribute
  *	@buffer:	data buffer for file.
+ *	@count:		number of bytes
  *
  *	Get the correct pointers for the kobject and the attribute we're
  *	dealing with, then call the store() method for the attribute, 
-- 
cgit v1.2.3


From ceffc078528befc008c6f2c2c4decda79eabd534 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Thu, 23 Jun 2005 22:05:25 -0700
Subject: [PATCH] xip: fs/mm: execute in place

- generic_file* file operations do no longer have a xip/non-xip split
- filemap_xip.c implements a new set of fops that require get_xip_page
  aop to work proper. all new fops are exported GPL-only (don't like to
  see whatever code use those except GPL modules)
- __xip_unmap now uses page_check_address, which is no longer static
  in rmap.c, and defined in linux/rmap.h
- mm/filemap.h is now much more clean, plainly having just Linus'
  inline funcs moved here from filemap.c
- fix includes in filemap_xip to make it build cleanly on i386

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/open.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 8ec63f73591..3f4a4286fdc 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -808,7 +808,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
 
 	/* NB: we're sure to have correct a_ops only after f_op->open */
 	if (f->f_flags & O_DIRECT) {
-		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) {
+		if (!f->f_mapping->a_ops ||
+		    ((!f->f_mapping->a_ops->direct_IO) &&
+		    (!f->f_mapping->a_ops->get_xip_page))) {
 			fput(f);
 			f = ERR_PTR(-EINVAL);
 		}
-- 
cgit v1.2.3


From 6d79125bba55ee82701f1c7d4ebbc1aa20ecbe4e Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Thu, 23 Jun 2005 22:05:26 -0700
Subject: [PATCH] xip: ext2: execute in place

These are the ext2 related parts.  Ext2 now uses the xip_* file operations
along with the get_xip_page aop when mounted with -o xip.

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Kconfig       | 17 ++++++++++++
 fs/ext2/Makefile |  1 +
 fs/ext2/ext2.h   |  2 ++
 fs/ext2/file.c   | 18 +++++++++++++
 fs/ext2/inode.c  | 31 +++++++++++++++++++---
 fs/ext2/namei.c  | 12 ++++++---
 fs/ext2/super.c  | 27 ++++++++++++++++++-
 fs/ext2/xip.c    | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext2/xip.h    | 25 ++++++++++++++++++
 9 files changed, 205 insertions(+), 8 deletions(-)
 create mode 100644 fs/ext2/xip.c
 create mode 100644 fs/ext2/xip.h

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 5c704d05627..8157f2e2d51 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -50,6 +50,23 @@ config EXT2_FS_SECURITY
 	  If you are not using a security module that requires using
 	  extended attributes for file security labels, say N.
 
+config EXT2_FS_XIP
+	bool "Ext2 execute in place support"
+	depends on EXT2_FS
+	help
+	  Execute in place can be used on memory-backed block devices. If you
+	  enable this option, you can select to mount block devices which are
+	  capable of this feature without using the page cache.
+
+	  If you do not use a block device that is capable of using this,
+	  or if unsure, say N.
+
+config FS_XIP
+# execute in place
+	bool
+	depends on EXT2_FS_XIP
+	default y
+
 config EXT3_FS
 	tristate "Ext3 journalling file system support"
 	help
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index ee240a14e70..c5d02da73bc 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -10,3 +10,4 @@ ext2-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
 ext2-$(CONFIG_EXT2_FS_XATTR)	 += xattr.o xattr_user.o xattr_trusted.o
 ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o
 ext2-$(CONFIG_EXT2_FS_SECURITY)	 += xattr_security.o
+ext2-$(CONFIG_EXT2_FS_XIP)	 += xip.o
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 8f0fd726c3f..eed521d22cf 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -147,9 +147,11 @@ extern struct file_operations ext2_dir_operations;
 /* file.c */
 extern struct inode_operations ext2_file_inode_operations;
 extern struct file_operations ext2_file_operations;
+extern struct file_operations ext2_xip_file_operations;
 
 /* inode.c */
 extern struct address_space_operations ext2_aops;
+extern struct address_space_operations ext2_aops_xip;
 extern struct address_space_operations ext2_nobh_aops;
 
 /* namei.c */
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index f5e86141ec5..2b3d572365a 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -55,6 +55,24 @@ struct file_operations ext2_file_operations = {
 	.sendfile	= generic_file_sendfile,
 };
 
+#ifdef CONFIG_EXT2_FS_XIP
+struct file_operations ext2_xip_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+	.aio_read	= xip_file_aio_read,
+	.aio_write	= xip_file_aio_write,
+	.ioctl		= ext2_ioctl,
+	.mmap		= xip_file_mmap,
+	.open		= generic_file_open,
+	.release	= ext2_release_file,
+	.fsync		= ext2_sync_file,
+	.readv		= xip_file_readv,
+	.writev		= xip_file_writev,
+	.sendfile	= xip_file_sendfile,
+};
+#endif
+
 struct inode_operations ext2_file_inode_operations = {
 	.truncate	= ext2_truncate,
 #ifdef CONFIG_EXT2_FS_XATTR
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index a50d9db4b6e..53dceb0c659 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -33,6 +33,7 @@
 #include <linux/mpage.h>
 #include "ext2.h"
 #include "acl.h"
+#include "xip.h"
 
 MODULE_AUTHOR("Remy Card and others");
 MODULE_DESCRIPTION("Second Extended Filesystem");
@@ -594,6 +595,16 @@ out:
 	if (err)
 		goto cleanup;
 
+	if (ext2_use_xip(inode->i_sb)) {
+		/*
+		 * we need to clear the block
+		 */
+		err = ext2_clear_xip_target (inode,
+			le32_to_cpu(chain[depth-1].key));
+		if (err)
+			goto cleanup;
+	}
+
 	if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0)
 		goto changed;
 
@@ -691,6 +702,11 @@ struct address_space_operations ext2_aops = {
 	.writepages		= ext2_writepages,
 };
 
+struct address_space_operations ext2_aops_xip = {
+	.bmap			= ext2_bmap,
+	.get_xip_page		= ext2_get_xip_page,
+};
+
 struct address_space_operations ext2_nobh_aops = {
 	.readpage		= ext2_readpage,
 	.readpages		= ext2_readpages,
@@ -910,7 +926,9 @@ void ext2_truncate (struct inode * inode)
 	iblock = (inode->i_size + blocksize-1)
 					>> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
 
-	if (test_opt(inode->i_sb, NOBH))
+	if (mapping_is_xip(inode->i_mapping))
+		xip_truncate_page(inode->i_mapping, inode->i_size);
+	else if (test_opt(inode->i_sb, NOBH))
 		nobh_truncate_page(inode->i_mapping, inode->i_size);
 	else
 		block_truncate_page(inode->i_mapping,
@@ -1110,11 +1128,16 @@ void ext2_read_inode (struct inode * inode)
 
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_op = &ext2_file_inode_operations;
-		inode->i_fop = &ext2_file_operations;
-		if (test_opt(inode->i_sb, NOBH))
+		if (ext2_use_xip(inode->i_sb)) {
+			inode->i_mapping->a_ops = &ext2_aops_xip;
+			inode->i_fop = &ext2_xip_file_operations;
+		} else if (test_opt(inode->i_sb, NOBH)) {
 			inode->i_mapping->a_ops = &ext2_nobh_aops;
-		else
+			inode->i_fop = &ext2_file_operations;
+		} else {
 			inode->i_mapping->a_ops = &ext2_aops;
+			inode->i_fop = &ext2_file_operations;
+		}
 	} else if (S_ISDIR(inode->i_mode)) {
 		inode->i_op = &ext2_dir_inode_operations;
 		inode->i_fop = &ext2_dir_operations;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 3176b3d3ffa..c5513953c82 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -34,6 +34,7 @@
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "xip.h"
 
 /*
  * Couple of helper functions - make the code slightly cleaner.
@@ -127,11 +128,16 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
 	int err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
 		inode->i_op = &ext2_file_inode_operations;
-		inode->i_fop = &ext2_file_operations;
-		if (test_opt(inode->i_sb, NOBH))
+		if (ext2_use_xip(inode->i_sb)) {
+			inode->i_mapping->a_ops = &ext2_aops_xip;
+			inode->i_fop = &ext2_xip_file_operations;
+		} else if (test_opt(inode->i_sb, NOBH)) {
 			inode->i_mapping->a_ops = &ext2_nobh_aops;
-		else
+			inode->i_fop = &ext2_file_operations;
+		} else {
 			inode->i_mapping->a_ops = &ext2_aops;
+			inode->i_fop = &ext2_file_operations;
+		}
 		mark_inode_dirty(inode);
 		err = ext2_add_nondir(dentry, inode);
 	}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 661c3d98d94..876e391f287 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "xip.h"
 
 static void ext2_sync_super(struct super_block *sb,
 			    struct ext2_super_block *es);
@@ -257,7 +258,7 @@ enum {
 	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
 	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
 	Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh,
-	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip,
 	Opt_ignore, Opt_err,
 };
 
@@ -286,6 +287,7 @@ static match_table_t tokens = {
 	{Opt_nouser_xattr, "nouser_xattr"},
 	{Opt_acl, "acl"},
 	{Opt_noacl, "noacl"},
+	{Opt_xip, "xip"},
 	{Opt_ignore, "grpquota"},
 	{Opt_ignore, "noquota"},
 	{Opt_ignore, "quota"},
@@ -397,6 +399,13 @@ static int parse_options (char * options,
 			printk("EXT2 (no)acl options not supported\n");
 			break;
 #endif
+		case Opt_xip:
+#ifdef CONFIG_EXT2_FS_XIP
+			set_opt (sbi->s_mount_opt, XIP);
+#else
+			printk("EXT2 xip option not supported\n");
+#endif
+			break;
 		case Opt_ignore:
 			break;
 		default:
@@ -640,6 +649,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 		((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
 		 MS_POSIXACL : 0);
 
+	ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
+				    EXT2_MOUNT_XIP if not */
+
 	if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
 	    (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
 	     EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
@@ -668,6 +680,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 
 	blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
 
+	if ((ext2_use_xip(sb)) && ((blocksize != PAGE_SIZE) ||
+				  (sb->s_blocksize != blocksize))) {
+		if (!silent)
+			printk("XIP: Unsupported blocksize\n");
+		goto failed_mount;
+	}
+
 	/* If the blocksize doesn't match, re-read the thing.. */
 	if (sb->s_blocksize != blocksize) {
 		brelse(bh);
@@ -916,6 +935,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 {
 	struct ext2_sb_info * sbi = EXT2_SB(sb);
 	struct ext2_super_block * es;
+	unsigned long old_mount_opt = sbi->s_mount_opt;
 
 	/*
 	 * Allow the "check" option to be passed as a remount option.
@@ -927,6 +947,11 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 		((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
 
 	es = sbi->s_es;
+	if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
+	    (old_mount_opt & EXT2_MOUNT_XIP)) &&
+	    invalidate_inodes(sb))
+		ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\
+			     "xip remain in cache (no functional problem)");
 	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
 		return 0;
 	if (*flags & MS_RDONLY) {
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
new file mode 100644
index 00000000000..d44431d1a33
--- /dev/null
+++ b/fs/ext2/xip.c
@@ -0,0 +1,80 @@
+/*
+ *  linux/fs/ext2/xip.c
+ *
+ * Copyright (C) 2005 IBM Corporation
+ * Author: Carsten Otte (cotte@de.ibm.com)
+ */
+
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/buffer_head.h>
+#include <linux/ext2_fs_sb.h>
+#include <linux/ext2_fs.h>
+#include "ext2.h"
+#include "xip.h"
+
+static inline int
+__inode_direct_access(struct inode *inode, sector_t sector, unsigned long *data) {
+	BUG_ON(!inode->i_sb->s_bdev->bd_disk->fops->direct_access);
+	return inode->i_sb->s_bdev->bd_disk->fops
+		->direct_access(inode->i_sb->s_bdev,sector,data);
+}
+
+int
+ext2_clear_xip_target(struct inode *inode, int block) {
+	sector_t sector = block*(PAGE_SIZE/512);
+	unsigned long data;
+	int rc;
+
+	rc = __inode_direct_access(inode, sector, &data);
+	if (rc)
+		return rc;
+	clear_page((void*)data);
+	return 0;
+}
+
+void ext2_xip_verify_sb(struct super_block *sb)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+
+	if ((sbi->s_mount_opt & EXT2_MOUNT_XIP)) {
+		if ((sb->s_bdev == NULL) ||
+			sb->s_bdev->bd_disk == NULL ||
+			sb->s_bdev->bd_disk->fops == NULL ||
+			sb->s_bdev->bd_disk->fops->direct_access == NULL) {
+			sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
+			ext2_warning(sb, __FUNCTION__,
+				"ignoring xip option - not supported by bdev");
+		}
+	}
+}
+
+struct page*
+ext2_get_xip_page(struct address_space *mapping, sector_t blockno,
+		   int create)
+{
+	int rc;
+	unsigned long data;
+	struct buffer_head tmp;
+
+	tmp.b_state = 0;
+	tmp.b_blocknr = 0;
+	rc = ext2_get_block(mapping->host, blockno/(PAGE_SIZE/512) , &tmp,
+				create);
+	if (rc)
+		return ERR_PTR(rc);
+	if (tmp.b_blocknr == 0) {
+		/* SPARSE block */
+		BUG_ON(create);
+		return ERR_PTR(-ENODATA);
+	}
+
+	rc = __inode_direct_access
+		(mapping->host,tmp.b_blocknr*(PAGE_SIZE/512) ,&data);
+	if (rc)
+		return ERR_PTR(rc);
+
+	SetPageUptodate(virt_to_page(data));
+	return virt_to_page(data);
+}
diff --git a/fs/ext2/xip.h b/fs/ext2/xip.h
new file mode 100644
index 00000000000..aa85331d6c5
--- /dev/null
+++ b/fs/ext2/xip.h
@@ -0,0 +1,25 @@
+/*
+ *  linux/fs/ext2/xip.h
+ *
+ * Copyright (C) 2005 IBM Corporation
+ * Author: Carsten Otte (cotte@de.ibm.com)
+ */
+
+#ifdef CONFIG_EXT2_FS_XIP
+extern void ext2_xip_verify_sb (struct super_block *);
+extern int ext2_clear_xip_target (struct inode *, int);
+
+static inline int ext2_use_xip (struct super_block *sb)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	return (sbi->s_mount_opt & EXT2_MOUNT_XIP);
+}
+struct page* ext2_get_xip_page (struct address_space *, sector_t, int);
+#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_page)
+#else
+#define mapping_is_xip(map)			0
+#define ext2_xip_verify_sb(sb)			do { } while (0)
+#define ext2_use_xip(sb)			0
+#define ext2_clear_xip_target(inode, chain)	0
+#define ext2_get_xip_page			NULL
+#endif
-- 
cgit v1.2.3


From eb6fe0c388e43b02e261f0fdee60e42f6298d7f7 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Thu, 23 Jun 2005 22:05:28 -0700
Subject: [PATCH] xip: reduce code duplication

This patch reworks filemap_xip.c with the goal to reduce code duplication
from mm/filemap.c.  It applies agains 2.6.12-rc6-mm1.  Instead of
implementing the aio functions, this one implements the synchronous
read/write functions only.  For readv and writev, the generic fallback is
used.  For aio, we rely on the application doing the fallback.  Since our
"synchronous" function does memcpy immediately anyway, there is no
performance difference between using the fallbacks or implementing each
operation.

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/file.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2b3d572365a..a484412fc78 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -58,17 +58,13 @@ struct file_operations ext2_file_operations = {
 #ifdef CONFIG_EXT2_FS_XIP
 struct file_operations ext2_xip_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= xip_file_aio_read,
-	.aio_write	= xip_file_aio_write,
+	.read		= xip_file_read,
+	.write		= xip_file_write,
 	.ioctl		= ext2_ioctl,
 	.mmap		= xip_file_mmap,
 	.open		= generic_file_open,
 	.release	= ext2_release_file,
 	.fsync		= ext2_sync_file,
-	.readv		= xip_file_readv,
-	.writev		= xip_file_writev,
 	.sendfile	= xip_file_sendfile,
 };
 #endif
-- 
cgit v1.2.3


From 52c1da39534fb382c061de58b65f678ad74b59f5 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 23 Jun 2005 22:05:33 -0700
Subject: [PATCH] make various thing static

Another rollup of patches which give various symbols static scope

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c      | 2 +-
 fs/reiserfs/stree.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 3b93e5d750e..208c079e9fd 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -337,7 +337,7 @@ int may_umount(struct vfsmount *mnt)
 
 EXPORT_SYMBOL(may_umount);
 
-void umount_tree(struct vfsmount *mnt)
+static void umount_tree(struct vfsmount *mnt)
 {
 	struct vfsmount *p;
 	LIST_HEAD(kill);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index c47f8fd31a2..63158491e15 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -223,7 +223,7 @@ extern struct tree_balance * cur_tb;
 const struct reiserfs_key  MIN_KEY = {0, 0, {{0, 0},}};
 
 /* Maximal possible key. It is never in the tree. */
-const struct reiserfs_key  MAX_KEY = {
+static const struct reiserfs_key  MAX_KEY = {
 	__constant_cpu_to_le32(0xffffffff),
 	__constant_cpu_to_le32(0xffffffff),
 	{{__constant_cpu_to_le32(0xffffffff),
-- 
cgit v1.2.3


From 75043cb5b386e5a01fd03b88f647dd992de02f97 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@mail.ru>
Date: Fri, 24 Jun 2005 20:52:52 +0000
Subject: [PATCH] fs/qnx4/*: fix sparse warnings

This patch fixes sparse warnings in the qnx4fs (and might even make
qnx4fs work on big-endian boxes)

Signed-off-by: Alexey Dobriyan <adobriyan@mail.ru>
Signed-off-by: Domen Puncer <domen@coderock.org>
Signed-off-by: Anders Larsen <al@alarsen.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/qnx4/dir.c   | 2 +-
 fs/qnx4/inode.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index cd66147cca0..7a8f5595c26 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -61,7 +61,7 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
 						ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1;
 					else {
 						le  = (struct qnx4_link_info*)de;
-						ino = ( le->dl_inode_blk - 1 ) *
+						ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) *
 							QNX4_INODES_PER_BLOCK +
 							le->dl_inode_ndx;
 					}
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index aa92d6b76a9..b79162a3547 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -236,7 +236,7 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
 	struct buffer_head *bh = NULL;
 	struct qnx4_xblk *xblk = NULL;
 	struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode);
-	qnx4_nxtnt_t nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts);
+	u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts);
 
 	if ( iblock < le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size) ) {
 		// iblock is in the first extent. This is easy.
@@ -372,7 +372,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
 		printk("qnx4: unable to read the superblock\n");
 		goto outnobh;
 	}
-	if ( le32_to_cpu( *(__u32*)bh->b_data ) != QNX4_SUPER_MAGIC ) {
+	if ( le32_to_cpup((__le32*) bh->b_data) != QNX4_SUPER_MAGIC ) {
 		if (!silent)
 			printk("qnx4: wrong fsid in superblock.\n");
 		goto out;
-- 
cgit v1.2.3


From 8ae0b77811d97552b3b3c745e97de18849583bf7 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Sat, 25 Jun 2005 14:55:41 -0700
Subject: [PATCH] fix fsync(dir) return value for ram-based filesystems

Any filesystem which is using simple_dir_operations will retunr -EINVAL for
fsync() on a directory.  Make it return zero instead.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/libfs.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/libfs.c b/fs/libfs.c
index 5025563e737..58101dff2c6 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -183,6 +183,7 @@ struct file_operations simple_dir_operations = {
 	.llseek		= dcache_dir_lseek,
 	.read		= generic_read_dir,
 	.readdir	= dcache_readdir,
+	.fsync		= simple_sync_file,
 };
 
 struct inode_operations simple_dir_inode_operations = {
-- 
cgit v1.2.3


From 6283d58e7464f82b1c1c33943f0bd51c1e83899a Mon Sep 17 00:00:00 2001
From: Qu Fuping <fs@ercist.iscas.ac.cn>
Date: Sat, 25 Jun 2005 14:55:44 -0700
Subject: [PATCH] reiserfs: do not ignore i/io error on readpage

Reiserfs's readpage does not notice i/o errors.  This patch makes
reiserfs_readpage to return -EIO when i/o error appears.

This patch makes reiserfs to not ignore I/O error on readpage.

Signed-off-by: Qu Fuping <fs@ercist.iscas.ac.cn>
Signed-off-by: Vladimir V. Saveliev <vs@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/inode.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0d5817f8197..289d864fe73 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -254,6 +254,7 @@ static int _get_block_create_0 (struct inode * inode, long block,
     char * p = NULL;
     int chars;
     int ret ;
+    int result ;
     int done = 0 ;
     unsigned long offset ;
 
@@ -262,10 +263,13 @@ static int _get_block_create_0 (struct inode * inode, long block,
 		  (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3);
 
 research:
-    if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) {
+    result = search_for_position_by_key (inode->i_sb, &key, &path) ;
+    if (result != POSITION_FOUND) {
 	pathrelse (&path);
         if (p)
             kunmap(bh_result->b_page) ;
+	if (result == IO_ERROR)
+	    return -EIO;
 	// We do not return -ENOENT if there is a hole but page is uptodate, because it means
 	// That there is some MMAPED data associated with it that is yet to be written to disk.
 	if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) {
@@ -382,8 +386,9 @@ research:
 
 	// update key to look for the next piece
 	set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars);
-	if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND)
-	    // we read something from tail, even if now we got IO_ERROR
+	result = search_for_position_by_key (inode->i_sb, &key, &path);
+	if (result != POSITION_FOUND)
+	    // i/o error most likely
 	    break;
 	bh = get_last_bh (&path);
 	ih = get_ih (&path);
@@ -394,6 +399,10 @@ research:
 
 finished:
     pathrelse (&path);
+
+    if (result == IO_ERROR)
+	return -EIO;
+
     /* this buffer has valid data, but isn't valid for io.  mapping it to
      * block #0 tells the rest of reiserfs it just has a tail in it
      */
-- 
cgit v1.2.3


From 666bfddbe8b8fd4fd44617d6c55193d5ac7edb29 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:21 -0700
Subject: [PATCH] kdump: Access dump file in elf format (/proc/vmcore)

From: "Vivek Goyal" <vgoyal@in.ibm.com>

o Support for /proc/vmcore interface. This interface exports elf core image
  either in ELF32 or ELF64 format, depending on the format in which elf headers
  have been stored by crashed kernel.
o Added support for CONFIG_VMCORE config option.
o Removed the dependency on /proc/kcore.

From: "Eric W. Biederman" <ebiederm@xmission.com>

This patch has been refactored to more closely match the prevailing style in
the affected files.  And to clearly indicate the dependency between
/proc/kcore and proc/vmcore.c

From: Hariprasad Nellitheertha <hari@in.ibm.com>

This patch contains the code that provides an ELF format interface to the
previous kernel's memory post kexec reboot.

Signed off by Hariprasad Nellitheertha <hari@in.ibm.com>
Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Kconfig          |   6 +
 fs/proc/Makefile    |   1 +
 fs/proc/proc_misc.c |   6 +
 fs/proc/vmcore.c    | 451 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 464 insertions(+)
 create mode 100644 fs/proc/vmcore.c

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 8157f2e2d51..06217795623 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -734,6 +734,12 @@ config PROC_KCORE
 	bool "/proc/kcore support" if !ARM
 	depends on PROC_FS && MMU
 
+config PROC_VMCORE
+        bool "/proc/vmcore support (EXPERIMENTAL)"
+        depends on PROC_FS && EMBEDDED && EXPERIMENTAL && CRASH_DUMP
+        help
+        Exports the dump image of crashed kernel in ELF format.
+
 config SYSFS
 	bool "sysfs file system support" if EMBEDDED
 	default y
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 738b9b60293..7431d7ba2d0 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -11,4 +11,5 @@ proc-y       += inode.o root.o base.o generic.o array.o \
 		kmsg.o proc_tty.o proc_misc.o
 
 proc-$(CONFIG_PROC_KCORE)	+= kcore.o
+proc-$(CONFIG_PROC_VMCORE)	+= vmcore.o
 proc-$(CONFIG_PROC_DEVICETREE)	+= proc_devtree.o
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 94b570ad037..a3453555a94 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -44,6 +44,7 @@
 #include <linux/jiffies.h>
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
+#include <linux/crash_dump.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -618,6 +619,11 @@ void __init proc_misc_init(void)
 				(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
 	}
 #endif
+#ifdef CONFIG_PROC_VMCORE
+	proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL);
+	if (proc_vmcore)
+		proc_vmcore->proc_fops = &proc_vmcore_operations;
+#endif
 #ifdef CONFIG_MAGIC_SYSRQ
 	entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
 	if (entry)
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
new file mode 100644
index 00000000000..8ad46785584
--- /dev/null
+++ b/fs/proc/vmcore.c
@@ -0,0 +1,451 @@
+/*
+ *	fs/proc/vmcore.c Interface for accessing the crash
+ * 				 dump from the system's previous life.
+ * 	Heavily borrowed from fs/proc/kcore.c
+ *	Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
+ *	Copyright (C) IBM Corporation, 2004. All rights reserved
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/init.h>
+#include <linux/crash_dump.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+/* List representing chunks of contiguous memory areas and their offsets in
+ * vmcore file.
+ */
+static LIST_HEAD(vmcore_list);
+
+/* Stores the pointer to the buffer containing kernel elf core headers. */
+static char *elfcorebuf;
+static size_t elfcorebuf_sz;
+
+/* Total size of vmcore file. */
+static u64 vmcore_size;
+
+struct proc_dir_entry *proc_vmcore = NULL;
+
+/* Reads a page from the oldmem device from given offset. */
+static ssize_t read_from_oldmem(char *buf, size_t count,
+			     loff_t *ppos, int userbuf)
+{
+	unsigned long pfn, offset;
+	size_t nr_bytes;
+	ssize_t read = 0, tmp;
+
+	if (!count)
+		return 0;
+
+	offset = (unsigned long)(*ppos % PAGE_SIZE);
+	pfn = (unsigned long)(*ppos / PAGE_SIZE);
+	if (pfn > saved_max_pfn)
+		return -EINVAL;
+
+	do {
+		if (count > (PAGE_SIZE - offset))
+			nr_bytes = PAGE_SIZE - offset;
+		else
+			nr_bytes = count;
+
+		tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf);
+		if (tmp < 0)
+			return tmp;
+		*ppos += nr_bytes;
+		count -= nr_bytes;
+		buf += nr_bytes;
+		read += nr_bytes;
+		++pfn;
+		offset = 0;
+	} while (count);
+
+	return read;
+}
+
+/* Maps vmcore file offset to respective physical address in memroy. */
+static u64 map_offset_to_paddr(loff_t offset, struct list_head *vc_list,
+					struct vmcore **m_ptr)
+{
+	struct vmcore *m;
+	u64 paddr;
+
+	list_for_each_entry(m, vc_list, list) {
+		u64 start, end;
+		start = m->offset;
+		end = m->offset + m->size - 1;
+		if (offset >= start && offset <= end) {
+			paddr = m->paddr + offset - start;
+			*m_ptr = m;
+			return paddr;
+		}
+	}
+	*m_ptr = NULL;
+	return 0;
+}
+
+/* Read from the ELF header and then the crash dump. On error, negative value is
+ * returned otherwise number of bytes read are returned.
+ */
+static ssize_t read_vmcore(struct file *file, char __user *buffer,
+				size_t buflen, loff_t *fpos)
+{
+	ssize_t acc = 0, tmp;
+	size_t tsz, nr_bytes;
+	u64 start;
+	struct vmcore *curr_m = NULL;
+
+	if (buflen == 0 || *fpos >= vmcore_size)
+		return 0;
+
+	/* trim buflen to not go beyond EOF */
+	if (buflen > vmcore_size - *fpos)
+		buflen = vmcore_size - *fpos;
+
+	/* Read ELF core header */
+	if (*fpos < elfcorebuf_sz) {
+		tsz = elfcorebuf_sz - *fpos;
+		if (buflen < tsz)
+			tsz = buflen;
+		if (copy_to_user(buffer, elfcorebuf + *fpos, tsz))
+			return -EFAULT;
+		buflen -= tsz;
+		*fpos += tsz;
+		buffer += tsz;
+		acc += tsz;
+
+		/* leave now if filled buffer already */
+		if (buflen == 0)
+			return acc;
+	}
+
+	start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m);
+	if (!curr_m)
+        	return -EINVAL;
+	if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
+		tsz = buflen;
+
+	/* Calculate left bytes in current memory segment. */
+	nr_bytes = (curr_m->size - (start - curr_m->paddr));
+	if (tsz > nr_bytes)
+		tsz = nr_bytes;
+
+	while (buflen) {
+		tmp = read_from_oldmem(buffer, tsz, &start, 1);
+		if (tmp < 0)
+			return tmp;
+		buflen -= tsz;
+		*fpos += tsz;
+		buffer += tsz;
+		acc += tsz;
+		if (start >= (curr_m->paddr + curr_m->size)) {
+			if (curr_m->list.next == &vmcore_list)
+				return acc;	/*EOF*/
+			curr_m = list_entry(curr_m->list.next,
+						struct vmcore, list);
+			start = curr_m->paddr;
+		}
+		if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
+			tsz = buflen;
+		/* Calculate left bytes in current memory segment. */
+		nr_bytes = (curr_m->size - (start - curr_m->paddr));
+		if (tsz > nr_bytes)
+			tsz = nr_bytes;
+	}
+	return acc;
+}
+
+static int open_vmcore(struct inode *inode, struct file *filp)
+{
+	return 0;
+}
+
+struct file_operations proc_vmcore_operations = {
+	.read		= read_vmcore,
+	.open		= open_vmcore,
+};
+
+static struct vmcore* __init get_new_element(void)
+{
+	struct vmcore *p;
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p)
+		memset(p, 0, sizeof(*p));
+	return p;
+}
+
+static u64 __init get_vmcore_size_elf64(char *elfptr)
+{
+	int i;
+	u64 size;
+	Elf64_Ehdr *ehdr_ptr;
+	Elf64_Phdr *phdr_ptr;
+
+	ehdr_ptr = (Elf64_Ehdr *)elfptr;
+	phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr));
+	size = sizeof(Elf64_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr));
+	for (i = 0; i < ehdr_ptr->e_phnum; i++) {
+		size += phdr_ptr->p_memsz;
+		phdr_ptr++;
+	}
+	return size;
+}
+
+/* Merges all the PT_NOTE headers into one. */
+static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
+						struct list_head *vc_list)
+{
+	int i, nr_ptnote=0, rc=0;
+	char *tmp;
+	Elf64_Ehdr *ehdr_ptr;
+	Elf64_Phdr phdr, *phdr_ptr;
+	Elf64_Nhdr *nhdr_ptr;
+	u64 phdr_sz = 0, note_off;
+
+	ehdr_ptr = (Elf64_Ehdr *)elfptr;
+	phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr));
+	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+		int j;
+		void *notes_section;
+		struct vmcore *new;
+		u64 offset, max_sz, sz, real_sz = 0;
+		if (phdr_ptr->p_type != PT_NOTE)
+			continue;
+		nr_ptnote++;
+		max_sz = phdr_ptr->p_memsz;
+		offset = phdr_ptr->p_offset;
+		notes_section = kmalloc(max_sz, GFP_KERNEL);
+		if (!notes_section)
+			return -ENOMEM;
+		rc = read_from_oldmem(notes_section, max_sz, &offset, 0);
+		if (rc < 0) {
+			kfree(notes_section);
+			return rc;
+		}
+		nhdr_ptr = notes_section;
+		for (j = 0; j < max_sz; j += sz) {
+			if (nhdr_ptr->n_namesz == 0)
+				break;
+			sz = sizeof(Elf64_Nhdr) +
+				((nhdr_ptr->n_namesz + 3) & ~3) +
+				((nhdr_ptr->n_descsz + 3) & ~3);
+			real_sz += sz;
+			nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz);
+		}
+
+		/* Add this contiguous chunk of notes section to vmcore list.*/
+		new = get_new_element();
+		if (!new) {
+			kfree(notes_section);
+			return -ENOMEM;
+		}
+		new->paddr = phdr_ptr->p_offset;
+		new->size = real_sz;
+		list_add_tail(&new->list, vc_list);
+		phdr_sz += real_sz;
+		kfree(notes_section);
+	}
+
+	/* Prepare merged PT_NOTE program header. */
+	phdr.p_type    = PT_NOTE;
+	phdr.p_flags   = 0;
+	note_off = sizeof(Elf64_Ehdr) +
+			(ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr);
+	phdr.p_offset  = note_off;
+	phdr.p_vaddr   = phdr.p_paddr = 0;
+	phdr.p_filesz  = phdr.p_memsz = phdr_sz;
+	phdr.p_align   = 0;
+
+	/* Add merged PT_NOTE program header*/
+	tmp = elfptr + sizeof(Elf64_Ehdr);
+	memcpy(tmp, &phdr, sizeof(phdr));
+	tmp += sizeof(phdr);
+
+	/* Remove unwanted PT_NOTE program headers. */
+	i = (nr_ptnote - 1) * sizeof(Elf64_Phdr);
+	*elfsz = *elfsz - i;
+	memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr)));
+
+	/* Modify e_phnum to reflect merged headers. */
+	ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
+
+	return 0;
+}
+
+/* Add memory chunks represented by program headers to vmcore list. Also update
+ * the new offset fields of exported program headers. */
+static int __init process_ptload_program_headers_elf64(char *elfptr,
+						size_t elfsz,
+						struct list_head *vc_list)
+{
+	int i;
+	Elf64_Ehdr *ehdr_ptr;
+	Elf64_Phdr *phdr_ptr;
+	loff_t vmcore_off;
+	struct vmcore *new;
+
+	ehdr_ptr = (Elf64_Ehdr *)elfptr;
+	phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */
+
+	/* First program header is PT_NOTE header. */
+	vmcore_off = sizeof(Elf64_Ehdr) +
+			(ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr) +
+			phdr_ptr->p_memsz; /* Note sections */
+
+	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+		if (phdr_ptr->p_type != PT_LOAD)
+			continue;
+
+		/* Add this contiguous chunk of memory to vmcore list.*/
+		new = get_new_element();
+		if (!new)
+			return -ENOMEM;
+		new->paddr = phdr_ptr->p_offset;
+		new->size = phdr_ptr->p_memsz;
+		list_add_tail(&new->list, vc_list);
+
+		/* Update the program header offset. */
+		phdr_ptr->p_offset = vmcore_off;
+		vmcore_off = vmcore_off + phdr_ptr->p_memsz;
+	}
+	return 0;
+}
+
+/* Sets offset fields of vmcore elements. */
+static void __init set_vmcore_list_offsets_elf64(char *elfptr,
+						struct list_head *vc_list)
+{
+	loff_t vmcore_off;
+	Elf64_Ehdr *ehdr_ptr;
+	struct vmcore *m;
+
+	ehdr_ptr = (Elf64_Ehdr *)elfptr;
+
+	/* Skip Elf header and program headers. */
+	vmcore_off = sizeof(Elf64_Ehdr) +
+			(ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr);
+
+	list_for_each_entry(m, vc_list, list) {
+		m->offset = vmcore_off;
+		vmcore_off += m->size;
+	}
+}
+
+static int __init parse_crash_elf64_headers(void)
+{
+	int rc=0;
+	Elf64_Ehdr ehdr;
+	u64 addr;
+
+	addr = elfcorehdr_addr;
+
+	/* Read Elf header */
+	rc = read_from_oldmem((char*)&ehdr, sizeof(Elf64_Ehdr), &addr, 0);
+	if (rc < 0)
+		return rc;
+
+	/* Do some basic Verification. */
+	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
+		(ehdr.e_type != ET_CORE) ||
+		!elf_check_arch(&ehdr) ||
+		ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
+		ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
+		ehdr.e_version != EV_CURRENT ||
+		ehdr.e_ehsize != sizeof(Elf64_Ehdr) ||
+		ehdr.e_phentsize != sizeof(Elf64_Phdr) ||
+		ehdr.e_phnum == 0) {
+		printk(KERN_WARNING "Warning: Core image elf header is not"
+					"sane\n");
+		return -EINVAL;
+	}
+
+	/* Read in all elf headers. */
+	elfcorebuf_sz = sizeof(Elf64_Ehdr) + ehdr.e_phnum * sizeof(Elf64_Phdr);
+	elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL);
+	if (!elfcorebuf)
+		return -ENOMEM;
+	addr = elfcorehdr_addr;
+	rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0);
+	if (rc < 0) {
+		kfree(elfcorebuf);
+		return rc;
+	}
+
+	/* Merge all PT_NOTE headers into one. */
+	rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, &vmcore_list);
+	if (rc) {
+		kfree(elfcorebuf);
+		return rc;
+	}
+	rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz,
+							&vmcore_list);
+	if (rc) {
+		kfree(elfcorebuf);
+		return rc;
+	}
+	set_vmcore_list_offsets_elf64(elfcorebuf, &vmcore_list);
+	return 0;
+}
+
+static int __init parse_crash_elf_headers(void)
+{
+	unsigned char e_ident[EI_NIDENT];
+	u64 addr;
+	int rc=0;
+
+	addr = elfcorehdr_addr;
+	rc = read_from_oldmem(e_ident, EI_NIDENT, &addr, 0);
+	if (rc < 0)
+		return rc;
+	if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) {
+		printk(KERN_WARNING "Warning: Core image elf header"
+					" not found\n");
+		return -EINVAL;
+	}
+
+	if (e_ident[EI_CLASS] == ELFCLASS64) {
+		rc = parse_crash_elf64_headers();
+		if (rc)
+			return rc;
+
+		/* Determine vmcore size. */
+		vmcore_size = get_vmcore_size_elf64(elfcorebuf);
+	} else {
+		printk(KERN_WARNING "Warning: Core image elf header is not"
+					" sane\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Init function for vmcore module. */
+static int __init vmcore_init(void)
+{
+	int rc = 0;
+
+	/* If elfcorehdr= has been passed in cmdline, then capture the dump.*/
+	if (!(elfcorehdr_addr < ELFCORE_ADDR_MAX))
+		return rc;
+	rc = parse_crash_elf_headers();
+	if (rc) {
+		printk(KERN_WARNING "Kdump: vmcore not initialized\n");
+		return rc;
+	}
+
+	/* Initialize /proc/vmcore size if proc is already up. */
+	if (proc_vmcore)
+		proc_vmcore->size = vmcore_size;
+	return 0;
+}
+module_init(vmcore_init)
-- 
cgit v1.2.3


From 72658e9d5004fc0dd807bea9eda49e6a52e40103 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:22 -0700
Subject: [PATCH] kdump: Parse elf32 headers and export through /proc/vmcore

o Adds support for parsing core ELF32 headers.
o I am expecting ELF32 support to go away down the line. This patch has been
  introduced for testing purposes as gdb can not parse ELF64 headers for
  i386. When a decent user space solution is available, ELF32 support
  can go away.

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/vmcore.c | 218 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 218 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 8ad46785584..3b2e7b69e63 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -202,6 +202,23 @@ static u64 __init get_vmcore_size_elf64(char *elfptr)
 	return size;
 }
 
+static u64 __init get_vmcore_size_elf32(char *elfptr)
+{
+	int i;
+	u64 size;
+	Elf32_Ehdr *ehdr_ptr;
+	Elf32_Phdr *phdr_ptr;
+
+	ehdr_ptr = (Elf32_Ehdr *)elfptr;
+	phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr));
+	size = sizeof(Elf32_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr));
+	for (i = 0; i < ehdr_ptr->e_phnum; i++) {
+		size += phdr_ptr->p_memsz;
+		phdr_ptr++;
+	}
+	return size;
+}
+
 /* Merges all the PT_NOTE headers into one. */
 static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
 						struct list_head *vc_list)
@@ -283,6 +300,87 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
 	return 0;
 }
 
+/* Merges all the PT_NOTE headers into one. */
+static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
+						struct list_head *vc_list)
+{
+	int i, nr_ptnote=0, rc=0;
+	char *tmp;
+	Elf32_Ehdr *ehdr_ptr;
+	Elf32_Phdr phdr, *phdr_ptr;
+	Elf32_Nhdr *nhdr_ptr;
+	u64 phdr_sz = 0, note_off;
+
+	ehdr_ptr = (Elf32_Ehdr *)elfptr;
+	phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr));
+	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+		int j;
+		void *notes_section;
+		struct vmcore *new;
+		u64 offset, max_sz, sz, real_sz = 0;
+		if (phdr_ptr->p_type != PT_NOTE)
+			continue;
+		nr_ptnote++;
+		max_sz = phdr_ptr->p_memsz;
+		offset = phdr_ptr->p_offset;
+		notes_section = kmalloc(max_sz, GFP_KERNEL);
+		if (!notes_section)
+			return -ENOMEM;
+		rc = read_from_oldmem(notes_section, max_sz, &offset, 0);
+		if (rc < 0) {
+			kfree(notes_section);
+			return rc;
+		}
+		nhdr_ptr = notes_section;
+		for (j = 0; j < max_sz; j += sz) {
+			if (nhdr_ptr->n_namesz == 0)
+				break;
+			sz = sizeof(Elf32_Nhdr) +
+				((nhdr_ptr->n_namesz + 3) & ~3) +
+				((nhdr_ptr->n_descsz + 3) & ~3);
+			real_sz += sz;
+			nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz);
+		}
+
+		/* Add this contiguous chunk of notes section to vmcore list.*/
+		new = get_new_element();
+		if (!new) {
+			kfree(notes_section);
+			return -ENOMEM;
+		}
+		new->paddr = phdr_ptr->p_offset;
+		new->size = real_sz;
+		list_add_tail(&new->list, vc_list);
+		phdr_sz += real_sz;
+		kfree(notes_section);
+	}
+
+	/* Prepare merged PT_NOTE program header. */
+	phdr.p_type    = PT_NOTE;
+	phdr.p_flags   = 0;
+	note_off = sizeof(Elf32_Ehdr) +
+			(ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr);
+	phdr.p_offset  = note_off;
+	phdr.p_vaddr   = phdr.p_paddr = 0;
+	phdr.p_filesz  = phdr.p_memsz = phdr_sz;
+	phdr.p_align   = 0;
+
+	/* Add merged PT_NOTE program header*/
+	tmp = elfptr + sizeof(Elf32_Ehdr);
+	memcpy(tmp, &phdr, sizeof(phdr));
+	tmp += sizeof(phdr);
+
+	/* Remove unwanted PT_NOTE program headers. */
+	i = (nr_ptnote - 1) * sizeof(Elf32_Phdr);
+	*elfsz = *elfsz - i;
+	memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr)));
+
+	/* Modify e_phnum to reflect merged headers. */
+	ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
+
+	return 0;
+}
+
 /* Add memory chunks represented by program headers to vmcore list. Also update
  * the new offset fields of exported program headers. */
 static int __init process_ptload_program_headers_elf64(char *elfptr,
@@ -322,6 +420,43 @@ static int __init process_ptload_program_headers_elf64(char *elfptr,
 	return 0;
 }
 
+static int __init process_ptload_program_headers_elf32(char *elfptr,
+						size_t elfsz,
+						struct list_head *vc_list)
+{
+	int i;
+	Elf32_Ehdr *ehdr_ptr;
+	Elf32_Phdr *phdr_ptr;
+	loff_t vmcore_off;
+	struct vmcore *new;
+
+	ehdr_ptr = (Elf32_Ehdr *)elfptr;
+	phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */
+
+	/* First program header is PT_NOTE header. */
+	vmcore_off = sizeof(Elf32_Ehdr) +
+			(ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr) +
+			phdr_ptr->p_memsz; /* Note sections */
+
+	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+		if (phdr_ptr->p_type != PT_LOAD)
+			continue;
+
+		/* Add this contiguous chunk of memory to vmcore list.*/
+		new = get_new_element();
+		if (!new)
+			return -ENOMEM;
+		new->paddr = phdr_ptr->p_offset;
+		new->size = phdr_ptr->p_memsz;
+		list_add_tail(&new->list, vc_list);
+
+		/* Update the program header offset */
+		phdr_ptr->p_offset = vmcore_off;
+		vmcore_off = vmcore_off + phdr_ptr->p_memsz;
+	}
+	return 0;
+}
+
 /* Sets offset fields of vmcore elements. */
 static void __init set_vmcore_list_offsets_elf64(char *elfptr,
 						struct list_head *vc_list)
@@ -342,6 +477,26 @@ static void __init set_vmcore_list_offsets_elf64(char *elfptr,
 	}
 }
 
+/* Sets offset fields of vmcore elements. */
+static void __init set_vmcore_list_offsets_elf32(char *elfptr,
+						struct list_head *vc_list)
+{
+	loff_t vmcore_off;
+	Elf32_Ehdr *ehdr_ptr;
+	struct vmcore *m;
+
+	ehdr_ptr = (Elf32_Ehdr *)elfptr;
+
+	/* Skip Elf header and program headers. */
+	vmcore_off = sizeof(Elf32_Ehdr) +
+			(ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr);
+
+	list_for_each_entry(m, vc_list, list) {
+		m->offset = vmcore_off;
+		vmcore_off += m->size;
+	}
+}
+
 static int __init parse_crash_elf64_headers(void)
 {
 	int rc=0;
@@ -398,6 +553,62 @@ static int __init parse_crash_elf64_headers(void)
 	return 0;
 }
 
+static int __init parse_crash_elf32_headers(void)
+{
+	int rc=0;
+	Elf32_Ehdr ehdr;
+	u64 addr;
+
+	addr = elfcorehdr_addr;
+
+	/* Read Elf header */
+	rc = read_from_oldmem((char*)&ehdr, sizeof(Elf32_Ehdr), &addr, 0);
+	if (rc < 0)
+		return rc;
+
+	/* Do some basic Verification. */
+	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
+		(ehdr.e_type != ET_CORE) ||
+		!elf_check_arch(&ehdr) ||
+		ehdr.e_ident[EI_CLASS] != ELFCLASS32||
+		ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
+		ehdr.e_version != EV_CURRENT ||
+		ehdr.e_ehsize != sizeof(Elf32_Ehdr) ||
+		ehdr.e_phentsize != sizeof(Elf32_Phdr) ||
+		ehdr.e_phnum == 0) {
+		printk(KERN_WARNING "Warning: Core image elf header is not"
+					"sane\n");
+		return -EINVAL;
+	}
+
+	/* Read in all elf headers. */
+	elfcorebuf_sz = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr);
+	elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL);
+	if (!elfcorebuf)
+		return -ENOMEM;
+	addr = elfcorehdr_addr;
+	rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0);
+	if (rc < 0) {
+		kfree(elfcorebuf);
+		return rc;
+	}
+
+	/* Merge all PT_NOTE headers into one. */
+	rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, &vmcore_list);
+	if (rc) {
+		kfree(elfcorebuf);
+		return rc;
+	}
+	rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz,
+								&vmcore_list);
+	if (rc) {
+		kfree(elfcorebuf);
+		return rc;
+	}
+	set_vmcore_list_offsets_elf32(elfcorebuf, &vmcore_list);
+	return 0;
+}
+
 static int __init parse_crash_elf_headers(void)
 {
 	unsigned char e_ident[EI_NIDENT];
@@ -421,6 +632,13 @@ static int __init parse_crash_elf_headers(void)
 
 		/* Determine vmcore size. */
 		vmcore_size = get_vmcore_size_elf64(elfcorebuf);
+	} else if (e_ident[EI_CLASS] == ELFCLASS32) {
+		rc = parse_crash_elf32_headers();
+		if (rc)
+			return rc;
+
+		/* Determine vmcore size. */
+		vmcore_size = get_vmcore_size_elf32(elfcorebuf);
 	} else {
 		printk(KERN_WARNING "Warning: Core image elf header is not"
 					" sane\n");
-- 
cgit v1.2.3


From 486fd404fbc840e28a959d2f2842b6c46ed6b250 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sat, 25 Jun 2005 14:58:47 -0700
Subject: [PATCH] small partitions/msdos cleanups

This patch makes the following changes to the msdos partition code:
- remove CONFIG_NEC98_PARTITION leftovers
- make parse_bsd static

This patch was already ACK'ed by Andries Brouwer.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/Makefile | 1 -
 fs/partitions/check.c  | 3 ---
 fs/partitions/check.h  | 4 ----
 fs/partitions/msdos.c  | 4 ++--
 4 files changed, 2 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile
index 4c83c17969e..66d5cc26faf 100644
--- a/fs/partitions/Makefile
+++ b/fs/partitions/Makefile
@@ -17,4 +17,3 @@ obj-$(CONFIG_SUN_PARTITION) += sun.o
 obj-$(CONFIG_ULTRIX_PARTITION) += ultrix.o
 obj-$(CONFIG_IBM_PARTITION) += ibm.o
 obj-$(CONFIG_EFI_PARTITION) += efi.o
-obj-$(CONFIG_NEC98_PARTITION) += nec98.o msdos.o
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 2cab98a9a62..77e178f1316 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -79,9 +79,6 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) =
 #ifdef CONFIG_LDM_PARTITION
 	ldm_partition,		/* this must come before msdos */
 #endif
-#ifdef CONFIG_NEC98_PARTITION
-	nec98_partition,	/* must be come before `msdos_partition' */
-#endif
 #ifdef CONFIG_MSDOS_PARTITION
 	msdos_partition,
 #endif
diff --git a/fs/partitions/check.h b/fs/partitions/check.h
index 43adcc68e47..17ae8ecd9e8 100644
--- a/fs/partitions/check.h
+++ b/fs/partitions/check.h
@@ -30,7 +30,3 @@ put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size)
 
 extern int warn_no_part;
 
-extern void parse_bsd(struct parsed_partitions *state,
-			struct block_device *bdev, u32 offset, u32 size,
-			int origin, char *flavour, int max_partitions);
-
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 584a27b2bbd..9935d254186 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -202,12 +202,12 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev,
 #endif
 }
 
-#if defined(CONFIG_BSD_DISKLABEL) || defined(CONFIG_NEC98_PARTITION)
+#if defined(CONFIG_BSD_DISKLABEL)
 /* 
  * Create devices for BSD partitions listed in a disklabel, under a
  * dos-like partition. See parse_extended() for more information.
  */
-void
+static void
 parse_bsd(struct parsed_partitions *state, struct block_device *bdev,
 		u32 offset, u32 size, int origin, char *flavour,
 		int max_partitions)
-- 
cgit v1.2.3


From 94c9eca223048ae15df1989fae50eefda9daae7e Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sat, 25 Jun 2005 14:59:06 -0700
Subject: [PATCH] fs/jffs/: cleanups

This patch contains the following cleanups:
- make needlessly global functions static
- provide some debugging helper functions only for appropriate
  values of CONFIG_JFFS_FS_VERBOSE

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jffs/intrep.c  | 114 +++++++++++++++++++++++++++---------------------------
 fs/jffs/intrep.h  |   2 -
 fs/jffs/jffs_fm.c | 105 +++++++++++++++++++++++++------------------------
 fs/jffs/jffs_fm.h |   3 +-
 4 files changed, 112 insertions(+), 112 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 8cc6893fc56..fc589ddd076 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -175,8 +175,64 @@ jffs_hexdump(struct mtd_info *mtd, loff_t pos, int size)
 	}
 }
 
+/* Print the contents of a node.  */
+static void
+jffs_print_node(struct jffs_node *n)
+{
+	D(printk("jffs_node: 0x%p\n", n));
+	D(printk("{\n"));
+	D(printk("        0x%08x, /* version  */\n", n->version));
+	D(printk("        0x%08x, /* data_offset  */\n", n->data_offset));
+	D(printk("        0x%08x, /* data_size  */\n", n->data_size));
+	D(printk("        0x%08x, /* removed_size  */\n", n->removed_size));
+	D(printk("        0x%08x, /* fm_offset  */\n", n->fm_offset));
+	D(printk("        0x%02x,       /* name_size  */\n", n->name_size));
+	D(printk("        0x%p, /* fm,  fm->offset: %u  */\n",
+		 n->fm, (n->fm ? n->fm->offset : 0)));
+	D(printk("        0x%p, /* version_prev  */\n", n->version_prev));
+	D(printk("        0x%p, /* version_next  */\n", n->version_next));
+	D(printk("        0x%p, /* range_prev  */\n", n->range_prev));
+	D(printk("        0x%p, /* range_next  */\n", n->range_next));
+	D(printk("}\n"));
+}
+
 #endif
 
+/* Print the contents of a raw inode.  */
+static void
+jffs_print_raw_inode(struct jffs_raw_inode *raw_inode)
+{
+	D(printk("jffs_raw_inode: inode number: %u\n", raw_inode->ino));
+	D(printk("{\n"));
+	D(printk("        0x%08x, /* magic  */\n", raw_inode->magic));
+	D(printk("        0x%08x, /* ino  */\n", raw_inode->ino));
+	D(printk("        0x%08x, /* pino  */\n", raw_inode->pino));
+	D(printk("        0x%08x, /* version  */\n", raw_inode->version));
+	D(printk("        0x%08x, /* mode  */\n", raw_inode->mode));
+	D(printk("        0x%04x,     /* uid  */\n", raw_inode->uid));
+	D(printk("        0x%04x,     /* gid  */\n", raw_inode->gid));
+	D(printk("        0x%08x, /* atime  */\n", raw_inode->atime));
+	D(printk("        0x%08x, /* mtime  */\n", raw_inode->mtime));
+	D(printk("        0x%08x, /* ctime  */\n", raw_inode->ctime));
+	D(printk("        0x%08x, /* offset  */\n", raw_inode->offset));
+	D(printk("        0x%08x, /* dsize  */\n", raw_inode->dsize));
+	D(printk("        0x%08x, /* rsize  */\n", raw_inode->rsize));
+	D(printk("        0x%02x,       /* nsize  */\n", raw_inode->nsize));
+	D(printk("        0x%02x,       /* nlink  */\n", raw_inode->nlink));
+	D(printk("        0x%02x,       /* spare  */\n",
+		 raw_inode->spare));
+	D(printk("        %u,          /* rename  */\n",
+		 raw_inode->rename));
+	D(printk("        %u,          /* deleted  */\n",
+		 raw_inode->deleted));
+	D(printk("        0x%02x,       /* accurate  */\n",
+		 raw_inode->accurate));
+	D(printk("        0x%08x, /* dchksum  */\n", raw_inode->dchksum));
+	D(printk("        0x%04x,     /* nchksum  */\n", raw_inode->nchksum));
+	D(printk("        0x%04x,     /* chksum  */\n", raw_inode->chksum));
+	D(printk("}\n"));
+}
+
 #define flash_safe_acquire(arg)
 #define flash_safe_release(arg)
 
@@ -2507,64 +2563,6 @@ jffs_update_file(struct jffs_file *f, struct jffs_node *node)
 	return 0;
 }
 
-/* Print the contents of a node.  */
-void
-jffs_print_node(struct jffs_node *n)
-{
-	D(printk("jffs_node: 0x%p\n", n));
-	D(printk("{\n"));
-	D(printk("        0x%08x, /* version  */\n", n->version));
-	D(printk("        0x%08x, /* data_offset  */\n", n->data_offset));
-	D(printk("        0x%08x, /* data_size  */\n", n->data_size));
-	D(printk("        0x%08x, /* removed_size  */\n", n->removed_size));
-	D(printk("        0x%08x, /* fm_offset  */\n", n->fm_offset));
-	D(printk("        0x%02x,       /* name_size  */\n", n->name_size));
-	D(printk("        0x%p, /* fm,  fm->offset: %u  */\n",
-		 n->fm, (n->fm ? n->fm->offset : 0)));
-	D(printk("        0x%p, /* version_prev  */\n", n->version_prev));
-	D(printk("        0x%p, /* version_next  */\n", n->version_next));
-	D(printk("        0x%p, /* range_prev  */\n", n->range_prev));
-	D(printk("        0x%p, /* range_next  */\n", n->range_next));
-	D(printk("}\n"));
-}
-
-
-/* Print the contents of a raw inode.  */
-void
-jffs_print_raw_inode(struct jffs_raw_inode *raw_inode)
-{
-	D(printk("jffs_raw_inode: inode number: %u\n", raw_inode->ino));
-	D(printk("{\n"));
-	D(printk("        0x%08x, /* magic  */\n", raw_inode->magic));
-	D(printk("        0x%08x, /* ino  */\n", raw_inode->ino));
-	D(printk("        0x%08x, /* pino  */\n", raw_inode->pino));
-	D(printk("        0x%08x, /* version  */\n", raw_inode->version));
-	D(printk("        0x%08x, /* mode  */\n", raw_inode->mode));
-	D(printk("        0x%04x,     /* uid  */\n", raw_inode->uid));
-	D(printk("        0x%04x,     /* gid  */\n", raw_inode->gid));
-	D(printk("        0x%08x, /* atime  */\n", raw_inode->atime));
-	D(printk("        0x%08x, /* mtime  */\n", raw_inode->mtime));
-	D(printk("        0x%08x, /* ctime  */\n", raw_inode->ctime));
-	D(printk("        0x%08x, /* offset  */\n", raw_inode->offset));
-	D(printk("        0x%08x, /* dsize  */\n", raw_inode->dsize));
-	D(printk("        0x%08x, /* rsize  */\n", raw_inode->rsize));
-	D(printk("        0x%02x,       /* nsize  */\n", raw_inode->nsize));
-	D(printk("        0x%02x,       /* nlink  */\n", raw_inode->nlink));
-	D(printk("        0x%02x,       /* spare  */\n",
-		 raw_inode->spare));
-	D(printk("        %u,          /* rename  */\n",
-		 raw_inode->rename));
-	D(printk("        %u,          /* deleted  */\n",
-		 raw_inode->deleted));
-	D(printk("        0x%02x,       /* accurate  */\n",
-		 raw_inode->accurate));
-	D(printk("        0x%08x, /* dchksum  */\n", raw_inode->dchksum));
-	D(printk("        0x%04x,     /* nchksum  */\n", raw_inode->nchksum));
-	D(printk("        0x%04x,     /* chksum  */\n", raw_inode->chksum));
-	D(printk("}\n"));
-}
-
-
 /* Print the contents of a file.  */
 #if 0
 int
diff --git a/fs/jffs/intrep.h b/fs/jffs/intrep.h
index 4ae97b17911..5c7abe0e269 100644
--- a/fs/jffs/intrep.h
+++ b/fs/jffs/intrep.h
@@ -49,8 +49,6 @@ int jffs_garbage_collect_thread(void *c);
 void jffs_garbage_collect_trigger(struct jffs_control *c);
 
 /* For debugging purposes.  */
-void jffs_print_node(struct jffs_node *n);
-void jffs_print_raw_inode(struct jffs_raw_inode *raw_inode);
 #if 0
 int jffs_print_file(struct jffs_file *f);
 #endif  /*  0  */
diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c
index 0cab8da49d3..053e3a98a27 100644
--- a/fs/jffs/jffs_fm.c
+++ b/fs/jffs/jffs_fm.c
@@ -31,6 +31,60 @@ static void jffs_free_fm(struct jffs_fm *n);
 extern kmem_cache_t     *fm_cache;
 extern kmem_cache_t     *node_cache;
 
+#if CONFIG_JFFS_FS_VERBOSE > 0
+void
+jffs_print_fmcontrol(struct jffs_fmcontrol *fmc)
+{
+	D(printk("struct jffs_fmcontrol: 0x%p\n", fmc));
+	D(printk("{\n"));
+	D(printk("        %u, /* flash_size  */\n", fmc->flash_size));
+	D(printk("        %u, /* used_size  */\n", fmc->used_size));
+	D(printk("        %u, /* dirty_size  */\n", fmc->dirty_size));
+	D(printk("        %u, /* free_size  */\n", fmc->free_size));
+	D(printk("        %u, /* sector_size  */\n", fmc->sector_size));
+	D(printk("        %u, /* min_free_size  */\n", fmc->min_free_size));
+	D(printk("        %u, /* max_chunk_size  */\n", fmc->max_chunk_size));
+	D(printk("        0x%p, /* mtd  */\n", fmc->mtd));
+	D(printk("        0x%p, /* head  */    "
+		 "(head->offset = 0x%08x)\n",
+		 fmc->head, (fmc->head ? fmc->head->offset : 0)));
+	D(printk("        0x%p, /* tail  */    "
+		 "(tail->offset + tail->size = 0x%08x)\n",
+		 fmc->tail,
+		 (fmc->tail ? fmc->tail->offset + fmc->tail->size : 0)));
+	D(printk("        0x%p, /* head_extra  */\n", fmc->head_extra));
+	D(printk("        0x%p, /* tail_extra  */\n", fmc->tail_extra));
+	D(printk("}\n"));
+}
+#endif  /*  CONFIG_JFFS_FS_VERBOSE > 0  */
+
+#if CONFIG_JFFS_FS_VERBOSE > 2
+static void
+jffs_print_fm(struct jffs_fm *fm)
+{
+	D(printk("struct jffs_fm: 0x%p\n", fm));
+	D(printk("{\n"));
+	D(printk("       0x%08x, /* offset  */\n", fm->offset));
+	D(printk("       %u, /* size  */\n", fm->size));
+	D(printk("       0x%p, /* prev  */\n", fm->prev));
+	D(printk("       0x%p, /* next  */\n", fm->next));
+	D(printk("       0x%p, /* nodes  */\n", fm->nodes));
+	D(printk("}\n"));
+}
+#endif  /*  CONFIG_JFFS_FS_VERBOSE > 2  */
+
+#if 0
+void
+jffs_print_node_ref(struct jffs_node_ref *ref)
+{
+	D(printk("struct jffs_node_ref: 0x%p\n", ref));
+	D(printk("{\n"));
+	D(printk("       0x%p, /* node  */\n", ref->node));
+	D(printk("       0x%p, /* next  */\n", ref->next));
+	D(printk("}\n"));
+}
+#endif  /*  0  */
+
 /* This function creates a new shiny flash memory control structure.  */
 struct jffs_fmcontrol *
 jffs_build_begin(struct jffs_control *c, int unit)
@@ -742,54 +796,3 @@ int jffs_get_node_inuse(void)
 {
 	return no_jffs_node;
 }
-
-void
-jffs_print_fmcontrol(struct jffs_fmcontrol *fmc)
-{
-	D(printk("struct jffs_fmcontrol: 0x%p\n", fmc));
-	D(printk("{\n"));
-	D(printk("        %u, /* flash_size  */\n", fmc->flash_size));
-	D(printk("        %u, /* used_size  */\n", fmc->used_size));
-	D(printk("        %u, /* dirty_size  */\n", fmc->dirty_size));
-	D(printk("        %u, /* free_size  */\n", fmc->free_size));
-	D(printk("        %u, /* sector_size  */\n", fmc->sector_size));
-	D(printk("        %u, /* min_free_size  */\n", fmc->min_free_size));
-	D(printk("        %u, /* max_chunk_size  */\n", fmc->max_chunk_size));
-	D(printk("        0x%p, /* mtd  */\n", fmc->mtd));
-	D(printk("        0x%p, /* head  */    "
-		 "(head->offset = 0x%08x)\n",
-		 fmc->head, (fmc->head ? fmc->head->offset : 0)));
-	D(printk("        0x%p, /* tail  */    "
-		 "(tail->offset + tail->size = 0x%08x)\n",
-		 fmc->tail,
-		 (fmc->tail ? fmc->tail->offset + fmc->tail->size : 0)));
-	D(printk("        0x%p, /* head_extra  */\n", fmc->head_extra));
-	D(printk("        0x%p, /* tail_extra  */\n", fmc->tail_extra));
-	D(printk("}\n"));
-}
-
-void
-jffs_print_fm(struct jffs_fm *fm)
-{
-	D(printk("struct jffs_fm: 0x%p\n", fm));
-	D(printk("{\n"));
-	D(printk("       0x%08x, /* offset  */\n", fm->offset));
-	D(printk("       %u, /* size  */\n", fm->size));
-	D(printk("       0x%p, /* prev  */\n", fm->prev));
-	D(printk("       0x%p, /* next  */\n", fm->next));
-	D(printk("       0x%p, /* nodes  */\n", fm->nodes));
-	D(printk("}\n"));
-}
-
-#if 0
-void
-jffs_print_node_ref(struct jffs_node_ref *ref)
-{
-	D(printk("struct jffs_node_ref: 0x%p\n", ref));
-	D(printk("{\n"));
-	D(printk("       0x%p, /* node  */\n", ref->node));
-	D(printk("       0x%p, /* next  */\n", ref->next));
-	D(printk("}\n"));
-}
-#endif  /*  0  */
-
diff --git a/fs/jffs/jffs_fm.h b/fs/jffs/jffs_fm.h
index bc291c43182..f64151e7412 100644
--- a/fs/jffs/jffs_fm.h
+++ b/fs/jffs/jffs_fm.h
@@ -139,8 +139,9 @@ int jffs_add_node(struct jffs_node *node);
 void jffs_fmfree_partly(struct jffs_fmcontrol *fmc, struct jffs_fm *fm,
 			__u32 size);
 
+#if CONFIG_JFFS_FS_VERBOSE > 0
 void jffs_print_fmcontrol(struct jffs_fmcontrol *fmc);
-void jffs_print_fm(struct jffs_fm *fm);
+#endif
 #if 0
 void jffs_print_node_ref(struct jffs_node_ref *ref);
 #endif  /*  0  */
-- 
cgit v1.2.3


From db407163773a8447dd869ee98348e05c81b4c337 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sat, 25 Jun 2005 14:59:07 -0700
Subject: [PATCH] fs/ncpfs/: remove unused #ifdef
 USE_OLD_SLOW_DIRECTORY_LISTING code

This patch removes some unused #ifdef USE_OLD_SLOW_DIRECTORY_LISTING
code.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ncpfs/dir.c           | 13 -------------
 fs/ncpfs/ncplib_kernel.c | 40 ----------------------------------------
 fs/ncpfs/ncplib_kernel.h |  3 ---
 3 files changed, 56 deletions(-)

(limited to 'fs')

diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 2dc2d869396..a9f7a8ab1d5 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -705,18 +705,6 @@ ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir,
 		DPRINTK("ncp_do_readdir: init failed, err=%d\n", err);
 		return;
 	}
-#ifdef USE_OLD_SLOW_DIRECTORY_LISTING
-	for (;;) {
-		err = ncp_search_for_file_or_subdir(server, &seq, &entry.i);
-		if (err) {
-			DPRINTK("ncp_do_readdir: search failed, err=%d\n", err);
-			break;
-		}
-		entry.volume = entry.i.volNumber;
-		if (!ncp_fill_cache(filp, dirent, filldir, ctl, &entry))
-			break;
-	}
-#else
 	/* We MUST NOT use server->buffer_size handshaked with server if we are
 	   using UDP, as for UDP server uses max. buffer size determined by
 	   MTU, and for TCP server uses hardwired value 65KB (== 66560 bytes). 
@@ -754,7 +742,6 @@ ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir,
 		}
 	} while (more);
 	vfree(buf);
-#endif
 	return;
 }
 
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index e4eb5ed4bee..c755e1848a4 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -845,46 +845,6 @@ out:
 	return result;
 }
 
-/* Search for everything */
-int ncp_search_for_file_or_subdir(struct ncp_server *server,
-				  struct nw_search_sequence *seq,
-				  struct nw_info_struct *target)
-{
-	int result;
-
-	ncp_init_request(server);
-	ncp_add_byte(server, 3);	/* subfunction */
-	ncp_add_byte(server, server->name_space[seq->volNumber]);
-	ncp_add_byte(server, 0);	/* data stream (???) */
-	ncp_add_word(server, cpu_to_le16(0x8006));	/* Search attribs */
-	ncp_add_dword(server, RIM_ALL);		/* return info mask */
-	ncp_add_mem(server, seq, 9);
-#ifdef CONFIG_NCPFS_NFS_NS
-	if (server->name_space[seq->volNumber] == NW_NS_NFS) {
-		ncp_add_byte(server, 0);	/* 0 byte pattern */
-	} else 
-#endif
-	{
-		ncp_add_byte(server, 2);	/* 2 byte pattern */
-		ncp_add_byte(server, 0xff);	/* following is a wildcard */
-		ncp_add_byte(server, '*');
-	}
-	
-	if ((result = ncp_request(server, 87)) != 0)
-		goto out;
-	memcpy(seq, ncp_reply_data(server, 0), sizeof(*seq));
-	ncp_extract_file_info(ncp_reply_data(server, 10), target);
-
-	ncp_unlock_server(server);
-	
-	result = ncp_obtain_nfs_info(server, target);
-	return result;
-
-out:
-	ncp_unlock_server(server);
-	return result;
-}
-
 int ncp_search_for_fileset(struct ncp_server *server,
 			   struct nw_search_sequence *seq,
 			   int* more,
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 05ec2e9d90c..9e4dc30c243 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -87,9 +87,6 @@ int ncp_open_create_file_or_subdir(struct ncp_server *, struct inode *, char *,
 
 int ncp_initialize_search(struct ncp_server *, struct inode *,
 		      struct nw_search_sequence *target);
-int ncp_search_for_file_or_subdir(struct ncp_server *server,
-			      struct nw_search_sequence *seq,
-			      struct nw_info_struct *target);
 int ncp_search_for_fileset(struct ncp_server *server,
 			   struct nw_search_sequence *seq,
 			   int* more, int* cnt,
-- 
cgit v1.2.3


From c33ed271263f5fb6ca5ab888b98a55ae5d138c0b Mon Sep 17 00:00:00 2001
From: Domen Puncer <domen@coderock.org>
Date: Sat, 25 Jun 2005 14:59:36 -0700
Subject: [PATCH] list_for_each_entry: fs-dquot.c

Make code more readable with list_for_each_entry_safe.

Signed-off-by: Domen Puncer <domen@coderock.org>
Signed-off-by: Maximilian Attems <janitor@sternwelten.at>
Signed-off-by: Domen Puncer <domen@coderock.org>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dquot.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/dquot.c b/fs/dquot.c
index 37212b039a4..b9732335bcd 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -409,13 +409,10 @@ out_dqlock:
  * for this sb+type at all. */
 static void invalidate_dquots(struct super_block *sb, int type)
 {
-	struct dquot *dquot;
-	struct list_head *head;
+	struct dquot *dquot, *tmp;
 
 	spin_lock(&dq_list_lock);
-	for (head = inuse_list.next; head != &inuse_list;) {
-		dquot = list_entry(head, struct dquot, dq_inuse);
-		head = head->next;
+	list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) {
 		if (dquot->dq_sb != sb)
 			continue;
 		if (dquot->dq_type != type)
-- 
cgit v1.2.3


From 3e1d1d28d99dabe63c64f7f40f1ca1d646de1f73 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <christoph@lameter.com>
Date: Fri, 24 Jun 2005 23:13:50 -0700
Subject: [PATCH] Cleanup patch for process freezing

1. Establish a simple API for process freezing defined in linux/include/sched.h:

   frozen(process)		Check for frozen process
   freezing(process)		Check if a process is being frozen
   freeze(process)		Tell a process to freeze (go to refrigerator)
   thaw_process(process)	Restart process
   frozen_process(process)	Process is frozen now

2. Remove all references to PF_FREEZE and PF_FROZEN from all
   kernel sources except sched.h

3. Fix numerous locations where try_to_freeze is manually done by a driver

4. Remove the argument that is no longer necessary from two function calls.

5. Some whitespace cleanup

6. Clear potential race in refrigerator (provides an open window of PF_FREEZE
   cleared before setting PF_FROZEN, recalc_sigpending does not check
   PF_FROZEN).

This patch does not address the problem of freeze_processes() violating the rule
that a task may only modify its own flags by setting PF_FREEZE. This is not clean
in an SMP environment. freeze(process) is therefore not SMP safe!

Signed-off-by: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/afs/kafsasyncd.c          | 2 +-
 fs/afs/kafstimod.c           | 2 +-
 fs/jbd/journal.c             | 4 ++--
 fs/jfs/jfs_logmgr.c          | 4 ++--
 fs/jfs/jfs_txnmgr.c          | 8 ++++----
 fs/lockd/clntproc.c          | 2 +-
 fs/xfs/linux-2.6/xfs_buf.c   | 4 ++--
 fs/xfs/linux-2.6/xfs_super.c | 2 +-
 8 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
index 6fc88ae8ad9..7ac07d0d47b 100644
--- a/fs/afs/kafsasyncd.c
+++ b/fs/afs/kafsasyncd.c
@@ -116,7 +116,7 @@ static int kafsasyncd(void *arg)
 		remove_wait_queue(&kafsasyncd_sleepq, &myself);
 		set_current_state(TASK_RUNNING);
 
-		try_to_freeze(PF_FREEZE);
+		try_to_freeze();
 
 		/* discard pending signals */
 		afs_discard_my_signals();
diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c
index 86e710dd057..65bc05ab818 100644
--- a/fs/afs/kafstimod.c
+++ b/fs/afs/kafstimod.c
@@ -91,7 +91,7 @@ static int kafstimod(void *arg)
 			complete_and_exit(&kafstimod_dead, 0);
 		}
 
-		try_to_freeze(PF_FREEZE);
+		try_to_freeze();
 
 		/* discard pending signals */
 		afs_discard_my_signals();
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 1e6f2e2ad4a..5e7b4394951 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -167,7 +167,7 @@ loop:
 	}
 
 	wake_up(&journal->j_wait_done_commit);
-	if (current->flags & PF_FREEZE) {
+	if (freezing(current)) {
 		/*
 		 * The simpler the better. Flushing journal isn't a
 		 * good idea, because that depends on threads that may
@@ -175,7 +175,7 @@ loop:
 		 */
 		jbd_debug(1, "Now suspending kjournald\n");
 		spin_unlock(&journal->j_state_lock);
-		refrigerator(PF_FREEZE);
+		refrigerator();
 		spin_lock(&journal->j_state_lock);
 	} else {
 		/*
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 7c8387ed419..79d07624bfe 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2359,9 +2359,9 @@ int jfsIOWait(void *arg)
 			lbmStartIO(bp);
 			spin_lock_irq(&log_redrive_lock);
 		}
-		if (current->flags & PF_FREEZE) {
+		if (freezing(current)) {
 			spin_unlock_irq(&log_redrive_lock);
-			refrigerator(PF_FREEZE);
+			refrigerator();
 		} else {
 			add_wait_queue(&jfs_IO_thread_wait, &wq);
 			set_current_state(TASK_INTERRUPTIBLE);
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 8cbaaff1d5f..121c981ff45 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2788,9 +2788,9 @@ int jfs_lazycommit(void *arg)
 		/* In case a wakeup came while all threads were active */
 		jfs_commit_thread_waking = 0;
 
-		if (current->flags & PF_FREEZE) {
+		if (freezing(current)) {
 			LAZY_UNLOCK(flags);
-			refrigerator(PF_FREEZE);
+			refrigerator();
 		} else {
 			DECLARE_WAITQUEUE(wq, current);
 
@@ -2987,9 +2987,9 @@ int jfs_sync(void *arg)
 		/* Add anon_list2 back to anon_list */
 		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
 
-		if (current->flags & PF_FREEZE) {
+		if (freezing(current)) {
 			TXN_UNLOCK();
-			refrigerator(PF_FREEZE);
+			refrigerator();
 		} else {
 			DECLARE_WAITQUEUE(wq, current);
 
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index fd77ed1d710..14b3ce87fa2 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -313,7 +313,7 @@ static int nlm_wait_on_grace(wait_queue_head_t *queue)
 	prepare_to_wait(queue, &wait, TASK_INTERRUPTIBLE);
 	if (!signalled ()) {
 		schedule_timeout(NLMCLNT_GRACE_WAIT);
-		try_to_freeze(PF_FREEZE);
+		try_to_freeze();
 		if (!signalled ())
 			status = 0;
 	}
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index c60e69431e1..df0cba239dd 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1771,9 +1771,9 @@ xfsbufd(
 
 	INIT_LIST_HEAD(&tmp);
 	do {
-		if (unlikely(current->flags & PF_FREEZE)) {
+		if (unlikely(freezing(current))) {
 			xfsbufd_force_sleep = 1;
-			refrigerator(PF_FREEZE);
+			refrigerator();
 		} else {
 			xfsbufd_force_sleep = 0;
 		}
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 5fe9af38aa2..f6dd7de2592 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -483,7 +483,7 @@ xfssyncd(
 		set_current_state(TASK_INTERRUPTIBLE);
 		timeleft = schedule_timeout(timeleft);
 		/* swsusp */
-		try_to_freeze(PF_FREEZE);
+		try_to_freeze();
 		if (vfsp->vfs_flag & VFS_UMOUNT)
 			break;
 
-- 
cgit v1.2.3


From 34f18a9887afaeb6e50168df512e1118f7d73542 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 26 Jun 2005 03:27:20 -0700
Subject: [PATCH] jffs2 build fix

Missed conversion in the swsusp cleanup.

Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jffs2/background.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 1be6de27dd8..638836b277d 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -92,7 +92,7 @@ static int jffs2_garbage_collect_thread(void *_c)
 			schedule();
 		}
 
-		if (try_to_freeze(0))
+		if (try_to_freeze())
 			continue;
 
 		cond_resched();
-- 
cgit v1.2.3


From 22e2c507c301c3dbbcf91b4948b88f78842ee6c9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Mon, 27 Jun 2005 10:55:12 +0200
Subject: [PATCH] Update cfq io scheduler to time sliced design

This updates the CFQ io scheduler to the new time sliced design (cfq
v3).  It provides full process fairness, while giving excellent
aggregate system throughput even for many competing processes.  It
supports io priorities, either inherited from the cpu nice value or set
directly with the ioprio_get/set syscalls.  The latter closely mimic
set/getpriority.

This import is based on my latest from -mm.

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Makefile           |   1 +
 fs/ioprio.c           | 172 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/reiserfs/journal.c |  12 ++++
 3 files changed, 185 insertions(+)
 create mode 100644 fs/ioprio.c

(limited to 'fs')

diff --git a/fs/Makefile b/fs/Makefile
index fc92e59e9fa..20edcf28bfd 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,6 +10,7 @@ obj-y :=	open.o read_write.o file_table.o buffer.o  bio.o super.o \
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
+		ioprio.o
 
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
 obj-$(CONFIG_COMPAT)		+= compat.o
diff --git a/fs/ioprio.c b/fs/ioprio.c
new file mode 100644
index 00000000000..663e420636d
--- /dev/null
+++ b/fs/ioprio.c
@@ -0,0 +1,172 @@
+/*
+ * fs/ioprio.c
+ *
+ * Copyright (C) 2004 Jens Axboe <axboe@suse.de>
+ *
+ * Helper functions for setting/querying io priorities of processes. The
+ * system calls closely mimmick getpriority/setpriority, see the man page for
+ * those. The prio argument is a composite of prio class and prio data, where
+ * the data argument has meaning within that class. The standard scheduling
+ * classes have 8 distinct prio levels, with 0 being the highest prio and 7
+ * being the lowest.
+ *
+ * IOW, setting BE scheduling class with prio 2 is done ala:
+ *
+ * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2;
+ *
+ * ioprio_set(PRIO_PROCESS, pid, prio);
+ *
+ * See also Documentation/block/ioprio.txt
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/ioprio.h>
+#include <linux/blkdev.h>
+
+static int set_task_ioprio(struct task_struct *task, int ioprio)
+{
+	struct io_context *ioc;
+
+	if (task->uid != current->euid &&
+	    task->uid != current->uid && !capable(CAP_SYS_NICE))
+		return -EPERM;
+
+	task_lock(task);
+
+	task->ioprio = ioprio;
+
+	ioc = task->io_context;
+	if (ioc && ioc->set_ioprio)
+		ioc->set_ioprio(ioc, ioprio);
+
+	task_unlock(task);
+	return 0;
+}
+
+asmlinkage int sys_ioprio_set(int which, int who, int ioprio)
+{
+	int class = IOPRIO_PRIO_CLASS(ioprio);
+	int data = IOPRIO_PRIO_DATA(ioprio);
+	struct task_struct *p, *g;
+	struct user_struct *user;
+	int ret;
+
+	switch (class) {
+		case IOPRIO_CLASS_RT:
+			if (!capable(CAP_SYS_ADMIN))
+				return -EPERM;
+			/* fall through, rt has prio field too */
+		case IOPRIO_CLASS_BE:
+			if (data >= IOPRIO_BE_NR || data < 0)
+				return -EINVAL;
+
+			break;
+		case IOPRIO_CLASS_IDLE:
+			break;
+		default:
+			return -EINVAL;
+	}
+
+	ret = -ESRCH;
+	read_lock_irq(&tasklist_lock);
+	switch (which) {
+		case IOPRIO_WHO_PROCESS:
+			if (!who)
+				p = current;
+			else
+				p = find_task_by_pid(who);
+			if (p)
+				ret = set_task_ioprio(p, ioprio);
+			break;
+		case IOPRIO_WHO_PGRP:
+			if (!who)
+				who = process_group(current);
+			do_each_task_pid(who, PIDTYPE_PGID, p) {
+				ret = set_task_ioprio(p, ioprio);
+				if (ret)
+					break;
+			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			break;
+		case IOPRIO_WHO_USER:
+			if (!who)
+				user = current->user;
+			else
+				user = find_user(who);
+
+			if (!user)
+				break;
+
+			do_each_thread(g, p) {
+				if (p->uid != who)
+					continue;
+				ret = set_task_ioprio(p, ioprio);
+				if (ret)
+					break;
+			} while_each_thread(g, p);
+
+			if (who)
+				free_uid(user);
+			break;
+		default:
+			ret = -EINVAL;
+	}
+
+	read_unlock_irq(&tasklist_lock);
+	return ret;
+}
+
+asmlinkage int sys_ioprio_get(int which, int who)
+{
+	struct task_struct *g, *p;
+	struct user_struct *user;
+	int ret = -ESRCH;
+
+	read_lock_irq(&tasklist_lock);
+	switch (which) {
+		case IOPRIO_WHO_PROCESS:
+			if (!who)
+				p = current;
+			else
+				p = find_task_by_pid(who);
+			if (p)
+				ret = p->ioprio;
+			break;
+		case IOPRIO_WHO_PGRP:
+			if (!who)
+				who = process_group(current);
+			do_each_task_pid(who, PIDTYPE_PGID, p) {
+				if (ret == -ESRCH)
+					ret = p->ioprio;
+				else
+					ret = ioprio_best(ret, p->ioprio);
+			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			break;
+		case IOPRIO_WHO_USER:
+			if (!who)
+				user = current->user;
+			else
+				user = find_user(who);
+
+			if (!user)
+				break;
+
+			do_each_thread(g, p) {
+				if (p->uid != user->uid)
+					continue;
+				if (ret == -ESRCH)
+					ret = p->ioprio;
+				else
+					ret = ioprio_best(ret, p->ioprio);
+			} while_each_thread(g, p);
+
+			if (who)
+				free_uid(user);
+			break;
+		default:
+			ret = -EINVAL;
+	}
+
+	read_unlock_irq(&tasklist_lock);
+	return ret;
+}
+
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 7b87707acc3..d1bcf0da672 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -645,18 +645,22 @@ struct buffer_chunk {
 
 static void write_chunk(struct buffer_chunk *chunk) {
     int i;
+    get_fs_excl();
     for (i = 0; i < chunk->nr ; i++) {
 	submit_logged_buffer(chunk->bh[i]) ;
     }
     chunk->nr = 0;
+    put_fs_excl();
 }
 
 static void write_ordered_chunk(struct buffer_chunk *chunk) {
     int i;
+    get_fs_excl();
     for (i = 0; i < chunk->nr ; i++) {
 	submit_ordered_buffer(chunk->bh[i]) ;
     }
     chunk->nr = 0;
+    put_fs_excl();
 }
 
 static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
@@ -918,6 +922,8 @@ static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list
     return 0 ;
   }
 
+  get_fs_excl();
+
   /* before we can put our commit blocks on disk, we have to make sure everyone older than
   ** us is on disk too
   */
@@ -1055,6 +1061,7 @@ put_jl:
 
   if (retval)
     reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__);
+  put_fs_excl();
   return retval;
 }
 
@@ -1251,6 +1258,8 @@ static int flush_journal_list(struct super_block *s,
     return 0 ;
   }
 
+  get_fs_excl();
+
   /* if all the work is already done, get out of here */
   if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 
       atomic_read(&(jl->j_commit_left)) <= 0) {
@@ -1450,6 +1459,7 @@ flush_older_and_return:
   put_journal_list(s, jl);
   if (flushall)
     up(&journal->j_flush_sem);
+  put_fs_excl();
   return err ;
 } 
 
@@ -2719,6 +2729,7 @@ relock:
   th->t_trans_id = journal->j_trans_id ;
   unlock_journal(p_s_sb) ;
   INIT_LIST_HEAD (&th->t_list);
+  get_fs_excl();
   return 0 ;
 
 out_fail:
@@ -3526,6 +3537,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, struct super_b
   BUG_ON (th->t_refcount > 1);
   BUG_ON (!th->t_trans_id);
 
+  put_fs_excl();
   current->journal_info = th->t_handle_save;
   reiserfs_check_lock_depth(p_s_sb, "journal end");
   if (journal->j_len == 0) {
-- 
cgit v1.2.3


From 8d451687ca57371d303c5554b377d7f5c2ac6ae0 Mon Sep 17 00:00:00 2001
From: Wen-chien Jesse Sung <jesse@cola.voip.idv.tw>
Date: Tue, 28 Jun 2005 20:44:41 -0700
Subject: [PATCH] fix semaphore handling in __unregister_chrdev_region

This up() should be down() instead.

Signed-off-by: Wen-chien Jesse Sung <jesse@cola.voip.idv.tw>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/char_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/char_dev.c b/fs/char_dev.c
index e82aac9cc2f..a69a5d8a406 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -150,7 +150,7 @@ __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct)
 	struct char_device_struct *cd = NULL, **cp;
 	int i = major_to_index(major);
 
-	up(&chrdevs_lock);
+	down(&chrdevs_lock);
 	for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next)
 		if ((*cp)->major == major &&
 		    (*cp)->baseminor == baseminor &&
-- 
cgit v1.2.3


From 687a21cee17000177b1935896b9b475acf136678 Mon Sep 17 00:00:00 2001
From: Pekka J Enberg <penberg@cs.Helsinki.FI>
Date: Tue, 28 Jun 2005 20:44:55 -0700
Subject: [PATCH] rename wakeup_bdflush to wakeup_pdflush

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 13e5938a64f..561e63a1496 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -278,7 +278,7 @@ EXPORT_SYMBOL(thaw_bdev);
  */
 static void do_sync(unsigned long wait)
 {
-	wakeup_bdflush(0);
+	wakeup_pdflush(0);
 	sync_inodes(0);		/* All mappings, inodes and their blockdevs */
 	DQUOT_SYNC(NULL);
 	sync_supers();		/* Write the superblocks */
@@ -497,7 +497,7 @@ static void free_more_memory(void)
 	struct zone **zones;
 	pg_data_t *pgdat;
 
-	wakeup_bdflush(1024);
+	wakeup_pdflush(1024);
 	yield();
 
 	for_each_pgdat(pgdat) {
-- 
cgit v1.2.3


From 334a13ec3d01a1a4b4f2249735b793105cb4a519 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 28 Jun 2005 20:44:58 -0700
Subject: [PATCH] really remove xattr_acl.h

Looks like it sneaked back with the NFS ACL merge..

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/nfs3acl.c | 14 +++++++-------
 fs/nfsd/vfs.c    | 13 ++++++-------
 2 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index ee3536fc84a..1b7a3ef2f81 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -2,7 +2,7 @@
 #include <linux/nfs.h>
 #include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
-#include <linux/xattr_acl.h>
+#include <linux/posix_acl_xattr.h>
 #include <linux/nfsacl.h>
 
 #define NFSDBG_FACILITY	NFSDBG_PROC
@@ -53,9 +53,9 @@ ssize_t nfs3_getxattr(struct dentry *dentry, const char *name,
 	struct posix_acl *acl;
 	int type, error = 0;
 
-	if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0)
+	if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
 		type = ACL_TYPE_ACCESS;
-	else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0)
+	else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
 		type = ACL_TYPE_DEFAULT;
 	else
 		return -EOPNOTSUPP;
@@ -82,9 +82,9 @@ int nfs3_setxattr(struct dentry *dentry, const char *name,
 	struct posix_acl *acl;
 	int type, error;
 
-	if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0)
+	if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
 		type = ACL_TYPE_ACCESS;
-	else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0)
+	else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
 		type = ACL_TYPE_DEFAULT;
 	else
 		return -EOPNOTSUPP;
@@ -103,9 +103,9 @@ int nfs3_removexattr(struct dentry *dentry, const char *name)
 	struct inode *inode = dentry->d_inode;
 	int type;
 
-	if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0)
+	if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
 		type = ACL_TYPE_ACCESS;
-	else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0)
+	else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
 		type = ACL_TYPE_DEFAULT;
 	else
 		return -EOPNOTSUPP;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index de340ffd33c..be24ead89d9 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -46,10 +46,9 @@
 #include <linux/nfsd/nfsfh.h>
 #include <linux/quotaops.h>
 #include <linux/dnotify.h>
-#include <linux/xattr_acl.h>
 #include <linux/posix_acl.h>
-#ifdef CONFIG_NFSD_V4
 #include <linux/posix_acl_xattr.h>
+#ifdef CONFIG_NFSD_V4
 #include <linux/xattr.h>
 #include <linux/nfs4.h>
 #include <linux/nfs4_acl.h>
@@ -1872,10 +1871,10 @@ nfsd_get_posix_acl(struct svc_fh *fhp, int type)
 		return ERR_PTR(-EOPNOTSUPP);
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			name = XATTR_NAME_ACL_ACCESS;
+			name = POSIX_ACL_XATTR_ACCESS;
 			break;
 		case ACL_TYPE_DEFAULT:
-			name = XATTR_NAME_ACL_DEFAULT;
+			name = POSIX_ACL_XATTR_DEFAULT;
 			break;
 		default:
 			return ERR_PTR(-EOPNOTSUPP);
@@ -1919,17 +1918,17 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
 		return -EOPNOTSUPP;
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			name = XATTR_NAME_ACL_ACCESS;
+			name = POSIX_ACL_XATTR_ACCESS;
 			break;
 		case ACL_TYPE_DEFAULT:
-			name = XATTR_NAME_ACL_DEFAULT;
+			name = POSIX_ACL_XATTR_DEFAULT;
 			break;
 		default:
 			return -EOPNOTSUPP;
 	}
 
 	if (acl && acl->a_count) {
-		size = xattr_acl_size(acl->a_count);
+		size = posix_acl_xattr_size(acl->a_count);
 		value = kmalloc(size, GFP_KERNEL);
 		if (!value)
 			return -ENOMEM;
-- 
cgit v1.2.3


From c016e2257acd00a7ffd87fa1eec896138563d1aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=E9bastien=20Dugu?= <sebastien.dugue@bull.net>
Date: Tue, 28 Jun 2005 20:44:59 -0700
Subject: [PATCH] aio-retry-fix: fix aio retry work queueing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the case of buffered AIO, in the aio retry path (aio_run_iocb), when the
retry method returns EIOCBRETRY the kicked iocb is added to the context run
list but is never queued onto the work queue.  The request therefore is
never completed.

This patch fixes that by adding the appropriate call to aio_queue_work in
aio_run_aiocb so that subsequent retries will be handled by the aio worker
thread.

Signed-off-by: S�bastien Dugu� <sebastien.dugue@bull.net>
Acked-by: Benjamin LaHaise <benjamin.c.lahaise@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 7afa222f680..06d7d4390fe 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -58,6 +58,7 @@ static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
 static void aio_kick_handler(void *);
+static void aio_queue_work(struct kioctx *);
 
 /* aio_setup
  *	Creates the slab caches used by the aio routines, panic on
@@ -747,6 +748,14 @@ out:
 		 * has already been kicked */
 		if (kiocbIsKicked(iocb)) {
 			__queue_kicked_iocb(iocb);
+
+			/*
+			 * __queue_kicked_iocb will always return 1 here, because
+			 * iocb->ki_run_list is empty at this point so it should
+			 * be safe to unconditionally queue the context into the
+			 * work queue.
+			 */
+			aio_queue_work(ctx);
 		}
 	}
 	return ret;
-- 
cgit v1.2.3


From ec471dc484b8ca5352903ee28796b8b248313547 Mon Sep 17 00:00:00 2001
From: "KAMBAROV, ZAUR" <kambarov@berkeley.edu>
Date: Tue, 28 Jun 2005 20:45:10 -0700
Subject: [PATCH] coverity: fs/udf/namei.c null check

"dir" was dereferenced before null check

Signed-off-by: Zaur Kambarov <zkambarov@coverity.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/udf/namei.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 3f6dc7112bc..4673157b262 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -159,7 +159,7 @@ udf_find_entry(struct inode *dir, struct dentry *dentry,
 	char *nameptr;
 	uint8_t lfi;
 	uint16_t liu;
-	loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2;
+	loff_t size;
 	kernel_lb_addr bloc, eloc;
 	uint32_t extoffset, elen, offset;
 	struct buffer_head *bh = NULL;
@@ -167,6 +167,8 @@ udf_find_entry(struct inode *dir, struct dentry *dentry,
 	if (!dir)
 		return NULL;
 
+	size = (udf_ext0_offset(dir) + dir->i_size) >> 2;
+
 	f_pos = (udf_ext0_offset(dir) >> 2);
 
 	fibh->soffset = fibh->eoffset = (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2;
-- 
cgit v1.2.3


From c7f1721ef284c6e8257c7471a02148db76105036 Mon Sep 17 00:00:00 2001
From: "KAMBAROV, ZAUR" <kambarov@berkeley.edu>
Date: Tue, 28 Jun 2005 20:45:11 -0700
Subject: [PATCH] coverity: fs/ext3/super.c: match_int return check

The return value of  "match_int" is  checked  27 out of 28 times

In lib/parser.c
142  	/**
143  	 * match_int: - scan a decimal representation of an integer from a substring_t
144  	 * @s: substring_t to be scanned
145  	 * @result: resulting integer on success
146  	 *
147  	 * Description: Attempts to parse the &substring_t @s as a decimal integer. On
148  	 * success, sets @result to the integer represented by the string and returns 0.
149  	 * Returns either -ENOMEM or -EINVAL on failure.
150  	 */
151  	int match_int(substring_t *s, int *result)
152  	{
153  		return match_number(s, result, 0);
154  	}

Signed-off-by: Zaur Kambarov <zkambarov@coverity.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/super.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index b4b3e8a3913..a6d1779d7de 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -944,7 +944,8 @@ clear_qf_name:
 					"for remount\n");
 				return 0;
 			}
-			match_int(&args[0], &option);
+			if (match_int(&args[0], &option) != 0)
+				return 0;
 			*n_blocks_count = option;
 			break;
 		case Opt_nobh:
-- 
cgit v1.2.3


From 21fe3471c3aaa5c489c5d3a4d705291eb7511248 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Tue, 28 Jun 2005 20:45:16 -0700
Subject: [PATCH] ext3: reduce allocate-with-reservation lock latencies

Currently in ext3 block reservation code, the global filesystem reservation
tree lock (rsv_block) is hold during the process of searching for a space
to make a new reservation window, including while scaning the block bitmap
to verify if the avalible window has a free block.  Holding the lock during
bitmap scan is unnecessary and could possibly cause scalability issue and
latency issues.

This patch tries to address this by dropping the lock before scan the
bitmap.  Before that we need to reserve the open window in case someone
else is targetting at the same window.  Question was should we reserve the
whole free reservable space or just the window size we need.  Reserve the
whole free reservable space will possibly force other threads which
intended to do block allocation nearby move to another block group(cause
bad layout).  In this patch, we just reserve the desired size before drop
the lock and scan the block bitmap.  This patch fixed a ext3 reservation
latency issue seen on a cvs check out test.  Patch is tested with many fsx,
tiobench, dbench and untar a kernel test.

Signed-Off-By: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 135 ++++++++++++++++++++++++++-----------------------------
 fs/ext3/file.c   |   4 ++
 2 files changed, 67 insertions(+), 72 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index ccd632fcc6d..e463dca008e 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -749,24 +749,24 @@ fail_access:
  * 	to find a free region that is of my size and has not
  * 	been reserved.
  *
- *	on succeed, it returns the reservation window to be appended to.
- *	failed, return NULL.
  */
-static struct ext3_reserve_window_node *find_next_reservable_window(
+static int find_next_reservable_window(
 				struct ext3_reserve_window_node *search_head,
-				unsigned long size, int *start_block,
+				struct ext3_reserve_window_node *my_rsv,
+				struct super_block * sb, int start_block,
 				int last_block)
 {
 	struct rb_node *next;
 	struct ext3_reserve_window_node *rsv, *prev;
 	int cur;
+	int size = my_rsv->rsv_goal_size;
 
 	/* TODO: make the start of the reservation window byte-aligned */
 	/* cur = *start_block & ~7;*/
-	cur = *start_block;
+	cur = start_block;
 	rsv = search_head;
 	if (!rsv)
-		return NULL;
+		return -1;
 
 	while (1) {
 		if (cur <= rsv->rsv_end)
@@ -782,11 +782,11 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
 		 * space with expected-size (or more)...
 		 */
 		if (cur > last_block)
-			return NULL;		/* fail */
+			return -1;		/* fail */
 
 		prev = rsv;
 		next = rb_next(&rsv->rsv_node);
-		rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node);
+		rsv = list_entry(next,struct ext3_reserve_window_node,rsv_node);
 
 		/*
 		 * Reached the last reservation, we can just append to the
@@ -813,8 +813,25 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
 	 * return the reservation window that we could append to.
 	 * succeed.
 	 */
-	*start_block = cur;
-	return prev;
+
+	if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
+		rsv_window_remove(sb, my_rsv);
+
+	/*
+	 * Let's book the whole avaliable window for now.  We will check the
+	 * disk bitmap later and then, if there are free blocks then we adjust
+	 * the window size if it's larger than requested.
+	 * Otherwise, we will remove this node from the tree next time
+	 * call find_next_reservable_window.
+	 */
+	my_rsv->rsv_start = cur;
+	my_rsv->rsv_end = cur + size - 1;
+	my_rsv->rsv_alloc_hit = 0;
+
+	if (prev != my_rsv)
+		ext3_rsv_window_add(sb, my_rsv);
+
+	return 0;
 }
 
 /**
@@ -852,6 +869,7 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
  *	@sb: the super block
  *	@group: the group we are trying to allocate in
  *	@bitmap_bh: the block group block bitmap
+ *
  */
 static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
 		int goal, struct super_block *sb,
@@ -860,10 +878,10 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
 	struct ext3_reserve_window_node *search_head;
 	int group_first_block, group_end_block, start_block;
 	int first_free_block;
-	int reservable_space_start;
-	struct ext3_reserve_window_node *prev_rsv;
 	struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root;
 	unsigned long size;
+	int ret;
+	spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
 
 	group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) +
 				group * EXT3_BLOCKS_PER_GROUP(sb);
@@ -875,6 +893,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
 		start_block = goal + group_first_block;
 
 	size = my_rsv->rsv_goal_size;
+
 	if (!rsv_is_empty(&my_rsv->rsv_window)) {
 		/*
 		 * if the old reservation is cross group boundary
@@ -908,6 +927,8 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
 			my_rsv->rsv_goal_size= size;
 		}
 	}
+
+	spin_lock(rsv_lock);
 	/*
 	 * shift the search start to the window near the goal block
 	 */
@@ -921,11 +942,16 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
 	 * need to check the bitmap after we found a reservable window.
 	 */
 retry:
-	prev_rsv = find_next_reservable_window(search_head, size,
-						&start_block, group_end_block);
-	if (prev_rsv == NULL)
-		goto failed;
-	reservable_space_start = start_block;
+	ret = find_next_reservable_window(search_head, my_rsv, sb,
+						start_block, group_end_block);
+
+	if (ret == -1) {
+		if (!rsv_is_empty(&my_rsv->rsv_window))
+			rsv_window_remove(sb, my_rsv);
+		spin_unlock(rsv_lock);
+		return -1;
+	}
+
 	/*
 	 * On success, find_next_reservable_window() returns the
 	 * reservation window where there is a reservable space after it.
@@ -937,8 +963,9 @@ retry:
 	 * block. Search start from the start block of the reservable space
 	 * we just found.
 	 */
+	spin_unlock(rsv_lock);
 	first_free_block = bitmap_search_next_usable_block(
-			reservable_space_start - group_first_block,
+			my_rsv->rsv_start - group_first_block,
 			bitmap_bh, group_end_block - group_first_block + 1);
 
 	if (first_free_block < 0) {
@@ -946,54 +973,29 @@ retry:
 		 * no free block left on the bitmap, no point
 		 * to reserve the space. return failed.
 		 */
-		goto failed;
+		spin_lock(rsv_lock);
+		if (!rsv_is_empty(&my_rsv->rsv_window))
+			rsv_window_remove(sb, my_rsv);
+		spin_unlock(rsv_lock);
+		return -1;		/* failed */
 	}
+
 	start_block = first_free_block + group_first_block;
 	/*
 	 * check if the first free block is within the
-	 * free space we just found
+	 * free space we just reserved
 	 */
-	if ((start_block >= reservable_space_start) &&
-	  (start_block < reservable_space_start + size))
-		goto found_rsv_window;
+	if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end)
+		return 0;		/* success */
 	/*
 	 * if the first free bit we found is out of the reservable space
-	 * this means there is no free block on the reservable space
-	 * we should continue search for next reservable space,
+	 * continue search for next reservable space,
 	 * start from where the free block is,
 	 * we also shift the list head to where we stopped last time
 	 */
-	search_head = prev_rsv;
+	search_head = my_rsv;
+	spin_lock(rsv_lock);
 	goto retry;
-
-found_rsv_window:
-	/*
-	 * great! the reservable space contains some free blocks.
-	 * if the search returns that we should add the new
-	 * window just next to where the old window, we don't
- 	 * need to remove the old window first then add it to the
-	 * same place, just update the new start and new end.
-	 */
-	if (my_rsv != prev_rsv)  {
-		if (!rsv_is_empty(&my_rsv->rsv_window))
-			rsv_window_remove(sb, my_rsv);
-	}
-	my_rsv->rsv_start = reservable_space_start;
-	my_rsv->rsv_end = my_rsv->rsv_start + size - 1;
-	my_rsv->rsv_alloc_hit = 0;
-	if (my_rsv != prev_rsv)  {
-		ext3_rsv_window_add(sb, my_rsv);
-	}
-	return 0;		/* succeed */
-failed:
-	/*
-	 * failed to find a new reservation window in the current
-	 * group, remove the current(stale) reservation window
-	 * if there is any
-	 */
-	if (!rsv_is_empty(&my_rsv->rsv_window))
-		rsv_window_remove(sb, my_rsv);
-	return -1;		/* failed */
 }
 
 /*
@@ -1023,7 +1025,6 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 			int goal, struct ext3_reserve_window_node * my_rsv,
 			int *errp)
 {
-	spinlock_t *rsv_lock;
 	unsigned long group_first_block;
 	int ret = 0;
 	int fatal;
@@ -1052,7 +1053,6 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL);
 		goto out;
 	}
-	rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
 	/*
 	 * goal is a group relative block number (if there is a goal)
 	 * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb)
@@ -1078,30 +1078,21 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 	 * then we could go to allocate from the reservation window directly.
 	 */
 	while (1) {
-		struct ext3_reserve_window rsv_copy;
-
-		rsv_copy._rsv_start = my_rsv->rsv_start;
-		rsv_copy._rsv_end = my_rsv->rsv_end;
-
-		if (rsv_is_empty(&rsv_copy) || (ret < 0) ||
-			!goal_in_my_reservation(&rsv_copy, goal, group, sb)) {
-			spin_lock(rsv_lock);
+		if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
+			!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) {
 			ret = alloc_new_reservation(my_rsv, goal, sb,
 							group, bitmap_bh);
-			rsv_copy._rsv_start = my_rsv->rsv_start;
-			rsv_copy._rsv_end = my_rsv->rsv_end;
-			spin_unlock(rsv_lock);
 			if (ret < 0)
 				break;			/* failed */
 
-			if (!goal_in_my_reservation(&rsv_copy, goal, group, sb))
+			if (!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb))
 				goal = -1;
 		}
-		if ((rsv_copy._rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
-		    || (rsv_copy._rsv_end < group_first_block))
+		if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
+		    || (my_rsv->rsv_end < group_first_block))
 			BUG();
 		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal,
-					   &rsv_copy);
+					   &my_rsv->rsv_window);
 		if (ret >= 0) {
 			my_rsv->rsv_alloc_hit++;
 			break;				/* succeed */
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 5ad8cf0292d..98e78345ead 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -36,7 +36,11 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
 	/* if we are the last writer on the inode, drop the block reservation */
 	if ((filp->f_mode & FMODE_WRITE) &&
 			(atomic_read(&inode->i_writecount) == 1))
+	{
+		down(&EXT3_I(inode)->truncate_sem);
 		ext3_discard_reservation(inode);
+		up(&EXT3_I(inode)->truncate_sem);
+	}
 	if (is_dx(inode) && filp->private_data)
 		ext3_htree_free_dir_info(filp->private_data);
 
-- 
cgit v1.2.3


From 869eb76e7b60ebd8f87a358b72e97fa0aef1d1f5 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Wed, 29 Jun 2005 18:52:28 -0400
Subject: [PATCH] reiserfs: Check if attrs are enabled for attr ioctls

ReiserFS currently will allow the user to set/get attrs for files
regardless if they are enabled.  The patch checks to see if they are
enabled, and returns -NOTTY if they are not.

ext[23] doesn't need this check because attrs are always enabled.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/ioctl.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 94dc42475a0..76caedf737f 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -36,10 +36,16 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 	/* following two cases are taken from fs/ext2/ioctl.c by Remy
 	   Card (card@masi.ibp.fr) */
 	case REISERFS_IOC_GETFLAGS:
+		if (!reiserfs_attrs (inode->i_sb))
+			return -ENOTTY;
+
 		flags = REISERFS_I(inode) -> i_attrs;
 		i_attrs_to_sd_attrs( inode, ( __u16 * ) &flags );
 		return put_user(flags, (int __user *) arg);
 	case REISERFS_IOC_SETFLAGS: {
+		if (!reiserfs_attrs (inode->i_sb))
+			return -ENOTTY;
+
 		if (IS_RDONLY(inode))
 			return -EROFS;
 
-- 
cgit v1.2.3


From 2949ccf9379678df66ecf2ca70ed4656159eacdd Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Wed, 29 Jun 2005 18:53:06 -0400
Subject: [PATCH] reiserfs: enable attrs by default if saf

The following patch enables attrs by default if the reiserfs_attrs_cleared
bit is set in the superblock.  This allows chattr-type attrs to be used
without any further action by the user.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/super.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 660aefca1fd..d50a5cd860c 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1066,6 +1066,8 @@ static void handle_attrs( struct super_block *s )
 				reiserfs_warning(s, "reiserfs: cannot support attributes until flag is set in super-block" );
 				REISERFS_SB(s) -> s_mount_opt &= ~ ( 1 << REISERFS_ATTRS );
 		}
+	} else if (le32_to_cpu( rs -> s_flags ) & reiserfs_attrs_cleared) {
+		REISERFS_SB(s)->s_mount_opt |= REISERFS_ATTRS;
 	}
 }
 
-- 
cgit v1.2.3


From 532a39a3754a3b8ce507414863023f8db21f9a7c Mon Sep 17 00:00:00 2001
From: Pekka J Enberg <penberg@cs.Helsinki.FI>
Date: Thu, 30 Jun 2005 02:59:01 -0700
Subject: [PATCH] fat: fix slab cache leak

This patch plugs a slab cache leak in fat module initialization.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fat/inode.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 8ccee841548..3e31c4a736f 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1331,12 +1331,21 @@ void __exit fat_cache_destroy(void);
 
 static int __init init_fat_fs(void)
 {
-	int ret;
+	int err;
 
-	ret = fat_cache_init();
-	if (ret < 0)
-		return ret;
-	return fat_init_inodecache();
+	err = fat_cache_init();
+	if (err)
+		return err;
+
+	err = fat_init_inodecache();
+	if (err)
+		goto failed;
+
+	return 0;
+
+failed:
+	fat_cache_destroy();
+	return err;
 }
 
 static void __exit exit_fat_fs(void)
-- 
cgit v1.2.3


From 1c71e22e4e4b4e7261f147635518d5634136c226 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 30 Jun 2005 02:59:02 -0700
Subject: [PATCH] udf_find_entry() cleanup

udf_find_entry can never be called with a NULL argument, so we shouldn't
check for it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/udf/namei.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 4673157b262..ac191ed7df0 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -164,11 +164,7 @@ udf_find_entry(struct inode *dir, struct dentry *dentry,
 	uint32_t extoffset, elen, offset;
 	struct buffer_head *bh = NULL;
 
-	if (!dir)
-		return NULL;
-
 	size = (udf_ext0_offset(dir) + dir->i_size) >> 2;
-
 	f_pos = (udf_ext0_offset(dir) >> 2);
 
 	fibh->soffset = fibh->eoffset = (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2;
-- 
cgit v1.2.3


From ba03bda81e160b2724451074cdcb597d14f7d7e0 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 30 Jun 2005 02:59:04 -0700
Subject: [PATCH] freevxfs: fix buffer_head leak

- fix a buffer_head leak in vxfs_getfsh()

- s/SLAB_KERNEL/GFP_KERNEL/

- check sb_bread() return value

- drop pointless buffer-mapped() test.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/freevxfs/vxfs_fshead.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c
index 05b19f70bf9..6dee109aeea 100644
--- a/fs/freevxfs/vxfs_fshead.c
+++ b/fs/freevxfs/vxfs_fshead.c
@@ -78,17 +78,18 @@ vxfs_getfsh(struct inode *ip, int which)
 	struct buffer_head		*bp;
 
 	bp = vxfs_bread(ip, which);
-	if (buffer_mapped(bp)) {
+	if (bp) {
 		struct vxfs_fsh		*fhp;
 
-		if (!(fhp = kmalloc(sizeof(*fhp), SLAB_KERNEL)))
-			return NULL;
+		if (!(fhp = kmalloc(sizeof(*fhp), GFP_KERNEL)))
+			goto out;
 		memcpy(fhp, bp->b_data, sizeof(*fhp));
 
-		brelse(bp);
+		put_bh(bp);
 		return (fhp);
 	}
-
+out:
+	brelse(bp);
 	return NULL;
 }
 
-- 
cgit v1.2.3


From 1d2cc3b87b1694df72ab75cee8e12f8b369577ce Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 30 Jun 2005 02:59:05 -0700
Subject: [PATCH] freevxfs: remove 2.4 compatability

This patch removes 2.4 compatability header from freevxfs.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/freevxfs/vxfs.h         |  1 -
 fs/freevxfs/vxfs_kcompat.h | 49 ----------------------------------------------
 fs/freevxfs/vxfs_subr.c    |  1 -
 3 files changed, 51 deletions(-)
 delete mode 100644 fs/freevxfs/vxfs_kcompat.h

(limited to 'fs')

diff --git a/fs/freevxfs/vxfs.h b/fs/freevxfs/vxfs.h
index 8da0252642a..583bd78086d 100644
--- a/fs/freevxfs/vxfs.h
+++ b/fs/freevxfs/vxfs.h
@@ -37,7 +37,6 @@
  * superblocks of the Veritas Filesystem.
  */
 #include <linux/types.h>
-#include "vxfs_kcompat.h"
 
 
 /*
diff --git a/fs/freevxfs/vxfs_kcompat.h b/fs/freevxfs/vxfs_kcompat.h
deleted file mode 100644
index 342a4cc860f..00000000000
--- a/fs/freevxfs/vxfs_kcompat.h
+++ /dev/null
@@ -1,49 +0,0 @@
-#ifndef _VXFS_KCOMPAT_H
-#define _VXFS_KCOMPAT_H
-
-#include <linux/version.h>
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-
-#include <linux/blkdev.h>
-
-typedef long sector_t;
-
-/* From include/linux/fs.h (Linux 2.5.2-pre3)  */
-static inline struct buffer_head * sb_bread(struct super_block *sb, int block)
-{
-	return bread(sb->s_dev, block, sb->s_blocksize);
-}
-
-/* Dito.  */
-static inline void map_bh(struct buffer_head *bh, struct super_block *sb, int block)
-{
-	bh->b_state |= 1 << BH_Mapped;
-	bh->b_dev = sb->s_dev;
-	bh->b_blocknr = block;
-}
-
-/* From fs/block_dev.c (Linux 2.5.2-pre2)  */
-static inline int sb_set_blocksize(struct super_block *sb, int size)
-{
-	int bits;
-	if (set_blocksize(sb->s_dev, size) < 0)
-		return 0;
-	sb->s_blocksize = size;
-	for (bits = 9, size >>= 9; size >>= 1; bits++)
-		;
-	sb->s_blocksize_bits = bits;
-	return sb->s_blocksize;
-}
-
-/* Dito.  */
-static inline int sb_min_blocksize(struct super_block *sb, int size)
-{
-	int minsize = get_hardsect_size(sb->s_dev);
-	if (size < minsize)
-		size = minsize;
-	return sb_set_blocksize(sb, size);
-}
-
-#endif /* Kernel 2.4 */
-#endif /* _VXFS_KCOMPAT_H */
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index 5e305612054..50aae77651b 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -36,7 +36,6 @@
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 
-#include "vxfs_kcompat.h"
 #include "vxfs_extern.h"
 
 
-- 
cgit v1.2.3


From 8cb681b9c7fff5cb35b5c05ba4f1b7e285e258fb Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 30 Jun 2005 02:59:05 -0700
Subject: [PATCH] freevxfs: minor cleanups

This patch addresses the following minor issues:

  - Typo in printk
  - Redundant casts
  - Use C99 struct initializers instead of memset
  - Parenthesis around return value
  - Use inline instead of __inline__

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/freevxfs/vxfs_bmap.c   |  2 +-
 fs/freevxfs/vxfs_lookup.c |  8 ++++----
 fs/freevxfs/vxfs_olt.c    | 10 +++++-----
 fs/freevxfs/vxfs_super.c  |  7 +++----
 4 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/freevxfs/vxfs_bmap.c b/fs/freevxfs/vxfs_bmap.c
index bc4b57da306..d3f6b2835bc 100644
--- a/fs/freevxfs/vxfs_bmap.c
+++ b/fs/freevxfs/vxfs_bmap.c
@@ -101,7 +101,7 @@ vxfs_bmap_ext4(struct inode *ip, long bn)
 	return 0;
 
 fail_size:
-	printk("vxfs: indirect extent to big!\n");
+	printk("vxfs: indirect extent too big!\n");
 fail_buf:
 	return 0;
 }
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 506ae251d2c..554eb455722 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -61,13 +61,13 @@ struct file_operations vxfs_dir_operations = {
 };
 
  
-static __inline__ u_long
+static inline u_long
 dir_pages(struct inode *inode)
 {
 	return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 }
  
-static __inline__ u_long
+static inline u_long
 dir_blocks(struct inode *ip)
 {
 	u_long			bsize = ip->i_sb->s_blocksize;
@@ -79,7 +79,7 @@ dir_blocks(struct inode *ip)
  *
  * len <= VXFS_NAMELEN and de != NULL are guaranteed by caller.
  */
-static __inline__ int
+static inline int
 vxfs_match(int len, const char * const name, struct vxfs_direct *de)
 {
 	if (len != de->d_namelen)
@@ -89,7 +89,7 @@ vxfs_match(int len, const char * const name, struct vxfs_direct *de)
 	return !memcmp(name, de->d_name, len);
 }
 
-static __inline__ struct vxfs_direct *
+static inline struct vxfs_direct *
 vxfs_next_entry(struct vxfs_direct *de)
 {
 	return ((struct vxfs_direct *)((char*)de + de->d_reclen));
diff --git a/fs/freevxfs/vxfs_olt.c b/fs/freevxfs/vxfs_olt.c
index 7a204e31aad..133476201d8 100644
--- a/fs/freevxfs/vxfs_olt.c
+++ b/fs/freevxfs/vxfs_olt.c
@@ -38,7 +38,7 @@
 #include "vxfs_olt.h"
 
 
-static __inline__ void
+static inline void
 vxfs_get_fshead(struct vxfs_oltfshead *fshp, struct vxfs_sb_info *infp)
 {
 	if (infp->vsi_fshino)
@@ -46,7 +46,7 @@ vxfs_get_fshead(struct vxfs_oltfshead *fshp, struct vxfs_sb_info *infp)
 	infp->vsi_fshino = fshp->olt_fsino[0];
 }
 
-static __inline__ void
+static inline void
 vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp)
 {
 	if (infp->vsi_iext)
@@ -54,7 +54,7 @@ vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp)
 	infp->vsi_iext = ilistp->olt_iext[0]; 
 }
 
-static __inline__ u_long
+static inline u_long
 vxfs_oblock(struct super_block *sbp, daddr_t block, u_long bsize)
 {
 	if (sbp->s_blocksize % bsize)
@@ -104,8 +104,8 @@ vxfs_read_olt(struct super_block *sbp, u_long bsize)
 		goto fail;
 	}
 
-	oaddr = (char *)bp->b_data + op->olt_size;
-	eaddr = (char *)bp->b_data + (infp->vsi_oltsize * sbp->s_blocksize);
+	oaddr = bp->b_data + op->olt_size;
+	eaddr = bp->b_data + (infp->vsi_oltsize * sbp->s_blocksize);
 
 	while (oaddr < eaddr) {
 		struct vxfs_oltcommon	*ocp =
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 0ae2c7b8182..27f66d3e8a0 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -155,12 +155,11 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)
 
 	sbp->s_flags |= MS_RDONLY;
 
-	infp = kmalloc(sizeof(*infp), GFP_KERNEL);
+	infp = kcalloc(1, sizeof(*infp), GFP_KERNEL);
 	if (!infp) {
 		printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n");
 		return -ENOMEM;
 	}
-	memset(infp, 0, sizeof(*infp));
 
 	bsize = sb_min_blocksize(sbp, BLOCK_SIZE);
 	if (!bsize) {
@@ -196,7 +195,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)
 #endif
 
 	sbp->s_magic = rsbp->vs_magic;
-	sbp->s_fs_info = (void *)infp;
+	sbp->s_fs_info = infp;
 
 	infp->vsi_raw = rsbp;
 	infp->vsi_bp = bp;
@@ -263,7 +262,7 @@ vxfs_init(void)
 			sizeof(struct vxfs_inode_info), 0, 
 			SLAB_RECLAIM_ACCOUNT, NULL, NULL);
 	if (vxfs_inode_cachep)
-		return (register_filesystem(&vxfs_fs_type));
+		return register_filesystem(&vxfs_fs_type);
 	return -ENOMEM;
 }
 
-- 
cgit v1.2.3


From c60e81ee1cac32dae1f9bf623dcb6b3b2bde8eab Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 30 Jun 2005 02:59:06 -0700
Subject: [PATCH] reiserfs: handle_attrs() fix

Fix a use-uninitialised bug.

Cc: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/super.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index d50a5cd860c..4b80ab95d33 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1053,10 +1053,9 @@ static void handle_barrier_mode(struct super_block *s, unsigned long bits) {
 
 static void handle_attrs( struct super_block *s )
 {
-	struct reiserfs_super_block * rs;
+	struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s);
 
 	if( reiserfs_attrs( s ) ) {
-		rs = SB_DISK_SUPER_BLOCK (s);
 		if( old_format_only(s) ) {
 			reiserfs_warning(s, "reiserfs: cannot support attributes on 3.5.x disk format" );
 			REISERFS_SB(s) -> s_mount_opt &= ~ ( 1 << REISERFS_ATTRS );
-- 
cgit v1.2.3


From ef6689eff4b58273fed9e54293a3da983b321e9a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 30 Jun 2005 22:13:14 -0700
Subject: [PATCH] fatfs sectioning fix

Fixup for the recent slab leak fix

Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fat/cache.c | 2 +-
 fs/fat/inode.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 7c52e465a61..77c24fcf712 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -56,7 +56,7 @@ int __init fat_cache_init(void)
 	return 0;
 }
 
-void __exit fat_cache_destroy(void)
+void fat_cache_destroy(void)
 {
 	if (kmem_cache_destroy(fat_cache_cachep))
 		printk(KERN_INFO "fat_cache: not all structures were freed\n");
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 3e31c4a736f..96ae85b67eb 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1327,7 +1327,7 @@ out_fail:
 EXPORT_SYMBOL(fat_fill_super);
 
 int __init fat_cache_init(void);
-void __exit fat_cache_destroy(void);
+void fat_cache_destroy(void);
 
 static int __init init_fat_fs(void)
 {
-- 
cgit v1.2.3