454 files changed, 30411 insertions, 18175 deletions
diff --git a/fs/9p/9p.c b/fs/9p/9p.c
new file mode 100644
index 00000000000..e847f504a47
--- /dev/null
+++ b/fs/9p/9p.c
@@ -0,0 +1,359 @@
+/*
+ *  linux/fs/9p/9p.c
+ *
+ *  This file contains functions 9P2000 functions
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "mux.h"
+
+/**
+ * v9fs_t_version - negotiate protocol parameters with sever
+ * @v9ses: 9P2000 session information
+ * @msize: requested max size packet
+ * @version: requested version.extension string
+ * @fcall: pointer to response fcall pointer
+ *
+ */
+
+int
+v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
+	       char *version, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall msg;
+
+	dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
+	msg.id = TVERSION;
+	msg.params.tversion.msize = msize;
+	msg.params.tversion.version = version;
+
+	return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_t_attach - mount the server
+ * @v9ses: 9P2000 session information
+ * @uname: user name doing the attach
+ * @aname: remote name being attached to
+ * @fid: mount fid to attatch to root node
+ * @afid: authentication fid (in this case result key)
+ * @fcall: pointer to response fcall pointer
+ *
+ */
+
+int
+v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
+	      u32 fid, u32 afid, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall msg;
+
+	dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
+		aname, fid, afid);
+	msg.id = TATTACH;
+	msg.params.tattach.fid = fid;
+	msg.params.tattach.afid = afid;
+	msg.params.tattach.uname = uname;
+	msg.params.tattach.aname = aname;
+
+	return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_t_clunk - release a fid (finish a transaction)
+ * @v9ses: 9P2000 session information
+ * @fid: fid to release
+ * @fcall: pointer to response fcall pointer
+ *
+ */
+
+int
+v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
+	     struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall msg;
+
+	dprintk(DEBUG_9P, "fid %d\n", fid);
+	msg.id = TCLUNK;
+	msg.params.tclunk.fid = fid;
+
+	return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_v9fs_t_flush - flush a pending transaction
+ * @v9ses: 9P2000 session information
+ * @tag: tid to release
+ *
+ */
+
+int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag)
+{
+	struct v9fs_fcall msg;
+
+	dprintk(DEBUG_9P, "oldtag %d\n", tag);
+	msg.id = TFLUSH;
+	msg.params.tflush.oldtag = tag;
+	return v9fs_mux_rpc(v9ses, &msg, NULL);
+}
+
+/**
+ * v9fs_t_stat - read a file's meta-data
+ * @v9ses: 9P2000 session information
+ * @fid: fid pointing to file or directory to get info about
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall msg;
+
+	dprintk(DEBUG_9P, "fid %d\n", fid);
+	if (fcall)
+		*fcall = NULL;
+
+	msg.id = TSTAT;
+	msg.params.tstat.fid = fid;
+	return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_t_wstat - write a file's meta-data
+ * @v9ses: 9P2000 session information
+ * @fid: fid pointing to file or directory to write info about
+ * @stat: metadata
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
+	     struct v9fs_stat *stat, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall msg;
+
+	dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length);
+	msg.id = TWSTAT;
+	msg.params.twstat.fid = fid;
+	msg.params.twstat.stat = stat;
+
+	return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_t_walk - walk a fid to a new file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to walk
+ * @newfid: new fid (for clone operations)
+ * @name: path to walk fid to
+ * @fcall: pointer to response fcall
+ *
+ */
+
+/* TODO: support multiple walk */
+
+int
+v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
+	    char *name, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall msg;
+
+	dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
+	msg.id = TWALK;
+	msg.params.twalk.fid = fid;
+	msg.params.twalk.newfid = newfid;
+
+	if (name) {
+		msg.params.twalk.nwname = 1;
+		msg.params.twalk.wnames = &name;
+	} else {
+		msg.params.twalk.nwname = 0;
+	}
+
+	return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_t_open - open a file
+ *
+ * @v9ses - 9P2000 session information
+ * @fid - fid to open
+ * @mode - mode to open file (R, RW, etc)
+ * @fcall - pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
+	    struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall msg;
+	long errorno = -1;
+
+	dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
+	msg.id = TOPEN;
+	msg.params.topen.fid = fid;
+	msg.params.topen.mode = mode;
+
+	errorno = v9fs_mux_rpc(v9ses, &msg, fcall);
+
+	return errorno;
+}
+
+/**
+ * v9fs_t_remove - remove a file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to remove
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
+	      struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall msg;
+
+	dprintk(DEBUG_9P, "fid %d\n", fid);
+	msg.id = TREMOVE;
+	msg.params.tremove.fid = fid;
+	return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_t_create - create a file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to create
+ * @name: name of the file or directory to create
+ * @perm: permissions to create with
+ * @mode: mode to open file (R, RW, etc)
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
+	      u32 perm, u8 mode, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall msg;
+
+	dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
+		fid, name, perm, mode);
+
+	msg.id = TCREATE;
+	msg.params.tcreate.fid = fid;
+	msg.params.tcreate.name = name;
+	msg.params.tcreate.perm = perm;
+	msg.params.tcreate.mode = mode;
+
+	return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_t_read - read data
+ * @v9ses: 9P2000 session information
+ * @fid: fid to read from
+ * @offset: offset to start read at
+ * @count: how many bytes to read
+ * @fcall: pointer to response fcall (with data)
+ *
+ */
+
+int
+v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
+	    u32 count, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall msg;
+	struct v9fs_fcall *rc = NULL;
+	long errorno = -1;
+
+	dprintk(DEBUG_9P, "fid %d offset 0x%lx count 0x%x\n", fid,
+		(long unsigned int)offset, count);
+	msg.id = TREAD;
+	msg.params.tread.fid = fid;
+	msg.params.tread.offset = offset;
+	msg.params.tread.count = count;
+	errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
+
+	if (!errorno) {
+		errorno = rc->params.rread.count;
+		dump_data(rc->params.rread.data, rc->params.rread.count);
+	}
+
+	if (fcall)
+		*fcall = rc;
+	else
+		kfree(rc);
+
+	return errorno;
+}
+
+/**
+ * v9fs_t_write - write data
+ * @v9ses: 9P2000 session information
+ * @fid: fid to write to
+ * @offset: offset to start write at
+ * @count: how many bytes to write
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid,
+	     u64 offset, u32 count, void *data, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall msg;
+	struct v9fs_fcall *rc = NULL;
+	long errorno = -1;
+
+	dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid,
+		(unsigned long long)offset, count);
+	dump_data(data, count);
+
+	msg.id = TWRITE;
+	msg.params.twrite.fid = fid;
+	msg.params.twrite.offset = offset;
+	msg.params.twrite.count = count;
+	msg.params.twrite.data = data;
+
+	errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
+
+	if (!errorno)
+		errorno = rc->params.rwrite.count;
+
+	if (fcall)
+		*fcall = rc;
+	else
+		kfree(rc);
+
+	return errorno;
+}
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
new file mode 100644
index 00000000000..f55424216be
--- /dev/null
+++ b/fs/9p/9p.h
@@ -0,0 +1,341 @@
+/*
+ * linux/fs/9p/9p.h
+ *
+ * 9P protocol definitions.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+/* Message Types */
+enum {
+	TVERSION = 100,
+	RVERSION,
+	TAUTH = 102,
+	RAUTH,
+	TATTACH = 104,
+	RATTACH,
+	TERROR = 106,
+	RERROR,
+	TFLUSH = 108,
+	RFLUSH,
+	TWALK = 110,
+	RWALK,
+	TOPEN = 112,
+	ROPEN,
+	TCREATE = 114,
+	RCREATE,
+	TREAD = 116,
+	RREAD,
+	TWRITE = 118,
+	RWRITE,
+	TCLUNK = 120,
+	RCLUNK,
+	TREMOVE = 122,
+	RREMOVE,
+	TSTAT = 124,
+	RSTAT,
+	TWSTAT = 126,
+	RWSTAT,
+};
+
+/* modes */
+enum {
+	V9FS_OREAD = 0x00,
+	V9FS_OWRITE = 0x01,
+	V9FS_ORDWR = 0x02,
+	V9FS_OEXEC = 0x03,
+	V9FS_OEXCL = 0x04,
+	V9FS_OTRUNC = 0x10,
+	V9FS_OREXEC = 0x20,
+	V9FS_ORCLOSE = 0x40,
+	V9FS_OAPPEND = 0x80,
+};
+
+/* permissions */
+enum {
+	V9FS_DMDIR = 0x80000000,
+	V9FS_DMAPPEND = 0x40000000,
+	V9FS_DMEXCL = 0x20000000,
+	V9FS_DMMOUNT = 0x10000000,
+	V9FS_DMAUTH = 0x08000000,
+	V9FS_DMTMP = 0x04000000,
+	V9FS_DMSYMLINK = 0x02000000,
+	V9FS_DMLINK = 0x01000000,
+	/* 9P2000.u extensions */
+	V9FS_DMDEVICE = 0x00800000,
+	V9FS_DMNAMEDPIPE = 0x00200000,
+	V9FS_DMSOCKET = 0x00100000,
+	V9FS_DMSETUID = 0x00080000,
+	V9FS_DMSETGID = 0x00040000,
+};
+
+/* qid.types */
+enum {
+	V9FS_QTDIR = 0x80,
+	V9FS_QTAPPEND = 0x40,
+	V9FS_QTEXCL = 0x20,
+	V9FS_QTMOUNT = 0x10,
+	V9FS_QTAUTH = 0x08,
+	V9FS_QTTMP = 0x04,
+	V9FS_QTSYMLINK = 0x02,
+	V9FS_QTLINK = 0x01,
+	V9FS_QTFILE = 0x00,
+};
+
+/* ample room for Twrite/Rread header (iounit) */
+#define V9FS_IOHDRSZ	24
+
+/* qids are the unique ID for a file (like an inode */
+struct v9fs_qid {
+	u8 type;
+	u32 version;
+	u64 path;
+};
+
+/* Plan 9 file metadata (stat) structure */
+struct v9fs_stat {
+	u16 size;
+	u16 type;
+	u32 dev;
+	struct v9fs_qid qid;
+	u32 mode;
+	u32 atime;
+	u32 mtime;
+	u64 length;
+	char *name;
+	char *uid;
+	char *gid;
+	char *muid;
+	char *extension;	/* 9p2000.u extensions */
+	u32 n_uid;		/* 9p2000.u extensions */
+	u32 n_gid;		/* 9p2000.u extensions */
+	u32 n_muid;		/* 9p2000.u extensions */
+	char data[0];
+};
+
+/* Structures for Protocol Operations */
+
+struct Tversion {
+	u32 msize;
+	char *version;
+};
+
+struct Rversion {
+	u32 msize;
+	char *version;
+};
+
+struct Tauth {
+	u32 afid;
+	char *uname;
+	char *aname;
+};
+
+struct Rauth {
+	struct v9fs_qid qid;
+};
+
+struct Rerror {
+	char *error;
+	u32 errno;		/* 9p2000.u extension */
+};
+
+struct Tflush {
+	u32 oldtag;
+};
+
+struct Rflush {
+};
+
+struct Tattach {
+	u32 fid;
+	u32 afid;
+	char *uname;
+	char *aname;
+};
+
+struct Rattach {
+	struct v9fs_qid qid;
+};
+
+struct Twalk {
+	u32 fid;
+	u32 newfid;
+	u32 nwname;
+	char **wnames;
+};
+
+struct Rwalk {
+	u32 nwqid;
+	struct v9fs_qid *wqids;
+};
+
+struct Topen {
+	u32 fid;
+	u8 mode;
+};
+
+struct Ropen {
+	struct v9fs_qid qid;
+	u32 iounit;
+};
+
+struct Tcreate {
+	u32 fid;
+	char *name;
+	u32 perm;
+	u8 mode;
+};
+
+struct Rcreate {
+	struct v9fs_qid qid;
+	u32 iounit;
+};
+
+struct Tread {
+	u32 fid;
+	u64 offset;
+	u32 count;
+};
+
+struct Rread {
+	u32 count;
+	u8 *data;
+};
+
+struct Twrite {
+	u32 fid;
+	u64 offset;
+	u32 count;
+	u8 *data;
+};
+
+struct Rwrite {
+	u32 count;
+};
+
+struct Tclunk {
+	u32 fid;
+};
+
+struct Rclunk {
+};
+
+struct Tremove {
+	u32 fid;
+};
+
+struct Rremove {
+};
+
+struct Tstat {
+	u32 fid;
+};
+
+struct Rstat {
+	struct v9fs_stat *stat;
+};
+
+struct Twstat {
+	u32 fid;
+	struct v9fs_stat *stat;
+};
+
+struct Rwstat {
+};
+
+/*
+  * fcall is the primary packet structure
+  *
+  */
+
+struct v9fs_fcall {
+	u32 size;
+	u8 id;
+	u16 tag;
+
+	union {
+		struct Tversion tversion;
+		struct Rversion rversion;
+		struct Tauth tauth;
+		struct Rauth rauth;
+		struct Rerror rerror;
+		struct Tflush tflush;
+		struct Rflush rflush;
+		struct Tattach tattach;
+		struct Rattach rattach;
+		struct Twalk twalk;
+		struct Rwalk rwalk;
+		struct Topen topen;
+		struct Ropen ropen;
+		struct Tcreate tcreate;
+		struct Rcreate rcreate;
+		struct Tread tread;
+		struct Rread rread;
+		struct Twrite twrite;
+		struct Rwrite rwrite;
+		struct Tclunk tclunk;
+		struct Rclunk rclunk;
+		struct Tremove tremove;
+		struct Rremove rremove;
+		struct Tstat tstat;
+		struct Rstat rstat;
+		struct Twstat twstat;
+		struct Rwstat rwstat;
+	} params;
+};
+
+#define FCALL_ERROR(fcall) (fcall ? fcall->params.rerror.error : "")
+
+int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
+		   char *version, struct v9fs_fcall **rcall);
+
+int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
+		  u32 fid, u32 afid, struct v9fs_fcall **rcall);
+
+int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
+		 struct v9fs_fcall **rcall);
+
+int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag);
+
+int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
+		struct v9fs_fcall **rcall);
+
+int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
+		 struct v9fs_stat *stat, struct v9fs_fcall **rcall);
+
+int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
+		char *name, struct v9fs_fcall **rcall);
+
+int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
+		struct v9fs_fcall **rcall);
+
+int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
+		  struct v9fs_fcall **rcall);
+
+int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
+		  u32 perm, u8 mode, struct v9fs_fcall **rcall);
+
+int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
+		u64 offset, u32 count, struct v9fs_fcall **rcall);
+
+int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
+		 u32 count, void *data, struct v9fs_fcall **rcall);
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
new file mode 100644
index 00000000000..e4e4ffe5a7d
--- /dev/null
+++ b/fs/9p/Makefile
@@ -0,0 +1,17 @@
+obj-$(CONFIG_9P_FS) := 9p2000.o
+
+9p2000-objs := \
+	vfs_super.o \
+	vfs_inode.o \
+	vfs_file.o \
+	vfs_dir.o \
+	vfs_dentry.o \
+	error.o \
+	mux.o \
+	trans_fd.o \
+	trans_sock.o \
+	9p.o \
+	conv.o \
+	v9fs.o \
+	fid.o
+
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
new file mode 100644
index 00000000000..18121af99d3
--- /dev/null
+++ b/fs/9p/conv.c
@@ -0,0 +1,708 @@
+/*
+ * linux/fs/9p/conv.c
+ *
+ * 9P protocol conversion functions
+ *
+ *  Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net>
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "conv.h"
+
+/*
+ * Buffer to help with string parsing
+ */
+struct cbuf {
+	unsigned char *sp;
+	unsigned char *p;
+	unsigned char *ep;
+};
+
+static inline void buf_init(struct cbuf *buf, void *data, int datalen)
+{
+	buf->sp = buf->p = data;
+	buf->ep = data + datalen;
+}
+
+static inline int buf_check_overflow(struct cbuf *buf)
+{
+	return buf->p > buf->ep;
+}
+
+static inline int buf_check_size(struct cbuf *buf, int len)
+{
+	if (buf->p+len > buf->ep) {
+		if (buf->p < buf->ep) {
+			eprintk(KERN_ERR, "buffer overflow\n");
+			buf->p = buf->ep + 1;
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+static inline void *buf_alloc(struct cbuf *buf, int len)
+{
+	void *ret = NULL;
+
+	if (buf_check_size(buf, len)) {
+		ret = buf->p;
+		buf->p += len;
+	}
+
+	return ret;
+}
+
+static inline void buf_put_int8(struct cbuf *buf, u8 val)
+{
+	if (buf_check_size(buf, 1)) {
+		buf->p[0] = val;
+		buf->p++;
+	}
+}
+
+static inline void buf_put_int16(struct cbuf *buf, u16 val)
+{
+	if (buf_check_size(buf, 2)) {
+		*(__le16 *) buf->p = cpu_to_le16(val);
+		buf->p += 2;
+	}
+}
+
+static inline void buf_put_int32(struct cbuf *buf, u32 val)
+{
+	if (buf_check_size(buf, 4)) {
+		*(__le32 *)buf->p = cpu_to_le32(val);
+		buf->p += 4;
+	}
+}
+
+static inline void buf_put_int64(struct cbuf *buf, u64 val)
+{
+	if (buf_check_size(buf, 8)) {
+		*(__le64 *)buf->p = cpu_to_le64(val);
+		buf->p += 8;
+	}
+}
+
+static inline void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
+{
+	if (buf_check_size(buf, slen + 2)) {
+		buf_put_int16(buf, slen);
+		memcpy(buf->p, s, slen);
+		buf->p += slen;
+	}
+}
+
+static inline void buf_put_string(struct cbuf *buf, const char *s)
+{
+	buf_put_stringn(buf, s, strlen(s));
+}
+
+static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen)
+{
+	if (buf_check_size(buf, datalen)) {
+		memcpy(buf->p, data, datalen);
+		buf->p += datalen;
+	}
+}
+
+static inline u8 buf_get_int8(struct cbuf *buf)
+{
+	u8 ret = 0;
+
+	if (buf_check_size(buf, 1)) {
+		ret = buf->p[0];
+		buf->p++;
+	}
+
+	return ret;
+}
+
+static inline u16 buf_get_int16(struct cbuf *buf)
+{
+	u16 ret = 0;
+
+	if (buf_check_size(buf, 2)) {
+		ret = le16_to_cpu(*(__le16 *)buf->p);
+		buf->p += 2;
+	}
+
+	return ret;
+}
+
+static inline u32 buf_get_int32(struct cbuf *buf)
+{
+	u32 ret = 0;
+
+	if (buf_check_size(buf, 4)) {
+		ret = le32_to_cpu(*(__le32 *)buf->p);
+		buf->p += 4;
+	}
+
+	return ret;
+}
+
+static inline u64 buf_get_int64(struct cbuf *buf)
+{
+	u64 ret = 0;
+
+	if (buf_check_size(buf, 8)) {
+		ret = le64_to_cpu(*(__le64 *)buf->p);
+		buf->p += 8;
+	}
+
+	return ret;
+}
+
+static inline int
+buf_get_string(struct cbuf *buf, char *data, unsigned int datalen)
+{
+	u16 len = 0;
+
+	len = buf_get_int16(buf);
+	if (!buf_check_overflow(buf) && buf_check_size(buf, len) && len+1>datalen) {
+		memcpy(data, buf->p, len);
+		data[len] = 0;
+		buf->p += len;
+		len++;
+	}
+
+	return len;
+}
+
+static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
+{
+	char *ret;
+	u16 len;
+
+	ret = NULL;
+	len = buf_get_int16(buf);
+
+	if (!buf_check_overflow(buf) && buf_check_size(buf, len) &&
+		buf_check_size(sbuf, len+1)) {
+
+		memcpy(sbuf->p, buf->p, len);
+		sbuf->p[len] = 0;
+		ret = sbuf->p;
+		buf->p += len;
+		sbuf->p += len + 1;
+	}
+
+	return ret;
+}
+
+static inline int buf_get_data(struct cbuf *buf, void *data, int datalen)
+{
+	int ret = 0;
+
+	if (buf_check_size(buf, datalen)) {
+		memcpy(data, buf->p, datalen);
+		buf->p += datalen;
+		ret = datalen;
+	}
+
+	return ret;
+}
+
+static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf,
+				  int datalen)
+{
+	char *ret = NULL;
+	int n = 0;
+
+	if (buf_check_size(dbuf, datalen)) {
+		n = buf_get_data(buf, dbuf->p, datalen);
+		if (n > 0) {
+			ret = dbuf->p;
+			dbuf->p += n;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * v9fs_size_stat - calculate the size of a variable length stat struct
+ * @v9ses: session information
+ * @stat: metadata (stat) structure
+ *
+ */
+
+static int v9fs_size_stat(struct v9fs_session_info *v9ses,
+			  struct v9fs_stat *stat)
+{
+	int size = 0;
+
+	if (stat == NULL) {
+		eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
+		return 0;
+	}
+
+	size =			/* 2 + *//* size[2] */
+	    2 +			/* type[2] */
+	    4 +			/* dev[4] */
+	    1 +			/* qid.type[1] */
+	    4 +			/* qid.vers[4] */
+	    8 +			/* qid.path[8] */
+	    4 +			/* mode[4] */
+	    4 +			/* atime[4] */
+	    4 +			/* mtime[4] */
+	    8 +			/* length[8] */
+	    8;			/* minimum sum of string lengths */
+
+	if (stat->name)
+		size += strlen(stat->name);
+	if (stat->uid)
+		size += strlen(stat->uid);
+	if (stat->gid)
+		size += strlen(stat->gid);
+	if (stat->muid)
+		size += strlen(stat->muid);
+
+	if (v9ses->extended) {
+		size += 4 +	/* n_uid[4] */
+		    4 +		/* n_gid[4] */
+		    4 +		/* n_muid[4] */
+		    2;		/* string length of extension[4] */
+		if (stat->extension)
+			size += strlen(stat->extension);
+	}
+
+	return size;
+}
+
+/**
+ * serialize_stat - safely format a stat structure for transmission
+ * @v9ses: session info
+ * @stat: metadata (stat) structure
+ * @bufp: buffer to serialize structure into
+ *
+ */
+
+static int
+serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat,
+	       struct cbuf *bufp)
+{
+	buf_put_int16(bufp, stat->size);
+	buf_put_int16(bufp, stat->type);
+	buf_put_int32(bufp, stat->dev);
+	buf_put_int8(bufp, stat->qid.type);
+	buf_put_int32(bufp, stat->qid.version);
+	buf_put_int64(bufp, stat->qid.path);
+	buf_put_int32(bufp, stat->mode);
+	buf_put_int32(bufp, stat->atime);
+	buf_put_int32(bufp, stat->mtime);
+	buf_put_int64(bufp, stat->length);
+
+	buf_put_string(bufp, stat->name);
+	buf_put_string(bufp, stat->uid);
+	buf_put_string(bufp, stat->gid);
+	buf_put_string(bufp, stat->muid);
+
+	if (v9ses->extended) {
+		buf_put_string(bufp, stat->extension);
+		buf_put_int32(bufp, stat->n_uid);
+		buf_put_int32(bufp, stat->n_gid);
+		buf_put_int32(bufp, stat->n_muid);
+	}
+
+	if (buf_check_overflow(bufp))
+		return 0;
+
+	return stat->size;
+}
+
+/**
+ * deserialize_stat - safely decode a recieved metadata (stat) structure
+ * @v9ses: session info
+ * @bufp: buffer to deserialize
+ * @stat: metadata (stat) structure
+ * @dbufp: buffer to deserialize variable strings into
+ *
+ */
+
+static inline int
+deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp,
+		 struct v9fs_stat *stat, struct cbuf *dbufp)
+{
+
+	stat->size = buf_get_int16(bufp);
+	stat->type = buf_get_int16(bufp);
+	stat->dev = buf_get_int32(bufp);
+	stat->qid.type = buf_get_int8(bufp);
+	stat->qid.version = buf_get_int32(bufp);
+	stat->qid.path = buf_get_int64(bufp);
+	stat->mode = buf_get_int32(bufp);
+	stat->atime = buf_get_int32(bufp);
+	stat->mtime = buf_get_int32(bufp);
+	stat->length = buf_get_int64(bufp);
+	stat->name = buf_get_stringb(bufp, dbufp);
+	stat->uid = buf_get_stringb(bufp, dbufp);
+	stat->gid = buf_get_stringb(bufp, dbufp);
+	stat->muid = buf_get_stringb(bufp, dbufp);
+
+	if (v9ses->extended) {
+		stat->extension = buf_get_stringb(bufp, dbufp);
+		stat->n_uid = buf_get_int32(bufp);
+		stat->n_gid = buf_get_int32(bufp);
+		stat->n_muid = buf_get_int32(bufp);
+	}
+
+	if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
+		return 0;
+
+	return stat->size + 2;
+}
+
+/**
+ * deserialize_statb - wrapper for decoding a received metadata structure
+ * @v9ses: session info
+ * @bufp: buffer to deserialize
+ * @dbufp: buffer to deserialize variable strings into
+ *
+ */
+
+static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info
+						  *v9ses, struct cbuf *bufp,
+						  struct cbuf *dbufp)
+{
+	struct v9fs_stat *ret = buf_alloc(dbufp, sizeof(struct v9fs_stat));
+
+	if (ret) {
+		int n = deserialize_stat(v9ses, bufp, ret, dbufp);
+		if (n <= 0)
+			return NULL;
+	}
+
+	return ret;
+}
+
+/**
+ * v9fs_deserialize_stat - decode a received metadata structure
+ * @v9ses: session info
+ * @buf: buffer to deserialize
+ * @buflen: length of received buffer
+ * @stat: metadata structure to decode into
+ * @statlen: length of destination metadata structure
+ *
+ */
+
+int
+v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf,
+		      u32 buflen, struct v9fs_stat *stat, u32 statlen)
+{
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+	struct cbuf dbuffer;
+	struct cbuf *dbufp = &dbuffer;
+
+	buf_init(bufp, buf, buflen);
+	buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat),
+		 statlen - sizeof(struct v9fs_stat));
+
+	return deserialize_stat(v9ses, bufp, stat, dbufp);
+}
+
+static inline int
+v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall)
+{
+	int size = 4 + 1 + 2;	/* size[4] msg[1] tag[2] */
+	int i = 0;
+
+	switch (fcall->id) {
+	default:
+		eprintk(KERN_ERR, "bad msg type %d\n", fcall->id);
+		return 0;
+	case TVERSION:		/* msize[4] version[s] */
+		size += 4 + 2 + strlen(fcall->params.tversion.version);
+		break;
+	case TAUTH:		/* afid[4] uname[s] aname[s] */
+		size += 4 + 2 + strlen(fcall->params.tauth.uname) +
+		    2 + strlen(fcall->params.tauth.aname);
+		break;
+	case TFLUSH:		/* oldtag[2] */
+		size += 2;
+		break;
+	case TATTACH:		/* fid[4] afid[4] uname[s] aname[s] */
+		size += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) +
+		    2 + strlen(fcall->params.tattach.aname);
+		break;
+	case TWALK:		/* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
+		size += 4 + 4 + 2;
+		/* now compute total for the array of names */
+		for (i = 0; i < fcall->params.twalk.nwname; i++)
+			size += 2 + strlen(fcall->params.twalk.wnames[i]);
+		break;
+	case TOPEN:		/* fid[4] mode[1] */
+		size += 4 + 1;
+		break;
+	case TCREATE:		/* fid[4] name[s] perm[4] mode[1] */
+		size += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1;
+		break;
+	case TREAD:		/* fid[4] offset[8] count[4] */
+		size += 4 + 8 + 4;
+		break;
+	case TWRITE:		/* fid[4] offset[8] count[4] data[count] */
+		size += 4 + 8 + 4 + fcall->params.twrite.count;
+		break;
+	case TCLUNK:		/* fid[4] */
+		size += 4;
+		break;
+	case TREMOVE:		/* fid[4] */
+		size += 4;
+		break;
+	case TSTAT:		/* fid[4] */
+		size += 4;
+		break;
+	case TWSTAT:		/* fid[4] stat[n] */
+		fcall->params.twstat.stat->size =
+		    v9fs_size_stat(v9ses, fcall->params.twstat.stat);
+		size += 4 + 2 + 2 + fcall->params.twstat.stat->size;
+	}
+	return size;
+}
+
+/*
+ * v9fs_serialize_fcall - marshall fcall struct into a packet
+ * @v9ses: session information
+ * @fcall: structure to convert
+ * @data: buffer to serialize fcall into
+ * @datalen: length of buffer to serialize fcall into
+ *
+ */
+
+int
+v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall,
+		     void *data, u32 datalen)
+{
+	int i = 0;
+	struct v9fs_stat *stat = NULL;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	buf_init(bufp, data, datalen);
+
+	if (!fcall) {
+		eprintk(KERN_ERR, "no fcall\n");
+		return -EINVAL;
+	}
+
+	fcall->size = v9fs_size_fcall(v9ses, fcall);
+
+	buf_put_int32(bufp, fcall->size);
+	buf_put_int8(bufp, fcall->id);
+	buf_put_int16(bufp, fcall->tag);
+
+	dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id,
+		fcall->tag);
+
+	/* now encode it */
+	switch (fcall->id) {
+	default:
+		eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id);
+		return -EPROTO;
+	case TVERSION:
+		buf_put_int32(bufp, fcall->params.tversion.msize);
+		buf_put_string(bufp, fcall->params.tversion.version);
+		break;
+	case TAUTH:
+		buf_put_int32(bufp, fcall->params.tauth.afid);
+		buf_put_string(bufp, fcall->params.tauth.uname);
+		buf_put_string(bufp, fcall->params.tauth.aname);
+		break;
+	case TFLUSH:
+		buf_put_int16(bufp, fcall->params.tflush.oldtag);
+		break;
+	case TATTACH:
+		buf_put_int32(bufp, fcall->params.tattach.fid);
+		buf_put_int32(bufp, fcall->params.tattach.afid);
+		buf_put_string(bufp, fcall->params.tattach.uname);
+		buf_put_string(bufp, fcall->params.tattach.aname);
+		break;
+	case TWALK:
+		buf_put_int32(bufp, fcall->params.twalk.fid);
+		buf_put_int32(bufp, fcall->params.twalk.newfid);
+		buf_put_int16(bufp, fcall->params.twalk.nwname);
+		for (i = 0; i < fcall->params.twalk.nwname; i++)
+			buf_put_string(bufp, fcall->params.twalk.wnames[i]);
+		break;
+	case TOPEN:
+		buf_put_int32(bufp, fcall->params.topen.fid);
+		buf_put_int8(bufp, fcall->params.topen.mode);
+		break;
+	case TCREATE:
+		buf_put_int32(bufp, fcall->params.tcreate.fid);
+		buf_put_string(bufp, fcall->params.tcreate.name);
+		buf_put_int32(bufp, fcall->params.tcreate.perm);
+		buf_put_int8(bufp, fcall->params.tcreate.mode);
+		break;
+	case TREAD:
+		buf_put_int32(bufp, fcall->params.tread.fid);
+		buf_put_int64(bufp, fcall->params.tread.offset);
+		buf_put_int32(bufp, fcall->params.tread.count);
+		break;
+	case TWRITE:
+		buf_put_int32(bufp, fcall->params.twrite.fid);
+		buf_put_int64(bufp, fcall->params.twrite.offset);
+		buf_put_int32(bufp, fcall->params.twrite.count);
+		buf_put_data(bufp, fcall->params.twrite.data,
+			     fcall->params.twrite.count);
+		break;
+	case TCLUNK:
+		buf_put_int32(bufp, fcall->params.tclunk.fid);
+		break;
+	case TREMOVE:
+		buf_put_int32(bufp, fcall->params.tremove.fid);
+		break;
+	case TSTAT:
+		buf_put_int32(bufp, fcall->params.tstat.fid);
+		break;
+	case TWSTAT:
+		buf_put_int32(bufp, fcall->params.twstat.fid);
+		stat = fcall->params.twstat.stat;
+
+		buf_put_int16(bufp, stat->size + 2);
+		serialize_stat(v9ses, stat, bufp);
+		break;
+	}
+
+	if (buf_check_overflow(bufp))
+		return -EIO;
+
+	return fcall->size;
+}
+
+/**
+ * deserialize_fcall - unmarshal a response
+ * @v9ses: session information
+ * @msgsize: size of rcall message
+ * @buf: recieved buffer
+ * @buflen: length of received buffer
+ * @rcall: fcall structure to populate
+ * @rcalllen: length of fcall structure to populate
+ *
+ */
+
+int
+v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
+		       void *buf, u32 buflen, struct v9fs_fcall *rcall,
+		       int rcalllen)
+{
+
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+	struct cbuf dbuffer;
+	struct cbuf *dbufp = &dbuffer;
+	int i = 0;
+
+	buf_init(bufp, buf, buflen);
+	buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall),
+		 rcalllen - sizeof(struct v9fs_fcall));
+
+	rcall->size = msgsize;
+	rcall->id = buf_get_int8(bufp);
+	rcall->tag = buf_get_int16(bufp);
+
+	dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
+		rcall->tag);
+	switch (rcall->id) {
+	default:
+		eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
+		return -EPROTO;
+	case RVERSION:
+		rcall->params.rversion.msize = buf_get_int32(bufp);
+		rcall->params.rversion.version = buf_get_stringb(bufp, dbufp);
+		break;
+	case RFLUSH:
+		break;
+	case RATTACH:
+		rcall->params.rattach.qid.type = buf_get_int8(bufp);
+		rcall->params.rattach.qid.version = buf_get_int32(bufp);
+		rcall->params.rattach.qid.path = buf_get_int64(bufp);
+		break;
+	case RWALK:
+		rcall->params.rwalk.nwqid = buf_get_int16(bufp);
+		rcall->params.rwalk.wqids = buf_alloc(dbufp,
+		      rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid));
+		if (rcall->params.rwalk.wqids)
+			for (i = 0; i < rcall->params.rwalk.nwqid; i++) {
+				rcall->params.rwalk.wqids[i].type =
+				    buf_get_int8(bufp);
+				rcall->params.rwalk.wqids[i].version =
+				    buf_get_int16(bufp);
+				rcall->params.rwalk.wqids[i].path =
+				    buf_get_int64(bufp);
+			}
+		break;
+	case ROPEN:
+		rcall->params.ropen.qid.type = buf_get_int8(bufp);
+		rcall->params.ropen.qid.version = buf_get_int32(bufp);
+		rcall->params.ropen.qid.path = buf_get_int64(bufp);
+		rcall->params.ropen.iounit = buf_get_int32(bufp);
+		break;
+	case RCREATE:
+		rcall->params.rcreate.qid.type = buf_get_int8(bufp);
+		rcall->params.rcreate.qid.version = buf_get_int32(bufp);
+		rcall->params.rcreate.qid.path = buf_get_int64(bufp);
+		rcall->params.rcreate.iounit = buf_get_int32(bufp);
+		break;
+	case RREAD:
+		rcall->params.rread.count = buf_get_int32(bufp);
+		rcall->params.rread.data = buf_get_datab(bufp, dbufp,
+			rcall->params.rread.count);
+		break;
+	case RWRITE:
+		rcall->params.rwrite.count = buf_get_int32(bufp);
+		break;
+	case RCLUNK:
+		break;
+	case RREMOVE:
+		break;
+	case RSTAT:
+		buf_get_int16(bufp);
+		rcall->params.rstat.stat =
+		    deserialize_statb(v9ses, bufp, dbufp);
+		break;
+	case RWSTAT:
+		break;
+	case RERROR:
+		rcall->params.rerror.error = buf_get_stringb(bufp, dbufp);
+		if (v9ses->extended)
+			rcall->params.rerror.errno = buf_get_int16(bufp);
+		break;
+	}
+
+	if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
+		return -EIO;
+
+	return rcall->size;
+}
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
new file mode 100644
index 00000000000..ee849613c61
--- /dev/null
+++ b/fs/9p/conv.h
@@ -0,0 +1,36 @@
+/*
+ * linux/fs/9p/conv.h
+ *
+ * 9P protocol conversion definitions
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf,
+			  u32 buflen, struct v9fs_stat *stat, u32 statlen);
+int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall,
+			 void *buf, u32 buflen);
+int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen,
+			   void *buf, u32 buflen, struct v9fs_fcall *rcall,
+			   int rcalllen);
+
+/* this one is actually in error.c right now */
+int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
new file mode 100644
index 00000000000..4445f06919d
--- /dev/null
+++ b/fs/9p/debug.h
@@ -0,0 +1,70 @@
+/*
+ *  linux/fs/9p/debug.h - V9FS Debug Definitions
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#define DEBUG_ERROR		(1<<0)
+#define DEBUG_CURRENT		(1<<1)
+#define DEBUG_9P	        (1<<2)
+#define DEBUG_VFS	        (1<<3)
+#define DEBUG_CONV		(1<<4)
+#define DEBUG_MUX		(1<<5)
+#define DEBUG_TRANS		(1<<6)
+#define DEBUG_SLABS	      	(1<<7)
+
+#define DEBUG_DUMP_PKT		0
+
+extern int v9fs_debug_level;
+
+#define dprintk(level, format, arg...) \
+do {  \
+	if((v9fs_debug_level & level)==level) \
+		printk(KERN_NOTICE "-- %s (%d): " \
+		format , __FUNCTION__, current->pid , ## arg); \
+} while(0)
+
+#define eprintk(level, format, arg...) \
+do { \
+	printk(level "v9fs: %s (%d): " \
+		format , __FUNCTION__, current->pid , ## arg); \
+} while(0)
+
+#if DEBUG_DUMP_PKT
+static inline void dump_data(const unsigned char *data, unsigned int datalen)
+{
+	int i, j;
+	int len = datalen;
+
+	printk(KERN_DEBUG "data ");
+	for (i = 0; i < len; i += 4) {
+		for (j = 0; (j < 4) && (i + j < len); j++)
+			printk(KERN_DEBUG "%02x", data[i + j]);
+		printk(KERN_DEBUG " ");
+	}
+	printk(KERN_DEBUG "\n");
+}
+#else				/* DEBUG_DUMP_PKT */
+static inline void dump_data(const unsigned char *data, unsigned int datalen)
+{
+
+}
+#endif				/* DEBUG_DUMP_PKT */
diff --git a/fs/9p/error.c b/fs/9p/error.c
new file mode 100644
index 00000000000..fee5d19179c
--- /dev/null
+++ b/fs/9p/error.c
@@ -0,0 +1,93 @@
+/*
+ * linux/fs/9p/error.c
+ *
+ * Error string handling
+ *
+ * Plan 9 uses error strings, Unix uses error numbers.  These functions
+ * try to help manage that and provide for dynamically adding error
+ * mappings.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/list.h>
+#include <linux/jhash.h>
+
+#include "debug.h"
+#include "error.h"
+
+/**
+ * v9fs_error_init - preload
+ * @errstr: error string
+ *
+ */
+
+int v9fs_error_init(void)
+{
+	struct errormap *c;
+	int bucket;
+
+	/* initialize hash table */
+	for (bucket = 0; bucket < ERRHASHSZ; bucket++)
+		INIT_HLIST_HEAD(&hash_errmap[bucket]);
+
+	/* load initial error map into hash table */
+	for (c = errmap; c->name != NULL; c++) {
+		bucket = jhash(c->name, strlen(c->name), 0) % ERRHASHSZ;
+		INIT_HLIST_NODE(&c->list);
+		hlist_add_head(&c->list, &hash_errmap[bucket]);
+	}
+
+	return 1;
+}
+
+/**
+ * errstr2errno - convert error string to error number
+ * @errstr: error string
+ *
+ */
+
+int v9fs_errstr2errno(char *errstr)
+{
+	int errno = 0;
+	struct hlist_node *p = NULL;
+	struct errormap *c = NULL;
+	int bucket = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ;
+
+	hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
+		if (!strcmp(c->name, errstr)) {
+			errno = c->val;
+			break;
+		}
+	}
+
+	if (errno == 0) {
+		/* TODO: if error isn't found, add it dynamically */
+		printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__,
+		       errstr);
+		errno = 1;
+	}
+
+	return -errno;
+}
diff --git a/fs/9p/error.h b/fs/9p/error.h
new file mode 100644
index 00000000000..78f89acf7c9
--- /dev/null
+++ b/fs/9p/error.h
@@ -0,0 +1,178 @@
+/*
+ * linux/fs/9p/error.h
+ *
+ * Huge Nasty Error Table
+ *
+ * Plan 9 uses error strings, Unix uses error numbers.  This table tries to
+ * match UNIX strings and Plan 9 strings to unix error numbers.  It is used
+ * to preload the dynamic error table which can also track user-specific error
+ * strings.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/errno.h>
+#include <asm/errno.h>
+
+struct errormap {
+	char *name;
+	int val;
+
+	struct hlist_node list;
+};
+
+#define ERRHASHSZ		32
+static struct hlist_head hash_errmap[ERRHASHSZ];
+
+/* FixMe - reduce to a reasonable size */
+static struct errormap errmap[] = {
+	{"Operation not permitted", EPERM},
+	{"wstat prohibited", EPERM},
+	{"No such file or directory", ENOENT},
+	{"directory entry not found", ENOENT},
+	{"file not found", ENOENT},
+	{"Interrupted system call", EINTR},
+	{"Input/output error", EIO},
+	{"No such device or address", ENXIO},
+	{"Argument list too long", E2BIG},
+	{"Bad file descriptor", EBADF},
+	{"Resource temporarily unavailable", EAGAIN},
+	{"Cannot allocate memory", ENOMEM},
+	{"Permission denied", EACCES},
+	{"Bad address", EFAULT},
+	{"Block device required", ENOTBLK},
+	{"Device or resource busy", EBUSY},
+	{"File exists", EEXIST},
+	{"Invalid cross-device link", EXDEV},
+	{"No such device", ENODEV},
+	{"Not a directory", ENOTDIR},
+	{"Is a directory", EISDIR},
+	{"Invalid argument", EINVAL},
+	{"Too many open files in system", ENFILE},
+	{"Too many open files", EMFILE},
+	{"Text file busy", ETXTBSY},
+	{"File too large", EFBIG},
+	{"No space left on device", ENOSPC},
+	{"Illegal seek", ESPIPE},
+	{"Read-only file system", EROFS},
+	{"Too many links", EMLINK},
+	{"Broken pipe", EPIPE},
+	{"Numerical argument out of domain", EDOM},
+	{"Numerical result out of range", ERANGE},
+	{"Resource deadlock avoided", EDEADLK},
+	{"File name too long", ENAMETOOLONG},
+	{"No locks available", ENOLCK},
+	{"Function not implemented", ENOSYS},
+	{"Directory not empty", ENOTEMPTY},
+	{"Too many levels of symbolic links", ELOOP},
+	{"No message of desired type", ENOMSG},
+	{"Identifier removed", EIDRM},
+	{"No data available", ENODATA},
+	{"Machine is not on the network", ENONET},
+	{"Package not installed", ENOPKG},
+	{"Object is remote", EREMOTE},
+	{"Link has been severed", ENOLINK},
+	{"Communication error on send", ECOMM},
+	{"Protocol error", EPROTO},
+	{"Bad message", EBADMSG},
+	{"File descriptor in bad state", EBADFD},
+	{"Streams pipe error", ESTRPIPE},
+	{"Too many users", EUSERS},
+	{"Socket operation on non-socket", ENOTSOCK},
+	{"Message too long", EMSGSIZE},
+	{"Protocol not available", ENOPROTOOPT},
+	{"Protocol not supported", EPROTONOSUPPORT},
+	{"Socket type not supported", ESOCKTNOSUPPORT},
+	{"Operation not supported", EOPNOTSUPP},
+	{"Protocol family not supported", EPFNOSUPPORT},
+	{"Network is down", ENETDOWN},
+	{"Network is unreachable", ENETUNREACH},
+	{"Network dropped connection on reset", ENETRESET},
+	{"Software caused connection abort", ECONNABORTED},
+	{"Connection reset by peer", ECONNRESET},
+	{"No buffer space available", ENOBUFS},
+	{"Transport endpoint is already connected", EISCONN},
+	{"Transport endpoint is not connected", ENOTCONN},
+	{"Cannot send after transport endpoint shutdown", ESHUTDOWN},
+	{"Connection timed out", ETIMEDOUT},
+	{"Connection refused", ECONNREFUSED},
+	{"Host is down", EHOSTDOWN},
+	{"No route to host", EHOSTUNREACH},
+	{"Operation already in progress", EALREADY},
+	{"Operation now in progress", EINPROGRESS},
+	{"Is a named type file", EISNAM},
+	{"Remote I/O error", EREMOTEIO},
+	{"Disk quota exceeded", EDQUOT},
+/* errors from fossil, vacfs, and u9fs */
+	{"fid unknown or out of range", EBADF},
+	{"permission denied", EACCES},
+	{"file does not exist", ENOENT},
+	{"authentication failed", ECONNREFUSED},
+	{"bad offset in directory read", ESPIPE},
+	{"bad use of fid", EBADF},
+	{"wstat can't convert between files and directories", EPERM},
+	{"directory is not empty", ENOTEMPTY},
+	{"file exists", EEXIST},
+	{"file already exists", EEXIST},
+	{"file or directory already exists", EEXIST},
+	{"fid already in use", EBADF},
+	{"file in use", ETXTBSY},
+	{"i/o error", EIO},
+	{"file already open for I/O", ETXTBSY},
+	{"illegal mode", EINVAL},
+	{"illegal name", ENAMETOOLONG},
+	{"not a directory", ENOTDIR},
+	{"not a member of proposed group", EPERM},
+	{"not owner", EACCES},
+	{"only owner can change group in wstat", EACCES},
+	{"read only file system", EROFS},
+	{"no access to special file", EPERM},
+	{"i/o count too large", EIO},
+	{"unknown group", EINVAL},
+	{"unknown user", EINVAL},
+	{"bogus wstat buffer", EPROTO},
+	{"exclusive use file already open", EAGAIN},
+	{"corrupted directory entry", EIO},
+	{"corrupted file entry", EIO},
+	{"corrupted block label", EIO},
+	{"corrupted meta data", EIO},
+	{"illegal offset", EINVAL},
+	{"illegal path element", ENOENT},
+	{"root of file system is corrupted", EIO},
+	{"corrupted super block", EIO},
+	{"protocol botch", EPROTO},
+	{"file system is full", ENOSPC},
+	{"file is in use", EAGAIN},
+	{"directory entry is not allocated", ENOENT},
+	{"file is read only", EROFS},
+	{"file has been removed", EIDRM},
+	{"only support truncation to zero length", EPERM},
+	{"cannot remove root", EPERM},
+	{"file too big", EFBIG},
+	{"venti i/o error", EIO},
+	/* these are not errors */
+	{"u9fs rhostsauth: no authentication required", 0},
+	{"u9fs authnone: no authentication required", 0},
+	{NULL, -1}
+};
+
+extern int v9fs_error_init(void);
+extern int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
new file mode 100644
index 00000000000..d95f8626d17
--- /dev/null
+++ b/fs/9p/fid.c
@@ -0,0 +1,255 @@
+/*
+ * V9FS FID Management
+ *
+ *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "transport.h"
+#include "mux.h"
+#include "conv.h"
+#include "fid.h"
+
+/**
+ * v9fs_fid_insert - add a fid to a dentry
+ * @fid: fid to add
+ * @dentry: dentry that it is being added to
+ *
+ */
+
+static int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry)
+{
+	struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
+	dprintk(DEBUG_9P, "fid %d (%p) dentry %s (%p)\n", fid->fid, fid,
+		dentry->d_iname, dentry);
+	if (dentry->d_fsdata == NULL) {
+		dentry->d_fsdata =
+		    kmalloc(sizeof(struct list_head), GFP_KERNEL);
+		if (dentry->d_fsdata == NULL) {
+			dprintk(DEBUG_ERROR, "Out of memory\n");
+			return -ENOMEM;
+		}
+		fid_list = (struct list_head *)dentry->d_fsdata;
+		INIT_LIST_HEAD(fid_list);	/* Initialize list head */
+	}
+
+	fid->uid = current->uid;
+	fid->pid = current->pid;
+	list_add(&fid->list, fid_list);
+	return 0;
+}
+
+/**
+ * v9fs_fid_create - allocate a FID structure
+ * @dentry - dentry to link newly created fid to
+ *
+ */
+
+struct v9fs_fid *v9fs_fid_create(struct dentry *dentry,
+	struct v9fs_session_info *v9ses, int fid, int create)
+{
+	struct v9fs_fid *new;
+
+	dprintk(DEBUG_9P, "fid create dentry %p, fid %d, create %d\n",
+		dentry, fid, create);
+
+	new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
+	if (new == NULL) {
+		dprintk(DEBUG_ERROR, "Out of Memory\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	new->fid = fid;
+	new->v9ses = v9ses;
+	new->fidopen = 0;
+	new->fidcreate = create;
+	new->fidclunked = 0;
+	new->iounit = 0;
+	new->rdir_pos = 0;
+	new->rdir_fcall = NULL;
+
+	if (v9fs_fid_insert(new, dentry) == 0)
+		return new;
+	else {
+		dprintk(DEBUG_ERROR, "Problems inserting to dentry\n");
+		kfree(new);
+		return NULL;
+	}
+}
+
+/**
+ * v9fs_fid_destroy - deallocate a FID structure
+ * @fid: fid to destroy
+ *
+ */
+
+void v9fs_fid_destroy(struct v9fs_fid *fid)
+{
+	list_del(&fid->list);
+	kfree(fid);
+}
+
+/**
+ * v9fs_fid_walk_up - walks from the process current directory
+ * 	up to the specified dentry.
+ */
+static struct v9fs_fid *v9fs_fid_walk_up(struct dentry *dentry)
+{
+	int fidnum, cfidnum, err;
+	struct v9fs_fid *cfid;
+	struct dentry *cde;
+	struct v9fs_session_info *v9ses;
+
+	v9ses = v9fs_inode2v9ses(current->fs->pwd->d_inode);
+	cfid = v9fs_fid_lookup(current->fs->pwd);
+	if (cfid == NULL) {
+		dprintk(DEBUG_ERROR, "process cwd doesn't have a fid\n");
+		return ERR_PTR(-ENOENT);
+	}
+
+	cfidnum = cfid->fid;
+	cde = current->fs->pwd;
+	/* TODO: take advantage of multiwalk */
+
+	fidnum = v9fs_get_idpool(&v9ses->fidpool);
+	if (fidnum < 0) {
+		dprintk(DEBUG_ERROR, "could not get a new fid num\n");
+		err = -ENOENT;
+		goto clunk_fid;
+	}
+
+	while (cde != dentry) {
+		if (cde == cde->d_parent) {
+			dprintk(DEBUG_ERROR, "can't find dentry\n");
+			err = -ENOENT;
+			goto clunk_fid;
+		}
+
+		err = v9fs_t_walk(v9ses, cfidnum, fidnum, "..", NULL);
+		if (err < 0) {
+			dprintk(DEBUG_ERROR, "problem walking to parent\n");
+			goto clunk_fid;
+		}
+
+		cfidnum = fidnum;
+		cde = cde->d_parent;
+	}
+
+	return v9fs_fid_create(dentry, v9ses, fidnum, 0);
+
+clunk_fid:
+	v9fs_t_clunk(v9ses, fidnum, NULL);
+	return ERR_PTR(err);
+}
+
+/**
+ * v9fs_fid_lookup - retrieve the right fid from a  particular dentry
+ * @dentry: dentry to look for fid in
+ * @type: intent of lookup (operation or traversal)
+ *
+ * search list of fids associated with a dentry for a fid with a matching
+ * thread id or uid.  If that fails, look up the dentry's parents to see if you
+ * can find a matching fid.
+ *
+ */
+
+struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry)
+{
+	struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
+	struct v9fs_fid *current_fid = NULL;
+	struct v9fs_fid *temp = NULL;
+	struct v9fs_fid *return_fid = NULL;
+
+	dprintk(DEBUG_9P, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+
+	if (fid_list) {
+		list_for_each_entry_safe(current_fid, temp, fid_list, list) {
+			if (!current_fid->fidcreate) {
+				return_fid = current_fid;
+				break;
+			}
+		}
+
+		if (!return_fid)
+			return_fid = current_fid;
+	}
+
+	/* we are at the root but didn't match */
+	if ((!return_fid) && (dentry->d_parent == dentry)) {
+		/* TODO: clone attach with new uid */
+		return_fid = current_fid;
+	}
+
+	if (!return_fid) {
+		struct dentry *par = current->fs->pwd->d_parent;
+		int count = 1;
+		while (par != NULL) {
+			if (par == dentry)
+				break;
+			count++;
+			if (par == par->d_parent) {
+				dprintk(DEBUG_ERROR,
+					"got to root without finding dentry\n");
+				break;
+			}
+			par = par->d_parent;
+		}
+
+/* XXX - there may be some duplication we can get rid of */
+		if (par == dentry) {
+			return_fid = v9fs_fid_walk_up(dentry);
+			if (IS_ERR(return_fid))
+				return_fid = NULL;
+		}
+	}
+
+	return return_fid;
+}
+
+struct v9fs_fid *v9fs_fid_get_created(struct dentry *dentry)
+{
+	struct list_head *fid_list;
+	struct v9fs_fid *fid, *ftmp, *ret;
+
+	dprintk(DEBUG_9P, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+	fid_list = (struct list_head *)dentry->d_fsdata;
+	ret = NULL;
+	if (fid_list) {
+		list_for_each_entry_safe(fid, ftmp, fid_list, list) {
+			if (fid->fidcreate && fid->pid == current->pid) {
+				list_del(&fid->list);
+				ret = fid;
+				break;
+			}
+		}
+	}
+
+	dprintk(DEBUG_9P, "return %p\n", ret);
+	return ret;
+}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
new file mode 100644
index 00000000000..84c673a44c8
--- /dev/null
+++ b/fs/9p/fid.h
@@ -0,0 +1,60 @@
+/*
+ * V9FS FID Management
+ *
+ *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/list.h>
+
+#define FID_OP   0
+#define FID_WALK 1
+#define FID_CREATE 2
+
+struct v9fs_fid {
+	struct list_head list;	 /* list of fids associated with a dentry */
+	struct list_head active; /* XXX - debug */
+
+	u32 fid;
+	unsigned char fidopen;	  /* set when fid is opened */
+	unsigned char fidcreate;  /* set when fid was just created */
+	unsigned char fidclunked; /* set when fid has already been clunked */
+
+	struct v9fs_qid qid;
+	u32 iounit;
+
+	/* readdir stuff */
+	int rdir_fpos;
+	loff_t rdir_pos;
+	struct v9fs_fcall *rdir_fcall;
+
+	/* management stuff */
+	pid_t pid;		/* thread associated with this fid */
+	uid_t uid;		/* user associated with this fid */
+
+	/* private data */
+	struct file *filp;	/* backpointer to File struct for open files */
+	struct v9fs_session_info *v9ses;	/* session info for this FID */
+};
+
+struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry);
+struct v9fs_fid *v9fs_fid_get_created(struct dentry *);
+void v9fs_fid_destroy(struct v9fs_fid *fid);
+struct v9fs_fid *v9fs_fid_create(struct dentry *,
+	struct v9fs_session_info *v9ses, int fid, int create);
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
new file mode 100644
index 00000000000..8835b576f74
--- /dev/null
+++ b/fs/9p/mux.c
@@ -0,0 +1,475 @@
+/*
+ * linux/fs/9p/mux.c
+ *
+ * Protocol Multiplexer
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/kthread.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "transport.h"
+#include "conv.h"
+#include "mux.h"
+
+/**
+ * dprintcond - print condition of session info
+ * @v9ses: session info structure
+ * @req: RPC request structure
+ *
+ */
+
+static inline int
+dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+{
+	dprintk(DEBUG_MUX, "condition: %d, %p\n", v9ses->transport->status,
+		req->rcall);
+	return 0;
+}
+
+/**
+ * xread - force read of a certain number of bytes
+ * @v9ses: session info structure
+ * @ptr: pointer to buffer
+ * @sz: number of bytes to read
+ *
+ * Chuck Cranor CS-533 project1
+ */
+
+static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz)
+{
+	int rd = 0;
+	int ret = 0;
+	while (rd < sz) {
+		ret = v9ses->transport->read(v9ses->transport, ptr, sz - rd);
+		if (ret <= 0) {
+			dprintk(DEBUG_ERROR, "xread errno %d\n", ret);
+			return ret;
+		}
+		rd += ret;
+		ptr += ret;
+	}
+	return (rd);
+}
+
+/**
+ * read_message - read a full 9P2000 fcall packet
+ * @v9ses: session info structure
+ * @rcall: fcall structure to read into
+ * @rcalllen: size of fcall buffer
+ *
+ */
+
+static int
+read_message(struct v9fs_session_info *v9ses,
+	     struct v9fs_fcall *rcall, int rcalllen)
+{
+	unsigned char buf[4];
+	void *data;
+	int size = 0;
+	int res = 0;
+
+	res = xread(v9ses, buf, sizeof(buf));
+	if (res < 0) {
+		dprintk(DEBUG_ERROR,
+			"Reading of count field failed returned: %d\n", res);
+		return res;
+	}
+
+	if (res < 4) {
+		dprintk(DEBUG_ERROR,
+			"Reading of count field failed returned: %d\n", res);
+		return -EIO;
+	}
+
+	size = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
+	dprintk(DEBUG_MUX, "got a packet count: %d\n", size);
+
+	/* adjust for the four bytes of size */
+	size -= 4;
+
+	if (size > v9ses->maxdata) {
+		dprintk(DEBUG_ERROR, "packet too big: %d\n", size);
+		return -E2BIG;
+	}
+
+	data = kmalloc(size, GFP_KERNEL);
+	if (!data) {
+		eprintk(KERN_WARNING, "out of memory\n");
+		return -ENOMEM;
+	}
+
+	res = xread(v9ses, data, size);
+	if (res < size) {
+		dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n",
+			res);
+		kfree(data);
+		return res;
+	}
+
+	/* we now have an in-memory string that is the reply.
+	 * deserialize it. There is very little to go wrong at this point
+	 * save for v9fs_alloc errors.
+	 */
+	res = v9fs_deserialize_fcall(v9ses, size, data, v9ses->maxdata,
+				     rcall, rcalllen);
+
+	kfree(data);
+
+	if (res < 0)
+		return res;
+
+	return 0;
+}
+
+/**
+ * v9fs_recv - receive an RPC response for a particular tag
+ * @v9ses: session info structure
+ * @req: RPC request structure
+ *
+ */
+
+static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+{
+	int ret = 0;
+
+	dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag);
+	ret = wait_event_interruptible(v9ses->read_wait,
+		       ((v9ses->transport->status != Connected) ||
+			(req->rcall != 0) || (req->err < 0) ||
+			dprintcond(v9ses, req)));
+
+	dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall);
+
+	spin_lock(&v9ses->muxlock);
+	list_del(&req->next);
+	spin_unlock(&v9ses->muxlock);
+
+	if (req->err < 0)
+		return req->err;
+
+	if (v9ses->transport->status == Disconnected)
+		return -ECONNRESET;
+
+	return ret;
+}
+
+/**
+ * v9fs_send - send a 9P request
+ * @v9ses: session info structure
+ * @req: RPC request to send
+ *
+ */
+
+static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+{
+	int ret = -1;
+	void *data = NULL;
+	struct v9fs_fcall *tcall = req->tcall;
+
+	data = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	tcall->size = 0;	/* enforce size recalculation */
+	ret =
+	    v9fs_serialize_fcall(v9ses, tcall, data,
+				 v9ses->maxdata + V9FS_IOHDRSZ);
+	if (ret < 0)
+		goto free_data;
+
+	spin_lock(&v9ses->muxlock);
+	list_add(&req->next, &v9ses->mux_fcalls);
+	spin_unlock(&v9ses->muxlock);
+
+	dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag,
+		tcall->size);
+	ret = v9ses->transport->write(v9ses->transport, data, tcall->size);
+
+	if (ret != tcall->size) {
+		spin_lock(&v9ses->muxlock);
+		list_del(&req->next);
+		kfree(req->rcall);
+
+		spin_unlock(&v9ses->muxlock);
+		if (ret >= 0)
+			ret = -EREMOTEIO;
+	} else
+		ret = 0;
+
+      free_data:
+	kfree(data);
+	return ret;
+}
+
+/**
+ * v9fs_mux_rpc - send a request, receive a response
+ * @v9ses: session info structure
+ * @tcall: fcall to send
+ * @rcall: buffer to place response into
+ *
+ */
+
+long
+v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
+	     struct v9fs_fcall **rcall)
+{
+	int tid = -1;
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_rpcreq req;
+	int ret = -1;
+
+	if (!v9ses)
+		return -EINVAL;
+
+	if (!v9ses->transport || v9ses->transport->status != Connected)
+		return -EIO;
+
+	if (rcall)
+		*rcall = NULL;
+
+	if (tcall->id != TVERSION) {
+		tid = v9fs_get_idpool(&v9ses->tidpool);
+		if (tid < 0)
+			return -ENOMEM;
+	}
+
+	tcall->tag = tid;
+
+	req.tcall = tcall;
+	req.err = 0;
+	req.rcall = NULL;
+
+	ret = v9fs_send(v9ses, &req);
+
+	if (ret < 0) {
+		if (tcall->id != TVERSION)
+			v9fs_put_idpool(tid, &v9ses->tidpool);
+		dprintk(DEBUG_MUX, "error %d\n", ret);
+		return ret;
+	}
+
+	ret = v9fs_recv(v9ses, &req);
+
+	fcall = req.rcall;
+
+	dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret);
+	if (ret == -ERESTARTSYS) {
+		if (v9ses->transport->status != Disconnected
+		    && tcall->id != TFLUSH) {
+			unsigned long flags;
+
+			dprintk(DEBUG_MUX, "flushing the tag: %d\n",
+				tcall->tag);
+			clear_thread_flag(TIF_SIGPENDING);
+			v9fs_t_flush(v9ses, tcall->tag);
+			spin_lock_irqsave(&current->sighand->siglock, flags);
+			recalc_sigpending();
+			spin_unlock_irqrestore(&current->sighand->siglock,
+					       flags);
+			dprintk(DEBUG_MUX, "flushing done\n");
+		}
+
+		goto release_req;
+	} else if (ret < 0)
+		goto release_req;
+
+	if (!fcall)
+		ret = -EIO;
+	else {
+		if (fcall->id == RERROR) {
+			ret = v9fs_errstr2errno(fcall->params.rerror.error);
+			if (ret == 0) {	/* string match failed */
+				if (fcall->params.rerror.errno)
+					ret = -(fcall->params.rerror.errno);
+				else
+					ret = -ESERVERFAULT;
+			}
+		} else if (fcall->id != tcall->id + 1) {
+			dprintk(DEBUG_ERROR,
+				"fcall mismatch: expected %d, got %d\n",
+				tcall->id + 1, fcall->id);
+			ret = -EIO;
+		}
+	}
+
+      release_req:
+	if (tcall->id != TVERSION)
+		v9fs_put_idpool(tid, &v9ses->tidpool);
+	if (rcall)
+		*rcall = fcall;
+	else
+		kfree(fcall);
+
+	return ret;
+}
+
+/**
+ * v9fs_mux_cancel_requests - cancels all pending requests
+ *
+ * @v9ses: session info structure
+ * @err: error code to return to the requests
+ */
+void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err)
+{
+	struct v9fs_rpcreq *rptr;
+	struct v9fs_rpcreq *rreq;
+
+	dprintk(DEBUG_MUX, " %d\n", err);
+	spin_lock(&v9ses->muxlock);
+	list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
+		rreq->err = err;
+	}
+	spin_unlock(&v9ses->muxlock);
+	wake_up_all(&v9ses->read_wait);
+}
+
+/**
+ * v9fs_recvproc - kproc to handle demultiplexing responses
+ * @data: session info structure
+ *
+ */
+
+static int v9fs_recvproc(void *data)
+{
+	struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data;
+	struct v9fs_fcall *rcall = NULL;
+	struct v9fs_rpcreq *rptr;
+	struct v9fs_rpcreq *req;
+	struct v9fs_rpcreq *rreq;
+	int err = 0;
+
+	allow_signal(SIGKILL);
+	set_current_state(TASK_INTERRUPTIBLE);
+	complete(&v9ses->proccmpl);
+	while (!kthread_should_stop() && err >= 0) {
+		req = rptr = rreq = NULL;
+
+		rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
+		if (!rcall) {
+			eprintk(KERN_ERR, "no memory for buffers\n");
+			break;
+		}
+
+		err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ);
+		spin_lock(&v9ses->muxlock);
+		if (err < 0) {
+			list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
+				rreq->err = err;
+			}
+			if(err != -ERESTARTSYS)
+				eprintk(KERN_ERR,
+					"Transport error while reading message %d\n", err);
+		} else {
+			list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
+				if (rreq->tcall->tag == rcall->tag) {
+					req = rreq;
+					req->rcall = rcall;
+					break;
+				}
+			}
+		}
+
+		if (req && (req->tcall->id == TFLUSH)) {
+			struct v9fs_rpcreq *treq = NULL;
+			list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) {
+				if (treq->tcall->tag ==
+				    req->tcall->params.tflush.oldtag) {
+					list_del(&rptr->next);
+					kfree(treq->rcall);
+					break;
+				}
+			}
+		}
+
+		spin_unlock(&v9ses->muxlock);
+
+		if (!req) {
+			if (err >= 0)
+				dprintk(DEBUG_ERROR,
+					"unexpected response: id %d tag %d\n",
+					rcall->id, rcall->tag);
+
+			kfree(rcall);
+		}
+
+		wake_up_all(&v9ses->read_wait);
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+
+	v9ses->transport->close(v9ses->transport);
+
+	/* Inform all pending processes about the failure */
+	wake_up_all(&v9ses->read_wait);
+
+	if (signal_pending(current))
+		complete(&v9ses->proccmpl);
+
+	dprintk(DEBUG_MUX, "recvproc: end\n");
+	v9ses->recvproc = NULL;
+
+	return err >= 0;
+}
+
+/**
+ * v9fs_mux_init - initialize multiplexer (spawn kproc)
+ * @v9ses: session info structure
+ * @dev_name: mount device information (to create unique kproc)
+ *
+ */
+
+int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name)
+{
+	char procname[60];
+
+	strncpy(procname, dev_name, sizeof(procname));
+	procname[sizeof(procname) - 1] = 0;
+
+	init_waitqueue_head(&v9ses->read_wait);
+	init_completion(&v9ses->fcread);
+	init_completion(&v9ses->proccmpl);
+	spin_lock_init(&v9ses->muxlock);
+	INIT_LIST_HEAD(&v9ses->mux_fcalls);
+	v9ses->recvproc = NULL;
+	v9ses->curfcall = NULL;
+
+	v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses,
+					 "v9fs_recvproc %s", procname);
+
+	if (IS_ERR(v9ses->recvproc)) {
+		eprintk(KERN_ERR, "cannot create receiving thread\n");
+		v9fs_session_close(v9ses);
+		return -ECONNABORTED;
+	}
+
+	wake_up_process(v9ses->recvproc);
+	wait_for_completion(&v9ses->proccmpl);
+
+	return 0;
+}
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
new file mode 100644
index 00000000000..4994cb10bad
--- /dev/null
+++ b/fs/9p/mux.h
@@ -0,0 +1,41 @@
+/*
+ * linux/fs/9p/mux.h
+ *
+ * Multiplexer Definitions
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+/* structure to manage each RPC transaction */
+
+struct v9fs_rpcreq {
+	struct v9fs_fcall *tcall;
+	struct v9fs_fcall *rcall;
+	int err;	/* error code if response failed */
+
+	/* XXX - could we put scatter/gather buffers here? */
+
+	struct list_head next;
+};
+
+int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name);
+long v9fs_mux_rpc(struct v9fs_session_info *v9ses,
+		  struct v9fs_fcall *tcall, struct v9fs_fcall **rcall);
+void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err);
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
new file mode 100644
index 00000000000..63b58ce98ff
--- /dev/null
+++ b/fs/9p/trans_fd.c
@@ -0,0 +1,172 @@
+/*
+ * linux/fs/9p/trans_fd.c
+ *
+ * File Descriptor Transport Layer
+ *
+ *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/un.h>
+#include <asm/uaccess.h>
+#include <linux/inet.h>
+#include <linux/idr.h>
+#include <linux/file.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "transport.h"
+
+struct v9fs_trans_fd {
+	struct file *in_file;
+	struct file *out_file;
+};
+
+/**
+ * v9fs_fd_recv - receive from a socket
+ * @v9ses: session information
+ * @v: buffer to receive data into
+ * @len: size of receive buffer
+ *
+ */
+
+static int v9fs_fd_recv(struct v9fs_transport *trans, void *v, int len)
+{
+	struct v9fs_trans_fd *ts = trans ? trans->priv : NULL;
+
+	if (!trans || trans->status != Connected || !ts)
+		return -EIO;
+
+	return kernel_read(ts->in_file, ts->in_file->f_pos, v, len);
+}
+
+/**
+ * v9fs_fd_send - send to a socket
+ * @v9ses: session information
+ * @v: buffer to send data from
+ * @len: size of send buffer
+ *
+ */
+
+static int v9fs_fd_send(struct v9fs_transport *trans, void *v, int len)
+{
+	struct v9fs_trans_fd *ts = trans ? trans->priv : NULL;
+	mm_segment_t oldfs = get_fs();
+	int ret = 0;
+
+	if (!trans || trans->status != Connected || !ts)
+		return -EIO;
+
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	ret = vfs_write(ts->out_file, (void __user *)v, len, &ts->out_file->f_pos);
+	set_fs(oldfs);
+
+	return ret;
+}
+
+/**
+ * v9fs_fd_init - initialize file descriptor transport
+ * @v9ses: session information
+ * @addr: address of server to mount
+ * @data: mount options
+ *
+ */
+
+static int
+v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
+{
+	struct v9fs_trans_fd *ts = NULL;
+	struct v9fs_transport *trans = v9ses->transport;
+
+	if((v9ses->wfdno == ~0) || (v9ses->rfdno == ~0)) {
+		printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
+		return -ENOPROTOOPT;
+	}
+
+	sema_init(&trans->writelock, 1);
+	sema_init(&trans->readlock, 1);
+
+	ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL);
+
+	if (!ts)
+		return -ENOMEM;
+
+	ts->in_file = fget( v9ses->rfdno );
+	ts->out_file = fget( v9ses->wfdno );
+
+	if (!ts->in_file || !ts->out_file) {
+		if (ts->in_file)
+			fput(ts->in_file);
+
+		if (ts->out_file)
+			fput(ts->out_file);
+
+		kfree(ts);
+		return -EIO;
+	}
+
+	trans->priv = ts;
+	trans->status = Connected;
+
+	return 0;
+}
+
+
+/**
+ * v9fs_fd_close - shutdown file descriptor
+ * @trans: private socket structure
+ *
+ */
+
+static void v9fs_fd_close(struct v9fs_transport *trans)
+{
+	struct v9fs_trans_fd *ts;
+
+	if (!trans)
+		return;
+
+	trans->status = Disconnected;
+	ts = trans->priv;
+
+	if (!ts)
+		return;
+
+	if (ts->in_file)
+		fput(ts->in_file);
+
+	if (ts->out_file)
+		fput(ts->out_file);
+
+	kfree(ts);
+}
+
+struct v9fs_transport v9fs_trans_fd = {
+	.init = v9fs_fd_init,
+	.write = v9fs_fd_send,
+	.read = v9fs_fd_recv,
+	.close = v9fs_fd_close,
+};
+
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c
new file mode 100644
index 00000000000..01e26f0013a
--- /dev/null
+++ b/fs/9p/trans_sock.c
@@ -0,0 +1,290 @@
+/*
+ * linux/fs/9p/trans_socket.c
+ *
+ * Socket Transport Layer
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
+ *  Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/un.h>
+#include <asm/uaccess.h>
+#include <linux/inet.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "transport.h"
+
+#define V9FS_PORT 564
+
+struct v9fs_trans_sock {
+	struct socket *s;
+};
+
+/**
+ * v9fs_sock_recv - receive from a socket
+ * @v9ses: session information
+ * @v: buffer to receive data into
+ * @len: size of receive buffer
+ *
+ */
+
+static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
+{
+	struct msghdr msg;
+	struct kvec iov;
+	int result;
+	mm_segment_t oldfs;
+	struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+
+	if (trans->status == Disconnected)
+		return -EREMOTEIO;
+
+	result = -EINVAL;
+
+	oldfs = get_fs();
+	set_fs(get_ds());
+
+	iov.iov_base = v;
+	iov.iov_len = len;
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_iovlen = 1;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_namelen = 0;
+	msg.msg_flags = MSG_NOSIGNAL;
+
+	result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0);
+
+	dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state);
+	set_fs(oldfs);
+
+	if (result <= 0) {
+		if (result != -ERESTARTSYS)
+			trans->status = Disconnected;
+	}
+
+	return result;
+}
+
+/**
+ * v9fs_sock_send - send to a socket
+ * @v9ses: session information
+ * @v: buffer to send data from
+ * @len: size of send buffer
+ *
+ */
+
+static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len)
+{
+	struct kvec iov;
+	struct msghdr msg;
+	int result = -1;
+	mm_segment_t oldfs;
+	struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+
+	dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len);
+	dump_data(v, len);
+
+	down(&trans->writelock);
+
+	oldfs = get_fs();
+	set_fs(get_ds());
+	iov.iov_base = v;
+	iov.iov_len = len;
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_iovlen = 1;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_namelen = 0;
+	msg.msg_flags = MSG_NOSIGNAL;
+	result = kernel_sendmsg(ts->s, &msg, &iov, 1, len);
+	set_fs(oldfs);
+
+	if (result < 0) {
+		if (result != -ERESTARTSYS)
+			trans->status = Disconnected;
+	}
+
+	up(&trans->writelock);
+	return result;
+}
+
+/**
+ * v9fs_tcp_init - initialize TCP socket
+ * @v9ses: session information
+ * @addr: address of server to mount
+ * @data: mount options
+ *
+ */
+
+static int
+v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
+{
+	struct socket *csocket = NULL;
+	struct sockaddr_in sin_server;
+	int rc = 0;
+	struct v9fs_trans_sock *ts = NULL;
+	struct v9fs_transport *trans = v9ses->transport;
+
+	sema_init(&trans->writelock, 1);
+	sema_init(&trans->readlock, 1);
+
+	ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
+
+	if (!ts)
+		return -ENOMEM;
+
+	trans->priv = ts;
+	ts->s = NULL;
+
+	if (!addr)
+		return -EINVAL;
+
+	dprintk(DEBUG_TRANS, "Connecting to %s\n", addr);
+
+	sin_server.sin_family = AF_INET;
+	sin_server.sin_addr.s_addr = in_aton(addr);
+	sin_server.sin_port = htons(v9ses->port);
+	sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
+	rc = csocket->ops->connect(csocket,
+				   (struct sockaddr *)&sin_server,
+				   sizeof(struct sockaddr_in), 0);
+	if (rc < 0) {
+		eprintk(KERN_ERR,
+			"v9fs_trans_tcp: problem connecting socket to %s\n",
+			addr);
+		return rc;
+	}
+	csocket->sk->sk_allocation = GFP_NOIO;
+	ts->s = csocket;
+	trans->status = Connected;
+
+	return 0;
+}
+
+/**
+ * v9fs_unix_init - initialize UNIX domain socket
+ * @v9ses: session information
+ * @dev_name: path to named pipe
+ * @data: mount options
+ *
+ */
+
+static int
+v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
+	       char *data)
+{
+	int rc;
+	struct socket *csocket;
+	struct sockaddr_un sun_server;
+	struct v9fs_transport *trans;
+	struct v9fs_trans_sock *ts;
+
+	rc = 0;
+	csocket = NULL;
+	trans = v9ses->transport;
+
+	if (strlen(dev_name) > UNIX_PATH_MAX) {
+		eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
+			dev_name);
+		return -ENOMEM;
+	}
+
+	ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	trans->priv = ts;
+	ts->s = NULL;
+
+	sema_init(&trans->writelock, 1);
+	sema_init(&trans->readlock, 1);
+
+	sun_server.sun_family = PF_UNIX;
+	strcpy(sun_server.sun_path, dev_name);
+	sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
+	rc = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
+		sizeof(struct sockaddr_un) - 1, 0);	/* -1 *is* important */
+	if (rc < 0) {
+		eprintk(KERN_ERR,
+			"v9fs_trans_unix: problem connecting socket: %s: %d\n",
+			dev_name, rc);
+		return rc;
+	}
+	csocket->sk->sk_allocation = GFP_NOIO;
+	ts->s = csocket;
+	trans->status = Connected;
+
+	return 0;
+}
+
+/**
+ * v9fs_sock_close - shutdown socket
+ * @trans: private socket structure
+ *
+ */
+
+static void v9fs_sock_close(struct v9fs_transport *trans)
+{
+	struct v9fs_trans_sock *ts;
+
+	if (!trans)
+		return;
+
+	ts = trans->priv;
+
+	if ((ts) && (ts->s)) {
+		dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s);
+		sock_release(ts->s);
+		ts->s = NULL;
+		trans->status = Disconnected;
+		dprintk(DEBUG_TRANS, "socket closed\n");
+	}
+
+	if (ts)
+		kfree(ts);
+
+	trans->priv = NULL;
+}
+
+struct v9fs_transport v9fs_trans_tcp = {
+	.init = v9fs_tcp_init,
+	.write = v9fs_sock_send,
+	.read = v9fs_sock_recv,
+	.close = v9fs_sock_close,
+};
+
+struct v9fs_transport v9fs_trans_unix = {
+	.init = v9fs_unix_init,
+	.write = v9fs_sock_send,
+	.read = v9fs_sock_recv,
+	.close = v9fs_sock_close,
+};
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
new file mode 100644
index 00000000000..9e9cd418efd
--- /dev/null
+++ b/fs/9p/transport.h
@@ -0,0 +1,46 @@
+/*
+ * linux/fs/9p/transport.h
+ *
+ * Transport Definition
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+enum v9fs_transport_status {
+	Connected,
+	Disconnected,
+	Hung,
+};
+
+struct v9fs_transport {
+	enum v9fs_transport_status status;
+	struct semaphore writelock;
+	struct semaphore readlock;
+	void *priv;
+
+	int (*init) (struct v9fs_session_info *, const char *, char *);
+	int (*write) (struct v9fs_transport *, void *, int);
+	int (*read) (struct v9fs_transport *, void *, int);
+	void (*close) (struct v9fs_transport *);
+};
+
+extern struct v9fs_transport v9fs_trans_tcp;
+extern struct v9fs_transport v9fs_trans_unix;
+extern struct v9fs_transport v9fs_trans_fd;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
new file mode 100644
index 00000000000..82303f3bf76
--- /dev/null
+++ b/fs/9p/v9fs.c
@@ -0,0 +1,458 @@
+/*
+ *  linux/fs/9p/v9fs.c
+ *
+ *  This file contains functions assisting in mapping VFS to 9P2000
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/parser.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "transport.h"
+#include "mux.h"
+#include "conv.h"
+
+/* TODO: sysfs or debugfs interface */
+int v9fs_debug_level = 0;	/* feature-rific global debug level  */
+
+/*
+  * Option Parsing (code inspired by NFS code)
+  *
+  */
+
+enum {
+	/* Options that take integer arguments */
+	Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug,
+	Opt_rfdno, Opt_wfdno,
+	/* String options */
+	Opt_name, Opt_remotename,
+	/* Options that take no arguments */
+	Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd,
+	/* Error token */
+	Opt_err
+};
+
+static match_table_t tokens = {
+	{Opt_port, "port=%u"},
+	{Opt_msize, "msize=%u"},
+	{Opt_uid, "uid=%u"},
+	{Opt_gid, "gid=%u"},
+	{Opt_afid, "afid=%u"},
+	{Opt_rfdno, "rfdno=%u"},
+	{Opt_wfdno, "wfdno=%u"},
+	{Opt_debug, "debug=%u"},
+	{Opt_name, "name=%s"},
+	{Opt_remotename, "aname=%s"},
+	{Opt_unix, "proto=unix"},
+	{Opt_tcp, "proto=tcp"},
+	{Opt_fd, "proto=fd"},
+	{Opt_tcp, "tcp"},
+	{Opt_unix, "unix"},
+	{Opt_fd, "fd"},
+	{Opt_legacy, "noextend"},
+	{Opt_nodevmap, "nodevmap"},
+	{Opt_err, NULL}
+};
+
+/*
+ *  Parse option string.
+ */
+
+/**
+ * v9fs_parse_options - parse mount options into session structure
+ * @options: options string passed from mount
+ * @v9ses: existing v9fs session information
+ *
+ */
+
+static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+	int ret;
+
+	/* setup defaults */
+	v9ses->port = V9FS_PORT;
+	v9ses->maxdata = 9000;
+	v9ses->proto = PROTO_TCP;
+	v9ses->extended = 1;
+	v9ses->afid = ~0;
+	v9ses->debug = 0;
+	v9ses->rfdno = ~0;
+	v9ses->wfdno = ~0;
+
+	if (!options)
+		return;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+		if (!*p)
+			continue;
+		token = match_token(p, tokens, args);
+		if (token < Opt_name) {
+			if ((ret = match_int(&args[0], &option)) < 0) {
+				dprintk(DEBUG_ERROR,
+					"integer field, but no integer?\n");
+				continue;
+			}
+
+		}
+		switch (token) {
+		case Opt_port:
+			v9ses->port = option;
+			break;
+		case Opt_msize:
+			v9ses->maxdata = option;
+			break;
+		case Opt_uid:
+			v9ses->uid = option;
+			break;
+		case Opt_gid:
+			v9ses->gid = option;
+			break;
+		case Opt_afid:
+			v9ses->afid = option;
+			break;
+		case Opt_rfdno:
+			v9ses->rfdno = option;
+			break;
+		case Opt_wfdno:
+			v9ses->wfdno = option;
+			break;
+		case Opt_debug:
+			v9ses->debug = option;
+			break;
+		case Opt_tcp:
+			v9ses->proto = PROTO_TCP;
+			break;
+		case Opt_unix:
+			v9ses->proto = PROTO_UNIX;
+			break;
+		case Opt_fd:
+			v9ses->proto = PROTO_FD;
+			break;
+		case Opt_name:
+			match_strcpy(v9ses->name, &args[0]);
+			break;
+		case Opt_remotename:
+			match_strcpy(v9ses->remotename, &args[0]);
+			break;
+		case Opt_legacy:
+			v9ses->extended = 0;
+			break;
+		case Opt_nodevmap:
+			v9ses->nodev = 1;
+			break;
+		default:
+			continue;
+		}
+	}
+}
+
+/**
+ * v9fs_inode2v9ses - safely extract v9fs session info from super block
+ * @inode: inode to extract information from
+ *
+ * Paranoid function to extract v9ses information from superblock,
+ * if anything is missing it will report an error.
+ *
+ */
+
+struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
+{
+	return (inode->i_sb->s_fs_info);
+}
+
+/**
+ * v9fs_get_idpool - allocate numeric id from pool
+ * @p - pool to allocate from
+ *
+ * XXX - This seems to be an awful generic function, should it be in idr.c with
+ *            the lock included in struct idr?
+ */
+
+int v9fs_get_idpool(struct v9fs_idpool *p)
+{
+	int i = 0;
+	int error;
+
+retry:
+	if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
+		return 0;
+
+	if (down_interruptible(&p->lock) == -EINTR) {
+		eprintk(KERN_WARNING, "Interrupted while locking\n");
+		return -1;
+	}
+
+	error = idr_get_new(&p->pool, NULL, &i);
+	up(&p->lock);
+
+	if (error == -EAGAIN)
+		goto retry;
+	else if (error)
+		return -1;
+
+	return i;
+}
+
+/**
+ * v9fs_put_idpool - release numeric id from pool
+ * @p - pool to allocate from
+ *
+ * XXX - This seems to be an awful generic function, should it be in idr.c with
+ *            the lock included in struct idr?
+ */
+
+void v9fs_put_idpool(int id, struct v9fs_idpool *p)
+{
+	if (down_interruptible(&p->lock) == -EINTR) {
+		eprintk(KERN_WARNING, "Interrupted while locking\n");
+		return;
+	}
+	idr_remove(&p->pool, id);
+	up(&p->lock);
+}
+
+/**
+ * v9fs_session_init - initialize session
+ * @v9ses: session information structure
+ * @dev_name: device being mounted
+ * @data: options
+ *
+ */
+
+int
+v9fs_session_init(struct v9fs_session_info *v9ses,
+		  const char *dev_name, char *data)
+{
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_transport *trans_proto;
+	int n = 0;
+	int newfid = -1;
+	int retval = -EINVAL;
+
+	v9ses->name = __getname();
+	if (!v9ses->name)
+		return -ENOMEM;
+
+	v9ses->remotename = __getname();
+	if (!v9ses->remotename) {
+		putname(v9ses->name);
+		return -ENOMEM;
+	}
+
+	strcpy(v9ses->name, V9FS_DEFUSER);
+	strcpy(v9ses->remotename, V9FS_DEFANAME);
+
+	v9fs_parse_options(data, v9ses);
+
+	/* set global debug level */
+	v9fs_debug_level = v9ses->debug;
+
+	/* id pools that are session-dependent: FIDs and TIDs */
+	idr_init(&v9ses->fidpool.pool);
+	init_MUTEX(&v9ses->fidpool.lock);
+	idr_init(&v9ses->tidpool.pool);
+	init_MUTEX(&v9ses->tidpool.lock);
+
+
+	switch (v9ses->proto) {
+	case PROTO_TCP:
+		trans_proto = &v9fs_trans_tcp;
+		break;
+	case PROTO_UNIX:
+		trans_proto = &v9fs_trans_unix;
+		*v9ses->remotename = 0;
+		break;
+	case PROTO_FD:
+		trans_proto = &v9fs_trans_fd;
+		*v9ses->remotename = 0;
+		break;
+	default:
+		printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto);
+		retval = -ENOPROTOOPT;
+		goto SessCleanUp;
+	};
+
+	v9ses->transport = kmalloc(sizeof(*v9ses->transport), GFP_KERNEL);
+	if (!v9ses->transport) {
+		retval = -ENOMEM;
+		goto SessCleanUp;
+	}
+
+	memmove(v9ses->transport, trans_proto, sizeof(*v9ses->transport));
+
+	if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) {
+		eprintk(KERN_ERR, "problem initializing transport\n");
+		goto SessCleanUp;
+	}
+
+	v9ses->inprogress = 0;
+	v9ses->shutdown = 0;
+	v9ses->session_hung = 0;
+
+	if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) {
+		dprintk(DEBUG_ERROR, "problem initializing mux\n");
+		goto SessCleanUp;
+	}
+
+	if (v9ses->afid == ~0) {
+		if (v9ses->extended)
+			retval =
+			    v9fs_t_version(v9ses, v9ses->maxdata, "9P2000.u",
+					   &fcall);
+		else
+			retval = v9fs_t_version(v9ses, v9ses->maxdata, "9P2000",
+						&fcall);
+
+		if (retval < 0) {
+			dprintk(DEBUG_ERROR, "v9fs_t_version failed\n");
+			goto FreeFcall;
+		}
+
+		/* Really should check for 9P1 and report error */
+		if (!strcmp(fcall->params.rversion.version, "9P2000.u")) {
+			dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
+			v9ses->extended = 1;
+		} else {
+			dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
+			v9ses->extended = 0;
+		}
+
+		n = fcall->params.rversion.msize;
+		kfree(fcall);
+
+		if (n < v9ses->maxdata)
+			v9ses->maxdata = n;
+	}
+
+	newfid = v9fs_get_idpool(&v9ses->fidpool);
+	if (newfid < 0) {
+		eprintk(KERN_WARNING, "couldn't allocate FID\n");
+		retval = -ENOMEM;
+		goto SessCleanUp;
+	}
+	/* it is a little bit ugly, but we have to prevent newfid */
+	/* being the same as afid, so if it is, get a new fid     */
+	if (v9ses->afid != ~0 && newfid == v9ses->afid) {
+		newfid = v9fs_get_idpool(&v9ses->fidpool);
+		if (newfid < 0) {
+			eprintk(KERN_WARNING, "couldn't allocate FID\n");
+			retval = -ENOMEM;
+			goto SessCleanUp;
+		}
+	}
+
+	if ((retval =
+	     v9fs_t_attach(v9ses, v9ses->name, v9ses->remotename, newfid,
+			   v9ses->afid, NULL))
+	    < 0) {
+		dprintk(DEBUG_ERROR, "cannot attach\n");
+		goto SessCleanUp;
+	}
+
+	if (v9ses->afid != ~0) {
+		if (v9fs_t_clunk(v9ses, v9ses->afid, NULL))
+			dprintk(DEBUG_ERROR, "clunk failed\n");
+	}
+
+	return newfid;
+
+      FreeFcall:
+	kfree(fcall);
+
+      SessCleanUp:
+	v9fs_session_close(v9ses);
+	return retval;
+}
+
+/**
+ * v9fs_session_close - shutdown a session
+ * @v9ses: session information structure
+ *
+ */
+
+void v9fs_session_close(struct v9fs_session_info *v9ses)
+{
+	if (v9ses->recvproc) {
+		send_sig(SIGKILL, v9ses->recvproc, 1);
+		wait_for_completion(&v9ses->proccmpl);
+	}
+
+	if (v9ses->transport)
+		v9ses->transport->close(v9ses->transport);
+
+	putname(v9ses->name);
+	putname(v9ses->remotename);
+}
+
+/**
+ * v9fs_session_cancel - mark transport as disconnected
+ * 	and cancel all pending requests.
+ */
+void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
+	v9ses->transport->status = Disconnected;
+	v9fs_mux_cancel_requests(v9ses, -EIO);
+}
+
+extern int v9fs_error_init(void);
+
+/**
+ * v9fs_init - Initialize module
+ *
+ */
+
+static int __init init_v9fs(void)
+{
+	v9fs_error_init();
+
+	printk(KERN_INFO "Installing v9fs 9P2000 file system support\n");
+
+	return register_filesystem(&v9fs_fs_type);
+}
+
+/**
+ * v9fs_init - shutdown module
+ *
+ */
+
+static void __exit exit_v9fs(void)
+{
+	unregister_filesystem(&v9fs_fs_type);
+}
+
+module_init(init_v9fs)
+module_exit(exit_v9fs)
+
+MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
+MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>");
+MODULE_LICENSE("GPL");
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
new file mode 100644
index 00000000000..45dcef42bdd
--- /dev/null
+++ b/fs/9p/v9fs.h
@@ -0,0 +1,103 @@
+/*
+ * V9FS definitions.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+/*
+  * Idpool structure provides lock and id management
+  *
+  */
+
+struct v9fs_idpool {
+	struct semaphore lock;
+	struct idr pool;
+};
+
+/*
+  * Session structure provides information for an opened session
+  *
+  */
+
+struct v9fs_session_info {
+	/* options */
+	unsigned int maxdata;
+	unsigned char extended;	/* set to 1 if we are using UNIX extensions */
+	unsigned char nodev;	/* set to 1 if no disable device mapping */
+	unsigned short port;	/* port to connect to */
+	unsigned short debug;	/* debug level */
+	unsigned short proto;	/* protocol to use */
+	unsigned int afid;	/* authentication fid */
+	unsigned int rfdno;	/* read file descriptor number */
+	unsigned int wfdno;	/* write file descriptor number */
+
+
+	char *name;		/* user name to mount as */
+	char *remotename;	/* name of remote hierarchy being mounted */
+	unsigned int uid;	/* default uid/muid for legacy support */
+	unsigned int gid;	/* default gid for legacy support */
+
+	/* book keeping */
+	struct v9fs_idpool fidpool;	/* The FID pool for file descriptors */
+	struct v9fs_idpool tidpool;	/* The TID pool for transactions ids */
+
+	/* transport information */
+	struct v9fs_transport *transport;
+
+	int inprogress;		/* session in progress => true */
+	int shutdown;		/* session shutting down. no more attaches. */
+	unsigned char session_hung;
+
+	/* mux private data */
+	struct v9fs_fcall *curfcall;
+	wait_queue_head_t read_wait;
+	struct completion fcread;
+	struct completion proccmpl;
+	struct task_struct *recvproc;
+
+	spinlock_t muxlock;
+	struct list_head mux_fcalls;
+};
+
+/* possible values of ->proto */
+enum {
+	PROTO_TCP,
+	PROTO_UNIX,
+	PROTO_FD,
+};
+
+int v9fs_session_init(struct v9fs_session_info *, const char *, char *);
+struct v9fs_session_info *v9fs_inode2v9ses(struct inode *);
+void v9fs_session_close(struct v9fs_session_info *v9ses);
+int v9fs_get_idpool(struct v9fs_idpool *p);
+void v9fs_put_idpool(int id, struct v9fs_idpool *p);
+void v9fs_session_cancel(struct v9fs_session_info *v9ses);
+
+#define V9FS_MAGIC 0x01021997
+
+/* other default globals */
+#define V9FS_PORT		564
+#define V9FS_DEFUSER	"nobody"
+#define V9FS_DEFANAME	""
+
+/* inital pool sizes for fids and tags */
+#define V9FS_START_FIDS 8192
+#define V9FS_START_TIDS 256
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
new file mode 100644
index 00000000000..2f2cea7ee3e
--- /dev/null
+++ b/fs/9p/v9fs_vfs.h
@@ -0,0 +1,53 @@
+/*
+ * V9FS VFS extensions.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+/* plan9 semantics are that created files are implicitly opened.
+ * But linux semantics are that you call create, then open.
+ * the plan9 approach is superior as it provides an atomic
+ * open.
+ * we track the create fid here. When the file is opened, if fidopen is
+ * non-zero, we use the fid and can skip some steps.
+ * there may be a better way to do this, but I don't know it.
+ * one BAD way is to clunk the fid on create, then open it again:
+ * you lose the atomicity of file open
+ */
+
+/* special case:
+ * unlink calls remove, which is an implicit clunk. So we have to track
+ * that kind of thing so that we don't try to clunk a dead fid.
+ */
+
+extern struct file_system_type v9fs_fs_type;
+extern struct file_operations v9fs_file_operations;
+extern struct file_operations v9fs_dir_operations;
+extern struct dentry_operations v9fs_dentry_operations;
+
+struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+ino_t v9fs_qid2ino(struct v9fs_qid *qid);
+void v9fs_mistat2inode(struct v9fs_stat *, struct inode *,
+		       struct super_block *);
+int v9fs_dir_release(struct inode *inode, struct file *filp);
+int v9fs_file_open(struct inode *inode, struct file *file);
+void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat);
+void v9fs_dentry_release(struct dentry *);
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
new file mode 100644
index 00000000000..a6aa947de0f
--- /dev/null
+++ b/fs/9p/vfs_dentry.c
@@ -0,0 +1,126 @@
+/*
+ *  linux/fs/9p/vfs_dentry.c
+ *
+ * This file contians vfs dentry ops for the 9P2000 protocol.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/namei.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+/**
+ * v9fs_dentry_validate - VFS dcache hook to validate cache
+ * @dentry:  dentry that is being validated
+ * @nd: path data
+ *
+ * dcache really shouldn't be used for 9P2000 as at all due to
+ * potential attached semantics to directory traversal (walk).
+ *
+ * FUTURE: look into how to use dcache to allow multi-stage
+ * walks in Plan 9 & potential for better dcache operation which
+ * would remain valid for Plan 9 semantics.  Older versions
+ * had validation via stat for those interested.  However, since
+ * stat has the same approximate overhead as walk there really
+ * is no difference.  The only improvement would be from a
+ * time-decay cache like NFS has and that undermines the
+ * synchronous nature of 9P2000.
+ *
+ */
+
+static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd)
+{
+	struct dentry *dc = current->fs->pwd;
+
+	dprintk(DEBUG_VFS, "dentry: %s (%p)\n", dentry->d_iname, dentry);
+	if (v9fs_fid_lookup(dentry)) {
+		dprintk(DEBUG_VFS, "VALID\n");
+		return 1;
+	}
+
+	while (dc != NULL) {
+		if (dc == dentry) {
+			dprintk(DEBUG_VFS, "VALID\n");
+			return 1;
+		}
+		if (dc == dc->d_parent)
+			break;
+
+		dc = dc->d_parent;
+	}
+
+	dprintk(DEBUG_VFS, "INVALID\n");
+	return 0;
+}
+
+/**
+ * v9fs_dentry_release - called when dentry is going to be freed
+ * @dentry:  dentry that is being release
+ *
+ */
+
+void v9fs_dentry_release(struct dentry *dentry)
+{
+	dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+
+	if (dentry->d_fsdata != NULL) {
+		struct list_head *fid_list = dentry->d_fsdata;
+		struct v9fs_fid *temp = NULL;
+		struct v9fs_fid *current_fid = NULL;
+		struct v9fs_fcall *fcall = NULL;
+
+		list_for_each_entry_safe(current_fid, temp, fid_list, list) {
+			if (v9fs_t_clunk
+			    (current_fid->v9ses, current_fid->fid, &fcall))
+				dprintk(DEBUG_ERROR, "clunk failed: %s\n",
+					FCALL_ERROR(fcall));
+
+			v9fs_put_idpool(current_fid->fid,
+					&current_fid->v9ses->fidpool);
+
+			kfree(fcall);
+			v9fs_fid_destroy(current_fid);
+		}
+
+		kfree(dentry->d_fsdata);	/* free the list_head */
+	}
+}
+
+struct dentry_operations v9fs_dentry_operations = {
+	.d_revalidate = v9fs_dentry_validate,
+	.d_release = v9fs_dentry_release,
+};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
new file mode 100644
index 00000000000..57a43b8feef
--- /dev/null
+++ b/fs/9p/vfs_dir.c
@@ -0,0 +1,223 @@
+/*
+ * linux/fs/9p/vfs_dir.c
+ *
+ * This file contains vfs directory ops for the 9P2000 protocol.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+/**
+ * dt_type - return file type
+ * @mistat: mistat structure
+ *
+ */
+
+static inline int dt_type(struct v9fs_stat *mistat)
+{
+	unsigned long perm = mistat->mode;
+	int rettype = DT_REG;
+
+	if (perm & V9FS_DMDIR)
+		rettype = DT_DIR;
+	if (perm & V9FS_DMSYMLINK)
+		rettype = DT_LNK;
+
+	return rettype;
+}
+
+/**
+ * v9fs_dir_readdir - read a directory
+ * @filep: opened file structure
+ * @dirent: directory structure ???
+ * @filldir: function to populate directory structure ???
+ *
+ */
+
+static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct v9fs_fcall *fcall = NULL;
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *file = filp->private_data;
+	unsigned int i, n;
+	int fid = -1;
+	int ret = 0;
+	struct v9fs_stat *mi = NULL;
+	int over = 0;
+
+	dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name);
+
+	fid = file->fid;
+
+	mi = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	if (!mi)
+		return -ENOMEM;
+
+	if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) {
+		kfree(file->rdir_fcall);
+		file->rdir_fcall = NULL;
+	}
+
+	if (file->rdir_fcall) {
+		n = file->rdir_fcall->params.rread.count;
+		i = file->rdir_fpos;
+		while (i < n) {
+			int s = v9fs_deserialize_stat(v9ses,
+				  file->rdir_fcall->params.rread.data + i,
+			          n - i, mi, v9ses->maxdata);
+
+			if (s == 0) {
+				dprintk(DEBUG_ERROR,
+					"error while deserializing mistat\n");
+				ret = -EIO;
+				goto FreeStructs;
+			}
+
+			over = filldir(dirent, mi->name, strlen(mi->name),
+				    filp->f_pos, v9fs_qid2ino(&mi->qid),
+				    dt_type(mi));
+
+			if (over) {
+				file->rdir_fpos = i;
+				file->rdir_pos = filp->f_pos;
+				break;
+			}
+
+			i += s;
+			filp->f_pos += s;
+		}
+
+		if (!over) {
+			kfree(file->rdir_fcall);
+			file->rdir_fcall = NULL;
+		}
+	}
+
+	while (!over) {
+		ret = v9fs_t_read(v9ses, fid, filp->f_pos,
+					    v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
+		if (ret < 0) {
+			dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
+				ret, fcall);
+			goto FreeStructs;
+		} else if (ret == 0)
+			break;
+
+		n = ret;
+		i = 0;
+		while (i < n) {
+			int s = v9fs_deserialize_stat(v9ses,
+			          fcall->params.rread.data + i, n - i, mi,
+			          v9ses->maxdata);
+
+			if (s == 0) {
+				dprintk(DEBUG_ERROR,
+					"error while deserializing mistat\n");
+				return -EIO;
+			}
+
+			over = filldir(dirent, mi->name, strlen(mi->name),
+				    filp->f_pos, v9fs_qid2ino(&mi->qid),
+				    dt_type(mi));
+
+			if (over) {
+				file->rdir_fcall = fcall;
+				file->rdir_fpos = i;
+				file->rdir_pos = filp->f_pos;
+				fcall = NULL;
+				break;
+			}
+
+			i += s;
+			filp->f_pos += s;
+		}
+
+		kfree(fcall);
+	}
+
+      FreeStructs:
+	kfree(fcall);
+	kfree(mi);
+	return ret;
+}
+
+/**
+ * v9fs_dir_release - close a directory
+ * @inode: inode of the directory
+ * @filp: file pointer to a directory
+ *
+ */
+
+int v9fs_dir_release(struct inode *inode, struct file *filp)
+{
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *fid = filp->private_data;
+	int fidnum = -1;
+
+	dprintk(DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp,
+		fid->fid);
+	fidnum = fid->fid;
+
+	filemap_fdatawrite(inode->i_mapping);
+	filemap_fdatawait(inode->i_mapping);
+
+	if (fidnum >= 0) {
+		dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
+			fid->fid);
+
+		if (v9fs_t_clunk(v9ses, fidnum, NULL))
+			dprintk(DEBUG_ERROR, "clunk failed\n");
+
+		v9fs_put_idpool(fid->fid, &v9ses->fidpool);
+
+		kfree(fid->rdir_fcall);
+		kfree(fid);
+
+		filp->private_data = NULL;
+	}
+
+	d_drop(filp->f_dentry);
+	return 0;
+}
+
+struct file_operations v9fs_dir_operations = {
+	.read = generic_read_dir,
+	.readdir = v9fs_dir_readdir,
+	.open = v9fs_file_open,
+	.release = v9fs_dir_release,
+};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
new file mode 100644
index 00000000000..bbc3cc63854
--- /dev/null
+++ b/fs/9p/vfs_file.c
@@ -0,0 +1,321 @@
+/*
+ *  linux/fs/9p/vfs_file.c
+ *
+ * This file contians vfs file ops for 9P2000.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/version.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "fid.h"
+
+/**
+ * v9fs_file_open - open a file (or directory)
+ * @inode: inode to be opened
+ * @file: file being opened
+ *
+ */
+
+int v9fs_file_open(struct inode *inode, struct file *file)
+{
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *v9fid, *fid;
+	struct v9fs_fcall *fcall = NULL;
+	int open_mode = 0;
+	unsigned int iounit = 0;
+	int newfid = -1;
+	long result = -1;
+
+	dprintk(DEBUG_VFS, "inode: %p file: %p \n", inode, file);
+
+	v9fid = v9fs_fid_get_created(file->f_dentry);
+	if (!v9fid)
+		v9fid = v9fs_fid_lookup(file->f_dentry);
+
+	if (!v9fid) {
+		dprintk(DEBUG_ERROR, "Couldn't resolve fid from dentry\n");
+		return -EBADF;
+	}
+
+	if (!v9fid->fidcreate) {
+		fid = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
+		if (fid == NULL) {
+			dprintk(DEBUG_ERROR, "Out of Memory\n");
+			return -ENOMEM;
+		}
+
+		fid->fidopen = 0;
+		fid->fidcreate = 0;
+		fid->fidclunked = 0;
+		fid->iounit = 0;
+		fid->v9ses = v9ses;
+
+		newfid = v9fs_get_idpool(&v9ses->fidpool);
+		if (newfid < 0) {
+			eprintk(KERN_WARNING, "newfid fails!\n");
+			return -ENOSPC;
+		}
+
+		result =
+		    v9fs_t_walk(v9ses, v9fid->fid, newfid, NULL, NULL);
+
+		if (result < 0) {
+			v9fs_put_idpool(newfid, &v9ses->fidpool);
+			dprintk(DEBUG_ERROR, "rewalk didn't work\n");
+			return -EBADF;
+		}
+
+		fid->fid = newfid;
+		v9fid = fid;
+		/* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */
+		/* translate open mode appropriately */
+		open_mode = file->f_flags & 0x3;
+
+		if (file->f_flags & O_EXCL)
+			open_mode |= V9FS_OEXCL;
+
+		if (v9ses->extended) {
+			if (file->f_flags & O_TRUNC)
+				open_mode |= V9FS_OTRUNC;
+
+			if (file->f_flags & O_APPEND)
+				open_mode |= V9FS_OAPPEND;
+		}
+
+		result = v9fs_t_open(v9ses, newfid, open_mode, &fcall);
+		if (result < 0) {
+			dprintk(DEBUG_ERROR,
+				"open failed, open_mode 0x%x: %s\n", open_mode,
+				FCALL_ERROR(fcall));
+			kfree(fcall);
+			return result;
+		}
+
+		iounit = fcall->params.ropen.iounit;
+		kfree(fcall);
+	} else {
+		/* create case */
+		newfid = v9fid->fid;
+		iounit = v9fid->iounit;
+		v9fid->fidcreate = 0;
+	}
+
+	file->private_data = v9fid;
+
+	v9fid->rdir_pos = 0;
+	v9fid->rdir_fcall = NULL;
+	v9fid->fidopen = 1;
+	v9fid->filp = file;
+	v9fid->iounit = iounit;
+
+	return 0;
+}
+
+/**
+ * v9fs_file_lock - lock a file (or directory)
+ * @inode: inode to be opened
+ * @file: file being opened
+ *
+ * XXX - this looks like a local only lock, we should extend into 9P
+ *       by using open exclusive
+ */
+
+static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	int res = 0;
+	struct inode *inode = filp->f_dentry->d_inode;
+
+	dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
+
+	/* No mandatory locks */
+	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+		return -ENOLCK;
+
+	if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
+		filemap_fdatawrite(inode->i_mapping);
+		filemap_fdatawait(inode->i_mapping);
+		invalidate_inode_pages(&inode->i_data);
+	}
+
+	return res;
+}
+
+/**
+ * v9fs_file_read - read from a file
+ * @filep: file pointer to read
+ * @data: data buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ */
+static ssize_t
+v9fs_file_read(struct file *filp, char __user * data, size_t count,
+	       loff_t * offset)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *v9f = filp->private_data;
+	struct v9fs_fcall *fcall = NULL;
+	int fid = v9f->fid;
+	int rsize = 0;
+	int result = 0;
+	int total = 0;
+	int n;
+
+	dprintk(DEBUG_VFS, "\n");
+
+	rsize = v9ses->maxdata - V9FS_IOHDRSZ;
+	if (v9f->iounit != 0 && rsize > v9f->iounit)
+		rsize = v9f->iounit;
+
+	do {
+		if (count < rsize)
+			rsize = count;
+
+		result = v9fs_t_read(v9ses, fid, *offset, rsize, &fcall);
+
+		if (result < 0) {
+			printk(KERN_ERR "9P2000: v9fs_t_read returned %d\n",
+			       result);
+
+			kfree(fcall);
+			return total;
+		} else
+			*offset += result;
+
+		n = copy_to_user(data, fcall->params.rread.data, result);
+		if (n) {
+			dprintk(DEBUG_ERROR, "Problem copying to user %d\n", n);
+			kfree(fcall);
+			return -EFAULT;
+		}
+
+		count -= result;
+		data += result;
+		total += result;
+
+		kfree(fcall);
+
+		if (result < rsize)
+			break;
+	} while (count);
+
+	return total;
+}
+
+/**
+ * v9fs_file_write - write to a file
+ * @filep: file pointer to write
+ * @data: data buffer to write data from
+ * @count: size of buffer
+ * @offset: offset at which to write data
+ *
+ */
+
+static ssize_t
+v9fs_file_write(struct file *filp, const char __user * data,
+		size_t count, loff_t * offset)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *v9fid = filp->private_data;
+	struct v9fs_fcall *fcall;
+	int fid = v9fid->fid;
+	int result = -EIO;
+	int rsize = 0;
+	int total = 0;
+	char *buf;
+
+	dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count,
+		(int)*offset);
+	rsize = v9ses->maxdata - V9FS_IOHDRSZ;
+	if (v9fid->iounit != 0 && rsize > v9fid->iounit)
+		rsize = v9fid->iounit;
+
+	buf = kmalloc(v9ses->maxdata - V9FS_IOHDRSZ, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	do {
+		if (count < rsize)
+			rsize = count;
+
+		result = copy_from_user(buf, data, rsize);
+		if (result) {
+			dprintk(DEBUG_ERROR, "Problem copying from user\n");
+			kfree(buf);
+			return -EFAULT;
+		}
+
+		dump_data(buf, rsize);
+		result = v9fs_t_write(v9ses, fid, *offset, rsize, buf, &fcall);
+		if (result < 0) {
+			eprintk(KERN_ERR, "error while writing: %s(%d)\n",
+				FCALL_ERROR(fcall), result);
+			kfree(fcall);
+			kfree(buf);
+			return result;
+		} else
+			*offset += result;
+
+		kfree(fcall);
+		fcall = NULL;
+
+		if (result != rsize) {
+			eprintk(KERN_ERR,
+				"short write: v9fs_t_write returned %d\n",
+				result);
+			break;
+		}
+
+		count -= result;
+		data += result;
+		total += result;
+	} while (count);
+
+	kfree(buf);
+	return total;
+}
+
+struct file_operations v9fs_file_operations = {
+	.llseek = generic_file_llseek,
+	.read = v9fs_file_read,
+	.write = v9fs_file_write,
+	.open = v9fs_file_open,
+	.release = v9fs_dir_release,
+	.lock = v9fs_file_lock,
+};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
new file mode 100644
index 00000000000..2b696ae6655
--- /dev/null
+++ b/fs/9p/vfs_inode.c
@@ -0,0 +1,1365 @@
+/*
+ *  linux/fs/9p/vfs_inode.c
+ *
+ * This file contains vfs inode ops for the 9P2000 protocol.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/namei.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+static struct inode_operations v9fs_dir_inode_operations;
+static struct inode_operations v9fs_dir_inode_operations_ext;
+static struct inode_operations v9fs_file_inode_operations;
+static struct inode_operations v9fs_symlink_inode_operations;
+
+/**
+ * unixmode2p9mode - convert unix mode bits to plan 9
+ * @v9ses: v9fs session information
+ * @mode: mode to convert
+ *
+ */
+
+static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
+{
+	int res;
+	res = mode & 0777;
+	if (S_ISDIR(mode))
+		res |= V9FS_DMDIR;
+	if (v9ses->extended) {
+		if (S_ISLNK(mode))
+			res |= V9FS_DMSYMLINK;
+		if (v9ses->nodev == 0) {
+			if (S_ISSOCK(mode))
+				res |= V9FS_DMSOCKET;
+			if (S_ISFIFO(mode))
+				res |= V9FS_DMNAMEDPIPE;
+			if (S_ISBLK(mode))
+				res |= V9FS_DMDEVICE;
+			if (S_ISCHR(mode))
+				res |= V9FS_DMDEVICE;
+		}
+
+		if ((mode & S_ISUID) == S_ISUID)
+			res |= V9FS_DMSETUID;
+		if ((mode & S_ISGID) == S_ISGID)
+			res |= V9FS_DMSETGID;
+		if ((mode & V9FS_DMLINK))
+			res |= V9FS_DMLINK;
+	}
+
+	return res;
+}
+
+/**
+ * p9mode2unixmode- convert plan9 mode bits to unix mode bits
+ * @v9ses: v9fs session information
+ * @mode: mode to convert
+ *
+ */
+
+static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
+{
+	int res;
+
+	res = mode & 0777;
+
+	if ((mode & V9FS_DMDIR) == V9FS_DMDIR)
+		res |= S_IFDIR;
+	else if ((mode & V9FS_DMSYMLINK) && (v9ses->extended))
+		res |= S_IFLNK;
+	else if ((mode & V9FS_DMSOCKET) && (v9ses->extended)
+		 && (v9ses->nodev == 0))
+		res |= S_IFSOCK;
+	else if ((mode & V9FS_DMNAMEDPIPE) && (v9ses->extended)
+		 && (v9ses->nodev == 0))
+		res |= S_IFIFO;
+	else if ((mode & V9FS_DMDEVICE) && (v9ses->extended)
+		 && (v9ses->nodev == 0))
+		res |= S_IFBLK;
+	else
+		res |= S_IFREG;
+
+	if (v9ses->extended) {
+		if ((mode & V9FS_DMSETUID) == V9FS_DMSETUID)
+			res |= S_ISUID;
+
+		if ((mode & V9FS_DMSETGID) == V9FS_DMSETGID)
+			res |= S_ISGID;
+	}
+
+	return res;
+}
+
+/**
+ * v9fs_blank_mistat - helper function to setup a 9P stat structure
+ * @v9ses: 9P session info (for determining extended mode)
+ * @mistat: structure to initialize
+ *
+ */
+
+static void
+v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat)
+{
+	mistat->type = ~0;
+	mistat->dev = ~0;
+	mistat->qid.type = ~0;
+	mistat->qid.version = ~0;
+	*((long long *)&mistat->qid.path) = ~0;
+	mistat->mode = ~0;
+	mistat->atime = ~0;
+	mistat->mtime = ~0;
+	mistat->length = ~0;
+	mistat->name = mistat->data;
+	mistat->uid = mistat->data;
+	mistat->gid = mistat->data;
+	mistat->muid = mistat->data;
+	if (v9ses->extended) {
+		mistat->n_uid = ~0;
+		mistat->n_gid = ~0;
+		mistat->n_muid = ~0;
+		mistat->extension = mistat->data;
+	}
+	*mistat->data = 0;
+}
+
+/**
+ * v9fs_mistat2unix - convert mistat to unix stat
+ * @mistat: Plan 9 metadata (mistat) structure
+ * @buf: unix metadata (stat) structure to populate
+ * @sb: superblock
+ *
+ */
+
+static void
+v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf,
+		 struct super_block *sb)
+{
+	struct v9fs_session_info *v9ses = sb ? sb->s_fs_info : NULL;
+
+	buf->st_nlink = 1;
+
+	buf->st_atime = mistat->atime;
+	buf->st_mtime = mistat->mtime;
+	buf->st_ctime = mistat->mtime;
+
+	buf->st_uid = (unsigned short)-1;
+	buf->st_gid = (unsigned short)-1;
+
+	if (v9ses && v9ses->extended) {
+		/* TODO: string to uid mapping via user-space daemon */
+		if (mistat->n_uid != -1)
+			sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid);
+
+		if (mistat->n_gid != -1)
+			sscanf(mistat->gid, "%x", (unsigned int *)&buf->st_gid);
+	}
+
+	if (buf->st_uid == (unsigned short)-1)
+		buf->st_uid = v9ses->uid;
+	if (buf->st_gid == (unsigned short)-1)
+		buf->st_gid = v9ses->gid;
+
+	buf->st_mode = p9mode2unixmode(v9ses, mistat->mode);
+	if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) {
+		char type = 0;
+		int major = -1;
+		int minor = -1;
+		sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
+		switch (type) {
+		case 'c':
+			buf->st_mode &= ~S_IFBLK;
+			buf->st_mode |= S_IFCHR;
+			break;
+		case 'b':
+			break;
+		default:
+			dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
+				type, mistat->extension);
+		};
+		buf->st_rdev = MKDEV(major, minor);
+	} else
+		buf->st_rdev = 0;
+
+	buf->st_size = mistat->length;
+
+	buf->st_blksize = sb->s_blocksize;
+	buf->st_blocks =
+	    (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits;
+}
+
+/**
+ * v9fs_get_inode - helper function to setup an inode
+ * @sb: superblock
+ * @mode: mode to setup inode with
+ *
+ */
+
+struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+{
+	struct inode *inode = NULL;
+	struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+	dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
+
+	inode = new_inode(sb);
+	if (inode) {
+		inode->i_mode = mode;
+		inode->i_uid = current->fsuid;
+		inode->i_gid = current->fsgid;
+		inode->i_blksize = sb->s_blocksize;
+		inode->i_blocks = 0;
+		inode->i_rdev = 0;
+		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
+		switch (mode & S_IFMT) {
+		case S_IFIFO:
+		case S_IFBLK:
+		case S_IFCHR:
+		case S_IFSOCK:
+			if(!v9ses->extended) {
+				dprintk(DEBUG_ERROR, "special files without extended mode\n");
+				return ERR_PTR(-EINVAL);
+			}
+			init_special_inode(inode, inode->i_mode,
+					   inode->i_rdev);
+			break;
+		case S_IFREG:
+			inode->i_op = &v9fs_file_inode_operations;
+			inode->i_fop = &v9fs_file_operations;
+			break;
+		case S_IFLNK:
+			if(!v9ses->extended) {
+				dprintk(DEBUG_ERROR, "extended modes used w/o 9P2000.u\n");
+				return ERR_PTR(-EINVAL);
+			}
+			inode->i_op = &v9fs_symlink_inode_operations;
+			break;
+		case S_IFDIR:
+			inode->i_nlink++;
+			if(v9ses->extended)
+				inode->i_op = &v9fs_dir_inode_operations_ext;
+			else
+				inode->i_op = &v9fs_dir_inode_operations;
+			inode->i_fop = &v9fs_dir_operations;
+			break;
+		default:
+			dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
+				mode, mode & S_IFMT);
+			return ERR_PTR(-EINVAL);
+		}
+	} else {
+		eprintk(KERN_WARNING, "Problem allocating inode\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	return inode;
+}
+
+/**
+ * v9fs_create - helper function to create files and directories
+ * @dir: directory inode file is being created in
+ * @file_dentry: dentry file is being created in
+ * @perm: permissions file is being created with
+ * @open_mode: resulting open mode for file
+ *
+ */
+
+static int
+v9fs_create(struct inode *dir,
+	    struct dentry *file_dentry,
+	    unsigned int perm, unsigned int open_mode)
+{
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+	struct super_block *sb = dir->i_sb;
+	struct v9fs_fid *dirfid =
+	    v9fs_fid_lookup(file_dentry->d_parent);
+	struct v9fs_fid *fid = NULL;
+	struct inode *file_inode = NULL;
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_qid qid;
+	struct stat newstat;
+	int dirfidnum = -1;
+	long newfid = -1;
+	int result = 0;
+	unsigned int iounit = 0;
+	int wfidno = -1;
+
+	perm = unixmode2p9mode(v9ses, perm);
+
+	dprintk(DEBUG_VFS, "dir: %p dentry: %p perm: %o mode: %o\n", dir,
+		file_dentry, perm, open_mode);
+
+	if (!dirfid)
+		return -EBADF;
+
+	dirfidnum = dirfid->fid;
+	if (dirfidnum < 0) {
+		dprintk(DEBUG_ERROR, "No fid for the directory #%lu\n",
+			dir->i_ino);
+		return -EBADF;
+	}
+
+	if (file_dentry->d_inode) {
+		dprintk(DEBUG_ERROR,
+			"Odd. There is an inode for dir %lu, name :%s:\n",
+			dir->i_ino, file_dentry->d_name.name);
+		return -EEXIST;
+	}
+
+	newfid = v9fs_get_idpool(&v9ses->fidpool);
+	if (newfid < 0) {
+		eprintk(KERN_WARNING, "no free fids available\n");
+		return -ENOSPC;
+	}
+
+	result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall);
+	if (result < 0) {
+		dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
+		v9fs_put_idpool(newfid, &v9ses->fidpool);
+		newfid = -1;
+		goto CleanUpFid;
+	}
+
+	kfree(fcall);
+
+	result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name,
+			       perm, open_mode, &fcall);
+	if (result < 0) {
+		dprintk(DEBUG_ERROR, "create fails: %s(%d)\n",
+			FCALL_ERROR(fcall), result);
+
+		goto CleanUpFid;
+	}
+
+	iounit = fcall->params.rcreate.iounit;
+	qid = fcall->params.rcreate.qid;
+	kfree(fcall);
+
+	fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1);
+	dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate);
+	if (!fid) {
+		result = -ENOMEM;
+		goto CleanUpFid;
+	}
+
+	fid->qid = qid;
+	fid->iounit = iounit;
+
+	/* walk to the newly created file and put the fid in the dentry */
+	wfidno = v9fs_get_idpool(&v9ses->fidpool);
+	if (newfid < 0) {
+		eprintk(KERN_WARNING, "no free fids available\n");
+		return -ENOSPC;
+	}
+
+	result = v9fs_t_walk(v9ses, dirfidnum, wfidno,
+		(char *) file_dentry->d_name.name, NULL);
+	if (result < 0) {
+		dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
+		v9fs_put_idpool(wfidno, &v9ses->fidpool);
+		wfidno = -1;
+		goto CleanUpFid;
+	}
+
+	if (!v9fs_fid_create(file_dentry, v9ses, wfidno, 0)) {
+		if (!v9fs_t_clunk(v9ses, newfid, &fcall)) {
+			v9fs_put_idpool(wfidno, &v9ses->fidpool);
+		}
+
+		goto CleanUpFid;
+	}
+
+	if ((perm & V9FS_DMSYMLINK) || (perm & V9FS_DMLINK) ||
+	    (perm & V9FS_DMNAMEDPIPE) || (perm & V9FS_DMSOCKET) ||
+	    (perm & V9FS_DMDEVICE))
+		return 0;
+
+	result = v9fs_t_stat(v9ses, newfid, &fcall);
+	if (result < 0) {
+		dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall),
+			result);
+		goto CleanUpFid;
+	}
+
+	v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
+
+	file_inode = v9fs_get_inode(sb, newstat.st_mode);
+	if ((!file_inode) || IS_ERR(file_inode)) {
+		dprintk(DEBUG_ERROR, "create inode failed\n");
+		result = -EBADF;
+		goto CleanUpFid;
+	}
+
+	v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb);
+	kfree(fcall);
+	d_instantiate(file_dentry, file_inode);
+
+	if (perm & V9FS_DMDIR) {
+		if (!v9fs_t_clunk(v9ses, newfid, &fcall))
+			v9fs_put_idpool(newfid, &v9ses->fidpool);
+		else
+			dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n",
+				FCALL_ERROR(fcall));
+		kfree(fcall);
+		fid->fidopen = 0;
+		fid->fidcreate = 0;
+		d_drop(file_dentry);
+	}
+
+	return 0;
+
+      CleanUpFid:
+	kfree(fcall);
+
+	if (newfid >= 0) {
+		if (!v9fs_t_clunk(v9ses, newfid, &fcall))
+			v9fs_put_idpool(newfid, &v9ses->fidpool);
+		else
+			dprintk(DEBUG_ERROR, "clunk failed: %s\n",
+				FCALL_ERROR(fcall));
+
+		kfree(fcall);
+	}
+	if (wfidno >= 0) {
+		if (!v9fs_t_clunk(v9ses, wfidno, &fcall))
+			v9fs_put_idpool(wfidno, &v9ses->fidpool);
+		else
+			dprintk(DEBUG_ERROR, "clunk failed: %s\n",
+				FCALL_ERROR(fcall));
+
+		kfree(fcall);
+	}
+	return result;
+}
+
+/**
+ * v9fs_remove - helper function to remove files and directories
+ * @dir: directory inode that is being deleted
+ * @file:  dentry that is being deleted
+ * @rmdir: removing a directory
+ *
+ */
+
+static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
+{
+	struct v9fs_fcall *fcall = NULL;
+	struct super_block *sb = NULL;
+	struct v9fs_session_info *v9ses = NULL;
+	struct v9fs_fid *v9fid = NULL;
+	struct inode *file_inode = NULL;
+	int fid = -1;
+	int result = 0;
+
+	dprintk(DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
+		rmdir);
+
+	file_inode = file->d_inode;
+	sb = file_inode->i_sb;
+	v9ses = v9fs_inode2v9ses(file_inode);
+	v9fid = v9fs_fid_lookup(file);
+
+	if (!v9fid) {
+		dprintk(DEBUG_ERROR,
+			"no v9fs_fid\n");
+		return -EBADF;
+	}
+
+	fid = v9fid->fid;
+	if (fid < 0) {
+		dprintk(DEBUG_ERROR, "inode #%lu, no fid!\n",
+			file_inode->i_ino);
+		return -EBADF;
+	}
+
+	result = v9fs_t_remove(v9ses, fid, &fcall);
+	if (result < 0)
+		dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n",
+			FCALL_ERROR(fcall), result);
+	else {
+		v9fs_put_idpool(fid, &v9ses->fidpool);
+		v9fs_fid_destroy(v9fid);
+	}
+
+	kfree(fcall);
+	return result;
+}
+
+/**
+ * v9fs_vfs_create - VFS hook to create files
+ * @inode: directory inode that is being deleted
+ * @dentry:  dentry that is being deleted
+ * @perm: create permissions
+ * @nd: path information
+ *
+ */
+
+static int
+v9fs_vfs_create(struct inode *inode, struct dentry *dentry, int perm,
+		struct nameidata *nd)
+{
+	return v9fs_create(inode, dentry, perm, O_RDWR);
+}
+
+/**
+ * v9fs_vfs_mkdir - VFS mkdir hook to create a directory
+ * @inode:  inode that is being unlinked
+ * @dentry: dentry that is being unlinked
+ * @mode: mode for new directory
+ *
+ */
+
+static int v9fs_vfs_mkdir(struct inode *inode, struct dentry *dentry, int mode)
+{
+	return v9fs_create(inode, dentry, mode | S_IFDIR, O_RDONLY);
+}
+
+/**
+ * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
+ * @dir:  inode that is being walked from
+ * @dentry: dentry that is being walked to?
+ * @nameidata: path data
+ *
+ */
+
+static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
+				      struct nameidata *nameidata)
+{
+	struct super_block *sb;
+	struct v9fs_session_info *v9ses;
+	struct v9fs_fid *dirfid;
+	struct v9fs_fid *fid;
+	struct inode *inode;
+	struct v9fs_fcall *fcall = NULL;
+	struct stat newstat;
+	int dirfidnum = -1;
+	int newfid = -1;
+	int result = 0;
+
+	dprintk(DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
+		dir, dentry->d_iname, dentry, nameidata);
+
+	sb = dir->i_sb;
+	v9ses = v9fs_inode2v9ses(dir);
+	dirfid = v9fs_fid_lookup(dentry->d_parent);
+
+	if (!dirfid) {
+		dprintk(DEBUG_ERROR, "no dirfid\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	dirfidnum = dirfid->fid;
+
+	if (dirfidnum < 0) {
+		dprintk(DEBUG_ERROR, "no dirfid for inode %p, #%lu\n",
+			dir, dir->i_ino);
+		return ERR_PTR(-EBADF);
+	}
+
+	newfid = v9fs_get_idpool(&v9ses->fidpool);
+	if (newfid < 0) {
+		eprintk(KERN_WARNING, "newfid fails!\n");
+		return ERR_PTR(-ENOSPC);
+	}
+
+	result =
+	    v9fs_t_walk(v9ses, dirfidnum, newfid, (char *)dentry->d_name.name,
+			NULL);
+	if (result < 0) {
+		v9fs_put_idpool(newfid, &v9ses->fidpool);
+		if (result == -ENOENT) {
+			d_add(dentry, NULL);
+			dprintk(DEBUG_VFS,
+				"Return negative dentry %p count %d\n",
+				dentry, atomic_read(&dentry->d_count));
+			return NULL;
+		}
+		dprintk(DEBUG_ERROR, "walk error:%d\n", result);
+		goto FreeFcall;
+	}
+
+	result = v9fs_t_stat(v9ses, newfid, &fcall);
+	if (result < 0) {
+		dprintk(DEBUG_ERROR, "stat error\n");
+		goto FreeFcall;
+	}
+
+	v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
+	inode = v9fs_get_inode(sb, newstat.st_mode);
+
+	if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) {
+		eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
+			PTR_ERR(inode));
+
+		result = -ENOSPC;
+		goto FreeFcall;
+	}
+
+	inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid);
+
+	fid = v9fs_fid_create(dentry, v9ses, newfid, 0);
+	if (fid == NULL) {
+		dprintk(DEBUG_ERROR, "couldn't insert\n");
+		result = -ENOMEM;
+		goto FreeFcall;
+	}
+
+	fid->qid = fcall->params.rstat.stat->qid;
+
+	dentry->d_op = &v9fs_dentry_operations;
+	v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb);
+
+	d_add(dentry, inode);
+	kfree(fcall);
+
+	return NULL;
+
+      FreeFcall:
+	kfree(fcall);
+	return ERR_PTR(result);
+}
+
+/**
+ * v9fs_vfs_unlink - VFS unlink hook to delete an inode
+ * @i:  inode that is being unlinked
+ * @d: dentry that is being unlinked
+ *
+ */
+
+static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
+{
+	return v9fs_remove(i, d, 0);
+}
+
+/**
+ * v9fs_vfs_rmdir - VFS unlink hook to delete a directory
+ * @i:  inode that is being unlinked
+ * @d: dentry that is being unlinked
+ *
+ */
+
+static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
+{
+	return v9fs_remove(i, d, 1);
+}
+
+/**
+ * v9fs_vfs_rename - VFS hook to rename an inode
+ * @old_dir:  old dir inode
+ * @old_dentry: old dentry
+ * @new_dir: new dir inode
+ * @new_dentry: new dentry
+ *
+ */
+
+static int
+v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+		struct inode *new_dir, struct dentry *new_dentry)
+{
+	struct inode *old_inode = old_dentry->d_inode;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode);
+	struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry);
+	struct v9fs_fid *olddirfid =
+	    v9fs_fid_lookup(old_dentry->d_parent);
+	struct v9fs_fid *newdirfid =
+	    v9fs_fid_lookup(new_dentry->d_parent);
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	struct v9fs_fcall *fcall = NULL;
+	int fid = -1;
+	int olddirfidnum = -1;
+	int newdirfidnum = -1;
+	int retval = 0;
+
+	dprintk(DEBUG_VFS, "\n");
+
+	if (!mistat)
+		return -ENOMEM;
+
+	if ((!oldfid) || (!olddirfid) || (!newdirfid)) {
+		dprintk(DEBUG_ERROR, "problem with arguments\n");
+		return -EBADF;
+	}
+
+	/* 9P can only handle file rename in the same directory */
+	if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
+		dprintk(DEBUG_ERROR, "old dir and new dir are different\n");
+		retval = -EPERM;
+		goto FreeFcallnBail;
+	}
+
+	fid = oldfid->fid;
+	olddirfidnum = olddirfid->fid;
+	newdirfidnum = newdirfid->fid;
+
+	if (fid < 0) {
+		dprintk(DEBUG_ERROR, "no fid for old file #%lu\n",
+			old_inode->i_ino);
+		retval = -EBADF;
+		goto FreeFcallnBail;
+	}
+
+	v9fs_blank_mistat(v9ses, mistat);
+
+	strcpy(mistat->data + 1, v9ses->name);
+	mistat->name = mistat->data + 1 + strlen(v9ses->name);
+
+	if (new_dentry->d_name.len >
+	    (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) {
+		dprintk(DEBUG_ERROR, "new name too long\n");
+		goto FreeFcallnBail;
+	}
+
+	strcpy(mistat->name, new_dentry->d_name.name);
+	retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall);
+
+      FreeFcallnBail:
+	kfree(mistat);
+
+	if (retval < 0)
+		dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+			FCALL_ERROR(fcall));
+
+	kfree(fcall);
+	return retval;
+}
+
+/**
+ * v9fs_vfs_getattr - retreive file metadata
+ * @mnt - mount information
+ * @dentry - file to get attributes on
+ * @stat - metadata structure to populate
+ *
+ */
+
+static int
+v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		 struct kstat *stat)
+{
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	struct v9fs_fid *fid = v9fs_fid_lookup(dentry);
+	int err = -EPERM;
+
+	dprintk(DEBUG_VFS, "dentry: %p\n", dentry);
+	if (!fid) {
+		dprintk(DEBUG_ERROR,
+			"couldn't find fid associated with dentry\n");
+		return -EBADF;
+	}
+
+	err = v9fs_t_stat(v9ses, fid->fid, &fcall);
+
+	if (err < 0)
+		dprintk(DEBUG_ERROR, "stat error\n");
+	else {
+		v9fs_mistat2inode(fcall->params.rstat.stat, dentry->d_inode,
+				  dentry->d_inode->i_sb);
+		generic_fillattr(dentry->d_inode, stat);
+	}
+
+	kfree(fcall);
+	return err;
+}
+
+/**
+ * v9fs_vfs_setattr - set file metadata
+ * @dentry: file whose metadata to set
+ * @iattr: metadata assignment structure
+ *
+ */
+
+static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	struct v9fs_fid *fid = v9fs_fid_lookup(dentry);
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	int res = -EPERM;
+
+	dprintk(DEBUG_VFS, "\n");
+
+	if (!mistat)
+		return -ENOMEM;
+
+	if (!fid) {
+		dprintk(DEBUG_ERROR,
+			"Couldn't find fid associated with dentry\n");
+		return -EBADF;
+	}
+
+	v9fs_blank_mistat(v9ses, mistat);
+	if (iattr->ia_valid & ATTR_MODE)
+		mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode);
+
+	if (iattr->ia_valid & ATTR_MTIME)
+		mistat->mtime = iattr->ia_mtime.tv_sec;
+
+	if (iattr->ia_valid & ATTR_ATIME)
+		mistat->atime = iattr->ia_atime.tv_sec;
+
+	if (iattr->ia_valid & ATTR_SIZE)
+		mistat->length = iattr->ia_size;
+
+	if (v9ses->extended) {
+		char *ptr = mistat->data+1;
+
+		if (iattr->ia_valid & ATTR_UID) {
+			mistat->uid = ptr;
+			ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid);
+			mistat->n_uid = iattr->ia_uid;
+		}
+
+		if (iattr->ia_valid & ATTR_GID) {
+			mistat->gid = ptr;
+			ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid);
+			mistat->n_gid = iattr->ia_gid;
+		}
+	}
+
+	res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall);
+
+	if (res < 0)
+		dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall));
+
+	kfree(mistat);
+	kfree(fcall);
+
+	if (res >= 0)
+		res = inode_setattr(dentry->d_inode, iattr);
+
+	return res;
+}
+
+/**
+ * v9fs_mistat2inode - populate an inode structure with mistat info
+ * @mistat: Plan 9 metadata (mistat) structure
+ * @inode: inode to populate
+ * @sb: superblock of filesystem
+ *
+ */
+
+void
+v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode,
+		  struct super_block *sb)
+{
+	struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+	inode->i_nlink = 1;
+
+	inode->i_atime.tv_sec = mistat->atime;
+	inode->i_mtime.tv_sec = mistat->mtime;
+	inode->i_ctime.tv_sec = mistat->mtime;
+
+	inode->i_uid = -1;
+	inode->i_gid = -1;
+
+	if (v9ses->extended) {
+		/* TODO: string to uid mapping via user-space daemon */
+		inode->i_uid = mistat->n_uid;
+		inode->i_gid = mistat->n_gid;
+
+		if (mistat->n_uid == -1)
+			sscanf(mistat->uid, "%x", &inode->i_uid);
+
+		if (mistat->n_gid == -1)
+			sscanf(mistat->gid, "%x", &inode->i_gid);
+	}
+
+	if (inode->i_uid == -1)
+		inode->i_uid = v9ses->uid;
+	if (inode->i_gid == -1)
+		inode->i_gid = v9ses->gid;
+
+	inode->i_mode = p9mode2unixmode(v9ses, mistat->mode);
+	if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
+		char type = 0;
+		int major = -1;
+		int minor = -1;
+		sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
+		switch (type) {
+		case 'c':
+			inode->i_mode &= ~S_IFBLK;
+			inode->i_mode |= S_IFCHR;
+			break;
+		case 'b':
+			break;
+		default:
+			dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
+				type, mistat->extension);
+		};
+		inode->i_rdev = MKDEV(major, minor);
+	} else
+		inode->i_rdev = 0;
+
+	inode->i_size = mistat->length;
+
+	inode->i_blksize = sb->s_blocksize;
+	inode->i_blocks =
+	    (inode->i_size + inode->i_blksize - 1) >> sb->s_blocksize_bits;
+}
+
+/**
+ * v9fs_qid2ino - convert qid into inode number
+ * @qid: qid to hash
+ *
+ * BUG: potential for inode number collisions?
+ */
+
+ino_t v9fs_qid2ino(struct v9fs_qid *qid)
+{
+	u64 path = qid->path + 2;
+	ino_t i = 0;
+
+	if (sizeof(ino_t) == sizeof(path))
+		memcpy(&i, &path, sizeof(ino_t));
+	else
+		i = (ino_t) (path ^ (path >> 32));
+
+	return i;
+}
+
+/**
+ * v9fs_vfs_symlink - helper function to create symlinks
+ * @dir: directory inode containing symlink
+ * @dentry: dentry for symlink
+ * @symname: symlink data
+ *
+ * See 9P2000.u RFC for more information
+ *
+ */
+
+static int
+v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+	int retval = -EPERM;
+	struct v9fs_fid *newfid;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+
+	dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
+		symname);
+
+	if (!mistat)
+		return -ENOMEM;
+
+	if (!v9ses->extended) {
+		dprintk(DEBUG_ERROR, "not extended\n");
+		goto FreeFcall;
+	}
+
+	/* issue a create */
+	retval = v9fs_create(dir, dentry, S_IFLNK, 0);
+	if (retval != 0)
+		goto FreeFcall;
+
+	newfid = v9fs_fid_lookup(dentry);
+
+	/* issue a twstat */
+	v9fs_blank_mistat(v9ses, mistat);
+	strcpy(mistat->data + 1, symname);
+	mistat->extension = mistat->data + 1;
+	retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
+	if (retval < 0) {
+		dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeFcall;
+	}
+
+	kfree(fcall);
+
+	if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
+		dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeFcall;
+	}
+
+	d_drop(dentry);		/* FID - will this also clunk? */
+
+      FreeFcall:
+	kfree(mistat);
+	kfree(fcall);
+
+	return retval;
+}
+
+/**
+ * v9fs_readlink - read a symlink's location (internal version)
+ * @dentry: dentry for symlink
+ * @buffer: buffer to load symlink location into
+ * @buflen: length of buffer
+ *
+ */
+
+static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
+{
+	int retval = -EPERM;
+
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	struct v9fs_fid *fid = v9fs_fid_lookup(dentry);
+
+	if (!fid) {
+		dprintk(DEBUG_ERROR, "could not resolve fid from dentry\n");
+		retval = -EBADF;
+		goto FreeFcall;
+	}
+
+	if (!v9ses->extended) {
+		retval = -EBADF;
+		dprintk(DEBUG_ERROR, "not extended\n");
+		goto FreeFcall;
+	}
+
+	dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name);
+	retval = v9fs_t_stat(v9ses, fid->fid, &fcall);
+
+	if (retval < 0) {
+		dprintk(DEBUG_ERROR, "stat error\n");
+		goto FreeFcall;
+	}
+
+	if (!fcall)
+		return -EIO;
+
+	if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) {
+		retval = -EINVAL;
+		goto FreeFcall;
+	}
+
+	/* copy extension buffer into buffer */
+	if (strlen(fcall->params.rstat.stat->extension) < buflen)
+		buflen = strlen(fcall->params.rstat.stat->extension);
+
+	memcpy(buffer, fcall->params.rstat.stat->extension, buflen + 1);
+
+	retval = buflen;
+
+      FreeFcall:
+	kfree(fcall);
+
+	return retval;
+}
+
+/**
+ * v9fs_vfs_readlink - read a symlink's location
+ * @dentry: dentry for symlink
+ * @buf: buffer to load symlink location into
+ * @buflen: length of buffer
+ *
+ */
+
+static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
+			     int buflen)
+{
+	int retval;
+	int ret;
+	char *link = __getname();
+
+	if (buflen > PATH_MAX)
+		buflen = PATH_MAX;
+
+	dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+
+	retval = v9fs_readlink(dentry, link, buflen);
+
+	if (retval > 0) {
+		if ((ret = copy_to_user(buffer, link, retval)) != 0) {
+			dprintk(DEBUG_ERROR, "problem copying to user: %d\n",
+				ret);
+			retval = ret;
+		}
+	}
+
+	putname(link);
+	return retval;
+}
+
+/**
+ * v9fs_vfs_follow_link - follow a symlink path
+ * @dentry: dentry for symlink
+ * @nd: nameidata
+ *
+ */
+
+static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	int len = 0;
+	char *link = __getname();
+
+	dprintk(DEBUG_VFS, "%s n", dentry->d_name.name);
+
+	if (!link)
+		link = ERR_PTR(-ENOMEM);
+	else {
+		len = v9fs_readlink(dentry, link, strlen(link));
+
+		if (len < 0) {
+			putname(link);
+			link = ERR_PTR(len);
+		} else
+			link[len] = 0;
+	}
+	nd_set_link(nd, link);
+
+	return NULL;
+}
+
+/**
+ * v9fs_vfs_put_link - release a symlink path
+ * @dentry: dentry for symlink
+ * @nd: nameidata
+ *
+ */
+
+static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+{
+	char *s = nd_get_link(nd);
+
+	dprintk(DEBUG_VFS, " %s %s\n", dentry->d_name.name, s);
+	if (!IS_ERR(s))
+		putname(s);
+}
+
+/**
+ * v9fs_vfs_link - create a hardlink
+ * @old_dentry: dentry for file to link to
+ * @dir: inode destination for new link
+ * @dentry: dentry for link
+ *
+ */
+
+/* XXX - lots of code dup'd from symlink and creates,
+ * figure out a better reuse strategy
+ */
+
+static int
+v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
+	      struct dentry *dentry)
+{
+	int retval = -EPERM;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry);
+	struct v9fs_fid *newfid = NULL;
+	char *symname = __getname();
+
+	dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
+		old_dentry->d_name.name);
+
+	if (!v9ses->extended) {
+		dprintk(DEBUG_ERROR, "not extended\n");
+		goto FreeMem;
+	}
+
+	/* get fid of old_dentry */
+	sprintf(symname, "hardlink(%d)\n", oldfid->fid);
+
+	/* issue a create */
+	retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0);
+	if (retval != 0)
+		goto FreeMem;
+
+	newfid = v9fs_fid_lookup(dentry);
+	if (!newfid) {
+		dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
+		goto FreeMem;
+	}
+
+	/* issue a twstat */
+	v9fs_blank_mistat(v9ses, mistat);
+	strcpy(mistat->data + 1, symname);
+	mistat->extension = mistat->data + 1;
+	retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
+	if (retval < 0) {
+		dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeMem;
+	}
+
+	kfree(fcall);
+
+	if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
+		dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeMem;
+	}
+
+	d_drop(dentry);		/* FID - will this also clunk? */
+
+	kfree(fcall);
+	fcall = NULL;
+
+      FreeMem:
+	kfree(mistat);
+	kfree(fcall);
+	putname(symname);
+	return retval;
+}
+
+/**
+ * v9fs_vfs_mknod - create a special file
+ * @dir: inode destination for new link
+ * @dentry: dentry for file
+ * @mode: mode for creation
+ * @dev_t: device associated with special file
+ *
+ */
+
+static int
+v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+{
+	int retval = -EPERM;
+	struct v9fs_fid *newfid;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	char *symname = __getname();
+
+	dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+		dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
+
+	if (!mistat)
+		return -ENOMEM;
+
+	if (!new_valid_dev(rdev)) {
+		retval = -EINVAL;
+		goto FreeMem;
+	}
+
+	if (!v9ses->extended) {
+		dprintk(DEBUG_ERROR, "not extended\n");
+		goto FreeMem;
+	}
+
+	/* issue a create */
+	retval = v9fs_create(dir, dentry, mode, 0);
+
+	if (retval != 0)
+		goto FreeMem;
+
+	newfid = v9fs_fid_lookup(dentry);
+	if (!newfid) {
+		dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n");
+		retval = -EINVAL;
+		goto FreeMem;
+	}
+
+	/* build extension */
+	if (S_ISBLK(mode))
+		sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev));
+	else if (S_ISCHR(mode))
+		sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev));
+	else if (S_ISFIFO(mode))
+		;	/* DO NOTHING */
+	else {
+		retval = -EINVAL;
+		goto FreeMem;
+	}
+
+	if (!S_ISFIFO(mode)) {
+		/* issue a twstat */
+		v9fs_blank_mistat(v9ses, mistat);
+		strcpy(mistat->data + 1, symname);
+		mistat->extension = mistat->data + 1;
+		retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
+		if (retval < 0) {
+			dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+				FCALL_ERROR(fcall));
+			goto FreeMem;
+		}
+	}
+
+	/* need to update dcache so we show up */
+	kfree(fcall);
+
+	if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
+		dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeMem;
+	}
+
+	d_drop(dentry);		/* FID - will this also clunk? */
+
+      FreeMem:
+	kfree(mistat);
+	kfree(fcall);
+	putname(symname);
+
+	return retval;
+}
+
+static struct inode_operations v9fs_dir_inode_operations_ext = {
+	.create = v9fs_vfs_create,
+	.lookup = v9fs_vfs_lookup,
+	.symlink = v9fs_vfs_symlink,
+	.link = v9fs_vfs_link,
+	.unlink = v9fs_vfs_unlink,
+	.mkdir = v9fs_vfs_mkdir,
+	.rmdir = v9fs_vfs_rmdir,
+	.mknod = v9fs_vfs_mknod,
+	.rename = v9fs_vfs_rename,
+	.readlink = v9fs_vfs_readlink,
+	.getattr = v9fs_vfs_getattr,
+	.setattr = v9fs_vfs_setattr,
+};
+
+static struct inode_operations v9fs_dir_inode_operations = {
+	.create = v9fs_vfs_create,
+	.lookup = v9fs_vfs_lookup,
+	.unlink = v9fs_vfs_unlink,
+	.mkdir = v9fs_vfs_mkdir,
+	.rmdir = v9fs_vfs_rmdir,
+	.mknod = v9fs_vfs_mknod,
+	.rename = v9fs_vfs_rename,
+	.getattr = v9fs_vfs_getattr,
+	.setattr = v9fs_vfs_setattr,
+};
+
+static struct inode_operations v9fs_file_inode_operations = {
+	.getattr = v9fs_vfs_getattr,
+	.setattr = v9fs_vfs_setattr,
+};
+
+static struct inode_operations v9fs_symlink_inode_operations = {
+	.readlink = v9fs_vfs_readlink,
+	.follow_link = v9fs_vfs_follow_link,
+	.put_link = v9fs_vfs_put_link,
+	.getattr = v9fs_vfs_getattr,
+	.setattr = v9fs_vfs_setattr,
+};
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
new file mode 100644
index 00000000000..82c5b008407
--- /dev/null
+++ b/fs/9p/vfs_super.c
@@ -0,0 +1,265 @@
+/*
+ *  linux/fs/9p/vfs_super.c
+ *
+ * This file contians superblock ops for 9P2000. It is intended that
+ * you mount this file system on directories.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/pagemap.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+static void v9fs_clear_inode(struct inode *);
+static struct super_operations v9fs_super_ops;
+
+/**
+ * v9fs_clear_inode - release an inode
+ * @inode: inode to release
+ *
+ */
+
+static void v9fs_clear_inode(struct inode *inode)
+{
+	filemap_fdatawrite(inode->i_mapping);
+}
+
+/**
+ * v9fs_set_super - set the superblock
+ * @s: super block
+ * @data: file system specific data
+ *
+ */
+
+static int v9fs_set_super(struct super_block *s, void *data)
+{
+	s->s_fs_info = data;
+	return set_anon_super(s, data);
+}
+
+/**
+ * v9fs_fill_super - populate superblock with info
+ * @sb: superblock
+ * @v9ses: session information
+ *
+ */
+
+static void
+v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
+		int flags)
+{
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
+	sb->s_blocksize = 1 << sb->s_blocksize_bits;
+	sb->s_magic = V9FS_MAGIC;
+	sb->s_op = &v9fs_super_ops;
+
+	sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
+	    MS_NODIRATIME | MS_NOATIME;
+}
+
+/**
+ * v9fs_get_sb - mount a superblock
+ * @fs_type: file system type
+ * @flags: mount flags
+ * @dev_name: device name that was mounted
+ * @data: mount options
+ *
+ */
+
+static struct super_block *v9fs_get_sb(struct file_system_type
+				       *fs_type, int flags,
+				       const char *dev_name, void *data)
+{
+	struct super_block *sb = NULL;
+	struct v9fs_fcall *fcall = NULL;
+	struct inode *inode = NULL;
+	struct dentry *root = NULL;
+	struct v9fs_session_info *v9ses = NULL;
+	struct v9fs_fid *root_fid = NULL;
+	int mode = S_IRWXUGO | S_ISVTX;
+	uid_t uid = current->fsuid;
+	gid_t gid = current->fsgid;
+	int stat_result = 0;
+	int newfid = 0;
+	int retval = 0;
+
+	dprintk(DEBUG_VFS, " \n");
+
+	v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL);
+	if (!v9ses)
+		return ERR_PTR(-ENOMEM);
+
+	if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
+		dprintk(DEBUG_ERROR, "problem initiating session\n");
+		return ERR_PTR(newfid);
+	}
+
+	sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
+
+	v9fs_fill_super(sb, v9ses, flags);
+
+	inode = v9fs_get_inode(sb, S_IFDIR | mode);
+	if (IS_ERR(inode)) {
+		retval = PTR_ERR(inode);
+		goto put_back_sb;
+	}
+
+	inode->i_uid = uid;
+	inode->i_gid = gid;
+
+	root = d_alloc_root(inode);
+
+	if (!root) {
+		retval = -ENOMEM;
+		goto put_back_sb;
+	}
+
+	sb->s_root = root;
+
+	stat_result = v9fs_t_stat(v9ses, newfid, &fcall);
+	if (stat_result < 0) {
+		dprintk(DEBUG_ERROR, "stat error\n");
+		v9fs_t_clunk(v9ses, newfid, NULL);
+		v9fs_put_idpool(newfid, &v9ses->fidpool);
+	} else {
+		/* Setup the Root Inode */
+		root_fid = v9fs_fid_create(root, v9ses, newfid, 0);
+		if (root_fid == NULL) {
+			retval = -ENOMEM;
+			goto put_back_sb;
+		}
+
+		root_fid->qid = fcall->params.rstat.stat->qid;
+		root->d_inode->i_ino =
+		    v9fs_qid2ino(&fcall->params.rstat.stat->qid);
+		v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb);
+	}
+
+	kfree(fcall);
+
+	if (stat_result < 0) {
+		retval = stat_result;
+		goto put_back_sb;
+	}
+
+	return sb;
+
+put_back_sb:
+	/* deactivate_super calls v9fs_kill_super which will frees the rest */
+	up_write(&sb->s_umount);
+	deactivate_super(sb);
+	return ERR_PTR(retval);
+}
+
+/**
+ * v9fs_kill_super - Kill Superblock
+ * @s: superblock
+ *
+ */
+
+static void v9fs_kill_super(struct super_block *s)
+{
+	struct v9fs_session_info *v9ses = s->s_fs_info;
+
+	dprintk(DEBUG_VFS, " %p\n", s);
+
+	v9fs_dentry_release(s->s_root);	/* clunk root */
+
+	kill_anon_super(s);
+
+	v9fs_session_close(v9ses);
+	kfree(v9ses);
+	dprintk(DEBUG_VFS, "exiting kill_super\n");
+}
+
+/**
+ * v9fs_show_options - Show mount options in /proc/mounts
+ * @m: seq_file to write to
+ * @mnt: mount descriptor
+ *
+ */
+
+static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info;
+
+	if (v9ses->debug != 0)
+		seq_printf(m, ",debug=%u", v9ses->debug);
+	if (v9ses->port != V9FS_PORT)
+		seq_printf(m, ",port=%u", v9ses->port);
+	if (v9ses->maxdata != 9000)
+		seq_printf(m, ",msize=%u", v9ses->maxdata);
+	if (v9ses->afid != ~0)
+		seq_printf(m, ",afid=%u", v9ses->afid);
+	if (v9ses->proto == PROTO_UNIX)
+		seq_puts(m, ",proto=unix");
+	if (v9ses->extended == 0)
+		seq_puts(m, ",noextend");
+	if (v9ses->nodev == 1)
+		seq_puts(m, ",nodevmap");
+	seq_printf(m, ",name=%s", v9ses->name);
+	seq_printf(m, ",aname=%s", v9ses->remotename);
+	seq_printf(m, ",uid=%u", v9ses->uid);
+	seq_printf(m, ",gid=%u", v9ses->gid);
+	return 0;
+}
+
+static void
+v9fs_umount_begin(struct super_block *sb)
+{
+	struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+	v9fs_session_cancel(v9ses);
+}
+
+static struct super_operations v9fs_super_ops = {
+	.statfs = simple_statfs,
+	.clear_inode = v9fs_clear_inode,
+	.show_options = v9fs_show_options,
+	.umount_begin = v9fs_umount_begin,
+};
+
+struct file_system_type v9fs_fs_type = {
+	.name = "9P",
+	.get_sb = v9fs_get_sb,
+	.kill_sb = v9fs_kill_super,
+	.owner = THIS_MODULE,
+};
diff --git a/fs/Kconfig b/fs/Kconfig
index 5d0c4be43db..01a295232f7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -363,12 +363,15 @@ config INOTIFY
 	bool "Inotify file change notification support"
 	default y
 	---help---
-	  Say Y here to enable inotify support and the /dev/inotify character
-	  device.  Inotify is a file change notification system and a
+	  Say Y here to enable inotify support and the associated system
+	  calls.  Inotify is a file change notification system and a
 	  replacement for dnotify.  Inotify fixes numerous shortcomings in
 	  dnotify and introduces several new features.  It allows monitoring
-	  of both files and directories via a single open fd.  Multiple file
-	  events are supported.
+	  of both files and directories via a single open fd.  Other features
+	  include multiple file events, one-shot support, and unmount
+	  notification.
+
+	  For more information, see Documentation/filesystems/inotify.txt
 
 	  If unsure, say Y.
 
@@ -379,10 +382,8 @@ config QUOTA
 	  usage (also called disk quotas). Currently, it works for the
 	  ext2, ext3, and reiserfs file system. ext3 also supports journalled
 	  quotas for which you don't need to run quotacheck(8) after an unclean
-	  shutdown. You need additional software in order to use quota support
-	  (you can download sources from
-	  <http://www.sf.net/projects/linuxquota/>). For further details, read
-	  the Quota mini-HOWTO, available from
+	  shutdown.
+	  For further details, read the Quota mini-HOWTO, available from
 	  <http://www.tldp.org/docs.html#howto>, or the documentation provided
 	  with the quota tools. Probably the quota support is only useful for
 	  multi user systems. If unsure, say N.
@@ -400,8 +401,7 @@ config QFMT_V2
 	depends on QUOTA
 	help
 	  This quota format allows using quotas with 32-bit UIDs/GIDs. If you
-	  need this functionality say Y here. Note that you will need recent
-	  quota utilities (>= 3.01) for new quota format with this kernel.
+	  need this functionality say Y here.
 
 config QUOTACTL
 	bool
@@ -462,6 +462,22 @@ config AUTOFS4_FS
 	  local network, you probably do not need an automounter, and can say
 	  N here.
 
+config FUSE_FS
+	tristate "Filesystem in Userspace support"
+	help
+	  With FUSE it is possible to implement a fully functional filesystem
+	  in a userspace program.
+
+	  There's also companion library: libfuse.  This library along with
+	  utilities is available from the FUSE homepage:
+	  <http://fuse.sourceforge.net/>
+
+	  See <file:Documentation/filesystems/fuse.txt> for more information.
+	  See <file:Documentation/Changes> for needed library/utility version.
+
+	  If you want to develop a userspace FS, or if you want to use
+	  a filesystem based on FUSE, answer Y or M.
+
 menu "CD-ROM/DVD Filesystems"
 
 config ISO9660_FS
@@ -780,28 +796,6 @@ config SYSFS
 
 	Designers of embedded systems may wish to say N here to conserve space.
 
-config DEVPTS_FS_XATTR
-	bool "/dev/pts Extended Attributes"
-	depends on UNIX98_PTYS
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-config DEVPTS_FS_SECURITY
-	bool "/dev/pts Security Labels"
-	depends on DEVPTS_FS_XATTR
-	help
-	  Security labels support alternative access control models
-	  implemented by security modules like SELinux.  This option
-	  enables an extended attribute handler for file security
-	  labels in the /dev/pts filesystem.
-
-	  If you are not using a security module that requires using
-	  extended attributes for file security labels, say N.
-
 config TMPFS
 	bool "Virtual memory file system support (former shm fs)"
 	help
@@ -814,30 +808,9 @@ config TMPFS
 
 	  See <file:Documentation/filesystems/tmpfs.txt> for details.
 
-config TMPFS_XATTR
-	bool "tmpfs Extended Attributes"
-	depends on TMPFS
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-config TMPFS_SECURITY
-	bool "tmpfs Security Labels"
-	depends on TMPFS_XATTR
-	help
-	  Security labels support alternative access control models
-	  implemented by security modules like SELinux.  This option
-	  enables an extended attribute handler for file security
-	  labels in the tmpfs filesystem.
-	  If you are not using a security module that requires using
-	  extended attributes for file security labels, say N.
-
 config HUGETLBFS
 	bool "HugeTLB file system support"
-	depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || X86_64 || BROKEN
+	depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
 
 config HUGETLB_PAGE
 	def_bool HUGETLBFS
@@ -856,6 +829,18 @@ config RAMFS
 	  To compile this as a module, choose M here: the module will be called
 	  ramfs.
 
+config RELAYFS_FS
+	tristate "Relayfs file system support"
+	---help---
+	  Relayfs is a high-speed data relay filesystem designed to provide
+	  an efficient mechanism for tools and facilities to relay large
+	  amounts of data from kernel space to user space.
+
+	  To compile this code as a module, choose M here: the module will be
+	  called relayfs.
+
+	  If unsure, say N.
+
 endmenu
 
 menu "Miscellaneous filesystems"
@@ -1734,6 +1719,17 @@ config AFS_FS
 config RXRPC
 	tristate
 
+config 9P_FS
+	tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
+	depends on INET && EXPERIMENTAL
+	help
+	  If you say Y here, you will get experimental support for
+	  Plan 9 resource sharing via the 9P2000 protocol.
+
+	  See <http://v9fs.sf.net> for more information.
+
+	  If unsure, say N.
+
 endmenu
 
 menu "Partition Types"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 434c19d076a..175b2e8177c 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -57,7 +57,7 @@ config BINFMT_SHARED_FLAT
 
 config BINFMT_AOUT
 	tristate "Kernel support for a.out and ECOFF binaries"
-	depends on (X86 && !X86_64) || ALPHA || ARM || M68K || SPARC32
+	depends on X86_32 || ALPHA || ARM || M68K || SPARC32
 	---help---
 	  A.out (Assembler.OUTput) is a set of formats for libraries and
 	  executables used in the earliest versions of UNIX.  Linux used
diff --git a/fs/Makefile b/fs/Makefile
index cf95eb894fd..1972da18627 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -89,10 +89,13 @@ obj-$(CONFIG_QNX4FS_FS)		+= qnx4/
 obj-$(CONFIG_AUTOFS_FS)		+= autofs/
 obj-$(CONFIG_AUTOFS4_FS)	+= autofs4/
 obj-$(CONFIG_ADFS_FS)		+= adfs/
+obj-$(CONFIG_FUSE_FS)		+= fuse/
 obj-$(CONFIG_UDF_FS)		+= udf/
+obj-$(CONFIG_RELAYFS_FS)	+= relayfs/
 obj-$(CONFIG_SUN_OPENPROMFS)	+= openpromfs/
 obj-$(CONFIG_JFS_FS)		+= jfs/
 obj-$(CONFIG_XFS_FS)		+= xfs/
+obj-$(CONFIG_9P_FS)		+= 9p/
 obj-$(CONFIG_AFS_FS)		+= afs/
 obj-$(CONFIG_BEFS_FS)		+= befs/
 obj-$(CONFIG_HOSTFS)		+= hostfs/
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 63f5df9afb7..fd528433de4 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -97,7 +97,7 @@ extern int adfs_dir_update(struct super_block *sb, struct object_info *obj);
 extern struct inode_operations adfs_file_inode_operations;
 extern struct file_operations adfs_file_operations;
 
-extern inline __u32 signed_asl(__u32 val, signed int shift)
+static inline __u32 signed_asl(__u32 val, signed int shift)
 {
 	if (shift >= 0)
 		val <<= shift;
@@ -112,7 +112,7 @@ extern inline __u32 signed_asl(__u32 val, signed int shift)
  *
  * The root directory ID should always be looked up in the map [3.4]
  */
-extern inline int
+static inline int
 __adfs_block_map(struct super_block *sb, unsigned int object_id,
 		 unsigned int block)
 {
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 7aa6f200453..9ebe881c678 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -255,6 +255,7 @@ void
 affs_delete_inode(struct inode *inode)
 {
 	pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+	truncate_inode_pages(&inode->i_data, 0);
 	inode->i_size = 0;
 	if (S_ISREG(inode->i_mode))
 		affs_truncate(inode);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 23c12512802..4975c9c193d 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -29,7 +29,7 @@ static int afs_file_release(struct inode *inode, struct file *file);
 
 static int afs_file_readpage(struct file *file, struct page *page);
 static int afs_file_invalidatepage(struct page *page, unsigned long offset);
-static int afs_file_releasepage(struct page *page, int gfp_flags);
+static int afs_file_releasepage(struct page *page, gfp_t gfp_flags);
 
 static ssize_t afs_file_write(struct file *file, const char __user *buf,
 			      size_t size, loff_t *off);
@@ -279,7 +279,7 @@ static int afs_file_invalidatepage(struct page *page, unsigned long offset)
 /*
  * release a page and cleanup its private data
  */
-static int afs_file_releasepage(struct page *page, int gfp_flags)
+static int afs_file_releasepage(struct page *page, gfp_t gfp_flags)
 {
 	struct cachefs_page *pageio;
 
@@ -291,8 +291,8 @@ static int afs_file_releasepage(struct page *page, int gfp_flags)
 		cachefs_uncache_page(vnode->cache, page);
 #endif
 
-		pageio = (struct cachefs_page *) page->private;
-		page->private = 0;
+		pageio = (struct cachefs_page *) page_private(page);
+		set_page_private(page, 0);
 		ClearPagePrivate(page);
 
 		if (pageio)
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index bfc28abe1cb..31ee06590de 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -30,7 +30,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
 				       struct dentry *dentry,
 				       struct nameidata *nd);
 static int afs_mntpt_open(struct inode *inode, struct file *file);
-static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd);
+static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd);
 
 struct file_operations afs_mntpt_file_operations = {
 	.open		= afs_mntpt_open,
@@ -233,7 +233,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 /*
  * follow a link from a mountpoint directory, thus causing it to be mounted
  */
-static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct vfsmount *newmnt;
 	struct dentry *old_dentry;
@@ -249,7 +249,7 @@ static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 	newmnt = afs_mntpt_do_automount(dentry);
 	if (IS_ERR(newmnt)) {
 		path_release(nd);
-		return PTR_ERR(newmnt);
+		return (void *)newmnt;
 	}
 
 	old_dentry = nd->dentry;
@@ -267,7 +267,7 @@ static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 	}
 
 	kleave(" = %d", err);
-	return err;
+	return ERR_PTR(err);
 } /* end afs_mntpt_follow_link() */
 
 /*****************************************************************************/
diff --git a/fs/aio.c b/fs/aio.c
index 06d7d4390fe..edfca5b7553 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,6 +29,7 @@
 #include <linux/highmem.h>
 #include <linux/workqueue.h>
 #include <linux/security.h>
+#include <linux/rcuref.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -397,7 +398,7 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx)
 	if (unlikely(!req))
 		return NULL;
 
-	req->ki_flags = 1 << KIF_LOCKED;
+	req->ki_flags = 0;
 	req->ki_users = 2;
 	req->ki_key = 0;
 	req->ki_ctx = ctx;
@@ -499,7 +500,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	/* Must be done under the lock to serialise against cancellation.
 	 * Call this aio_fput as it duplicates fput via the fput_work.
 	 */
-	if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) {
+	if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) {
 		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
@@ -567,6 +568,10 @@ static void use_mm(struct mm_struct *mm)
 	atomic_inc(&mm->mm_count);
 	tsk->mm = mm;
 	tsk->active_mm = mm;
+	/*
+	 * Note that on UML this *requires* PF_BORROWED_MM to be set, otherwise
+	 * it won't work. Update it accordingly if you change it here
+	 */
 	activate_mm(active_mm, mm);
 	task_unlock(tsk);
 
@@ -717,19 +722,9 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
 	ret = retry(iocb);
 	current->io_wait = NULL;
 
-	if (-EIOCBRETRY != ret) {
- 		if (-EIOCBQUEUED != ret) {
-			BUG_ON(!list_empty(&iocb->ki_wait.task_list));
-			aio_complete(iocb, ret, 0);
-			/* must not access the iocb after this */
-		}
-	} else {
-		/*
-		 * Issue an additional retry to avoid waiting forever if
-		 * no waits were queued (e.g. in case of a short read).
-		 */
-		if (list_empty(&iocb->ki_wait.task_list))
-			kiocbSetKicked(iocb);
+	if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
+		BUG_ON(!list_empty(&iocb->ki_wait.task_list));
+		aio_complete(iocb, ret, 0);
 	}
 out:
 	spin_lock_irq(&ctx->ctx_lock);
@@ -873,16 +868,24 @@ static void aio_kick_handler(void *data)
  * and if required activate the aio work queue to process
  * it
  */
-static void queue_kicked_iocb(struct kiocb *iocb)
+static void try_queue_kicked_iocb(struct kiocb *iocb)
 {
  	struct kioctx	*ctx = iocb->ki_ctx;
 	unsigned long flags;
 	int run = 0;
 
-	WARN_ON((!list_empty(&iocb->ki_wait.task_list)));
+	/* We're supposed to be the only path putting the iocb back on the run
+	 * list.  If we find that the iocb is *back* on a wait queue already
+	 * than retry has happened before we could queue the iocb.  This also
+	 * means that the retry could have completed and freed our iocb, no
+	 * good. */
+	BUG_ON((!list_empty(&iocb->ki_wait.task_list)));
 
 	spin_lock_irqsave(&ctx->ctx_lock, flags);
-	run = __queue_kicked_iocb(iocb);
+	/* set this inside the lock so that we can't race with aio_run_iocb()
+	 * testing it and putting the iocb on the run list under the lock */
+	if (!kiocbTryKick(iocb))
+		run = __queue_kicked_iocb(iocb);
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 	if (run)
 		aio_queue_work(ctx);
@@ -905,10 +908,7 @@ void fastcall kick_iocb(struct kiocb *iocb)
 		return;
 	}
 
-	/* If its already kicked we shouldn't queue it again */
-	if (!kiocbTryKick(iocb)) {
-		queue_kicked_iocb(iocb);
-	}
+	try_queue_kicked_iocb(iocb);
 }
 EXPORT_SYMBOL(kick_iocb);
 
@@ -1296,8 +1296,11 @@ asmlinkage long sys_io_destroy(aio_context_t ctx)
 }
 
 /*
- * Default retry method for aio_read (also used for first time submit)
- * Responsible for updating iocb state as retries progress
+ * aio_p{read,write} are the default  ki_retry methods for
+ * IO_CMD_P{READ,WRITE}.  They maintains kiocb retry state around potentially
+ * multiple calls to f_op->aio_read().  They loop around partial progress
+ * instead of returning -EIOCBRETRY because they don't have the means to call
+ * kick_iocb().
  */
 static ssize_t aio_pread(struct kiocb *iocb)
 {
@@ -1306,25 +1309,25 @@ static ssize_t aio_pread(struct kiocb *iocb)
 	struct inode *inode = mapping->host;
 	ssize_t ret = 0;
 
-	ret = file->f_op->aio_read(iocb, iocb->ki_buf,
-		iocb->ki_left, iocb->ki_pos);
+	do {
+		ret = file->f_op->aio_read(iocb, iocb->ki_buf,
+			iocb->ki_left, iocb->ki_pos);
+		/*
+		 * Can't just depend on iocb->ki_left to determine
+		 * whether we are done. This may have been a short read.
+		 */
+		if (ret > 0) {
+			iocb->ki_buf += ret;
+			iocb->ki_left -= ret;
+		}
 
-	/*
-	 * Can't just depend on iocb->ki_left to determine
-	 * whether we are done. This may have been a short read.
-	 */
-	if (ret > 0) {
-		iocb->ki_buf += ret;
-		iocb->ki_left -= ret;
 		/*
-		 * For pipes and sockets we return once we have
-		 * some data; for regular files we retry till we
-		 * complete the entire read or find that we can't
-		 * read any more data (e.g short reads).
+		 * For pipes and sockets we return once we have some data; for
+		 * regular files we retry till we complete the entire read or
+		 * find that we can't read any more data (e.g short reads).
 		 */
-		if (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))
-			ret = -EIOCBRETRY;
-	}
+	} while (ret > 0 && iocb->ki_left > 0 &&
+		 !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode));
 
 	/* This means we must have transferred all that we could */
 	/* No need to retry anymore */
@@ -1334,27 +1337,21 @@ static ssize_t aio_pread(struct kiocb *iocb)
 	return ret;
 }
 
-/*
- * Default retry method for aio_write (also used for first time submit)
- * Responsible for updating iocb state as retries progress
- */
+/* see aio_pread() */
 static ssize_t aio_pwrite(struct kiocb *iocb)
 {
 	struct file *file = iocb->ki_filp;
 	ssize_t ret = 0;
 
-	ret = file->f_op->aio_write(iocb, iocb->ki_buf,
-		iocb->ki_left, iocb->ki_pos);
-
-	if (ret > 0) {
-		iocb->ki_buf += ret;
-		iocb->ki_left -= ret;
-
-		ret = -EIOCBRETRY;
-	}
+	do {
+		ret = file->f_op->aio_write(iocb, iocb->ki_buf,
+			iocb->ki_left, iocb->ki_pos);
+		if (ret > 0) {
+			iocb->ki_buf += ret;
+			iocb->ki_left -= ret;
+		}
+	} while (ret > 0 && iocb->ki_left > 0);
 
-	/* This means we must have transferred all that we could */
-	/* No need to retry anymore */
 	if ((ret == 0) || (iocb->ki_left == 0))
 		ret = iocb->ki_nbytes - iocb->ki_left;
 
@@ -1400,6 +1397,9 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
 		if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf,
 			kiocb->ki_left)))
 			break;
+		ret = security_file_permission(file, MAY_READ);
+		if (unlikely(ret))
+			break;
 		ret = -EINVAL;
 		if (file->f_op->aio_read)
 			kiocb->ki_retry = aio_pread;
@@ -1412,6 +1412,9 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
 		if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf,
 			kiocb->ki_left)))
 			break;
+		ret = security_file_permission(file, MAY_WRITE);
+		if (unlikely(ret))
+			break;
 		ret = -EINVAL;
 		if (file->f_op->aio_write)
 			kiocb->ki_retry = aio_pwrite;
@@ -1523,10 +1526,8 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		goto out_put_req;
 
 	spin_lock_irq(&ctx->ctx_lock);
-	if (likely(list_empty(&ctx->run_list))) {
-		aio_run_iocb(req);
-	} else {
-		list_add_tail(&req->ki_run_list, &ctx->run_list);
+	aio_run_iocb(req);
+	if (!list_empty(&ctx->run_list)) {
 		/* drain the run list */
 		while (__aio_run_iocbs(ctx))
 			;
@@ -1669,7 +1670,7 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
 				ret = -EFAULT;
 		}
 	} else
-		printk(KERN_DEBUG "iocb has no cancel operation\n");
+		ret = -EINVAL;
 
 	put_ioctx(ctx);
 
diff --git a/fs/attr.c b/fs/attr.c
index b1796fb9e52..67bcd9b14ea 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -117,9 +117,6 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
 	struct timespec now;
 	unsigned int ia_valid = attr->ia_valid;
 
-	if (!inode)
-		BUG();
-
 	mode = inode->i_mode;
 	now = current_fs_time(inode->i_sb);
 
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 6171431272d..990c28da5ae 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -105,6 +105,7 @@ struct autofs_sb_info {
 	struct file *pipe;
 	pid_t oz_pgrp;
 	int catatonic;
+	struct super_block *sb;
 	unsigned long exp_timeout;
 	ino_t next_dir_ino;
 	struct autofs_wait_queue *queues; /* Wait queue pointer */
@@ -134,7 +135,7 @@ void autofs_hash_insert(struct autofs_dirhash *,struct autofs_dir_ent *);
 void autofs_hash_delete(struct autofs_dir_ent *);
 struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *);
 void autofs_hash_dputall(struct autofs_dirhash *);
-void autofs_hash_nuke(struct autofs_dirhash *);
+void autofs_hash_nuke(struct autofs_sb_info *);
 
 /* Expiration-handling functions */
 
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 448143fd079..5ccfcf26310 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -232,13 +232,13 @@ void autofs_hash_dputall(struct autofs_dirhash *dh)
 
 /* Delete everything.  This is used on filesystem destruction, so we
    make no attempt to keep the pointers valid */
-void autofs_hash_nuke(struct autofs_dirhash *dh)
+void autofs_hash_nuke(struct autofs_sb_info *sbi)
 {
 	int i;
 	struct autofs_dir_ent *ent, *nent;
 
 	for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
-		for ( ent = dh->h[i] ; ent ; ent = nent ) {
+		for ( ent = sbi->dirhash.h[i] ; ent ; ent = nent ) {
 			nent = ent->next;
 			if ( ent->dentry )
 				dput(ent->dentry);
@@ -246,4 +246,5 @@ void autofs_hash_nuke(struct autofs_dirhash *dh)
 			kfree(ent);
 		}
 	}
+	shrink_dcache_sb(sbi->sb);
 }
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 4888c1fabbf..65e5ed42190 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -27,7 +27,7 @@ static void autofs_put_super(struct super_block *sb)
 	if ( !sbi->catatonic )
 		autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
 
-	autofs_hash_nuke(&sbi->dirhash);
+	autofs_hash_nuke(sbi);
 	for ( n = 0 ; n < AUTOFS_MAX_SYMLINKS ; n++ ) {
 		if ( test_bit(n, sbi->symlink_bitmap) )
 			kfree(sbi->symlink[n].data);
@@ -148,6 +148,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
 	s->s_magic = AUTOFS_SUPER_MAGIC;
 	s->s_op = &autofs_sops;
 	s->s_time_gran = 1;
+	sbi->sb = s;
 
 	root_inode = iget(s, AUTOFS_ROOT_INO);
 	root = d_alloc_root(root_inode);
diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c
index f028396f138..52e8772b066 100644
--- a/fs/autofs/symlink.c
+++ b/fs/autofs/symlink.c
@@ -12,11 +12,12 @@
 
 #include "autofs_i.h"
 
-static int autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
+/* Nothing to release.. */
+static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	char *s=((struct autofs_symlink *)dentry->d_inode->u.generic_ip)->data;
 	nd_set_link(nd, s);
-	return 0;
+	return NULL;
 }
 
 struct inode_operations autofs_symlink_inode_operations = {
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index c265a66edf0..2ea2c98fd84 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -12,11 +12,11 @@
 
 #include "autofs_i.h"
 
-static int autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	nd_set_link(nd, (char *)ino->u.symlink);
-	return 0;
+	return NULL;
 }
 
 struct inode_operations autofs4_symlink_inode_operations = {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index de5bb280a82..e0a6025f1d0 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -41,8 +41,8 @@ static struct inode *befs_alloc_inode(struct super_block *sb);
 static void befs_destroy_inode(struct inode *inode);
 static int befs_init_inodecache(void);
 static void befs_destroy_inodecache(void);
-static int befs_follow_link(struct dentry *, struct nameidata *);
-static void befs_put_link(struct dentry *, struct nameidata *);
+static void *befs_follow_link(struct dentry *, struct nameidata *);
+static void befs_put_link(struct dentry *, struct nameidata *, void *);
 static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
 			char **out, int *out_len);
 static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
@@ -461,7 +461,7 @@ befs_destroy_inodecache(void)
  * The data stream become link name. Unless the LONG_SYMLINK
  * flag is set.
  */
-static int
+static void *
 befs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
@@ -487,10 +487,10 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	}
 
 	nd_set_link(nd, link);
-	return 0;
+	return NULL;
 }
 
-static void befs_put_link(struct dentry *dentry, struct nameidata *nd)
+static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
 {
 	befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
 	if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 1020dbc88be..1fbc53f14ab 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -20,7 +20,6 @@ struct bfs_sb_info {
 	unsigned long si_lasti;
 	unsigned long * si_imap;
 	struct buffer_head * si_sbh;		/* buffer header w/superblock */
-	struct bfs_super_block * si_bfs_sb;	/* superblock in si_sbh->b_data */
 };
 
 /*
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 5a1e5ce057f..5af928fa044 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -2,6 +2,7 @@
  *	fs/bfs/dir.c
  *	BFS directory operations.
  *	Copyright (C) 1999,2000  Tigran Aivazian <tigran@veritas.com>
+ *      Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005
  */
 
 #include <linux/time.h>
@@ -20,9 +21,9 @@
 #define dprintf(x...)
 #endif
 
-static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino);
+static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino);
 static struct buffer_head * bfs_find_entry(struct inode * dir, 
-	const char * name, int namelen, struct bfs_dirent ** res_dir);
+	const unsigned char * name, int namelen, struct bfs_dirent ** res_dir);
 
 static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
 {
@@ -53,7 +54,7 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
 			de = (struct bfs_dirent *)(bh->b_data + offset);
 			if (de->ino) {
 				int size = strnlen(de->name, BFS_NAMELEN);
-				if (filldir(dirent, de->name, size, f->f_pos, de->ino, DT_UNKNOWN) < 0) {
+				if (filldir(dirent, de->name, size, f->f_pos, le16_to_cpu(de->ino), DT_UNKNOWN) < 0) {
 					brelse(bh);
 					unlock_kernel();
 					return 0;
@@ -139,7 +140,7 @@ static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, st
 	lock_kernel();
 	bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
 	if (bh) {
-		unsigned long ino = le32_to_cpu(de->ino);
+		unsigned long ino = (unsigned long)le16_to_cpu(de->ino);
 		brelse(bh);
 		inode = iget(dir->i_sb, ino);
 		if (!inode) {
@@ -183,7 +184,7 @@ static int bfs_unlink(struct inode * dir, struct dentry * dentry)
 	inode = dentry->d_inode;
 	lock_kernel();
 	bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
-	if (!bh || de->ino != inode->i_ino) 
+	if (!bh || le16_to_cpu(de->ino) != inode->i_ino)
 		goto out_brelse;
 
 	if (!inode->i_nlink) {
@@ -224,7 +225,7 @@ static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry,
 				old_dentry->d_name.name, 
 				old_dentry->d_name.len, &old_de);
 
-	if (!old_bh || old_de->ino != old_inode->i_ino)
+	if (!old_bh || le16_to_cpu(old_de->ino) != old_inode->i_ino)
 		goto end_rename;
 
 	error = -EPERM;
@@ -270,7 +271,7 @@ struct inode_operations bfs_dir_inops = {
 	.rename			= bfs_rename,
 };
 
-static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino)
+static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino)
 {
 	struct buffer_head * bh;
 	struct bfs_dirent * de;
@@ -304,7 +305,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int
 				}
 				dir->i_mtime = CURRENT_TIME_SEC;
 				mark_inode_dirty(dir);
-				de->ino = ino;
+				de->ino = cpu_to_le16((u16)ino);
 				for (i=0; i<BFS_NAMELEN; i++)
 					de->name[i] = (i < namelen) ? name[i] : 0;
 				mark_buffer_dirty(bh);
@@ -317,7 +318,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int
 	return -ENOSPC;
 }
 
-static inline int bfs_namecmp(int len, const char * name, const char * buffer)
+static inline int bfs_namecmp(int len, const unsigned char * name, const char * buffer)
 {
 	if (len < BFS_NAMELEN && buffer[len])
 		return 0;
@@ -325,7 +326,7 @@ static inline int bfs_namecmp(int len, const char * name, const char * buffer)
 }
 
 static struct buffer_head * bfs_find_entry(struct inode * dir, 
-	const char * name, int namelen, struct bfs_dirent ** res_dir)
+	const unsigned char * name, int namelen, struct bfs_dirent ** res_dir)
 {
 	unsigned long block, offset;
 	struct buffer_head * bh;
@@ -346,7 +347,7 @@ static struct buffer_head * bfs_find_entry(struct inode * dir,
 		}
 		de = (struct bfs_dirent *)(bh->b_data + offset);
 		offset += BFS_DIRENT_SIZE;
-		if (de->ino && bfs_namecmp(namelen, name, de->name)) {
+		if (le16_to_cpu(de->ino) && bfs_namecmp(namelen, name, de->name)) {
 			*res_dir = de;
 			return bh;
 		}
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 747fd1ea55e..807723b65da 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -40,8 +40,8 @@ static int bfs_move_block(unsigned long from, unsigned long to, struct super_blo
 	return 0;
 }
 
-static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned long end, 
-				unsigned long where)
+static int bfs_move_blocks(struct super_block *sb, unsigned long start,
+                           unsigned long end, unsigned long where)
 {
 	unsigned long i;
 
@@ -57,20 +57,21 @@ static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned
 static int bfs_get_block(struct inode * inode, sector_t block, 
 	struct buffer_head * bh_result, int create)
 {
-	long phys;
+	unsigned long phys;
 	int err;
 	struct super_block *sb = inode->i_sb;
 	struct bfs_sb_info *info = BFS_SB(sb);
 	struct bfs_inode_info *bi = BFS_I(inode);
 	struct buffer_head *sbh = info->si_sbh;
 
-	if (block < 0 || block > info->si_blocks)
+	if (block > info->si_blocks)
 		return -EIO;
 
 	phys = bi->i_sblock + block;
 	if (!create) {
 		if (phys <= bi->i_eblock) {
-			dprintf("c=%d, b=%08lx, phys=%08lx (granted)\n", create, block, phys);
+			dprintf("c=%d, b=%08lx, phys=%09lx (granted)\n",
+                                create, (unsigned long)block, phys);
 			map_bh(bh_result, sb, phys);
 		}
 		return 0;
@@ -80,7 +81,7 @@ static int bfs_get_block(struct inode * inode, sector_t block,
 	   of blocks allocated for this file, we can grant it */
 	if (inode->i_size && phys <= bi->i_eblock) {
 		dprintf("c=%d, b=%08lx, phys=%08lx (interim block granted)\n", 
-				create, block, phys);
+				create, (unsigned long)block, phys);
 		map_bh(bh_result, sb, phys);
 		return 0;
 	}
@@ -88,11 +89,12 @@ static int bfs_get_block(struct inode * inode, sector_t block,
 	/* the rest has to be protected against itself */
 	lock_kernel();
 
-	/* if the last data block for this file is the last allocated block, we can
-	   extend the file trivially, without moving it anywhere */
+	/* if the last data block for this file is the last allocated
+	   block, we can extend the file trivially, without moving it
+	   anywhere */
 	if (bi->i_eblock == info->si_lf_eblk) {
 		dprintf("c=%d, b=%08lx, phys=%08lx (simple extension)\n", 
-				create, block, phys);
+				create, (unsigned long)block, phys);
 		map_bh(bh_result, sb, phys);
 		info->si_freeb -= phys - bi->i_eblock;
 		info->si_lf_eblk = bi->i_eblock = phys;
@@ -114,7 +116,8 @@ static int bfs_get_block(struct inode * inode, sector_t block,
 	} else
 		err = 0;
 
-	dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n", create, block, phys);
+	dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n",
+                create, (unsigned long)block, phys);
 	bi->i_sblock = phys;
 	phys += block;
 	info->si_lf_eblk = bi->i_eblock = phys;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 64e0fb33fc0..3af6c73c5b5 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -3,6 +3,8 @@
  *	BFS superblock and inode operations.
  *	Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
  *	From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
+ *
+ *      Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
  */
 
 #include <linux/module.h>
@@ -54,46 +56,50 @@ static void bfs_read_inode(struct inode * inode)
 	off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
 	di = (struct bfs_inode *)bh->b_data + off;
 
-	inode->i_mode = 0x0000FFFF & di->i_mode;
-	if (di->i_vtype == BFS_VDIR) {
+	inode->i_mode = 0x0000FFFF &  le32_to_cpu(di->i_mode);
+	if (le32_to_cpu(di->i_vtype) == BFS_VDIR) {
 		inode->i_mode |= S_IFDIR;
 		inode->i_op = &bfs_dir_inops;
 		inode->i_fop = &bfs_dir_operations;
-	} else if (di->i_vtype == BFS_VREG) {
+	} else if (le32_to_cpu(di->i_vtype) == BFS_VREG) {
 		inode->i_mode |= S_IFREG;
 		inode->i_op = &bfs_file_inops;
 		inode->i_fop = &bfs_file_operations;
 		inode->i_mapping->a_ops = &bfs_aops;
 	}
 
-	inode->i_uid = di->i_uid;
-	inode->i_gid = di->i_gid;
-	inode->i_nlink = di->i_nlink;
+	BFS_I(inode)->i_sblock =  le32_to_cpu(di->i_sblock);
+	BFS_I(inode)->i_eblock =  le32_to_cpu(di->i_eblock);
+	inode->i_uid =  le32_to_cpu(di->i_uid);
+	inode->i_gid =  le32_to_cpu(di->i_gid);
+	inode->i_nlink =  le32_to_cpu(di->i_nlink);
 	inode->i_size = BFS_FILESIZE(di);
 	inode->i_blocks = BFS_FILEBLOCKS(di);
+        if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks);
 	inode->i_blksize = PAGE_SIZE;
-	inode->i_atime.tv_sec = di->i_atime;
-	inode->i_mtime.tv_sec = di->i_mtime;
-	inode->i_ctime.tv_sec = di->i_ctime;
+	inode->i_atime.tv_sec =  le32_to_cpu(di->i_atime);
+	inode->i_mtime.tv_sec =  le32_to_cpu(di->i_mtime);
+	inode->i_ctime.tv_sec =  le32_to_cpu(di->i_ctime);
 	inode->i_atime.tv_nsec = 0;
 	inode->i_mtime.tv_nsec = 0;
 	inode->i_ctime.tv_nsec = 0;
-	BFS_I(inode)->i_dsk_ino = di->i_ino; /* can be 0 so we store a copy */
-	BFS_I(inode)->i_sblock = di->i_sblock;
-	BFS_I(inode)->i_eblock = di->i_eblock;
+	BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); /* can be 0 so we store a copy */
 
 	brelse(bh);
 }
 
 static int bfs_write_inode(struct inode * inode, int unused)
 {
-	unsigned long ino = inode->i_ino;
+	unsigned int ino = (u16)inode->i_ino;
+        unsigned long i_sblock;
 	struct bfs_inode * di;
 	struct buffer_head * bh;
 	int block, off;
 
+        dprintf("ino=%08x\n", ino);
+
 	if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) {
-		printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino);
+		printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino);
 		return -EIO;
 	}
 
@@ -101,7 +107,7 @@ static int bfs_write_inode(struct inode * inode, int unused)
 	block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1;
 	bh = sb_bread(inode->i_sb, block);
 	if (!bh) {
-		printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino);
+		printf("Unable to read inode %s:%08x\n", inode->i_sb->s_id, ino);
 		unlock_kernel();
 		return -EIO;
 	}
@@ -109,24 +115,26 @@ static int bfs_write_inode(struct inode * inode, int unused)
 	off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK;
 	di = (struct bfs_inode *)bh->b_data + off;
 
-	if (inode->i_ino == BFS_ROOT_INO)
-		di->i_vtype = BFS_VDIR;
+	if (ino == BFS_ROOT_INO)
+		di->i_vtype = cpu_to_le32(BFS_VDIR);
 	else
-		di->i_vtype = BFS_VREG;
-
-	di->i_ino = inode->i_ino;
-	di->i_mode = inode->i_mode;
-	di->i_uid = inode->i_uid;
-	di->i_gid = inode->i_gid;
-	di->i_nlink = inode->i_nlink;
-	di->i_atime = inode->i_atime.tv_sec;
-	di->i_mtime = inode->i_mtime.tv_sec;
-	di->i_ctime = inode->i_ctime.tv_sec;
-	di->i_sblock = BFS_I(inode)->i_sblock;
-	di->i_eblock = BFS_I(inode)->i_eblock;
-	di->i_eoffset = di->i_sblock * BFS_BSIZE + inode->i_size - 1;
+		di->i_vtype = cpu_to_le32(BFS_VREG);
+
+	di->i_ino = cpu_to_le16(ino);
+	di->i_mode = cpu_to_le32(inode->i_mode);
+	di->i_uid = cpu_to_le32(inode->i_uid);
+	di->i_gid = cpu_to_le32(inode->i_gid);
+	di->i_nlink = cpu_to_le32(inode->i_nlink);
+	di->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
+	di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
+	di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
+        i_sblock = BFS_I(inode)->i_sblock;
+	di->i_sblock = cpu_to_le32(i_sblock);
+	di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock);
+	di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
 
 	mark_buffer_dirty(bh);
+        dprintf("Written ino=%d into %d:%d\n",le16_to_cpu(di->i_ino),block,off);
 	brelse(bh);
 	unlock_kernel();
 	return 0;
@@ -140,11 +148,14 @@ static void bfs_delete_inode(struct inode * inode)
 	int block, off;
 	struct super_block * s = inode->i_sb;
 	struct bfs_sb_info * info = BFS_SB(s);
+	struct bfs_inode_info * bi = BFS_I(inode);
+
+	dprintf("ino=%08lx\n", ino);
 
-	dprintf("ino=%08lx\n", inode->i_ino);
+	truncate_inode_pages(&inode->i_data, 0);
 
-	if (inode->i_ino < BFS_ROOT_INO || inode->i_ino > info->si_lasti) {
-		printf("invalid ino=%08lx\n", inode->i_ino);
+	if (ino < BFS_ROOT_INO || ino > info->si_lasti) {
+		printf("invalid ino=%08lx\n", ino);
 		return;
 	}
 	
@@ -160,13 +171,13 @@ static void bfs_delete_inode(struct inode * inode)
 		return;
 	}
 	off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK;
-	di = (struct bfs_inode *)bh->b_data + off;
-	if (di->i_ino) {
-		info->si_freeb += BFS_FILEBLOCKS(di);
+	di = (struct bfs_inode *) bh->b_data + off;
+        if (bi->i_dsk_ino) {
+		info->si_freeb += 1 + bi->i_eblock - bi->i_sblock;
 		info->si_freei++;
-		clear_bit(di->i_ino, info->si_imap);
+		clear_bit(ino, info->si_imap);
 		dump_imap("delete_inode", s);
-	}
+        }
 	di->i_ino = 0;
 	di->i_sblock = 0;
 	mark_buffer_dirty(bh);
@@ -272,14 +283,14 @@ static struct super_operations bfs_sops = {
 
 void dump_imap(const char *prefix, struct super_block * s)
 {
-#if 0
+#ifdef DEBUG
 	int i;
 	char *tmpbuf = (char *)get_zeroed_page(GFP_KERNEL);
 
 	if (!tmpbuf)
 		return;
 	for (i=BFS_SB(s)->si_lasti; i>=0; i--) {
-		if (i>PAGE_SIZE-100) break;
+		if (i > PAGE_SIZE-100) break;
 		if (test_bit(i, BFS_SB(s)->si_imap))
 			strcat(tmpbuf, "1");
 		else
@@ -295,7 +306,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 	struct buffer_head * bh;
 	struct bfs_super_block * bfs_sb;
 	struct inode * inode;
-	int i, imap_len;
+	unsigned i, imap_len;
 	struct bfs_sb_info * info;
 
 	info = kmalloc(sizeof(*info), GFP_KERNEL);
@@ -310,19 +321,18 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 	if(!bh)
 		goto out;
 	bfs_sb = (struct bfs_super_block *)bh->b_data;
-	if (bfs_sb->s_magic != BFS_MAGIC) {
+	if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
 		if (!silent)
 			printf("No BFS filesystem on %s (magic=%08x)\n", 
-				s->s_id, bfs_sb->s_magic);
+				s->s_id,  le32_to_cpu(bfs_sb->s_magic));
 		goto out;
 	}
 	if (BFS_UNCLEAN(bfs_sb, s) && !silent)
 		printf("%s is unclean, continuing\n", s->s_id);
 
 	s->s_magic = BFS_MAGIC;
-	info->si_bfs_sb = bfs_sb;
 	info->si_sbh = bh;
-	info->si_lasti = (bfs_sb->s_start - BFS_BSIZE)/sizeof(struct bfs_inode) 
+	info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE)/sizeof(struct bfs_inode)
 			+ BFS_ROOT_INO - 1;
 
 	imap_len = info->si_lasti/8 + 1;
@@ -346,29 +356,47 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 		goto out;
 	}
 
-	info->si_blocks = (bfs_sb->s_end + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */
-	info->si_freeb = (bfs_sb->s_end + 1 - bfs_sb->s_start)>>BFS_BSIZE_BITS;
+	info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */
+	info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 -  le32_to_cpu(bfs_sb->s_start))>>BFS_BSIZE_BITS;
 	info->si_freei = 0;
 	info->si_lf_eblk = 0;
 	info->si_lf_sblk = 0;
 	info->si_lf_ioff = 0;
+	bh = NULL;
 	for (i=BFS_ROOT_INO; i<=info->si_lasti; i++) {
-		inode = iget(s,i);
-		if (BFS_I(inode)->i_dsk_ino == 0)
+		struct bfs_inode *di;
+		int block = (i - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1;
+		int off = (i - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
+		unsigned long sblock, eblock;
+
+		if (!off) {
+			brelse(bh);
+			bh = sb_bread(s, block);
+		}
+
+		if (!bh)
+			continue;
+
+		di = (struct bfs_inode *)bh->b_data + off;
+
+		if (!di->i_ino) {
 			info->si_freei++;
-		else {
-			set_bit(i, info->si_imap);
-			info->si_freeb -= inode->i_blocks;
-			if (BFS_I(inode)->i_eblock > info->si_lf_eblk) {
-				info->si_lf_eblk = BFS_I(inode)->i_eblock;
-				info->si_lf_sblk = BFS_I(inode)->i_sblock;
-				info->si_lf_ioff = BFS_INO2OFF(i);
-			}
+			continue;
+		}
+		set_bit(i, info->si_imap);
+		info->si_freeb -= BFS_FILEBLOCKS(di);
+
+		sblock =  le32_to_cpu(di->i_sblock);
+		eblock =  le32_to_cpu(di->i_eblock);
+		if (eblock > info->si_lf_eblk) {
+			info->si_lf_eblk = eblock;
+			info->si_lf_sblk = sblock;
+			info->si_lf_ioff = BFS_INO2OFF(i);
 		}
-		iput(inode);
 	}
+	brelse(bh);
 	if (!(s->s_flags & MS_RDONLY)) {
-		mark_buffer_dirty(bh);
+		mark_buffer_dirty(info->si_sbh);
 		s->s_dirt = 1;
 	} 
 	dump_imap("read_super", s);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index dd9baabaf01..72011826f0c 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -318,7 +318,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	current->mm->free_area_cache = current->mm->mmap_base;
 	current->mm->cached_hole_size = 0;
 
-	set_mm_counter(current->mm, rss, 0);
 	current->mm->mmap = NULL;
 	compute_creds(bprm);
  	current->flags &= ~PF_FORKNOEXEC;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7976a238f0a..6fa6adc4097 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -773,7 +773,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
 	/* Do this so that we can load the interpreter, if need be.  We will
 	   change some of these later */
-	set_mm_counter(current->mm, rss, 0);
 	current->mm->free_area_cache = current->mm->mmap_base;
 	current->mm->cached_hole_size = 0;
 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
@@ -905,7 +904,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 		send_sig(SIGKILL, current, 0);
 		goto out_free_dentry;
 	}
-	if (padzero(elf_bss)) {
+	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
 		send_sig(SIGSEGV, current, 0);
 		retval = -EFAULT; /* Nobody gets to see this, but.. */
 		goto out_free_dentry;
@@ -1503,9 +1502,7 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
 	fill_psinfo(psinfo, current->group_leader, current->mm);
 	fill_note(notes +1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
 	
-	fill_note(notes +2, "CORE", NT_TASKSTRUCT, sizeof(*current), current);
-  
-	numnote = 3;
+	numnote = 2;
 
 	auxv = (elf_addr_t *) current->mm->saved_auxv;
 
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 134c9c0d1f5..dda87c4c82a 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -294,14 +294,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 				  &interp_params,
 				  &current->mm->start_stack,
 				  &current->mm->start_brk);
-#endif
-
-	/* do this so that we can load the interpreter, if need be
-	 * - we will change some of these later
-	 */
-	set_mm_counter(current->mm, rss, 0);
 
-#ifdef CONFIG_MMU
 	retval = setup_arg_pages(bprm, current->mm->start_stack, executable_stack);
 	if (retval < 0) {
 		send_sig(SIGKILL, current, 0);
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index c8998dc6688..9d6625829b9 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -520,7 +520,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 		DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
 
 		down_write(&current->mm->mmap_sem);
-		textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_SHARED, 0);
+		textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE, 0);
 		up_write(&current->mm->mmap_sem);
 		if (!textpos  || textpos >= (unsigned long) -4096) {
 			if (!textpos)
@@ -650,7 +650,6 @@ static int load_flat_file(struct linux_binprm * bprm,
 		current->mm->start_brk = datapos + data_len + bss_len;
 		current->mm->brk = (current->mm->start_brk + 3) & ~3;
 		current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len;
-		set_mm_counter(current->mm, rss, 0);
 	}
 
 	if (flags & FLAT_FLAG_KTRACE)
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 227a2682d2b..00a91dc25d1 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -259,7 +259,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	create_som_tables(bprm);
 
 	current->mm->start_stack = bprm->p;
-	set_mm_counter(current->mm, rss, 0);
 
 #if 0
 	printk("(start_brk) %08lx\n" , (unsigned long) current->mm->start_brk);
diff --git a/fs/bio.c b/fs/bio.c
index 249dd6bb66c..460554b07ff 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
+#include <scsi/sg.h>		/* for struct sg_iovec */
 
 #define BIO_POOL_SIZE 256
 
@@ -74,7 +75,7 @@ struct bio_set {
  */
 static struct bio_set *fs_bio_set;
 
-static inline struct bio_vec *bvec_alloc_bs(unsigned int __nocast gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
+static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
 {
 	struct bio_vec *bvl;
 	struct biovec_slab *bp;
@@ -104,18 +105,22 @@ static inline struct bio_vec *bvec_alloc_bs(unsigned int __nocast gfp_mask, int
 	return bvl;
 }
 
-/*
- * default destructor for a bio allocated with bio_alloc_bioset()
- */
-static void bio_destructor(struct bio *bio)
+void bio_free(struct bio *bio, struct bio_set *bio_set)
 {
 	const int pool_idx = BIO_POOL_IDX(bio);
-	struct bio_set *bs = bio->bi_set;
 
 	BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
 
-	mempool_free(bio->bi_io_vec, bs->bvec_pools[pool_idx]);
-	mempool_free(bio, bs->bio_pool);
+	mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
+	mempool_free(bio, bio_set->bio_pool);
+}
+
+/*
+ * default destructor for a bio allocated with bio_alloc_bioset()
+ */
+static void bio_fs_destructor(struct bio *bio)
+{
+	bio_free(bio, fs_bio_set);
 }
 
 inline void bio_init(struct bio *bio)
@@ -150,7 +155,7 @@ inline void bio_init(struct bio *bio)
  *   allocate bio and iovecs from the memory pools specified by the
  *   bio_set structure.
  **/
-struct bio *bio_alloc_bioset(unsigned int __nocast gfp_mask, int nr_iovecs, struct bio_set *bs)
+struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 {
 	struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask);
 
@@ -171,16 +176,19 @@ struct bio *bio_alloc_bioset(unsigned int __nocast gfp_mask, int nr_iovecs, stru
 			bio->bi_max_vecs = bvec_slabs[idx].nr_vecs;
 		}
 		bio->bi_io_vec = bvl;
-		bio->bi_destructor = bio_destructor;
-		bio->bi_set = bs;
 	}
 out:
 	return bio;
 }
 
-struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs)
+struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
 {
-	return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
+	struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
+
+	if (bio)
+		bio->bi_destructor = bio_fs_destructor;
+
+	return bio;
 }
 
 void zero_fill_bio(struct bio *bio)
@@ -248,17 +256,13 @@ inline void __bio_clone(struct bio *bio, struct bio *bio_src)
 {
 	request_queue_t *q = bdev_get_queue(bio_src->bi_bdev);
 
-	memcpy(bio->bi_io_vec, bio_src->bi_io_vec, bio_src->bi_max_vecs * sizeof(struct bio_vec));
+	memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
+		bio_src->bi_max_vecs * sizeof(struct bio_vec));
 
 	bio->bi_sector = bio_src->bi_sector;
 	bio->bi_bdev = bio_src->bi_bdev;
 	bio->bi_flags |= 1 << BIO_CLONED;
 	bio->bi_rw = bio_src->bi_rw;
-
-	/*
-	 * notes -- maybe just leave bi_idx alone. assume identical mapping
-	 * for the clone
-	 */
 	bio->bi_vcnt = bio_src->bi_vcnt;
 	bio->bi_size = bio_src->bi_size;
 	bio->bi_idx = bio_src->bi_idx;
@@ -273,12 +277,14 @@ inline void __bio_clone(struct bio *bio, struct bio *bio_src)
  *
  * 	Like __bio_clone, only also allocates the returned bio
  */
-struct bio *bio_clone(struct bio *bio, unsigned int __nocast gfp_mask)
+struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
 {
 	struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
 
-	if (b)
+	if (b) {
+		b->bi_destructor = bio_fs_destructor;
 		__bio_clone(b, bio);
+	}
 
 	return b;
 }
@@ -550,22 +556,34 @@ out_bmd:
 	return ERR_PTR(ret);
 }
 
-static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
-				  unsigned long uaddr, unsigned int len,
-				  int write_to_vm)
+static struct bio *__bio_map_user_iov(request_queue_t *q,
+				      struct block_device *bdev,
+				      struct sg_iovec *iov, int iov_count,
+				      int write_to_vm)
 {
-	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	unsigned long start = uaddr >> PAGE_SHIFT;
-	const int nr_pages = end - start;
-	int ret, offset, i;
+	int i, j;
+	int nr_pages = 0;
 	struct page **pages;
 	struct bio *bio;
+	int cur_page = 0;
+	int ret, offset;
 
-	/*
-	 * transfer and buffer must be aligned to at least hardsector
-	 * size for now, in the future we can relax this restriction
-	 */
-	if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
+	for (i = 0; i < iov_count; i++) {
+		unsigned long uaddr = (unsigned long)iov[i].iov_base;
+		unsigned long len = iov[i].iov_len;
+		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		unsigned long start = uaddr >> PAGE_SHIFT;
+
+		nr_pages += end - start;
+		/*
+		 * transfer and buffer must be aligned to at least hardsector
+		 * size for now, in the future we can relax this restriction
+		 */
+		if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
+			return ERR_PTR(-EINVAL);
+	}
+
+	if (!nr_pages)
 		return ERR_PTR(-EINVAL);
 
 	bio = bio_alloc(GFP_KERNEL, nr_pages);
@@ -577,42 +595,54 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
 	if (!pages)
 		goto out;
 
-	down_read(&current->mm->mmap_sem);
-	ret = get_user_pages(current, current->mm, uaddr, nr_pages,
-						write_to_vm, 0, pages, NULL);
-	up_read(&current->mm->mmap_sem);
-
-	if (ret < nr_pages)
-		goto out;
-
-	bio->bi_bdev = bdev;
-
-	offset = uaddr & ~PAGE_MASK;
-	for (i = 0; i < nr_pages; i++) {
-		unsigned int bytes = PAGE_SIZE - offset;
-
-		if (len <= 0)
-			break;
-
-		if (bytes > len)
-			bytes = len;
+	memset(pages, 0, nr_pages * sizeof(struct page *));
+
+	for (i = 0; i < iov_count; i++) {
+		unsigned long uaddr = (unsigned long)iov[i].iov_base;
+		unsigned long len = iov[i].iov_len;
+		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		unsigned long start = uaddr >> PAGE_SHIFT;
+		const int local_nr_pages = end - start;
+		const int page_limit = cur_page + local_nr_pages;
+		
+		down_read(&current->mm->mmap_sem);
+		ret = get_user_pages(current, current->mm, uaddr,
+				     local_nr_pages,
+				     write_to_vm, 0, &pages[cur_page], NULL);
+		up_read(&current->mm->mmap_sem);
+
+		if (ret < local_nr_pages)
+			goto out_unmap;
+
+
+		offset = uaddr & ~PAGE_MASK;
+		for (j = cur_page; j < page_limit; j++) {
+			unsigned int bytes = PAGE_SIZE - offset;
+
+			if (len <= 0)
+				break;
+			
+			if (bytes > len)
+				bytes = len;
+
+			/*
+			 * sorry...
+			 */
+			if (__bio_add_page(q, bio, pages[j], bytes, offset) < bytes)
+				break;
+
+			len -= bytes;
+			offset = 0;
+		}
 
+		cur_page = j;
 		/*
-		 * sorry...
+		 * release the pages we didn't map into the bio, if any
 		 */
-		if (__bio_add_page(q, bio, pages[i], bytes, offset) < bytes)
-			break;
-
-		len -= bytes;
-		offset = 0;
+		while (j < page_limit)
+			page_cache_release(pages[j++]);
 	}
 
-	/*
-	 * release the pages we didn't map into the bio, if any
-	 */
-	while (i < nr_pages)
-		page_cache_release(pages[i++]);
-
 	kfree(pages);
 
 	/*
@@ -621,9 +651,17 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
 	if (!write_to_vm)
 		bio->bi_rw |= (1 << BIO_RW);
 
+	bio->bi_bdev = bdev;
 	bio->bi_flags |= (1 << BIO_USER_MAPPED);
 	return bio;
-out:
+
+ out_unmap:
+	for (i = 0; i < nr_pages; i++) {
+		if(!pages[i])
+			break;
+		page_cache_release(pages[i]);
+	}
+ out:
 	kfree(pages);
 	bio_put(bio);
 	return ERR_PTR(ret);
@@ -643,9 +681,33 @@ out:
 struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
 			 unsigned long uaddr, unsigned int len, int write_to_vm)
 {
+	struct sg_iovec iov;
+
+	iov.iov_base = (void __user *)uaddr;
+	iov.iov_len = len;
+
+	return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm);
+}
+
+/**
+ *	bio_map_user_iov - map user sg_iovec table into bio
+ *	@q: the request_queue_t for the bio
+ *	@bdev: destination block device
+ *	@iov:	the iovec.
+ *	@iov_count: number of elements in the iovec
+ *	@write_to_vm: bool indicating writing to pages or not
+ *
+ *	Map the user space address into a bio suitable for io to a block
+ *	device. Returns an error pointer in case of error.
+ */
+struct bio *bio_map_user_iov(request_queue_t *q, struct block_device *bdev,
+			     struct sg_iovec *iov, int iov_count,
+			     int write_to_vm)
+{
 	struct bio *bio;
+	int len = 0, i;
 
-	bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm);
+	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm);
 
 	if (IS_ERR(bio))
 		return bio;
@@ -658,6 +720,9 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
 	 */
 	bio_get(bio);
 
+	for (i = 0; i < iov_count; i++)
+		len += iov[i].iov_len;
+
 	if (bio->bi_size == len)
 		return bio;
 
@@ -702,6 +767,82 @@ void bio_unmap_user(struct bio *bio)
 	bio_put(bio);
 }
 
+static int bio_map_kern_endio(struct bio *bio, unsigned int bytes_done, int err)
+{
+	if (bio->bi_size)
+		return 1;
+
+	bio_put(bio);
+	return 0;
+}
+
+
+static struct bio *__bio_map_kern(request_queue_t *q, void *data,
+				  unsigned int len, gfp_t gfp_mask)
+{
+	unsigned long kaddr = (unsigned long)data;
+	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	unsigned long start = kaddr >> PAGE_SHIFT;
+	const int nr_pages = end - start;
+	int offset, i;
+	struct bio *bio;
+
+	bio = bio_alloc(gfp_mask, nr_pages);
+	if (!bio)
+		return ERR_PTR(-ENOMEM);
+
+	offset = offset_in_page(kaddr);
+	for (i = 0; i < nr_pages; i++) {
+		unsigned int bytes = PAGE_SIZE - offset;
+
+		if (len <= 0)
+			break;
+
+		if (bytes > len)
+			bytes = len;
+
+		if (__bio_add_page(q, bio, virt_to_page(data), bytes,
+				   offset) < bytes)
+			break;
+
+		data += bytes;
+		len -= bytes;
+		offset = 0;
+	}
+
+	bio->bi_end_io = bio_map_kern_endio;
+	return bio;
+}
+
+/**
+ *	bio_map_kern	-	map kernel address into bio
+ *	@q: the request_queue_t for the bio
+ *	@data: pointer to buffer to map
+ *	@len: length in bytes
+ *	@gfp_mask: allocation flags for bio allocation
+ *
+ *	Map the kernel address into a bio suitable for io to a block
+ *	device. Returns an error pointer in case of error.
+ */
+struct bio *bio_map_kern(request_queue_t *q, void *data, unsigned int len,
+			 gfp_t gfp_mask)
+{
+	struct bio *bio;
+
+	bio = __bio_map_kern(q, data, len, gfp_mask);
+	if (IS_ERR(bio))
+		return bio;
+
+	if (bio->bi_size == len)
+		return bio;
+
+	/*
+	 * Don't support partial mappings.
+	 */
+	bio_put(bio);
+	return ERR_PTR(-EINVAL);
+}
+
 /*
  * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
  * for performing direct-IO in BIOs.
@@ -937,7 +1078,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
 	return bp;
 }
 
-static void *bio_pair_alloc(unsigned int __nocast gfp_flags, void *data)
+static void *bio_pair_alloc(gfp_t gfp_flags, void *data)
 {
 	return kmalloc(sizeof(struct bio_pair), gfp_flags);
 }
@@ -1079,6 +1220,7 @@ subsys_initcall(init_bio);
 
 EXPORT_SYMBOL(bio_alloc);
 EXPORT_SYMBOL(bio_put);
+EXPORT_SYMBOL(bio_free);
 EXPORT_SYMBOL(bio_endio);
 EXPORT_SYMBOL(bio_init);
 EXPORT_SYMBOL(__bio_clone);
@@ -1089,6 +1231,7 @@ EXPORT_SYMBOL(bio_add_page);
 EXPORT_SYMBOL(bio_get_nr_vecs);
 EXPORT_SYMBOL(bio_map_user);
 EXPORT_SYMBOL(bio_unmap_user);
+EXPORT_SYMBOL(bio_map_kern);
 EXPORT_SYMBOL(bio_pair_release);
 EXPORT_SYMBOL(bio_split);
 EXPORT_SYMBOL(bio_split_pool);
diff --git a/fs/buffer.c b/fs/buffer.c
index 6a25d7df89b..35fa34977e8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -40,6 +40,7 @@
 #include <linux/cpu.h>
 #include <linux/bitops.h>
 #include <linux/mpage.h>
+#include <linux/bit_spinlock.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 static void invalidate_bh_lrus(void);
@@ -95,7 +96,7 @@ static void
 __clear_page_buffers(struct page *page)
 {
 	ClearPagePrivate(page);
-	page->private = 0;
+	set_page_private(page, 0);
 	page_cache_release(page);
 }
 
@@ -501,7 +502,7 @@ static void free_more_memory(void)
 	yield();
 
 	for_each_pgdat(pgdat) {
-		zones = pgdat->node_zonelists[GFP_NOFS&GFP_ZONEMASK].zones;
+		zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
 		if (*zones)
 			try_to_free_pages(zones, GFP_NOFS);
 	}
@@ -917,8 +918,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				 * contents - it is a noop if I/O is still in
 				 * flight on potentially older contents.
 				 */
-				wait_on_buffer(bh);
-				ll_rw_block(WRITE, 1, &bh);
+				ll_rw_block(SWRITE, 1, &bh);
 				brelse(bh);
 				spin_lock(lock);
 			}
@@ -1478,8 +1478,10 @@ EXPORT_SYMBOL(__getblk);
 void __breadahead(struct block_device *bdev, sector_t block, int size)
 {
 	struct buffer_head *bh = __getblk(bdev, block, size);
-	ll_rw_block(READA, 1, &bh);
-	brelse(bh);
+	if (likely(bh)) {
+		ll_rw_block(READA, 1, &bh);
+		brelse(bh);
+	}
 }
 EXPORT_SYMBOL(__breadahead);
 
@@ -1497,7 +1499,7 @@ __bread(struct block_device *bdev, sector_t block, int size)
 {
 	struct buffer_head *bh = __getblk(bdev, block, size);
 
-	if (!buffer_uptodate(bh))
+	if (likely(bh) && !buffer_uptodate(bh))
 		bh = __bread_slow(bh);
 	return bh;
 }
@@ -1571,7 +1573,7 @@ static inline void discard_buffer(struct buffer_head * bh)
  *
  * NOTE: @gfp_mask may go away, and this function may become non-blocking.
  */
-int try_to_release_page(struct page *page, int gfp_mask)
+int try_to_release_page(struct page *page, gfp_t gfp_mask)
 {
 	struct address_space * const mapping = page->mapping;
 
@@ -1637,6 +1639,15 @@ out:
 }
 EXPORT_SYMBOL(block_invalidatepage);
 
+int do_invalidatepage(struct page *page, unsigned long offset)
+{
+	int (*invalidatepage)(struct page *, unsigned long);
+	invalidatepage = page->mapping->a_ops->invalidatepage;
+	if (invalidatepage == NULL)
+		invalidatepage = block_invalidatepage;
+	return (*invalidatepage)(page, offset);
+}
+
 /*
  * We attach and possibly dirty the buffers atomically wrt
  * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
@@ -2696,7 +2707,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
 		 * they may have been added in ext3_writepage().  Make them
 		 * freeable here, so the page does not leak.
 		 */
-		block_invalidatepage(page, 0);
+		do_invalidatepage(page, 0);
 		unlock_page(page);
 		return 0; /* don't care */
 	}
@@ -2793,21 +2804,22 @@ int submit_bh(int rw, struct buffer_head * bh)
 
 /**
  * ll_rw_block: low-level access to block devices (DEPRECATED)
- * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
+ * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead)
  * @nr: number of &struct buffer_heads in the array
  * @bhs: array of pointers to &struct buffer_head
  *
- * ll_rw_block() takes an array of pointers to &struct buffer_heads,
- * and requests an I/O operation on them, either a %READ or a %WRITE.
- * The third %READA option is described in the documentation for
- * generic_make_request() which ll_rw_block() calls.
+ * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
+ * requests an I/O operation on them, either a %READ or a %WRITE.  The third
+ * %SWRITE is like %WRITE only we make sure that the *current* data in buffers
+ * are sent to disk. The fourth %READA option is described in the documentation
+ * for generic_make_request() which ll_rw_block() calls.
  *
  * This function drops any buffer that it cannot get a lock on (with the
- * BH_Lock state bit), any buffer that appears to be clean when doing a
- * write request, and any buffer that appears to be up-to-date when doing
- * read request.  Further it marks as clean buffers that are processed for
- * writing (the buffer cache won't assume that they are actually clean until
- * the buffer gets unlocked).
+ * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be
+ * clean when doing a write request, and any buffer that appears to be
+ * up-to-date when doing read request.  Further it marks as clean buffers that
+ * are processed for writing (the buffer cache won't assume that they are
+ * actually clean until the buffer gets unlocked).
  *
  * ll_rw_block sets b_end_io to simple completion handler that marks
  * the buffer up-to-date (if approriate), unlocks the buffer and wakes
@@ -2823,11 +2835,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 	for (i = 0; i < nr; i++) {
 		struct buffer_head *bh = bhs[i];
 
-		if (test_set_buffer_locked(bh))
+		if (rw == SWRITE)
+			lock_buffer(bh);
+		else if (test_set_buffer_locked(bh))
 			continue;
 
 		get_bh(bh);
-		if (rw == WRITE) {
+		if (rw == WRITE || rw == SWRITE) {
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
 				submit_bh(WRITE, bh);
@@ -3042,14 +3056,13 @@ static void recalc_bh_state(void)
 	buffer_heads_over_limit = (tot > max_buffer_heads);
 }
 	
-struct buffer_head *alloc_buffer_head(unsigned int __nocast gfp_flags)
+struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
 {
 	struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags);
 	if (ret) {
-		preempt_disable();
-		__get_cpu_var(bh_accounting).nr++;
+		get_cpu_var(bh_accounting).nr++;
 		recalc_bh_state();
-		preempt_enable();
+		put_cpu_var(bh_accounting);
 	}
 	return ret;
 }
@@ -3059,10 +3072,9 @@ void free_buffer_head(struct buffer_head *bh)
 {
 	BUG_ON(!list_empty(&bh->b_assoc_buffers));
 	kmem_cache_free(bh_cachep, bh);
-	preempt_disable();
-	__get_cpu_var(bh_accounting).nr--;
+	get_cpu_var(bh_accounting).nr--;
 	recalc_bh_state();
-	preempt_enable();
+	put_cpu_var(bh_accounting);
 }
 EXPORT_SYMBOL(free_buffer_head);
 
diff --git a/fs/cifs/AUTHORS b/fs/cifs/AUTHORS
index 72fdc10dfdd..8848e4dfa02 100644
--- a/fs/cifs/AUTHORS
+++ b/fs/cifs/AUTHORS
@@ -32,6 +32,10 @@ Domen Puncer
 Jesper Juhl (in particular for lots of whitespace/formatting cleanup)
 Vince Negri and Dave Stahl (for finding an important caching bug)
 Adrian Bunk (kcalloc cleanups)
+Miklos Szeredi 
+Kazeon team for various fixes especially for 2.4 version.
+Asser Ferno (Change Notify support)
+Shaggy (Dave Kleikamp) for inumerable small fs suggestions and some good cleanup
 
 Test case and Bug Report contributors
 -------------------------------------
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index dab4774ee7b..5bab24f5905 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,53 @@
+Version 1.39
+------------
+Defer close of a file handle slightly if pending writes depend on that file handle
+(this reduces the EBADF bad file handle errors that can be logged under heavy
+stress on writes).
+
+Version 1.38
+------------
+Fix tcp socket retransmission timeouts (e.g. on ENOSPACE from the socket)
+to be smaller at first (but increasing) so large write performance performance
+over GigE is better.  Do not hang thread on illegal byte range lock response
+from Windows (Windows can send an RFC1001 size which does not match smb size) by
+allowing an SMBs TCP length to be up to a few bytes longer than it should be.
+wsize and rsize can now be larger than negotiated buffer size if server
+supports large readx/writex, even when directio mount flag not specified.
+Write size will in many cases now be 16K instead of 4K which greatly helps
+file copy performance on lightly loaded networks.  Fix oops in dnotify
+when experimental config flag enabled. Make cifsFYI more granular.
+
+Version 1.37
+------------
+Fix readdir caching when unlink removes file in current search buffer,
+and this is followed by a rewind search to just before the deleted entry.
+Do not attempt to set ctime unless atime and/or mtime change requested
+(most servers throw it away anyway). Fix length check of received smbs
+to be more accurate. Fix big endian problem with mapchars mount option,
+and with a field returned by statfs.
+
+Version 1.36
+------------
+Add support for mounting to older pre-CIFS servers such as Windows9x and ME.
+For these older servers, add option for passing netbios name of server in
+on mount (servernetbiosname).  Add suspend support for power management, to
+avoid cifsd thread preventing software suspend from working.
+Add mount option for disabling the default behavior of sending byte range lock
+requests to the server (necessary for certain applications which break with
+mandatory lock behavior such as Evolution), and also mount option for
+requesting case insensitive matching for path based requests (requesting
+case sensitive is the default).
+
+Version 1.35
+------------
+Add writepage performance improvements.  Fix path name conversions
+for long filenames on mounts which were done with "mapchars" mount option
+specified.  Ensure multiplex ids do not collide.  Fix case in which 
+rmmod can oops if done soon after last unmount.  Fix truncated
+search (readdir) output when resume filename was a long filename.
+Fix filename conversion when mapchars mount option was specified and
+filename was a long filename.
+
 Version 1.34
 ------------
 Fix error mapping of the TOO_MANY_LINKS (hardlinks) case.
@@ -5,7 +55,7 @@ Do not oops if root user kills cifs oplock kernel thread or
 kills the cifsd thread (NB: killing the cifs kernel threads is not
 recommended, unmount and rmmod cifs will kill them when they are
 no longer needed).  Fix readdir to ASCII servers (ie older servers
-which do not support Unicode) and also require asterik.
+which do not support Unicode) and also require asterisk.
 Fix out of memory case in which data could be written one page
 off in the page cache.
 
@@ -95,7 +145,7 @@ improperly zeroed buffer in CIFS Unix extensions set times call.
 
 Version 1.25
 ------------
-Fix internationlization problem in cifs readdir with filenames that map to 
+Fix internationalization problem in cifs readdir with filenames that map to 
 longer UTF8 strings than the string on the wire was in Unicode.  Add workaround
 for readdir to netapp servers. Fix search rewind (seek into readdir to return 
 non-consecutive entries).  Do not do readdir when server negotiates 
@@ -270,7 +320,7 @@ Fix caching problem when files opened by multiple clients in which
 page cache could contain stale data, and write through did
 not occur often enough while file was still open when read ahead
 (read oplock) not allowed.  Treat "sep=" when first mount option
-as an overrride of comma as the default separator between mount
+as an override of comma as the default separator between mount
 options. 
 
 Version 1.01
@@ -280,7 +330,7 @@ Allow passwords longer than 16 bytes. Allow null password string.
 Version 1.00
 ------------
 Gracefully clean up failed mounts when attempting to mount to servers such as
-Windows 98 that terminate tcp sessions during prototocol negotiation.  Handle
+Windows 98 that terminate tcp sessions during protocol negotiation.  Handle
 embedded commas in mount parsing of passwords.
 
 Version 0.99
@@ -289,7 +339,7 @@ Invalidate local inode cached pages on oplock break and when last file
 instance is closed so that the client does not continue using stale local
 copy rather than later modified server copy of file.  Do not reconnect
 when server drops the tcp session prematurely before negotiate
-protocol response.  Fix oops in roepen_file when dentry freed.  Allow
+protocol response.  Fix oops in reopen_file when dentry freed.  Allow
 the support for CIFS Unix Extensions to be disabled via proc interface.
 
 Version 0.98
@@ -631,7 +681,7 @@ versions of 2.4 kernel (now builds and works again on kernels at least as early
 Version 0.41
 ------------
 Various minor fixes for Connectathon Posix "basic" file i/o test suite.  Directory caching fixed so hardlinked
-files now return the correct rumber of links on fstat as they are repeatedly linked and unlinked.
+files now return the correct number of links on fstat as they are repeatedly linked and unlinked.
 
 Version 0.40
 ------------
@@ -698,7 +748,7 @@ session)
 and cleaned them up and made them more consistent with other cifs functions. 
 
 7) Server support for Unix extensions is now fully detected and FindFirst is implemented both ways 
-(with or without Unix exentions) but FindNext and QueryPathInfo with the Unix extensions are not completed,
+(with or without Unix extensions) but FindNext and QueryPathInfo with the Unix extensions are not completed,
 nor is the symlink support using the Unix extensions
 
 8) Started adding the readlink and follow_link code 
diff --git a/fs/cifs/README b/fs/cifs/README
index 34b0cf7111f..bb90941826a 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -294,8 +294,10 @@ A partial list of the supported mount options follows:
 		during the local client kernel build will be used.
 		If server does not support Unicode, this parameter is
 		unused.
-  rsize		default read size
-  wsize		default write size
+  rsize		default read size (usually 16K)
+  wsize		default write size (usually 16K, 32K is often better over GigE)
+		maximum wsize currently allowed by CIFS is 57344 (14 4096 byte
+		pages)
   rw		mount the network share read-write (note that the
 		server may still consider the share read-only)
   ro		mount network share read-only
@@ -407,6 +409,13 @@ A partial list of the supported mount options follows:
 		This has no effect if the server does not support
 		Unicode on the wire.
  nomapchars     Do not translate any of these seven characters (default).
+ nocase         Request case insensitive path name matching (case
+		sensitive is the default if the server suports it).
+ nobrl          Do not send byte range lock requests to the server.
+		This is necessary for certain applications that break
+		with cifs style mandatory byte range locks (and most
+		cifs servers do not yet support requesting advisory
+		byte range locks).
  remount        remount the share (often used to change from ro to rw mounts
 	        or vice versa)
 		
@@ -473,9 +482,16 @@ These experimental features and tracing can be enabled by changing flags in
 kernel, e.g.  insmod cifs).  To enable a feature set it to 1 e.g.  to enable 
 tracing to the kernel message log type: 
 
-	echo 1 > /proc/fs/cifs/cifsFYI
+	echo 7 > /proc/fs/cifs/cifsFYI
 	
-and for more extensive tracing including the start of smb requests and responses
+cifsFYI functions as a bit mask. Setting it to 1 enables additional kernel
+logging of various informational messages.  2 enables logging of non-zero
+SMB return codes while 4 enables logging of requests that take longer
+than one second to complete (except for byte range lock requests). 
+Setting it to 4 requires defining CONFIG_CIFS_STATS2 manually in the
+source code (typically by setting it in the beginning of cifsglob.h),
+and setting it to seven enables all three.  Finally, tracing
+the start of smb requests and responses can be enabled via:
 
 	echo 1 > /proc/fs/cifs/traceSMB
 
diff --git a/fs/cifs/TODO b/fs/cifs/TODO
index 8cc881694e2..c909298d11e 100644
--- a/fs/cifs/TODO
+++ b/fs/cifs/TODO
@@ -1,4 +1,4 @@
-version 1.34 April 29, 2005
+version 1.37 October 9, 2005
 
 A Partial List of Missing Features
 ==================================
@@ -7,14 +7,14 @@ Contributions are welcome.  There are plenty of opportunities
 for visible, important contributions to this module.  Here
 is a partial list of the known problems and missing features:
 
-a) Support for SecurityDescriptors for chmod/chgrp/chown so
-these can be supported for Windows servers
+a) Support for SecurityDescriptors(Windows/CIFS ACLs) for chmod/chgrp/chown
+so that these operations can be supported to Windows servers
 
-b) Better pam/winbind integration (e.g. to handle uid mapping
-better)
+b) Mapping POSIX ACLs (and eventually NFSv4 ACLs) to CIFS
+SecurityDescriptors
 
-c) multi-user mounts - multiplexed sessionsetups over single vc
-(ie tcp session) - more testing needed
+c) Better pam/winbind integration (e.g. to handle uid mapping
+better)
 
 d) Kerberos/SPNEGO session setup support - (started)
 
@@ -29,12 +29,17 @@ f) Directory entry caching relies on a 1 second timer, rather than
 using FindNotify or equivalent.  - (started)
 
 g) A few byte range testcases fail due to POSIX vs. Windows/CIFS
-style byte range lock differences
+style byte range lock differences.  Save byte range locks so
+reconnect can replay them.  
 
-h) quota support
+h) Support unlock all (unlock 0,MAX_OFFSET)
+by unlocking all known byte range locks that we locked on the file.
 
-j) finish writepages support (multi-page write behind for improved
-performance) and syncpage
+i) quota support (needs minor kernel change since quota calls
+to make it to network filesystems or deviceless filesystems)
+
+j) investigate sync behavior (including syncpage) and check  
+for proper behavior of intr/nointr
 
 k) hook lower into the sockets api (as NFS/SunRPC does) to avoid the
 extra copy in/out of the socket buffers in some cases.
@@ -57,20 +62,18 @@ p) Add support for storing symlink and fifo info to Windows servers
 in the Extended Attribute format their SFU clients would recognize.
 
 q) Finish fcntl D_NOTIFY support so kde and gnome file list windows
-will autorefresh (started)
+will autorefresh (partially complete by Asser). Needs minor kernel
+vfs change to support removing D_NOTIFY on a file.   
 
 r) Add GUI tool to configure /proc/fs/cifs settings and for display of
 the CIFS statistics (started)
 
-q) implement support for security and trusted categories of xattrs
+s) implement support for security and trusted categories of xattrs
 (requires minor protocol extension) to enable better support for SELINUX
 
-r) Implement O_DIRECT flag on open (already supported on mount)
-
-s) Allow remapping of last remaining character (\) to +0xF000 which
-(this character is valid for POSIX but not for Windows)
+t) Implement O_DIRECT flag on open (already supported on mount)
 
-t) Create UID mapping facility so server UIDs can be mapped on a per
+u) Create UID mapping facility so server UIDs can be mapped on a per
 mount or a per server basis to client UIDs or nobody if no mapping
 exists.  This is helpful when Unix extensions are negotiated to
 allow better permission checking when UIDs differ on the server
@@ -78,6 +81,17 @@ and client.  Add new protocol request to the CIFS protocol
 standard for asking the server for the corresponding name of a
 particular uid.
 
+v) Add support for CIFS Unix and also the newer POSIX extensions to the
+server side for Samba 4.
+
+w) Finish up the dos time conversion routines needed to return old server
+time to the client (default time, of now or time 0 is used now for these 
+very old servers)
+
+x) Add support for OS/2 (LANMAN 1.2 and LANMAN2.1 based SMB servers)
+
+y) Finish testing of Windows 9x/Windows ME server support (started).
+
 KNOWN BUGS (updated April 29, 2005)
 ====================================
 See http://bugzilla.samba.org - search on product "CifsVFS" for
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index e02010dd73e..98539e2afe8 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -191,7 +191,8 @@ asn1_header_decode(struct asn1_ctx *ctx,
 		   unsigned char **eoc,
 		   unsigned int *cls, unsigned int *con, unsigned int *tag)
 {
-	unsigned int def, len;
+	unsigned int def = 0; 
+	unsigned int len = 0;
 
 	if (!asn1_id_decode(ctx, cls, con, tag))
 		return 0;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 4061e43471c..22a444a3fe4 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -81,6 +81,8 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 	buf += length;
 	length = sprintf(buf,"CIFS Version %s\n",CIFS_VERSION);
 	buf += length;
+	length = sprintf(buf,"Active VFS Requests: %d\n", GlobalTotalActiveXid);
+	buf += length;
 	length = sprintf(buf, "Servers:");
 	buf += length;
 
@@ -97,7 +99,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 		} else {
 			length =
 			    sprintf(buf,
-				    "\n%d) Name: %s  Domain: %s Mounts: %d ServerOS: %s  \n\tServerNOS: %s\tCapabilities: 0x%x\n\tSMB session status: %d\t",
+				    "\n%d) Name: %s  Domain: %s Mounts: %d OS: %s  \n\tNOS: %s\tCapability: 0x%x\n\tSMB session status: %d\t",
 				i, ses->serverName, ses->serverDomain,
 				atomic_read(&ses->inUse),
 				ses->serverOS, ses->serverNOS,
@@ -105,12 +107,18 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 			buf += length;
 		}
 		if(ses->server) {
-			buf += sprintf(buf, "TCP status: %d\n\tLocal Users To Server: %d SecMode: 0x%x Req Active: %d",
+			buf += sprintf(buf, "TCP status: %d\n\tLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d",
 				ses->server->tcpStatus,
 				atomic_read(&ses->server->socketUseCount),
 				ses->server->secMode,
 				atomic_read(&ses->server->inFlight));
-			
+
+#ifdef CONFIG_CIFS_STATS2
+			buf += sprintf(buf, " In Send: %d In MaxReq Wait: %d",
+				atomic_read(&ses->server->inSend), 
+				atomic_read(&ses->server->num_waiters));
+#endif
+
 			length = sprintf(buf, "\nMIDs:\n");
 			buf += length;
 
@@ -149,7 +157,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 		dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
 		length =
 		    sprintf(buf,
-			    "\n%d) %s Uses: %d Type: %s Characteristics: 0x%x Attributes: 0x%x\nPathComponentMax: %d Status: %d",
+			    "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x Attributes: 0x%x\nPathComponentMax: %d Status: %d",
 			    i, tcon->treeName,
 			    atomic_read(&tcon->useCount),
 			    tcon->nativeFileSystem,
@@ -195,6 +203,49 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 }
 
 #ifdef CONFIG_CIFS_STATS
+
+static int
+cifs_stats_write(struct file *file, const char __user *buffer,
+               unsigned long count, void *data)
+{
+        char c;
+        int rc;
+	struct list_head *tmp;
+	struct cifsTconInfo *tcon;
+
+        rc = get_user(c, buffer);
+        if (rc)
+                return rc;
+
+        if (c == '1' || c == 'y' || c == 'Y' || c == '0') {
+		read_lock(&GlobalSMBSeslock);
+		list_for_each(tmp, &GlobalTreeConnectionList) {
+			tcon = list_entry(tmp, struct cifsTconInfo,
+					cifsConnectionList);
+			atomic_set(&tcon->num_smbs_sent, 0);
+			atomic_set(&tcon->num_writes, 0);
+			atomic_set(&tcon->num_reads, 0);
+			atomic_set(&tcon->num_oplock_brks, 0);
+			atomic_set(&tcon->num_opens, 0);
+			atomic_set(&tcon->num_closes, 0);
+			atomic_set(&tcon->num_deletes, 0);
+			atomic_set(&tcon->num_mkdirs, 0);
+			atomic_set(&tcon->num_rmdirs, 0);
+			atomic_set(&tcon->num_renames, 0);
+			atomic_set(&tcon->num_t2renames, 0);
+			atomic_set(&tcon->num_ffirst, 0);
+			atomic_set(&tcon->num_fnext, 0);
+			atomic_set(&tcon->num_fclose, 0);
+			atomic_set(&tcon->num_hardlinks, 0);
+			atomic_set(&tcon->num_symlinks, 0);
+			atomic_set(&tcon->num_locks, 0);
+		}
+		read_unlock(&GlobalSMBSeslock);
+	}
+
+        return count;
+}
+
 static int
 cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
 		  int count, int *eof, void *data)
@@ -254,35 +305,51 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
 			buf += sprintf(buf, "\tDISCONNECTED ");
 			length += 14;
 		}
-		item_length = sprintf(buf,"\nSMBs: %d Oplock Breaks: %d",
+		item_length = sprintf(buf, "\nSMBs: %d Oplock Breaks: %d",
 			atomic_read(&tcon->num_smbs_sent),
 			atomic_read(&tcon->num_oplock_brks));
 		buf += item_length;
 		length += item_length;
-		item_length = sprintf(buf,"\nReads: %d Bytes %lld",
+		item_length = sprintf(buf, "\nReads:  %d Bytes: %lld",
 			atomic_read(&tcon->num_reads),
 			(long long)(tcon->bytes_read));
 		buf += item_length;
 		length += item_length;
-		item_length = sprintf(buf,"\nWrites: %d Bytes: %lld",
+		item_length = sprintf(buf, "\nWrites: %d Bytes: %lld",
 			atomic_read(&tcon->num_writes),
 			(long long)(tcon->bytes_written));
+                buf += item_length;
+                length += item_length;
+                item_length = sprintf(buf, 
+			"\nLocks: %d HardLinks: %d Symlinks: %d",
+                        atomic_read(&tcon->num_locks),
+			atomic_read(&tcon->num_hardlinks),
+			atomic_read(&tcon->num_symlinks));
+                buf += item_length;
+                length += item_length;
+
+		item_length = sprintf(buf, "\nOpens: %d Closes: %d Deletes: %d",
+			atomic_read(&tcon->num_opens),
+			atomic_read(&tcon->num_closes),
+			atomic_read(&tcon->num_deletes));
 		buf += item_length;
 		length += item_length;
-		item_length = sprintf(buf,
-			"\nOpens: %d Deletes: %d\nMkdirs: %d Rmdirs: %d",
-			atomic_read(&tcon->num_opens),
-			atomic_read(&tcon->num_deletes),
+		item_length = sprintf(buf, "\nMkdirs: %d Rmdirs: %d",
 			atomic_read(&tcon->num_mkdirs),
 			atomic_read(&tcon->num_rmdirs));
 		buf += item_length;
 		length += item_length;
-		item_length = sprintf(buf,
-			"\nRenames: %d T2 Renames %d",
+		item_length = sprintf(buf, "\nRenames: %d T2 Renames %d",
 			atomic_read(&tcon->num_renames),
 			atomic_read(&tcon->num_t2renames));
 		buf += item_length;
 		length += item_length;
+		item_length = sprintf(buf, "\nFindFirst: %d FNext %d FClose %d",
+			atomic_read(&tcon->num_ffirst),
+			atomic_read(&tcon->num_fnext),
+			atomic_read(&tcon->num_fclose));
+		buf += item_length;
+		length += item_length;
 	}
 	read_unlock(&GlobalSMBSeslock);
 
@@ -341,8 +408,10 @@ cifs_proc_init(void)
 				cifs_debug_data_read, NULL);
 
 #ifdef CONFIG_CIFS_STATS
-	create_proc_read_entry("Stats", 0, proc_fs_cifs,
+	pde = create_proc_read_entry("Stats", 0, proc_fs_cifs,
 				cifs_stats_read, NULL);
+	if (pde)
+		pde->write_proc = cifs_stats_write;
 #endif
 	pde = create_proc_read_entry("cifsFYI", 0, proc_fs_cifs,
 				cifsFYI_read, NULL);
@@ -360,7 +429,7 @@ cifs_proc_init(void)
 	if (pde)
 		pde->write_proc = oplockEnabled_write;
 
-	pde = create_proc_read_entry("ReenableOldCifsReaddirCode", 0, proc_fs_cifs,
+	pde = create_proc_read_entry("Experimental", 0, proc_fs_cifs,
 				quotaEnabled_read, NULL);
 	if (pde)
 		pde->write_proc = quotaEnabled_write;
@@ -419,7 +488,7 @@ cifs_proc_clean(void)
 	remove_proc_entry("ExtendedSecurity",proc_fs_cifs);
 	remove_proc_entry("PacketSigningEnabled",proc_fs_cifs);
 	remove_proc_entry("LinuxExtensionsEnabled",proc_fs_cifs);
-	remove_proc_entry("ReenableOldCifsReaddirCode",proc_fs_cifs);
+	remove_proc_entry("Experimental",proc_fs_cifs);
 	remove_proc_entry("LookupCacheEnabled",proc_fs_cifs);
 	remove_proc_entry("cifs", proc_root_fs);
 }
@@ -459,6 +528,8 @@ cifsFYI_write(struct file *file, const char __user *buffer,
 		cifsFYI = 0;
 	else if (c == '1' || c == 'y' || c == 'Y')
 		cifsFYI = 1;
+	else if((c > '1') && (c <= '9'))
+		cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */
 
 	return count;
 }
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index bf24d2828f6..4304d9dcfb6 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -26,6 +26,9 @@
 void cifs_dump_mem(char *label, void *data, int length);
 extern int traceSMB;		/* flag which enables the function below */
 void dump_smb(struct smb_hdr *, int);
+#define CIFS_INFO	0x01
+#define CIFS_RC  	0x02
+#define CIFS_TIMER	0x04
 
 /*
  *	debug ON
@@ -36,7 +39,7 @@ void dump_smb(struct smb_hdr *, int);
 
 /* information message: e.g., configuration, major event */
 extern int cifsFYI;
-#define cifsfyi(format,arg...) if (cifsFYI) printk(KERN_DEBUG " " __FILE__ ": " format "\n" "" , ## arg)
+#define cifsfyi(format,arg...) if (cifsFYI & CIFS_INFO) printk(KERN_DEBUG " " __FILE__ ": " format "\n" "" , ## arg)
 
 #define cFYI(button,prspec) if (button) cifsfyi prspec
 
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ec00d61d530..f799f6f0e72 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -24,6 +24,9 @@
 #define CIFS_MOUNT_DIRECT_IO    8 /* do not write nor read through page cache */
 #define CIFS_MOUNT_NO_XATTR  0x10 /* if set - disable xattr support */
 #define CIFS_MOUNT_MAP_SPECIAL_CHR 0x20 /* remap illegal chars in filenames */
+#define CIFS_MOUNT_POSIX_PATHS 0x40 /* Negotiate posix pathnames if possible. */
+#define CIFS_MOUNT_UNX_EMUL    0x80 /* Network compat with SFUnix emulation */
+#define CIFS_MOUNT_NO_BRL	0x100 /* No sending byte range locks to srv */
 
 struct cifs_sb_info {
 	struct cifsTconInfo *tcon;	/* primary mount */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8cc23e7d0d5..877095a1192 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -59,6 +59,8 @@ unsigned int ntlmv2_support = 0;
 unsigned int sign_CIFS_PDUs = 1;
 extern struct task_struct * oplockThread; /* remove sparse warning */
 struct task_struct * oplockThread = NULL;
+extern struct task_struct * dnotifyThread; /* remove sparse warning */
+struct task_struct * dnotifyThread = NULL;
 unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
 module_param(CIFSMaxBufSize, int, 0);
 MODULE_PARM_DESC(CIFSMaxBufSize,"Network buffer size (not including header). Default: 16384 Range: 8192 to 130048");
@@ -73,6 +75,7 @@ module_param(cifs_max_pending, int, 0);
 MODULE_PARM_DESC(cifs_max_pending,"Simultaneous requests to server. Default: 50 Range: 2 to 256");
 
 static DECLARE_COMPLETION(cifs_oplock_exited);
+static DECLARE_COMPLETION(cifs_dnotify_exited);
 
 extern mempool_t *cifs_sm_req_poolp;
 extern mempool_t *cifs_req_poolp;
@@ -202,6 +205,10 @@ cifs_statfs(struct super_block *sb, struct kstatfs *buf)
 #endif /* CIFS_EXPERIMENTAL */
 	rc = CIFSSMBQFSInfo(xid, pTcon, buf);
 
+	/* Old Windows servers do not support level 103, retry with level 
+	   one if old server failed the previous call */ 
+	if(rc)
+		rc = SMBOldQFSInfo(xid, pTcon, buf);
 	/*     
 	   int f_type;
 	   __fsid_t f_fsid;
@@ -253,7 +260,7 @@ cifs_alloc_inode(struct super_block *sb)
 	cifs_inode->clientCanCacheAll = FALSE;
 	cifs_inode->vfs_inode.i_blksize = CIFS_MAX_MSGSIZE;
 	cifs_inode->vfs_inode.i_blkbits = 14;  /* 2**14 = CIFS_MAX_MSGSIZE */
-
+	cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME;
 	INIT_LIST_HEAD(&cifs_inode->openFileList);
 	return &cifs_inode->vfs_inode;
 }
@@ -398,6 +405,34 @@ static struct quotactl_ops cifs_quotactl_ops = {
 };
 #endif
 
+static void cifs_umount_begin(struct super_block * sblock)
+{
+	struct cifs_sb_info *cifs_sb;
+	struct cifsTconInfo * tcon;
+
+	cifs_sb = CIFS_SB(sblock);
+	if(cifs_sb == NULL)
+		return;
+
+	tcon = cifs_sb->tcon;
+	if(tcon == NULL)
+		return;
+	down(&tcon->tconSem);
+	if (atomic_read(&tcon->useCount) == 1)
+		tcon->tidStatus = CifsExiting;
+	up(&tcon->tconSem);
+
+	if(tcon->ses && tcon->ses->server)
+	{
+		cERROR(1,("wake up tasks now - umount begin not complete"));
+		wake_up_all(&tcon->ses->server->request_q);
+	}
+/* BB FIXME - finish add checks for tidStatus BB */
+
+	return;
+}
+	
+
 static int cifs_remount(struct super_block *sb, int *flags, char *data)
 {
 	*flags |= MS_NODIRATIME;
@@ -415,7 +450,7 @@ struct super_operations cifs_super_ops = {
    unless later we add lazy close of inodes or unless the kernel forgets to call
    us with the same number of releases (closes) as opens */
 	.show_options = cifs_show_options,
-/*    .umount_begin   = cifs_umount_begin, *//* consider adding in the future */
+/*	.umount_begin   = cifs_umount_begin, */ /* BB finish in the future */
 	.remount_fs = cifs_remount,
 };
 
@@ -781,9 +816,9 @@ static int cifs_oplock_thread(void * dummyarg)
 
 	oplockThread = current;
 	do {
-		set_current_state(TASK_INTERRUPTIBLE);
+		if (try_to_freeze()) 
+			continue;
 		
-		schedule_timeout(1*HZ);  
 		spin_lock(&GlobalMid_Lock);
 		if(list_empty(&GlobalOplock_Q)) {
 			spin_unlock(&GlobalMid_Lock);
@@ -832,10 +867,27 @@ static int cifs_oplock_thread(void * dummyarg)
 				}
 			} else
 				spin_unlock(&GlobalMid_Lock);
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(1);  /* yield in case q were corrupt */
 		}
 	} while(!signal_pending(current));
-	complete_and_exit (&cifs_oplock_exited, 0);
 	oplockThread = NULL;
+	complete_and_exit (&cifs_oplock_exited, 0);
+}
+
+static int cifs_dnotify_thread(void * dummyarg)
+{
+	daemonize("cifsdnotifyd");
+	allow_signal(SIGTERM);
+
+	dnotifyThread = current;
+	do {
+		if(try_to_freeze())
+			continue;
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(39*HZ);
+	} while(!signal_pending(current));
+	complete_and_exit (&cifs_dnotify_exited, 0);
 }
 
 static int __init
@@ -849,6 +901,10 @@ init_cifs(void)
 	INIT_LIST_HEAD(&GlobalSMBSessionList);
 	INIT_LIST_HEAD(&GlobalTreeConnectionList);
 	INIT_LIST_HEAD(&GlobalOplock_Q);
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+	INIT_LIST_HEAD(&GlobalDnotifyReqList);
+	INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
+#endif	
 /*
  *  Initialize Global counters
  */
@@ -884,10 +940,16 @@ init_cifs(void)
 				if (!rc) {                
 					rc = (int)kernel_thread(cifs_oplock_thread, NULL, 
 						CLONE_FS | CLONE_FILES | CLONE_VM);
-					if(rc > 0)
-						return 0;
-					else 
+					if(rc > 0) {
+						rc = (int)kernel_thread(cifs_dnotify_thread, NULL,
+							CLONE_FS | CLONE_FILES | CLONE_VM);
+						if(rc > 0)
+							return 0;
+						else
+							cERROR(1,("error %d create dnotify thread", rc));
+					} else {
 						cERROR(1,("error %d create oplock thread",rc));
+					}
 				}
 				cifs_destroy_request_bufs();
 			}
@@ -916,6 +978,10 @@ exit_cifs(void)
 		send_sig(SIGTERM, oplockThread, 1);
 		wait_for_completion(&cifs_oplock_exited);
 	}
+	if(dnotifyThread) {
+		send_sig(SIGTERM, dnotifyThread, 1);
+		wait_for_completion(&cifs_dnotify_exited);
+	}
 }
 
 MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>");
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 78af5850c55..1223fa81dbd 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -81,10 +81,11 @@ extern int cifs_dir_notify(struct file *, unsigned long arg);
 
 /* Functions related to dir entries */
 extern struct dentry_operations cifs_dentry_ops;
+extern struct dentry_operations cifs_ci_dentry_ops;
 
 /* Functions related to symlinks */
-extern int cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
-extern void cifs_put_link(struct dentry *direntry, struct nameidata *nd);
+extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
+extern void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *);
 extern int cifs_readlink(struct dentry *direntry, char __user *buffer, 
 			 int buflen);
 extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
@@ -96,5 +97,5 @@ extern ssize_t	cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t	cifs_listxattr(struct dentry *, char *, size_t);
 extern int cifs_ioctl (struct inode * inode, struct file * filep,
 		       unsigned int command, unsigned long arg);
-#define CIFS_VERSION   "1.35"
+#define CIFS_VERSION   "1.39"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 81babab265e..1ba08f8c5bc 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -110,8 +110,9 @@ enum protocolEnum {
  */
 
 struct TCP_Server_Info {
-	char server_Name[SERVER_NAME_LEN_WITH_NULL];	/* 15 chars + X'20'in 16th */
-	char unicode_server_Name[SERVER_NAME_LEN_WITH_NULL * 2];	/* Unicode version of server_Name */
+	/* 15 character server name + 0x20 16th byte indicating type = srv */
+	char server_RFC1001_name[SERVER_NAME_LEN_WITH_NULL];
+	char unicode_server_Name[SERVER_NAME_LEN_WITH_NULL * 2];
 	struct socket *ssocket;
 	union {
 		struct sockaddr_in sockAddr;
@@ -122,13 +123,17 @@ struct TCP_Server_Info {
 	struct list_head pending_mid_q;
 	void *Server_NlsInfo;	/* BB - placeholder for future NLS info  */
 	unsigned short server_codepage;	/* codepage for the server    */
-	unsigned long ip_address;	/* IP addr for the server if known     */
+	unsigned long ip_address;	/* IP addr for the server if known */
 	enum protocolEnum protocolType;	
 	char versionMajor;
 	char versionMinor;
 	unsigned svlocal:1;	/* local server or remote */
 	atomic_t socketUseCount; /* number of open cifs sessions on socket */
 	atomic_t inFlight;  /* number of requests on the wire to server */
+#ifdef CONFIG_CIFS_STATS2
+	atomic_t inSend; /* requests trying to send */
+	atomic_t num_waiters;   /* blocked waiting to get in sendrecv */
+#endif
 	enum statusEnum tcpStatus; /* what we think the status is */
 	struct semaphore tcpSem;
 	struct task_struct *tsk;
@@ -147,8 +152,10 @@ struct TCP_Server_Info {
 	/* (returned on Negotiate */
 	int capabilities; /* allow selective disabling of caps by smb sess */
 	__u16 timeZone;
+	__u16 CurrentMid;         /* multiplex id - rotating counter */
 	char cryptKey[CIFS_CRYPTO_KEY_SIZE];
-	char workstation_RFC1001_name[16]; /* 16th byte is always zero */
+	/* 16th byte of RFC1001 workstation name is always null */
+	char workstation_RFC1001_name[SERVER_NAME_LEN_WITH_NULL];
 	__u32 sequence_number; /* needed for CIFS PDU signature */
 	char mac_signing_key[CIFS_SESSION_KEY_SIZE + 16]; 
 };
@@ -214,19 +221,41 @@ struct cifsTconInfo {
 	atomic_t num_reads;
 	atomic_t num_oplock_brks;
 	atomic_t num_opens;
+	atomic_t num_closes;
 	atomic_t num_deletes;
 	atomic_t num_mkdirs;
 	atomic_t num_rmdirs;
 	atomic_t num_renames;
 	atomic_t num_t2renames;
+	atomic_t num_ffirst;
+	atomic_t num_fnext;
+	atomic_t num_fclose;
+	atomic_t num_hardlinks;
+	atomic_t num_symlinks;
+	atomic_t num_locks;
+#ifdef CONFIG_CIFS_STATS2
+	unsigned long long time_writes;
+	unsigned long long time_reads;
+	unsigned long long time_opens;
+	unsigned long long time_deletes;
+	unsigned long long time_closes;
+	unsigned long long time_mkdirs;
+	unsigned long long time_rmdirs;
+	unsigned long long time_renames;
+	unsigned long long time_t2renames;
+	unsigned long long time_ffirst;
+	unsigned long long time_fnext;
+	unsigned long long time_fclose;
+#endif /* CONFIG_CIFS_STATS2 */
 	__u64    bytes_read;
 	__u64    bytes_written;
 	spinlock_t stat_lock;
-#endif
+#endif /* CONFIG_CIFS_STATS */
 	FILE_SYSTEM_DEVICE_INFO fsDevInfo;
 	FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo;	/* ok if file system name truncated */
 	FILE_SYSTEM_UNIX_INFO fsUnixInfo;
 	unsigned retry:1;
+	unsigned nocase:1;
 	/* BB add field for back pointer to sb struct? */
 };
 
@@ -270,6 +299,7 @@ struct cifsFileInfo {
 	struct inode * pInode; /* needed for oplock break */
 	unsigned closePend:1;	/* file is marked to close */
 	unsigned invalidHandle:1;  /* file closed via session abend */
+	atomic_t wrtPending;   /* handle in use - defer close */
 	struct semaphore fh_sem; /* prevents reopen race after dead ses*/
 	char * search_resume_name; /* BB removeme BB */
 	unsigned int resume_name_length; /* BB removeme - field renamed and moved BB */
@@ -306,6 +336,41 @@ CIFS_SB(struct super_block *sb)
 	return sb->s_fs_info;
 }
 
+static inline char CIFS_DIR_SEP(const struct cifs_sb_info *cifs_sb)
+{
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
+		return '/';
+	else
+		return '\\';
+}
+
+#ifdef CONFIG_CIFS_STATS
+#define cifs_stats_inc atomic_inc
+
+static inline void cifs_stats_bytes_written(struct cifsTconInfo *tcon,
+					    unsigned int bytes)
+{
+	if (bytes) {
+		spin_lock(&tcon->stat_lock);
+		tcon->bytes_written += bytes;
+		spin_unlock(&tcon->stat_lock);
+	}
+}
+
+static inline void cifs_stats_bytes_read(struct cifsTconInfo *tcon,
+					 unsigned int bytes)
+{
+	spin_lock(&tcon->stat_lock);
+	tcon->bytes_read += bytes;
+	spin_unlock(&tcon->stat_lock);
+}
+#else
+
+#define  cifs_stats_inc(field) do {} while(0)
+#define  cifs_stats_bytes_written(tcon, bytes) do {} while(0)
+#define  cifs_stats_bytes_read(tcon, bytes) do {} while(0)
+
+#endif
 
 /* one of these for every pending CIFS request to the server */
 struct mid_q_entry {
@@ -313,7 +378,11 @@ struct mid_q_entry {
 	__u16 mid;		/* multiplex id */
 	__u16 pid;		/* process id */
 	__u32 sequence_number;  /* for CIFS signing */
-	struct timeval when_sent;	/* time when smb sent */
+	unsigned long when_alloc;  /* when mid was created */
+#ifdef CONFIG_CIFS_STATS2
+	unsigned long when_sent; /* time when smb send finished */
+	unsigned long when_received; /* when demux complete (taken off wire) */
+#endif
 	struct cifsSesInfo *ses;	/* smb was sent to this server */
 	struct task_struct *tsk;	/* task waiting for response */
 	struct smb_hdr *resp_buf;	/* response buffer */
@@ -331,6 +400,20 @@ struct oplock_q_entry {
 	__u16 netfid;
 };
 
+/* for pending dnotify requests */
+struct dir_notify_req {
+       struct list_head lhead;
+       __le16 Pid;
+       __le16 PidHigh;
+       __u16 Mid;
+       __u16 Tid;
+       __u16 Uid;
+       __u16 netfid;
+       __u32 filter; /* CompletionFilter (for multishot) */
+       int multishot;
+       struct file * pfile;
+};
+
 #define   MID_FREE 0
 #define   MID_REQUEST_ALLOCATED 1
 #define   MID_REQUEST_SUBMITTED 2
@@ -399,6 +482,9 @@ GLOBAL_EXTERN rwlock_t GlobalSMBSeslock;  /* protects list inserts on 3 above */
 
 GLOBAL_EXTERN struct list_head GlobalOplock_Q;
 
+GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; /* Outstanding dir notify requests */
+GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q; /* Dir notify response queue */
+
 /*
  * Global transaction id (XID) information
  */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index aede6a81316..48a05b9df7e 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -36,9 +36,11 @@
 #define SMB_COM_CLOSE                 0x04 /* triv req/rsp, timestamp ignored */
 #define SMB_COM_DELETE                0x06 /* trivial response */
 #define SMB_COM_RENAME                0x07 /* trivial response */
+#define SMB_COM_QUERY_INFORMATION     0x08 /* aka getattr */
 #define SMB_COM_SETATTR               0x09 /* trivial response */
 #define SMB_COM_LOCKING_ANDX          0x24 /* trivial response */
 #define SMB_COM_COPY                  0x29 /* trivial rsp, fail filename ignrd*/
+#define SMB_COM_OPEN_ANDX             0x2D /* Legacy open for old servers */
 #define SMB_COM_READ_ANDX             0x2E
 #define SMB_COM_WRITE_ANDX            0x2F
 #define SMB_COM_TRANSACTION2          0x32
@@ -52,6 +54,7 @@
 #define SMB_COM_NT_TRANSACT           0xA0
 #define SMB_COM_NT_TRANSACT_SECONDARY 0xA1
 #define SMB_COM_NT_CREATE_ANDX        0xA2
+#define SMB_COM_NT_CANCEL             0xA4 /* no response */
 #define SMB_COM_NT_RENAME             0xA5 /* trivial response */
 
 /* Transact2 subcommand codes */
@@ -59,6 +62,7 @@
 #define TRANS2_FIND_FIRST             0x01
 #define TRANS2_FIND_NEXT              0x02
 #define TRANS2_QUERY_FS_INFORMATION   0x03
+#define TRANS2_SET_FS_INFORMATION     0x04
 #define TRANS2_QUERY_PATH_INFORMATION 0x05
 #define TRANS2_SET_PATH_INFORMATION   0x06
 #define TRANS2_QUERY_FILE_INFORMATION 0x07
@@ -76,7 +80,7 @@
 #define NT_TRANSACT_GET_USER_QUOTA    0x07
 #define NT_TRANSACT_SET_USER_QUOTA    0x08
 
-#define MAX_CIFS_HDR_SIZE 256	/* chained NTCreateXReadX will probably be biggest */
+#define MAX_CIFS_HDR_SIZE 256	/* is future chained NTCreateXReadX bigger? */
 
 /* internal cifs vfs structures */
 /*****************************************************************
@@ -129,10 +133,11 @@
 /*
  * SMB flag definitions 
  */
-#define SMBFLG_EXTD_LOCK 0x01	/* server supports lock-read write-unlock primitives */
+#define SMBFLG_EXTD_LOCK 0x01	/* server supports lock-read write-unlock smb */
 #define SMBFLG_RCV_POSTED 0x02	/* obsolete */
 #define SMBFLG_RSVD 0x04
-#define SMBFLG_CASELESS 0x08	/* all pathnames treated as caseless (off implies case sensitive file handling requested) */
+#define SMBFLG_CASELESS 0x08	/* all pathnames treated as caseless (off
+				implies case sensitive file handling request) */
 #define SMBFLG_CANONICAL_PATH_FORMAT 0x10	/* obsolete */
 #define SMBFLG_OLD_OPLOCK 0x20	/* obsolete */
 #define SMBFLG_OLD_OPLOCK_NOTIFY 0x40	/* obsolete */
@@ -141,7 +146,8 @@
 /*
  * SMB flag2 definitions 
  */
-#define SMBFLG2_KNOWS_LONG_NAMES cpu_to_le16(1)	/* can send long (non-8.3) path names in response */
+#define SMBFLG2_KNOWS_LONG_NAMES cpu_to_le16(1)	/* can send long (non-8.3) 
+						   path names in response */
 #define SMBFLG2_KNOWS_EAS cpu_to_le16(2)
 #define SMBFLG2_SECURITY_SIGNATURE cpu_to_le16(4)
 #define SMBFLG2_IS_LONG_NAME cpu_to_le16(0x40)
@@ -160,32 +166,32 @@
  * file and can have any suitable combination of the following values:
  */
 
-#define FILE_READ_DATA        0x00000001	/* Data can be read from the file   */
-#define FILE_WRITE_DATA       0x00000002	/* Data can be written to the file  */
-#define FILE_APPEND_DATA      0x00000004	/* Data can be appended to the file */
-#define FILE_READ_EA          0x00000008	/* Extended attributes associated   */
-					 /* with the file can be read        */
-#define FILE_WRITE_EA         0x00000010	/* Extended attributes associated   */
-					 /* with the file can be written     */
-#define FILE_EXECUTE          0x00000020	/*Data can be read into memory from */
-					 /* the file using system paging I/O */
+#define FILE_READ_DATA        0x00000001  /* Data can be read from the file   */
+#define FILE_WRITE_DATA       0x00000002  /* Data can be written to the file  */
+#define FILE_APPEND_DATA      0x00000004  /* Data can be appended to the file */
+#define FILE_READ_EA          0x00000008  /* Extended attributes associated   */
+					  /* with the file can be read        */
+#define FILE_WRITE_EA         0x00000010  /* Extended attributes associated   */
+					  /* with the file can be written     */
+#define FILE_EXECUTE          0x00000020  /*Data can be read into memory from */
+					  /* the file using system paging I/O */
 #define FILE_DELETE_CHILD     0x00000040
-#define FILE_READ_ATTRIBUTES  0x00000080	/* Attributes associated with the   */
-					 /* file can be read                 */
-#define FILE_WRITE_ATTRIBUTES 0x00000100	/* Attributes associated with the   */
-					 /* file can be written              */
-#define DELETE                0x00010000	/* The file can be deleted          */
-#define READ_CONTROL          0x00020000	/* The access control list and      */
-					 /* ownership associated with the    */
-					 /* file can be read                 */
-#define WRITE_DAC             0x00040000	/* The access control list and      */
-					 /* ownership associated with the    */
-					 /* file can be written.             */
-#define WRITE_OWNER           0x00080000	/* Ownership information associated */
-					 /* with the file can be written     */
-#define SYNCHRONIZE           0x00100000	/* The file handle can waited on to */
-					 /* synchronize with the completion  */
-					 /* of an input/output request       */
+#define FILE_READ_ATTRIBUTES  0x00000080  /* Attributes associated with the   */
+					  /* file can be read                 */
+#define FILE_WRITE_ATTRIBUTES 0x00000100  /* Attributes associated with the   */
+					  /* file can be written              */
+#define DELETE                0x00010000  /* The file can be deleted          */
+#define READ_CONTROL          0x00020000  /* The access control list and      */
+					  /* ownership associated with the    */
+					  /* file can be read                 */
+#define WRITE_DAC             0x00040000  /* The access control list and      */
+					  /* ownership associated with the    */
+					  /* file can be written.             */
+#define WRITE_OWNER           0x00080000  /* Ownership information associated */
+					  /* with the file can be written     */
+#define SYNCHRONIZE           0x00100000  /* The file handle can waited on to */
+					  /* synchronize with the completion  */
+					  /* of an input/output request       */
 #define GENERIC_ALL           0x10000000
 #define GENERIC_EXECUTE       0x20000000
 #define GENERIC_WRITE         0x40000000
@@ -193,7 +199,7 @@
 					 /* In summary - Relevant file       */
 					 /* access flags from CIFS are       */
 					 /* file_read_data, file_write_data  */
-					 /* file_execute, file_read_attributes */
+					 /* file_execute, file_read_attributes*/
 					 /* write_dac, and delete.           */
 
 /*
@@ -238,7 +244,8 @@
 #define ATTR_SPARSE    0x0200
 #define ATTR_REPARSE   0x0400
 #define ATTR_COMPRESSED 0x0800
-#define ATTR_OFFLINE    0x1000	/* ie file not immediately available - offline storage */
+#define ATTR_OFFLINE    0x1000	/* ie file not immediately available - 
+					on offline storage */
 #define ATTR_NOT_CONTENT_INDEXED 0x2000
 #define ATTR_ENCRYPTED  0x4000
 #define ATTR_POSIX_SEMANTICS 0x01000000
@@ -267,10 +274,18 @@
 /* CreateOptions */
 #define CREATE_NOT_FILE		0x00000001	/* if set must not be file */
 #define CREATE_WRITE_THROUGH	0x00000002
-#define CREATE_NOT_DIR		0x00000040	/* if set must not be directory */
+#define CREATE_SEQUENTIAL       0x00000004
+#define CREATE_SYNC_ALERT       0x00000010
+#define CREATE_ASYNC_ALERT      0x00000020
+#define CREATE_NOT_DIR		0x00000040    /* if set must not be directory */
+#define CREATE_NO_EA_KNOWLEDGE  0x00000200
+#define CREATE_EIGHT_DOT_THREE  0x00000400
 #define CREATE_RANDOM_ACCESS	0x00000800
 #define CREATE_DELETE_ON_CLOSE	0x00001000
+#define CREATE_OPEN_BY_ID       0x00002000
 #define OPEN_REPARSE_POINT	0x00200000
+#define CREATE_OPTIONS_MASK     0x007FFFFF 
+#define CREATE_OPTION_SPECIAL   0x20000000   /* system. NB not sent over wire */
 
 /* ImpersonationLevel flags */
 #define SECURITY_ANONYMOUS      0
@@ -297,10 +312,10 @@
 #define GETU16(var)  (*((__u16 *)var))	/* BB check for endian issues */
 #define GETU32(var)  (*((__u32 *)var))	/* BB check for endian issues */
 
-#pragma pack(1)
-
 struct smb_hdr {
-	__u32 smb_buf_length;	/* big endian on wire *//* BB length is only two or three bytes - with one or two byte type preceding it but that is always zero - we could mask the type byte off just in case BB */
+	__u32 smb_buf_length;	/* big endian on wire *//* BB length is only two
+		or three bytes - with one or two byte type preceding it that are
+		zero - we could mask the type byte off just in case BB */
 	__u8 Protocol[4];
 	__u8 Command;
 	union {
@@ -308,9 +323,9 @@ struct smb_hdr {
 			__u8 ErrorClass;
 			__u8 Reserved;
 			__le16 Error;
-		} DosError;
+		} __attribute__((packed)) DosError;
 		__le32 CifsError;
-	} Status;
+	} __attribute__((packed)) Status;
 	__u8 Flags;
 	__le16 Flags2;		/* note: le */
 	__le16 PidHigh;
@@ -318,16 +333,16 @@ struct smb_hdr {
 		struct {
 			__le32 SequenceNumber;  /* le */
 			__u32 Reserved; /* zero */
-		} Sequence;
+		} __attribute__((packed)) Sequence;
 		__u8 SecuritySignature[8];	/* le */
-	} Signature;
+	} __attribute__((packed)) Signature;
 	__u8 pad[2];
 	__u16 Tid;
 	__le16 Pid;
 	__u16 Uid;
 	__u16 Mid;
 	__u8 WordCount;
-};
+} __attribute__((packed));
 /* given a pointer to an smb_hdr retrieve the value of byte count */
 #define BCC(smb_var) ( *(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) ) )
 #define BCC_LE(smb_var) ( *(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) ) )
@@ -379,7 +394,7 @@ typedef struct negotiate_req {
 	struct smb_hdr hdr;	/* wct = 0 */
 	__le16 ByteCount;
 	unsigned char DialectsArray[1];
-} NEGOTIATE_REQ;
+} __attribute__((packed)) NEGOTIATE_REQ;
 
 typedef struct negotiate_rsp {
 	struct smb_hdr hdr;	/* wct = 17 */
@@ -397,16 +412,16 @@ typedef struct negotiate_rsp {
 	__u8 EncryptionKeyLength;
 	__u16 ByteCount;
 	union {
-		unsigned char EncryptionKey[1];	/* if cap extended security is off */
+		unsigned char EncryptionKey[1];	/* cap extended security off */
 		/* followed by Domain name - if extended security is off */
 		/* followed by 16 bytes of server GUID */
-		/* followed by security blob if cap_extended_security negotiated */
+		/* then security blob if cap_extended_security negotiated */
 		struct {
 			unsigned char GUID[16];
 			unsigned char SecurityBlob[1];
-		} extended_response;
-	} u;
-} NEGOTIATE_RSP;
+		} __attribute__((packed)) extended_response;
+	} __attribute__((packed)) u;
+} __attribute__((packed)) NEGOTIATE_RSP;
 
 /* SecurityMode bits */
 #define SECMODE_USER          0x01	/* off indicates share level security */
@@ -452,7 +467,8 @@ typedef union smb_com_session_setup_andx {
 		unsigned char SecurityBlob[1];	/* followed by */
 		/* STRING NativeOS */
 		/* STRING NativeLanMan */
-	} req;			/* NTLM request format (with extended security */
+	} __attribute__((packed)) req;	/* NTLM request format (with 
+					extended security */
 
 	struct {		/* request format */
 		struct smb_hdr hdr;	/* wct = 13 */
@@ -463,18 +479,19 @@ typedef union smb_com_session_setup_andx {
 		__le16 MaxMpxCount;
 		__le16 VcNumber;
 		__u32 SessionKey;
-		__le16 CaseInsensitivePasswordLength;	/* ASCII password length */
-		__le16 CaseSensitivePasswordLength;	/* Unicode password length */
+		__le16 CaseInsensitivePasswordLength; /* ASCII password len */
+		__le16 CaseSensitivePasswordLength; /* Unicode password length*/
 		__u32 Reserved;	/* see below */
 		__le32 Capabilities;
 		__le16 ByteCount;
-		unsigned char CaseInsensitivePassword[1];	/* followed by: */
+		unsigned char CaseInsensitivePassword[1];     /* followed by: */
 		/* unsigned char * CaseSensitivePassword; */
 		/* STRING AccountName */
 		/* STRING PrimaryDomain */
 		/* STRING NativeOS */
 		/* STRING NativeLanMan */
-	} req_no_secext;	/* NTLM request format (without extended security */
+	} __attribute__((packed)) req_no_secext; /* NTLM request format (without
+							extended security */
 
 	struct {		/* default (NTLM) response format */
 		struct smb_hdr hdr;	/* wct = 4 */
@@ -488,7 +505,7 @@ typedef union smb_com_session_setup_andx {
 /*      unsigned char  * NativeOS;      */
 /*	unsigned char  * NativeLanMan;  */
 /*      unsigned char  * PrimaryDomain; */
-	} resp;			/* NTLM response format (with or without extended security */
+	} __attribute__((packed)) resp;			/* NTLM response format (with or without extended security */
 
 	struct {		/* request format */
 		struct smb_hdr hdr;	/* wct = 10 */
@@ -507,7 +524,7 @@ typedef union smb_com_session_setup_andx {
 		/* STRING PrimaryDomain */
 		/* STRING NativeOS */
 		/* STRING NativeLanMan */
-	} old_req;		/* pre-NTLM (LANMAN2.1) request format */
+	} __attribute__((packed)) old_req;		/* pre-NTLM (LANMAN2.1) request format */
 
 	struct {		/* default (NTLM) response format */
 		struct smb_hdr hdr;	/* wct = 3 */
@@ -519,8 +536,8 @@ typedef union smb_com_session_setup_andx {
 		unsigned char NativeOS[1];	/* followed by */
 /*	unsigned char * NativeLanMan; */
 /*      unsigned char * PrimaryDomain; */
-	} old_resp;		/* pre-NTLM (LANMAN2.1) response format */
-} SESSION_SETUP_ANDX;
+	} __attribute__((packed)) old_resp;		/* pre-NTLM (LANMAN2.1) response format */
+} __attribute__((packed)) SESSION_SETUP_ANDX;
 
 #define CIFS_NETWORK_OPSYS "CIFS VFS Client for Linux"
 
@@ -530,7 +547,8 @@ typedef union smb_com_session_setup_andx {
 #define CAP_NT_SMBS            0x00000010
 #define CAP_STATUS32           0x00000040
 #define CAP_LEVEL_II_OPLOCKS   0x00000080
-#define CAP_NT_FIND            0x00000200	/* reserved should be zero (presumably because NT_SMBs implies the same thing) */
+#define CAP_NT_FIND            0x00000200	/* reserved should be zero 
+				(because NT_SMBs implies the same thing?) */
 #define CAP_BULK_TRANSFER      0x20000000
 #define CAP_EXTENDED_SECURITY  0x80000000
 
@@ -548,7 +566,7 @@ typedef struct smb_com_tconx_req {
 	unsigned char Password[1];	/* followed by */
 /* STRING Path    *//* \\server\share name */
 	/* STRING Service */
-} TCONX_REQ;
+} __attribute__((packed)) TCONX_REQ;
 
 typedef struct smb_com_tconx_rsp {
 	struct smb_hdr hdr;	/* wct = 3 *//* note that Win2000 has sent wct=7 in some cases on responses. Four unspecified words followed OptionalSupport */
@@ -559,13 +577,14 @@ typedef struct smb_com_tconx_rsp {
 	__u16 ByteCount;
 	unsigned char Service[1];	/* always ASCII, not Unicode */
 	/* STRING NativeFileSystem */
-} TCONX_RSP;
+} __attribute__((packed)) TCONX_RSP;
 
 /* tree connect Flags */
 #define DISCONNECT_TID          0x0001
 #define TCON_EXTENDED_SECINFO   0x0008
 /* OptionalSupport bits */
-#define SMB_SUPPORT_SEARCH_BITS 0x0001	/* must have bits (exclusive searches suppt. */
+#define SMB_SUPPORT_SEARCH_BITS 0x0001	/* "must have" directory search bits
+					 (exclusive searches supported) */
 #define SMB_SHARE_IS_IN_DFS     0x0002
 
 typedef struct smb_com_logoff_andx_req {
@@ -574,7 +593,7 @@ typedef struct smb_com_logoff_andx_req {
 	__u8 AndXReserved;
 	__u16 AndXOffset;
 	__u16 ByteCount;
-} LOGOFF_ANDX_REQ;
+} __attribute__((packed)) LOGOFF_ANDX_REQ;
 
 typedef struct smb_com_logoff_andx_rsp {
 	struct smb_hdr hdr;	/* wct = 2 */
@@ -582,38 +601,39 @@ typedef struct smb_com_logoff_andx_rsp {
 	__u8 AndXReserved;
 	__u16 AndXOffset;
 	__u16 ByteCount;
-} LOGOFF_ANDX_RSP;
+} __attribute__((packed)) LOGOFF_ANDX_RSP;
 
 typedef union smb_com_tree_disconnect {	/* as an altetnative can use flag on tree_connect PDU to effect disconnect *//* probably the simplest SMB PDU */
 	struct {
 		struct smb_hdr hdr;	/* wct = 0 */
 		__u16 ByteCount;	/* bcc = 0 */
-	} req;
+	} __attribute__((packed)) req;
 	struct {
 		struct smb_hdr hdr;	/* wct = 0 */
 		__u16 ByteCount;	/* bcc = 0 */
-	} resp;
-} TREE_DISCONNECT;
+	} __attribute__((packed)) resp;
+} __attribute__((packed)) TREE_DISCONNECT;
 
 typedef struct smb_com_close_req {
 	struct smb_hdr hdr;	/* wct = 3 */
 	__u16 FileID;
 	__u32 LastWriteTime;	/* should be zero */
 	__u16 ByteCount;	/* 0 */
-} CLOSE_REQ;
+} __attribute__((packed)) CLOSE_REQ;
 
 typedef struct smb_com_close_rsp {
 	struct smb_hdr hdr;	/* wct = 0 */
 	__u16 ByteCount;	/* bct = 0 */
-} CLOSE_RSP;
+} __attribute__((packed)) CLOSE_RSP;
 
 typedef struct smb_com_findclose_req {
 	struct smb_hdr hdr; /* wct = 1 */
 	__u16 FileID;
 	__u16 ByteCount;    /* 0 */
-} FINDCLOSE_REQ;
+} __attribute__((packed)) FINDCLOSE_REQ;
 
 /* OpenFlags */
+#define REQ_MORE_INFO      0x00000001  /* legacy (OPEN_AND_X) only */
 #define REQ_OPLOCK         0x00000002
 #define REQ_BATCHOPLOCK    0x00000004
 #define REQ_OPENDIRONLY    0x00000008
@@ -637,7 +657,7 @@ typedef struct smb_com_open_req {	/* also handles create */
 	__u8 SecurityFlags;
 	__le16 ByteCount;
 	char fileName[1];
-} OPEN_REQ;
+} __attribute__((packed)) OPEN_REQ;
 
 /* open response: oplock levels */
 #define OPLOCK_NONE  	 0
@@ -667,7 +687,63 @@ typedef struct smb_com_open_rsp {
 	__le16 DeviceState;
 	__u8 DirectoryFlag;
 	__u16 ByteCount;	/* bct = 0 */
-} OPEN_RSP;
+} __attribute__((packed)) OPEN_RSP;
+
+/* format of legacy open request */
+typedef struct smb_com_openx_req {
+	struct smb_hdr	hdr;	/* wct = 15 */
+	__u8 AndXCommand;
+	__u8 AndXReserved;
+	__le16 AndXOffset;
+	__le16 OpenFlags;
+	__le16 Mode;
+	__le16 Sattr; /* search attributes */
+	__le16 FileAttributes;  /* dos attrs */
+	__le32 CreateTime; /* os2 format */
+	__le16 OpenFunction;
+	__le32 EndOfFile;
+	__le32 Timeout;
+	__le32 Reserved;
+	__le16  ByteCount;  /* file name follows */
+	char   fileName[1];
+} __attribute__((packed)) OPENX_REQ;
+
+typedef struct smb_com_openx_rsp {
+	struct smb_hdr	hdr;	/* wct = 15 */
+	__u8 AndXCommand;
+	__u8 AndXReserved;
+	__le16 AndXOffset;
+	__u16  Fid;
+	__le16 FileAttributes;
+	__le32 LastWriteTime; /* os2 format */
+	__le32 EndOfFile;
+	__le16 Access;
+	__le16 FileType;
+	__le16 IPCState;
+	__le16 Action;
+	__u32  FileId;
+	__u16  Reserved;
+	__u16  ByteCount;
+} __attribute__((packed)) OPENX_RSP; 
+
+/* Legacy write request for older servers */
+typedef struct smb_com_writex_req {
+        struct smb_hdr hdr;     /* wct = 12 */
+        __u8 AndXCommand;
+        __u8 AndXReserved;
+        __le16 AndXOffset;
+        __u16 Fid;
+        __le32 OffsetLow;
+        __u32 Reserved; /* Timeout */
+        __le16 WriteMode; /* 1 = write through */
+        __le16 Remaining;
+        __le16 Reserved2;
+        __le16 DataLengthLow;
+        __le16 DataOffset;
+        __le16 ByteCount;
+        __u8 Pad;               /* BB check for whether padded to DWORD boundary and optimum performance here */
+        char Data[0];
+} __attribute__((packed)) WRITEX_REQ;
 
 typedef struct smb_com_write_req {
 	struct smb_hdr hdr;	/* wct = 14 */
@@ -686,7 +762,7 @@ typedef struct smb_com_write_req {
 	__le16 ByteCount;
 	__u8 Pad;		/* BB check for whether padded to DWORD boundary and optimum performance here */
 	char Data[0];
-} WRITE_REQ;
+} __attribute__((packed)) WRITE_REQ;
 
 typedef struct smb_com_write_rsp {
 	struct smb_hdr hdr;	/* wct = 6 */
@@ -698,7 +774,22 @@ typedef struct smb_com_write_rsp {
 	__le16 CountHigh;
 	__u16  Reserved;
 	__u16 ByteCount;
-} WRITE_RSP;
+} __attribute__((packed)) WRITE_RSP;
+
+/* legacy read request for older servers */
+typedef struct smb_com_readx_req {
+        struct smb_hdr hdr;     /* wct = 10 */
+        __u8 AndXCommand;
+        __u8 AndXReserved;
+        __le16 AndXOffset;
+        __u16 Fid;
+        __le32 OffsetLow;
+        __le16 MaxCount;
+        __le16 MinCount;                /* obsolete */
+        __le32 Reserved;
+        __le16 Remaining;
+        __le16 ByteCount;
+} __attribute__((packed)) READX_REQ;
 
 typedef struct smb_com_read_req {
 	struct smb_hdr hdr;	/* wct = 12 */
@@ -713,7 +804,7 @@ typedef struct smb_com_read_req {
 	__le16 Remaining;
 	__le32 OffsetHigh;
 	__le16 ByteCount;
-} READ_REQ;
+} __attribute__((packed)) READ_REQ;
 
 typedef struct smb_com_read_rsp {
 	struct smb_hdr hdr;	/* wct = 12 */
@@ -730,7 +821,7 @@ typedef struct smb_com_read_rsp {
 	__u16 ByteCount;
 	__u8 Pad;		/* BB check for whether padded to DWORD boundary and optimum performance here */
 	char Data[1];
-} READ_RSP;
+} __attribute__((packed)) READ_RSP;
 
 typedef struct locking_andx_range {
 	__le16 Pid;
@@ -739,7 +830,7 @@ typedef struct locking_andx_range {
 	__le32 OffsetLow;
 	__le32 LengthHigh;
 	__le32 LengthLow;
-} LOCKING_ANDX_RANGE;
+} __attribute__((packed)) LOCKING_ANDX_RANGE;
 
 #define LOCKING_ANDX_SHARED_LOCK     0x01
 #define LOCKING_ANDX_OPLOCK_RELEASE  0x02
@@ -760,7 +851,7 @@ typedef struct smb_com_lock_req {
 	__le16 NumberOfLocks;
 	__le16 ByteCount;
 	LOCKING_ANDX_RANGE Locks[1];
-} LOCK_REQ;
+} __attribute__((packed)) LOCK_REQ;
 
 
 typedef struct cifs_posix_lock {
@@ -770,7 +861,7 @@ typedef struct cifs_posix_lock {
 	__le64	start;
 	__le64	length;
 	/* BB what about additional owner info to identify network client */
-} CIFS_POSIX_LOCK;
+} __attribute__((packed)) CIFS_POSIX_LOCK;
 
 typedef struct smb_com_lock_rsp {
 	struct smb_hdr hdr;	/* wct = 2 */
@@ -778,7 +869,7 @@ typedef struct smb_com_lock_rsp {
 	__u8 AndXReserved;
 	__le16 AndXOffset;
 	__u16 ByteCount;
-} LOCK_RSP;
+} __attribute__((packed)) LOCK_RSP;
 
 typedef struct smb_com_rename_req {
 	struct smb_hdr hdr;	/* wct = 1 */
@@ -788,7 +879,7 @@ typedef struct smb_com_rename_req {
 	unsigned char OldFileName[1];
 	/* followed by __u8 BufferFormat2 */
 	/* followed by NewFileName */
-} RENAME_REQ;
+} __attribute__((packed)) RENAME_REQ;
 
 	/* copy request flags */
 #define COPY_MUST_BE_FILE      0x0001
@@ -808,7 +899,7 @@ typedef struct smb_com_copy_req {
 	unsigned char OldFileName[1];
 	/* followed by __u8 BufferFormat2 */
 	/* followed by NewFileName string */
-} COPY_REQ;
+} __attribute__((packed)) COPY_REQ;
 
 typedef struct smb_com_copy_rsp {
 	struct smb_hdr hdr;     /* wct = 1 */
@@ -816,7 +907,7 @@ typedef struct smb_com_copy_rsp {
 	__u16 ByteCount;    /* may be zero */
 	__u8 BufferFormat;  /* 0x04 - only present if errored file follows */
 	unsigned char ErrorFileName[1]; /* only present if error in copy */
-} COPY_RSP;
+} __attribute__((packed)) COPY_RSP;
 
 #define CREATE_HARD_LINK		0x103
 #define MOVEFILE_COPY_ALLOWED		0x0002
@@ -832,12 +923,12 @@ typedef struct smb_com_nt_rename_req {	/* A5 - also used for create hardlink */
 	unsigned char OldFileName[1];
 	/* followed by __u8 BufferFormat2 */
 	/* followed by NewFileName */
-} NT_RENAME_REQ;
+} __attribute__((packed)) NT_RENAME_REQ;
 
 typedef struct smb_com_rename_rsp {
 	struct smb_hdr hdr;	/* wct = 0 */
 	__u16 ByteCount;	/* bct = 0 */
-} RENAME_RSP;
+} __attribute__((packed)) RENAME_RSP;
 
 typedef struct smb_com_delete_file_req {
 	struct smb_hdr hdr;	/* wct = 1 */
@@ -845,36 +936,52 @@ typedef struct smb_com_delete_file_req {
 	__le16 ByteCount;
 	__u8 BufferFormat;	/* 4 = ASCII */
 	unsigned char fileName[1];
-} DELETE_FILE_REQ;
+} __attribute__((packed)) DELETE_FILE_REQ;
 
 typedef struct smb_com_delete_file_rsp {
 	struct smb_hdr hdr;	/* wct = 0 */
 	__u16 ByteCount;	/* bct = 0 */
-} DELETE_FILE_RSP;
+} __attribute__((packed)) DELETE_FILE_RSP;
 
 typedef struct smb_com_delete_directory_req {
 	struct smb_hdr hdr;	/* wct = 0 */
 	__le16 ByteCount;
 	__u8 BufferFormat;	/* 4 = ASCII */
 	unsigned char DirName[1];
-} DELETE_DIRECTORY_REQ;
+} __attribute__((packed)) DELETE_DIRECTORY_REQ;
 
 typedef struct smb_com_delete_directory_rsp {
 	struct smb_hdr hdr;	/* wct = 0 */
 	__u16 ByteCount;	/* bct = 0 */
-} DELETE_DIRECTORY_RSP;
+} __attribute__((packed)) DELETE_DIRECTORY_RSP;
 
 typedef struct smb_com_create_directory_req {
 	struct smb_hdr hdr;	/* wct = 0 */
 	__le16 ByteCount;
 	__u8 BufferFormat;	/* 4 = ASCII */
 	unsigned char DirName[1];
-} CREATE_DIRECTORY_REQ;
+} __attribute__((packed)) CREATE_DIRECTORY_REQ;
 
 typedef struct smb_com_create_directory_rsp {
 	struct smb_hdr hdr;	/* wct = 0 */
 	__u16 ByteCount;	/* bct = 0 */
-} CREATE_DIRECTORY_RSP;
+} __attribute__((packed)) CREATE_DIRECTORY_RSP;
+
+typedef struct smb_com_query_information_req {
+	struct smb_hdr hdr;     /* wct = 0 */
+	__le16 ByteCount;	/* 1 + namelen + 1 */
+	__u8 BufferFormat;      /* 4 = ASCII */
+	unsigned char FileName[1];
+} __attribute__((packed)) QUERY_INFORMATION_REQ;
+
+typedef struct smb_com_query_information_rsp {
+	struct smb_hdr hdr;     /* wct = 10 */
+	__le16 attr;
+	__le32  last_write_time;
+	__le32 size;
+	__u16  reserved[5];
+	__le16 ByteCount;	/* bcc = 0 */
+} __attribute__((packed)) QUERY_INFORMATION_RSP;
 
 typedef struct smb_com_setattr_req {
 	struct smb_hdr hdr; /* wct = 8 */
@@ -885,12 +992,12 @@ typedef struct smb_com_setattr_req {
 	__u16  ByteCount;
 	__u8   BufferFormat; /* 4 = ASCII */
 	unsigned char fileName[1];
-} SETATTR_REQ;
+} __attribute__((packed)) SETATTR_REQ;
 
 typedef struct smb_com_setattr_rsp {
 	struct smb_hdr hdr;     /* wct = 0 */
 	__u16 ByteCount;        /* bct = 0 */
-} SETATTR_RSP;
+} __attribute__((packed)) SETATTR_RSP;
 
 /* empty wct response to setattr */
 
@@ -920,7 +1027,7 @@ typedef struct smb_com_transaction_ioctl_req {
 	__le16 ByteCount;
 	__u8 Pad[3];
 	__u8 Data[1];
-} TRANSACT_IOCTL_REQ;
+} __attribute__((packed)) TRANSACT_IOCTL_REQ;
 
 typedef struct smb_com_transaction_ioctl_rsp {
 	struct smb_hdr hdr;	/* wct = 19 */
@@ -937,7 +1044,7 @@ typedef struct smb_com_transaction_ioctl_rsp {
 	__le16 ReturnedDataLen;
 	__u16 ByteCount;
 	__u8 Pad[3];
-} TRANSACT_IOCTL_RSP;
+} __attribute__((packed)) TRANSACT_IOCTL_RSP;
 
 typedef struct smb_com_transaction_change_notify_req {
 	struct smb_hdr hdr;     /* wct = 23 */
@@ -961,7 +1068,7 @@ typedef struct smb_com_transaction_change_notify_req {
 	__le16 ByteCount;
 /* __u8 Pad[3];*/
 /*	__u8 Data[1];*/
-} TRANSACT_CHANGE_NOTIFY_REQ;
+} __attribute__((packed)) TRANSACT_CHANGE_NOTIFY_REQ;
 
 typedef struct smb_com_transaction_change_notify_rsp {
 	struct smb_hdr hdr;	/* wct = 18 */
@@ -977,7 +1084,7 @@ typedef struct smb_com_transaction_change_notify_rsp {
 	__u8 SetupCount;   /* 0 */
 	__u16 ByteCount;
 	/* __u8 Pad[3]; */
-} TRANSACT_CHANGE_NOTIFY_RSP;
+} __attribute__((packed)) TRANSACT_CHANGE_NOTIFY_RSP;
 /* Completion Filter flags for Notify */
 #define FILE_NOTIFY_CHANGE_FILE_NAME    0x00000001
 #define FILE_NOTIFY_CHANGE_DIR_NAME     0x00000002
@@ -1008,7 +1115,7 @@ struct file_notify_information {
 	__le32 Action;
 	__le32 FileNameLength;
 	__u8  FileName[0];
-}; 
+} __attribute__((packed)); 
 
 struct reparse_data {
 	__u32	ReparseTag;
@@ -1019,7 +1126,7 @@ struct reparse_data {
 	__u16	TargetNameOffset;
 	__u16	TargetNameLen;
 	char	LinkNamesBuf[1];
-};
+} __attribute__((packed));
 
 struct cifs_quota_data {
 	__u32	rsrvd1;  /* 0 */
@@ -1029,7 +1136,7 @@ struct cifs_quota_data {
 	__u64	soft_limit;
 	__u64	hard_limit;
 	char	sid[1];  /* variable size? */
-};
+} __attribute__((packed));
 
 /* quota sub commands */
 #define QUOTA_LIST_CONTINUE	    0
@@ -1055,12 +1162,12 @@ struct trans2_req {
 	__u8 Reserved3;
 	__le16 SubCommand; /* 1st setup word - SetupCount words follow */
 	__le16 ByteCount;
-};
+} __attribute__((packed));
 
 struct smb_t2_req {
 	struct smb_hdr hdr;
 	struct trans2_req t2_req;
-};
+} __attribute__((packed));
 
 struct trans2_resp {
 	/* struct smb_hdr hdr precedes. Note wct = 10 + setup count */
@@ -1079,12 +1186,12 @@ struct trans2_resp {
 	__u16 ByteCount;
 	__u16 Reserved2;*/	
 	/* data area follows */
-};
+} __attribute__((packed));
 
 struct smb_t2_rsp {
 	struct smb_hdr hdr;
 	struct trans2_resp t2_rsp;
-};
+} __attribute__((packed));
 
 /* PathInfo/FileInfo infolevels */
 #define SMB_INFO_STANDARD                   1
@@ -1171,14 +1278,14 @@ typedef struct smb_com_transaction2_qpi_req {
 	__le16 InformationLevel;
 	__u32 Reserved4;
 	char FileName[1];
-} TRANSACTION2_QPI_REQ;
+} __attribute__((packed)) TRANSACTION2_QPI_REQ;
 
 typedef struct smb_com_transaction2_qpi_rsp {
 	struct smb_hdr hdr;	/* wct = 10 + SetupCount */
 	struct trans2_resp t2;
 	__u16 ByteCount;
 	__u16 Reserved2;	/* parameter word reserved - present for infolevels > 100 */
-} TRANSACTION2_QPI_RSP;
+} __attribute__((packed)) TRANSACTION2_QPI_RSP;
 
 typedef struct smb_com_transaction2_spi_req {
 	struct smb_hdr hdr;	/* wct = 15 */
@@ -1204,21 +1311,21 @@ typedef struct smb_com_transaction2_spi_req {
 	__le16 InformationLevel;
 	__u32 Reserved4;
 	char FileName[1];
-} TRANSACTION2_SPI_REQ;
+} __attribute__((packed)) TRANSACTION2_SPI_REQ;
 
 typedef struct smb_com_transaction2_spi_rsp {
 	struct smb_hdr hdr;	/* wct = 10 + SetupCount */
 	struct trans2_resp t2;
 	__u16 ByteCount;
 	__u16 Reserved2;	/* parameter word reserved - present for infolevels > 100 */
-} TRANSACTION2_SPI_RSP;
+} __attribute__((packed)) TRANSACTION2_SPI_RSP;
 
 struct set_file_rename {
 	__le32 overwrite;   /* 1 = overwrite dest */
 	__u32 root_fid;   /* zero */
 	__le32 target_name_len;
 	char  target_name[0];  /* Must be unicode */
-};
+} __attribute__((packed));
 
 struct smb_com_transaction2_sfi_req {
 	struct smb_hdr hdr;	/* wct = 15 */
@@ -1244,7 +1351,7 @@ struct smb_com_transaction2_sfi_req {
 	__u16 Fid;
 	__le16 InformationLevel;
 	__u16 Reserved4;	
-};
+} __attribute__((packed));
 
 struct smb_com_transaction2_sfi_rsp {
 	struct smb_hdr hdr;	/* wct = 10 + SetupCount */
@@ -1252,7 +1359,7 @@ struct smb_com_transaction2_sfi_rsp {
 	__u16 ByteCount;
 	__u16 Reserved2;	/* parameter word reserved - 
 					present for infolevels > 100 */
-};
+} __attribute__((packed));
 
 struct smb_t2_qfi_req {
         struct	smb_hdr hdr;
@@ -1260,7 +1367,7 @@ struct smb_t2_qfi_req {
 	__u8	Pad;
 	__u16	Fid;
 	__le16	InformationLevel;
-};
+} __attribute__((packed));
 
 struct smb_t2_qfi_rsp {
         struct smb_hdr hdr;     /* wct = 10 + SetupCount */
@@ -1268,7 +1375,7 @@ struct smb_t2_qfi_rsp {
         __u16 ByteCount;
         __u16 Reserved2;        /* parameter word reserved - 
 					present for infolevels > 100 */
-};
+} __attribute__((packed));
 
 /*
  * Flags on T2 FINDFIRST and FINDNEXT 
@@ -1310,13 +1417,13 @@ typedef struct smb_com_transaction2_ffirst_req {
 	__le16 InformationLevel;
 	__le32 SearchStorageType;
 	char FileName[1];
-} TRANSACTION2_FFIRST_REQ;
+} __attribute__((packed)) TRANSACTION2_FFIRST_REQ;
 
 typedef struct smb_com_transaction2_ffirst_rsp {
 	struct smb_hdr hdr;	/* wct = 10 */
 	struct trans2_resp t2;
 	__u16 ByteCount;
-} TRANSACTION2_FFIRST_RSP;
+} __attribute__((packed)) TRANSACTION2_FFIRST_RSP;
 
 typedef struct smb_com_transaction2_ffirst_rsp_parms {
 	__u16 SearchHandle;
@@ -1324,7 +1431,7 @@ typedef struct smb_com_transaction2_ffirst_rsp_parms {
 	__le16 EndofSearch;
 	__le16 EAErrorOffset;
 	__le16 LastNameOffset;
-} T2_FFIRST_RSP_PARMS;
+} __attribute__((packed)) T2_FFIRST_RSP_PARMS;
 
 typedef struct smb_com_transaction2_fnext_req {
 	struct smb_hdr hdr;	/* wct = 15 */
@@ -1352,20 +1459,20 @@ typedef struct smb_com_transaction2_fnext_req {
 	__u32 ResumeKey;
 	__le16 SearchFlags;
 	char ResumeFileName[1];
-} TRANSACTION2_FNEXT_REQ;
+} __attribute__((packed)) TRANSACTION2_FNEXT_REQ;
 
 typedef struct smb_com_transaction2_fnext_rsp {
 	struct smb_hdr hdr;	/* wct = 10 */
 	struct trans2_resp t2;
 	__u16 ByteCount;
-} TRANSACTION2_FNEXT_RSP;
+} __attribute__((packed)) TRANSACTION2_FNEXT_RSP;
 
 typedef struct smb_com_transaction2_fnext_rsp_parms {
 	__le16 SearchCount;
 	__le16 EndofSearch;
 	__le16 EAErrorOffset;
 	__le16 LastNameOffset;
-} T2_FNEXT_RSP_PARMS;
+} __attribute__((packed)) T2_FNEXT_RSP_PARMS;
 
 /* QFSInfo Levels */
 #define SMB_INFO_ALLOCATION         1
@@ -1402,14 +1509,51 @@ typedef struct smb_com_transaction2_qfsi_req {
 	__le16 ByteCount;
 	__u8 Pad;
 	__le16 InformationLevel;
-} TRANSACTION2_QFSI_REQ;
+} __attribute__((packed)) TRANSACTION2_QFSI_REQ;
 
 typedef struct smb_com_transaction_qfsi_rsp {
 	struct smb_hdr hdr;	/* wct = 10 + SetupCount */
 	struct trans2_resp t2;
 	__u16 ByteCount;
 	__u8 Pad;		/* may be three bytes *//* followed by data area */
-} TRANSACTION2_QFSI_RSP;
+} __attribute__((packed)) TRANSACTION2_QFSI_RSP;
+
+
+/* SETFSInfo Levels */
+#define SMB_SET_CIFS_UNIX_INFO    0x200
+typedef struct smb_com_transaction2_setfsi_req {
+	struct smb_hdr hdr;	/* wct = 15 */
+	__le16 TotalParameterCount;
+	__le16 TotalDataCount;
+	__le16 MaxParameterCount;
+	__le16 MaxDataCount;
+	__u8 MaxSetupCount;
+	__u8 Reserved;
+	__le16 Flags;
+	__le32 Timeout;
+	__u16 Reserved2;
+	__le16 ParameterCount;	/* 4 */
+	__le16 ParameterOffset;
+	__le16 DataCount;	/* 12 */
+	__le16 DataOffset;
+	__u8 SetupCount;	/* one */
+	__u8 Reserved3;
+	__le16 SubCommand;	/* TRANS2_SET_FS_INFORMATION */
+	__le16 ByteCount;
+	__u8 Pad;
+	__u16 FileNum;		/* Parameters start. */
+	__le16 InformationLevel;/* Parameters end. */
+	__le16 ClientUnixMajor; /* Data start. */
+	__le16 ClientUnixMinor;
+	__le64 ClientUnixCap;   /* Data end */
+} __attribute__((packed)) TRANSACTION2_SETFSI_REQ;
+
+typedef struct smb_com_transaction2_setfsi_rsp {
+	struct smb_hdr hdr;	/* wct = 10 */
+	struct trans2_resp t2;
+	__u16 ByteCount;
+} __attribute__((packed)) TRANSACTION2_SETFSI_RSP;
+
 
 typedef struct smb_com_transaction2_get_dfs_refer_req {
 	struct smb_hdr hdr;	/* wct = 15 */
@@ -1433,7 +1577,7 @@ typedef struct smb_com_transaction2_get_dfs_refer_req {
 	__u8 Pad[3];		/* Win2K has sent 0x0F01 (max resp length perhaps?) followed by one byte pad - doesn't seem to matter though */
 	__le16 MaxReferralLevel;
 	char RequestFileName[1];
-} TRANSACTION2_GET_DFS_REFER_REQ;
+} __attribute__((packed)) TRANSACTION2_GET_DFS_REFER_REQ;
 
 typedef struct dfs_referral_level_3 {
 	__le16 VersionNumber;
@@ -1445,7 +1589,7 @@ typedef struct dfs_referral_level_3 {
 	__le16 DfsPathOffset;
 	__le16 DfsAlternatePathOffset;
 	__le16 NetworkAddressOffset;
-} REFERRAL3;
+} __attribute__((packed)) REFERRAL3;
 
 typedef struct smb_com_transaction_get_dfs_refer_rsp {
 	struct smb_hdr hdr;	/* wct = 10 */
@@ -1458,7 +1602,7 @@ typedef struct smb_com_transaction_get_dfs_refer_rsp {
 	__u16 Pad2;
 	REFERRAL3 referrals[1];	/* array of level 3 dfs_referral structures */
 	/* followed by the strings pointed to by the referral structures */
-} TRANSACTION2_GET_DFS_REFER_RSP;
+} __attribute__((packed)) TRANSACTION2_GET_DFS_REFER_RSP;
 
 /* DFS Flags */
 #define DFSREF_REFERRAL_SERVER  0x0001
@@ -1512,7 +1656,7 @@ struct serverInfo {
 	unsigned char versionMinor;
 	unsigned long type;
 	unsigned int commentOffset;
-};
+} __attribute__((packed));
 
 /*
  * The following structure is the format of the data returned on a NetShareEnum
@@ -1524,39 +1668,55 @@ struct shareInfo {
 	char pad;
 	unsigned short type;
 	unsigned int commentOffset;
-};
+} __attribute__((packed));
 
 struct aliasInfo {
 	char aliasName[9];
 	char pad;
 	unsigned int commentOffset;
 	unsigned char type[2];
-};
+} __attribute__((packed));
 
 struct aliasInfo92 {
 	int aliasNameOffset;
 	int serverNameOffset;
 	int shareNameOffset;
-};
+} __attribute__((packed));
 
 typedef struct {
 	__le64 TotalAllocationUnits;
 	__le64 FreeAllocationUnits;
 	__le32 SectorsPerAllocationUnit;
 	__le32 BytesPerSector;
-} FILE_SYSTEM_INFO;		/* size info, level 0x103 */
+} __attribute__((packed)) FILE_SYSTEM_INFO;		/* size info, level 0x103 */
+
+typedef struct {
+	__le32 fsid;
+	__le32 SectorsPerAllocationUnit;
+	__le32 TotalAllocationUnits;
+	__le32 FreeAllocationUnits;
+	__le16  BytesPerSector;
+} __attribute__((packed)) FILE_SYSTEM_ALLOC_INFO;
 
 typedef struct {
 	__le16 MajorVersionNumber;
 	__le16 MinorVersionNumber;
 	__le64 Capability;
-} FILE_SYSTEM_UNIX_INFO;	/* Unix extensions info, level 0x200 */
+} __attribute__((packed)) FILE_SYSTEM_UNIX_INFO;	/* Unix extensions info, level 0x200 */
+
+/* Version numbers for CIFS UNIX major and minor. */
+#define CIFS_UNIX_MAJOR_VERSION 1
+#define CIFS_UNIX_MINOR_VERSION 0
+
 /* Linux/Unix extensions capability flags */
 #define CIFS_UNIX_FCNTL_CAP             0x00000001 /* support for fcntl locks */
 #define CIFS_UNIX_POSIX_ACL_CAP         0x00000002 /* support getfacl/setfacl */
 #define CIFS_UNIX_XATTR_CAP             0x00000004 /* support new namespace   */
 #define CIFS_UNIX_EXTATTR_CAP           0x00000008 /* support chattr/chflag   */
+#define CIFS_UNIX_POSIX_PATHNAMES_CAP   0x00000010 /* Use POSIX pathnames on the wire. */
+
 #define CIFS_POSIX_EXTENSIONS           0x00000010 /* support for new QFSInfo */
+
 typedef struct {
 	/* For undefined recommended transfer size return -1 in that field */
 	__le32 OptimalTransferSize;  /* bsize on some os, iosize on other os */
@@ -1577,7 +1737,7 @@ typedef struct {
 	__le64 FileSysIdentifier;   /* fsid */
 	/* NB Namelen comes from FILE_SYSTEM_ATTRIBUTE_INFO call */
 	/* NB flags can come from FILE_SYSTEM_DEVICE_INFO call   */
-} FILE_SYSTEM_POSIX_INFO;
+} __attribute__((packed)) FILE_SYSTEM_POSIX_INFO;
 
 /* DeviceType Flags */
 #define FILE_DEVICE_CD_ROM              0x00000002
@@ -1602,14 +1762,14 @@ typedef struct {
 typedef struct {
 	__le32 DeviceType;
 	__le32 DeviceCharacteristics;
-} FILE_SYSTEM_DEVICE_INFO;	/* device info, level 0x104 */
+} __attribute__((packed)) FILE_SYSTEM_DEVICE_INFO;	/* device info, level 0x104 */
 
 typedef struct {
 	__le32 Attributes;
 	__le32 MaxPathNameComponentLength;
 	__le32 FileSystemNameLen;
 	char FileSystemName[52]; /* do not really need to save this - so potentially get only subset of name */
-} FILE_SYSTEM_ATTRIBUTE_INFO;
+} __attribute__((packed)) FILE_SYSTEM_ATTRIBUTE_INFO;
 
 /******************************************************************************/
 /* QueryFileInfo/QueryPathinfo (also for SetPath/SetFile) data buffer formats */
@@ -1636,7 +1796,7 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */
 	__le32 AlignmentRequirement;
 	__le32 FileNameLength;
 	char FileName[1];
-} FILE_ALL_INFO;		/* level 0x107 QPathInfo */
+} __attribute__((packed)) FILE_ALL_INFO;		/* level 0x107 QPathInfo */
 
 /* defines for enumerating possible values of the Unix type field below */
 #define UNIX_FILE      0
@@ -1660,11 +1820,11 @@ typedef struct {
 	__u64 UniqueId;
 	__le64 Permissions;
 	__le64 Nlinks;
-} FILE_UNIX_BASIC_INFO;		/* level 0x200 QPathInfo */
+} __attribute__((packed)) FILE_UNIX_BASIC_INFO;		/* level 0x200 QPathInfo */
 
 typedef struct {
 	char LinkDest[1];
-} FILE_UNIX_LINK_INFO;		/* level 0x201 QPathInfo */
+} __attribute__((packed)) FILE_UNIX_LINK_INFO;		/* level 0x201 QPathInfo */
 
 /* The following three structures are needed only for
 	setting time to NT4 and some older servers via
@@ -1673,13 +1833,13 @@ typedef struct {
 	__u16 Day:5;
 	__u16 Month:4;
 	__u16 Year:7;
-} SMB_DATE;
+} __attribute__((packed)) SMB_DATE;
 
 typedef struct {
 	__u16 TwoSeconds:5;
 	__u16 Minutes:6;
 	__u16 Hours:5;
-} SMB_TIME;
+} __attribute__((packed)) SMB_TIME;
 
 typedef struct {
 	__le16 CreationDate; /* SMB Date see above */
@@ -1692,7 +1852,7 @@ typedef struct {
 	__le32 AllocationSize;
 	__le16 Attributes; /* verify not u32 */
 	__le32 EASize;
-} FILE_INFO_STANDARD;  /* level 1 SetPath/FileInfo */
+} __attribute__((packed)) FILE_INFO_STANDARD;  /* level 1 SetPath/FileInfo */
 
 typedef struct {
 	__le64 CreationTime;
@@ -1701,19 +1861,19 @@ typedef struct {
 	__le64 ChangeTime;
 	__le32 Attributes;
 	__u32 Pad;
-} FILE_BASIC_INFO;		/* size info, level 0x101 */
+} __attribute__((packed)) FILE_BASIC_INFO;		/* size info, level 0x101 */
 
 struct file_allocation_info {
 	__le64 AllocationSize; /* Note old Samba srvr rounds this up too much */
-};	/* size used on disk, level 0x103 for set, 0x105 for query */
+} __attribute__((packed));	/* size used on disk, level 0x103 for set, 0x105 for query */
 
 struct file_end_of_file_info {
 	__le64 FileSize;		/* offset to end of file */
-};	/* size info, level 0x104 for set, 0x106 for query */
+} __attribute__((packed));	/* size info, level 0x104 for set, 0x106 for query */
 
 struct file_alt_name_info {
 	__u8   alt_name[1];
-};      /* level 0x0108 */
+} __attribute__((packed));      /* level 0x0108 */
 
 struct file_stream_info {
 	__le32 number_of_streams;  /* BB check sizes and verify location */
@@ -1730,7 +1890,7 @@ struct file_compression_info {
 	__u8   ch_shift;
 	__u8   cl_shift;
 	__u8   pad[3];
-};      /* level 0x10b */
+} __attribute__((packed));      /* level 0x10b */
 
 /* POSIX ACL set/query path info structures */
 #define CIFS_ACL_VERSION 1
@@ -1738,7 +1898,7 @@ struct cifs_posix_ace { /* access control entry (ACE) */
 	__u8  cifs_e_tag;
 	__u8  cifs_e_perm;
 	__le64 cifs_uid; /* or gid */
-}; 
+} __attribute__((packed)); 
 
 struct cifs_posix_acl { /* access conrol list  (ACL) */
 	__le16	version;
@@ -1747,7 +1907,7 @@ struct cifs_posix_acl { /* access conrol list  (ACL) */
 	struct cifs_posix_ace ace_array[0];
 	/* followed by
 	struct cifs_posix_ace default_ace_arraay[] */
-};  /* level 0x204 */
+} __attribute__((packed));  /* level 0x204 */
 
 /* types of access control entries already defined in posix_acl.h */
 /* #define CIFS_POSIX_ACL_USER_OBJ	 0x01
@@ -1766,15 +1926,15 @@ struct cifs_posix_acl { /* access conrol list  (ACL) */
 
 struct file_internal_info {
 	__u64  UniqueId; /* inode number */
-};      /* level 0x3ee */
+} __attribute__((packed));      /* level 0x3ee */
 struct file_mode_info {
 	__le32	Mode;
-};      /* level 0x3f8 */
+} __attribute__((packed));      /* level 0x3f8 */
 
 struct file_attrib_tag {
 	__le32 Attribute;
 	__le32 ReparseTag;
-};      /* level 0x40b */
+} __attribute__((packed));      /* level 0x40b */
 
 
 /********************************************************/
@@ -1798,7 +1958,7 @@ typedef struct {
 	__le64 Permissions;
 	__le64 Nlinks;
 	char FileName[1];
-} FILE_UNIX_INFO; /* level 0x202 */
+} __attribute__((packed)) FILE_UNIX_INFO; /* level 0x202 */
 
 typedef struct {
 	__le32 NextEntryOffset;
@@ -1812,7 +1972,7 @@ typedef struct {
 	__le32 ExtFileAttributes;
 	__le32 FileNameLength;
 	char FileName[1];
-} FILE_DIRECTORY_INFO;   /* level 0x101 FF response data area */
+} __attribute__((packed)) FILE_DIRECTORY_INFO;   /* level 0x101 FF response data area */
 
 typedef struct {
 	__le32 NextEntryOffset;
@@ -1827,7 +1987,7 @@ typedef struct {
 	__le32 FileNameLength;
 	__le32 EaSize; /* length of the xattrs */
 	char FileName[1];
-} FILE_FULL_DIRECTORY_INFO;   /* level 0x102 FF response data area */
+} __attribute__((packed)) FILE_FULL_DIRECTORY_INFO;   /* level 0x102 FF response data area */
 
 typedef struct {
 	__le32 NextEntryOffset;
@@ -1844,7 +2004,7 @@ typedef struct {
 	__le32 Reserved;
 	__u64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/
 	char FileName[1];
-} SEARCH_ID_FULL_DIR_INFO;   /* level 0x105 FF response data area */
+} __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO;   /* level 0x105 FF response data area */
 
 typedef struct {
 	__le32 NextEntryOffset;
@@ -1862,18 +2022,18 @@ typedef struct {
 	__u8   Reserved;
 	__u8   ShortName[12];
 	char FileName[1];
-} FILE_BOTH_DIRECTORY_INFO;   /* level 0x104 FF response data area */
+} __attribute__((packed)) FILE_BOTH_DIRECTORY_INFO;   /* level 0x104 FF response data area */
 
 
 struct gea {
 	unsigned char name_len;
 	char name[1];
-};
+} __attribute__((packed));
 
 struct gealist {
 	unsigned long list_len;
 	struct gea list[1];
-};
+} __attribute__((packed));
 
 struct fea {
 	unsigned char EA_flags;
@@ -1881,21 +2041,21 @@ struct fea {
 	__le16 value_len;
 	char name[1];
 	/* optionally followed by value */
-};
+} __attribute__((packed));
 /* flags for _FEA.fEA */
 #define FEA_NEEDEA         0x80	/* need EA bit */
 
 struct fealist {
 	__le32 list_len;
 	struct fea list[1];
-};
+} __attribute__((packed));
 
 /* used to hold an arbitrary blob of data */
 struct data_blob {
 	__u8 *data;
 	size_t length;
 	void (*free) (struct data_blob * data_blob);
-};
+} __attribute__((packed));
 
 
 #ifdef CONFIG_CIFS_POSIX
@@ -1907,18 +2067,17 @@ struct data_blob {
 	perhaps add a CreateDevice - to create Pipes and other special .inodes
 	Also note POSIX open flags
 	2) Close - to return the last write time to do cache across close more safely
-	3) PosixQFSInfo - to return statfs info
-	4) FindFirst return unique inode number - what about resume key, two forms short (matches readdir) and full (enough info to cache inodes)
-	5) Mkdir - set mode
+	3) FindFirst return unique inode number - what about resume key, two 
+	forms short (matches readdir) and full (enough info to cache inodes)
+	4) Mkdir - set mode
 	
 	And under consideration: 
-	6) FindClose2 (return nanosecond timestamp ??)
-	7) Use nanosecond timestamps throughout all time fields if 
+	5) FindClose2 (return nanosecond timestamp ??)
+	6) Use nanosecond timestamps throughout all time fields if 
 	   corresponding attribute flag is set
-	8) sendfile - handle based copy
-	9) Direct i/o
-	10) "POSIX ACL" support
-	11) Misc fcntls?
+	7) sendfile - handle based copy
+	8) Direct i/o
+	9) Misc fcntls?
 	
 	what about fixing 64 bit alignment
 	
@@ -1974,7 +2133,7 @@ struct data_blob {
 	
  */
 
-/* xsymlink is a symlink format that can be used
+/* xsymlink is a symlink format (used by MacOS) that can be used
    to save symlink info in a regular file when 
    mounted to operating systems that do not
    support the cifs Unix extensions or EAs (for xattr
@@ -1999,7 +2158,7 @@ struct xsymlink {
 	char cr2;        /* \n */
 /* if room left, then end with \n then 0x20s by convention but not required */
 	char path[1024];  
-};
+} __attribute__((packed));
 
 typedef struct file_xattr_info {
 	/* BB do we need another field for flags? BB */
@@ -2007,7 +2166,7 @@ typedef struct file_xattr_info {
 	__u32 xattr_value_len;
 	char  xattr_name[0];
 	/* followed by xattr_value[xattr_value_len], no pad */
-} FILE_XATTR_INFO;	/* extended attribute, info level 0x205 */
+} __attribute__((packed)) FILE_XATTR_INFO;	/* extended attribute, info level 0x205 */
 
 
 /* flags for chattr command */
@@ -2033,10 +2192,8 @@ typedef struct file_xattr_info {
 typedef struct file_chattr_info {
 	__le64	mask; /* list of all possible attribute bits */
 	__le64	mode; /* list of actual attribute bits on this inode */
-} FILE_CHATTR_INFO;  /* ext attributes (chattr, chflags) level 0x206 */
+} __attribute__((packed)) FILE_CHATTR_INFO;  /* ext attributes (chattr, chflags) level 0x206 */
 
 #endif 
 
-#pragma pack()			/* resume default structure packing */
-
 #endif				/* _CIFSPDU_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index ea239dea571..d301149b1bb 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -47,19 +47,24 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *,
 			struct smb_hdr * /* input */ ,
 			struct smb_hdr * /* out */ ,
 			int * /* bytes returned */ , const int long_op);
+extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *,
+			struct kvec *, int /* nvec */,
+			int * /* bytes returned */ , const int long_op);
 extern int checkSMBhdr(struct smb_hdr *smb, __u16 mid);
 extern int checkSMB(struct smb_hdr *smb, __u16 mid, int length);
 extern int is_valid_oplock_break(struct smb_hdr *smb);
 extern int is_size_safe_to_change(struct cifsInodeInfo *);
+extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *);
 extern unsigned int smbCalcSize(struct smb_hdr *ptr);
+extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
 			enum securityEnum *secType);
 extern int cifs_inet_pton(int, char * source, void *dst);
 extern int map_smb_to_linux_error(struct smb_hdr *smb);
 extern void header_assemble(struct smb_hdr *, char /* command */ ,
-			const struct cifsTconInfo *, int /* specifies length
-			    of fixed section (word count) in two byte units */
-			);
+			    const struct cifsTconInfo *, int /* length of
+			    fixed section (word count) in two byte units */);
+extern __u16 GetNextMid(struct TCP_Server_Info *server);
 extern struct oplock_q_entry * AllocOplockQEntry(struct inode *, u16, 
 						 struct cifsTconInfo *);
 extern void DeleteOplockQEntry(struct oplock_q_entry *);
@@ -89,7 +94,7 @@ extern int CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 
 extern int CIFSFindFirst(const int xid, struct cifsTconInfo *tcon,
             const char *searchName, const struct nls_table *nls_codepage,
-            __u16 *searchHandle, struct cifs_search_info * psrch_inf, int map);
+            __u16 *searchHandle, struct cifs_search_info * psrch_inf, int map, const char dirsep);
 
 extern int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
             __u16 searchHandle, struct cifs_search_info * psrch_inf);
@@ -101,6 +106,10 @@ extern int CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon,
 			const unsigned char *searchName,
 			FILE_ALL_INFO * findData,
 			const struct nls_table *nls_codepage, int remap);
+extern int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon,
+                        const unsigned char *searchName,
+                        FILE_ALL_INFO * findData,
+                        const struct nls_table *nls_codepage, int remap);
 
 extern int CIFSSMBUnixQPathInfo(const int xid,
 			struct cifsTconInfo *tcon,
@@ -125,6 +134,11 @@ extern int get_dfs_path(int xid, struct cifsSesInfo *pSesInfo,
 			int remap);
 extern int CIFSSMBQFSInfo(const int xid, struct cifsTconInfo *tcon,
 			struct kstatfs *FSData);
+extern int SMBOldQFSInfo(const int xid, struct cifsTconInfo *tcon,
+			struct kstatfs *FSData);
+extern int CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon,
+			__u64 cap);
+
 extern int CIFSSMBQFSAttributeInfo(const int xid,
 			struct cifsTconInfo *tcon);
 extern int CIFSSMBQFSDeviceInfo(const int xid, struct cifsTconInfo *tcon);
@@ -207,6 +221,11 @@ extern int CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon,
 			const int access_flags, const int omode,
 			__u16 * netfid, int *pOplock, FILE_ALL_INFO *,
 			const struct nls_table *nls_codepage, int remap);
+extern int SMBLegacyOpen(const int xid, struct cifsTconInfo *tcon,
+			const char *fileName, const int disposition,
+			const int access_flags, const int omode,
+			__u16 * netfid, int *pOplock, FILE_ALL_INFO *,
+			const struct nls_table *nls_codepage, int remap);
 extern int CIFSSMBClose(const int xid, struct cifsTconInfo *tcon,
 			const int smb_file_id);
 
@@ -222,7 +241,7 @@ extern int CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 extern int CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
 			const int netfid, const unsigned int count,
 			const __u64 offset, unsigned int *nbytes, 
-			const char __user *buf,const int long_op);
+			struct kvec *iov, const int nvec, const int long_op);
 extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
 			const unsigned char *searchName, __u64 * inode_number,
 			const struct nls_table *nls_codepage, 
@@ -264,7 +283,8 @@ extern int CIFSSMBCopy(int xid,
 			int remap_special_chars);
 extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon, 
 			const int notify_subdirs,const __u16 netfid,
-			__u32 filter, const struct nls_table *nls_codepage);
+			__u32 filter, struct file * file, int multishot, 
+			const struct nls_table *nls_codepage);
 extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon,
 			const unsigned char *searchName, char * EAData,
 			size_t bufsize, const struct nls_table *nls_codepage,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 3c628bf667a..9312bfc5668 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -125,6 +125,9 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
 				rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon
 					, nls_codepage);
 				up(&tcon->ses->sesSem);
+				/* BB FIXME add code to check if wsize needs
+				   update due to negotiated smb buffer size
+				   shrinking */
 				if(rc == 0)
 					atomic_inc(&tconInfoReconnectCount);
 
@@ -166,11 +169,9 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
 
 	header_assemble((struct smb_hdr *) *request_buf, smb_command, tcon,wct);
 
-#ifdef CONFIG_CIFS_STATS
-        if(tcon != NULL) {
-                atomic_inc(&tcon->num_smbs_sent);
-        }
-#endif /* CONFIG_CIFS_STATS */
+        if(tcon != NULL)
+                cifs_stats_inc(&tcon->num_smbs_sent);
+
 	return rc;
 }  
 
@@ -222,6 +223,9 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
 				rc = CIFSTCon(0, tcon->ses, tcon->treeName,
 					      tcon, nls_codepage);
 				up(&tcon->ses->sesSem);
+				/* BB FIXME add code to check if wsize needs
+				update due to negotiated smb buffer size
+				shrinking */
 				if(rc == 0)
 					atomic_inc(&tconInfoReconnectCount);
 
@@ -269,11 +273,9 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
 	header_assemble((struct smb_hdr *) *request_buf, smb_command, tcon,
 			wct /*wct */ );
 
-#ifdef CONFIG_CIFS_STATS
-        if(tcon != NULL) {
-                atomic_inc(&tcon->num_smbs_sent);
-        }
-#endif /* CONFIG_CIFS_STATS */
+        if(tcon != NULL)
+                cifs_stats_inc(&tcon->num_smbs_sent);
+
 	return rc;
 }
 
@@ -330,7 +332,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 		      (void **) &pSMB, (void **) &pSMBr);
 	if (rc)
 		return rc;
-
+	pSMB->hdr.Mid = GetNextMid(server);
 	pSMB->hdr.Flags2 |= SMBFLG2_UNICODE;
 	if (extended_security)
 		pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
@@ -422,8 +424,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 		}
 				
 	}
-	if (pSMB)
-		cifs_buf_release(pSMB);
+	
+	cifs_buf_release(pSMB);
 	return rc;
 }
 
@@ -518,6 +520,8 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses)
 	smb_buffer_response = (struct smb_hdr *)pSMB; /* BB removeme BB */
 	
 	if(ses->server) {
+		pSMB->hdr.Mid = GetNextMid(ses->server);
+
 		if(ses->server->secMode & 
 		   (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
 			pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
@@ -537,9 +541,8 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses)
 			rc = -ESHUTDOWN;
 		}
 	}
-	if (pSMB)
-		cifs_small_buf_release(pSMB);
 	up(&ses->sesSem);
+	cifs_small_buf_release(pSMB);
 
 	/* if session dead then we do not need to do ulogoff,
 		since server closed smb session, no sense reporting 
@@ -583,14 +586,10 @@ DelFileRetry:
 	pSMB->ByteCount = cpu_to_le16(name_len + 1);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+	cifs_stats_inc(&tcon->num_deletes);
 	if (rc) {
 		cFYI(1, ("Error in RMFile = %d", rc));
 	} 
-#ifdef CONFIG_CIFS_STATS
-        else {
-		atomic_inc(&tcon->num_deletes);
-        }
-#endif
 
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
@@ -632,14 +631,10 @@ RmDirRetry:
 	pSMB->ByteCount = cpu_to_le16(name_len + 1);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+	cifs_stats_inc(&tcon->num_rmdirs);
 	if (rc) {
 		cFYI(1, ("Error in RMDir = %d", rc));
 	}
-#ifdef CONFIG_CIFS_STATS
-        else {
-		atomic_inc(&tcon->num_rmdirs);
-        }
-#endif
 
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
@@ -680,20 +675,161 @@ MkDirRetry:
 	pSMB->ByteCount = cpu_to_le16(name_len + 1);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+	cifs_stats_inc(&tcon->num_mkdirs);
 	if (rc) {
 		cFYI(1, ("Error in Mkdir = %d", rc));
 	}
-#ifdef CONFIG_CIFS_STATS
-        else {
-		atomic_inc(&tcon->num_mkdirs);
-        }
-#endif
+
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
 		goto MkDirRetry;
 	return rc;
 }
 
+static __u16 convert_disposition(int disposition)
+{
+	__u16 ofun = 0;
+
+	switch (disposition) {
+		case FILE_SUPERSEDE:
+			ofun = SMBOPEN_OCREATE | SMBOPEN_OTRUNC;
+			break;
+		case FILE_OPEN:
+			ofun = SMBOPEN_OAPPEND;
+			break;
+		case FILE_CREATE:
+			ofun = SMBOPEN_OCREATE;
+			break;
+		case FILE_OPEN_IF:
+			ofun = SMBOPEN_OCREATE | SMBOPEN_OAPPEND;
+			break;
+		case FILE_OVERWRITE:
+			ofun = SMBOPEN_OTRUNC;
+			break;
+		case FILE_OVERWRITE_IF:
+			ofun = SMBOPEN_OCREATE | SMBOPEN_OTRUNC;
+			break;
+		default:
+			cFYI(1,("unknown disposition %d",disposition));
+			ofun =  SMBOPEN_OAPPEND; /* regular open */
+	}
+	return ofun;
+}
+
+int
+SMBLegacyOpen(const int xid, struct cifsTconInfo *tcon,
+	    const char *fileName, const int openDisposition,
+	    const int access_flags, const int create_options, __u16 * netfid,
+            int *pOplock, FILE_ALL_INFO * pfile_info,
+	    const struct nls_table *nls_codepage, int remap)
+{
+	int rc = -EACCES;
+	OPENX_REQ *pSMB = NULL;
+	OPENX_RSP *pSMBr = NULL;
+	int bytes_returned;
+	int name_len;
+	__u16 count;
+
+OldOpenRetry:
+	rc = smb_init(SMB_COM_OPEN_ANDX, 15, tcon, (void **) &pSMB,
+		      (void **) &pSMBr);
+	if (rc)
+		return rc;
+
+	pSMB->AndXCommand = 0xFF;       /* none */
+
+	if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
+		count = 1;      /* account for one byte pad to word boundary */
+		name_len =
+		   cifsConvertToUCS((__le16 *) (pSMB->fileName + 1),
+				    fileName, PATH_MAX, nls_codepage, remap);
+		name_len++;     /* trailing null */
+		name_len *= 2;
+	} else {                /* BB improve check for buffer overruns BB */
+		count = 0;      /* no pad */
+		name_len = strnlen(fileName, PATH_MAX);
+		name_len++;     /* trailing null */
+		strncpy(pSMB->fileName, fileName, name_len);
+	}
+	if (*pOplock & REQ_OPLOCK)
+		pSMB->OpenFlags = cpu_to_le16(REQ_OPLOCK);
+	else if (*pOplock & REQ_BATCHOPLOCK) {
+		pSMB->OpenFlags = cpu_to_le16(REQ_BATCHOPLOCK);
+	}
+	pSMB->OpenFlags |= cpu_to_le16(REQ_MORE_INFO);
+	/* BB fixme add conversion for access_flags to bits 0 - 2 of mode */
+	/* 0 = read
+	   1 = write
+	   2 = rw
+	   3 = execute
+        */
+	pSMB->Mode = cpu_to_le16(2);
+	pSMB->Mode |= cpu_to_le16(0x40); /* deny none */
+	/* set file as system file if special file such
+	   as fifo and server expecting SFU style and
+	   no Unix extensions */
+
+        if(create_options & CREATE_OPTION_SPECIAL)
+                pSMB->FileAttributes = cpu_to_le16(ATTR_SYSTEM);
+        else
+                pSMB->FileAttributes = cpu_to_le16(0/*ATTR_NORMAL*/); /* BB FIXME */
+
+	/* if ((omode & S_IWUGO) == 0)
+		pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY);*/
+	/*  Above line causes problems due to vfs splitting create into two
+	    pieces - need to set mode after file created not while it is
+	    being created */
+
+	/* BB FIXME BB */
+/*	pSMB->CreateOptions = cpu_to_le32(create_options & CREATE_OPTIONS_MASK); */
+	/* BB FIXME END BB */
+
+	pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY);
+	pSMB->OpenFunction = cpu_to_le16(convert_disposition(openDisposition));
+	count += name_len;
+	pSMB->hdr.smb_buf_length += count;
+
+	pSMB->ByteCount = cpu_to_le16(count);
+	/* long_op set to 1 to allow for oplock break timeouts */
+	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
+		         (struct smb_hdr *) pSMBr, &bytes_returned, 1);
+	cifs_stats_inc(&tcon->num_opens);
+	if (rc) {
+		cFYI(1, ("Error in Open = %d", rc));
+	} else {
+	/* BB verify if wct == 15 */
+
+/*		*pOplock = pSMBr->OplockLevel; */  /* BB take from action field BB */
+
+		*netfid = pSMBr->Fid;   /* cifs fid stays in le */
+		/* Let caller know file was created so we can set the mode. */
+		/* Do we care about the CreateAction in any other cases? */
+	/* BB FIXME BB */
+/*		if(cpu_to_le32(FILE_CREATE) == pSMBr->CreateAction)
+			*pOplock |= CIFS_CREATE_ACTION; */
+	/* BB FIXME END */
+
+		if(pfile_info) {
+			pfile_info->CreationTime = 0; /* BB convert CreateTime*/
+			pfile_info->LastAccessTime = 0; /* BB fixme */
+			pfile_info->LastWriteTime = 0; /* BB fixme */
+			pfile_info->ChangeTime = 0;  /* BB fixme */
+			pfile_info->Attributes =
+				cpu_to_le32(le16_to_cpu(pSMBr->FileAttributes)); 
+			/* the file_info buf is endian converted by caller */
+			pfile_info->AllocationSize =
+				cpu_to_le64(le32_to_cpu(pSMBr->EndOfFile));
+			pfile_info->EndOfFile = pfile_info->AllocationSize;
+			pfile_info->NumberOfLinks = cpu_to_le32(1);
+		}
+	}
+
+	cifs_buf_release(pSMB);
+	if (rc == -EAGAIN)
+		goto OldOpenRetry;
+	return rc;
+}
+
 int
 CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon,
 	    const char *fileName, const int openDisposition,
@@ -738,7 +874,13 @@ openRetry:
 	}
 	pSMB->DesiredAccess = cpu_to_le32(access_flags);
 	pSMB->AllocationSize = 0;
-	pSMB->FileAttributes = cpu_to_le32(ATTR_NORMAL);
+	/* set file as system file if special file such
+	   as fifo and server expecting SFU style and
+	   no Unix extensions */
+	if(create_options & CREATE_OPTION_SPECIAL)
+		pSMB->FileAttributes = cpu_to_le32(ATTR_SYSTEM);
+	else
+		pSMB->FileAttributes = cpu_to_le32(ATTR_NORMAL);
 	/* XP does not handle ATTR_POSIX_SEMANTICS */
 	/* but it helps speed up case sensitive checks for other
 	servers such as Samba */
@@ -752,7 +894,7 @@ openRetry:
 		being created */
 	pSMB->ShareAccess = cpu_to_le32(FILE_SHARE_ALL);
 	pSMB->CreateDisposition = cpu_to_le32(openDisposition);
-	pSMB->CreateOptions = cpu_to_le32(create_options);
+	pSMB->CreateOptions = cpu_to_le32(create_options & CREATE_OPTIONS_MASK);
 	/* BB Expirement with various impersonation levels and verify */
 	pSMB->ImpersonationLevel = cpu_to_le32(SECURITY_IMPERSONATION);
 	pSMB->SecurityFlags =
@@ -765,6 +907,7 @@ openRetry:
 	/* long_op set to 1 to allow for oplock break timeouts */
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 1);
+	cifs_stats_inc(&tcon->num_opens);
 	if (rc) {
 		cFYI(1, ("Error in Open = %d", rc));
 	} else {
@@ -782,11 +925,8 @@ openRetry:
 		    pfile_info->EndOfFile = pSMBr->EndOfFile;
 		    pfile_info->NumberOfLinks = cpu_to_le32(1);
 		}
-
-#ifdef CONFIG_CIFS_STATS
-		atomic_inc(&tcon->num_opens);
-#endif
 	}
+
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
 		goto openRetry;
@@ -807,11 +947,16 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
 	READ_RSP *pSMBr = NULL;
 	char *pReadData = NULL;
 	int bytes_returned;
+	int wct;
 
 	cFYI(1,("Reading %d bytes on fid %d",count,netfid));
+	if(tcon->ses->capabilities & CAP_LARGE_FILES)
+		wct = 12;
+	else
+		wct = 10; /* old style read */
 
 	*nbytes = 0;
-	rc = smb_init(SMB_COM_READ_ANDX, 12, tcon, (void **) &pSMB,
+	rc = smb_init(SMB_COM_READ_ANDX, wct, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
 	if (rc)
 		return rc;
@@ -823,14 +968,26 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
 	pSMB->AndXCommand = 0xFF;	/* none */
 	pSMB->Fid = netfid;
 	pSMB->OffsetLow = cpu_to_le32(lseek & 0xFFFFFFFF);
-	pSMB->OffsetHigh = cpu_to_le32(lseek >> 32);
+	if(wct == 12)
+		pSMB->OffsetHigh = cpu_to_le32(lseek >> 32);
+        else if((lseek >> 32) > 0) /* can not handle this big offset for old */
+                return -EIO;
+
 	pSMB->Remaining = 0;
 	pSMB->MaxCount = cpu_to_le16(count & 0xFFFF);
 	pSMB->MaxCountHigh = cpu_to_le32(count >> 16);
-	pSMB->ByteCount = 0;  /* no need to do le conversion since it is 0 */
-
+	if(wct == 12)
+		pSMB->ByteCount = 0;  /* no need to do le conversion since 0 */
+	else {
+		/* old style read */
+		struct smb_com_readx_req * pSMBW = 
+			(struct smb_com_readx_req *)pSMB;
+		pSMBW->ByteCount = 0;	
+	}
+	
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+	cifs_stats_inc(&tcon->num_reads);
 	if (rc) {
 		cERROR(1, ("Send error in read = %d", rc));
 	} else {
@@ -876,12 +1033,20 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 	int rc = -EACCES;
 	WRITE_REQ *pSMB = NULL;
 	WRITE_RSP *pSMBr = NULL;
-	int bytes_returned;
+	int bytes_returned, wct;
 	__u32 bytes_sent;
 	__u16 byte_count;
 
 	/* cFYI(1,("write at %lld %d bytes",offset,count));*/
-	rc = smb_init(SMB_COM_WRITE_ANDX, 14, tcon, (void **) &pSMB,
+	if(tcon->ses == NULL)
+		return -ECONNABORTED;
+
+	if(tcon->ses->capabilities & CAP_LARGE_FILES)
+		wct = 14;
+	else
+		wct = 12;
+
+	rc = smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **) &pSMB,
 		      (void **) &pSMBr);
 	if (rc)
 		return rc;
@@ -892,7 +1057,11 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 	pSMB->AndXCommand = 0xFF;	/* none */
 	pSMB->Fid = netfid;
 	pSMB->OffsetLow = cpu_to_le32(offset & 0xFFFFFFFF);
-	pSMB->OffsetHigh = cpu_to_le32(offset >> 32);
+	if(wct == 14) 
+		pSMB->OffsetHigh = cpu_to_le32(offset >> 32);
+	else if((offset >> 32) > 0) /* can not handle this big offset for old */
+		return -EIO;
+	
 	pSMB->Reserved = 0xFFFFFFFF;
 	pSMB->WriteMode = 0;
 	pSMB->Remaining = 0;
@@ -911,7 +1080,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 	if (bytes_sent > count)
 		bytes_sent = count;
 	pSMB->DataOffset =
-	    cpu_to_le16(offsetof(struct smb_com_write_req,Data) - 4);
+		cpu_to_le16(offsetof(struct smb_com_write_req,Data) - 4);
 	if(buf)
 	    memcpy(pSMB->Data,buf,bytes_sent);
 	else if(ubuf) {
@@ -919,20 +1088,31 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 			cifs_buf_release(pSMB);
 			return -EFAULT;
 		}
-	} else {
+	} else if (count != 0) {
 		/* No buffer */
 		cifs_buf_release(pSMB);
 		return -EINVAL;
+	} /* else setting file size with write of zero bytes */
+	if(wct == 14)
+		byte_count = bytes_sent + 1; /* pad */
+	else /* wct == 12 */ {
+		byte_count = bytes_sent + 5; /* bigger pad, smaller smb hdr */
 	}
-
-	byte_count = bytes_sent + 1 /* pad */ ; /* BB fix this for sends > 64K */
 	pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF);
 	pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16);
-	pSMB->hdr.smb_buf_length += bytes_sent+1;
-	pSMB->ByteCount = cpu_to_le16(byte_count);
+	pSMB->hdr.smb_buf_length += byte_count;
+
+	if(wct == 14)
+		pSMB->ByteCount = cpu_to_le16(byte_count);
+	else { /* old style write has byte count 4 bytes earlier so 4 bytes pad  */
+		struct smb_com_writex_req * pSMBW = 
+			(struct smb_com_writex_req *)pSMB;
+		pSMBW->ByteCount = cpu_to_le16(byte_count);
+	}
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, long_op);
+	cifs_stats_inc(&tcon->num_writes);
 	if (rc) {
 		cFYI(1, ("Send error in write = %d", rc));
 		*nbytes = 0;
@@ -951,56 +1131,72 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 }
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-int CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
+int
+CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
 	     const int netfid, const unsigned int count,
-	     const __u64 offset, unsigned int *nbytes, const char __user *buf,
-	     const int long_op)
+	     const __u64 offset, unsigned int *nbytes, struct kvec *iov,
+	     int n_vec, const int long_op)
 {
 	int rc = -EACCES;
 	WRITE_REQ *pSMB = NULL;
-	WRITE_RSP *pSMBr = NULL;
-	/*int bytes_returned;*/
-	unsigned bytes_sent;
-	__u16 byte_count;
+	int bytes_returned, wct;
+	int smb_hdr_len;
 
-	rc = small_smb_init(SMB_COM_WRITE_ANDX, 14, tcon, (void **) &pSMB);
-    
+	cFYI(1,("write2 at %lld %d bytes",offset,count)); /* BB removeme BB */
+	if(tcon->ses->capabilities & CAP_LARGE_FILES)
+		wct = 14;
+	else
+		wct = 12;
+	rc = small_smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **) &pSMB);
 	if (rc)
 		return rc;
-	
-	pSMBr = (WRITE_RSP *)pSMB; /* BB removeme BB */
-
 	/* tcon and ses pointer are checked in smb_init */
 	if (tcon->ses->server == NULL)
 		return -ECONNABORTED;
 
-	pSMB->AndXCommand = 0xFF; /* none */
+	pSMB->AndXCommand = 0xFF;	/* none */
 	pSMB->Fid = netfid;
 	pSMB->OffsetLow = cpu_to_le32(offset & 0xFFFFFFFF);
-	pSMB->OffsetHigh = cpu_to_le32(offset >> 32);
+	if(wct == 14)
+		pSMB->OffsetHigh = cpu_to_le32(offset >> 32);
+	else if((offset >> 32) > 0) /* can not handle this big offset for old */
+		return -EIO;
 	pSMB->Reserved = 0xFFFFFFFF;
 	pSMB->WriteMode = 0;
 	pSMB->Remaining = 0;
-	bytes_sent = (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & ~0xFF;
-	if (bytes_sent > count)
-		bytes_sent = count;
-	pSMB->DataLengthHigh = 0;
+
 	pSMB->DataOffset =
 	    cpu_to_le16(offsetof(struct smb_com_write_req,Data) - 4);
 
-	byte_count = bytes_sent + 1 /* pad */ ;
-	pSMB->DataLengthLow = cpu_to_le16(bytes_sent);
-	pSMB->DataLengthHigh = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
-	pSMB->ByteCount = cpu_to_le16(byte_count);
+	pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF);
+	pSMB->DataLengthHigh = cpu_to_le16(count >> 16);
+	smb_hdr_len = pSMB->hdr.smb_buf_length + 1; /* hdr + 1 byte pad */
+	if(wct == 14)
+		pSMB->hdr.smb_buf_length += count+1;
+	else /* wct == 12 */
+		pSMB->hdr.smb_buf_length += count+5; /* smb data starts later */ 
+	if(wct == 14)
+		pSMB->ByteCount = cpu_to_le16(count + 1);
+	else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ {
+		struct smb_com_writex_req * pSMBW =
+				(struct smb_com_writex_req *)pSMB;
+		pSMBW->ByteCount = cpu_to_le16(count + 5);
+	}
+	iov[0].iov_base = pSMB;
+	iov[0].iov_len = smb_hdr_len + 4;
 
-/*	rc = SendReceive2(xid, tcon->ses, (struct smb_hdr *) pSMB,
-			 (struct smb_hdr *) pSMBr, buf, buflen, &bytes_returned, long_op); */  /* BB fixme BB */
+	rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &bytes_returned,
+			  long_op);
+	cifs_stats_inc(&tcon->num_writes);
 	if (rc) {
-		cFYI(1, ("Send error in write2 (large write) = %d", rc));
+		cFYI(1, ("Send error Write2 = %d", rc));
 		*nbytes = 0;
-	} else
-		*nbytes = le16_to_cpu(pSMBr->Count);
+	} else {
+		WRITE_RSP * pSMBr = (WRITE_RSP *)pSMB;
+		*nbytes = le16_to_cpu(pSMBr->CountHigh);
+		*nbytes = (*nbytes) << 16;
+		*nbytes += le16_to_cpu(pSMBr->Count);
+	}
 
 	cifs_small_buf_release(pSMB);
 
@@ -1009,6 +1205,8 @@ int CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
 
 	return rc;
 }
+
+
 #endif /* CIFS_EXPERIMENTAL */
 
 int
@@ -1065,7 +1263,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, timeout);
-
+	cifs_stats_inc(&tcon->num_locks);
 	if (rc) {
 		cFYI(1, ("Send error in Lock = %d", rc));
 	}
@@ -1099,6 +1297,7 @@ CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
 	pSMB->ByteCount = 0;
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+	cifs_stats_inc(&tcon->num_closes);
 	if (rc) {
 		if(rc!=-EINTR) {
 			/* EINTR is expected when user ctl-c to kill app */
@@ -1171,16 +1370,11 @@ renameRetry:
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+	cifs_stats_inc(&tcon->num_renames);
 	if (rc) {
 		cFYI(1, ("Send error in rename = %d", rc));
 	} 
 
-#ifdef CONFIG_CIFS_STATS
-	  else {
-		atomic_inc(&tcon->num_renames);
-	}
-#endif
-
 	cifs_buf_release(pSMB);
 
 	if (rc == -EAGAIN)
@@ -1255,14 +1449,11 @@ int CIFSSMBRenameOpenFile(const int xid,struct cifsTconInfo *pTcon,
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB,
                          (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+	cifs_stats_inc(&pTcon->num_t2renames);
 	if (rc) {
 		cFYI(1,("Send error in Rename (by file handle) = %d", rc));
 	}
-#ifdef CONFIG_CIFS_STATS
-	  else {
-		atomic_inc(&pTcon->num_t2renames);
-	}
-#endif
+
 	cifs_buf_release(pSMB);
 
 	/* Note: On -EAGAIN error only caller can retry on handle based calls
@@ -1416,6 +1607,7 @@ createSymLinkRetry:
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+	cifs_stats_inc(&tcon->num_symlinks);
 	if (rc) {
 		cFYI(1,
 		     ("Send error in SetPathInfo (create symlink) = %d",
@@ -1505,6 +1697,7 @@ createHardLinkRetry:
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+	cifs_stats_inc(&tcon->num_hardlinks);
 	if (rc) {
 		cFYI(1, ("Send error in SetPathInfo (hard link) = %d", rc));
 	}
@@ -1575,6 +1768,7 @@ winCreateHardLinkRetry:
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+	cifs_stats_inc(&tcon->num_hardlinks);
 	if (rc) {
 		cFYI(1, ("Send error in hard link (NT rename) = %d", rc));
 	}
@@ -1775,8 +1969,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
 		}
 	}
 qreparse_out:
-	if (pSMB)
-		cifs_buf_release(pSMB);
+	cifs_buf_release(pSMB);
 
 	/* Note: On -EAGAIN error only caller can retry on handle based calls
 		since file handle passed in no longer valid */
@@ -2165,6 +2358,67 @@ GetExtAttrOut:
 
 #endif /* CONFIG_POSIX */
 
+/* Legacy Query Path Information call for lookup to old servers such
+   as Win9x/WinME */
+int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon,
+                 const unsigned char *searchName,
+                 FILE_ALL_INFO * pFinfo,
+                 const struct nls_table *nls_codepage, int remap)
+{
+	QUERY_INFORMATION_REQ * pSMB;
+	QUERY_INFORMATION_RSP * pSMBr;
+	int rc = 0;
+	int bytes_returned;
+	int name_len;
+
+	cFYI(1, ("In SMBQPath path %s", searchName)); 
+QInfRetry:
+	rc = smb_init(SMB_COM_QUERY_INFORMATION, 0, tcon, (void **) &pSMB,
+                      (void **) &pSMBr);
+	if (rc)
+		return rc;
+
+	if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
+		name_len =
+                    cifsConvertToUCS((__le16 *) pSMB->FileName, searchName,
+                                     PATH_MAX, nls_codepage, remap);
+		name_len++;     /* trailing null */
+		name_len *= 2;
+	} else {               
+		name_len = strnlen(searchName, PATH_MAX);
+		name_len++;     /* trailing null */
+		strncpy(pSMB->FileName, searchName, name_len);
+	}
+	pSMB->BufferFormat = 0x04;
+	name_len++; /* account for buffer type byte */	
+	pSMB->hdr.smb_buf_length += (__u16) name_len;
+	pSMB->ByteCount = cpu_to_le16(name_len);
+
+	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
+                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+	if (rc) {
+		cFYI(1, ("Send error in QueryInfo = %d", rc));
+	} else if (pFinfo) {            /* decode response */
+		memset(pFinfo, 0, sizeof(FILE_ALL_INFO));
+		pFinfo->AllocationSize =
+			cpu_to_le64(le32_to_cpu(pSMBr->size));
+		pFinfo->EndOfFile = pFinfo->AllocationSize;
+		pFinfo->Attributes =
+			cpu_to_le32(le16_to_cpu(pSMBr->attr));
+	} else
+		rc = -EIO; /* bad buffer passed in */
+
+	cifs_buf_release(pSMB);
+
+	if (rc == -EAGAIN)
+		goto QInfRetry;
+
+	return rc;
+}
+
+
+
+
 int
 CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon,
 		 const unsigned char *searchName,
@@ -2396,7 +2650,7 @@ findUniqueRetry:
 	if (rc) {
 		cFYI(1, ("Send error in FindFileDirInfo = %d", rc));
 	} else {		/* decode response */
-
+		cifs_stats_inc(&tcon->num_ffirst);
 		/* BB fill in */
 	}
 
@@ -2414,7 +2668,7 @@ CIFSFindFirst(const int xid, struct cifsTconInfo *tcon,
 	      const char *searchName, 
 	      const struct nls_table *nls_codepage,
 	      __u16 *	pnetfid,
-	      struct cifs_search_info * psrch_inf, int remap)
+	      struct cifs_search_info * psrch_inf, int remap, const char dirsep)
 {
 /* level 257 SMB_ */
 	TRANSACTION2_FFIRST_REQ *pSMB = NULL;
@@ -2441,7 +2695,7 @@ findFirstRetry:
 		it got remapped to 0xF03A as if it were part of the
 		directory name instead of a wildcard */
 		name_len *= 2;
-		pSMB->FileName[name_len] = '\\';
+		pSMB->FileName[name_len] = dirsep;
 		pSMB->FileName[name_len+1] = 0;
 		pSMB->FileName[name_len+2] = '*';
 		pSMB->FileName[name_len+3] = 0;
@@ -2455,7 +2709,7 @@ findFirstRetry:
 		if(name_len > buffersize-header)
 			free buffer exit; BB */
 		strncpy(pSMB->FileName, searchName, name_len);
-		pSMB->FileName[name_len] = '\\';
+		pSMB->FileName[name_len] = dirsep;
 		pSMB->FileName[name_len+1] = '*';
 		pSMB->FileName[name_len+2] = 0;
 		name_len += 3;
@@ -2496,6 +2750,7 @@ findFirstRetry:
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+	cifs_stats_inc(&tcon->num_ffirst);
 
 	if (rc) {/* BB add logic to retry regular search if Unix search rejected unexpectedly by server */
 		/* BB Add code to handle unsupported level rc */
@@ -2602,6 +2857,9 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
 	if(name_len < PATH_MAX) {
 		memcpy(pSMB->ResumeFileName, psrch_inf->presume_name, name_len);
 		byte_count += name_len;
+		/* 14 byte parm len above enough for 2 byte null terminator */
+		pSMB->ResumeFileName[name_len] = 0;
+		pSMB->ResumeFileName[name_len+1] = 0;
 	} else {
 		rc = -EINVAL;
 		goto FNext2_err_exit;
@@ -2614,7 +2872,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
                                                                                               
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			(struct smb_hdr *) pSMBr, &bytes_returned, 0);
-                                                                                              
+	cifs_stats_inc(&tcon->num_fnext);
 	if (rc) {
 		if (rc == -EBADF) {
 			psrch_inf->endOfSearch = TRUE;
@@ -2691,6 +2949,7 @@ CIFSFindClose(const int xid, struct cifsTconInfo *tcon, const __u16 searchHandle
 	if (rc) {
 		cERROR(1, ("Send error in FindClose = %d", rc));
 	}
+	cifs_stats_inc(&tcon->num_fclose);
 	cifs_small_buf_release(pSMB);
 
 	/* Since session is dead, search handle closed on server already */
@@ -2824,7 +3083,10 @@ getDFSRetry:
 		      (void **) &pSMBr);
 	if (rc)
 		return rc;
-
+	
+	/* server pointer checked in called function, 
+	but should never be null here anyway */
+	pSMB->hdr.Mid = GetNextMid(ses->server);
 	pSMB->hdr.Tid = ses->ipc_tid;
 	pSMB->hdr.Uid = ses->Suid;
 	if (ses->capabilities & CAP_STATUS32) {
@@ -2965,6 +3227,92 @@ GetDFSRefExit:
 	return rc;
 }
 
+/* Query File System Info such as free space to old servers such as Win 9x */
+int
+SMBOldQFSInfo(const int xid, struct cifsTconInfo *tcon, struct kstatfs *FSData)
+{
+/* level 0x01 SMB_QUERY_FILE_SYSTEM_INFO */
+	TRANSACTION2_QFSI_REQ *pSMB = NULL;
+	TRANSACTION2_QFSI_RSP *pSMBr = NULL;
+	FILE_SYSTEM_ALLOC_INFO *response_data;
+	int rc = 0;
+	int bytes_returned = 0;
+	__u16 params, byte_count;
+
+	cFYI(1, ("OldQFSInfo"));
+oldQFSInfoRetry:
+	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
+		(void **) &pSMBr);
+	if (rc)
+		return rc;
+	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
+		      (void **) &pSMBr);
+	if (rc)
+		return rc;
+
+	params = 2;     /* level */
+	pSMB->TotalDataCount = 0;
+	pSMB->MaxParameterCount = cpu_to_le16(2);
+	pSMB->MaxDataCount = cpu_to_le16(1000);
+	pSMB->MaxSetupCount = 0;
+	pSMB->Reserved = 0;
+	pSMB->Flags = 0;
+	pSMB->Timeout = 0;
+	pSMB->Reserved2 = 0;
+	byte_count = params + 1 /* pad */ ;
+	pSMB->TotalParameterCount = cpu_to_le16(params);
+	pSMB->ParameterCount = pSMB->TotalParameterCount;
+	pSMB->ParameterOffset = cpu_to_le16(offsetof(
+	struct smb_com_transaction2_qfsi_req, InformationLevel) - 4);
+	pSMB->DataCount = 0;
+	pSMB->DataOffset = 0;
+	pSMB->SetupCount = 1;
+	pSMB->Reserved3 = 0;
+	pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
+	pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION);
+	pSMB->hdr.smb_buf_length += byte_count;
+	pSMB->ByteCount = cpu_to_le16(byte_count);
+
+	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
+		(struct smb_hdr *) pSMBr, &bytes_returned, 0);
+	if (rc) {
+		cFYI(1, ("Send error in QFSInfo = %d", rc));
+	} else {                /* decode response */
+		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
+
+		if (rc || (pSMBr->ByteCount < 18))
+			rc = -EIO;      /* bad smb */
+		else {
+			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
+			cFYI(1,("qfsinf resp BCC: %d  Offset %d",
+				 pSMBr->ByteCount, data_offset));
+
+			response_data =
+				(FILE_SYSTEM_ALLOC_INFO *) 
+				(((char *) &pSMBr->hdr.Protocol) + data_offset);
+			FSData->f_bsize =
+				le16_to_cpu(response_data->BytesPerSector) *
+				le32_to_cpu(response_data->
+					SectorsPerAllocationUnit);
+			FSData->f_blocks =
+				le32_to_cpu(response_data->TotalAllocationUnits);
+			FSData->f_bfree = FSData->f_bavail =
+				le32_to_cpu(response_data->FreeAllocationUnits);
+			cFYI(1,
+			     ("Blocks: %lld  Free: %lld Block size %ld",
+			      (unsigned long long)FSData->f_blocks,
+			      (unsigned long long)FSData->f_bfree,
+			      FSData->f_bsize));
+		}
+	}
+	cifs_buf_release(pSMB);
+
+	if (rc == -EAGAIN)
+		goto oldQFSInfoRetry;
+
+	return rc;
+}
+
 int
 CIFSSMBQFSInfo(const int xid, struct cifsTconInfo *tcon, struct kstatfs *FSData)
 {
@@ -2986,7 +3334,7 @@ QFSInfoRetry:
 	params = 2;	/* level */
 	pSMB->TotalDataCount = 0;
 	pSMB->MaxParameterCount = cpu_to_le16(2);
-	pSMB->MaxDataCount = cpu_to_le16(1000);	/* BB find exact max SMB PDU from sess structure BB */
+	pSMB->MaxDataCount = cpu_to_le16(1000);
 	pSMB->MaxSetupCount = 0;
 	pSMB->Reserved = 0;
 	pSMB->Flags = 0;
@@ -3009,17 +3357,14 @@ QFSInfoRetry:
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
-		cERROR(1, ("Send error in QFSInfo = %d", rc));
+		cFYI(1, ("Send error in QFSInfo = %d", rc));
 	} else {		/* decode response */
                 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
-		if (rc || (pSMBr->ByteCount < 24)) /* BB alsO CHEck enough total bytes returned */
+		if (rc || (pSMBr->ByteCount < 24))
 			rc = -EIO;	/* bad smb */
 		else {
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
-			cFYI(1,
-				("Decoding qfsinfo response.  BCC: %d  Offset %d",
-				pSMBr->ByteCount, data_offset));
 
 			response_data =
 			    (FILE_SYSTEM_INFO
@@ -3254,6 +3599,77 @@ QFSUnixRetry:
 	return rc;
 }
 
+int
+CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, __u64 cap)
+{
+/* level 0x200  SMB_SET_CIFS_UNIX_INFO */
+	TRANSACTION2_SETFSI_REQ *pSMB = NULL;
+	TRANSACTION2_SETFSI_RSP *pSMBr = NULL;
+	int rc = 0;
+	int bytes_returned = 0;
+	__u16 params, param_offset, offset, byte_count;
+
+	cFYI(1, ("In SETFSUnixInfo"));
+SETFSUnixRetry:
+	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
+		      (void **) &pSMBr);
+	if (rc)
+		return rc;
+
+	params = 4;	/* 2 bytes zero followed by info level. */
+	pSMB->MaxSetupCount = 0;
+	pSMB->Reserved = 0;
+	pSMB->Flags = 0;
+	pSMB->Timeout = 0;
+	pSMB->Reserved2 = 0;
+	param_offset = offsetof(struct smb_com_transaction2_setfsi_req, FileNum) - 4;
+	offset = param_offset + params;
+
+	pSMB->MaxParameterCount = cpu_to_le16(4);
+	pSMB->MaxDataCount = cpu_to_le16(100);	/* BB find exact max SMB PDU from sess structure BB */
+	pSMB->SetupCount = 1;
+	pSMB->Reserved3 = 0;
+	pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FS_INFORMATION);
+	byte_count = 1 /* pad */ + params + 12;
+
+	pSMB->DataCount = cpu_to_le16(12);
+	pSMB->ParameterCount = cpu_to_le16(params);
+	pSMB->TotalDataCount = pSMB->DataCount;
+	pSMB->TotalParameterCount = pSMB->ParameterCount;
+	pSMB->ParameterOffset = cpu_to_le16(param_offset);
+	pSMB->DataOffset = cpu_to_le16(offset);
+
+	/* Params. */
+	pSMB->FileNum = 0;
+	pSMB->InformationLevel = cpu_to_le16(SMB_SET_CIFS_UNIX_INFO);
+
+	/* Data. */
+	pSMB->ClientUnixMajor = cpu_to_le16(CIFS_UNIX_MAJOR_VERSION);
+	pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION);
+	pSMB->ClientUnixCap = cpu_to_le64(cap);
+
+	pSMB->hdr.smb_buf_length += byte_count;
+	pSMB->ByteCount = cpu_to_le16(byte_count);
+
+	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
+			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+	if (rc) {
+		cERROR(1, ("Send error in SETFSUnixInfo = %d", rc));
+	} else {		/* decode response */
+		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
+		if (rc) {
+			rc = -EIO;	/* bad smb */
+		}
+	}
+	cifs_buf_release(pSMB);
+
+	if (rc == -EAGAIN)
+		goto SETFSUnixRetry;
+
+	return rc;
+}
+
+
 
 int
 CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon,
@@ -3318,16 +3734,16 @@ QFSPosixRetry:
 					le64_to_cpu(response_data->TotalBlocks);
 			FSData->f_bfree =
 			    le64_to_cpu(response_data->BlocksAvail);
-			if(response_data->UserBlocksAvail == -1) {
+			if(response_data->UserBlocksAvail == cpu_to_le64(-1)) {
 				FSData->f_bavail = FSData->f_bfree;
 			} else {
 				FSData->f_bavail =
 					le64_to_cpu(response_data->UserBlocksAvail);
 			}
-			if(response_data->TotalFileNodes != -1)
+			if(response_data->TotalFileNodes != cpu_to_le64(-1))
 				FSData->f_files =
 					le64_to_cpu(response_data->TotalFileNodes);
-			if(response_data->FreeFileNodes != -1)
+			if(response_data->FreeFileNodes != cpu_to_le64(-1))
 				FSData->f_ffree =
 					le64_to_cpu(response_data->FreeFileNodes);
 		}
@@ -3373,7 +3789,7 @@ SetEOFRetry:
 				     PATH_MAX, nls_codepage, remap);
 		name_len++;	/* trailing null */
 		name_len *= 2;
-	} else {		/* BB improve the check for buffer overruns BB */
+	} else {	/* BB improve the check for buffer overruns BB */
 		name_len = strnlen(fileName, PATH_MAX);
 		name_len++;	/* trailing null */
 		strncpy(pSMB->FileName, fileName, name_len);
@@ -3381,7 +3797,7 @@ SetEOFRetry:
 	params = 6 + name_len;
 	data_count = sizeof (struct file_end_of_file_info);
 	pSMB->MaxParameterCount = cpu_to_le16(2);
-	pSMB->MaxDataCount = cpu_to_le16(1000);	/* BB find max SMB size from sess */
+	pSMB->MaxDataCount = cpu_to_le16(4100);
 	pSMB->MaxSetupCount = 0;
 	pSMB->Reserved = 0;
 	pSMB->Flags = 0;
@@ -3763,7 +4179,7 @@ setPermsRetry:
 				     PATH_MAX, nls_codepage, remap);
 		name_len++;	/* trailing null */
 		name_len *= 2;
-	} else {		/* BB improve the check for buffer overruns BB */
+	} else {	/* BB improve the check for buffer overruns BB */
 		name_len = strnlen(fileName, PATH_MAX);
 		name_len++;	/* trailing null */
 		strncpy(pSMB->FileName, fileName, name_len);
@@ -3836,12 +4252,14 @@ setPermsRetry:
 }
 
 int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon, 
-			const int notify_subdirs, const __u16 netfid,
-			__u32 filter, const struct nls_table *nls_codepage)
+		  const int notify_subdirs, const __u16 netfid,
+		  __u32 filter, struct file * pfile, int multishot, 
+		  const struct nls_table *nls_codepage)
 {
 	int rc = 0;
 	struct smb_com_transaction_change_notify_req * pSMB = NULL;
 	struct smb_com_transaction_change_notify_rsp * pSMBr = NULL;
+	struct dir_notify_req *dnotify_req;
 	int bytes_returned;
 
 	cFYI(1, ("In CIFSSMBNotify for file handle %d",(int)netfid));
@@ -3874,6 +4292,28 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
 			(struct smb_hdr *) pSMBr, &bytes_returned, -1);
 	if (rc) {
 		cFYI(1, ("Error in Notify = %d", rc));
+	} else {
+		/* Add file to outstanding requests */
+		/* BB change to kmem cache alloc */	
+		dnotify_req = (struct dir_notify_req *) kmalloc(
+						sizeof(struct dir_notify_req),
+						 GFP_KERNEL);
+		if(dnotify_req) {
+			dnotify_req->Pid = pSMB->hdr.Pid;
+			dnotify_req->PidHigh = pSMB->hdr.PidHigh;
+			dnotify_req->Mid = pSMB->hdr.Mid;
+			dnotify_req->Tid = pSMB->hdr.Tid;
+			dnotify_req->Uid = pSMB->hdr.Uid;
+			dnotify_req->netfid = netfid;
+			dnotify_req->pfile = pfile;
+			dnotify_req->filter = filter;
+			dnotify_req->multishot = multishot;
+			spin_lock(&GlobalMid_Lock);
+			list_add_tail(&dnotify_req->lhead, 
+					&GlobalDnotifyReqList);
+			spin_unlock(&GlobalMid_Lock);
+		} else 
+			rc = -ENOMEM;
 	}
 	cifs_buf_release(pSMB);
 	return rc;	
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e568cc47a7f..d74367a08d5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -29,6 +29,8 @@
 #include <linux/utsname.h>
 #include <linux/mempool.h>
 #include <linux/delay.h>
+#include <linux/completion.h>
+#include <linux/pagevec.h>
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include "cifspdu.h"
@@ -44,6 +46,8 @@
 #define CIFS_PORT 445
 #define RFC1001_PORT 139
 
+static DECLARE_COMPLETION(cifsd_complete);
+
 extern void SMBencrypt(unsigned char *passwd, unsigned char *c8,
 		       unsigned char *p24);
 extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
@@ -60,6 +64,7 @@ struct smb_vol {
 	char *in6_addr;  /* ipv6 address as human readable form of in6_addr */
 	char *iocharset;  /* local code page for mapping to and from Unicode */
 	char source_rfc1001_name[16]; /* netbios name of client */
+	char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */
 	uid_t linux_uid;
 	gid_t linux_gid;
 	mode_t file_mode;
@@ -74,6 +79,10 @@ struct smb_vol {
 	unsigned server_ino:1; /* use inode numbers from server ie UniqueId */
 	unsigned direct_io:1;
 	unsigned remap:1;   /* set to remap seven reserved chars in filenames */
+	unsigned posix_paths:1;   /* unset to not ask for posix pathnames. */
+	unsigned sfu_emul:1;
+	unsigned nocase;     /* request case insensitive filenames */
+	unsigned nobrl;      /* disable sending byte range locks to srv */
 	unsigned int rsize;
 	unsigned int wsize;
 	unsigned int sockopt;
@@ -82,7 +91,8 @@ struct smb_vol {
 
 static int ipv4_connect(struct sockaddr_in *psin_server, 
 			struct socket **csocket,
-			char * netb_name);
+			char * netb_name,
+			char * server_netb_name);
 static int ipv6_connect(struct sockaddr_in6 *psin_server, 
 			struct socket **csocket);
 
@@ -175,9 +185,11 @@ cifs_reconnect(struct TCP_Server_Info *server)
 		} else {
 			rc = ipv4_connect(&server->addr.sockAddr, 
 					&server->ssocket,
-					server->workstation_RFC1001_name);
+					server->workstation_RFC1001_name,
+					server->server_RFC1001_name);
 		}
 		if(rc) {
+			cFYI(1,("reconnect error %d",rc));
 			msleep(3000);
 		} else {
 			atomic_inc(&tcpSesReconnectCount);
@@ -293,12 +305,12 @@ static int coalesce_t2(struct smb_hdr * psecond, struct smb_hdr *pTargetSMB)
 	byte_count += total_in_buf2;
 	BCC_LE(pTargetSMB) = cpu_to_le16(byte_count);
 
-	byte_count = be32_to_cpu(pTargetSMB->smb_buf_length);
+	byte_count = pTargetSMB->smb_buf_length;
 	byte_count += total_in_buf2;
 
 	/* BB also add check that we are not beyond maximum buffer size */
 		
-	pTargetSMB->smb_buf_length = cpu_to_be32(byte_count);
+	pTargetSMB->smb_buf_length = byte_count;
 
 	if(remaining == total_in_buf2) {
 		cFYI(1,("found the last secondary response"));
@@ -323,7 +335,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 	struct cifsSesInfo *ses;
 	struct task_struct *task_to_wake = NULL;
 	struct mid_q_entry *mid_entry;
-	char *temp;
+	char temp;
 	int isLargeBuf = FALSE;
 	int isMultiRsp;
 	int reconnect;
@@ -337,6 +349,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 	atomic_inc(&tcpSesAllocCount);
 	length = tcpSesAllocCount.counter;
 	write_unlock(&GlobalSMBSeslock);
+	complete(&cifsd_complete);
 	if(length  > 1) {
 		mempool_resize(cifs_req_poolp,
 			length + cifs_min_rcv,
@@ -344,6 +357,8 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 	}
 
 	while (server->tcpStatus != CifsExiting) {
+		if (try_to_freeze())
+			continue;
 		if (bigbuf == NULL) {
 			bigbuf = cifs_buf_get();
 			if(bigbuf == NULL) {
@@ -422,22 +437,32 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 			continue;
 		}
 
-		/* the right amount was read from socket - 4 bytes */
+		/* The right amount was read from socket - 4 bytes */
+		/* so we can now interpret the length field */
+
+		/* the first byte big endian of the length field,
+		is actually not part of the length but the type
+		with the most common, zero, as regular data */
+		temp = *((char *) smb_buffer);
 
+		/* Note that FC 1001 length is big endian on the wire, 
+		but we convert it here so it is always manipulated
+		as host byte order */
 		pdu_length = ntohl(smb_buffer->smb_buf_length);
-		cFYI(1,("rfc1002 length(big endian)0x%x)", pdu_length+4));
+		smb_buffer->smb_buf_length = pdu_length;
 
-		temp = (char *) smb_buffer;
-		if (temp[0] == (char) RFC1002_SESSION_KEEP_ALIVE) {
+		cFYI(1,("rfc1002 length 0x%x)", pdu_length+4));
+
+		if (temp == (char) RFC1002_SESSION_KEEP_ALIVE) {
 			continue; 
-		} else if (temp[0] == (char)RFC1002_POSITIVE_SESSION_RESPONSE) {
+		} else if (temp == (char)RFC1002_POSITIVE_SESSION_RESPONSE) {
 			cFYI(1,("Good RFC 1002 session rsp"));
 			continue;
-		} else if (temp[0] == (char)RFC1002_NEGATIVE_SESSION_RESPONSE) {
+		} else if (temp == (char)RFC1002_NEGATIVE_SESSION_RESPONSE) {
 			/* we get this from Windows 98 instead of 
 			   an error on SMB negprot response */
 			cFYI(1,("Negative RFC1002 Session Response Error 0x%x)",
-				temp[4]));
+				pdu_length));
 			if(server->tcpStatus == CifsNew) {
 				/* if nack on negprot (rather than 
 				ret of smb negprot error) reconnecting
@@ -459,9 +484,10 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 				wake_up(&server->response_q);
 				continue;
 			}
-		} else if (temp[0] != (char) 0) {
+		} else if (temp != (char) 0) {
 			cERROR(1,("Unknown RFC 1002 frame"));
-			cifs_dump_mem(" Received Data: ", temp, length);
+			cifs_dump_mem(" Received Data: ", (char *)smb_buffer,
+				      length);
 			cifs_reconnect(server);
 			csocket = server->ssocket;
 			continue;
@@ -531,7 +557,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 
 		dump_smb(smb_buffer, length);
 		if (checkSMB (smb_buffer, smb_buffer->Mid, total_read+4)) {
-			cERROR(1, ("Bad SMB Received "));
+			cifs_dump_mem("Bad SMB: ", smb_buffer, 48);
 			continue;
 		}
 
@@ -579,6 +605,9 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 multi_t2_fnd:
 				task_to_wake = mid_entry->tsk;
 				mid_entry->midState = MID_RESPONSE_RECEIVED;
+#ifdef CONFIG_CIFS_STATS2
+				mid_entry->when_received = jiffies;
+#endif
 				break;
 			}
 		}
@@ -596,7 +625,8 @@ multi_t2_fnd:
 		} else if ((is_valid_oplock_break(smb_buffer) == FALSE)
 		    && (isMultiRsp == FALSE)) {                          
 			cERROR(1, ("No task to wake, unknown frame rcvd!"));
-			cifs_dump_mem("Received Data is: ",temp,sizeof(struct smb_hdr));
+			cifs_dump_mem("Received Data is: ",(char *)smb_buffer,
+				      sizeof(struct smb_hdr));
 		}
 	} /* end while !EXITING */
 
@@ -674,7 +704,7 @@ multi_t2_fnd:
 		msleep(125);
 	}
 
-	if (list_empty(&server->pending_mid_q)) {
+	if (!list_empty(&server->pending_mid_q)) {
 		/* mpx threads have not exited yet give them 
 		at least the smb send timeout time for long ops */
 		/* due to delays on oplock break requests, we need
@@ -711,7 +741,7 @@ multi_t2_fnd:
 			GFP_KERNEL);
 	}
 	
-	msleep(250);
+	complete_and_exit(&cifsd_complete, 0);
 	return 0;
 }
 
@@ -735,7 +765,9 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 			toupper(system_utsname.nodename[i]);
 	}
 	vol->source_rfc1001_name[15] = 0;
-
+	/* null target name indicates to use *SMBSERVR default called name
+	   if we end up sending RFC1001 session initialize */
+	vol->target_rfc1001_name[0] = 0;
 	vol->linux_uid = current->uid;	/* current->euid instead? */
 	vol->linux_gid = current->gid;
 	vol->dir_mode = S_IRWXUGO;
@@ -745,6 +777,9 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 	/* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
 	vol->rw = TRUE;
 
+	/* default is always to request posix paths. */
+	vol->posix_paths = 1;
+
 	if (!options)
 		return 1;
 
@@ -836,7 +871,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 				/* go from value to value + temp_len condensing 
 				double commas to singles. Note that this ends up
 				allocating a few bytes too many, which is ok */
-				vol->password = kcalloc(1, temp_len, GFP_KERNEL);
+				vol->password = kzalloc(temp_len, GFP_KERNEL);
 				if(vol->password == NULL) {
 					printk("CIFS: no memory for pass\n");
 					return 1;
@@ -851,7 +886,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 				}
 				vol->password[j] = 0;
 			} else {
-				vol->password = kcalloc(1, temp_len+1, GFP_KERNEL);
+				vol->password = kzalloc(temp_len+1, GFP_KERNEL);
 				if(vol->password == NULL) {
 					printk("CIFS: no memory for pass\n");
 					return 1;
@@ -985,7 +1020,31 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 				/* The string has 16th byte zero still from
 				set at top of the function  */
 				if((i==15) && (value[i] != 0))
-					printk(KERN_WARNING "CIFS: netbiosname longer than 15 and was truncated.\n");
+					printk(KERN_WARNING "CIFS: netbiosname longer than 15 truncated.\n");
+			}
+		} else if (strnicmp(data, "servern", 7) == 0) {
+			/* servernetbiosname specified override *SMBSERVER */
+			if (!value || !*value || (*value == ' ')) {
+				cFYI(1,("empty server netbiosname specified"));
+			} else {
+				/* last byte, type, is 0x20 for servr type */
+				memset(vol->target_rfc1001_name,0x20,16);
+
+				for(i=0;i<15;i++) {
+				/* BB are there cases in which a comma can be
+				   valid in this workstation netbios name (and need
+				   special handling)? */
+
+				/* user or mount helper must uppercase netbiosname */
+					if (value[i]==0)
+						break;
+					else
+						vol->target_rfc1001_name[i] = value[i];
+				}
+				/* The string has 16th byte zero still from
+				   set at top of the function  */
+				if((i==15) && (value[i] != 0))
+					printk(KERN_WARNING "CIFS: server netbiosname longer than 15 truncated.\n");
 			}
 		} else if (strnicmp(data, "credentials", 4) == 0) {
 			/* ignore */
@@ -1023,6 +1082,27 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 			vol->remap = 1;
 		} else if (strnicmp(data, "nomapchars", 10) == 0) {
 			vol->remap = 0;
+                } else if (strnicmp(data, "sfu", 3) == 0) {
+                        vol->sfu_emul = 1;
+                } else if (strnicmp(data, "nosfu", 5) == 0) {
+                        vol->sfu_emul = 0;
+		} else if (strnicmp(data, "posixpaths", 10) == 0) {
+			vol->posix_paths = 1;
+		} else if (strnicmp(data, "noposixpaths", 12) == 0) {
+			vol->posix_paths = 0;
+                } else if ((strnicmp(data, "nocase", 6) == 0) ||
+			   (strnicmp(data, "ignorecase", 10)  == 0)) {
+                        vol->nocase = 1;
+		} else if (strnicmp(data, "brl", 3) == 0) {
+			vol->nobrl =  0;
+		} else if ((strnicmp(data, "nobrl", 5) == 0) || 
+			   (strnicmp(data, "nolock", 6) == 0)) {
+			vol->nobrl =  1;
+			/* turn off mandatory locking in mode
+			if remote locking is turned off since the
+			local vfs will do advisory */
+			if(vol->file_mode == (S_IALLUGO & ~(S_ISUID | S_IXGRP)))
+				vol->file_mode = S_IALLUGO;
 		} else if (strnicmp(data, "setuids", 7) == 0) {
 			vol->setuids = 1;
 		} else if (strnicmp(data, "nosetuids", 9) == 0) {
@@ -1242,7 +1322,7 @@ static void rfc1002mangle(char * target,char * source, unsigned int length)
 
 static int
 ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, 
-			 char * netbios_name)
+	     char * netbios_name, char * target_name)
 {
 	int rc = 0;
 	int connected = 0;
@@ -1307,21 +1387,33 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
 	/* Eventually check for other socket options to change from 
 		the default. sock_setsockopt not used because it expects 
 		user space buffer */
+	 cFYI(1,("sndbuf %d rcvbuf %d rcvtimeo 0x%lx",(*csocket)->sk->sk_sndbuf,
+		 (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo));
 	(*csocket)->sk->sk_rcvtimeo = 7 * HZ;
+	/* make the bufsizes depend on wsize/rsize and max requests */
+	if((*csocket)->sk->sk_sndbuf < (200 * 1024))
+		(*csocket)->sk->sk_sndbuf = 200 * 1024;
+	if((*csocket)->sk->sk_rcvbuf < (140 * 1024))
+		(*csocket)->sk->sk_rcvbuf = 140 * 1024;
 
 	/* send RFC1001 sessinit */
-
 	if(psin_server->sin_port == htons(RFC1001_PORT)) {
 		/* some servers require RFC1001 sessinit before sending
 		negprot - BB check reconnection in case where second 
 		sessinit is sent but no second negprot */
 		struct rfc1002_session_packet * ses_init_buf;
 		struct smb_hdr * smb_buf;
-		ses_init_buf = kcalloc(1, sizeof(struct rfc1002_session_packet), GFP_KERNEL);
+		ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet), GFP_KERNEL);
 		if(ses_init_buf) {
 			ses_init_buf->trailer.session_req.called_len = 32;
-			rfc1002mangle(ses_init_buf->trailer.session_req.called_name,
-				DEFAULT_CIFS_CALLED_NAME,16);
+			if(target_name && (target_name[0] != 0)) {
+				rfc1002mangle(ses_init_buf->trailer.session_req.called_name,
+					target_name, 16);
+			} else {
+				rfc1002mangle(ses_init_buf->trailer.session_req.called_name,
+					DEFAULT_CIFS_CALLED_NAME,16);
+			}
+
 			ses_init_buf->trailer.session_req.calling_len = 32;
 			/* calling name ends in null (byte 16) from old smb
 			convention. */
@@ -1554,7 +1646,9 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			sin_server.sin_port = htons(volume_info.port);
 		else
 			sin_server.sin_port = 0;
-		rc = ipv4_connect(&sin_server,&csocket,volume_info.source_rfc1001_name);
+		rc = ipv4_connect(&sin_server,&csocket,
+				  volume_info.source_rfc1001_name,
+				  volume_info.target_rfc1001_name);
 		if (rc < 0) {
 			cERROR(1,
 			       ("Error connecting to IPv4 socket. Aborting operation"));
@@ -1604,9 +1698,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 					kfree(volume_info.password);
 				FreeXid(xid);
 				return rc;
-			} else
-				rc = 0;
+			}
+			wait_for_completion(&cifsd_complete);
+			rc = 0;
 			memcpy(srvTcp->workstation_RFC1001_name, volume_info.source_rfc1001_name,16);
+			memcpy(srvTcp->server_RFC1001_name, volume_info.target_rfc1001_name,16);
 			srvTcp->sequence_number = 0;
 		}
 	}
@@ -1651,17 +1747,27 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
     
 	/* search for existing tcon to this server share */
 	if (!rc) {
-		if((volume_info.rsize) && (volume_info.rsize <= CIFSMaxBufSize))
+		if(volume_info.rsize > CIFSMaxBufSize) {
+			cERROR(1,("rsize %d too large, using MaxBufSize",
+				volume_info.rsize));
+			cifs_sb->rsize = CIFSMaxBufSize;
+		} else if((volume_info.rsize) && (volume_info.rsize <= CIFSMaxBufSize))
 			cifs_sb->rsize = volume_info.rsize;
-		else
-			cifs_sb->rsize = srvTcp->maxBuf - MAX_CIFS_HDR_SIZE; /* default */
-		if((volume_info.wsize) && (volume_info.wsize <= CIFSMaxBufSize))
+		else /* default */
+			cifs_sb->rsize = CIFSMaxBufSize;
+
+		if(volume_info.wsize > PAGEVEC_SIZE * PAGE_CACHE_SIZE) {
+			cERROR(1,("wsize %d too large using 4096 instead",
+				  volume_info.wsize));
+			cifs_sb->wsize = 4096;
+		} else if(volume_info.wsize)
 			cifs_sb->wsize = volume_info.wsize;
 		else
 			cifs_sb->wsize = CIFSMaxBufSize; /* default */
 		if(cifs_sb->rsize < PAGE_CACHE_SIZE) {
-			cifs_sb->rsize = PAGE_CACHE_SIZE;
-			cERROR(1,("Attempt to set readsize for mount to less than one page (4096)"));
+			cifs_sb->rsize = PAGE_CACHE_SIZE; 
+			/* Windows ME does this */
+			cFYI(1,("Attempt to set readsize for mount to less than one page (4096)"));
 		}
 		cifs_sb->mnt_uid = volume_info.linux_uid;
 		cifs_sb->mnt_gid = volume_info.linux_gid;
@@ -1679,8 +1785,13 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR;
 		if(volume_info.no_xattr)
 			cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR;
+		if(volume_info.sfu_emul)
+			cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
+		if(volume_info.nobrl)
+			cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
+
 		if(volume_info.direct_io) {
-			cERROR(1,("mounting share using direct i/o"));
+			cFYI(1,("mounting share using direct i/o"));
 			cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
 		}
 
@@ -1694,6 +1805,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			   to the same server share the last value passed in 
 			   for the retry flag is used */
 			tcon->retry = volume_info.retry;
+			tcon->nocase = volume_info.nocase;
 		} else {
 			tcon = tconInfoAlloc();
 			if (tcon == NULL)
@@ -1722,6 +1834,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 				if (!rc) {
 					atomic_inc(&pSesInfo->inUse);
 					tcon->retry = volume_info.retry;
+					tcon->nocase = volume_info.nocase;
 				}
 			}
 		}
@@ -1743,8 +1856,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			spin_lock(&GlobalMid_Lock);
 			srvTcp->tcpStatus = CifsExiting;
 			spin_unlock(&GlobalMid_Lock);
-			if(srvTcp->tsk)
+			if(srvTcp->tsk) {
 				send_sig(SIGKILL,srvTcp->tsk,1);
+				wait_for_completion(&cifsd_complete);
+			}
 		}
 		 /* If find_unc succeeded then rc == 0 so we can not end */
 		if (tcon)  /* up accidently freeing someone elses tcon struct */
@@ -1757,8 +1872,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 					temp_rc = CIFSSMBLogoff(xid, pSesInfo);
 					/* if the socketUseCount is now zero */
 					if((temp_rc == -ESHUTDOWN) &&
-					   (pSesInfo->server->tsk))
+					   (pSesInfo->server->tsk)) {
 						send_sig(SIGKILL,pSesInfo->server->tsk,1);
+						wait_for_completion(&cifsd_complete);
+					}
 				} else
 					cFYI(1, ("No session or bad tcon"));
 				sesInfoFree(pSesInfo);
@@ -1781,8 +1898,27 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 						cFYI(1,("server negotiated posix acl support"));
 						sb->s_flags |= MS_POSIXACL;
 				}
+
+				/* Try and negotiate POSIX pathnames if we can. */
+				if (volume_info.posix_paths && (CIFS_UNIX_POSIX_PATHNAMES_CAP &
+				    le64_to_cpu(tcon->fsUnixInfo.Capability))) {
+					if (!CIFSSMBSetFSUnixInfo(xid, tcon, CIFS_UNIX_POSIX_PATHNAMES_CAP))  {
+						cFYI(1,("negotiated posix pathnames support"));
+						cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
+					} else {
+						cFYI(1,("posix pathnames support requested but not supported"));
+					}
+				}
 			}
 		}
+		if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X))
+			cifs_sb->wsize = min(cifs_sb->wsize,
+					     (tcon->ses->server->maxBuf -
+					      MAX_CIFS_HDR_SIZE));
+		if (!(tcon->ses->capabilities & CAP_LARGE_READ_X))
+                        cifs_sb->rsize = min(cifs_sb->rsize,
+                                             (tcon->ses->server->maxBuf -
+                                              MAX_CIFS_HDR_SIZE));
 	}
 
 	/* volume_info.password is freed above when existing session found
@@ -1830,6 +1966,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 	header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
 			NULL /* no tCon exists yet */ , 13 /* wct */ );
 
+	smb_buffer->Mid = GetNextMid(ses->server);
 	pSMB->req_no_secext.AndXCommand = 0xFF;
 	pSMB->req_no_secext.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
 	pSMB->req_no_secext.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
@@ -1964,7 +2101,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 /* We look for obvious messed up bcc or strings in response so we do not go off
    the end since (at least) WIN2K and Windows XP have a major bug in not null
    terminating last Unicode string in response  */
-				ses->serverOS = kcalloc(1, 2 * (len + 1), GFP_KERNEL);
+				ses->serverOS = kzalloc(2 * (len + 1), GFP_KERNEL);
 				if(ses->serverOS == NULL)
 					goto sesssetup_nomem;
 				cifs_strfromUCS_le(ses->serverOS,
@@ -1976,7 +2113,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 				if (remaining_words > 0) {
 					len = UniStrnlen((wchar_t *)bcc_ptr,
 							 remaining_words-1);
-					ses->serverNOS = kcalloc(1, 2 * (len + 1),GFP_KERNEL);
+					ses->serverNOS = kzalloc(2 * (len + 1),GFP_KERNEL);
 					if(ses->serverNOS == NULL)
 						goto sesssetup_nomem;
 					cifs_strfromUCS_le(ses->serverNOS,
@@ -1994,7 +2131,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 						len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
           /* last string is not always null terminated (for e.g. for Windows XP & 2000) */
 						ses->serverDomain =
-						    kcalloc(1, 2*(len+1),GFP_KERNEL);
+						    kzalloc(2*(len+1),GFP_KERNEL);
 						if(ses->serverDomain == NULL)
 							goto sesssetup_nomem;
 						cifs_strfromUCS_le(ses->serverDomain,
@@ -2005,22 +2142,22 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 					} /* else no more room so create dummy domain string */
 					else
 						ses->serverDomain = 
-							kcalloc(1, 2, GFP_KERNEL);
+							kzalloc(2, GFP_KERNEL);
 				} else {	/* no room so create dummy domain and NOS string */
 					/* if these kcallocs fail not much we
 					   can do, but better to not fail the
 					   sesssetup itself */
 					ses->serverDomain =
-					    kcalloc(1, 2, GFP_KERNEL);
+					    kzalloc(2, GFP_KERNEL);
 					ses->serverNOS =
-					    kcalloc(1, 2, GFP_KERNEL);
+					    kzalloc(2, GFP_KERNEL);
 				}
 			} else {	/* ASCII */
 				len = strnlen(bcc_ptr, 1024);
 				if (((long) bcc_ptr + len) - (long)
 				    pByteArea(smb_buffer_response)
 					    <= BCC(smb_buffer_response)) {
-					ses->serverOS = kcalloc(1, len + 1,GFP_KERNEL);
+					ses->serverOS = kzalloc(len + 1,GFP_KERNEL);
 					if(ses->serverOS == NULL)
 						goto sesssetup_nomem;
 					strncpy(ses->serverOS,bcc_ptr, len);
@@ -2030,7 +2167,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 					bcc_ptr++;
 
 					len = strnlen(bcc_ptr, 1024);
-					ses->serverNOS = kcalloc(1, len + 1,GFP_KERNEL);
+					ses->serverNOS = kzalloc(len + 1,GFP_KERNEL);
 					if(ses->serverNOS == NULL)
 						goto sesssetup_nomem;
 					strncpy(ses->serverNOS, bcc_ptr, len);
@@ -2039,7 +2176,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 					bcc_ptr++;
 
 					len = strnlen(bcc_ptr, 1024);
-					ses->serverDomain = kcalloc(1, len + 1,GFP_KERNEL);
+					ses->serverDomain = kzalloc(len + 1,GFP_KERNEL);
 					if(ses->serverDomain == NULL)
 						goto sesssetup_nomem;
 					strncpy(ses->serverDomain, bcc_ptr, len);
@@ -2105,6 +2242,8 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 	/* send SMBsessionSetup here */
 	header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
 			NULL /* no tCon exists yet */ , 12 /* wct */ );
+
+	smb_buffer->Mid = GetNextMid(ses->server);
 	pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
 	pSMB->req.AndXCommand = 0xFF;
 	pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
@@ -2240,7 +2379,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
    the end since (at least) WIN2K and Windows XP have a major bug in not null
    terminating last Unicode string in response  */
 					ses->serverOS =
-					    kcalloc(1, 2 * (len + 1), GFP_KERNEL);
+					    kzalloc(2 * (len + 1), GFP_KERNEL);
 					cifs_strfromUCS_le(ses->serverOS,
 							   (wchar_t *)
 							   bcc_ptr, len,
@@ -2254,7 +2393,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 								 remaining_words
 								 - 1);
 						ses->serverNOS =
-						    kcalloc(1, 2 * (len + 1),
+						    kzalloc(2 * (len + 1),
 							    GFP_KERNEL);
 						cifs_strfromUCS_le(ses->serverNOS,
 								   (wchar_t *)bcc_ptr,
@@ -2267,7 +2406,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 						if (remaining_words > 0) {
 							len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);	
                             /* last string is not always null terminated (for e.g. for Windows XP & 2000) */
-							ses->serverDomain = kcalloc(1, 2*(len+1),GFP_KERNEL);
+							ses->serverDomain = kzalloc(2*(len+1),GFP_KERNEL);
 							cifs_strfromUCS_le(ses->serverDomain,
 							     (wchar_t *)bcc_ptr, 
                                  len,
@@ -2278,10 +2417,10 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 						} /* else no more room so create dummy domain string */
 						else
 							ses->serverDomain =
-							    kcalloc(1, 2,GFP_KERNEL);
+							    kzalloc(2,GFP_KERNEL);
 					} else {	/* no room so create dummy domain and NOS string */
-						ses->serverDomain = kcalloc(1, 2, GFP_KERNEL);
-						ses->serverNOS = kcalloc(1, 2, GFP_KERNEL);
+						ses->serverDomain = kzalloc(2, GFP_KERNEL);
+						ses->serverNOS = kzalloc(2, GFP_KERNEL);
 					}
 				} else {	/* ASCII */
 
@@ -2289,7 +2428,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 					if (((long) bcc_ptr + len) - (long)
 					    pByteArea(smb_buffer_response)
 					    <= BCC(smb_buffer_response)) {
-						ses->serverOS = kcalloc(1, len + 1, GFP_KERNEL);
+						ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
 						strncpy(ses->serverOS, bcc_ptr, len);
 
 						bcc_ptr += len;
@@ -2297,14 +2436,14 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 						bcc_ptr++;
 
 						len = strnlen(bcc_ptr, 1024);
-						ses->serverNOS = kcalloc(1, len + 1,GFP_KERNEL);
+						ses->serverNOS = kzalloc(len + 1,GFP_KERNEL);
 						strncpy(ses->serverNOS, bcc_ptr, len);
 						bcc_ptr += len;
 						bcc_ptr[0] = 0;
 						bcc_ptr++;
 
 						len = strnlen(bcc_ptr, 1024);
-						ses->serverDomain = kcalloc(1, len + 1, GFP_KERNEL);
+						ses->serverDomain = kzalloc(len + 1, GFP_KERNEL);
 						strncpy(ses->serverDomain, bcc_ptr, len);
 						bcc_ptr += len;
 						bcc_ptr[0] = 0;
@@ -2371,6 +2510,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 	/* send SMBsessionSetup here */
 	header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
 			NULL /* no tCon exists yet */ , 12 /* wct */ );
+
+	smb_buffer->Mid = GetNextMid(ses->server);
 	pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
 	pSMB->req.hdr.Flags |= (SMBFLG_CASELESS | SMBFLG_CANONICAL_PATH_FORMAT);
 
@@ -2554,7 +2695,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
    the end since (at least) WIN2K and Windows XP have a major bug in not null
    terminating last Unicode string in response  */
 					ses->serverOS =
-					    kcalloc(1, 2 * (len + 1), GFP_KERNEL);
+					    kzalloc(2 * (len + 1), GFP_KERNEL);
 					cifs_strfromUCS_le(ses->serverOS,
 							   (wchar_t *)
 							   bcc_ptr, len,
@@ -2569,7 +2710,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 								 remaining_words
 								 - 1);
 						ses->serverNOS =
-						    kcalloc(1, 2 * (len + 1),
+						    kzalloc(2 * (len + 1),
 							    GFP_KERNEL);
 						cifs_strfromUCS_le(ses->
 								   serverNOS,
@@ -2586,7 +2727,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 							len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);	
            /* last string is not always null terminated (for e.g. for Windows XP & 2000) */
 							ses->serverDomain =
-							    kcalloc(1, 2 *
+							    kzalloc(2 *
 								    (len +
 								     1),
 								    GFP_KERNEL);
@@ -2612,13 +2753,13 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 						} /* else no more room so create dummy domain string */
 						else
 							ses->serverDomain =
-							    kcalloc(1, 2,
+							    kzalloc(2,
 								    GFP_KERNEL);
 					} else {	/* no room so create dummy domain and NOS string */
 						ses->serverDomain =
-						    kcalloc(1, 2, GFP_KERNEL);
+						    kzalloc(2, GFP_KERNEL);
 						ses->serverNOS =
-						    kcalloc(1, 2, GFP_KERNEL);
+						    kzalloc(2, GFP_KERNEL);
 					}
 				} else {	/* ASCII */
 					len = strnlen(bcc_ptr, 1024);
@@ -2626,7 +2767,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 					    pByteArea(smb_buffer_response)
 					    <= BCC(smb_buffer_response)) {
 						ses->serverOS =
-						    kcalloc(1, len + 1,
+						    kzalloc(len + 1,
 							    GFP_KERNEL);
 						strncpy(ses->serverOS,
 							bcc_ptr, len);
@@ -2637,7 +2778,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 
 						len = strnlen(bcc_ptr, 1024);
 						ses->serverNOS =
-						    kcalloc(1, len + 1,
+						    kzalloc(len + 1,
 							    GFP_KERNEL);
 						strncpy(ses->serverNOS, bcc_ptr, len);
 						bcc_ptr += len;
@@ -2646,7 +2787,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 
 						len = strnlen(bcc_ptr, 1024);
 						ses->serverDomain =
-						    kcalloc(1, len + 1,
+						    kzalloc(len + 1,
 							    GFP_KERNEL);
 						strncpy(ses->serverDomain, bcc_ptr, len);	
 						bcc_ptr += len;
@@ -2713,6 +2854,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 	/* send SMBsessionSetup here */
 	header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
 			NULL /* no tCon exists yet */ , 12 /* wct */ );
+
+	smb_buffer->Mid = GetNextMid(ses->server);
 	pSMB->req.hdr.Flags |= (SMBFLG_CASELESS | SMBFLG_CANONICAL_PATH_FORMAT);
 	pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
 	pSMB->req.AndXCommand = 0xFF;
@@ -2948,7 +3091,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
   the end since (at least) WIN2K and Windows XP have a major bug in not null
   terminating last Unicode string in response  */
 					ses->serverOS =
-					    kcalloc(1, 2 * (len + 1), GFP_KERNEL);
+					    kzalloc(2 * (len + 1), GFP_KERNEL);
 					cifs_strfromUCS_le(ses->serverOS,
 							   (wchar_t *)
 							   bcc_ptr, len,
@@ -2963,7 +3106,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 								 remaining_words
 								 - 1);
 						ses->serverNOS =
-						    kcalloc(1, 2 * (len + 1),
+						    kzalloc(2 * (len + 1),
 							    GFP_KERNEL);
 						cifs_strfromUCS_le(ses->
 								   serverNOS,
@@ -2979,7 +3122,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 							len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);	
      /* last string not always null terminated (e.g. for Windows XP & 2000) */
 							ses->serverDomain =
-							    kcalloc(1, 2 *
+							    kzalloc(2 *
 								    (len +
 								     1),
 								    GFP_KERNEL);
@@ -3004,17 +3147,17 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 							    = 0;
 						} /* else no more room so create dummy domain string */
 						else
-							ses->serverDomain = kcalloc(1, 2,GFP_KERNEL);
+							ses->serverDomain = kzalloc(2,GFP_KERNEL);
 					} else {  /* no room so create dummy domain and NOS string */
-						ses->serverDomain = kcalloc(1, 2, GFP_KERNEL);
-						ses->serverNOS = kcalloc(1, 2, GFP_KERNEL);
+						ses->serverDomain = kzalloc(2, GFP_KERNEL);
+						ses->serverNOS = kzalloc(2, GFP_KERNEL);
 					}
 				} else {	/* ASCII */
 					len = strnlen(bcc_ptr, 1024);
 					if (((long) bcc_ptr + len) - 
                         (long) pByteArea(smb_buffer_response) 
                             <= BCC(smb_buffer_response)) {
-						ses->serverOS = kcalloc(1, len + 1,GFP_KERNEL);
+						ses->serverOS = kzalloc(len + 1,GFP_KERNEL);
 						strncpy(ses->serverOS,bcc_ptr, len);
 
 						bcc_ptr += len;
@@ -3022,14 +3165,14 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 						bcc_ptr++;
 
 						len = strnlen(bcc_ptr, 1024);
-						ses->serverNOS = kcalloc(1, len+1,GFP_KERNEL);
+						ses->serverNOS = kzalloc(len+1,GFP_KERNEL);
 						strncpy(ses->serverNOS, bcc_ptr, len);	
 						bcc_ptr += len;
 						bcc_ptr[0] = 0;
 						bcc_ptr++;
 
 						len = strnlen(bcc_ptr, 1024);
-						ses->serverDomain = kcalloc(1, len+1,GFP_KERNEL);
+						ses->serverDomain = kzalloc(len+1,GFP_KERNEL);
 						strncpy(ses->serverDomain, bcc_ptr, len);
 						bcc_ptr += len;
 						bcc_ptr[0] = 0;
@@ -3084,6 +3227,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 
 	header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX,
 			NULL /*no tid */ , 4 /*wct */ );
+
+	smb_buffer->Mid = GetNextMid(ses->server);
 	smb_buffer->Uid = ses->Suid;
 	pSMB = (TCONX_REQ *) smb_buffer;
 	pSMBr = (TCONX_RSP *) smb_buffer_response;
@@ -3141,7 +3286,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 				if(tcon->nativeFileSystem)
 					kfree(tcon->nativeFileSystem);
 				tcon->nativeFileSystem =
-				    kcalloc(1, length + 2, GFP_KERNEL);
+				    kzalloc(length + 2, GFP_KERNEL);
 				cifs_strfromUCS_le(tcon->nativeFileSystem,
 						   (wchar_t *) bcc_ptr,
 						   length, nls_codepage);
@@ -3159,7 +3304,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 				if(tcon->nativeFileSystem)
 					kfree(tcon->nativeFileSystem);
 				tcon->nativeFileSystem =
-				    kcalloc(1, length + 1, GFP_KERNEL);
+				    kzalloc(length + 1, GFP_KERNEL);
 				strncpy(tcon->nativeFileSystem, bcc_ptr,
 					length);
 			}
@@ -3205,8 +3350,10 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
 				return 0;
 			} else if (rc == -ESHUTDOWN) {
 				cFYI(1,("Waking up socket by sending it signal"));
-				if(cifsd_task)
+				if(cifsd_task) {
 					send_sig(SIGKILL,cifsd_task,1);
+					wait_for_completion(&cifsd_complete);
+				}
 				rc = 0;
 			} /* else - we have an smb session
 				left on this socket do not kill cifsd */
@@ -3215,10 +3362,8 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
 	}
 	
 	cifs_sb->tcon = NULL;
-	if (ses) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(HZ / 2);
-	}
+	if (ses)
+		schedule_timeout_interruptible(msecs_to_jiffies(500));
 	if (ses)
 		sesInfoFree(ses);
 
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3f3538d4a1f..8dfe717a332 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -48,6 +48,7 @@ build_path_from_dentry(struct dentry *direntry)
 	struct dentry *temp;
 	int namelen = 0;
 	char *full_path;
+	char dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb));
 
 	if(direntry == NULL)
 		return NULL;  /* not much we can do if dentry is freed and
@@ -74,7 +75,7 @@ cifs_bp_rename_retry:
 		if (namelen < 0) {
 			break;
 		} else {
-			full_path[namelen] = '\\';
+			full_path[namelen] = dirsep;
 			strncpy(full_path + namelen + 1, temp->d_name.name,
 				temp->d_name.len);
 			cFYI(0, (" name: %s ", full_path + namelen));
@@ -145,24 +146,23 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 		return -ENOMEM;
 	}
 
-	if(nd) {
-		if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY)
-			desiredAccess = GENERIC_READ;
-		else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) {
-			desiredAccess = GENERIC_WRITE;
-			write_only = TRUE;
-		} else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) {
-			/* GENERIC_ALL is too much permission to request */
-			/* can cause unnecessary access denied on create */
-			/* desiredAccess = GENERIC_ALL; */
-			desiredAccess = GENERIC_READ | GENERIC_WRITE;
+	if(nd && (nd->flags & LOOKUP_OPEN)) {
+		int oflags = nd->intent.open.flags;
+
+		desiredAccess = 0;
+		if (oflags & FMODE_READ)
+			desiredAccess |= GENERIC_READ;
+		if (oflags & FMODE_WRITE) {
+			desiredAccess |= GENERIC_WRITE;
+			if (!(oflags & FMODE_READ))
+				write_only = TRUE;
 		}
 
-		if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+		if((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
 			disposition = FILE_CREATE;
-		else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
+		else if((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
 			disposition = FILE_OVERWRITE_IF;
-		else if((nd->intent.open.flags & O_CREAT) == O_CREAT)
+		else if((oflags & O_CREAT) == O_CREAT)
 			disposition = FILE_OPEN_IF;
 		else {
 			cFYI(1,("Create flag not set in create function"));
@@ -184,6 +184,13 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 			 desiredAccess, CREATE_NOT_DIR,
 			 &fileHandle, &oplock, buf, cifs_sb->local_nls,
 			 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+	if(rc == -EIO) {
+		/* old server, retry the open legacy style */
+		rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
+			desiredAccess, CREATE_NOT_DIR,
+			&fileHandle, &oplock, buf, cifs_sb->local_nls,
+			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+	} 
 	if (rc) {
 		cFYI(1, ("cifs_create returned 0x%x ", rc));
 	} else {
@@ -209,7 +216,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 			}
 		else {
-			/* BB implement via Windows security descriptors */
+			/* BB implement mode setting via Windows security descriptors */
 			/* eg CIFSSMBWinSetPerms(xid,pTcon,full_path,mode,-1,-1,local_nls);*/
 			/* could set r/o dos attribute if mode & 0222 == 0 */
 		}
@@ -226,10 +233,14 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 		}
 
 		if (rc != 0) {
-			cFYI(1,("Create worked but get_inode_info failed with rc = %d",
+			cFYI(1,
+			     ("Create worked but get_inode_info failed rc = %d",
 			      rc));
 		} else {
-			direntry->d_op = &cifs_dentry_ops;
+			if (pTcon->nocase)
+				direntry->d_op = &cifs_ci_dentry_ops;
+			else
+				direntry->d_op = &cifs_dentry_ops;
 			d_instantiate(direntry, newinode);
 		}
 		if((nd->flags & LOOKUP_OPEN) == FALSE) {
@@ -303,8 +314,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, dev_t dev
 	up(&direntry->d_sb->s_vfs_rename_sem);
 	if(full_path == NULL)
 		rc = -ENOMEM;
-	
-	if (full_path && (pTcon->ses->capabilities & CAP_UNIX)) {
+	else if (pTcon->ses->capabilities & CAP_UNIX) {
 		if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
 			rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path,
 				mode,(__u64)current->euid,(__u64)current->egid,
@@ -322,10 +332,49 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, dev_t dev
 		if(!rc) {
 			rc = cifs_get_inode_info_unix(&newinode, full_path,
 						inode->i_sb,xid);
-			direntry->d_op = &cifs_dentry_ops;
+			if (pTcon->nocase)
+				direntry->d_op = &cifs_ci_dentry_ops;
+			else
+				direntry->d_op = &cifs_dentry_ops;
 			if(rc == 0)
 				d_instantiate(direntry, newinode);
 		}
+	} else {
+		if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
+			int oplock = 0;
+			u16 fileHandle;
+			FILE_ALL_INFO * buf;
+
+			cFYI(1,("sfu compat create special file"));
+
+			buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL);
+			if(buf == NULL) {
+				kfree(full_path);
+				FreeXid(xid);
+				return -ENOMEM;
+			}
+
+			rc = CIFSSMBOpen(xid, pTcon, full_path,
+					 FILE_CREATE, /* fail if exists */
+					 GENERIC_WRITE /* BB would 
+					  WRITE_OWNER | WRITE_DAC be better? */,
+					 /* Create a file and set the
+					    file attribute to SYSTEM */
+					 CREATE_NOT_DIR | CREATE_OPTION_SPECIAL,
+					 &fileHandle, &oplock, buf,
+					 cifs_sb->local_nls,
+					 cifs_sb->mnt_cifs_flags & 
+					    CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+			if(!rc) {
+				/* BB Do not bother to decode buf since no
+				   local inode yet to put timestamps in */
+				CIFSSMBClose(xid, pTcon, fileHandle);
+				d_drop(direntry);
+			}
+			kfree(buf);
+			/* add code here to set EAs */
+		}
 	}
 
 	kfree(full_path);
@@ -382,7 +431,10 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
 					 parent_dir_inode->i_sb,xid);
 
 	if ((rc == 0) && (newInode != NULL)) {
-		direntry->d_op = &cifs_dentry_ops;
+		if (pTcon->nocase)
+			direntry->d_op = &cifs_ci_dentry_ops;
+		else
+			direntry->d_op = &cifs_dentry_ops;
 		d_add(direntry, newInode);
 
 		/* since paths are not looked up by component - the parent directories are presumed to be good here */
@@ -441,3 +493,42 @@ struct dentry_operations cifs_dentry_ops = {
 /* d_delete:       cifs_d_delete,       *//* not needed except for debugging */
 	/* no need for d_hash, d_compare, d_release, d_iput ... yet. BB confirm this BB */
 };
+
+static int cifs_ci_hash(struct dentry *dentry, struct qstr *q)
+{
+	struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls;
+	unsigned long hash;
+	int i;
+
+	hash = init_name_hash();
+	for (i = 0; i < q->len; i++)
+		hash = partial_name_hash(nls_tolower(codepage, q->name[i]),
+					 hash);
+	q->hash = end_name_hash(hash);
+
+	return 0;
+}
+
+static int cifs_ci_compare(struct dentry *dentry, struct qstr *a,
+			   struct qstr *b)
+{
+	struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls;
+
+	if ((a->len == b->len) &&
+	    (nls_strnicmp(codepage, a->name, b->name, a->len) == 0)) {
+		/*
+		 * To preserve case, don't let an existing negative dentry's
+		 * case take precedence.  If a is not a negative dentry, this
+		 * should have no side effects
+		 */
+		memcpy((unsigned char *)a->name, b->name, a->len);
+		return 0;
+	}
+	return 1;
+}
+
+struct dentry_operations cifs_ci_dentry_ops = {
+	.d_revalidate = cifs_d_revalidate,
+	.d_hash = cifs_ci_hash,
+	.d_compare = cifs_ci_compare,
+};
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index 7d2a9202c39..a7a47bb36bf 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -78,6 +78,10 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
 	__u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES;
 	__u16 netfid;
 
+
+	if(experimEnabled == 0)
+		return 0;
+
 	xid = GetXid();
 	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
 	pTcon = cifs_sb->tcon;
@@ -100,8 +104,10 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
 		} else {
 			filter = convert_to_cifs_notify_flags(arg);
 			if(filter != 0) {
-				rc = CIFSSMBNotify(xid, pTcon, 0 /* no subdirs */, netfid, 
-					filter, cifs_sb->local_nls);
+				rc = CIFSSMBNotify(xid, pTcon, 
+					0 /* no subdirs */, netfid,
+					filter, file, arg & DN_MULTISHOT,
+					cifs_sb->local_nls);
 			} else {
 				rc = -EINVAL;
 			}
@@ -109,7 +115,7 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
 			it would close automatically but may be a way
 			to do it easily when inode freed or when
 			notify info is cleared/changed */
-            cERROR(1,("notify rc %d",rc));
+			cFYI(1,("notify rc %d",rc));
 		}
 	}
 	
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 30ab70ce554..da4f5e10b3c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -21,11 +21,15 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
+#include <linux/backing-dev.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
+#include <linux/mpage.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
 #include <linux/smp_lock.h>
+#include <linux/writeback.h>
+#include <linux/delay.h>
 #include <asm/div64.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
@@ -47,6 +51,11 @@ static inline struct cifsFileInfo *cifs_init_private(
 	private_data->pInode = inode;
 	private_data->invalidHandle = FALSE;
 	private_data->closePend = FALSE;
+	/* we have to track num writers to the inode, since writepages
+	does not tell us which handle the write is for so there can
+	be a close (overlapping with write) of the filehandle that
+	cifs_writepages chose to use */
+	atomic_set(&private_data->wrtPending,0); 
 
 	return private_data;
 }
@@ -256,6 +265,13 @@ int cifs_open(struct inode *inode, struct file *file)
 			 CREATE_NOT_DIR, &netfid, &oplock, buf,
 			 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
 				 & CIFS_MOUNT_MAP_SPECIAL_CHR);
+	if (rc == -EIO) {
+		/* Old server, try legacy style OpenX */
+		rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
+			desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
+			cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
+				& CIFS_MOUNT_MAP_SPECIAL_CHR);
+	}
 	if (rc) {
 		cFYI(1, ("cifs_open returned 0x%x ", rc));
 		goto out;
@@ -463,6 +479,20 @@ int cifs_close(struct inode *inode, struct file *file)
 			/* no sense reconnecting to close a file that is
 			   already closed */
 			if (pTcon->tidStatus != CifsNeedReconnect) {
+				int timeout = 2;
+				while((atomic_read(&pSMBFile->wrtPending) != 0)
+					 && (timeout < 1000) ) {
+					/* Give write a better chance to get to
+					server ahead of the close.  We do not
+					want to add a wait_q here as it would
+					increase the memory utilization as
+					the struct would be in each open file,
+					but this should give enough time to 
+					clear the socket */
+					cERROR(1,("close with pending writes"));
+					msleep(timeout);
+					timeout *= 4;
+				} 
 				write_unlock(&file->f_owner.lock);
 				rc = CIFSSMBClose(xid, pTcon,
 						  pSMBFile->netfid);
@@ -643,7 +673,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 			 netfid, length,
 			 pfLock->fl_start, numUnlock, numLock, lockType,
 			 wait_flag);
-	if (rc == 0 && (pfLock->fl_flags & FL_POSIX))
+	if (pfLock->fl_flags & FL_POSIX)
 		posix_lock_file_wait(file, pfLock);
 	FreeXid(xid);
 	return rc;
@@ -744,14 +774,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
 				    15 seconds is plenty */
 	}
 
-#ifdef CONFIG_CIFS_STATS
-	if (total_written > 0) {
-		atomic_inc(&pTcon->num_writes);
-		spin_lock(&pTcon->stat_lock);
-		pTcon->bytes_written += total_written;
-		spin_unlock(&pTcon->stat_lock);
-	}
-#endif		
+	cifs_stats_bytes_written(pTcon, total_written);
 
 	/* since the write may have blocked check these pointers again */
 	if (file->f_dentry) {
@@ -791,9 +814,8 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
 
 	pTcon = cifs_sb->tcon;
 
-	/* cFYI(1,
-	   (" write %d bytes to offset %lld of %s", write_size,
-	   *poffset, file->f_dentry->d_name.name)); */
+	cFYI(1,("write %zd bytes to offset %lld of %s", write_size,
+	   *poffset, file->f_dentry->d_name.name));
 
 	if (file->private_data == NULL)
 		return -EBADF;
@@ -846,7 +868,26 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
 				if (rc != 0)
 					break;
 			}
-
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+			/* BB FIXME We can not sign across two buffers yet */
+			if((experimEnabled) && ((pTcon->ses->server->secMode & 
+			 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) == 0)) {
+				struct kvec iov[2];
+				unsigned int len;
+
+				len = min((size_t)cifs_sb->wsize,
+					  write_size - total_written);
+				/* iov[0] is reserved for smb header */
+				iov[1].iov_base = (char *)write_data +
+						  total_written;
+				iov[1].iov_len = len;
+				rc = CIFSSMBWrite2(xid, pTcon,
+						open_file->netfid, len,
+						*poffset, &bytes_written,
+						iov, 1, long_op);
+			} else
+			/* BB FIXME fixup indentation of line below */
+#endif			
 			rc = CIFSSMBWrite(xid, pTcon,
 				 open_file->netfid,
 				 min_t(const int, cifs_sb->wsize, 
@@ -867,14 +908,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
 				    15 seconds is plenty */
 	}
 
-#ifdef CONFIG_CIFS_STATS
-	if (total_written > 0) {
-		atomic_inc(&pTcon->num_writes);
-		spin_lock(&pTcon->stat_lock);
-		pTcon->bytes_written += total_written;
-		spin_unlock(&pTcon->stat_lock);
-	}
-#endif		
+	cifs_stats_bytes_written(pTcon, total_written);
 
 	/* since the write may have blocked check these pointers again */
 	if (file->f_dentry) {
@@ -893,6 +927,43 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
 	return total_written;
 }
 
+struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
+{
+	struct cifsFileInfo *open_file;
+	int rc;
+
+	read_lock(&GlobalSMBSeslock);
+	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
+		if (open_file->closePend)
+			continue;
+		if (open_file->pfile &&
+		    ((open_file->pfile->f_flags & O_RDWR) ||
+		     (open_file->pfile->f_flags & O_WRONLY))) {
+			atomic_inc(&open_file->wrtPending);
+			read_unlock(&GlobalSMBSeslock);
+			if((open_file->invalidHandle) && 
+			   (!open_file->closePend) /* BB fixme -since the second clause can not be true remove it BB */) {
+				rc = cifs_reopen_file(&cifs_inode->vfs_inode, 
+						      open_file->pfile, FALSE);
+				/* if it fails, try another handle - might be */
+				/* dangerous to hold up writepages with retry */
+				if(rc) {
+					cFYI(1,("failed on reopen file in wp"));
+					read_lock(&GlobalSMBSeslock);
+					/* can not use this handle, no write
+					pending on this one after all */
+					atomic_dec
+					     (&open_file->wrtPending);
+					continue;
+				}
+			}
+			return open_file;
+		}
+	}
+	read_unlock(&GlobalSMBSeslock);
+	return NULL;
+}
+
 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 {
 	struct address_space *mapping = page->mapping;
@@ -903,10 +974,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *pTcon;
 	struct inode *inode;
-	struct cifsInodeInfo *cifsInode;
-	struct cifsFileInfo *open_file = NULL;
-	struct list_head *tmp;
-	struct list_head *tmp1;
+	struct cifsFileInfo *open_file;
 
 	if (!mapping || !mapping->host)
 		return -EFAULT;
@@ -934,49 +1002,20 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 	if (mapping->host->i_size - offset < (loff_t)to)
 		to = (unsigned)(mapping->host->i_size - offset); 
 
-	cifsInode = CIFS_I(mapping->host);
-	read_lock(&GlobalSMBSeslock); 
-	/* BB we should start at the end */
-	list_for_each_safe(tmp, tmp1, &cifsInode->openFileList) {            
-		open_file = list_entry(tmp, struct cifsFileInfo, flist);
-		if (open_file->closePend)
-			continue;
-		/* We check if file is open for writing first */
-		if ((open_file->pfile) && 
-		   ((open_file->pfile->f_flags & O_RDWR) || 
-			(open_file->pfile->f_flags & O_WRONLY))) {
-			read_unlock(&GlobalSMBSeslock);
-			bytes_written = cifs_write(open_file->pfile,
-						write_data, to-from,
-						&offset);
-			read_lock(&GlobalSMBSeslock);
+	open_file = find_writable_file(CIFS_I(mapping->host));
+	if (open_file) {
+		bytes_written = cifs_write(open_file->pfile, write_data,
+					   to-from, &offset);
+		atomic_dec(&open_file->wrtPending);
 		/* Does mm or vfs already set times? */
-			inode->i_atime = 
-			inode->i_mtime = current_fs_time(inode->i_sb);
-			if ((bytes_written > 0) && (offset)) {
-				rc = 0;
-			} else if (bytes_written < 0) {
-				if (rc == -EBADF) {
-				/* have seen a case in which kernel seemed to
-				   have closed/freed a file even with writes
-				   active so we might as well see if there are
-				   other file structs to try for the same
-				   inode before giving up */
-					continue;
-				} else
-					rc = bytes_written;
-			}
-			break;  /* now that we found a valid file handle and
-				   tried to write to it we are done, no sense
-				   continuing to loop looking for another */
-		}
-		if (tmp->next == NULL) {
-			cFYI(1, ("File instance %p removed", tmp));
-			break;
+		inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
+		if ((bytes_written > 0) && (offset)) {
+			rc = 0;
+		} else if (bytes_written < 0) {
+			if (rc != -EBADF)
+				rc = bytes_written;
 		}
-	}
-	read_unlock(&GlobalSMBSeslock);
-	if (open_file == NULL) {
+	} else {
 		cFYI(1, ("No writeable filehandles for inode"));
 		rc = -EIO;
 	}
@@ -985,20 +1024,207 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 	return rc;
 }
 
-#if 0
+#ifdef CONFIG_CIFS_EXPERIMENTAL
 static int cifs_writepages(struct address_space *mapping,
-	struct writeback_control *wbc)
+			   struct writeback_control *wbc)
 {
-	int rc = -EFAULT;
+	struct backing_dev_info *bdi = mapping->backing_dev_info;
+	unsigned int bytes_to_write;
+	unsigned int bytes_written;
+	struct cifs_sb_info *cifs_sb;
+	int done = 0;
+	pgoff_t end = -1;
+	pgoff_t index;
+	int is_range = 0;
+	struct kvec iov[32];
+	int len;
+	int n_iov = 0;
+	pgoff_t next;
+	int nr_pages;
+	__u64 offset = 0;
+	struct cifsFileInfo *open_file;
+	struct page *page;
+	struct pagevec pvec;
+	int rc = 0;
+	int scanned = 0;
 	int xid;
 
+	cifs_sb = CIFS_SB(mapping->host->i_sb);
+	
+	/*
+	 * If wsize is smaller that the page cache size, default to writing
+	 * one page at a time via cifs_writepage
+	 */
+	if (cifs_sb->wsize < PAGE_CACHE_SIZE)
+		return generic_writepages(mapping, wbc);
+
+	/* BB FIXME we do not have code to sign across multiple buffers yet,
+	   so go to older writepage style write which we can sign if needed */
+	if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
+		if(cifs_sb->tcon->ses->server->secMode &
+                          (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+			return generic_writepages(mapping, wbc);
+
+	/*
+	 * BB: Is this meaningful for a non-block-device file system?
+	 * If it is, we should test it again after we do I/O
+	 */
+	if (wbc->nonblocking && bdi_write_congested(bdi)) {
+		wbc->encountered_congestion = 1;
+		return 0;
+	}
+
 	xid = GetXid();
 
-	/* Find contiguous pages then iterate through repeating
-	   call 16K write then Setpageuptodate or if LARGE_WRITE_X
-	   support then send larger writes via kevec so as to eliminate
-	   a memcpy */
+	pagevec_init(&pvec, 0);
+	if (wbc->sync_mode == WB_SYNC_NONE)
+		index = mapping->writeback_index; /* Start from prev offset */
+	else {
+		index = 0;
+		scanned = 1;
+	}
+	if (wbc->start || wbc->end) {
+		index = wbc->start >> PAGE_CACHE_SHIFT;
+		end = wbc->end >> PAGE_CACHE_SHIFT;
+		is_range = 1;
+		scanned = 1;
+	}
+retry:
+	while (!done && (index <= end) &&
+	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+			PAGECACHE_TAG_DIRTY,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
+		int first;
+		unsigned int i;
+
+		first = -1;
+		next = 0;
+		n_iov = 0;
+		bytes_to_write = 0;
+
+		for (i = 0; i < nr_pages; i++) {
+			page = pvec.pages[i];
+			/*
+			 * At this point we hold neither mapping->tree_lock nor
+			 * lock on the page itself: the page may be truncated or
+			 * invalidated (changing page->mapping to NULL), or even
+			 * swizzled back from swapper_space to tmpfs file
+			 * mapping
+			 */
+
+			if (first < 0)
+				lock_page(page);
+			else if (TestSetPageLocked(page))
+				break;
+
+			if (unlikely(page->mapping != mapping)) {
+				unlock_page(page);
+				break;
+			}
+
+			if (unlikely(is_range) && (page->index > end)) {
+				done = 1;
+				unlock_page(page);
+				break;
+			}
+
+			if (next && (page->index != next)) {
+				/* Not next consecutive page */
+				unlock_page(page);
+				break;
+			}
+
+			if (wbc->sync_mode != WB_SYNC_NONE)
+				wait_on_page_writeback(page);
+
+			if (PageWriteback(page) ||
+					!test_clear_page_dirty(page)) {
+				unlock_page(page);
+				break;
+			}
+
+			if (page_offset(page) >= mapping->host->i_size) {
+				done = 1;
+				unlock_page(page);
+				break;
+			}
+
+			/*
+			 * BB can we get rid of this?  pages are held by pvec
+			 */
+			page_cache_get(page);
+
+			len = min(mapping->host->i_size - page_offset(page),
+				  (loff_t)PAGE_CACHE_SIZE);
+
+			/* reserve iov[0] for the smb header */
+			n_iov++;
+			iov[n_iov].iov_base = kmap(page);
+			iov[n_iov].iov_len = len;
+			bytes_to_write += len;
+
+			if (first < 0) {
+				first = i;
+				offset = page_offset(page);
+			}
+			next = page->index + 1;
+			if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
+				break;
+		}
+		if (n_iov) {
+			/* Search for a writable handle every time we call
+			 * CIFSSMBWrite2.  We can't rely on the last handle
+			 * we used to still be valid
+			 */
+			open_file = find_writable_file(CIFS_I(mapping->host));
+			if (!open_file) {
+				cERROR(1, ("No writable handles for inode"));
+				rc = -EBADF;
+			} else {
+				rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
+						   open_file->netfid,
+						   bytes_to_write, offset,
+						   &bytes_written, iov, n_iov,
+						   1);
+				atomic_dec(&open_file->wrtPending);
+				if (rc || bytes_written < bytes_to_write) {
+					cERROR(1,("Write2 ret %d, written = %d",
+						  rc, bytes_written));
+					/* BB what if continued retry is
+					   requested via mount flags? */
+					set_bit(AS_EIO, &mapping->flags);
+					SetPageError(page);
+				} else {
+					cifs_stats_bytes_written(cifs_sb->tcon,
+								 bytes_written);
+				}
+			}
+			for (i = 0; i < n_iov; i++) {
+				page = pvec.pages[first + i];
+				kunmap(page);
+				unlock_page(page);
+				page_cache_release(page);
+			}
+			if ((wbc->nr_to_write -= n_iov) <= 0)
+				done = 1;
+			index = next;
+		}
+		pagevec_release(&pvec);
+	}
+	if (!scanned && !done) {
+		/*
+		 * We hit the last page and there is more work to be done: wrap
+		 * back to the start of the file
+		 */
+		scanned = 1;
+		index = 0;
+		goto retry;
+	}
+	if (!is_range)
+		mapping->writeback_index = index;
+
 	FreeXid(xid);
+
 	return rc;
 }
 #endif
@@ -1207,12 +1433,10 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
 				if (rc != 0)
 					break;
 			}
-
 			rc = CIFSSMBRead(xid, pTcon,
-				 open_file->netfid,
-				 current_read_size, *poffset,
-				 &bytes_read, &smb_read_data);
-
+					open_file->netfid,
+					current_read_size, *poffset,
+					&bytes_read, &smb_read_data);
 			pSMBr = (struct smb_com_read_rsp *)smb_read_data;
 			if (copy_to_user(current_offset, 
 					 smb_read_data + 4 /* RFC1001 hdr */
@@ -1235,12 +1459,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
 				return rc;
 			}
 		} else {
-#ifdef CONFIG_CIFS_STATS
-			atomic_inc(&pTcon->num_reads);
-			spin_lock(&pTcon->stat_lock);
-			pTcon->bytes_read += total_read;
-			spin_unlock(&pTcon->stat_lock);
-#endif
+			cifs_stats_bytes_read(pTcon, bytes_read);
 			*poffset += bytes_read;
 		}
 	}
@@ -1280,6 +1499,13 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 	     total_read += bytes_read, current_offset += bytes_read) {
 		current_read_size = min_t(const int, read_size - total_read,
 					  cifs_sb->rsize);
+		/* For windows me and 9x we do not want to request more
+		than it negotiated since it will refuse the read then */
+		if((pTcon->ses) && 
+			!(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
+			current_read_size = min_t(const int, current_read_size,
+					pTcon->ses->server->maxBuf - 128);
+		}
 		rc = -EAGAIN;
 		while (rc == -EAGAIN) {
 			if ((open_file->invalidHandle) && 
@@ -1289,11 +1515,10 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 				if (rc != 0)
 					break;
 			}
-
 			rc = CIFSSMBRead(xid, pTcon,
-				 open_file->netfid,
-				 current_read_size, *poffset,
-				 &bytes_read, &current_offset);
+					open_file->netfid,
+					current_read_size, *poffset,
+					&bytes_read, &current_offset);
 		}
 		if (rc || (bytes_read == 0)) {
 			if (total_read) {
@@ -1303,12 +1528,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 				return rc;
 			}
 		} else {
-#ifdef CONFIG_CIFS_STATS
-			atomic_inc(&pTcon->num_reads);
-			spin_lock(&pTcon->stat_lock);
-			pTcon->bytes_read += total_read;
-			spin_unlock(&pTcon->stat_lock);
-#endif
+			cifs_stats_bytes_read(pTcon, total_read);
 			*poffset += bytes_read;
 		}
 	}
@@ -1452,10 +1672,11 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 			}
 
 			rc = CIFSSMBRead(xid, pTcon,
-				open_file->netfid,
-				read_size, offset,
-				&bytes_read, &smb_read_data);
-			/* BB need to check return code here */
+					open_file->netfid,
+					read_size, offset,
+					&bytes_read, &smb_read_data);
+
+			/* BB more RC checks ? */
 			if (rc== -EAGAIN) {
 				if (smb_read_data) {
 					cifs_buf_release(smb_read_data);
@@ -1480,12 +1701,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 				le16_to_cpu(pSMBr->DataOffset), &lru_pvec);
 
 			i +=  bytes_read >> PAGE_CACHE_SHIFT;
-#ifdef CONFIG_CIFS_STATS
-			atomic_inc(&pTcon->num_reads);
-			spin_lock(&pTcon->stat_lock);
-			pTcon->bytes_read += bytes_read;
-			spin_unlock(&pTcon->stat_lock);
-#endif
+			cifs_stats_bytes_read(pTcon, bytes_read);
 			if ((int)(bytes_read & PAGE_CACHE_MASK) != bytes_read) {
 				i++; /* account for partial page */
 
@@ -1603,40 +1819,21 @@ static int cifs_readpage(struct file *file, struct page *page)
    page caching in the current Linux kernel design */
 int is_size_safe_to_change(struct cifsInodeInfo *cifsInode)
 {
-	struct list_head *tmp;
-	struct list_head *tmp1;
 	struct cifsFileInfo *open_file = NULL;
-	int rc = TRUE;
-
-	if (cifsInode == NULL)
-		return rc;
 
-	read_lock(&GlobalSMBSeslock); 
-	list_for_each_safe(tmp, tmp1, &cifsInode->openFileList) {            
-		open_file = list_entry(tmp, struct cifsFileInfo, flist);
-		if (open_file == NULL)
-			break;
-		if (open_file->closePend)
-			continue;
-	/* We check if file is open for writing,   
-	   BB we could supplement this with a check to see if file size
-	   changes have been flushed to server - ie inode metadata dirty */
-		if ((open_file->pfile) && 
-		    ((open_file->pfile->f_flags & O_RDWR) || 
-		    (open_file->pfile->f_flags & O_WRONLY))) {
-			rc = FALSE;
-			break;
-		}
-		if (tmp->next == NULL) {
-			cFYI(1, ("File instance %p removed", tmp));
-			break;
-		}
-	}
-	read_unlock(&GlobalSMBSeslock);
-	return rc;
+	if (cifsInode)
+		open_file =  find_writable_file(cifsInode);
+ 
+	if(open_file) {
+		/* there is not actually a write pending so let
+		this handle go free and allow it to
+		be closable if needed */
+		atomic_dec(&open_file->wrtPending);
+		return 0;
+	} else
+		return 1;
 }
 
-
 static int cifs_prepare_write(struct file *file, struct page *page,
 	unsigned from, unsigned to)
 {
@@ -1676,6 +1873,9 @@ struct address_space_operations cifs_addr_ops = {
 	.readpage = cifs_readpage,
 	.readpages = cifs_readpages,
 	.writepage = cifs_writepage,
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+	.writepages = cifs_writepages,
+#endif
 	.prepare_write = cifs_prepare_write,
 	.commit_write = cifs_commit_write,
 	.set_page_dirty = __set_page_dirty_nobuffers,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 8d336a90025..912d401600f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -166,7 +166,13 @@ int cifs_get_inode_info_unix(struct inode **pinode,
 				inode->i_fop = &cifs_file_direct_ops;
 			else
 				inode->i_fop = &cifs_file_ops;
+			if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+				inode->i_fop->lock = NULL;
 			inode->i_data.a_ops = &cifs_addr_ops;
+			/* check if server can support readpages */
+			if(pTcon->ses->server->maxBuf < 
+			    4096 + MAX_CIFS_HDR_SIZE)
+				inode->i_data.a_ops->readpages = NULL;
 		} else if (S_ISDIR(inode->i_mode)) {
 			cFYI(1, (" Directory inode"));
 			inode->i_op = &cifs_dir_inode_ops;
@@ -213,8 +219,18 @@ int cifs_get_inode_info(struct inode **pinode,
 		pfindData = (FILE_ALL_INFO *)buf;
 		/* could do find first instead but this returns more info */
 		rc = CIFSSMBQPathInfo(xid, pTcon, search_path, pfindData,
-			      cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 
+			      cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
 				CIFS_MOUNT_MAP_SPECIAL_CHR);
+		/* BB optimize code so we do not make the above call
+		when server claims no NT SMB support and the above call
+		failed at least once - set flag in tcon or mount */
+		if((rc == -EOPNOTSUPP) || (rc == -EINVAL)) {
+			rc = SMBQueryInformation(xid, pTcon, search_path,
+					pfindData, cifs_sb->local_nls, 
+					cifs_sb->mnt_cifs_flags &
+					  CIFS_MOUNT_MAP_SPECIAL_CHR);
+		}
+		
 	}
 	/* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */
 	if (rc) {
@@ -320,6 +336,16 @@ int cifs_get_inode_info(struct inode **pinode,
 		   on dirs */
 			inode->i_mode = cifs_sb->mnt_dir_mode;
 			inode->i_mode |= S_IFDIR;
+		} else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) &&
+			   (cifsInfo->cifsAttrs & ATTR_SYSTEM) &&
+			   /* No need to le64 convert size of zero */
+			   (pfindData->EndOfFile == 0)) {
+			inode->i_mode = cifs_sb->mnt_file_mode;
+			inode->i_mode |= S_IFIFO;
+/* BB Finish for SFU style symlinks and devies */
+/*		} else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) &&
+			   (cifsInfo->cifsAttrs & ATTR_SYSTEM) && ) */
+
 		} else {
 			inode->i_mode |= S_IFREG;
 			/* treat the dos attribute of read-only as read-only
@@ -359,7 +385,12 @@ int cifs_get_inode_info(struct inode **pinode,
 				inode->i_fop = &cifs_file_direct_ops;
 			else
 				inode->i_fop = &cifs_file_ops;
+			if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+				inode->i_fop->lock = NULL;
 			inode->i_data.a_ops = &cifs_addr_ops;
+			if(pTcon->ses->server->maxBuf < 
+			     4096 + MAX_CIFS_HDR_SIZE)
+				inode->i_data.a_ops->readpages = NULL;
 		} else if (S_ISDIR(inode->i_mode)) {
 			cFYI(1, (" Directory inode "));
 			inode->i_op = &cifs_dir_inode_ops;
@@ -577,7 +608,10 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 			rc = cifs_get_inode_info(&newinode, full_path, NULL,
 						 inode->i_sb,xid);
 
-		direntry->d_op = &cifs_dentry_ops;
+		if (pTcon->nocase)
+			direntry->d_op = &cifs_ci_dentry_ops;
+		else
+			direntry->d_op = &cifs_dentry_ops;
 		d_instantiate(direntry, newinode);
 		if (direntry->d_inode)
 			direntry->d_inode->i_nlink = 2;
@@ -928,7 +962,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 	struct cifsTconInfo *pTcon;
 	char *full_path = NULL;
 	int rc = -EACCES;
-	int found = FALSE;
 	struct cifsFileInfo *open_file = NULL;
 	FILE_BASIC_INFO time_buf;
 	int set_time = FALSE;
@@ -936,7 +969,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 	__u64 uid = 0xFFFFFFFFFFFFFFFFULL;
 	__u64 gid = 0xFFFFFFFFFFFFFFFFULL;
 	struct cifsInodeInfo *cifsInode;
-	struct list_head *tmp;
 
 	xid = GetXid();
 
@@ -961,7 +993,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 	filemap_fdatawait(direntry->d_inode->i_mapping);
 
 	if (attrs->ia_valid & ATTR_SIZE) {
-		read_lock(&GlobalSMBSeslock);
 		/* To avoid spurious oplock breaks from server, in the case of
 		   inodes that we already have open, avoid doing path based
 		   setting of file size if we can do it by handle.
@@ -969,40 +1000,23 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 		   when the local oplock break takes longer to flush
 		   writebehind data than the SMB timeout for the SetPathInfo
 		   request would allow */
-		list_for_each(tmp, &cifsInode->openFileList) {
-			open_file = list_entry(tmp, struct cifsFileInfo,
-					       flist);
-			/* We check if file is open for writing first */
-			if ((open_file->pfile) &&
-			    ((open_file->pfile->f_flags & O_RDWR) ||
-			    (open_file->pfile->f_flags & O_WRONLY))) {
-				if (open_file->invalidHandle == FALSE) {
-					/* we found a valid, writeable network
-					   file handle to use to try to set the
-					   file size */
-					__u16 nfid = open_file->netfid;
-					__u32 npid = open_file->pid;
-					read_unlock(&GlobalSMBSeslock);
-					found = TRUE;
-					rc = CIFSSMBSetFileSize(xid, pTcon,
-						attrs->ia_size, nfid, npid,
-						FALSE);
-					cFYI(1, ("SetFileSize by handle "
-						 "(setattrs) rc = %d", rc));
-					/* Do not need reopen and retry on
-					   EAGAIN since we will retry by
-					   pathname below */
-
-					/* now that we found one valid file
-					   handle no sense continuing to loop
-					   trying others, so break here */
-					break;
-				}
+		open_file = find_writable_file(cifsInode);
+		if (open_file) {
+			__u16 nfid = open_file->netfid;
+			__u32 npid = open_file->pid;
+			rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size,
+						nfid, npid, FALSE);
+			atomic_dec(&open_file->wrtPending);
+			cFYI(1,("SetFSize for attrs rc = %d", rc));
+			if(rc == -EINVAL) {
+				int bytes_written;
+				rc = CIFSSMBWrite(xid, pTcon,
+						  nfid, 0, attrs->ia_size,
+						  &bytes_written, NULL, NULL,
+						  1 /* 45 seconds */);
+				cFYI(1,("Wrt seteof rc %d", rc));
 			}
 		}
-		if (found == FALSE)
-			read_unlock(&GlobalSMBSeslock);
-
 		if (rc != 0) {
 			/* Set file size by pathname rather than by handle
 			   either because no valid, writeable file handle for
@@ -1013,7 +1027,30 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 					   cifs_sb->local_nls, 
 					   cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
-			cFYI(1, (" SetEOF by path (setattrs) rc = %d", rc));
+			cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc));
+			if(rc == -EINVAL) {
+				__u16 netfid;
+				int oplock = FALSE;
+
+				rc = SMBLegacyOpen(xid, pTcon, full_path,
+					FILE_OPEN,
+					SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
+					CREATE_NOT_DIR, &netfid, &oplock,
+					NULL, cifs_sb->local_nls,
+					cifs_sb->mnt_cifs_flags &
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
+				if (rc==0) {
+					int bytes_written;
+					rc = CIFSSMBWrite(xid, pTcon,
+							netfid, 0,
+							attrs->ia_size,
+							&bytes_written, NULL,
+							NULL, 1 /* 45 sec */);
+					cFYI(1,("wrt seteof rc %d",rc));
+					CIFSSMBClose(xid, pTcon, netfid);
+				}
+
+			}
 		}
 
 		/* Server is ok setting allocation size implicitly - no need
@@ -1026,24 +1063,22 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 			rc = vmtruncate(direntry->d_inode, attrs->ia_size);
 			cifs_truncate_page(direntry->d_inode->i_mapping,
 					   direntry->d_inode->i_size);
-		}
+		} else 
+			goto cifs_setattr_exit;
 	}
 	if (attrs->ia_valid & ATTR_UID) {
-		cFYI(1, (" CIFS - UID changed to %d", attrs->ia_uid));
+		cFYI(1, ("UID changed to %d", attrs->ia_uid));
 		uid = attrs->ia_uid;
-		/* entry->uid = cpu_to_le16(attr->ia_uid); */
 	}
 	if (attrs->ia_valid & ATTR_GID) {
-		cFYI(1, (" CIFS - GID changed to %d", attrs->ia_gid));
+		cFYI(1, ("GID changed to %d", attrs->ia_gid));
 		gid = attrs->ia_gid;
-		/* entry->gid = cpu_to_le16(attr->ia_gid); */
 	}
 
 	time_buf.Attributes = 0;
 	if (attrs->ia_valid & ATTR_MODE) {
-		cFYI(1, (" CIFS - Mode changed to 0x%x", attrs->ia_mode));
+		cFYI(1, ("Mode changed to 0x%x", attrs->ia_mode));
 		mode = attrs->ia_mode;
-		/* entry->mode = cpu_to_le16(attr->ia_mode); */
 	}
 
 	if ((cifs_sb->tcon->ses->capabilities & CAP_UNIX)
@@ -1083,18 +1118,24 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 		    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
 	} else
 		time_buf.LastWriteTime = 0;
-
-	if (attrs->ia_valid & ATTR_CTIME) {
+	/* Do not set ctime explicitly unless other time
+	   stamps are changed explicitly (i.e. by utime()
+	   since we would then have a mix of client and
+	   server times */
+	   
+	if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
 		set_time = TRUE;
-		cFYI(1, (" CIFS - CTIME changed ")); /* BB probably no need */
+		/* Although Samba throws this field away
+		it may be useful to Windows - but we do
+		not want to set ctime unless some other
+		timestamp is changing */
+		cFYI(1, ("CIFS - CTIME changed "));
 		time_buf.ChangeTime =
 		    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
 	} else
 		time_buf.ChangeTime = 0;
 
 	if (set_time || time_buf.Attributes) {
-		/* BB what if setting one attribute fails (such as size) but
-		   time setting works? */
 		time_buf.CreationTime = 0;	/* do not change */
 		/* In the future we should experiment - try setting timestamps
 		   via Handle (SetFileInfo) instead of by path */
@@ -1133,12 +1174,21 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
         	        		&time_buf, cifs_sb->local_nls); */
 			}
 		}
+		/* Even if error on time set, no sense failing the call if
+		the server would set the time to a reasonable value anyway,
+		and this check ensures that we are not being called from
+		sys_utimes in which case we ought to fail the call back to
+		the user when the server rejects the call */
+		if((rc) && (attrs->ia_valid &&
+			 (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE)))
+			rc = 0;
 	}
 
 	/* do not need local check to inode_check_ok since the server does
 	   that */
 	if (!rc)
 		rc = inode_setattr(direntry->d_inode, attrs);
+cifs_setattr_exit:
 	kfree(full_path);
 	FreeXid(xid);
 	return rc;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index bde0fabfece..b43e071fe11 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -92,7 +92,7 @@ cifs_hl_exit:
 	return rc;
 }
 
-int
+void *
 cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
 {
 	struct inode *inode = direntry->d_inode;
@@ -148,7 +148,7 @@ out:
 out_no_free:
 	FreeXid(xid);
 	nd_set_link(nd, target_path);
-	return 0;
+	return NULL;	/* No cookie */
 }
 
 int
@@ -198,7 +198,10 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
 			     ("Create symlink worked but get_inode_info failed with rc = %d ",
 			      rc));
 		} else {
-			direntry->d_op = &cifs_dentry_ops;
+			if (pTcon->nocase)
+				direntry->d_op = &cifs_ci_dentry_ops;
+			else
+				direntry->d_op = &cifs_dentry_ops;
 			d_instantiate(direntry, newinode);
 		}
 	}
@@ -330,7 +333,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
 	return rc;
 }
 
-void cifs_put_link(struct dentry *direntry, struct nameidata *nd)
+void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie)
 {
 	char *p = nd_get_link(nd);
 	if (!IS_ERR(p))
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 072b4ee8c53..eba1de917f2 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -34,8 +34,6 @@ extern mempool_t *cifs_sm_req_poolp;
 extern mempool_t *cifs_req_poolp;
 extern struct task_struct * oplockThread;
 
-static __u16 GlobalMid;		/* multiplex id - rotating counter */
-
 /* The xid serves as a useful identifier for each incoming vfs request, 
    in a similar way to the mid which is useful to track each sent smb, 
    and CurrentXid can also provide a running counter (although it 
@@ -51,6 +49,8 @@ _GetXid(void)
 	GlobalTotalActiveXid++;
 	if (GlobalTotalActiveXid > GlobalMaxActiveXid)
 		GlobalMaxActiveXid = GlobalTotalActiveXid;	/* keep high water mark for number of simultaneous vfs ops in our filesystem */
+	if(GlobalTotalActiveXid > 65000)
+		cFYI(1,("warning: more than 65000 requests active"));
 	xid = GlobalCurrentXid++;
 	spin_unlock(&GlobalMid_Lock);
 	return xid;
@@ -218,6 +218,76 @@ cifs_small_buf_release(void *buf_to_free)
 	return;
 }
 
+/* 
+	Find a free multiplex id (SMB mid). Otherwise there could be
+	mid collisions which might cause problems, demultiplexing the
+	wrong response to this request. Multiplex ids could collide if
+	one of a series requests takes much longer than the others, or
+	if a very large number of long lived requests (byte range
+	locks or FindNotify requests) are pending.  No more than
+	64K-1 requests can be outstanding at one time.  If no 
+	mids are available, return zero.  A future optimization
+	could make the combination of mids and uid the key we use
+	to demultiplex on (rather than mid alone).  
+	In addition to the above check, the cifs demultiplex
+	code already used the command code as a secondary
+	check of the frame and if signing is negotiated the
+	response would be discarded if the mid were the same
+	but the signature was wrong.  Since the mid is not put in the
+	pending queue until later (when it is about to be dispatched)
+	we do have to limit the number of outstanding requests 
+	to somewhat less than 64K-1 although it is hard to imagine
+	so many threads being in the vfs at one time.
+*/
+__u16 GetNextMid(struct TCP_Server_Info *server)
+{
+	__u16 mid = 0;
+	__u16 last_mid;
+	int   collision;  
+
+	if(server == NULL)
+		return mid;
+
+	spin_lock(&GlobalMid_Lock);
+	last_mid = server->CurrentMid; /* we do not want to loop forever */
+	server->CurrentMid++;
+	/* This nested loop looks more expensive than it is.
+	In practice the list of pending requests is short, 
+	fewer than 50, and the mids are likely to be unique
+	on the first pass through the loop unless some request
+	takes longer than the 64 thousand requests before it
+	(and it would also have to have been a request that
+	 did not time out) */
+	while(server->CurrentMid != last_mid) {
+		struct list_head *tmp;
+		struct mid_q_entry *mid_entry;
+
+		collision = 0;
+		if(server->CurrentMid == 0)
+			server->CurrentMid++;
+
+		list_for_each(tmp, &server->pending_mid_q) {
+			mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
+
+			if ((mid_entry->mid == server->CurrentMid) &&
+			    (mid_entry->midState == MID_REQUEST_SUBMITTED)) {
+				/* This mid is in use, try a different one */
+				collision = 1;
+				break;
+			}
+		}
+		if(collision == 0) {
+			mid = server->CurrentMid;
+			break;
+		}
+		server->CurrentMid++;
+	}
+	spin_unlock(&GlobalMid_Lock);
+	return mid;
+}
+
+/* NB: MID can not be set if treeCon not passed in, in that
+   case it is responsbility of caller to set the mid */
 void
 header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
 		const struct cifsTconInfo *treeCon, int word_count
@@ -233,7 +303,8 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
 	    (2 * word_count) + sizeof (struct smb_hdr) -
 	    4 /*  RFC 1001 length field does not count */  +
 	    2 /* for bcc field itself */ ;
-	/* Note that this is the only network field that has to be converted to big endian and it is done just before we send it */
+	/* Note that this is the only network field that has to be converted
+	   to big endian and it is done just before we send it */
 
 	buffer->Protocol[0] = 0xFF;
 	buffer->Protocol[1] = 'S';
@@ -245,8 +316,6 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
 	buffer->Pid = cpu_to_le16((__u16)current->tgid);
 	buffer->PidHigh = cpu_to_le16((__u16)(current->tgid >> 16));
 	spin_lock(&GlobalMid_Lock);
-	GlobalMid++;
-	buffer->Mid = GlobalMid;
 	spin_unlock(&GlobalMid_Lock);
 	if (treeCon) {
 		buffer->Tid = treeCon->tid;
@@ -256,8 +325,9 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
 			if (treeCon->ses->capabilities & CAP_STATUS32) {
 				buffer->Flags2 |= SMBFLG2_ERR_STATUS;
 			}
-
-			buffer->Uid = treeCon->ses->Suid;	/* always in LE format */
+			/* Uid is not converted */
+			buffer->Uid = treeCon->ses->Suid;
+			buffer->Mid = GetNextMid(treeCon->ses->server);
 			if(multiuser_mount != 0) {
 		/* For the multiuser case, there are few obvious technically  */
 		/* possible mechanisms to match the local linux user (uid)    */
@@ -305,6 +375,8 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
 		}
 		if (treeCon->Flags & SMB_SHARE_IS_IN_DFS)
 			buffer->Flags2 |= SMBFLG2_DFS;
+		if (treeCon->nocase)
+			buffer->Flags  |= SMBFLG_CASELESS;
 		if((treeCon->ses) && (treeCon->ses->server))
 			if(treeCon->ses->server->secMode & 
 			  (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
@@ -347,7 +419,8 @@ checkSMBhdr(struct smb_hdr *smb, __u16 mid)
 int
 checkSMB(struct smb_hdr *smb, __u16 mid, int length)
 {
-	__u32 len = be32_to_cpu(smb->smb_buf_length);
+	__u32 len = smb->smb_buf_length;
+	__u32 clc_len;  /* calculated length */
 	cFYI(0,
 	     ("Entering checkSMB with Length: %x, smb_buf_length: %x ",
 	      length, len));
@@ -368,23 +441,29 @@ checkSMB(struct smb_hdr *smb, __u16 mid, int length)
 			cERROR(1,
 			       ("smb_buf_length greater than MaxBufSize"));
 		cERROR(1,
-		       ("bad smb detected. Illegal length. The mid=%d",
+		       ("bad smb detected. Illegal length. mid=%d",
 			smb->Mid));
 		return 1;
 	}
 
 	if (checkSMBhdr(smb, mid))
 		return 1;
-
-	if ((4 + len != smbCalcSize(smb))
+	clc_len = smbCalcSize_LE(smb);
+	if ((4 + len != clc_len)
 	    || (4 + len != (unsigned int)length)) {
-		return 0;
-	} else {
-		cERROR(1, ("smbCalcSize %x ", smbCalcSize(smb)));
-		cERROR(1,
-		       ("bad smb size detected. The Mid=%d", smb->Mid));
-		return 1;
+		cERROR(1, ("Calculated size 0x%x vs actual length 0x%x",
+				clc_len, 4 + len));
+		cERROR(1, ("bad smb size detected for Mid=%d", smb->Mid));
+		/* Windows XP can return a few bytes too much, presumably
+		an illegal pad, at the end of byte range lock responses 
+		so we allow for up to eight byte pad, as long as actual
+		received length is as long or longer than calculated length */
+		if((4+len > clc_len) && (len <= clc_len + 3))
+			return 0;
+		else
+			return 1;
 	}
+	return 0;
 }
 int
 is_valid_oplock_break(struct smb_hdr *buf)
@@ -448,9 +527,7 @@ is_valid_oplock_break(struct smb_hdr *buf)
 	list_for_each(tmp, &GlobalTreeConnectionList) {
 		tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
 		if (tcon->tid == buf->Tid) {
-#ifdef CONFIG_CIFS_STATS
-			atomic_inc(&tcon->num_oplock_brks);
-#endif
+			cifs_stats_inc(&tcon->num_oplock_brks);
 			list_for_each(tmp1,&tcon->openFileList){
 				netfile = list_entry(tmp1,struct cifsFileInfo,
 						     tlist);
@@ -603,6 +680,7 @@ cifsConvertToUCS(__le16 * target, const char *source, int maxlen,
 	int i,j,charlen;
 	int len_remaining = maxlen;
 	char src_char;
+	__u16 temp;
 
 	if(!mapChars) 
 		return cifs_strtoUCS((wchar_t *) target, source, PATH_MAX, cp);
@@ -611,6 +689,7 @@ cifsConvertToUCS(__le16 * target, const char *source, int maxlen,
 		src_char = source[i];
 		switch (src_char) {
 			case 0:
+				target[j] = 0;
 				goto ctoUCS_out;
 			case ':':
 				target[j] = cpu_to_le16(UNI_COLON);
@@ -638,13 +717,14 @@ cifsConvertToUCS(__le16 * target, const char *source, int maxlen,
 				break;*/
 			default:
 				charlen = cp->char2uni(source+i,
-					len_remaining, target+j);
+					len_remaining, &temp);
 				/* if no match, use question mark, which
 				at least in some cases servers as wild card */
 				if(charlen < 1) {
 					target[j] = cpu_to_le16(0x003f);
 					charlen = 1;
-				}
+				} else
+					target[j] = cpu_to_le16(temp);
 				len_remaining -= charlen;
 				/* character may take more than one byte in the
 				   the source string, but will take exactly two
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index a92af41d441..f7814689844 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -133,7 +133,6 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = {
 int
 cifs_inet_pton(int address_family, char *cp,void *dst)
 {
-	struct in_addr address;
 	int value;
 	int digit;
 	int i;
@@ -190,8 +189,7 @@ cifs_inet_pton(int address_family, char *cp,void *dst)
 	if (value > addr_class_max[end - bytes])
 		return 0;
 
-	address.s_addr = *((__be32 *) bytes) | htonl(value);
-	*((__be32 *)dst) = address.s_addr;
+	*((__be32 *)dst) = *((__be32 *) bytes) | htonl(value);
 	return 1; /* success */
 }
 
@@ -815,7 +813,7 @@ map_smb_to_linux_error(struct smb_hdr *smb)
 	if (smb->Flags2 & SMBFLG2_ERR_STATUS) {
 		/* translate the newer STATUS codes to old style errors and then to POSIX errors */
 		__u32 err = le32_to_cpu(smb->Status.CifsError);
-		if(cifsFYI)
+		if(cifsFYI & CIFS_RC)
 			cifs_print_status(err);
 		ntstatus_to_dos(err, &smberrclass, &smberrcode);
 	} else {
@@ -870,7 +868,14 @@ unsigned int
 smbCalcSize(struct smb_hdr *ptr)
 {
 	return (sizeof (struct smb_hdr) + (2 * ptr->WordCount) +
-		BCC(ptr));
+		2 /* size of the bcc field */ + BCC(ptr));
+}
+
+unsigned int
+smbCalcSize_LE(struct smb_hdr *ptr)
+{
+	return (sizeof (struct smb_hdr) + (2 * ptr->WordCount) +
+		2 /* size of the bcc field */ + le16_to_cpu(BCC_LE(ptr)));
 }
 
 /* The following are taken from fs/ntfs/util.c */
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 6facb41117a..803389b64a2 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -19,8 +19,6 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
  */
 
-#pragma pack(1)
-
 #define NTLMSSP_SIGNATURE "NTLMSSP"
 /* Message Types */
 #define NtLmNegotiate     cpu_to_le32(1)
@@ -63,7 +61,7 @@ typedef struct _SECURITY_BUFFER {
 	__le16 Length;
 	__le16 MaximumLength;
 	__le32 Buffer;		/* offset to buffer */
-} SECURITY_BUFFER;
+} __attribute__((packed)) SECURITY_BUFFER;
 
 typedef struct _NEGOTIATE_MESSAGE {
 	__u8 Signature[sizeof (NTLMSSP_SIGNATURE)];
@@ -73,7 +71,7 @@ typedef struct _NEGOTIATE_MESSAGE {
 	SECURITY_BUFFER WorkstationName;	/* RFC 1001 and ASCII */
 	char DomainString[0];
 	/* followed by WorkstationString */
-} NEGOTIATE_MESSAGE, *PNEGOTIATE_MESSAGE;
+} __attribute__((packed)) NEGOTIATE_MESSAGE, *PNEGOTIATE_MESSAGE;
 
 typedef struct _CHALLENGE_MESSAGE {
 	__u8 Signature[sizeof (NTLMSSP_SIGNATURE)];
@@ -83,7 +81,7 @@ typedef struct _CHALLENGE_MESSAGE {
 	__u8 Challenge[CIFS_CRYPTO_KEY_SIZE];
 	__u8 Reserved[8];
 	SECURITY_BUFFER TargetInfoArray;
-} CHALLENGE_MESSAGE, *PCHALLENGE_MESSAGE;
+} __attribute__((packed)) CHALLENGE_MESSAGE, *PCHALLENGE_MESSAGE;
 
 typedef struct _AUTHENTICATE_MESSAGE {
 	__u8 Signature[sizeof (NTLMSSP_SIGNATURE)];
@@ -96,6 +94,4 @@ typedef struct _AUTHENTICATE_MESSAGE {
 	SECURITY_BUFFER SessionKey;
 	__le32 NegotiateFlags;
 	char UserString[0];
-} AUTHENTICATE_MESSAGE, *PAUTHENTICATE_MESSAGE;
-
-#pragma pack()			/* resume default structure packing */
+} __attribute__((packed)) AUTHENTICATE_MESSAGE, *PAUTHENTICATE_MESSAGE;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 22557716f9a..a86bd1c0760 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -91,7 +91,10 @@ static int construct_dentry(struct qstr *qstring, struct file *file,
 		}
 
 		*ptmp_inode = new_inode(file->f_dentry->d_sb);
-		tmp_dentry->d_op = &cifs_dentry_ops;
+		if (pTcon->nocase)
+			tmp_dentry->d_op = &cifs_ci_dentry_ops;
+		else
+			tmp_dentry->d_op = &cifs_dentry_ops;
 		if(*ptmp_inode == NULL)
 			return rc;
 		rc = 1;
@@ -148,6 +151,13 @@ static void fill_in_inode(struct inode *tmp_inode,
 			tmp_inode->i_mode = cifs_sb->mnt_dir_mode;
 		}
 		tmp_inode->i_mode |= S_IFDIR;
+	} else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && 
+		   (attr & ATTR_SYSTEM) && (end_of_file == 0)) {
+		*pobject_type = DT_FIFO;
+		tmp_inode->i_mode |= S_IFIFO;
+/* BB Finish for SFU style symlinks and devies */
+/*	} else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) &&
+		(attr & ATTR_SYSTEM) && ) { */
 /* we no longer mark these because we could not follow them */
 /*        } else if (attr & ATTR_REPARSE) {
                 *pobject_type = DT_LNK;
@@ -187,11 +197,17 @@ static void fill_in_inode(struct inode *tmp_inode,
 			tmp_inode->i_fop = &cifs_file_direct_ops;
 		else
 			tmp_inode->i_fop = &cifs_file_ops;
+		if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+			tmp_inode->i_fop->lock = NULL;
 		tmp_inode->i_data.a_ops = &cifs_addr_ops;
-
+		if((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
+		   (cifs_sb->tcon->ses->server->maxBuf <
+			4096 + MAX_CIFS_HDR_SIZE))
+			tmp_inode->i_data.a_ops->readpages = NULL;
 		if(isNewInode)
-			return; /* No sense invalidating pages for new inode since we
-					   have not started caching readahead file data yet */
+			return; /* No sense invalidating pages for new inode
+				   since have not started caching readahead file
+				   data yet */
 
 		if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
 			(local_size == tmp_inode->i_size)) {
@@ -290,7 +306,13 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
 			tmp_inode->i_fop = &cifs_file_direct_ops;
 		else
 			tmp_inode->i_fop = &cifs_file_ops;
+		if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+			tmp_inode->i_fop->lock = NULL;
 		tmp_inode->i_data.a_ops = &cifs_addr_ops;
+		if((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
+		   (cifs_sb->tcon->ses->server->maxBuf < 
+			4096 + MAX_CIFS_HDR_SIZE))
+			tmp_inode->i_data.a_ops->readpages = NULL;
 
 		if(isNewInode)
 			return; /* No sense invalidating pages for new inode since we
@@ -374,7 +396,8 @@ ffirst_retry:
 
 	rc = CIFSFindFirst(xid, pTcon,full_path,cifs_sb->local_nls,
 		&cifsFile->netfid, &cifsFile->srch_inf,
-		cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+		cifs_sb->mnt_cifs_flags & 
+			CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb));
 	if(rc == 0)
 		cifsFile->invalidHandle = FALSE;
 	if((rc == -EOPNOTSUPP) && 
@@ -491,6 +514,30 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
 	return rc;
 }
 
+/* Check if directory that we are searching has changed so we can decide
+   whether we can use the cached search results from the previous search */
+static int is_dir_changed(struct file * file)
+{
+	struct inode * inode;
+	struct cifsInodeInfo *cifsInfo;
+
+	if(file->f_dentry == NULL)
+		return 0;
+
+	inode = file->f_dentry->d_inode;
+
+	if(inode == NULL)
+		return 0;
+
+	cifsInfo = CIFS_I(inode);
+
+	if(cifsInfo->time == 0)
+		return 1; /* directory was changed, perhaps due to unlink */
+	else
+		return 0;
+
+}
+
 /* find the corresponding entry in the search */
 /* Note that the SMB server returns search entries for . and .. which
    complicates logic here if we choose to parse for them and we do not
@@ -507,7 +554,8 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
 	struct cifsFileInfo * cifsFile = file->private_data;
 	/* check if index in the buffer */
 	
-	if((cifsFile == NULL) || (ppCurrentEntry == NULL) || (num_to_ret == NULL))
+	if((cifsFile == NULL) || (ppCurrentEntry == NULL) || 
+	   (num_to_ret == NULL))
 		return -ENOENT;
 	
 	*ppCurrentEntry = NULL;
@@ -515,7 +563,9 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
 		cifsFile->srch_inf.index_of_last_entry - 
 			cifsFile->srch_inf.entries_in_buffer;
 /*	dump_cifs_file_struct(file, "In fce ");*/
-	if(index_to_find < first_entry_in_buffer) {
+	if(((index_to_find < cifsFile->srch_inf.index_of_last_entry) && 
+	     is_dir_changed(file)) || 
+	   (index_to_find < first_entry_in_buffer)) {
 		/* close and restart search */
 		cFYI(1,("search backing up - close and restart search"));
 		cifsFile->invalidHandle = TRUE;
@@ -536,7 +586,8 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
 	while((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && 
 	      (rc == 0) && (cifsFile->srch_inf.endOfSearch == FALSE)){
 	 	cFYI(1,("calling findnext2"));
-		rc = CIFSFindNext(xid,pTcon,cifsFile->netfid, &cifsFile->srch_inf);
+		rc = CIFSFindNext(xid,pTcon,cifsFile->netfid, 
+				  &cifsFile->srch_inf);
 		if(rc)
 			return -ENOENT;
 	}
@@ -548,14 +599,13 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
 		char * end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + 
 			smbCalcSize((struct smb_hdr *)
 				cifsFile->srch_inf.ntwrk_buf_start);
-/*	dump_cifs_file_struct(file,"found entry in fce "); */
 		first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry
 					- cifsFile->srch_inf.entries_in_buffer;
 		pos_in_buf = index_to_find - first_entry_in_buffer;
 		cFYI(1,("found entry - pos_in_buf %d",pos_in_buf)); 
 		current_entry = cifsFile->srch_inf.srch_entries_start;
 		for(i=0;(i<(pos_in_buf)) && (current_entry != NULL);i++) {
-			/* go entry to next entry figuring out which we need to start with */
+			/* go entry by entry figuring out which is first */
 			/* if( . or ..)
 				skip */
 			rc = cifs_entry_is_dot(current_entry,cifsFile);
@@ -582,11 +632,10 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
 	}
 
 	if(pos_in_buf >= cifsFile->srch_inf.entries_in_buffer) {
-		cFYI(1,("can not return entries when pos_in_buf beyond last entry"));
+		cFYI(1,("can not return entries pos_in_buf beyond last entry"));
 		*num_to_ret = 0;
 	} else
 		*num_to_ret = cifsFile->srch_inf.entries_in_buffer - pos_in_buf;
-/*	dump_cifs_file_struct(file, "end fce ");*/
 
 	return rc;
 }
@@ -721,7 +770,8 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
 			      (FILE_DIRECTORY_INFO *)pfindEntry,&obj_type, rc);
 	}
 	
-	rc = filldir(direntry,qstring.name,qstring.len,file->f_pos,tmp_inode->i_ino,obj_type);
+	rc = filldir(direntry,qstring.name,qstring.len,file->f_pos,
+		     tmp_inode->i_ino,obj_type);
 	if(rc) {
 		cFYI(1,("filldir rc = %d",rc));
 	}
@@ -805,15 +855,12 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 		FreeXid(xid);
 		return -EIO;
 	}
-/*	dump_cifs_file_struct(file, "Begin rdir "); */
 
 	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
 	pTcon = cifs_sb->tcon;
 	if(pTcon == NULL)
 		return -EINVAL;
 
-/*	cFYI(1,("readdir2 pos: %lld",file->f_pos)); */
-
 	switch ((int) file->f_pos) {
 	case 0:
 		/*if (filldir(direntry, ".", 1, file->f_pos,
@@ -866,7 +913,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 		cifsFile->search_resume_name = NULL; */
 
 		/* BB account for . and .. in f_pos as special case */
-		/* dump_cifs_file_struct(file, "rdir after default ");*/
 
 		rc = find_cifs_entry(xid,pTcon, file,
 				&current_entry,&num_to_fill);
@@ -906,14 +952,14 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 				cifs_save_resume_key(current_entry,cifsFile);
 				break;
 			} else 
-				current_entry = nxt_dir_entry(current_entry,end_of_smb);
+				current_entry = nxt_dir_entry(current_entry,
+							      end_of_smb);
 		}
 		kfree(tmp_buf);
 		break;
 	} /* end switch */
 
 rddir2_exit:
-	/* dump_cifs_file_struct(file, "end rdir ");  */
 	FreeXid(xid);
 	return rc;
 }
diff --git a/fs/cifs/rfc1002pdu.h b/fs/cifs/rfc1002pdu.h
index 806c0ed06da..9222033cad8 100644
--- a/fs/cifs/rfc1002pdu.h
+++ b/fs/cifs/rfc1002pdu.h
@@ -21,8 +21,6 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
  */
 
-#pragma pack(1)
-
 /* NB: unlike smb/cifs packets, the RFC1002 structures are big endian */
 
 	/* RFC 1002 session packet types */
@@ -48,17 +46,17 @@ struct rfc1002_session_packet {
 			__u8 calling_len;
 			__u8 calling_name[32];
 			__u8 scope2; /* null */
-		} session_req;
+		} __attribute__((packed)) session_req;
 		struct {
 			__u32 retarget_ip_addr;
 			__u16 port;
-		} retarget_resp;
+		} __attribute__((packed)) retarget_resp;
 		__u8 neg_ses_resp_error_code;
 		/* POSITIVE_SESSION_RESPONSE packet does not include trailer.
 		SESSION_KEEP_ALIVE packet also does not include a trailer.
 		Trailer for the SESSION_MESSAGE packet is SMB/CIFS header */
-	} trailer;
-};
+	} __attribute__((packed)) trailer;
+} __attribute__((packed));
 
 /* Negative Session Response error codes */
 #define RFC1002_NOT_LISTENING_CALLED  0x80 /* not listening on called name */
@@ -74,6 +72,3 @@ server netbios name). Currently server names are resolved only via DNS
 (tcp name) or ip address or an /etc/hosts equivalent mapping to ip address.*/
 
 #define DEFAULT_CIFS_CALLED_NAME  "*SMBSERVER      "
-
-#pragma pack()		/* resume default structure packing */
-                                                             
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 0046c219833..981ea0d8b9c 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -49,7 +49,8 @@ AllocMidQEntry(struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
 		return NULL;
 	}
 	
-	temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp,SLAB_KERNEL | SLAB_NOFS);
+	temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp,
+						    SLAB_KERNEL | SLAB_NOFS);
 	if (temp == NULL)
 		return temp;
 	else {
@@ -58,7 +59,9 @@ AllocMidQEntry(struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
 		temp->pid = current->pid;
 		temp->command = smb_buffer->Command;
 		cFYI(1, ("For smb_command %d", temp->command));
-		do_gettimeofday(&temp->when_sent);
+	/*	do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */
+		/* when mid allocated can be before when sent */
+		temp->when_alloc = jiffies;
 		temp->ses = ses;
 		temp->tsk = current;
 	}
@@ -74,6 +77,9 @@ AllocMidQEntry(struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
 static void
 DeleteMidQEntry(struct mid_q_entry *midEntry)
 {
+#ifdef CONFIG_CIFS_STATS2
+	unsigned long now;
+#endif
 	spin_lock(&GlobalMid_Lock);
 	midEntry->midState = MID_FREE;
 	list_del(&midEntry->qhead);
@@ -83,6 +89,22 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
 		cifs_buf_release(midEntry->resp_buf);
 	else
 		cifs_small_buf_release(midEntry->resp_buf);
+#ifdef CONFIG_CIFS_STATS2
+	now = jiffies;
+	/* commands taking longer than one second are indications that
+	   something is wrong, unless it is quite a slow link or server */
+	if((now - midEntry->when_alloc) > HZ) {
+		if((cifsFYI & CIFS_TIMER) && 
+		   (midEntry->command != SMB_COM_LOCKING_ANDX)) {
+			printk(KERN_DEBUG " CIFS slow rsp: cmd %d mid %d",
+			       midEntry->command, midEntry->mid);
+			printk(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
+			       now - midEntry->when_alloc,
+			       now - midEntry->when_sent,
+			       now - midEntry->when_received);
+		}
+	}
+#endif
 	mempool_free(midEntry, cifs_mid_poolp);
 }
 
@@ -146,32 +168,37 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
 	   Flags2 is converted in SendReceive */
 
 	smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length);
-	cFYI(1, ("Sending smb of length %d ", smb_buf_length));
+	cFYI(1, ("Sending smb of length %d", smb_buf_length));
 	dump_smb(smb_buffer, len);
 
 	while (len > 0) {
 		rc = kernel_sendmsg(ssocket, &smb_msg, &iov, 1, len);
 		if ((rc == -ENOSPC) || (rc == -EAGAIN)) {
 			i++;
-			if(i > 60) {
+		/* smaller timeout here than send2 since smaller size */
+		/* Although it may not be required, this also is smaller 
+		   oplock break time */  
+			if(i > 12) {
 				cERROR(1,
-				   ("sends on sock %p stuck for 30 seconds",
+				   ("sends on sock %p stuck for 7 seconds",
 				    ssocket));
 				rc = -EAGAIN;
 				break;
 			}
-			msleep(500);
+			msleep(1 << i);
 			continue;
 		}
 		if (rc < 0) 
 			break;
+		else
+			i = 0; /* reset i after each successful send */
 		iov.iov_base += rc;
 		iov.iov_len -= rc;
 		len -= rc;
 	}
 
 	if (rc < 0) {
-		cERROR(1,("Error %d sending data on socket to server.", rc));
+		cERROR(1,("Error %d sending data on socket to server", rc));
 	} else {
 		rc = 0;
 	}
@@ -179,26 +206,21 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
 	return rc;
 }
 
-#ifdef CIFS_EXPERIMENTAL
-/* BB finish off this function, adding support for writing set of pages as iovec */
-/* and also adding support for operations that need to parse the response smb    */
-
-int
-smb_sendv(struct socket *ssocket, struct smb_hdr *smb_buffer,
-	 unsigned int smb_buf_length, struct kvec * write_vector 
-	  /* page list */, struct sockaddr *sin)
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+static int
+smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
+	  struct sockaddr *sin)
 {
 	int rc = 0;
 	int i = 0;
 	struct msghdr smb_msg;
-	number_of_pages += 1; /* account for SMB header */
-	struct kvec * piov  = kmalloc(number_of_pages * sizeof(struct kvec));
-	unsigned len = smb_buf_length + 4;
-
+	struct smb_hdr *smb_buffer = iov[0].iov_base;
+	unsigned int len = iov[0].iov_len;
+	unsigned int total_len;
+	int first_vec = 0;
+	
 	if(ssocket == NULL)
 		return -ENOTSOCK; /* BB eventually add reconnect code here */
-	iov.iov_base = smb_buffer;
-	iov.iov_len = len;
 
 	smb_msg.msg_name = sin;
 	smb_msg.msg_namelen = sizeof (struct sockaddr);
@@ -211,49 +233,80 @@ smb_sendv(struct socket *ssocket, struct smb_hdr *smb_buffer,
 	   cifssmb.c and RFC1001 len is converted to bigendian in smb_send 
 	   Flags2 is converted in SendReceive */
 
+
+	total_len = 0;
+	for (i = 0; i < n_vec; i++)
+		total_len += iov[i].iov_len;
+
 	smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length);
-	cFYI(1, ("Sending smb of length %d ", smb_buf_length));
+	cFYI(1, ("Sending smb:  total_len %d", total_len));
 	dump_smb(smb_buffer, len);
 
-	while (len > 0) {
-		rc = kernel_sendmsg(ssocket, &smb_msg, &iov, number_of_pages, 
-				    len);
+	while (total_len) {
+		rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec],
+				    n_vec - first_vec, total_len);
 		if ((rc == -ENOSPC) || (rc == -EAGAIN)) {
 			i++;
-			if(i > 60) {
+			if(i >= 14) {
 				cERROR(1,
-				   ("sends on sock %p stuck for 30 seconds",
+				   ("sends on sock %p stuck for 15 seconds",
 				    ssocket));
 				rc = -EAGAIN;
 				break;
 			}
-			msleep(500);
+			msleep(1 << i);
 			continue;
 		}
 		if (rc < 0) 
 			break;
-		iov.iov_base += rc;
-		iov.iov_len -= rc;
-		len -= rc;
+
+		if (rc >= total_len) {
+			WARN_ON(rc > total_len);
+			break;
+		}
+		if(rc == 0) {
+			/* should never happen, letting socket clear before
+			   retrying is our only obvious option here */
+			cERROR(1,("tcp sent no data"));
+			msleep(500);
+			continue;
+		}
+		total_len -= rc;
+		/* the line below resets i */
+		for (i = first_vec; i < n_vec; i++) {
+			if (iov[i].iov_len) {
+				if (rc > iov[i].iov_len) {
+					rc -= iov[i].iov_len;
+					iov[i].iov_len = 0;
+				} else {
+					iov[i].iov_base += rc;
+					iov[i].iov_len -= rc;
+					first_vec = i;
+					break;
+				}
+			}
+		}
+		i = 0; /* in case we get ENOSPC on the next send */
 	}
 
 	if (rc < 0) {
-		cERROR(1,("Error %d sending data on socket to server.", rc));
-	} else {
+		cERROR(1,("Error %d sending data on socket to server", rc));
+	} else
 		rc = 0;
-	}
 
 	return rc;
 }
 
-
 int
-CIFSSendRcv(const unsigned int xid, struct cifsSesInfo *ses,
-	    struct smb_hdr *in_buf, struct kvec * write_vector /* page list */, int *pbytes_returned, const int long_op)
+SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, 
+	     struct kvec *iov, int n_vec, int *pbytes_returned,
+	     const int long_op)
 {
 	int rc = 0;
-	unsigned long timeout = 15 * HZ;
-	struct mid_q_entry *midQ = NULL;
+	unsigned int receive_len;
+	unsigned long timeout;
+	struct mid_q_entry *midQ;
+	struct smb_hdr *in_buf = iov[0].iov_base;
 
 	if (ses == NULL) {
 		cERROR(1,("Null smb session"));
@@ -263,14 +316,8 @@ CIFSSendRcv(const unsigned int xid, struct cifsSesInfo *ses,
 		cERROR(1,("Null tcp session"));
 		return -EIO;
 	}
-	if(pbytes_returned == NULL)
-		return -EIO;
-	else
-		*pbytes_returned = 0;
 
-  
-
-	if(ses->server->tcpStatus == CIFS_EXITING)
+	if(ses->server->tcpStatus == CifsExiting)
 		return -ENOENT;
 
 	/* Ensure that we do not send more than 50 overlapping requests 
@@ -282,11 +329,18 @@ CIFSSendRcv(const unsigned int xid, struct cifsSesInfo *ses,
 	} else {
 		spin_lock(&GlobalMid_Lock); 
 		while(1) {        
-			if(atomic_read(&ses->server->inFlight) >= cifs_max_pending){
+			if(atomic_read(&ses->server->inFlight) >= 
+					cifs_max_pending){
 				spin_unlock(&GlobalMid_Lock);
+#ifdef CONFIG_CIFS_STATS2
+				atomic_inc(&ses->server->num_waiters);
+#endif
 				wait_event(ses->server->request_q,
 					atomic_read(&ses->server->inFlight)
 					 < cifs_max_pending);
+#ifdef CONFIG_CIFS_STATS2
+				atomic_dec(&ses->server->num_waiters);
+#endif
 				spin_lock(&GlobalMid_Lock);
 			} else {
 				if(ses->server->tcpStatus == CifsExiting) {
@@ -314,17 +368,17 @@ CIFSSendRcv(const unsigned int xid, struct cifsSesInfo *ses,
 
 	if (ses->server->tcpStatus == CifsExiting) {
 		rc = -ENOENT;
-		goto cifs_out_label;
+		goto out_unlock2;
 	} else if (ses->server->tcpStatus == CifsNeedReconnect) {
 		cFYI(1,("tcp session dead - return to caller to retry"));
 		rc = -EAGAIN;
-		goto cifs_out_label;
+		goto out_unlock2;
 	} else if (ses->status != CifsGood) {
 		/* check if SMB session is bad because we are setting it up */
 		if((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && 
 			(in_buf->Command != SMB_COM_NEGOTIATE)) {
 			rc = -EAGAIN;
-			goto cifs_out_label;
+			goto out_unlock2;
 		} /* else ok - we are setting up session */
 	}
 	midQ = AllocMidQEntry(in_buf, ses);
@@ -338,51 +392,162 @@ CIFSSendRcv(const unsigned int xid, struct cifsSesInfo *ses,
 		return -ENOMEM;
 	}
 
-	if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
-		up(&ses->server->tcpSem);
-		cERROR(1,
-		       ("Illegal length, greater than maximum frame, %d ",
-			in_buf->smb_buf_length));
+/* BB FIXME */
+/* 	rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); */
+
+	midQ->midState = MID_REQUEST_SUBMITTED;
+#ifdef CONFIG_CIFS_STATS2
+	atomic_inc(&ses->server->inSend);
+#endif
+	rc = smb_send2(ses->server->ssocket, iov, n_vec,
+		      (struct sockaddr *) &(ses->server->addr.sockAddr));
+#ifdef CONFIG_CIFS_STATS2
+	atomic_dec(&ses->server->inSend);
+	midQ->when_sent = jiffies;
+#endif
+	if(rc < 0) {
 		DeleteMidQEntry(midQ);
+		up(&ses->server->tcpSem);
 		/* If not lock req, update # of requests on wire to server */
 		if(long_op < 3) {
 			atomic_dec(&ses->server->inFlight); 
 			wake_up(&ses->server->request_q);
 		}
-		return -EIO;
+		return rc;
+	} else
+		up(&ses->server->tcpSem);
+	if (long_op == -1)
+		goto cifs_no_response_exit2;
+	else if (long_op == 2) /* writes past end of file can take loong time */
+		timeout = 180 * HZ;
+	else if (long_op == 1)
+		timeout = 45 * HZ; /* should be greater than 
+			servers oplock break timeout (about 43 seconds) */
+	else if (long_op > 2) {
+		timeout = MAX_SCHEDULE_TIMEOUT;
+	} else
+		timeout = 15 * HZ;
+	/* wait for 15 seconds or until woken up due to response arriving or 
+	   due to last connection to this server being unmounted */
+	if (signal_pending(current)) {
+		/* if signal pending do not hold up user for full smb timeout
+		but we still give response a change to complete */
+		timeout = 2 * HZ;
+	}   
+
+	/* No user interrupts in wait - wreaks havoc with performance */
+	if(timeout != MAX_SCHEDULE_TIMEOUT) {
+		timeout += jiffies;
+		wait_event(ses->server->response_q,
+			(!(midQ->midState & MID_REQUEST_SUBMITTED)) || 
+			time_after(jiffies, timeout) || 
+			((ses->server->tcpStatus != CifsGood) &&
+			 (ses->server->tcpStatus != CifsNew)));
+	} else {
+		wait_event(ses->server->response_q,
+			(!(midQ->midState & MID_REQUEST_SUBMITTED)) || 
+			((ses->server->tcpStatus != CifsGood) &&
+			 (ses->server->tcpStatus != CifsNew)));
 	}
 
-	/* BB can we sign efficiently in this path? */
-	rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number);
+	spin_lock(&GlobalMid_Lock);
+	if (midQ->resp_buf) {
+		spin_unlock(&GlobalMid_Lock);
+		receive_len = midQ->resp_buf->smb_buf_length;
+	} else {
+		cERROR(1,("No response to cmd %d mid %d",
+			midQ->command, midQ->mid));
+		if(midQ->midState == MID_REQUEST_SUBMITTED) {
+			if(ses->server->tcpStatus == CifsExiting)
+				rc = -EHOSTDOWN;
+			else {
+				ses->server->tcpStatus = CifsNeedReconnect;
+				midQ->midState = MID_RETRY_NEEDED;
+			}
+		}
 
-	midQ->midState = MID_REQUEST_SUBMITTED;
-/*	rc = smb_sendv(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
-		       piovec, 
-		       (struct sockaddr *) &(ses->server->addr.sockAddr));*/
-	if(rc < 0) {
+		if (rc != -EHOSTDOWN) {
+			if(midQ->midState == MID_RETRY_NEEDED) {
+				rc = -EAGAIN;
+				cFYI(1,("marking request for retry"));
+			} else {
+				rc = -EIO;
+			}
+		}
+		spin_unlock(&GlobalMid_Lock);
 		DeleteMidQEntry(midQ);
-		up(&ses->server->tcpSem);
 		/* If not lock req, update # of requests on wire to server */
 		if(long_op < 3) {
 			atomic_dec(&ses->server->inFlight); 
 			wake_up(&ses->server->request_q);
 		}
 		return rc;
-	} else
-		up(&ses->server->tcpSem);
-cifs_out_label:
-	if(midQ)
-	        DeleteMidQEntry(midQ);
-                                                                                                                           
+	}
+  
+	if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
+		cERROR(1, ("Frame too large received.  Length: %d  Xid: %d",
+			receive_len, xid));
+		rc = -EIO;
+	} else {		/* rcvd frame is ok */
+
+		if (midQ->resp_buf && 
+			(midQ->midState == MID_RESPONSE_RECEIVED)) {
+			in_buf->smb_buf_length = receive_len;
+			/* BB verify that length would not overrun small buf */
+			memcpy((char *)in_buf + 4,
+			       (char *)midQ->resp_buf + 4,
+			       receive_len);
+
+			dump_smb(in_buf, 80);
+			/* convert the length into a more usable form */
+			if((receive_len > 24) &&
+			   (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
+					SECMODE_SIGN_ENABLED))) {
+				rc = cifs_verify_signature(in_buf,
+						ses->server->mac_signing_key,
+						midQ->sequence_number+1);
+				if(rc) {
+					cERROR(1,("Unexpected SMB signature"));
+					/* BB FIXME add code to kill session */
+				}
+			}
+
+			*pbytes_returned = in_buf->smb_buf_length;
+
+			/* BB special case reconnect tid and uid here? */
+			rc = map_smb_to_linux_error(in_buf);
+
+			/* convert ByteCount if necessary */
+			if (receive_len >=
+			    sizeof (struct smb_hdr) -
+			    4 /* do not count RFC1001 header */  +
+			    (2 * in_buf->WordCount) + 2 /* bcc */ )
+				BCC(in_buf) = le16_to_cpu(BCC(in_buf));
+		} else {
+			rc = -EIO;
+			cFYI(1,("Bad MID state?"));
+		}
+	}
+cifs_no_response_exit2:
+	DeleteMidQEntry(midQ);
+
 	if(long_op < 3) {
-		atomic_dec(&ses->server->inFlight);
+		atomic_dec(&ses->server->inFlight); 
 		wake_up(&ses->server->request_q);
 	}
 
 	return rc;
-}
 
+out_unlock2:
+	up(&ses->server->tcpSem);
+	/* If not lock req, update # of requests on wire to server */
+	if(long_op < 3) {
+		atomic_dec(&ses->server->inFlight); 
+		wake_up(&ses->server->request_q);
+	}
 
+	return rc;
+}
 #endif /* CIFS_EXPERIMENTAL */
 
 int
@@ -419,9 +584,15 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 			if(atomic_read(&ses->server->inFlight) >= 
 					cifs_max_pending){
 				spin_unlock(&GlobalMid_Lock);
+#ifdef CONFIG_CIFS_STATS2
+				atomic_inc(&ses->server->num_waiters);
+#endif
 				wait_event(ses->server->request_q,
 					atomic_read(&ses->server->inFlight)
 					 < cifs_max_pending);
+#ifdef CONFIG_CIFS_STATS2
+				atomic_dec(&ses->server->num_waiters);
+#endif
 				spin_lock(&GlobalMid_Lock);
 			} else {
 				if(ses->server->tcpStatus == CifsExiting) {
@@ -490,8 +661,15 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 	rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number);
 
 	midQ->midState = MID_REQUEST_SUBMITTED;
+#ifdef CONFIG_CIFS_STATS2
+	atomic_inc(&ses->server->inSend);
+#endif
 	rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
 		      (struct sockaddr *) &(ses->server->addr.sockAddr));
+#ifdef CONFIG_CIFS_STATS2
+	atomic_dec(&ses->server->inSend);
+	midQ->when_sent = jiffies;
+#endif
 	if(rc < 0) {
 		DeleteMidQEntry(midQ);
 		up(&ses->server->tcpSem);
@@ -506,7 +684,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 	if (long_op == -1)
 		goto cifs_no_response_exit;
 	else if (long_op == 2) /* writes past end of file can take loong time */
-		timeout = 300 * HZ;
+		timeout = 180 * HZ;
 	else if (long_op == 1)
 		timeout = 45 * HZ; /* should be greater than 
 			servers oplock break timeout (about 43 seconds) */
@@ -540,9 +718,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 	spin_lock(&GlobalMid_Lock);
 	if (midQ->resp_buf) {
 		spin_unlock(&GlobalMid_Lock);
-		receive_len = be32_to_cpu(*(__be32 *)midQ->resp_buf);
+		receive_len = midQ->resp_buf->smb_buf_length;
 	} else {
-		cERROR(1,("No response buffer"));
+		cERROR(1,("No response for cmd %d mid %d",
+			  midQ->command, midQ->mid));
 		if(midQ->midState == MID_REQUEST_SUBMITTED) {
 			if(ses->server->tcpStatus == CifsExiting)
 				rc = -EHOSTDOWN;
@@ -610,7 +789,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 				BCC(out_buf) = le16_to_cpu(BCC(out_buf));
 		} else {
 			rc = -EIO;
-			cFYI(1,("Bad MID state? "));
+			cERROR(1,("Bad MID state? "));
 		}
 	}
 cifs_no_response_exit:
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 3d1cce3653b..6a3df88accf 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -370,8 +370,8 @@ static int init_coda_psdev(void)
 	}		
 	devfs_mk_dir ("coda");
 	for (i = 0; i < MAX_CODADEVS; i++) {
-		class_device_create(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR,i),
-				NULL, "cfs%d", i);
+		class_device_create(coda_psdev_class, NULL,
+				MKDEV(CODA_PSDEV_MAJOR,i), NULL, "cfs%d", i);
 		err = devfs_mk_cdev(MKDEV(CODA_PSDEV_MAJOR, i),
 				S_IFCHR|S_IRUSR|S_IWUSR, "coda/%d", i);
 		if (err)
diff --git a/fs/compat.c b/fs/compat.c
index 6b06b6bae35..8e71cdbecc7 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -44,6 +44,8 @@
 #include <linux/nfsd/syscall.h>
 #include <linux/personality.h>
 #include <linux/rwsem.h>
+#include <linux/acct.h>
+#include <linux/mm.h>
 
 #include <net/sock.h>		/* siocdevprivate_ioctl */
 
@@ -310,96 +312,6 @@ static int __init init_sys32_ioctl(void)
 
 __initcall(init_sys32_ioctl);
 
-int register_ioctl32_conversion(unsigned int cmd,
-				ioctl_trans_handler_t handler)
-{
-	struct ioctl_trans *t;
-	struct ioctl_trans *new_t;
-	unsigned long hash = ioctl32_hash(cmd);
-
-	new_t = kmalloc(sizeof(*new_t), GFP_KERNEL);
-	if (!new_t)
-		return -ENOMEM;
-
-	down_write(&ioctl32_sem);
-	for (t = ioctl32_hash_table[hash]; t; t = t->next) {
-		if (t->cmd == cmd) {
-			printk(KERN_ERR "Trying to register duplicated ioctl32 "
-					"handler %x\n", cmd);
-			up_write(&ioctl32_sem);
-			kfree(new_t);
-			return -EINVAL; 
-		}
-	}
-	new_t->next = NULL;
-	new_t->cmd = cmd;
-	new_t->handler = handler;
-	ioctl32_insert_translation(new_t);
-
-	up_write(&ioctl32_sem);
-	return 0;
-}
-EXPORT_SYMBOL(register_ioctl32_conversion);
-
-static inline int builtin_ioctl(struct ioctl_trans *t)
-{ 
-	return t >= ioctl_start && t < (ioctl_start + ioctl_table_size);
-} 
-
-/* Problem: 
-   This function cannot unregister duplicate ioctls, because they are not
-   unique.
-   When they happen we need to extend the prototype to pass the handler too. */
-
-int unregister_ioctl32_conversion(unsigned int cmd)
-{
-	unsigned long hash = ioctl32_hash(cmd);
-	struct ioctl_trans *t, *t1;
-
-	down_write(&ioctl32_sem);
-
-	t = ioctl32_hash_table[hash];
-	if (!t) { 
-		up_write(&ioctl32_sem);
-		return -EINVAL;
-	} 
-
-	if (t->cmd == cmd) { 
-		if (builtin_ioctl(t)) {
-			printk("%p tried to unregister builtin ioctl %x\n",
-			       __builtin_return_address(0), cmd);
-		} else { 
-			ioctl32_hash_table[hash] = t->next;
-			up_write(&ioctl32_sem);
-			kfree(t);
-			return 0;
-		}
-	} 
-	while (t->next) {
-		t1 = t->next;
-		if (t1->cmd == cmd) { 
-			if (builtin_ioctl(t1)) {
-				printk("%p tried to unregister builtin "
-					"ioctl %x\n",
-					__builtin_return_address(0), cmd);
-				goto out;
-			} else { 
-				t->next = t1->next;
-				up_write(&ioctl32_sem);
-				kfree(t1);
-				return 0;
-			}
-		}
-		t = t1;
-	}
-	printk(KERN_ERR "Trying to free unknown 32bit ioctl handler %x\n",
-				cmd);
-out:
-	up_write(&ioctl32_sem);
-	return -EINVAL;
-}
-EXPORT_SYMBOL(unregister_ioctl32_conversion); 
-
 static void compat_ioctl_error(struct file *filp, unsigned int fd,
 		unsigned int cmd, unsigned long arg)
 {
@@ -720,14 +632,14 @@ compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb)
 struct compat_ncp_mount_data {
 	compat_int_t version;
 	compat_uint_t ncp_fd;
-	compat_uid_t mounted_uid;
+	__compat_uid_t mounted_uid;
 	compat_pid_t wdog_pid;
 	unsigned char mounted_vol[NCP_VOLNAME_LEN + 1];
 	compat_uint_t time_out;
 	compat_uint_t retry_count;
 	compat_uint_t flags;
-	compat_uid_t uid;
-	compat_gid_t gid;
+	__compat_uid_t uid;
+	__compat_gid_t gid;
 	compat_mode_t file_mode;
 	compat_mode_t dir_mode;
 };
@@ -784,9 +696,9 @@ static void *do_ncp_super_data_conv(void *raw_data)
 
 struct compat_smb_mount_data {
 	compat_int_t version;
-	compat_uid_t mounted_uid;
-	compat_uid_t uid;
-	compat_gid_t gid;
+	__compat_uid_t mounted_uid;
+	__compat_uid_t uid;
+	__compat_gid_t gid;
 	compat_mode_t file_mode;
 	compat_mode_t dir_mode;
 };
@@ -1365,6 +1277,16 @@ out:
 }
 
 /*
+ * Exactly like fs/open.c:sys_open(), except that it doesn't set the
+ * O_LARGEFILE flag.
+ */
+asmlinkage long
+compat_sys_open(const char __user *filename, int flags, int mode)
+{
+	return do_sys_open(filename, flags, mode);
+}
+
+/*
  * compat_count() counts the number of arguments/envelopes. It is basically
  * a copy of count() from fs/exec.c, except that it works with 32 bit argv
  * and envp pointers.
@@ -1567,6 +1489,7 @@ int compat_do_execve(char * filename,
 
 		/* execve success */
 		security_bprm_free(bprm);
+		acct_update_integrals(current);
 		kfree(bprm);
 		return retval;
 	}
@@ -1699,6 +1622,7 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
 	char *bits;
 	long timeout;
 	int size, max_fdset, ret = -EINVAL;
+	struct fdtable *fdt;
 
 	timeout = MAX_SCHEDULE_TIMEOUT;
 	if (tvp) {
@@ -1724,7 +1648,10 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
 		goto out_nofds;
 
 	/* max_fdset can increase, so grab it once to avoid race */
-	max_fdset = current->files->max_fdset;
+	rcu_read_lock();
+	fdt = files_fdtable(current->files);
+	max_fdset = fdt->max_fdset;
+	rcu_read_unlock();
 	if (n > max_fdset)
 		n = max_fdset;
 
@@ -1808,8 +1735,8 @@ struct compat_nfsctl_export {
 	compat_dev_t	ex32_dev;
 	compat_ino_t	ex32_ino;
 	compat_int_t	ex32_flags;
-	compat_uid_t	ex32_anon_uid;
-	compat_gid_t	ex32_anon_gid;
+	__compat_uid_t	ex32_anon_uid;
+	__compat_gid_t	ex32_anon_gid;
 };
 
 struct compat_nfsctl_fdparm {
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 155e612635f..43dbcb0b21e 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -798,13 +798,16 @@ static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
 		r = (void *) &r4;
 	}
 
-	if (ret)
-		return -EFAULT;
+	if (ret) {
+		ret = -EFAULT;
+		goto out;
+	}
 
 	set_fs (KERNEL_DS);
 	ret = sys_ioctl (fd, cmd, (unsigned long) r);
 	set_fs (old_fs);
 
+out:
 	if (mysock)
 		sockfd_put(mysock);
 
@@ -3043,10 +3046,15 @@ HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
 /* Serial */
 HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl)
 HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl)
+#ifdef TIOCGLTC
+COMPATIBLE_IOCTL(TIOCGLTC)
+COMPATIBLE_IOCTL(TIOCSLTC)
+#endif
 /* Usbdevfs */
 HANDLE_IOCTL(USBDEVFS_CONTROL32, do_usbdevfs_control)
 HANDLE_IOCTL(USBDEVFS_BULK32, do_usbdevfs_bulk)
 HANDLE_IOCTL(USBDEVFS_DISCSIGNAL32, do_usbdevfs_discsignal)
+COMPATIBLE_IOCTL(USBDEVFS_IOCTL32)
 /* i2c */
 HANDLE_IOCTL(I2C_FUNCS, w_long)
 HANDLE_IOCTL(I2C_RDWR, do_i2c_rdwr_ioctl)
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 6c285efa200..7fe85415ae7 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -39,12 +39,47 @@ static DECLARE_MUTEX(read_mutex);
 #define CRAMINO(x)	((x)->offset?(x)->offset<<2:1)
 #define OFFSET(x)	((x)->i_ino)
 
-static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inode * cramfs_inode)
+
+static int cramfs_iget5_test(struct inode *inode, void *opaque)
+{
+	struct cramfs_inode *cramfs_inode = opaque;
+
+	if (inode->i_ino != CRAMINO(cramfs_inode))
+		return 0; /* does not match */
+
+	if (inode->i_ino != 1)
+		return 1;
+
+	/* all empty directories, char, block, pipe, and sock, share inode #1 */
+
+	if ((inode->i_mode != cramfs_inode->mode) ||
+	    (inode->i_gid != cramfs_inode->gid) ||
+	    (inode->i_uid != cramfs_inode->uid))
+		return 0; /* does not match */
+
+	if ((S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) &&
+	    (inode->i_rdev != old_decode_dev(cramfs_inode->size)))
+		return 0; /* does not match */
+
+	return 1; /* matches */
+}
+
+static int cramfs_iget5_set(struct inode *inode, void *opaque)
+{
+	struct cramfs_inode *cramfs_inode = opaque;
+	inode->i_ino = CRAMINO(cramfs_inode);
+	return 0;
+}
+
+static struct inode *get_cramfs_inode(struct super_block *sb,
+				struct cramfs_inode * cramfs_inode)
 {
-	struct inode * inode = new_inode(sb);
+	struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode),
+					    cramfs_iget5_test, cramfs_iget5_set,
+					    cramfs_inode);
 	static struct timespec zerotime;
 
-	if (inode) {
+	if (inode && (inode->i_state & I_NEW)) {
 		inode->i_mode = cramfs_inode->mode;
 		inode->i_uid = cramfs_inode->uid;
 		inode->i_size = cramfs_inode->size;
@@ -58,7 +93,6 @@ static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inod
 		   but it's the best we can do without reading the directory
 	           contents.  1 yields the right result in GNU find, even
 		   without -noleaf option. */
-		insert_inode_hash(inode);
 		if (S_ISREG(inode->i_mode)) {
 			inode->i_fop = &generic_ro_fops;
 			inode->i_data.a_ops = &cramfs_aops;
@@ -74,6 +108,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inod
 			init_special_inode(inode, inode->i_mode,
 				old_decode_dev(cramfs_inode->size));
 		}
+		unlock_new_inode(inode);
 	}
 	return inode;
 }
diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c
index 5034365b06a..8def89f2c43 100644
--- a/fs/cramfs/uncompress.c
+++ b/fs/cramfs/uncompress.c
@@ -19,6 +19,7 @@
 #include <linux/errno.h>
 #include <linux/vmalloc.h>
 #include <linux/zlib.h>
+#include <linux/cramfs_fs.h>
 
 static z_stream stream;
 static int initialized;
diff --git a/fs/dcache.c b/fs/dcache.c
index 3aa8a7e980d..e90512ed35a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -19,6 +19,7 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
+#include <linux/fsnotify.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/smp_lock.h>
@@ -101,6 +102,8 @@ static inline void dentry_iput(struct dentry * dentry)
 		list_del_init(&dentry->d_alias);
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&dcache_lock);
+		if (!inode->i_nlink)
+			fsnotify_inoderemove(inode);
 		if (dentry->d_op && dentry->d_op->d_iput)
 			dentry->d_op->d_iput(dentry, inode);
 		else
@@ -335,12 +338,10 @@ struct dentry * d_find_alias(struct inode *inode)
  */
 void d_prune_aliases(struct inode *inode)
 {
-	struct list_head *tmp, *head = &inode->i_dentry;
+	struct dentry *dentry;
 restart:
 	spin_lock(&dcache_lock);
-	tmp = head;
-	while ((tmp = tmp->next) != head) {
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
+	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		spin_lock(&dentry->d_lock);
 		if (!atomic_read(&dentry->d_count)) {
 			__dget_locked(dentry);
@@ -461,10 +462,7 @@ void shrink_dcache_sb(struct super_block * sb)
 	 * superblock to the most recent end of the unused list.
 	 */
 	spin_lock(&dcache_lock);
-	next = dentry_unused.next;
-	while (next != &dentry_unused) {
-		tmp = next;
-		next = tmp->next;
+	list_for_each_safe(tmp, next, &dentry_unused) {
 		dentry = list_entry(tmp, struct dentry, d_lru);
 		if (dentry->d_sb != sb)
 			continue;
@@ -476,10 +474,7 @@ void shrink_dcache_sb(struct super_block * sb)
 	 * Pass two ... free the dentries for this superblock.
 	 */
 repeat:
-	next = dentry_unused.next;
-	while (next != &dentry_unused) {
-		tmp = next;
-		next = tmp->next;
+	list_for_each_safe(tmp, next, &dentry_unused) {
 		dentry = list_entry(tmp, struct dentry, d_lru);
 		if (dentry->d_sb != sb)
 			continue;
@@ -694,7 +689,7 @@ void shrink_dcache_anon(struct hlist_head *head)
  *
  * In this case we return -1 to tell the caller that we baled.
  */
-static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
+static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
@@ -1165,13 +1160,16 @@ out:
  
 void d_delete(struct dentry * dentry)
 {
+	int isdir = 0;
 	/*
 	 * Are we the only user?
 	 */
 	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
+	isdir = S_ISDIR(dentry->d_inode->i_mode);
 	if (atomic_read(&dentry->d_count) == 1) {
 		dentry_iput(dentry);
+		fsnotify_nameremove(dentry, isdir);
 		return;
 	}
 
@@ -1180,6 +1178,8 @@ void d_delete(struct dentry * dentry)
 
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
+
+	fsnotify_nameremove(dentry, isdir);
 }
 
 static void __d_rehash(struct dentry * entry, struct hlist_head *list)
diff --git a/fs/devfs/base.c b/fs/devfs/base.c
index 1ecfe1f184d..8b679b67e5e 100644
--- a/fs/devfs/base.c
+++ b/fs/devfs/base.c
@@ -2491,11 +2491,11 @@ static int devfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
 	return 0;
 }				/*  End Function devfs_mknod  */
 
-static int devfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *devfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct devfs_entry *p = get_devfs_entry_from_vfs_inode(dentry->d_inode);
 	nd_set_link(nd, p ? p->u.symlink.linkname : ERR_PTR(-ENODEV));
-	return 0;
+	return NULL;
 }				/*  End Function devfs_follow_link  */
 
 static struct inode_operations devfs_iops = {
diff --git a/fs/devpts/Makefile b/fs/devpts/Makefile
index 5800df2e50c..236696efcba 100644
--- a/fs/devpts/Makefile
+++ b/fs/devpts/Makefile
@@ -5,4 +5,3 @@
 obj-$(CONFIG_UNIX98_PTYS)		+= devpts.o
 
 devpts-$(CONFIG_UNIX98_PTYS)		:= inode.o
-devpts-$(CONFIG_DEVPTS_FS_SECURITY)	+= xattr_security.o
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 1571c8d6c23..f2be44d4491 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -18,28 +18,9 @@
 #include <linux/mount.h>
 #include <linux/tty.h>
 #include <linux/devpts_fs.h>
-#include <linux/xattr.h>
 
 #define DEVPTS_SUPER_MAGIC 0x1cd1
 
-extern struct xattr_handler devpts_xattr_security_handler;
-
-static struct xattr_handler *devpts_xattr_handlers[] = {
-#ifdef CONFIG_DEVPTS_FS_SECURITY
-	&devpts_xattr_security_handler,
-#endif
-	NULL
-};
-
-static struct inode_operations devpts_file_inode_operations = {
-#ifdef CONFIG_DEVPTS_FS_XATTR
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.listxattr	= generic_listxattr,
-	.removexattr	= generic_removexattr,
-#endif
-};
-
 static struct vfsmount *devpts_mnt;
 static struct dentry *devpts_root;
 
@@ -102,7 +83,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
 	s->s_blocksize_bits = 10;
 	s->s_magic = DEVPTS_SUPER_MAGIC;
 	s->s_op = &devpts_sops;
-	s->s_xattr = devpts_xattr_handlers;
 	s->s_time_gran = 1;
 
 	inode = new_inode(s);
@@ -175,7 +155,6 @@ int devpts_pty_new(struct tty_struct *tty)
 	inode->i_gid = config.setgid ? config.gid : current->fsgid;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	init_special_inode(inode, S_IFCHR|config.mode, device);
-	inode->i_op = &devpts_file_inode_operations;
 	inode->u.generic_ip = tty;
 
 	dentry = get_node(number);
diff --git a/fs/devpts/xattr_security.c b/fs/devpts/xattr_security.c
deleted file mode 100644
index 864cb5c79ba..00000000000
--- a/fs/devpts/xattr_security.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Security xattr support for devpts.
- *
- * Author: Stephen Smalley <sds@epoch.ncsc.mil>
- * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- */
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/security.h>
-#include <linux/xattr.h>
-
-static size_t
-devpts_xattr_security_list(struct inode *inode, char *list, size_t list_len,
-			   const char *name, size_t name_len)
-{
-	return security_inode_listsecurity(inode, list, list_len);
-}
-
-static int
-devpts_xattr_security_get(struct inode *inode, const char *name,
-			  void *buffer, size_t size)
-{
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-	return security_inode_getsecurity(inode, name, buffer, size);
-}
-
-static int
-devpts_xattr_security_set(struct inode *inode, const char *name,
-			  const void *value, size_t size, int flags)
-{
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-	return security_inode_setsecurity(inode, name, value, size, flags);
-}
-
-struct xattr_handler devpts_xattr_security_handler = {
-	.prefix	= XATTR_SECURITY_PREFIX,
-	.list	= devpts_xattr_security_list,
-	.get	= devpts_xattr_security_get,
-	.set	= devpts_xattr_security_set,
-};
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 0d06097bc99..3931e7f1e6b 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -162,6 +162,7 @@ static int dio_refill_pages(struct dio *dio)
 	up_read(&current->mm->mmap_sem);
 
 	if (ret < 0 && dio->blocks_available && (dio->rw == WRITE)) {
+		struct page *page = ZERO_PAGE(dio->curr_user_address);
 		/*
 		 * A memory fault, but the filesystem has some outstanding
 		 * mapped blocks.  We need to use those blocks up to avoid
@@ -169,7 +170,8 @@ static int dio_refill_pages(struct dio *dio)
 		 */
 		if (dio->page_errors == 0)
 			dio->page_errors = ret;
-		dio->pages[0] = ZERO_PAGE(dio->curr_user_address);
+		page_cache_get(page);
+		dio->pages[0] = page;
 		dio->head = 0;
 		dio->tail = 1;
 		ret = 0;
diff --git a/fs/dquot.c b/fs/dquot.c
index b9732335bcd..ea7644227a6 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -500,7 +500,7 @@ static void prune_dqcache(int count)
  * more memory
  */
 
-static int shrink_dqcache_memory(int nr, unsigned int gfp_mask)
+static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		spin_lock(&dq_list_lock);
@@ -662,7 +662,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
 restart:
 	file_list_lock();
 	list_for_each(p, &sb->s_files) {
-		struct file *filp = list_entry(p, struct file, f_list);
+		struct file *filp = list_entry(p, struct file, f_u.fu_list);
 		struct inode *inode = filp->f_dentry->d_inode;
 		if (filp->f_mode & FMODE_WRITE && dqinit_needed(inode, type)) {
 			struct dentry *dentry = dget(filp->f_dentry);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 6ab1dd0ca90..4284cd31eba 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -101,6 +101,10 @@
 /* Maximum number of poll wake up nests we are allowing */
 #define EP_MAX_POLLWAKE_NESTS 4
 
+/* Maximum msec timeout value storeable in a long int */
+#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
+
+
 struct epoll_filefd {
 	struct file *file;
 	int fd;
@@ -231,8 +235,9 @@ struct ep_pqueue {
 
 static void ep_poll_safewake_init(struct poll_safewake *psw);
 static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq);
-static int ep_getfd(int *efd, struct inode **einode, struct file **efile);
-static int ep_file_init(struct file *file);
+static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
+		    struct eventpoll *ep);
+static int ep_alloc(struct eventpoll **pep);
 static void ep_free(struct eventpoll *ep);
 static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
 static void ep_use_epitem(struct epitem *epi);
@@ -501,38 +506,37 @@ void eventpoll_release_file(struct file *file)
 asmlinkage long sys_epoll_create(int size)
 {
 	int error, fd;
+	struct eventpoll *ep;
 	struct inode *inode;
 	struct file *file;
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
 		     current, size));
 
-	/* Sanity check on the size parameter */
+	/*
+	 * Sanity check on the size parameter, and create the internal data
+	 * structure ( "struct eventpoll" ).
+	 */
 	error = -EINVAL;
-	if (size <= 0)
+	if (size <= 0 || (error = ep_alloc(&ep)) != 0)
 		goto eexit_1;
 
 	/*
 	 * Creates all the items needed to setup an eventpoll file. That is,
 	 * a file structure, and inode and a free file descriptor.
 	 */
-	error = ep_getfd(&fd, &inode, &file);
-	if (error)
-		goto eexit_1;
-
-	/* Setup the file internal data structure ( "struct eventpoll" ) */
-	error = ep_file_init(file);
+	error = ep_getfd(&fd, &inode, &file, ep);
 	if (error)
 		goto eexit_2;
 
-
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
 		     current, size, fd));
 
 	return fd;
 
 eexit_2:
-	sys_close(fd);
+	ep_free(ep);
+	kfree(ep);
 eexit_1:
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
 		     current, size, error));
@@ -706,7 +710,8 @@ eexit_1:
 /*
  * Creates the file descriptor to be used by the epoll interface.
  */
-static int ep_getfd(int *efd, struct inode **einode, struct file **efile)
+static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
+		    struct eventpoll *ep)
 {
 	struct qstr this;
 	char name[32];
@@ -756,7 +761,7 @@ static int ep_getfd(int *efd, struct inode **einode, struct file **efile)
 	file->f_op = &eventpoll_fops;
 	file->f_mode = FMODE_READ;
 	file->f_version = 0;
-	file->private_data = NULL;
+	file->private_data = ep;
 
 	/* Install the new setup file into the allocated fd. */
 	fd_install(fd, file);
@@ -777,14 +782,13 @@ eexit_1:
 }
 
 
-static int ep_file_init(struct file *file)
+static int ep_alloc(struct eventpoll **pep)
 {
-	struct eventpoll *ep;
+	struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL);
 
-	if (!(ep = kmalloc(sizeof(struct eventpoll), GFP_KERNEL)))
+	if (!ep)
 		return -ENOMEM;
 
-	memset(ep, 0, sizeof(*ep));
 	rwlock_init(&ep->lock);
 	init_rwsem(&ep->sem);
 	init_waitqueue_head(&ep->wq);
@@ -792,9 +796,9 @@ static int ep_file_init(struct file *file)
 	INIT_LIST_HEAD(&ep->rdllist);
 	ep->rbr = RB_ROOT;
 
-	file->private_data = ep;
+	*pep = ep;
 
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_file_init() ep=%p\n",
+	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n",
 		     current, ep));
 	return 0;
 }
@@ -1506,8 +1510,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 	 * and the overflow condition. The passed timeout is in milliseconds,
 	 * that why (t * HZ) / 1000.
 	 */
-	jtimeout = timeout == -1 || timeout > (MAX_SCHEDULE_TIMEOUT - 1000) / HZ ?
-		MAX_SCHEDULE_TIMEOUT: (timeout * HZ + 999) / 1000;
+	jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
+		MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
 
 retry:
 	write_lock_irqsave(&ep->lock, flags);
diff --git a/fs/exec.c b/fs/exec.c
index 222ab1c572d..10d493fea7c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -126,8 +126,7 @@ asmlinkage long sys_uselib(const char __user * library)
 	struct nameidata nd;
 	int error;
 
-	nd.intent.open.flags = FMODE_READ;
-	error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
+	error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ);
 	if (error)
 		goto out;
 
@@ -139,7 +138,7 @@ asmlinkage long sys_uselib(const char __user * library)
 	if (error)
 		goto exit;
 
-	file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+	file = nameidata_to_filp(&nd, O_RDONLY);
 	error = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out;
@@ -167,6 +166,7 @@ asmlinkage long sys_uselib(const char __user * library)
 out:
   	return error;
 exit:
+	release_open_intent(&nd);
 	path_release(&nd);
 	goto out;
 }
@@ -309,40 +309,36 @@ void install_arg_page(struct vm_area_struct *vma,
 	pud_t * pud;
 	pmd_t * pmd;
 	pte_t * pte;
+	spinlock_t *ptl;
 
 	if (unlikely(anon_vma_prepare(vma)))
-		goto out_sig;
+		goto out;
 
 	flush_dcache_page(page);
 	pgd = pgd_offset(mm, address);
-
-	spin_lock(&mm->page_table_lock);
 	pud = pud_alloc(mm, pgd, address);
 	if (!pud)
 		goto out;
 	pmd = pmd_alloc(mm, pud, address);
 	if (!pmd)
 		goto out;
-	pte = pte_alloc_map(mm, pmd, address);
+	pte = pte_alloc_map_lock(mm, pmd, address, &ptl);
 	if (!pte)
 		goto out;
 	if (!pte_none(*pte)) {
-		pte_unmap(pte);
+		pte_unmap_unlock(pte, ptl);
 		goto out;
 	}
-	inc_mm_counter(mm, rss);
+	inc_mm_counter(mm, anon_rss);
 	lru_cache_add_active(page);
 	set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
 					page, vma->vm_page_prot))));
 	page_add_anon_rmap(page, vma, address);
-	pte_unmap(pte);
-	spin_unlock(&mm->page_table_lock);
+	pte_unmap_unlock(pte, ptl);
 
 	/* no need for flush_tlb */
 	return;
 out:
-	spin_unlock(&mm->page_table_lock);
-out_sig:
 	__free_page(page);
 	force_sig(SIGKILL, current);
 }
@@ -421,11 +417,6 @@ int setup_arg_pages(struct linux_binprm *bprm,
 	if (!mpnt)
 		return -ENOMEM;
 
-	if (security_vm_enough_memory(arg_size >> PAGE_SHIFT)) {
-		kmem_cache_free(vm_area_cachep, mpnt);
-		return -ENOMEM;
-	}
-
 	memset(mpnt, 0, sizeof(*mpnt));
 
 	down_write(&mm->mmap_sem);
@@ -495,8 +486,7 @@ struct file *open_exec(const char *name)
 	int err;
 	struct file *file;
 
-	nd.intent.open.flags = FMODE_READ;
-	err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
+	err = path_lookup_open(name, LOOKUP_FOLLOW, &nd, FMODE_READ);
 	file = ERR_PTR(err);
 
 	if (!err) {
@@ -509,7 +499,7 @@ struct file *open_exec(const char *name)
 				err = -EACCES;
 			file = ERR_PTR(err);
 			if (!err) {
-				file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+				file = nameidata_to_filp(&nd, O_RDONLY);
 				if (!IS_ERR(file)) {
 					err = deny_write_access(file);
 					if (err) {
@@ -521,6 +511,7 @@ out:
 				return file;
 			}
 		}
+		release_open_intent(&nd);
 		path_release(&nd);
 	}
 	goto out;
@@ -639,10 +630,9 @@ static inline int de_thread(struct task_struct *tsk)
 	/*
 	 * Account for the thread group leader hanging around:
 	 */
-	count = 2;
-	if (thread_group_leader(current))
-		count = 1;
-	else {
+	count = 1;
+	if (!thread_group_leader(current)) {
+		count = 2;
 		/*
 		 * The SIGALRM timer survives the exec, but needs to point
 		 * at us as the new group leader now.  We have a race with
@@ -651,8 +641,10 @@ static inline int de_thread(struct task_struct *tsk)
 		 * before we can safely let the old group leader die.
 		 */
 		sig->real_timer.data = (unsigned long)current;
+		spin_unlock_irq(lock);
 		if (del_timer_sync(&sig->real_timer))
 			add_timer(&sig->real_timer);
+		spin_lock_irq(lock);
 	}
 	while (atomic_read(&sig->count) > count) {
 		sig->group_exit_task = current;
@@ -664,7 +656,6 @@ static inline int de_thread(struct task_struct *tsk)
 	}
 	sig->group_exit_task = NULL;
 	sig->notify_count = 0;
-	sig->real_timer.data = (unsigned long)current;
 	spin_unlock_irq(lock);
 
 	/*
@@ -745,8 +736,8 @@ static inline int de_thread(struct task_struct *tsk)
         }
 
 	/*
-	 * Now there are really no other threads at all,
-	 * so it's safe to stop telling them to kill themselves.
+	 * There may be one thread left which is just exiting,
+	 * but it's safe to stop telling the group to kill themselves.
 	 */
 	sig->flags = 0;
 
@@ -785,7 +776,6 @@ no_thread_group:
 			kmem_cache_free(sighand_cachep, oldsighand);
 	}
 
-	BUG_ON(!thread_group_empty(current));
 	BUG_ON(!thread_group_leader(current));
 	return 0;
 }
@@ -798,6 +788,7 @@ no_thread_group:
 static inline void flush_old_files(struct files_struct * files)
 {
 	long j = -1;
+	struct fdtable *fdt;
 
 	spin_lock(&files->file_lock);
 	for (;;) {
@@ -805,12 +796,13 @@ static inline void flush_old_files(struct files_struct * files)
 
 		j++;
 		i = j * __NFDBITS;
-		if (i >= files->max_fds || i >= files->max_fdset)
+		fdt = files_fdtable(files);
+		if (i >= fdt->max_fds || i >= fdt->max_fdset)
 			break;
-		set = files->close_on_exec->fds_bits[j];
+		set = fdt->close_on_exec->fds_bits[j];
 		if (!set)
 			continue;
-		files->close_on_exec->fds_bits[j] = 0;
+		fdt->close_on_exec->fds_bits[j] = 0;
 		spin_unlock(&files->file_lock);
 		for ( ; set ; i++,set >>= 1) {
 			if (set & 1) {
@@ -1211,7 +1203,6 @@ int do_execve(char * filename,
 		/* execve success */
 		security_bprm_free(bprm);
 		acct_update_integrals(current);
-		update_mem_hiwater(current);
 		kfree(bprm);
 		return retval;
 	}
@@ -1426,19 +1417,16 @@ static void zap_threads (struct mm_struct *mm)
 static void coredump_wait(struct mm_struct *mm)
 {
 	DECLARE_COMPLETION(startup_done);
+	int core_waiters;
 
-	mm->core_waiters++; /* let other threads block */
 	mm->core_startup_done = &startup_done;
 
-	/* give other threads a chance to run: */
-	yield();
-
 	zap_threads(mm);
-	if (--mm->core_waiters) {
-		up_write(&mm->mmap_sem);
+	core_waiters = mm->core_waiters;
+	up_write(&mm->mmap_sem);
+
+	if (core_waiters)
 		wait_for_completion(&startup_done);
-	} else
-		up_write(&mm->mmap_sem);
 	BUG_ON(mm->core_waiters);
 }
 
@@ -1472,11 +1460,21 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 		current->fsuid = 0;	/* Dump root private */
 	}
 	mm->dumpable = 0;
-	init_completion(&mm->core_done);
+
+	retval = -EAGAIN;
 	spin_lock_irq(&current->sighand->siglock);
-	current->signal->flags = SIGNAL_GROUP_EXIT;
-	current->signal->group_exit_code = exit_code;
+	if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
+		current->signal->flags = SIGNAL_GROUP_EXIT;
+		current->signal->group_exit_code = exit_code;
+		retval = 0;
+	}
 	spin_unlock_irq(&current->sighand->siglock);
+	if (retval) {
+		up_write(&mm->mmap_sem);
+		goto fail;
+	}
+
+	init_completion(&mm->core_done);
 	coredump_wait(mm);
 
 	/*
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 161f156d98c..e2d6208633a 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -605,22 +605,28 @@ got:
 	insert_inode_hash(inode);
 
 	if (DQUOT_ALLOC_INODE(inode)) {
-		DQUOT_DROP(inode);
 		err = -ENOSPC;
-		goto fail2;
+		goto fail_drop;
 	}
+
 	err = ext2_init_acl(inode, dir);
-	if (err) {
-		DQUOT_FREE_INODE(inode);
-		DQUOT_DROP(inode);
-		goto fail2;
-	}
+	if (err)
+		goto fail_free_drop;
+
+	err = ext2_init_security(inode,dir);
+	if (err)
+		goto fail_free_drop;
+
 	mark_inode_dirty(inode);
 	ext2_debug("allocating inode %lu\n", inode->i_ino);
 	ext2_preread_inode(inode);
 	return inode;
 
-fail2:
+fail_free_drop:
+	DQUOT_FREE_INODE(inode);
+
+fail_drop:
+	DQUOT_DROP(inode);
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
 	iput(inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 53dceb0c659..e7d3f0522d0 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -71,6 +71,8 @@ void ext2_put_inode(struct inode *inode)
  */
 void ext2_delete_inode (struct inode * inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+
 	if (is_bad_inode(inode))
 		goto no_delete;
 	EXT2_I(inode)->i_dtime	= get_seconds();
@@ -438,6 +440,10 @@ static int ext2_alloc_branch(struct inode *inode,
 		 * the pointer to new one, then send parent to disk.
 		 */
 		bh = sb_getblk(inode->i_sb, parent);
+		if (!bh) {
+			err = -EIO;
+			break;
+		}
 		lock_buffer(bh);
 		memset(bh->b_data, 0, blocksize);
 		branch[n].bh = bh;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index dcfe331dc4c..3c0c7c6a5b4 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -19,6 +19,7 @@
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/string.h>
+#include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/blkdev.h>
@@ -27,6 +28,8 @@
 #include <linux/buffer_head.h>
 #include <linux/smp_lock.h>
 #include <linux/vfs.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
 #include <asm/uaccess.h>
 #include "ext2.h"
 #include "xattr.h"
@@ -201,6 +204,26 @@ static void ext2_clear_inode(struct inode *inode)
 #endif
 }
 
+static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(vfs->mnt_sb);
+
+	if (sbi->s_mount_opt & EXT2_MOUNT_GRPID)
+		seq_puts(seq, ",grpid");
+	else
+		seq_puts(seq, ",nogrpid");
+
+#if defined(CONFIG_QUOTA)
+	if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA)
+		seq_puts(seq, ",usrquota");
+
+	if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA)
+		seq_puts(seq, ",grpquota");
+#endif
+
+	return 0;
+}
+
 #ifdef CONFIG_QUOTA
 static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off);
 static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off);
@@ -218,6 +241,7 @@ static struct super_operations ext2_sops = {
 	.statfs		= ext2_statfs,
 	.remount_fs	= ext2_remount,
 	.clear_inode	= ext2_clear_inode,
+	.show_options	= ext2_show_options,
 #ifdef CONFIG_QUOTA
 	.quota_read	= ext2_quota_read,
 	.quota_write	= ext2_quota_write,
@@ -256,10 +280,11 @@ static unsigned long get_sb_block(void **data)
 
 enum {
 	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
-	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
-	Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh,
-	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip,
-	Opt_ignore, Opt_err,
+	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic,
+	Opt_err_ro, Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug,
+	Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
+	Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota,
+	Opt_usrquota, Opt_grpquota
 };
 
 static match_table_t tokens = {
@@ -288,10 +313,10 @@ static match_table_t tokens = {
 	{Opt_acl, "acl"},
 	{Opt_noacl, "noacl"},
 	{Opt_xip, "xip"},
-	{Opt_ignore, "grpquota"},
+	{Opt_grpquota, "grpquota"},
 	{Opt_ignore, "noquota"},
-	{Opt_ignore, "quota"},
-	{Opt_ignore, "usrquota"},
+	{Opt_quota, "quota"},
+	{Opt_usrquota, "usrquota"},
 	{Opt_err, NULL}
 };
 
@@ -406,6 +431,26 @@ static int parse_options (char * options,
 			printk("EXT2 xip option not supported\n");
 #endif
 			break;
+
+#if defined(CONFIG_QUOTA)
+		case Opt_quota:
+		case Opt_usrquota:
+			set_opt(sbi->s_mount_opt, USRQUOTA);
+			break;
+
+		case Opt_grpquota:
+			set_opt(sbi->s_mount_opt, GRPQUOTA);
+			break;
+#else
+		case Opt_quota:
+		case Opt_usrquota:
+		case Opt_grpquota:
+			printk(KERN_ERR
+				"EXT2-fs: quota operations not supported.\n");
+
+			break;
+#endif
+
 		case Opt_ignore:
 			break;
 		default:
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c
index 9f7bac01d55..1e67d87cfa9 100644
--- a/fs/ext2/symlink.c
+++ b/fs/ext2/symlink.c
@@ -21,11 +21,11 @@
 #include "xattr.h"
 #include <linux/namei.h>
 
-static int ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct ext2_inode_info *ei = EXT2_I(dentry->d_inode);
 	nd_set_link(nd, (char *)ei->i_data);
-	return 0;
+	return NULL;
 }
 
 struct inode_operations ext2_symlink_inode_operations = {
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index 5f3bfde3b81..67cfeb66e89 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -116,3 +116,11 @@ exit_ext2_xattr(void)
 
 # endif  /* CONFIG_EXT2_FS_XATTR */
 
+#ifdef CONFIG_EXT2_FS_SECURITY
+extern int ext2_init_security(struct inode *inode, struct inode *dir);
+#else
+static inline int ext2_init_security(struct inode *inode, struct inode *dir)
+{
+	return 0;
+}
+#endif
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 6a6c59fbe59..a2661279847 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -8,6 +8,7 @@
 #include <linux/fs.h>
 #include <linux/smp_lock.h>
 #include <linux/ext2_fs.h>
+#include <linux/security.h>
 #include "xattr.h"
 
 static size_t
@@ -45,6 +46,27 @@ ext2_xattr_security_set(struct inode *inode, const char *name,
 			      value, size, flags);
 }
 
+int
+ext2_init_security(struct inode *inode, struct inode *dir)
+{
+	int err;
+	size_t len;
+	void *value;
+	char *name;
+
+	err = security_inode_init_security(inode, dir, &name, &value, &len);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+	err = ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY,
+			     name, value, len, 0);
+	kfree(name);
+	kfree(value);
+	return err;
+}
+
 struct xattr_handler ext2_xattr_security_handler = {
 	.prefix	= XATTR_SECURITY_PREFIX,
 	.list	= ext2_xattr_security_list,
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index e463dca008e..7992d21e0e0 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -20,6 +20,8 @@
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 
+#include "bitmap.h"
+
 /*
  * balloc.c contains the blocks allocation and deallocation routines
  */
@@ -1010,7 +1012,7 @@ retry:
  * allocation within the reservation window.
  *
  * This will avoid keeping on searching the reservation list again and
- * again when someboday is looking for a free block (without
+ * again when somebody is looking for a free block (without
  * reservation), and there are lots of free blocks, but they are all
  * being reserved.
  *
@@ -1410,18 +1412,19 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
 	unsigned long desc_count;
 	struct ext3_group_desc *gdp;
 	int i;
-	unsigned long ngroups;
+	unsigned long ngroups = EXT3_SB(sb)->s_groups_count;
 #ifdef EXT3FS_DEBUG
 	struct ext3_super_block *es;
 	unsigned long bitmap_count, x;
 	struct buffer_head *bitmap_bh = NULL;
 
-	lock_super(sb);
 	es = EXT3_SB(sb)->s_es;
 	desc_count = 0;
 	bitmap_count = 0;
 	gdp = NULL;
-	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
+
+	smp_rmb();
+	for (i = 0; i < ngroups; i++) {
 		gdp = ext3_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
@@ -1439,11 +1442,9 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
 	brelse(bitmap_bh);
 	printk("ext3_count_free_blocks: stored = %u, computed = %lu, %lu\n",
 	       le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count);
-	unlock_super(sb);
 	return bitmap_count;
 #else
 	desc_count = 0;
-	ngroups = EXT3_SB(sb)->s_groups_count;
 	smp_rmb();
 	for (i = 0; i < ngroups; i++) {
 		gdp = ext3_get_group_desc(sb, i, NULL);
diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c
index 6c419b9ab0e..5b4ba3e246e 100644
--- a/fs/ext3/bitmap.c
+++ b/fs/ext3/bitmap.c
@@ -8,7 +8,7 @@
  */
 
 #include <linux/buffer_head.h>
-
+#include "bitmap.h"
 
 static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
 
diff --git a/fs/ext3/bitmap.h b/fs/ext3/bitmap.h
new file mode 100644
index 00000000000..6ee503a6bb4
--- /dev/null
+++ b/fs/ext3/bitmap.h
@@ -0,0 +1,8 @@
+/*  linux/fs/ext3/bitmap.c
+ *
+ * Copyright (C) 2005 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+*/
+
+extern unsigned long ext3_count_free (struct buffer_head *, unsigned int );
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 6981bd014ed..df3f517c54a 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -26,6 +26,7 @@
 
 #include <asm/byteorder.h>
 
+#include "bitmap.h"
 #include "xattr.h"
 #include "acl.h"
 
@@ -597,22 +598,22 @@ got:
 
 	ret = inode;
 	if(DQUOT_ALLOC_INODE(inode)) {
-		DQUOT_DROP(inode);
 		err = -EDQUOT;
-		goto fail2;
+		goto fail_drop;
 	}
+
 	err = ext3_init_acl(handle, inode, dir);
-	if (err) {
-		DQUOT_FREE_INODE(inode);
-		DQUOT_DROP(inode);
-		goto fail2;
-  	}
+	if (err)
+		goto fail_free_drop;
+
+	err = ext3_init_security(handle,inode, dir);
+	if (err)
+		goto fail_free_drop;
+
 	err = ext3_mark_inode_dirty(handle, inode);
 	if (err) {
 		ext3_std_error(sb, err);
-		DQUOT_FREE_INODE(inode);
-		DQUOT_DROP(inode);
-		goto fail2;
+		goto fail_free_drop;
 	}
 
 	ext3_debug("allocating inode %lu\n", inode->i_ino);
@@ -626,7 +627,11 @@ really_out:
 	brelse(bitmap_bh);
 	return ret;
 
-fail2:
+fail_free_drop:
+	DQUOT_FREE_INODE(inode);
+
+fail_drop:
+	DQUOT_DROP(inode);
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
 	iput(inode);
@@ -700,7 +705,6 @@ unsigned long ext3_count_free_inodes (struct super_block * sb)
 	unsigned long bitmap_count, x;
 	struct buffer_head *bitmap_bh = NULL;
 
-	lock_super (sb);
 	es = EXT3_SB(sb)->s_es;
 	desc_count = 0;
 	bitmap_count = 0;
@@ -723,7 +727,6 @@ unsigned long ext3_count_free_inodes (struct super_block * sb)
 	brelse(bitmap_bh);
 	printk("ext3_count_free_inodes: stored = %u, computed = %lu, %lu\n",
 		le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
-	unlock_super(sb);
 	return desc_count;
 #else
 	desc_count = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 9989fdcf4d5..5d9b00e2883 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -187,6 +187,8 @@ void ext3_delete_inode (struct inode * inode)
 {
 	handle_t *handle;
 
+	truncate_inode_pages(&inode->i_data, 0);
+
 	if (is_bad_inode(inode))
 		goto no_delete;
 
@@ -489,7 +491,7 @@ static unsigned long ext3_find_goal(struct inode *inode, long block,
  *	the same format as ext3_get_branch() would do. We are calling it after
  *	we had read the existing part of chain and partial points to the last
  *	triple of that (one with zero ->key). Upon the exit we have the same
- *	picture as after the successful ext3_get_block(), excpet that in one
+ *	picture as after the successful ext3_get_block(), except that in one
  *	place chain is disconnected - *branch->p is still zero (we did not
  *	set the last link), but branch->key contains the number that should
  *	be placed into *branch->p to fill that gap.
@@ -521,7 +523,6 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
 			if (!nr)
 				break;
 			branch[n].key = cpu_to_le32(nr);
-			keys = n+1;
 
 			/*
 			 * Get buffer_head for parent block, zero it out
@@ -529,6 +530,9 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
 			 * parent to disk.  
 			 */
 			bh = sb_getblk(inode->i_sb, parent);
+			if (!bh)
+				break;
+			keys = n+1;
 			branch[n].bh = bh;
 			lock_buffer(bh);
 			BUFFER_TRACE(bh, "call get_create_access");
@@ -862,6 +866,10 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode,
 	if (!*errp && buffer_mapped(&dummy)) {
 		struct buffer_head *bh;
 		bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
+		if (!bh) {
+			*errp = -EIO;
+			goto err;
+		}
 		if (buffer_new(&dummy)) {
 			J_ASSERT(create != 0);
 			J_ASSERT(handle != 0);
@@ -894,6 +902,7 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode,
 		}
 		return bh;
 	}
+err:
 	return NULL;
 }
 
@@ -1432,7 +1441,7 @@ static int ext3_invalidatepage(struct page *page, unsigned long offset)
 	return journal_invalidatepage(journal, page, offset);
 }
 
-static int ext3_releasepage(struct page *page, int wait)
+static int ext3_releasepage(struct page *page, gfp_t wait)
 {
 	journal_t *journal = EXT3_JOURNAL(page->mapping->host);
 
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 50378d8ff84..b3c690a3b54 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -36,6 +36,8 @@
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 #include <linux/smp_lock.h>
+
+#include "namei.h"
 #include "xattr.h"
 #include "acl.h"
 
diff --git a/fs/ext3/namei.h b/fs/ext3/namei.h
new file mode 100644
index 00000000000..f2ce2b0065c
--- /dev/null
+++ b/fs/ext3/namei.h
@@ -0,0 +1,8 @@
+/*  linux/fs/ext3/namei.h
+ *
+ * Copyright (C) 2005 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+*/
+
+extern struct dentry *ext3_get_parent(struct dentry *child);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 2c9f81278d5..1be78b4b4de 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -118,6 +118,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
 	int err;
 
 	bh = sb_getblk(sb, blk);
+	if (!bh)
+		return ERR_PTR(-EIO);
 	if ((err = ext3_journal_get_write_access(handle, bh))) {
 		brelse(bh);
 		bh = ERR_PTR(err);
@@ -202,6 +204,10 @@ static int setup_new_group_blocks(struct super_block *sb,
 		ext3_debug("update backup group %#04lx (+%d)\n", block, bit);
 
 		gdb = sb_getblk(sb, block);
+		if (!gdb) {
+			err = -EIO;
+			goto exit_bh;
+		}
 		if ((err = ext3_journal_get_write_access(handle, gdb))) {
 			brelse(gdb);
 			goto exit_bh;
@@ -242,7 +248,7 @@ static int setup_new_group_blocks(struct super_block *sb,
 	     i < sbi->s_itb_per_group; i++, bit++, block++) {
 		struct buffer_head *it;
 
-		ext3_debug("clear inode block %#04x (+%ld)\n", block, bit);
+		ext3_debug("clear inode block %#04lx (+%d)\n", block, bit);
 		if (IS_ERR(it = bclean(handle, sb, block))) {
 			err = PTR_ERR(it);
 			goto exit_bh;
@@ -643,8 +649,12 @@ static void update_backups(struct super_block *sb,
 			break;
 
 		bh = sb_getblk(sb, group * bpg + blk_off);
-		ext3_debug(sb, __FUNCTION__, "update metadata backup %#04lx\n",
-			   bh->b_blocknr);
+		if (!bh) {
+			err = -EIO;
+			break;
+		}
+		ext3_debug("update metadata backup %#04lx\n",
+			  (unsigned long)bh->b_blocknr);
 		if ((err = ext3_journal_get_write_access(handle, bh)))
 			break;
 		lock_buffer(bh);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 3c3c6e399fb..f594989ccb7 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -35,9 +35,13 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/quotaops.h>
+#include <linux/seq_file.h>
+
 #include <asm/uaccess.h>
+
 #include "xattr.h"
 #include "acl.h"
+#include "namei.h"
 
 static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
 static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
@@ -509,8 +513,46 @@ static void ext3_clear_inode(struct inode *inode)
 	kfree(rsv);
 }
 
-#ifdef CONFIG_QUOTA
+static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
+{
+#if defined(CONFIG_QUOTA)
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+
+	if (sbi->s_jquota_fmt)
+		seq_printf(seq, ",jqfmt=%s",
+		(sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0");
+
+	if (sbi->s_qf_names[USRQUOTA])
+		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
+
+	if (sbi->s_qf_names[GRPQUOTA])
+		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
+
+	if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA)
+		seq_puts(seq, ",usrquota");
 
+	if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)
+		seq_puts(seq, ",grpquota");
+#endif
+}
+
+static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
+{
+	struct super_block *sb = vfs->mnt_sb;
+
+	if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
+		seq_puts(seq, ",data=journal");
+	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
+		seq_puts(seq, ",data=ordered");
+	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
+		seq_puts(seq, ",data=writeback");
+
+	ext3_show_quota_options(seq, sb);
+
+	return 0;
+}
+
+#ifdef CONFIG_QUOTA
 #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
 #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
 
@@ -569,13 +611,13 @@ static struct super_operations ext3_sops = {
 	.statfs		= ext3_statfs,
 	.remount_fs	= ext3_remount,
 	.clear_inode	= ext3_clear_inode,
+	.show_options	= ext3_show_options,
 #ifdef CONFIG_QUOTA
 	.quota_read	= ext3_quota_read,
 	.quota_write	= ext3_quota_write,
 #endif
 };
 
-struct dentry *ext3_get_parent(struct dentry *child);
 static struct export_operations ext3_export_ops = {
 	.get_parent = ext3_get_parent,
 };
@@ -590,7 +632,8 @@ enum {
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
-	Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
+	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
+	Opt_grpquota
 };
 
 static match_table_t tokens = {
@@ -634,10 +677,10 @@ static match_table_t tokens = {
 	{Opt_grpjquota, "grpjquota=%s"},
 	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
 	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
-	{Opt_quota, "grpquota"},
+	{Opt_grpquota, "grpquota"},
 	{Opt_noquota, "noquota"},
 	{Opt_quota, "quota"},
-	{Opt_quota, "usrquota"},
+	{Opt_usrquota, "usrquota"},
 	{Opt_barrier, "barrier=%u"},
 	{Opt_err, NULL},
 	{Opt_resize, "resize"},
@@ -903,7 +946,13 @@ clear_qf_name:
 			sbi->s_jquota_fmt = QFMT_VFS_V0;
 			break;
 		case Opt_quota:
+		case Opt_usrquota:
 			set_opt(sbi->s_mount_opt, QUOTA);
+			set_opt(sbi->s_mount_opt, USRQUOTA);
+			break;
+		case Opt_grpquota:
+			set_opt(sbi->s_mount_opt, QUOTA);
+			set_opt(sbi->s_mount_opt, GRPQUOTA);
 			break;
 		case Opt_noquota:
 			if (sb_any_quota_enabled(sb)) {
@@ -912,8 +961,13 @@ clear_qf_name:
 				return 0;
 			}
 			clear_opt(sbi->s_mount_opt, QUOTA);
+			clear_opt(sbi->s_mount_opt, USRQUOTA);
+			clear_opt(sbi->s_mount_opt, GRPQUOTA);
 			break;
 #else
+		case Opt_quota:
+		case Opt_usrquota:
+		case Opt_grpquota:
 		case Opt_usrjquota:
 		case Opt_grpjquota:
 		case Opt_offusrjquota:
@@ -924,7 +978,6 @@ clear_qf_name:
 				"EXT3-fs: journalled quota options not "
 				"supported.\n");
 			break;
-		case Opt_quota:
 		case Opt_noquota:
 			break;
 #endif
@@ -962,14 +1015,38 @@ clear_qf_name:
 		}
 	}
 #ifdef CONFIG_QUOTA
-	if (!sbi->s_jquota_fmt && (sbi->s_qf_names[USRQUOTA] ||
-	    sbi->s_qf_names[GRPQUOTA])) {
-		printk(KERN_ERR
-			"EXT3-fs: journalled quota format not specified.\n");
-		return 0;
+	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
+		if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) &&
+		     sbi->s_qf_names[USRQUOTA])
+			clear_opt(sbi->s_mount_opt, USRQUOTA);
+
+		if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) &&
+		     sbi->s_qf_names[GRPQUOTA])
+			clear_opt(sbi->s_mount_opt, GRPQUOTA);
+
+		if ((sbi->s_qf_names[USRQUOTA] &&
+				(sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) ||
+		    (sbi->s_qf_names[GRPQUOTA] &&
+				(sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
+			printk(KERN_ERR "EXT3-fs: old and new quota "
+					"format mixing.\n");
+			return 0;
+		}
+
+		if (!sbi->s_jquota_fmt) {
+			printk(KERN_ERR "EXT3-fs: journalled quota format "
+					"not specified.\n");
+			return 0;
+		}
+	} else {
+		if (sbi->s_jquota_fmt) {
+			printk(KERN_ERR "EXT3-fs: journalled quota format "
+					"specified with no journalling "
+					"enabled.\n");
+			return 0;
+		}
 	}
 #endif
-
 	return 1;
 }
 
diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c
index 8c3e72818fb..4f79122cde6 100644
--- a/fs/ext3/symlink.c
+++ b/fs/ext3/symlink.c
@@ -23,11 +23,11 @@
 #include <linux/namei.h>
 #include "xattr.h"
 
-static int ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct ext3_inode_info *ei = EXT3_I(dentry->d_inode);
 	nd_set_link(nd, (char*)ei->i_data);
-	return 0;
+	return NULL;
 }
 
 struct inode_operations ext3_symlink_inode_operations = {
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 269c7b92db9..430de9f63be 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -210,7 +210,7 @@ ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index,
 	return cmp ? -ENODATA : 0;
 }
 
-int
+static int
 ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
 		     void *buffer, size_t buffer_size)
 {
@@ -354,7 +354,7 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
 	return buffer_size - rest;
 }
 
-int
+static int
 ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
 {
 	struct buffer_head *bh = NULL;
@@ -626,7 +626,7 @@ struct ext3_xattr_block_find {
 	struct buffer_head *bh;
 };
 
-int
+static int
 ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
 		      struct ext3_xattr_block_find *bs)
 {
@@ -859,7 +859,7 @@ struct ext3_xattr_ibody_find {
 	struct ext3_iloc iloc;
 };
 
-int
+static int
 ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i,
 		      struct ext3_xattr_ibody_find *is)
 {
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index eb31a69e82d..2ceae38f3d4 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -133,3 +133,14 @@ exit_ext3_xattr(void)
 #define ext3_xattr_handlers	NULL
 
 # endif  /* CONFIG_EXT3_FS_XATTR */
+
+#ifdef CONFIG_EXT3_FS_SECURITY
+extern int ext3_init_security(handle_t *handle, struct inode *inode,
+				struct inode *dir);
+#else
+static inline int ext3_init_security(handle_t *handle, struct inode *inode,
+				struct inode *dir)
+{
+	return 0;
+}
+#endif
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index ddc1c41750e..b9c40c15647 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -9,6 +9,7 @@
 #include <linux/smp_lock.h>
 #include <linux/ext3_jbd.h>
 #include <linux/ext3_fs.h>
+#include <linux/security.h>
 #include "xattr.h"
 
 static size_t
@@ -47,6 +48,27 @@ ext3_xattr_security_set(struct inode *inode, const char *name,
 			      value, size, flags);
 }
 
+int
+ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
+{
+	int err;
+	size_t len;
+	void *value;
+	char *name;
+
+	err = security_inode_init_security(inode, dir, &name, &value, &len);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+	err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY,
+				    name, value, len, 0);
+	kfree(name);
+	kfree(value);
+	return err;
+}
+
 struct xattr_handler ext3_xattr_security_handler = {
 	.prefix	= XATTR_SECURITY_PREFIX,
 	.list	= ext3_xattr_security_list,
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index e5ae1b720dd..ba824964b9b 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -30,6 +30,29 @@ static inline loff_t fat_make_i_pos(struct super_block *sb,
 		| (de - (struct msdos_dir_entry *)bh->b_data);
 }
 
+static inline void fat_dir_readahead(struct inode *dir, sector_t iblock,
+				     sector_t phys)
+{
+	struct super_block *sb = dir->i_sb;
+	struct msdos_sb_info *sbi = MSDOS_SB(sb);
+	struct buffer_head *bh;
+	int sec;
+
+	/* This is not a first sector of cluster, or sec_per_clus == 1 */
+	if ((iblock & (sbi->sec_per_clus - 1)) || sbi->sec_per_clus == 1)
+		return;
+	/* root dir of FAT12/FAT16 */
+	if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO))
+		return;
+
+	bh = sb_getblk(sb, phys);
+	if (bh && !buffer_uptodate(bh)) {
+		for (sec = 0; sec < sbi->sec_per_clus; sec++)
+			sb_breadahead(sb, phys + sec);
+	}
+	brelse(bh);
+}
+
 /* Returns the inode number of the directory entry at offset pos. If bh is
    non-NULL, it is brelse'd before. Pos is incremented. The buffer header is
    returned in bh.
@@ -58,6 +81,8 @@ next:
 	if (err || !phys)
 		return -1;	/* beyond EOF or error */
 
+	fat_dir_readahead(dir, iblock, phys);
+
 	*bh = sb_bread(sb, phys);
 	if (*bh == NULL) {
 		printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n",
@@ -197,6 +222,80 @@ fat_shortname2uni(struct nls_table *nls, unsigned char *buf, int buf_size,
 	return len;
 }
 
+enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, };
+
+/**
+ * fat_parse_long - Parse extended directory entry.
+ *
+ * This function returns zero on success, negative value on error, or one of
+ * the following:
+ *
+ * %PARSE_INVALID - Directory entry is invalid.
+ * %PARSE_NOT_LONGNAME - Directory entry does not contain longname.
+ * %PARSE_EOF - Directory has no more entries.
+ */
+static int fat_parse_long(struct inode *dir, loff_t *pos,
+			  struct buffer_head **bh, struct msdos_dir_entry **de,
+			  wchar_t **unicode, unsigned char *nr_slots)
+{
+	struct msdos_dir_slot *ds;
+	unsigned char id, slot, slots, alias_checksum;
+
+	if (!*unicode) {
+		*unicode = (wchar_t *)__get_free_page(GFP_KERNEL);
+		if (!*unicode) {
+			brelse(*bh);
+			return -ENOMEM;
+		}
+	}
+parse_long:
+	slots = 0;
+	ds = (struct msdos_dir_slot *)*de;
+	id = ds->id;
+	if (!(id & 0x40))
+		return PARSE_INVALID;
+	slots = id & ~0x40;
+	if (slots > 20 || !slots)	/* ceil(256 * 2 / 26) */
+		return PARSE_INVALID;
+	*nr_slots = slots;
+	alias_checksum = ds->alias_checksum;
+
+	slot = slots;
+	while (1) {
+		int offset;
+
+		slot--;
+		offset = slot * 13;
+		fat16_towchar(*unicode + offset, ds->name0_4, 5);
+		fat16_towchar(*unicode + offset + 5, ds->name5_10, 6);
+		fat16_towchar(*unicode + offset + 11, ds->name11_12, 2);
+
+		if (ds->id & 0x40)
+			(*unicode)[offset + 13] = 0;
+		if (fat_get_entry(dir, pos, bh, de) < 0)
+			return PARSE_EOF;
+		if (slot == 0)
+			break;
+		ds = (struct msdos_dir_slot *)*de;
+		if (ds->attr != ATTR_EXT)
+			return PARSE_NOT_LONGNAME;
+		if ((ds->id & ~0x40) != slot)
+			goto parse_long;
+		if (ds->alias_checksum != alias_checksum)
+			goto parse_long;
+	}
+	if ((*de)->name[0] == DELETED_FLAG)
+		return PARSE_INVALID;
+	if ((*de)->attr == ATTR_EXT)
+		goto parse_long;
+	if (IS_FREE((*de)->name) || ((*de)->attr & ATTR_VOLUME))
+		return PARSE_INVALID;
+	if (fat_checksum((*de)->name) != alias_checksum)
+		*nr_slots = 0;
+
+	return 0;
+}
+
 /*
  * Return values: negative -> error, 0 -> not found, positive -> found,
  * value is the total amount of slots, including the shortname entry.
@@ -234,68 +333,16 @@ parse_record:
 		if (de->attr != ATTR_EXT && IS_FREE(de->name))
 			continue;
 		if (de->attr == ATTR_EXT) {
-			struct msdos_dir_slot *ds;
-			unsigned char id;
-			unsigned char slot;
-			unsigned char slots;
-			unsigned char sum;
-			unsigned char alias_checksum;
-
-			if (!unicode) {
-				unicode = (wchar_t *)
-					__get_free_page(GFP_KERNEL);
-				if (!unicode) {
-					brelse(bh);
-					return -ENOMEM;
-				}
-			}
-parse_long:
-			slots = 0;
-			ds = (struct msdos_dir_slot *) de;
-			id = ds->id;
-			if (!(id & 0x40))
-				continue;
-			slots = id & ~0x40;
-			if (slots > 20 || !slots)	/* ceil(256 * 2 / 26) */
+			int status = fat_parse_long(inode, &cpos, &bh, &de,
+						    &unicode, &nr_slots);
+			if (status < 0)
+				return status;
+			else if (status == PARSE_INVALID)
 				continue;
-			nr_slots = slots;
-			alias_checksum = ds->alias_checksum;
-
-			slot = slots;
-			while (1) {
-				int offset;
-
-				slot--;
-				offset = slot * 13;
-				fat16_towchar(unicode + offset, ds->name0_4, 5);
-				fat16_towchar(unicode + offset + 5, ds->name5_10, 6);
-				fat16_towchar(unicode + offset + 11, ds->name11_12, 2);
-
-				if (ds->id & 0x40) {
-					unicode[offset + 13] = 0;
-				}
-				if (fat_get_entry(inode, &cpos, &bh, &de) < 0)
-					goto EODir;
-				if (slot == 0)
-					break;
-				ds = (struct msdos_dir_slot *) de;
-				if (ds->attr !=  ATTR_EXT)
-					goto parse_record;
-				if ((ds->id & ~0x40) != slot)
-					goto parse_long;
-				if (ds->alias_checksum != alias_checksum)
-					goto parse_long;
-			}
-			if (de->name[0] == DELETED_FLAG)
-				continue;
-			if (de->attr ==  ATTR_EXT)
-				goto parse_long;
-			if (IS_FREE(de->name) || (de->attr & ATTR_VOLUME))
-				continue;
-			for (sum = 0, i = 0; i < 11; i++)
-				sum = (((sum&1)<<7)|((sum&0xfe)>>1)) + de->name[i];
-			if (sum != alias_checksum)
-				nr_slots = 0;
+			else if (status == PARSE_NOT_LONGNAME)
+				goto parse_record;
+			else if (status == PARSE_EOF)
+				goto EODir;
 		}
 
 		memcpy(work, de->name, sizeof(de->name));
@@ -383,8 +430,8 @@ struct fat_ioctl_filldir_callback {
 	int short_len;
 };
 
-static int fat_readdirx(struct inode *inode, struct file *filp, void *dirent,
-			filldir_t filldir, int short_only, int both)
+static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
+			 filldir_t filldir, int short_only, int both)
 {
 	struct super_block *sb = inode->i_sb;
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -433,9 +480,10 @@ static int fat_readdirx(struct inode *inode, struct file *filp, void *dirent,
 
 	bh = NULL;
 GetNew:
-	long_slots = 0;
 	if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
 		goto EODir;
+parse_record:
+	long_slots = 0;
 	/* Check for long filename entry */
 	if (isvfat) {
 		if (de->name[0] == DELETED_FLAG)
@@ -450,69 +498,18 @@ GetNew:
 	}
 
 	if (isvfat && de->attr == ATTR_EXT) {
-		struct msdos_dir_slot *ds;
-		unsigned char id;
-		unsigned char slot;
-		unsigned char slots;
-		unsigned char sum;
-		unsigned char alias_checksum;
-
-		if (!unicode) {
-			unicode = (wchar_t *)__get_free_page(GFP_KERNEL);
-			if (!unicode) {
-				filp->f_pos = cpos;
-				brelse(bh);
-				ret = -ENOMEM;
-				goto out;
-			}
-		}
-ParseLong:
-		slots = 0;
-		ds = (struct msdos_dir_slot *) de;
-		id = ds->id;
-		if (!(id & 0x40))
-			goto RecEnd;
-		slots = id & ~0x40;
-		if (slots > 20 || !slots)	/* ceil(256 * 2 / 26) */
+		int status = fat_parse_long(inode, &cpos, &bh, &de,
+					    &unicode, &long_slots);
+		if (status < 0) {
+			filp->f_pos = cpos;
+			ret = status;
+			goto out;
+		} else if (status == PARSE_INVALID)
 			goto RecEnd;
-		long_slots = slots;
-		alias_checksum = ds->alias_checksum;
-
-		slot = slots;
-		while (1) {
-			int offset;
-
-			slot--;
-			offset = slot * 13;
-			fat16_towchar(unicode + offset, ds->name0_4, 5);
-			fat16_towchar(unicode + offset + 5, ds->name5_10, 6);
-			fat16_towchar(unicode + offset + 11, ds->name11_12, 2);
-
-			if (ds->id & 0x40) {
-				unicode[offset + 13] = 0;
-			}
-			if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
-				goto EODir;
-			if (slot == 0)
-				break;
-			ds = (struct msdos_dir_slot *) de;
-			if (ds->attr !=  ATTR_EXT)
-				goto RecEnd;	/* XXX */
-			if ((ds->id & ~0x40) != slot)
-				goto ParseLong;
-			if (ds->alias_checksum != alias_checksum)
-				goto ParseLong;
-		}
-		if (de->name[0] == DELETED_FLAG)
-			goto RecEnd;
-		if (de->attr ==  ATTR_EXT)
-			goto ParseLong;
-		if (IS_FREE(de->name) || (de->attr & ATTR_VOLUME))
-			goto RecEnd;
-		for (sum = 0, i = 0; i < 11; i++)
-			sum = (((sum&1)<<7)|((sum&0xfe)>>1)) + de->name[i];
-		if (sum != alias_checksum)
-			long_slots = 0;
+		else if (status == PARSE_NOT_LONGNAME)
+			goto parse_record;
+		else if (status == PARSE_EOF)
+			goto EODir;
 	}
 
 	if (sbi->options.dotsOK) {
@@ -635,8 +632,7 @@ RecEnd:
 EODir:
 	filp->f_pos = cpos;
 FillFailed:
-	if (bh)
-		brelse(bh);
+	brelse(bh);
 	if (unicode)
 		free_page((unsigned long)unicode);
 out:
@@ -647,7 +643,7 @@ out:
 static int fat_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
-	return fat_readdirx(inode, filp, dirent, filldir, 0, 0);
+	return __fat_readdir(inode, filp, dirent, filldir, 0, 0);
 }
 
 static int fat_ioctl_filldir(void *__buf, const char *name, int name_len,
@@ -736,8 +732,8 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp,
 	down(&inode->i_sem);
 	ret = -ENOENT;
 	if (!IS_DEADDIR(inode)) {
-		ret = fat_readdirx(inode, filp, &buf, fat_ioctl_filldir,
-				   short_only, both);
+		ret = __fat_readdir(inode, filp, &buf, fat_ioctl_filldir,
+				    short_only, both);
 	}
 	up(&inode->i_sem);
 	if (ret >= 0)
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 62ffa913940..7134403d5be 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -12,39 +12,6 @@
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 
-static ssize_t fat_file_aio_write(struct kiocb *iocb, const char __user *buf,
-				  size_t count, loff_t pos)
-{
-	struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
-	int retval;
-
-	retval = generic_file_aio_write(iocb, buf, count, pos);
-	if (retval > 0) {
-		inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
-		MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
-		mark_inode_dirty(inode);
-//		check the locking rules
-//		if (IS_SYNC(inode))
-//			fat_sync_inode(inode);
-	}
-	return retval;
-}
-
-static ssize_t fat_file_writev(struct file *filp, const struct iovec *iov,
-			       unsigned long nr_segs, loff_t *ppos)
-{
-	struct inode *inode = filp->f_dentry->d_inode;
-	int retval;
-
-	retval = generic_file_writev(filp, iov, nr_segs, ppos);
-	if (retval > 0) {
-		inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
-		MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
-		mark_inode_dirty(inode);
-	}
-	return retval;
-}
-
 int fat_generic_ioctl(struct inode *inode, struct file *filp,
 		      unsigned int cmd, unsigned long arg)
 {
@@ -148,9 +115,9 @@ struct file_operations fat_file_operations = {
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.readv		= generic_file_readv,
-	.writev		= fat_file_writev,
+	.writev		= generic_file_writev,
 	.aio_read	= generic_file_aio_read,
-	.aio_write	= fat_file_aio_write,
+	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
 	.ioctl		= fat_generic_ioctl,
 	.fsync		= file_fsync,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 96ae85b67eb..e2effe2dc9b 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -102,6 +102,19 @@ static int fat_prepare_write(struct file *file, struct page *page,
 				  &MSDOS_I(page->mapping->host)->mmu_private);
 }
 
+static int fat_commit_write(struct file *file, struct page *page,
+			    unsigned from, unsigned to)
+{
+	struct inode *inode = page->mapping->host;
+	int err = generic_commit_write(file, page, from, to);
+	if (!err && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
+		inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+		MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
+		mark_inode_dirty(inode);
+	}
+	return err;
+}
+
 static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping, block, fat_get_block);
@@ -112,7 +125,7 @@ static struct address_space_operations fat_aops = {
 	.writepage	= fat_writepage,
 	.sync_page	= block_sync_page,
 	.prepare_write	= fat_prepare_write,
-	.commit_write	= generic_commit_write,
+	.commit_write	= fat_commit_write,
 	.bmap		= _fat_bmap
 };
 
@@ -287,9 +300,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
 	inode->i_blksize = sbi->cluster_size;
 	inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
 			   & ~((loff_t)sbi->cluster_size - 1)) >> 9;
-	inode->i_mtime.tv_sec = inode->i_atime.tv_sec =
+	inode->i_mtime.tv_sec =
 		date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date));
-	inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = 0;
+	inode->i_mtime.tv_nsec = 0;
 	if (sbi->options.isvfat) {
 		int secs = de->ctime_cs / 100;
 		int csecs = de->ctime_cs % 100;
@@ -297,8 +310,11 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
 			date_dos2unix(le16_to_cpu(de->ctime),
 				      le16_to_cpu(de->cdate)) + secs;
 		inode->i_ctime.tv_nsec = csecs * 10000000;
+		inode->i_atime.tv_sec =
+			date_dos2unix(le16_to_cpu(0), le16_to_cpu(de->adate));
+		inode->i_atime.tv_nsec = 0;
 	} else
-		inode->i_ctime = inode->i_mtime;
+		inode->i_ctime = inode->i_atime = inode->i_mtime;
 
 	return 0;
 }
@@ -335,6 +351,8 @@ EXPORT_SYMBOL(fat_build_inode);
 
 static void fat_delete_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+
 	if (!is_bad_inode(inode)) {
 		inode->i_size = 0;
 		fat_truncate(inode);
@@ -498,7 +516,9 @@ retry:
 	raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
 	fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date);
 	if (sbi->options.isvfat) {
+		__le16 atime;
 		fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate);
+		fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate);
 		raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
 			inode->i_ctime.tv_nsec / 10000000;
 	}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6fbc9d8fcc3..863b46e0d78 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -16,6 +16,7 @@
 #include <linux/security.h>
 #include <linux/ptrace.h>
 #include <linux/signal.h>
+#include <linux/rcupdate.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
@@ -24,21 +25,25 @@
 void fastcall set_close_on_exec(unsigned int fd, int flag)
 {
 	struct files_struct *files = current->files;
+	struct fdtable *fdt;
 	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
 	if (flag)
-		FD_SET(fd, files->close_on_exec);
+		FD_SET(fd, fdt->close_on_exec);
 	else
-		FD_CLR(fd, files->close_on_exec);
+		FD_CLR(fd, fdt->close_on_exec);
 	spin_unlock(&files->file_lock);
 }
 
 static inline int get_close_on_exec(unsigned int fd)
 {
 	struct files_struct *files = current->files;
+	struct fdtable *fdt;
 	int res;
-	spin_lock(&files->file_lock);
-	res = FD_ISSET(fd, files->close_on_exec);
-	spin_unlock(&files->file_lock);
+	rcu_read_lock();
+	fdt = files_fdtable(files);
+	res = FD_ISSET(fd, fdt->close_on_exec);
+	rcu_read_unlock();
 	return res;
 }
 
@@ -54,24 +59,26 @@ static int locate_fd(struct files_struct *files,
 	unsigned int newfd;
 	unsigned int start;
 	int error;
+	struct fdtable *fdt;
 
 	error = -EINVAL;
 	if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
 		goto out;
 
 repeat:
+	fdt = files_fdtable(files);
 	/*
 	 * Someone might have closed fd's in the range
-	 * orig_start..files->next_fd
+	 * orig_start..fdt->next_fd
 	 */
 	start = orig_start;
-	if (start < files->next_fd)
-		start = files->next_fd;
+	if (start < fdt->next_fd)
+		start = fdt->next_fd;
 
 	newfd = start;
-	if (start < files->max_fdset) {
-		newfd = find_next_zero_bit(files->open_fds->fds_bits,
-			files->max_fdset, start);
+	if (start < fdt->max_fdset) {
+		newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
+			fdt->max_fdset, start);
 	}
 	
 	error = -EMFILE;
@@ -89,9 +96,15 @@ repeat:
 	if (error)
 		goto repeat;
 
-	if (start <= files->next_fd)
-		files->next_fd = newfd + 1;
-	
+	/*
+	 * We reacquired files_lock, so we are safe as long as
+	 * we reacquire the fdtable pointer and use it while holding
+	 * the lock, no one can free it during that time.
+	 */
+	fdt = files_fdtable(files);
+	if (start <= fdt->next_fd)
+		fdt->next_fd = newfd + 1;
+
 	error = newfd;
 	
 out:
@@ -101,13 +114,16 @@ out:
 static int dupfd(struct file *file, unsigned int start)
 {
 	struct files_struct * files = current->files;
+	struct fdtable *fdt;
 	int fd;
 
 	spin_lock(&files->file_lock);
 	fd = locate_fd(files, file, start);
 	if (fd >= 0) {
-		FD_SET(fd, files->open_fds);
-		FD_CLR(fd, files->close_on_exec);
+		/* locate_fd() may have expanded fdtable, load the ptr */
+		fdt = files_fdtable(files);
+		FD_SET(fd, fdt->open_fds);
+		FD_CLR(fd, fdt->close_on_exec);
 		spin_unlock(&files->file_lock);
 		fd_install(fd, file);
 	} else {
@@ -123,6 +139,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 	int err = -EBADF;
 	struct file * file, *tofree;
 	struct files_struct * files = current->files;
+	struct fdtable *fdt;
 
 	spin_lock(&files->file_lock);
 	if (!(file = fcheck(oldfd)))
@@ -148,13 +165,14 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 
 	/* Yes. It's a race. In user space. Nothing sane to do */
 	err = -EBUSY;
-	tofree = files->fd[newfd];
-	if (!tofree && FD_ISSET(newfd, files->open_fds))
+	fdt = files_fdtable(files);
+	tofree = fdt->fd[newfd];
+	if (!tofree && FD_ISSET(newfd, fdt->open_fds))
 		goto out_fput;
 
-	files->fd[newfd] = file;
-	FD_SET(newfd, files->open_fds);
-	FD_CLR(newfd, files->close_on_exec);
+	rcu_assign_pointer(fdt->fd[newfd], file);
+	FD_SET(newfd, fdt->open_fds);
+	FD_CLR(newfd, fdt->close_on_exec);
 	spin_unlock(&files->file_lock);
 
 	if (tofree)
diff --git a/fs/file.c b/fs/file.c
index 92b5f25985d..fd066b261c7 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -13,6 +13,25 @@
 #include <linux/vmalloc.h>
 #include <linux/file.h>
 #include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+
+struct fdtable_defer {
+	spinlock_t lock;
+	struct work_struct wq;
+	struct timer_list timer;
+	struct fdtable *next;
+};
+
+/*
+ * We use this list to defer free fdtables that have vmalloced
+ * sets/arrays. By keeping a per-cpu list, we avoid having to embed
+ * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in
+ * this per-task structure.
+ */
+static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
 
 
 /*
@@ -48,82 +67,139 @@ void free_fd_array(struct file **array, int num)
 		vfree(array);
 }
 
-/*
- * Expand the fd array in the files_struct.  Called with the files
- * spinlock held for write.
- */
+static void __free_fdtable(struct fdtable *fdt)
+{
+	free_fdset(fdt->open_fds, fdt->max_fdset);
+	free_fdset(fdt->close_on_exec, fdt->max_fdset);
+	free_fd_array(fdt->fd, fdt->max_fds);
+	kfree(fdt);
+}
 
-static int expand_fd_array(struct files_struct *files, int nr)
-	__releases(files->file_lock)
-	__acquires(files->file_lock)
+static void fdtable_timer(unsigned long data)
 {
-	struct file **new_fds;
-	int error, nfds;
+	struct fdtable_defer *fddef = (struct fdtable_defer *)data;
 
-	
-	error = -EMFILE;
-	if (files->max_fds >= NR_OPEN || nr >= NR_OPEN)
+	spin_lock(&fddef->lock);
+	/*
+	 * If someone already emptied the queue return.
+	 */
+	if (!fddef->next)
 		goto out;
+	if (!schedule_work(&fddef->wq))
+		mod_timer(&fddef->timer, 5);
+out:
+	spin_unlock(&fddef->lock);
+}
 
-	nfds = files->max_fds;
-	spin_unlock(&files->file_lock);
+static void free_fdtable_work(struct fdtable_defer *f)
+{
+	struct fdtable *fdt;
 
-	/* 
-	 * Expand to the max in easy steps, and keep expanding it until
-	 * we have enough for the requested fd array size. 
-	 */
+	spin_lock_bh(&f->lock);
+	fdt = f->next;
+	f->next = NULL;
+	spin_unlock_bh(&f->lock);
+	while(fdt) {
+		struct fdtable *next = fdt->next;
+		__free_fdtable(fdt);
+		fdt = next;
+	}
+}
 
-	do {
-#if NR_OPEN_DEFAULT < 256
-		if (nfds < 256)
-			nfds = 256;
-		else 
-#endif
-		if (nfds < (PAGE_SIZE / sizeof(struct file *)))
-			nfds = PAGE_SIZE / sizeof(struct file *);
-		else {
-			nfds = nfds * 2;
-			if (nfds > NR_OPEN)
-				nfds = NR_OPEN;
-		}
-	} while (nfds <= nr);
+static void free_fdtable_rcu(struct rcu_head *rcu)
+{
+	struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
+	int fdset_size, fdarray_size;
+	struct fdtable_defer *fddef;
 
-	error = -ENOMEM;
-	new_fds = alloc_fd_array(nfds);
-	spin_lock(&files->file_lock);
-	if (!new_fds)
-		goto out;
+	BUG_ON(!fdt);
+	fdset_size = fdt->max_fdset / 8;
+	fdarray_size = fdt->max_fds * sizeof(struct file *);
 
-	/* Copy the existing array and install the new pointer */
-
-	if (nfds > files->max_fds) {
-		struct file **old_fds;
-		int i;
-		
-		old_fds = xchg(&files->fd, new_fds);
-		i = xchg(&files->max_fds, nfds);
-
-		/* Don't copy/clear the array if we are creating a new
-		   fd array for fork() */
-		if (i) {
-			memcpy(new_fds, old_fds, i * sizeof(struct file *));
-			/* clear the remainder of the array */
-			memset(&new_fds[i], 0,
-			       (nfds-i) * sizeof(struct file *)); 
-
-			spin_unlock(&files->file_lock);
-			free_fd_array(old_fds, i);
-			spin_lock(&files->file_lock);
-		}
+	if (fdt->free_files) {
+		/*
+		 * The this fdtable was embedded in the files structure
+		 * and the files structure itself was getting destroyed.
+		 * It is now safe to free the files structure.
+		 */
+		kmem_cache_free(files_cachep, fdt->free_files);
+		return;
+	}
+	if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) {
+		/*
+		 * The fdtable was embedded
+		 */
+		return;
+	}
+	if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
+		kfree(fdt->open_fds);
+		kfree(fdt->close_on_exec);
+		kfree(fdt->fd);
+		kfree(fdt);
 	} else {
-		/* Somebody expanded the array while we slept ... */
-		spin_unlock(&files->file_lock);
-		free_fd_array(new_fds, nfds);
-		spin_lock(&files->file_lock);
+		fddef = &get_cpu_var(fdtable_defer_list);
+		spin_lock(&fddef->lock);
+		fdt->next = fddef->next;
+		fddef->next = fdt;
+		/*
+		 * vmallocs are handled from the workqueue context.
+		 * If the per-cpu workqueue is running, then we
+		 * defer work scheduling through a timer.
+		 */
+		if (!schedule_work(&fddef->wq))
+			mod_timer(&fddef->timer, 5);
+		spin_unlock(&fddef->lock);
+		put_cpu_var(fdtable_defer_list);
 	}
-	error = 0;
-out:
-	return error;
+}
+
+void free_fdtable(struct fdtable *fdt)
+{
+	if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE ||
+					fdt->max_fds > NR_OPEN_DEFAULT)
+		call_rcu(&fdt->rcu, free_fdtable_rcu);
+}
+
+/*
+ * Expand the fdset in the files_struct.  Called with the files spinlock
+ * held for write.
+ */
+static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
+{
+	int i;
+	int count;
+
+	BUG_ON(nfdt->max_fdset < fdt->max_fdset);
+	BUG_ON(nfdt->max_fds < fdt->max_fds);
+	/* Copy the existing tables and install the new pointers */
+
+	i = fdt->max_fdset / (sizeof(unsigned long) * 8);
+	count = (nfdt->max_fdset - fdt->max_fdset) / 8;
+
+	/*
+	 * Don't copy the entire array if the current fdset is
+	 * not yet initialised.
+	 */
+	if (i) {
+		memcpy (nfdt->open_fds, fdt->open_fds,
+						fdt->max_fdset/8);
+		memcpy (nfdt->close_on_exec, fdt->close_on_exec,
+						fdt->max_fdset/8);
+		memset (&nfdt->open_fds->fds_bits[i], 0, count);
+		memset (&nfdt->close_on_exec->fds_bits[i], 0, count);
+	}
+
+	/* Don't copy/clear the array if we are creating a new
+	   fd array for fork() */
+	if (fdt->max_fds) {
+		memcpy(nfdt->fd, fdt->fd,
+			fdt->max_fds * sizeof(struct file *));
+		/* clear the remainder of the array */
+		memset(&nfdt->fd[fdt->max_fds], 0,
+		       (nfdt->max_fds - fdt->max_fds) *
+					sizeof(struct file *));
+	}
+	nfdt->next_fd = fdt->next_fd;
 }
 
 /*
@@ -154,26 +230,21 @@ void free_fdset(fd_set *array, int num)
 		vfree(array);
 }
 
-/*
- * Expand the fdset in the files_struct.  Called with the files spinlock
- * held for write.
- */
-static int expand_fdset(struct files_struct *files, int nr)
-	__releases(file->file_lock)
-	__acquires(file->file_lock)
+static struct fdtable *alloc_fdtable(int nr)
 {
-	fd_set *new_openset = NULL, *new_execset = NULL;
-	int error, nfds = 0;
-
-	error = -EMFILE;
-	if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN)
-		goto out;
+	struct fdtable *fdt = NULL;
+	int nfds = 0;
+  	fd_set *new_openset = NULL, *new_execset = NULL;
+	struct file **new_fds;
 
-	nfds = files->max_fdset;
-	spin_unlock(&files->file_lock);
+	fdt = kmalloc(sizeof(*fdt), GFP_KERNEL);
+	if (!fdt)
+  		goto out;
+	memset(fdt, 0, sizeof(*fdt));
 
-	/* Expand to the max in easy steps */
-	do {
+	nfds = __FD_SETSIZE;
+  	/* Expand to the max in easy steps */
+  	do {
 		if (nfds < (PAGE_SIZE * 8))
 			nfds = PAGE_SIZE * 8;
 		else {
@@ -183,49 +254,88 @@ static int expand_fdset(struct files_struct *files, int nr)
 		}
 	} while (nfds <= nr);
 
-	error = -ENOMEM;
-	new_openset = alloc_fdset(nfds);
-	new_execset = alloc_fdset(nfds);
-	spin_lock(&files->file_lock);
-	if (!new_openset || !new_execset)
+  	new_openset = alloc_fdset(nfds);
+  	new_execset = alloc_fdset(nfds);
+  	if (!new_openset || !new_execset)
+  		goto out;
+	fdt->open_fds = new_openset;
+	fdt->close_on_exec = new_execset;
+	fdt->max_fdset = nfds;
+
+	nfds = NR_OPEN_DEFAULT;
+	/*
+	 * Expand to the max in easy steps, and keep expanding it until
+	 * we have enough for the requested fd array size.
+	 */
+	do {
+#if NR_OPEN_DEFAULT < 256
+		if (nfds < 256)
+			nfds = 256;
+		else
+#endif
+		if (nfds < (PAGE_SIZE / sizeof(struct file *)))
+			nfds = PAGE_SIZE / sizeof(struct file *);
+		else {
+			nfds = nfds * 2;
+			if (nfds > NR_OPEN)
+				nfds = NR_OPEN;
+  		}
+	} while (nfds <= nr);
+	new_fds = alloc_fd_array(nfds);
+	if (!new_fds)
+		goto out;
+	fdt->fd = new_fds;
+	fdt->max_fds = nfds;
+	fdt->free_files = NULL;
+	return fdt;
+out:
+  	if (new_openset)
+  		free_fdset(new_openset, nfds);
+  	if (new_execset)
+  		free_fdset(new_execset, nfds);
+	kfree(fdt);
+	return NULL;
+}
+
+/*
+ * Expands the file descriptor table - it will allocate a new fdtable and
+ * both fd array and fdset. It is expected to be called with the
+ * files_lock held.
+ */
+static int expand_fdtable(struct files_struct *files, int nr)
+	__releases(files->file_lock)
+	__acquires(files->file_lock)
+{
+	int error = 0;
+	struct fdtable *fdt;
+	struct fdtable *nfdt = NULL;
+
+	spin_unlock(&files->file_lock);
+	nfdt = alloc_fdtable(nr);
+	if (!nfdt) {
+		error = -ENOMEM;
+		spin_lock(&files->file_lock);
 		goto out;
+	}
 
-	error = 0;
-	
-	/* Copy the existing tables and install the new pointers */
-	if (nfds > files->max_fdset) {
-		int i = files->max_fdset / (sizeof(unsigned long) * 8);
-		int count = (nfds - files->max_fdset) / 8;
-		
-		/* 
-		 * Don't copy the entire array if the current fdset is
-		 * not yet initialised.  
-		 */
-		if (i) {
-			memcpy (new_openset, files->open_fds, files->max_fdset/8);
-			memcpy (new_execset, files->close_on_exec, files->max_fdset/8);
-			memset (&new_openset->fds_bits[i], 0, count);
-			memset (&new_execset->fds_bits[i], 0, count);
-		}
-		
-		nfds = xchg(&files->max_fdset, nfds);
-		new_openset = xchg(&files->open_fds, new_openset);
-		new_execset = xchg(&files->close_on_exec, new_execset);
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+	/*
+	 * Check again since another task may have expanded the
+	 * fd table while we dropped the lock
+	 */
+	if (nr >= fdt->max_fds || nr >= fdt->max_fdset) {
+		copy_fdtable(nfdt, fdt);
+	} else {
+		/* Somebody expanded while we dropped file_lock */
 		spin_unlock(&files->file_lock);
-		free_fdset (new_openset, nfds);
-		free_fdset (new_execset, nfds);
+		__free_fdtable(nfdt);
 		spin_lock(&files->file_lock);
-		return 0;
-	} 
-	/* Somebody expanded the array while we slept ... */
-
+		goto out;
+	}
+	rcu_assign_pointer(files->fdt, nfdt);
+	free_fdtable(fdt);
 out:
-	spin_unlock(&files->file_lock);
-	if (new_openset)
-		free_fdset(new_openset, nfds);
-	if (new_execset)
-		free_fdset(new_execset, nfds);
-	spin_lock(&files->file_lock);
 	return error;
 }
 
@@ -237,18 +347,39 @@ out:
 int expand_files(struct files_struct *files, int nr)
 {
 	int err, expand = 0;
+	struct fdtable *fdt;
 
-	if (nr >= files->max_fdset) {
-		expand = 1;
-		if ((err = expand_fdset(files, nr)))
+	fdt = files_fdtable(files);
+	if (nr >= fdt->max_fdset || nr >= fdt->max_fds) {
+		if (fdt->max_fdset >= NR_OPEN ||
+			fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) {
+			err = -EMFILE;
 			goto out;
-	}
-	if (nr >= files->max_fds) {
+		}
 		expand = 1;
-		if ((err = expand_fd_array(files, nr)))
+		if ((err = expand_fdtable(files, nr)))
 			goto out;
 	}
 	err = expand;
 out:
 	return err;
 }
+
+static void __devinit fdtable_defer_list_init(int cpu)
+{
+	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
+	spin_lock_init(&fddef->lock);
+	INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef);
+	init_timer(&fddef->timer);
+	fddef->timer.data = (unsigned long)fddef;
+	fddef->timer.function = fdtable_timer;
+	fddef->next = NULL;
+}
+
+void __init files_defer_init(void)
+{
+	int i;
+	/* Really early - can't use for_each_cpu */
+	for (i = 0; i < NR_CPUS; i++)
+		fdtable_defer_list_init(i);
+}
diff --git a/fs/file_table.c b/fs/file_table.c
index 1d3de78e6bc..4dc20554654 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -14,6 +14,7 @@
 #include <linux/fs.h>
 #include <linux/security.h>
 #include <linux/eventpoll.h>
+#include <linux/rcupdate.h>
 #include <linux/mount.h>
 #include <linux/cdev.h>
 #include <linux/fsnotify.h>
@@ -53,11 +54,17 @@ void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags)
 	spin_unlock_irqrestore(&filp_count_lock, flags);
 }
 
-static inline void file_free(struct file *f)
+static inline void file_free_rcu(struct rcu_head *head)
 {
+	struct file *f =  container_of(head, struct file, f_u.fu_rcuhead);
 	kmem_cache_free(filp_cachep, f);
 }
 
+static inline void file_free(struct file *f)
+{
+	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
+}
+
 /* Find an unused file structure and return a pointer to it.
  * Returns NULL, if there are no more free file structures or
  * we run out of memory.
@@ -88,8 +95,7 @@ struct file *get_empty_filp(void)
 	f->f_gid = current->fsgid;
 	rwlock_init(&f->f_owner.lock);
 	/* f->f_version: 0 */
-	INIT_LIST_HEAD(&f->f_list);
-	f->f_maxcount = INT_MAX;
+	INIT_LIST_HEAD(&f->f_u.fu_list);
 	return f;
 
 over:
@@ -111,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp);
 
 void fastcall fput(struct file *file)
 {
-	if (atomic_dec_and_test(&file->f_count))
+	if (rcuref_dec_and_test(&file->f_count))
 		__fput(file);
 }
 
@@ -157,11 +163,17 @@ struct file fastcall *fget(unsigned int fd)
 	struct file *file;
 	struct files_struct *files = current->files;
 
-	spin_lock(&files->file_lock);
+	rcu_read_lock();
 	file = fcheck_files(files, fd);
-	if (file)
-		get_file(file);
-	spin_unlock(&files->file_lock);
+	if (file) {
+		if (!rcuref_inc_lf(&file->f_count)) {
+			/* File object ref couldn't be taken */
+			rcu_read_unlock();
+			return NULL;
+		}
+	}
+	rcu_read_unlock();
+
 	return file;
 }
 
@@ -183,21 +195,25 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
 	if (likely((atomic_read(&files->count) == 1))) {
 		file = fcheck_files(files, fd);
 	} else {
-		spin_lock(&files->file_lock);
+		rcu_read_lock();
 		file = fcheck_files(files, fd);
 		if (file) {
-			get_file(file);
-			*fput_needed = 1;
+			if (rcuref_inc_lf(&file->f_count))
+				*fput_needed = 1;
+			else
+				/* Didn't get the reference, someone's freed */
+				file = NULL;
 		}
-		spin_unlock(&files->file_lock);
+		rcu_read_unlock();
 	}
+
 	return file;
 }
 
 
 void put_filp(struct file *file)
 {
-	if (atomic_dec_and_test(&file->f_count)) {
+	if (rcuref_dec_and_test(&file->f_count)) {
 		security_file_free(file);
 		file_kill(file);
 		file_free(file);
@@ -209,15 +225,15 @@ void file_move(struct file *file, struct list_head *list)
 	if (!list)
 		return;
 	file_list_lock();
-	list_move(&file->f_list, list);
+	list_move(&file->f_u.fu_list, list);
 	file_list_unlock();
 }
 
 void file_kill(struct file *file)
 {
-	if (!list_empty(&file->f_list)) {
+	if (!list_empty(&file->f_u.fu_list)) {
 		file_list_lock();
-		list_del_init(&file->f_list);
+		list_del_init(&file->f_u.fu_list);
 		file_list_unlock();
 	}
 }
@@ -229,7 +245,7 @@ int fs_may_remount_ro(struct super_block *sb)
 	/* Check that no files are currently opened for writing. */
 	file_list_lock();
 	list_for_each(p, &sb->s_files) {
-		struct file *file = list_entry(p, struct file, f_list);
+		struct file *file = list_entry(p, struct file, f_u.fu_list);
 		struct inode *inode = file->f_dentry->d_inode;
 
 		/* File with pending delete? */
@@ -258,4 +274,5 @@ void __init files_init(unsigned long mempages)
 	files_stat.max_files = n; 
 	if (files_stat.max_files < NR_FILE)
 		files_stat.max_files = NR_FILE;
+	files_defer_init();
 } 
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 44082bfdfec..9f1072836c8 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -12,6 +12,7 @@
 #include <linux/kmod.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/sched.h>	/* for 'current' */
 #include <asm/uaccess.h>
 
 /*
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index ac677ab262b..d0401dc68d4 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -38,7 +38,7 @@
 #include "vxfs_inode.h"
 
 
-static int	vxfs_immed_follow_link(struct dentry *, struct nameidata *);
+static void *	vxfs_immed_follow_link(struct dentry *, struct nameidata *);
 
 static int	vxfs_immed_readpage(struct file *, struct page *);
 
@@ -72,12 +72,12 @@ struct address_space_operations vxfs_immed_aops = {
  * Returns:
  *   Zero on success, else a negative error code.
  */
-static int
+static void *
 vxfs_immed_follow_link(struct dentry *dp, struct nameidata *np)
 {
 	struct vxfs_inode_info		*vip = VXFS_INO(dp->d_inode);
 	nd_set_link(np, vip->vii_immed.vi_immed);
-	return 0;
+	return NULL;
 }
 
 /**
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 27f66d3e8a0..6aa6fbe4f8e 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -155,7 +155,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)
 
 	sbp->s_flags |= MS_RDONLY;
 
-	infp = kcalloc(1, sizeof(*infp), GFP_KERNEL);
+	infp = kzalloc(sizeof(*infp), GFP_KERNEL);
 	if (!infp) {
 		printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n");
 		return -ENOMEM;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e94ab398b71..c27f8d4098b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -230,7 +230,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 			 * The inode is clean, unused
 			 */
 			list_move(&inode->i_list, &inode_unused);
-			inodes_stat.nr_unused++;
 		}
 	}
 	wake_up_inode(inode);
@@ -238,14 +237,20 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 }
 
 /*
- * Write out an inode's dirty pages.  Called under inode_lock.
+ * Write out an inode's dirty pages.  Called under inode_lock.  Either the
+ * caller has ref on the inode (either via __iget or via syscall against an fd)
+ * or the inode has I_WILL_FREE set (via generic_forget_inode)
  */
 static int
-__writeback_single_inode(struct inode *inode,
-			struct writeback_control *wbc)
+__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	wait_queue_head_t *wqh;
 
+	if (!atomic_read(&inode->i_count))
+		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
+	else
+		WARN_ON(inode->i_state & I_WILL_FREE);
+
 	if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) {
 		list_move(&inode->i_list, &inode->i_sb->s_dirty);
 		return 0;
@@ -259,11 +264,9 @@ __writeback_single_inode(struct inode *inode,
 
 		wqh = bit_waitqueue(&inode->i_state, __I_LOCK);
 		do {
-			__iget(inode);
 			spin_unlock(&inode_lock);
 			__wait_on_bit(wqh, &wq, inode_wait,
 							TASK_UNINTERRUPTIBLE);
-			iput(inode);
 			spin_lock(&inode_lock);
 		} while (inode->i_state & I_LOCK);
 	}
@@ -541,14 +544,15 @@ void sync_inodes(int wait)
 }
 
 /**
- *	write_inode_now	-	write an inode to disk
- *	@inode: inode to write to disk
- *	@sync: whether the write should be synchronous or not
+ * write_inode_now	-	write an inode to disk
+ * @inode: inode to write to disk
+ * @sync: whether the write should be synchronous or not
+ *
+ * This function commits an inode to disk immediately if it is dirty. This is
+ * primarily needed by knfsd.
  *
- *	This function commits an inode to disk immediately if it is
- *	dirty. This is primarily needed by knfsd.
+ * The caller must either have a ref on the inode or must have set I_WILL_FREE.
  */
- 
 int write_inode_now(struct inode *inode, int sync)
 {
 	int ret;
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
new file mode 100644
index 00000000000..c3e1f760cac
--- /dev/null
+++ b/fs/fuse/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the FUSE filesystem.
+#
+
+obj-$(CONFIG_FUSE_FS) += fuse.o
+
+fuse-objs := dev.o dir.o file.o inode.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
new file mode 100644
index 00000000000..a6f90a6c754
--- /dev/null
+++ b/fs/fuse/dev.c
@@ -0,0 +1,877 @@
+/*
+  FUSE: Filesystem in Userspace
+  Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>
+
+  This program can be distributed under the terms of the GNU GPL.
+  See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/uio.h>
+#include <linux/miscdevice.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+
+MODULE_ALIAS_MISCDEV(FUSE_MINOR);
+
+static kmem_cache_t *fuse_req_cachep;
+
+static inline struct fuse_conn *fuse_get_conn(struct file *file)
+{
+	struct fuse_conn *fc;
+	spin_lock(&fuse_lock);
+	fc = file->private_data;
+	if (fc && !fc->mounted)
+		fc = NULL;
+	spin_unlock(&fuse_lock);
+	return fc;
+}
+
+static inline void fuse_request_init(struct fuse_req *req)
+{
+	memset(req, 0, sizeof(*req));
+	INIT_LIST_HEAD(&req->list);
+	init_waitqueue_head(&req->waitq);
+	atomic_set(&req->count, 1);
+}
+
+struct fuse_req *fuse_request_alloc(void)
+{
+	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
+	if (req)
+		fuse_request_init(req);
+	return req;
+}
+
+void fuse_request_free(struct fuse_req *req)
+{
+	kmem_cache_free(fuse_req_cachep, req);
+}
+
+static inline void block_sigs(sigset_t *oldset)
+{
+	sigset_t mask;
+
+	siginitsetinv(&mask, sigmask(SIGKILL));
+	sigprocmask(SIG_BLOCK, &mask, oldset);
+}
+
+static inline void restore_sigs(sigset_t *oldset)
+{
+	sigprocmask(SIG_SETMASK, oldset, NULL);
+}
+
+void fuse_reset_request(struct fuse_req *req)
+{
+	int preallocated = req->preallocated;
+	BUG_ON(atomic_read(&req->count) != 1);
+	fuse_request_init(req);
+	req->preallocated = preallocated;
+}
+
+static void __fuse_get_request(struct fuse_req *req)
+{
+	atomic_inc(&req->count);
+}
+
+/* Must be called with > 1 refcount */
+static void __fuse_put_request(struct fuse_req *req)
+{
+	BUG_ON(atomic_read(&req->count) < 2);
+	atomic_dec(&req->count);
+}
+
+static struct fuse_req *do_get_request(struct fuse_conn *fc)
+{
+	struct fuse_req *req;
+
+	spin_lock(&fuse_lock);
+	BUG_ON(list_empty(&fc->unused_list));
+	req = list_entry(fc->unused_list.next, struct fuse_req, list);
+	list_del_init(&req->list);
+	spin_unlock(&fuse_lock);
+	fuse_request_init(req);
+	req->preallocated = 1;
+	req->in.h.uid = current->fsuid;
+	req->in.h.gid = current->fsgid;
+	req->in.h.pid = current->pid;
+	return req;
+}
+
+/* This can return NULL, but only in case it's interrupted by a SIGKILL */
+struct fuse_req *fuse_get_request(struct fuse_conn *fc)
+{
+	int intr;
+	sigset_t oldset;
+
+	block_sigs(&oldset);
+	intr = down_interruptible(&fc->outstanding_sem);
+	restore_sigs(&oldset);
+	return intr ? NULL : do_get_request(fc);
+}
+
+static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+	spin_lock(&fuse_lock);
+	if (req->preallocated)
+		list_add(&req->list, &fc->unused_list);
+	else
+		fuse_request_free(req);
+
+	/* If we are in debt decrease that first */
+	if (fc->outstanding_debt)
+		fc->outstanding_debt--;
+	else
+		up(&fc->outstanding_sem);
+	spin_unlock(&fuse_lock);
+}
+
+void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+	if (atomic_dec_and_test(&req->count))
+		fuse_putback_request(fc, req);
+}
+
+void fuse_release_background(struct fuse_req *req)
+{
+	iput(req->inode);
+	iput(req->inode2);
+	if (req->file)
+		fput(req->file);
+	spin_lock(&fuse_lock);
+	list_del(&req->bg_entry);
+	spin_unlock(&fuse_lock);
+}
+
+/*
+ * This function is called when a request is finished.  Either a reply
+ * has arrived or it was interrupted (and not yet sent) or some error
+ * occurred during communication with userspace, or the device file was
+ * closed.  It decreases the reference count for the request.  In case
+ * of a background request the reference to the stored objects are
+ * released.  The requester thread is woken up (if still waiting), and
+ * finally the request is either freed or put on the unused_list
+ *
+ * Called with fuse_lock, unlocks it
+ */
+static void request_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+	int putback;
+	req->finished = 1;
+	putback = atomic_dec_and_test(&req->count);
+	spin_unlock(&fuse_lock);
+	if (req->background) {
+		down_read(&fc->sbput_sem);
+		if (fc->mounted)
+			fuse_release_background(req);
+		up_read(&fc->sbput_sem);
+	}
+	wake_up(&req->waitq);
+	if (req->in.h.opcode == FUSE_INIT) {
+		int i;
+
+		if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION)
+			fc->conn_error = 1;
+
+		/* After INIT reply is received other requests can go
+		   out.  So do (FUSE_MAX_OUTSTANDING - 1) number of
+		   up()s on outstanding_sem.  The last up() is done in
+		   fuse_putback_request() */
+		for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
+			up(&fc->outstanding_sem);
+	}
+	if (putback)
+		fuse_putback_request(fc, req);
+}
+
+/*
+ * Unfortunately request interruption not just solves the deadlock
+ * problem, it causes problems too.  These stem from the fact, that an
+ * interrupted request is continued to be processed in userspace,
+ * while all the locks and object references (inode and file) held
+ * during the operation are released.
+ *
+ * To release the locks is exactly why there's a need to interrupt the
+ * request, so there's not a lot that can be done about this, except
+ * introduce additional locking in userspace.
+ *
+ * More important is to keep inode and file references until userspace
+ * has replied, otherwise FORGET and RELEASE could be sent while the
+ * inode/file is still used by the filesystem.
+ *
+ * For this reason the concept of "background" request is introduced.
+ * An interrupted request is backgrounded if it has been already sent
+ * to userspace.  Backgrounding involves getting an extra reference to
+ * inode(s) or file used in the request, and adding the request to
+ * fc->background list.  When a reply is received for a background
+ * request, the object references are released, and the request is
+ * removed from the list.  If the filesystem is unmounted while there
+ * are still background requests, the list is walked and references
+ * are released as if a reply was received.
+ *
+ * There's one more use for a background request.  The RELEASE message is
+ * always sent as background, since it doesn't return an error or
+ * data.
+ */
+static void background_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+	req->background = 1;
+	list_add(&req->bg_entry, &fc->background);
+	if (req->inode)
+		req->inode = igrab(req->inode);
+	if (req->inode2)
+		req->inode2 = igrab(req->inode2);
+	if (req->file)
+		get_file(req->file);
+}
+
+/* Called with fuse_lock held.  Releases, and then reacquires it. */
+static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
+{
+	sigset_t oldset;
+
+	spin_unlock(&fuse_lock);
+	block_sigs(&oldset);
+	wait_event_interruptible(req->waitq, req->finished);
+	restore_sigs(&oldset);
+	spin_lock(&fuse_lock);
+	if (req->finished)
+		return;
+
+	req->out.h.error = -EINTR;
+	req->interrupted = 1;
+	if (req->locked) {
+		/* This is uninterruptible sleep, because data is
+		   being copied to/from the buffers of req.  During
+		   locked state, there mustn't be any filesystem
+		   operation (e.g. page fault), since that could lead
+		   to deadlock */
+		spin_unlock(&fuse_lock);
+		wait_event(req->waitq, !req->locked);
+		spin_lock(&fuse_lock);
+	}
+	if (!req->sent && !list_empty(&req->list)) {
+		list_del(&req->list);
+		__fuse_put_request(req);
+	} else if (!req->finished && req->sent)
+		background_request(fc, req);
+}
+
+static unsigned len_args(unsigned numargs, struct fuse_arg *args)
+{
+	unsigned nbytes = 0;
+	unsigned i;
+
+	for (i = 0; i < numargs; i++)
+		nbytes += args[i].size;
+
+	return nbytes;
+}
+
+static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+	fc->reqctr++;
+	/* zero is special */
+	if (fc->reqctr == 0)
+		fc->reqctr = 1;
+	req->in.h.unique = fc->reqctr;
+	req->in.h.len = sizeof(struct fuse_in_header) +
+		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
+	if (!req->preallocated) {
+		/* If request is not preallocated (either FORGET or
+		   RELEASE), then still decrease outstanding_sem, so
+		   user can't open infinite number of files while not
+		   processing the RELEASE requests.  However for
+		   efficiency do it without blocking, so if down()
+		   would block, just increase the debt instead */
+		if (down_trylock(&fc->outstanding_sem))
+			fc->outstanding_debt++;
+	}
+	list_add_tail(&req->list, &fc->pending);
+	wake_up(&fc->waitq);
+}
+
+/*
+ * This can only be interrupted by a SIGKILL
+ */
+void request_send(struct fuse_conn *fc, struct fuse_req *req)
+{
+	req->isreply = 1;
+	spin_lock(&fuse_lock);
+	if (!fc->connected)
+		req->out.h.error = -ENOTCONN;
+	else if (fc->conn_error)
+		req->out.h.error = -ECONNREFUSED;
+	else {
+		queue_request(fc, req);
+		/* acquire extra reference, since request is still needed
+		   after request_end() */
+		__fuse_get_request(req);
+
+		request_wait_answer(fc, req);
+	}
+	spin_unlock(&fuse_lock);
+}
+
+static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
+{
+	spin_lock(&fuse_lock);
+	if (fc->connected) {
+		queue_request(fc, req);
+		spin_unlock(&fuse_lock);
+	} else {
+		req->out.h.error = -ENOTCONN;
+		request_end(fc, req);
+	}
+}
+
+void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
+{
+	req->isreply = 0;
+	request_send_nowait(fc, req);
+}
+
+void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
+{
+	req->isreply = 1;
+	spin_lock(&fuse_lock);
+	background_request(fc, req);
+	spin_unlock(&fuse_lock);
+	request_send_nowait(fc, req);
+}
+
+void fuse_send_init(struct fuse_conn *fc)
+{
+	/* This is called from fuse_read_super() so there's guaranteed
+	   to be a request available */
+	struct fuse_req *req = do_get_request(fc);
+	struct fuse_init_in_out *arg = &req->misc.init_in_out;
+	arg->major = FUSE_KERNEL_VERSION;
+	arg->minor = FUSE_KERNEL_MINOR_VERSION;
+	req->in.h.opcode = FUSE_INIT;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(*arg);
+	req->in.args[0].value = arg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(*arg);
+	req->out.args[0].value = arg;
+	request_send_background(fc, req);
+}
+
+/*
+ * Lock the request.  Up to the next unlock_request() there mustn't be
+ * anything that could cause a page-fault.  If the request was already
+ * interrupted bail out.
+ */
+static inline int lock_request(struct fuse_req *req)
+{
+	int err = 0;
+	if (req) {
+		spin_lock(&fuse_lock);
+		if (req->interrupted)
+			err = -ENOENT;
+		else
+			req->locked = 1;
+		spin_unlock(&fuse_lock);
+	}
+	return err;
+}
+
+/*
+ * Unlock request.  If it was interrupted during being locked, the
+ * requester thread is currently waiting for it to be unlocked, so
+ * wake it up.
+ */
+static inline void unlock_request(struct fuse_req *req)
+{
+	if (req) {
+		spin_lock(&fuse_lock);
+		req->locked = 0;
+		if (req->interrupted)
+			wake_up(&req->waitq);
+		spin_unlock(&fuse_lock);
+	}
+}
+
+struct fuse_copy_state {
+	int write;
+	struct fuse_req *req;
+	const struct iovec *iov;
+	unsigned long nr_segs;
+	unsigned long seglen;
+	unsigned long addr;
+	struct page *pg;
+	void *mapaddr;
+	void *buf;
+	unsigned len;
+};
+
+static void fuse_copy_init(struct fuse_copy_state *cs, int write,
+			   struct fuse_req *req, const struct iovec *iov,
+			   unsigned long nr_segs)
+{
+	memset(cs, 0, sizeof(*cs));
+	cs->write = write;
+	cs->req = req;
+	cs->iov = iov;
+	cs->nr_segs = nr_segs;
+}
+
+/* Unmap and put previous page of userspace buffer */
+static inline void fuse_copy_finish(struct fuse_copy_state *cs)
+{
+	if (cs->mapaddr) {
+		kunmap_atomic(cs->mapaddr, KM_USER0);
+		if (cs->write) {
+			flush_dcache_page(cs->pg);
+			set_page_dirty_lock(cs->pg);
+		}
+		put_page(cs->pg);
+		cs->mapaddr = NULL;
+	}
+}
+
+/*
+ * Get another pagefull of userspace buffer, and map it to kernel
+ * address space, and lock request
+ */
+static int fuse_copy_fill(struct fuse_copy_state *cs)
+{
+	unsigned long offset;
+	int err;
+
+	unlock_request(cs->req);
+	fuse_copy_finish(cs);
+	if (!cs->seglen) {
+		BUG_ON(!cs->nr_segs);
+		cs->seglen = cs->iov[0].iov_len;
+		cs->addr = (unsigned long) cs->iov[0].iov_base;
+		cs->iov ++;
+		cs->nr_segs --;
+	}
+	down_read(&current->mm->mmap_sem);
+	err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
+			     &cs->pg, NULL);
+	up_read(&current->mm->mmap_sem);
+	if (err < 0)
+		return err;
+	BUG_ON(err != 1);
+	offset = cs->addr % PAGE_SIZE;
+	cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
+	cs->buf = cs->mapaddr + offset;
+	cs->len = min(PAGE_SIZE - offset, cs->seglen);
+	cs->seglen -= cs->len;
+	cs->addr += cs->len;
+
+	return lock_request(cs->req);
+}
+
+/* Do as much copy to/from userspace buffer as we can */
+static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
+			       unsigned *size)
+{
+	unsigned ncpy = min(*size, cs->len);
+	if (val) {
+		if (cs->write)
+			memcpy(cs->buf, *val, ncpy);
+		else
+			memcpy(*val, cs->buf, ncpy);
+		*val += ncpy;
+	}
+	*size -= ncpy;
+	cs->len -= ncpy;
+	cs->buf += ncpy;
+	return ncpy;
+}
+
+/*
+ * Copy a page in the request to/from the userspace buffer.  Must be
+ * done atomically
+ */
+static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
+				 unsigned offset, unsigned count, int zeroing)
+{
+	if (page && zeroing && count < PAGE_SIZE) {
+		void *mapaddr = kmap_atomic(page, KM_USER1);
+		memset(mapaddr, 0, PAGE_SIZE);
+		kunmap_atomic(mapaddr, KM_USER1);
+	}
+	while (count) {
+		int err;
+		if (!cs->len && (err = fuse_copy_fill(cs)))
+			return err;
+		if (page) {
+			void *mapaddr = kmap_atomic(page, KM_USER1);
+			void *buf = mapaddr + offset;
+			offset += fuse_copy_do(cs, &buf, &count);
+			kunmap_atomic(mapaddr, KM_USER1);
+		} else
+			offset += fuse_copy_do(cs, NULL, &count);
+	}
+	if (page && !cs->write)
+		flush_dcache_page(page);
+	return 0;
+}
+
+/* Copy pages in the request to/from userspace buffer */
+static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
+			   int zeroing)
+{
+	unsigned i;
+	struct fuse_req *req = cs->req;
+	unsigned offset = req->page_offset;
+	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
+
+	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
+		struct page *page = req->pages[i];
+		int err = fuse_copy_page(cs, page, offset, count, zeroing);
+		if (err)
+			return err;
+
+		nbytes -= count;
+		count = min(nbytes, (unsigned) PAGE_SIZE);
+		offset = 0;
+	}
+	return 0;
+}
+
+/* Copy a single argument in the request to/from userspace buffer */
+static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
+{
+	while (size) {
+		int err;
+		if (!cs->len && (err = fuse_copy_fill(cs)))
+			return err;
+		fuse_copy_do(cs, &val, &size);
+	}
+	return 0;
+}
+
+/* Copy request arguments to/from userspace buffer */
+static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
+			  unsigned argpages, struct fuse_arg *args,
+			  int zeroing)
+{
+	int err = 0;
+	unsigned i;
+
+	for (i = 0; !err && i < numargs; i++)  {
+		struct fuse_arg *arg = &args[i];
+		if (i == numargs - 1 && argpages)
+			err = fuse_copy_pages(cs, arg->size, zeroing);
+		else
+			err = fuse_copy_one(cs, arg->value, arg->size);
+	}
+	return err;
+}
+
+/* Wait until a request is available on the pending list */
+static void request_wait(struct fuse_conn *fc)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue_exclusive(&fc->waitq, &wait);
+	while (fc->mounted && list_empty(&fc->pending)) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (signal_pending(current))
+			break;
+
+		spin_unlock(&fuse_lock);
+		schedule();
+		spin_lock(&fuse_lock);
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&fc->waitq, &wait);
+}
+
+/*
+ * Read a single request into the userspace filesystem's buffer.  This
+ * function waits until a request is available, then removes it from
+ * the pending list and copies request data to userspace buffer.  If
+ * no reply is needed (FORGET) or request has been interrupted or
+ * there was an error during the copying then it's finished by calling
+ * request_end().  Otherwise add it to the processing list, and set
+ * the 'sent' flag.
+ */
+static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
+			      unsigned long nr_segs, loff_t *off)
+{
+	int err;
+	struct fuse_conn *fc;
+	struct fuse_req *req;
+	struct fuse_in *in;
+	struct fuse_copy_state cs;
+	unsigned reqsize;
+
+	spin_lock(&fuse_lock);
+	fc = file->private_data;
+	err = -EPERM;
+	if (!fc)
+		goto err_unlock;
+	request_wait(fc);
+	err = -ENODEV;
+	if (!fc->mounted)
+		goto err_unlock;
+	err = -ERESTARTSYS;
+	if (list_empty(&fc->pending))
+		goto err_unlock;
+
+	req = list_entry(fc->pending.next, struct fuse_req, list);
+	list_del_init(&req->list);
+	spin_unlock(&fuse_lock);
+
+	in = &req->in;
+	reqsize = req->in.h.len;
+	fuse_copy_init(&cs, 1, req, iov, nr_segs);
+	err = -EINVAL;
+	if (iov_length(iov, nr_segs) >= reqsize) {
+		err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
+		if (!err)
+			err = fuse_copy_args(&cs, in->numargs, in->argpages,
+					     (struct fuse_arg *) in->args, 0);
+	}
+	fuse_copy_finish(&cs);
+
+	spin_lock(&fuse_lock);
+	req->locked = 0;
+	if (!err && req->interrupted)
+		err = -ENOENT;
+	if (err) {
+		if (!req->interrupted)
+			req->out.h.error = -EIO;
+		request_end(fc, req);
+		return err;
+	}
+	if (!req->isreply)
+		request_end(fc, req);
+	else {
+		req->sent = 1;
+		list_add_tail(&req->list, &fc->processing);
+		spin_unlock(&fuse_lock);
+	}
+	return reqsize;
+
+ err_unlock:
+	spin_unlock(&fuse_lock);
+	return err;
+}
+
+static ssize_t fuse_dev_read(struct file *file, char __user *buf,
+			     size_t nbytes, loff_t *off)
+{
+	struct iovec iov;
+	iov.iov_len = nbytes;
+	iov.iov_base = buf;
+	return fuse_dev_readv(file, &iov, 1, off);
+}
+
+/* Look up request on processing list by unique ID */
+static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
+{
+	struct list_head *entry;
+
+	list_for_each(entry, &fc->processing) {
+		struct fuse_req *req;
+		req = list_entry(entry, struct fuse_req, list);
+		if (req->in.h.unique == unique)
+			return req;
+	}
+	return NULL;
+}
+
+static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
+			 unsigned nbytes)
+{
+	unsigned reqsize = sizeof(struct fuse_out_header);
+
+	if (out->h.error)
+		return nbytes != reqsize ? -EINVAL : 0;
+
+	reqsize += len_args(out->numargs, out->args);
+
+	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
+		return -EINVAL;
+	else if (reqsize > nbytes) {
+		struct fuse_arg *lastarg = &out->args[out->numargs-1];
+		unsigned diffsize = reqsize - nbytes;
+		if (diffsize > lastarg->size)
+			return -EINVAL;
+		lastarg->size -= diffsize;
+	}
+	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
+			      out->page_zeroing);
+}
+
+/*
+ * Write a single reply to a request.  First the header is copied from
+ * the write buffer.  The request is then searched on the processing
+ * list by the unique ID found in the header.  If found, then remove
+ * it from the list and copy the rest of the buffer to the request.
+ * The request is finished by calling request_end()
+ */
+static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
+			       unsigned long nr_segs, loff_t *off)
+{
+	int err;
+	unsigned nbytes = iov_length(iov, nr_segs);
+	struct fuse_req *req;
+	struct fuse_out_header oh;
+	struct fuse_copy_state cs;
+	struct fuse_conn *fc = fuse_get_conn(file);
+	if (!fc)
+		return -ENODEV;
+
+	fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
+	if (nbytes < sizeof(struct fuse_out_header))
+		return -EINVAL;
+
+	err = fuse_copy_one(&cs, &oh, sizeof(oh));
+	if (err)
+		goto err_finish;
+	err = -EINVAL;
+	if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
+	    oh.len != nbytes)
+		goto err_finish;
+
+	spin_lock(&fuse_lock);
+	req = request_find(fc, oh.unique);
+	err = -EINVAL;
+	if (!req)
+		goto err_unlock;
+
+	list_del_init(&req->list);
+	if (req->interrupted) {
+		request_end(fc, req);
+		fuse_copy_finish(&cs);
+		return -ENOENT;
+	}
+	req->out.h = oh;
+	req->locked = 1;
+	cs.req = req;
+	spin_unlock(&fuse_lock);
+
+	err = copy_out_args(&cs, &req->out, nbytes);
+	fuse_copy_finish(&cs);
+
+	spin_lock(&fuse_lock);
+	req->locked = 0;
+	if (!err) {
+		if (req->interrupted)
+			err = -ENOENT;
+	} else if (!req->interrupted)
+		req->out.h.error = -EIO;
+	request_end(fc, req);
+
+	return err ? err : nbytes;
+
+ err_unlock:
+	spin_unlock(&fuse_lock);
+ err_finish:
+	fuse_copy_finish(&cs);
+	return err;
+}
+
+static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
+			      size_t nbytes, loff_t *off)
+{
+	struct iovec iov;
+	iov.iov_len = nbytes;
+	iov.iov_base = (char __user *) buf;
+	return fuse_dev_writev(file, &iov, 1, off);
+}
+
+static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
+{
+	struct fuse_conn *fc = fuse_get_conn(file);
+	unsigned mask = POLLOUT | POLLWRNORM;
+
+	if (!fc)
+		return -ENODEV;
+
+	poll_wait(file, &fc->waitq, wait);
+
+	spin_lock(&fuse_lock);
+	if (!list_empty(&fc->pending))
+                mask |= POLLIN | POLLRDNORM;
+	spin_unlock(&fuse_lock);
+
+	return mask;
+}
+
+/* Abort all requests on the given list (pending or processing) */
+static void end_requests(struct fuse_conn *fc, struct list_head *head)
+{
+	while (!list_empty(head)) {
+		struct fuse_req *req;
+		req = list_entry(head->next, struct fuse_req, list);
+		list_del_init(&req->list);
+		req->out.h.error = -ECONNABORTED;
+		request_end(fc, req);
+		spin_lock(&fuse_lock);
+	}
+}
+
+static int fuse_dev_release(struct inode *inode, struct file *file)
+{
+	struct fuse_conn *fc;
+
+	spin_lock(&fuse_lock);
+	fc = file->private_data;
+	if (fc) {
+		fc->connected = 0;
+		end_requests(fc, &fc->pending);
+		end_requests(fc, &fc->processing);
+		fuse_release_conn(fc);
+	}
+	spin_unlock(&fuse_lock);
+	return 0;
+}
+
+struct file_operations fuse_dev_operations = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.read		= fuse_dev_read,
+	.readv		= fuse_dev_readv,
+	.write		= fuse_dev_write,
+	.writev		= fuse_dev_writev,
+	.poll		= fuse_dev_poll,
+	.release	= fuse_dev_release,
+};
+
+static struct miscdevice fuse_miscdevice = {
+	.minor = FUSE_MINOR,
+	.name  = "fuse",
+	.fops = &fuse_dev_operations,
+};
+
+int __init fuse_dev_init(void)
+{
+	int err = -ENOMEM;
+	fuse_req_cachep = kmem_cache_create("fuse_request",
+					    sizeof(struct fuse_req),
+					    0, 0, NULL, NULL);
+	if (!fuse_req_cachep)
+		goto out;
+
+	err = misc_register(&fuse_miscdevice);
+	if (err)
+		goto out_cache_clean;
+
+	return 0;
+
+ out_cache_clean:
+	kmem_cache_destroy(fuse_req_cachep);
+ out:
+	return err;
+}
+
+void fuse_dev_cleanup(void)
+{
+	misc_deregister(&fuse_miscdevice);
+	kmem_cache_destroy(fuse_req_cachep);
+}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
new file mode 100644
index 00000000000..70dba721aca
--- /dev/null
+++ b/fs/fuse/dir.c
@@ -0,0 +1,989 @@
+/*
+  FUSE: Filesystem in Userspace
+  Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>
+
+  This program can be distributed under the terms of the GNU GPL.
+  See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/namei.h>
+
+static inline unsigned long time_to_jiffies(unsigned long sec,
+					    unsigned long nsec)
+{
+	struct timespec ts = {sec, nsec};
+	return jiffies + timespec_to_jiffies(&ts);
+}
+
+static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
+			     struct dentry *entry,
+			     struct fuse_entry_out *outarg)
+{
+	req->in.h.opcode = FUSE_LOOKUP;
+	req->in.h.nodeid = get_node_id(dir);
+	req->inode = dir;
+	req->in.numargs = 1;
+	req->in.args[0].size = entry->d_name.len + 1;
+	req->in.args[0].value = entry->d_name.name;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(struct fuse_entry_out);
+	req->out.args[0].value = outarg;
+}
+
+static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
+{
+	if (!entry->d_inode || is_bad_inode(entry->d_inode))
+		return 0;
+	else if (time_after(jiffies, entry->d_time)) {
+		int err;
+		struct fuse_entry_out outarg;
+		struct inode *inode = entry->d_inode;
+		struct fuse_inode *fi = get_fuse_inode(inode);
+		struct fuse_conn *fc = get_fuse_conn(inode);
+		struct fuse_req *req = fuse_get_request(fc);
+		if (!req)
+			return 0;
+
+		fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
+		request_send(fc, req);
+		err = req->out.h.error;
+		if (!err) {
+			if (outarg.nodeid != get_node_id(inode)) {
+				fuse_send_forget(fc, req, outarg.nodeid, 1);
+				return 0;
+			}
+			fi->nlookup ++;
+		}
+		fuse_put_request(fc, req);
+		if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+			return 0;
+
+		fuse_change_attributes(inode, &outarg.attr);
+		entry->d_time = time_to_jiffies(outarg.entry_valid,
+						outarg.entry_valid_nsec);
+		fi->i_time = time_to_jiffies(outarg.attr_valid,
+					     outarg.attr_valid_nsec);
+	}
+	return 1;
+}
+
+static struct dentry_operations fuse_dentry_operations = {
+	.d_revalidate	= fuse_dentry_revalidate,
+};
+
+static int fuse_lookup_iget(struct inode *dir, struct dentry *entry,
+			    struct inode **inodep)
+{
+	int err;
+	struct fuse_entry_out outarg;
+	struct inode *inode = NULL;
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	struct fuse_req *req;
+
+	if (entry->d_name.len > FUSE_NAME_MAX)
+		return -ENAMETOOLONG;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	fuse_lookup_init(req, dir, entry, &outarg);
+	request_send(fc, req);
+	err = req->out.h.error;
+	if (!err && (!outarg.nodeid || outarg.nodeid == FUSE_ROOT_ID))
+		err = -EIO;
+	if (!err) {
+		inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
+				  &outarg.attr);
+		if (!inode) {
+			fuse_send_forget(fc, req, outarg.nodeid, 1);
+			return -ENOMEM;
+		}
+	}
+	fuse_put_request(fc, req);
+	if (err && err != -ENOENT)
+		return err;
+
+	if (inode) {
+		struct fuse_inode *fi = get_fuse_inode(inode);
+		entry->d_time =	time_to_jiffies(outarg.entry_valid,
+						outarg.entry_valid_nsec);
+		fi->i_time = time_to_jiffies(outarg.attr_valid,
+					     outarg.attr_valid_nsec);
+	}
+
+	entry->d_op = &fuse_dentry_operations;
+	*inodep = inode;
+	return 0;
+}
+
+void fuse_invalidate_attr(struct inode *inode)
+{
+	get_fuse_inode(inode)->i_time = jiffies - 1;
+}
+
+static void fuse_invalidate_entry(struct dentry *entry)
+{
+	d_invalidate(entry);
+	entry->d_time = jiffies - 1;
+}
+
+static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
+			    struct inode *dir, struct dentry *entry,
+			    int mode)
+{
+	struct fuse_entry_out outarg;
+	struct inode *inode;
+	struct fuse_inode *fi;
+	int err;
+
+	req->in.h.nodeid = get_node_id(dir);
+	req->inode = dir;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	if (err) {
+		fuse_put_request(fc, req);
+		return err;
+	}
+	if (!outarg.nodeid || outarg.nodeid == FUSE_ROOT_ID) {
+		fuse_put_request(fc, req);
+		return -EIO;
+	}
+	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
+			  &outarg.attr);
+	if (!inode) {
+		fuse_send_forget(fc, req, outarg.nodeid, 1);
+		return -ENOMEM;
+	}
+	fuse_put_request(fc, req);
+
+	/* Don't allow userspace to do really stupid things... */
+	if ((inode->i_mode ^ mode) & S_IFMT) {
+		iput(inode);
+		return -EIO;
+	}
+
+	entry->d_time = time_to_jiffies(outarg.entry_valid,
+					outarg.entry_valid_nsec);
+
+	fi = get_fuse_inode(inode);
+	fi->i_time = time_to_jiffies(outarg.attr_valid,
+				     outarg.attr_valid_nsec);
+
+	d_instantiate(entry, inode);
+	fuse_invalidate_attr(dir);
+	return 0;
+}
+
+static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
+		      dev_t rdev)
+{
+	struct fuse_mknod_in inarg;
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.mode = mode;
+	inarg.rdev = new_encode_dev(rdev);
+	req->in.h.opcode = FUSE_MKNOD;
+	req->in.numargs = 2;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = entry->d_name.len + 1;
+	req->in.args[1].value = entry->d_name.name;
+	return create_new_entry(fc, req, dir, entry, mode);
+}
+
+static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
+		       struct nameidata *nd)
+{
+	return fuse_mknod(dir, entry, mode, 0);
+}
+
+static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
+{
+	struct fuse_mkdir_in inarg;
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.mode = mode;
+	req->in.h.opcode = FUSE_MKDIR;
+	req->in.numargs = 2;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = entry->d_name.len + 1;
+	req->in.args[1].value = entry->d_name.name;
+	return create_new_entry(fc, req, dir, entry, S_IFDIR);
+}
+
+static int fuse_symlink(struct inode *dir, struct dentry *entry,
+			const char *link)
+{
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	unsigned len = strlen(link) + 1;
+	struct fuse_req *req;
+
+	if (len > FUSE_SYMLINK_MAX)
+		return -ENAMETOOLONG;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	req->in.h.opcode = FUSE_SYMLINK;
+	req->in.numargs = 2;
+	req->in.args[0].size = entry->d_name.len + 1;
+	req->in.args[0].value = entry->d_name.name;
+	req->in.args[1].size = len;
+	req->in.args[1].value = link;
+	return create_new_entry(fc, req, dir, entry, S_IFLNK);
+}
+
+static int fuse_unlink(struct inode *dir, struct dentry *entry)
+{
+	int err;
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	req->in.h.opcode = FUSE_UNLINK;
+	req->in.h.nodeid = get_node_id(dir);
+	req->inode = dir;
+	req->in.numargs = 1;
+	req->in.args[0].size = entry->d_name.len + 1;
+	req->in.args[0].value = entry->d_name.name;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err) {
+		struct inode *inode = entry->d_inode;
+
+		/* Set nlink to zero so the inode can be cleared, if
+                   the inode does have more links this will be
+                   discovered at the next lookup/getattr */
+		inode->i_nlink = 0;
+		fuse_invalidate_attr(inode);
+		fuse_invalidate_attr(dir);
+	} else if (err == -EINTR)
+		fuse_invalidate_entry(entry);
+	return err;
+}
+
+static int fuse_rmdir(struct inode *dir, struct dentry *entry)
+{
+	int err;
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	req->in.h.opcode = FUSE_RMDIR;
+	req->in.h.nodeid = get_node_id(dir);
+	req->inode = dir;
+	req->in.numargs = 1;
+	req->in.args[0].size = entry->d_name.len + 1;
+	req->in.args[0].value = entry->d_name.name;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err) {
+		entry->d_inode->i_nlink = 0;
+		fuse_invalidate_attr(dir);
+	} else if (err == -EINTR)
+		fuse_invalidate_entry(entry);
+	return err;
+}
+
+static int fuse_rename(struct inode *olddir, struct dentry *oldent,
+		       struct inode *newdir, struct dentry *newent)
+{
+	int err;
+	struct fuse_rename_in inarg;
+	struct fuse_conn *fc = get_fuse_conn(olddir);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.newdir = get_node_id(newdir);
+	req->in.h.opcode = FUSE_RENAME;
+	req->in.h.nodeid = get_node_id(olddir);
+	req->inode = olddir;
+	req->inode2 = newdir;
+	req->in.numargs = 3;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = oldent->d_name.len + 1;
+	req->in.args[1].value = oldent->d_name.name;
+	req->in.args[2].size = newent->d_name.len + 1;
+	req->in.args[2].value = newent->d_name.name;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err) {
+		fuse_invalidate_attr(olddir);
+		if (olddir != newdir)
+			fuse_invalidate_attr(newdir);
+	} else if (err == -EINTR) {
+		/* If request was interrupted, DEITY only knows if the
+		   rename actually took place.  If the invalidation
+		   fails (e.g. some process has CWD under the renamed
+		   directory), then there can be inconsistency between
+		   the dcache and the real filesystem.  Tough luck. */
+		fuse_invalidate_entry(oldent);
+		if (newent->d_inode)
+			fuse_invalidate_entry(newent);
+	}
+
+	return err;
+}
+
+static int fuse_link(struct dentry *entry, struct inode *newdir,
+		     struct dentry *newent)
+{
+	int err;
+	struct fuse_link_in inarg;
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.oldnodeid = get_node_id(inode);
+	req->in.h.opcode = FUSE_LINK;
+	req->inode2 = inode;
+	req->in.numargs = 2;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = newent->d_name.len + 1;
+	req->in.args[1].value = newent->d_name.name;
+	err = create_new_entry(fc, req, newdir, newent, inode->i_mode);
+	/* Contrary to "normal" filesystems it can happen that link
+	   makes two "logical" inodes point to the same "physical"
+	   inode.  We invalidate the attributes of the old one, so it
+	   will reflect changes in the backing inode (link count,
+	   etc.)
+	*/
+	if (!err || err == -EINTR)
+		fuse_invalidate_attr(inode);
+	return err;
+}
+
+int fuse_do_getattr(struct inode *inode)
+{
+	int err;
+	struct fuse_attr_out arg;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	req->in.h.opcode = FUSE_GETATTR;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(arg);
+	req->out.args[0].value = &arg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err) {
+		if ((inode->i_mode ^ arg.attr.mode) & S_IFMT) {
+			make_bad_inode(inode);
+			err = -EIO;
+		} else {
+			struct fuse_inode *fi = get_fuse_inode(inode);
+			fuse_change_attributes(inode, &arg.attr);
+			fi->i_time = time_to_jiffies(arg.attr_valid,
+						     arg.attr_valid_nsec);
+		}
+	}
+	return err;
+}
+
+/*
+ * Calling into a user-controlled filesystem gives the filesystem
+ * daemon ptrace-like capabilities over the requester process.  This
+ * means, that the filesystem daemon is able to record the exact
+ * filesystem operations performed, and can also control the behavior
+ * of the requester process in otherwise impossible ways.  For example
+ * it can delay the operation for arbitrary length of time allowing
+ * DoS against the requester.
+ *
+ * For this reason only those processes can call into the filesystem,
+ * for which the owner of the mount has ptrace privilege.  This
+ * excludes processes started by other users, suid or sgid processes.
+ */
+static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
+{
+	if (fc->flags & FUSE_ALLOW_OTHER)
+		return 1;
+
+	if (task->euid == fc->user_id &&
+	    task->suid == fc->user_id &&
+	    task->uid == fc->user_id &&
+	    task->egid == fc->group_id &&
+	    task->sgid == fc->group_id &&
+	    task->gid == fc->group_id)
+		return 1;
+
+	return 0;
+}
+
+static int fuse_revalidate(struct dentry *entry)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	if (!fuse_allow_task(fc, current))
+		return -EACCES;
+	if (get_node_id(inode) != FUSE_ROOT_ID &&
+	    time_before_eq(jiffies, fi->i_time))
+		return 0;
+
+	return fuse_do_getattr(inode);
+}
+
+static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	if (!fuse_allow_task(fc, current))
+		return -EACCES;
+	else if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
+		int err = generic_permission(inode, mask, NULL);
+
+		/* If permission is denied, try to refresh file
+		   attributes.  This is also needed, because the root
+		   node will at first have no permissions */
+		if (err == -EACCES) {
+		 	err = fuse_do_getattr(inode);
+			if (!err)
+				err = generic_permission(inode, mask, NULL);
+		}
+
+		/* FIXME: Need some mechanism to revoke permissions:
+		   currently if the filesystem suddenly changes the
+		   file mode, we will not be informed about it, and
+		   continue to allow access to the file/directory.
+
+		   This is actually not so grave, since the user can
+		   simply keep access to the file/directory anyway by
+		   keeping it open... */
+
+		return err;
+	} else {
+		int mode = inode->i_mode;
+		if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
+                    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
+                        return -EROFS;
+		if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO))
+			return -EACCES;
+		return 0;
+	}
+}
+
+static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
+			 void *dstbuf, filldir_t filldir)
+{
+	while (nbytes >= FUSE_NAME_OFFSET) {
+		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
+		size_t reclen = FUSE_DIRENT_SIZE(dirent);
+		int over;
+		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
+			return -EIO;
+		if (reclen > nbytes)
+			break;
+
+		over = filldir(dstbuf, dirent->name, dirent->namelen,
+			       file->f_pos, dirent->ino, dirent->type);
+		if (over)
+			break;
+
+		buf += reclen;
+		nbytes -= reclen;
+		file->f_pos = dirent->off;
+	}
+
+	return 0;
+}
+
+static inline size_t fuse_send_readdir(struct fuse_req *req, struct file *file,
+				       struct inode *inode, loff_t pos,
+				       size_t count)
+{
+	return fuse_send_read_common(req, file, inode, pos, count, 1);
+}
+
+static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
+{
+	int err;
+	size_t nbytes;
+	struct page *page;
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page) {
+		fuse_put_request(fc, req);
+		return -ENOMEM;
+	}
+	req->num_pages = 1;
+	req->pages[0] = page;
+	nbytes = fuse_send_readdir(req, file, inode, file->f_pos, PAGE_SIZE);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err)
+		err = parse_dirfile(page_address(page), nbytes, file, dstbuf,
+				    filldir);
+
+	__free_page(page);
+	fuse_invalidate_attr(inode); /* atime changed */
+	return err;
+}
+
+static char *read_link(struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req = fuse_get_request(fc);
+	char *link;
+
+	if (!req)
+		return ERR_PTR(-EINTR);
+
+	link = (char *) __get_free_page(GFP_KERNEL);
+	if (!link) {
+		link = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+	req->in.h.opcode = FUSE_READLINK;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->out.argvar = 1;
+	req->out.numargs = 1;
+	req->out.args[0].size = PAGE_SIZE - 1;
+	req->out.args[0].value = link;
+	request_send(fc, req);
+	if (req->out.h.error) {
+		free_page((unsigned long) link);
+		link = ERR_PTR(req->out.h.error);
+	} else
+		link[req->out.args[0].size] = '\0';
+ out:
+	fuse_put_request(fc, req);
+	fuse_invalidate_attr(inode); /* atime changed */
+	return link;
+}
+
+static void free_link(char *link)
+{
+	if (!IS_ERR(link))
+		free_page((unsigned long) link);
+}
+
+static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	nd_set_link(nd, read_link(dentry));
+	return NULL;
+}
+
+static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
+{
+	free_link(nd_get_link(nd));
+}
+
+static int fuse_dir_open(struct inode *inode, struct file *file)
+{
+	return fuse_open_common(inode, file, 1);
+}
+
+static int fuse_dir_release(struct inode *inode, struct file *file)
+{
+	return fuse_release_common(inode, file, 1);
+}
+
+static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
+{
+	/* nfsd can call this with no file */
+	return file ? fuse_fsync_common(file, de, datasync, 1) : 0;
+}
+
+static unsigned iattr_to_fattr(struct iattr *iattr, struct fuse_attr *fattr)
+{
+	unsigned ivalid = iattr->ia_valid;
+	unsigned fvalid = 0;
+
+	memset(fattr, 0, sizeof(*fattr));
+
+	if (ivalid & ATTR_MODE)
+		fvalid |= FATTR_MODE,   fattr->mode = iattr->ia_mode;
+	if (ivalid & ATTR_UID)
+		fvalid |= FATTR_UID,    fattr->uid = iattr->ia_uid;
+	if (ivalid & ATTR_GID)
+		fvalid |= FATTR_GID,    fattr->gid = iattr->ia_gid;
+	if (ivalid & ATTR_SIZE)
+		fvalid |= FATTR_SIZE,   fattr->size = iattr->ia_size;
+	/* You can only _set_ these together (they may change by themselves) */
+	if ((ivalid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) {
+		fvalid |= FATTR_ATIME | FATTR_MTIME;
+		fattr->atime = iattr->ia_atime.tv_sec;
+		fattr->mtime = iattr->ia_mtime.tv_sec;
+	}
+
+	return fvalid;
+}
+
+static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_req *req;
+	struct fuse_setattr_in inarg;
+	struct fuse_attr_out outarg;
+	int err;
+	int is_truncate = 0;
+
+	if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
+		err = inode_change_ok(inode, attr);
+		if (err)
+			return err;
+	}
+
+	if (attr->ia_valid & ATTR_SIZE) {
+		unsigned long limit;
+		is_truncate = 1;
+		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+		if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
+			send_sig(SIGXFSZ, current, 0);
+			return -EFBIG;
+		}
+	}
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.valid = iattr_to_fattr(attr, &inarg.attr);
+	req->in.h.opcode = FUSE_SETATTR;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err) {
+		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+			make_bad_inode(inode);
+			err = -EIO;
+		} else {
+			if (is_truncate) {
+				loff_t origsize = i_size_read(inode);
+				i_size_write(inode, outarg.attr.size);
+				if (origsize > outarg.attr.size)
+					vmtruncate(inode, outarg.attr.size);
+			}
+			fuse_change_attributes(inode, &outarg.attr);
+			fi->i_time = time_to_jiffies(outarg.attr_valid,
+						     outarg.attr_valid_nsec);
+		}
+	} else if (err == -EINTR)
+		fuse_invalidate_attr(inode);
+
+	return err;
+}
+
+static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
+			struct kstat *stat)
+{
+	struct inode *inode = entry->d_inode;
+	int err = fuse_revalidate(entry);
+	if (!err)
+		generic_fillattr(inode, stat);
+
+	return err;
+}
+
+static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
+				  struct nameidata *nd)
+{
+	struct inode *inode;
+	int err = fuse_lookup_iget(dir, entry, &inode);
+	if (err)
+		return ERR_PTR(err);
+	if (inode && S_ISDIR(inode->i_mode)) {
+		/* Don't allow creating an alias to a directory  */
+		struct dentry *alias = d_find_alias(inode);
+		if (alias) {
+			dput(alias);
+			iput(inode);
+			return ERR_PTR(-EIO);
+		}
+	}
+	d_add(entry, inode);
+	return NULL;
+}
+
+static int fuse_setxattr(struct dentry *entry, const char *name,
+			 const void *value, size_t size, int flags)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	struct fuse_setxattr_in inarg;
+	int err;
+
+	if (size > FUSE_XATTR_SIZE_MAX)
+		return -E2BIG;
+
+	if (fc->no_setxattr)
+		return -EOPNOTSUPP;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.size = size;
+	inarg.flags = flags;
+	req->in.h.opcode = FUSE_SETXATTR;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 3;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = strlen(name) + 1;
+	req->in.args[1].value = name;
+	req->in.args[2].size = size;
+	req->in.args[2].value = value;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (err == -ENOSYS) {
+		fc->no_setxattr = 1;
+		err = -EOPNOTSUPP;
+	}
+	return err;
+}
+
+static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
+			     void *value, size_t size)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	struct fuse_getxattr_in inarg;
+	struct fuse_getxattr_out outarg;
+	ssize_t ret;
+
+	if (fc->no_getxattr)
+		return -EOPNOTSUPP;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.size = size;
+	req->in.h.opcode = FUSE_GETXATTR;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 2;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = strlen(name) + 1;
+	req->in.args[1].value = name;
+	/* This is really two different operations rolled into one */
+	req->out.numargs = 1;
+	if (size) {
+		req->out.argvar = 1;
+		req->out.args[0].size = size;
+		req->out.args[0].value = value;
+	} else {
+		req->out.args[0].size = sizeof(outarg);
+		req->out.args[0].value = &outarg;
+	}
+	request_send(fc, req);
+	ret = req->out.h.error;
+	if (!ret)
+		ret = size ? req->out.args[0].size : outarg.size;
+	else {
+		if (ret == -ENOSYS) {
+			fc->no_getxattr = 1;
+			ret = -EOPNOTSUPP;
+		}
+	}
+	fuse_put_request(fc, req);
+	return ret;
+}
+
+static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	struct fuse_getxattr_in inarg;
+	struct fuse_getxattr_out outarg;
+	ssize_t ret;
+
+	if (fc->no_listxattr)
+		return -EOPNOTSUPP;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.size = size;
+	req->in.h.opcode = FUSE_LISTXATTR;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	/* This is really two different operations rolled into one */
+	req->out.numargs = 1;
+	if (size) {
+		req->out.argvar = 1;
+		req->out.args[0].size = size;
+		req->out.args[0].value = list;
+	} else {
+		req->out.args[0].size = sizeof(outarg);
+		req->out.args[0].value = &outarg;
+	}
+	request_send(fc, req);
+	ret = req->out.h.error;
+	if (!ret)
+		ret = size ? req->out.args[0].size : outarg.size;
+	else {
+		if (ret == -ENOSYS) {
+			fc->no_listxattr = 1;
+			ret = -EOPNOTSUPP;
+		}
+	}
+	fuse_put_request(fc, req);
+	return ret;
+}
+
+static int fuse_removexattr(struct dentry *entry, const char *name)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	int err;
+
+	if (fc->no_removexattr)
+		return -EOPNOTSUPP;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	req->in.h.opcode = FUSE_REMOVEXATTR;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 1;
+	req->in.args[0].size = strlen(name) + 1;
+	req->in.args[0].value = name;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (err == -ENOSYS) {
+		fc->no_removexattr = 1;
+		err = -EOPNOTSUPP;
+	}
+	return err;
+}
+
+static struct inode_operations fuse_dir_inode_operations = {
+	.lookup		= fuse_lookup,
+	.mkdir		= fuse_mkdir,
+	.symlink	= fuse_symlink,
+	.unlink		= fuse_unlink,
+	.rmdir		= fuse_rmdir,
+	.rename		= fuse_rename,
+	.link		= fuse_link,
+	.setattr	= fuse_setattr,
+	.create		= fuse_create,
+	.mknod		= fuse_mknod,
+	.permission	= fuse_permission,
+	.getattr	= fuse_getattr,
+	.setxattr	= fuse_setxattr,
+	.getxattr	= fuse_getxattr,
+	.listxattr	= fuse_listxattr,
+	.removexattr	= fuse_removexattr,
+};
+
+static struct file_operations fuse_dir_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+	.readdir	= fuse_readdir,
+	.open		= fuse_dir_open,
+	.release	= fuse_dir_release,
+	.fsync		= fuse_dir_fsync,
+};
+
+static struct inode_operations fuse_common_inode_operations = {
+	.setattr	= fuse_setattr,
+	.permission	= fuse_permission,
+	.getattr	= fuse_getattr,
+	.setxattr	= fuse_setxattr,
+	.getxattr	= fuse_getxattr,
+	.listxattr	= fuse_listxattr,
+	.removexattr	= fuse_removexattr,
+};
+
+static struct inode_operations fuse_symlink_inode_operations = {
+	.setattr	= fuse_setattr,
+	.follow_link	= fuse_follow_link,
+	.put_link	= fuse_put_link,
+	.readlink	= generic_readlink,
+	.getattr	= fuse_getattr,
+	.setxattr	= fuse_setxattr,
+	.getxattr	= fuse_getxattr,
+	.listxattr	= fuse_listxattr,
+	.removexattr	= fuse_removexattr,
+};
+
+void fuse_init_common(struct inode *inode)
+{
+	inode->i_op = &fuse_common_inode_operations;
+}
+
+void fuse_init_dir(struct inode *inode)
+{
+	inode->i_op = &fuse_dir_inode_operations;
+	inode->i_fop = &fuse_dir_operations;
+}
+
+void fuse_init_symlink(struct inode *inode)
+{
+	inode->i_op = &fuse_symlink_inode_operations;
+}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
new file mode 100644
index 00000000000..657ab11c173
--- /dev/null
+++ b/fs/fuse/file.c
@@ -0,0 +1,559 @@
+/*
+  FUSE: Filesystem in Userspace
+  Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>
+
+  This program can be distributed under the terms of the GNU GPL.
+  See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+static struct file_operations fuse_direct_io_file_operations;
+
+int fuse_open_common(struct inode *inode, struct file *file, int isdir)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	struct fuse_open_in inarg;
+	struct fuse_open_out outarg;
+	struct fuse_file *ff;
+	int err;
+
+	/* VFS checks this, but only _after_ ->open() */
+	if (file->f_flags & O_DIRECT)
+		return -EINVAL;
+
+	err = generic_file_open(inode, file);
+	if (err)
+		return err;
+
+	/* If opening the root node, no lookup has been performed on
+	   it, so the attributes must be refreshed */
+	if (get_node_id(inode) == FUSE_ROOT_ID) {
+		int err = fuse_do_getattr(inode);
+		if (err)
+		 	return err;
+	}
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	err = -ENOMEM;
+	ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
+	if (!ff)
+		goto out_put_request;
+
+	ff->release_req = fuse_request_alloc();
+	if (!ff->release_req) {
+		kfree(ff);
+		goto out_put_request;
+	}
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+	req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	if (err) {
+		fuse_request_free(ff->release_req);
+		kfree(ff);
+	} else {
+		if (!isdir && (outarg.open_flags & FOPEN_DIRECT_IO))
+			file->f_op = &fuse_direct_io_file_operations;
+		if (!(outarg.open_flags & FOPEN_KEEP_CACHE))
+			invalidate_inode_pages(inode->i_mapping);
+		ff->fh = outarg.fh;
+		file->private_data = ff;
+	}
+
+ out_put_request:
+	fuse_put_request(fc, req);
+	return err;
+}
+
+int fuse_release_common(struct inode *inode, struct file *file, int isdir)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_req *req = ff->release_req;
+	struct fuse_release_in *inarg = &req->misc.release_in;
+
+	inarg->fh = ff->fh;
+	inarg->flags = file->f_flags & ~O_EXCL;
+	req->in.h.opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(struct fuse_release_in);
+	req->in.args[0].value = inarg;
+	request_send_background(fc, req);
+	kfree(ff);
+
+	/* Return value is ignored by VFS */
+	return 0;
+}
+
+static int fuse_open(struct inode *inode, struct file *file)
+{
+	return fuse_open_common(inode, file, 0);
+}
+
+static int fuse_release(struct inode *inode, struct file *file)
+{
+	return fuse_release_common(inode, file, 0);
+}
+
+static int fuse_flush(struct file *file)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_req *req;
+	struct fuse_flush_in inarg;
+	int err;
+
+	if (fc->no_flush)
+		return 0;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.fh = ff->fh;
+	req->in.h.opcode = FUSE_FLUSH;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->file = file;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (err == -ENOSYS) {
+		fc->no_flush = 1;
+		err = 0;
+	}
+	return err;
+}
+
+int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
+		      int isdir)
+{
+	struct inode *inode = de->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_req *req;
+	struct fuse_fsync_in inarg;
+	int err;
+
+	if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
+		return 0;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.fh = ff->fh;
+	inarg.fsync_flags = datasync ? 1 : 0;
+	req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->file = file;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (err == -ENOSYS) {
+		if (isdir)
+			fc->no_fsyncdir = 1;
+		else
+			fc->no_fsync = 1;
+		err = 0;
+	}
+	return err;
+}
+
+static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
+{
+	return fuse_fsync_common(file, de, datasync, 0);
+}
+
+size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
+			     struct inode *inode, loff_t pos, size_t count,
+			     int isdir)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_read_in inarg;
+
+	memset(&inarg, 0, sizeof(struct fuse_read_in));
+	inarg.fh = ff->fh;
+	inarg.offset = pos;
+	inarg.size = count;
+	req->in.h.opcode = isdir ? FUSE_READDIR : FUSE_READ;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->file = file;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(struct fuse_read_in);
+	req->in.args[0].value = &inarg;
+	req->out.argpages = 1;
+	req->out.argvar = 1;
+	req->out.numargs = 1;
+	req->out.args[0].size = count;
+	request_send(fc, req);
+	return req->out.args[0].size;
+}
+
+static inline size_t fuse_send_read(struct fuse_req *req, struct file *file,
+				    struct inode *inode, loff_t pos,
+				    size_t count)
+{
+	return fuse_send_read_common(req, file, inode, pos, count, 0);
+}
+
+static int fuse_readpage(struct file *file, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	loff_t pos = (loff_t) page->index << PAGE_CACHE_SHIFT;
+	struct fuse_req *req = fuse_get_request(fc);
+	int err = -EINTR;
+	if (!req)
+		goto out;
+
+	req->out.page_zeroing = 1;
+	req->num_pages = 1;
+	req->pages[0] = page;
+	fuse_send_read(req, file, inode, pos, PAGE_CACHE_SIZE);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err)
+		SetPageUptodate(page);
+	fuse_invalidate_attr(inode); /* atime changed */
+ out:
+	unlock_page(page);
+	return err;
+}
+
+static int fuse_send_readpages(struct fuse_req *req, struct file *file,
+			       struct inode *inode)
+{
+	loff_t pos = (loff_t) req->pages[0]->index << PAGE_CACHE_SHIFT;
+	size_t count = req->num_pages << PAGE_CACHE_SHIFT;
+	unsigned i;
+	req->out.page_zeroing = 1;
+	fuse_send_read(req, file, inode, pos, count);
+	for (i = 0; i < req->num_pages; i++) {
+		struct page *page = req->pages[i];
+		if (!req->out.h.error)
+			SetPageUptodate(page);
+		unlock_page(page);
+	}
+	return req->out.h.error;
+}
+
+struct fuse_readpages_data {
+	struct fuse_req *req;
+	struct file *file;
+	struct inode *inode;
+};
+
+static int fuse_readpages_fill(void *_data, struct page *page)
+{
+	struct fuse_readpages_data *data = _data;
+	struct fuse_req *req = data->req;
+	struct inode *inode = data->inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	if (req->num_pages &&
+	    (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+	     (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
+	     req->pages[req->num_pages - 1]->index + 1 != page->index)) {
+		int err = fuse_send_readpages(req, data->file, inode);
+		if (err) {
+			unlock_page(page);
+			return err;
+		}
+		fuse_reset_request(req);
+	}
+	req->pages[req->num_pages] = page;
+	req->num_pages ++;
+	return 0;
+}
+
+static int fuse_readpages(struct file *file, struct address_space *mapping,
+			  struct list_head *pages, unsigned nr_pages)
+{
+	struct inode *inode = mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_readpages_data data;
+	int err;
+	data.file = file;
+	data.inode = inode;
+	data.req = fuse_get_request(fc);
+	if (!data.req)
+		return -EINTR;
+
+	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
+	if (!err && data.req->num_pages)
+		err = fuse_send_readpages(data.req, file, inode);
+	fuse_put_request(fc, data.req);
+	fuse_invalidate_attr(inode); /* atime changed */
+	return err;
+}
+
+static size_t fuse_send_write(struct fuse_req *req, struct file *file,
+			      struct inode *inode, loff_t pos, size_t count)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_write_in inarg;
+	struct fuse_write_out outarg;
+
+	memset(&inarg, 0, sizeof(struct fuse_write_in));
+	inarg.fh = ff->fh;
+	inarg.offset = pos;
+	inarg.size = count;
+	req->in.h.opcode = FUSE_WRITE;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->file = file;
+	req->in.argpages = 1;
+	req->in.numargs = 2;
+	req->in.args[0].size = sizeof(struct fuse_write_in);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = count;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(struct fuse_write_out);
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	return outarg.size;
+}
+
+static int fuse_prepare_write(struct file *file, struct page *page,
+			      unsigned offset, unsigned to)
+{
+	/* No op */
+	return 0;
+}
+
+static int fuse_commit_write(struct file *file, struct page *page,
+			     unsigned offset, unsigned to)
+{
+	int err;
+	size_t nres;
+	unsigned count = to - offset;
+	struct inode *inode = page->mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + offset;
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	req->num_pages = 1;
+	req->pages[0] = page;
+	req->page_offset = offset;
+	nres = fuse_send_write(req, file, inode, pos, count);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err && nres != count)
+		err = -EIO;
+	if (!err) {
+		pos += count;
+		if (pos > i_size_read(inode))
+			i_size_write(inode, pos);
+
+		if (offset == 0 && to == PAGE_CACHE_SIZE) {
+			clear_page_dirty(page);
+			SetPageUptodate(page);
+		}
+	}
+	fuse_invalidate_attr(inode);
+	return err;
+}
+
+static void fuse_release_user_pages(struct fuse_req *req, int write)
+{
+	unsigned i;
+
+	for (i = 0; i < req->num_pages; i++) {
+		struct page *page = req->pages[i];
+		if (write)
+			set_page_dirty_lock(page);
+		put_page(page);
+	}
+}
+
+static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
+			       unsigned nbytes, int write)
+{
+	unsigned long user_addr = (unsigned long) buf;
+	unsigned offset = user_addr & ~PAGE_MASK;
+	int npages;
+
+	/* This doesn't work with nfsd */
+	if (!current->mm)
+		return -EPERM;
+
+	nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
+	npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	npages = min(npages, FUSE_MAX_PAGES_PER_REQ);
+	down_read(&current->mm->mmap_sem);
+	npages = get_user_pages(current, current->mm, user_addr, npages, write,
+				0, req->pages, NULL);
+	up_read(&current->mm->mmap_sem);
+	if (npages < 0)
+		return npages;
+
+	req->num_pages = npages;
+	req->page_offset = offset;
+	return 0;
+}
+
+static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+			      size_t count, loff_t *ppos, int write)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	size_t nmax = write ? fc->max_write : fc->max_read;
+	loff_t pos = *ppos;
+	ssize_t res = 0;
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	while (count) {
+		size_t tmp;
+		size_t nres;
+		size_t nbytes = min(count, nmax);
+		int err = fuse_get_user_pages(req, buf, nbytes, !write);
+		if (err) {
+			res = err;
+			break;
+		}
+		tmp = (req->num_pages << PAGE_SHIFT) - req->page_offset;
+		nbytes = min(nbytes, tmp);
+		if (write)
+			nres = fuse_send_write(req, file, inode, pos, nbytes);
+		else
+			nres = fuse_send_read(req, file, inode, pos, nbytes);
+		fuse_release_user_pages(req, !write);
+		if (req->out.h.error) {
+			if (!res)
+				res = req->out.h.error;
+			break;
+		} else if (nres > nbytes) {
+			res = -EIO;
+			break;
+		}
+		count -= nres;
+		res += nres;
+		pos += nres;
+		buf += nres;
+		if (nres != nbytes)
+			break;
+		if (count)
+			fuse_reset_request(req);
+	}
+	fuse_put_request(fc, req);
+	if (res > 0) {
+		if (write && pos > i_size_read(inode))
+			i_size_write(inode, pos);
+		*ppos = pos;
+	}
+	fuse_invalidate_attr(inode);
+
+	return res;
+}
+
+static ssize_t fuse_direct_read(struct file *file, char __user *buf,
+				     size_t count, loff_t *ppos)
+{
+	return fuse_direct_io(file, buf, count, ppos, 0);
+}
+
+static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	ssize_t res;
+	/* Don't allow parallel writes to the same file */
+	down(&inode->i_sem);
+	res = fuse_direct_io(file, buf, count, ppos, 1);
+	up(&inode->i_sem);
+	return res;
+}
+
+static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED)) {
+		if ((vma->vm_flags & VM_WRITE))
+			return -ENODEV;
+		else
+			vma->vm_flags &= ~VM_MAYWRITE;
+	}
+	return generic_file_mmap(file, vma);
+}
+
+static int fuse_set_page_dirty(struct page *page)
+{
+	printk("fuse_set_page_dirty: should not happen\n");
+	dump_stack();
+	return 0;
+}
+
+static struct file_operations fuse_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= generic_file_read,
+	.write		= generic_file_write,
+	.mmap		= fuse_file_mmap,
+	.open		= fuse_open,
+	.flush		= fuse_flush,
+	.release	= fuse_release,
+	.fsync		= fuse_fsync,
+	.sendfile	= generic_file_sendfile,
+};
+
+static struct file_operations fuse_direct_io_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= fuse_direct_read,
+	.write		= fuse_direct_write,
+	.open		= fuse_open,
+	.flush		= fuse_flush,
+	.release	= fuse_release,
+	.fsync		= fuse_fsync,
+	/* no mmap and sendfile */
+};
+
+static struct address_space_operations fuse_file_aops  = {
+	.readpage	= fuse_readpage,
+	.prepare_write	= fuse_prepare_write,
+	.commit_write	= fuse_commit_write,
+	.readpages	= fuse_readpages,
+	.set_page_dirty	= fuse_set_page_dirty,
+};
+
+void fuse_init_file_inode(struct inode *inode)
+{
+	inode->i_fop = &fuse_file_operations;
+	inode->i_data.a_ops = &fuse_file_aops;
+}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
new file mode 100644
index 00000000000..5cb456f572c
--- /dev/null
+++ b/fs/fuse/fuse_i.h
@@ -0,0 +1,451 @@
+/*
+  FUSE: Filesystem in Userspace
+  Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>
+
+  This program can be distributed under the terms of the GNU GPL.
+  See the file COPYING.
+*/
+
+#include <linux/fuse.h>
+#include <linux/fs.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/backing-dev.h>
+#include <asm/semaphore.h>
+
+/** Max number of pages that can be used in a single read request */
+#define FUSE_MAX_PAGES_PER_REQ 32
+
+/** If more requests are outstanding, then the operation will block */
+#define FUSE_MAX_OUTSTANDING 10
+
+/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
+    module will check permissions based on the file mode.  Otherwise no
+    permission checking is done in the kernel */
+#define FUSE_DEFAULT_PERMISSIONS (1 << 0)
+
+/** If the FUSE_ALLOW_OTHER flag is given, then not only the user
+    doing the mount will be allowed to access the filesystem */
+#define FUSE_ALLOW_OTHER         (1 << 1)
+
+
+/** FUSE inode */
+struct fuse_inode {
+	/** Inode data */
+	struct inode inode;
+
+	/** Unique ID, which identifies the inode between userspace
+	 * and kernel */
+	u64 nodeid;
+
+	/** Number of lookups on this inode */
+	u64 nlookup;
+
+	/** The request used for sending the FORGET message */
+	struct fuse_req *forget_req;
+
+	/** Time in jiffies until the file attributes are valid */
+	unsigned long i_time;
+};
+
+/** FUSE specific file data */
+struct fuse_file {
+	/** Request reserved for flush and release */
+	struct fuse_req *release_req;
+
+	/** File handle used by userspace */
+	u64 fh;
+};
+
+/** One input argument of a request */
+struct fuse_in_arg {
+	unsigned size;
+	const void *value;
+};
+
+/** The request input */
+struct fuse_in {
+	/** The request header */
+	struct fuse_in_header h;
+
+	/** True if the data for the last argument is in req->pages */
+	unsigned argpages:1;
+
+	/** Number of arguments */
+	unsigned numargs;
+
+	/** Array of arguments */
+	struct fuse_in_arg args[3];
+};
+
+/** One output argument of a request */
+struct fuse_arg {
+	unsigned size;
+	void *value;
+};
+
+/** The request output */
+struct fuse_out {
+	/** Header returned from userspace */
+	struct fuse_out_header h;
+
+	/** Last argument is variable length (can be shorter than
+	    arg->size) */
+	unsigned argvar:1;
+
+	/** Last argument is a list of pages to copy data to */
+	unsigned argpages:1;
+
+	/** Zero partially or not copied pages */
+	unsigned page_zeroing:1;
+
+	/** Number or arguments */
+	unsigned numargs;
+
+	/** Array of arguments */
+	struct fuse_arg args[3];
+};
+
+struct fuse_req;
+struct fuse_conn;
+
+/**
+ * A request to the client
+ */
+struct fuse_req {
+	/** This can be on either unused_list, pending or processing
+	    lists in fuse_conn */
+	struct list_head list;
+
+	/** Entry on the background list */
+	struct list_head bg_entry;
+
+	/** refcount */
+	atomic_t count;
+
+	/** True if the request has reply */
+	unsigned isreply:1;
+
+	/** The request is preallocated */
+	unsigned preallocated:1;
+
+	/** The request was interrupted */
+	unsigned interrupted:1;
+
+	/** Request is sent in the background */
+	unsigned background:1;
+
+	/** Data is being copied to/from the request */
+	unsigned locked:1;
+
+	/** Request has been sent to userspace */
+	unsigned sent:1;
+
+	/** The request is finished */
+	unsigned finished:1;
+
+	/** The request input */
+	struct fuse_in in;
+
+	/** The request output */
+	struct fuse_out out;
+
+	/** Used to wake up the task waiting for completion of request*/
+	wait_queue_head_t waitq;
+
+	/** Data for asynchronous requests */
+	union {
+		struct fuse_forget_in forget_in;
+		struct fuse_release_in release_in;
+		struct fuse_init_in_out init_in_out;
+	} misc;
+
+	/** page vector */
+	struct page *pages[FUSE_MAX_PAGES_PER_REQ];
+
+	/** number of pages in vector */
+	unsigned num_pages;
+
+	/** offset of data on first page */
+	unsigned page_offset;
+
+	/** Inode used in the request */
+	struct inode *inode;
+
+	/** Second inode used in the request (or NULL) */
+	struct inode *inode2;
+
+	/** File used in the request (or NULL) */
+	struct file *file;
+};
+
+/**
+ * A Fuse connection.
+ *
+ * This structure is created, when the filesystem is mounted, and is
+ * destroyed, when the client device is closed and the filesystem is
+ * unmounted.
+ */
+struct fuse_conn {
+	/** Reference count */
+	int count;
+
+	/** The user id for this mount */
+	uid_t user_id;
+
+	/** The group id for this mount */
+	gid_t group_id;
+
+	/** The fuse mount flags for this mount */
+	unsigned flags;
+
+	/** Maximum read size */
+	unsigned max_read;
+
+	/** Maximum write size */
+	unsigned max_write;
+
+	/** Readers of the connection are waiting on this */
+	wait_queue_head_t waitq;
+
+	/** The list of pending requests */
+	struct list_head pending;
+
+	/** The list of requests being processed */
+	struct list_head processing;
+
+	/** Requests put in the background (RELEASE or any other
+	    interrupted request) */
+	struct list_head background;
+
+	/** Controls the maximum number of outstanding requests */
+	struct semaphore outstanding_sem;
+
+	/** This counts the number of outstanding requests if
+	    outstanding_sem would go negative */
+	unsigned outstanding_debt;
+
+	/** RW semaphore for exclusion with fuse_put_super() */
+	struct rw_semaphore sbput_sem;
+
+	/** The list of unused requests */
+	struct list_head unused_list;
+
+	/** The next unique request id */
+	u64 reqctr;
+
+	/** Mount is active */
+	unsigned mounted : 1;
+
+	/** Connection established */
+	unsigned connected : 1;
+
+	/** Connection failed (version mismatch) */
+	unsigned conn_error : 1;
+
+	/** Is fsync not implemented by fs? */
+	unsigned no_fsync : 1;
+
+	/** Is fsyncdir not implemented by fs? */
+	unsigned no_fsyncdir : 1;
+
+	/** Is flush not implemented by fs? */
+	unsigned no_flush : 1;
+
+	/** Is setxattr not implemented by fs? */
+	unsigned no_setxattr : 1;
+
+	/** Is getxattr not implemented by fs? */
+	unsigned no_getxattr : 1;
+
+	/** Is listxattr not implemented by fs? */
+	unsigned no_listxattr : 1;
+
+	/** Is removexattr not implemented by fs? */
+	unsigned no_removexattr : 1;
+
+	/** Backing dev info */
+	struct backing_dev_info bdi;
+};
+
+static inline struct fuse_conn **get_fuse_conn_super_p(struct super_block *sb)
+{
+	return (struct fuse_conn **) &sb->s_fs_info;
+}
+
+static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
+{
+	return *get_fuse_conn_super_p(sb);
+}
+
+static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
+{
+	return get_fuse_conn_super(inode->i_sb);
+}
+
+static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
+{
+	return container_of(inode, struct fuse_inode, inode);
+}
+
+static inline u64 get_node_id(struct inode *inode)
+{
+	return get_fuse_inode(inode)->nodeid;
+}
+
+/** Device operations */
+extern struct file_operations fuse_dev_operations;
+
+/**
+ * This is the single global spinlock which protects FUSE's structures
+ *
+ * The following data is protected by this lock:
+ *
+ *  - the private_data field of the device file
+ *  - the s_fs_info field of the super block
+ *  - unused_list, pending, processing lists in fuse_conn
+ *  - background list in fuse_conn
+ *  - the unique request ID counter reqctr in fuse_conn
+ *  - the sb (super_block) field in fuse_conn
+ *  - the file (device file) field in fuse_conn
+ */
+extern spinlock_t fuse_lock;
+
+/**
+ * Get a filled in inode
+ */
+struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
+			int generation, struct fuse_attr *attr);
+
+/**
+ * Send FORGET command
+ */
+void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
+		      unsigned long nodeid, u64 nlookup);
+
+/**
+ * Send READ or READDIR request
+ */
+size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
+			     struct inode *inode, loff_t pos, size_t count,
+			     int isdir);
+
+/**
+ * Send OPEN or OPENDIR request
+ */
+int fuse_open_common(struct inode *inode, struct file *file, int isdir);
+
+/**
+ * Send RELEASE or RELEASEDIR request
+ */
+int fuse_release_common(struct inode *inode, struct file *file, int isdir);
+
+/**
+ * Send FSYNC or FSYNCDIR request
+ */
+int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
+		      int isdir);
+
+/**
+ * Initialize file operations on a regular file
+ */
+void fuse_init_file_inode(struct inode *inode);
+
+/**
+ * Initialize inode operations on regular files and special files
+ */
+void fuse_init_common(struct inode *inode);
+
+/**
+ * Initialize inode and file operations on a directory
+ */
+void fuse_init_dir(struct inode *inode);
+
+/**
+ * Initialize inode operations on a symlink
+ */
+void fuse_init_symlink(struct inode *inode);
+
+/**
+ * Change attributes of an inode
+ */
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr);
+
+/**
+ * Check if the connection can be released, and if yes, then free the
+ * connection structure
+ */
+void fuse_release_conn(struct fuse_conn *fc);
+
+/**
+ * Initialize the client device
+ */
+int fuse_dev_init(void);
+
+/**
+ * Cleanup the client device
+ */
+void fuse_dev_cleanup(void);
+
+/**
+ * Allocate a request
+ */
+struct fuse_req *fuse_request_alloc(void);
+
+/**
+ * Free a request
+ */
+void fuse_request_free(struct fuse_req *req);
+
+/**
+ * Reinitialize a request, the preallocated flag is left unmodified
+ */
+void fuse_reset_request(struct fuse_req *req);
+
+/**
+ * Reserve a preallocated request
+ */
+struct fuse_req *fuse_get_request(struct fuse_conn *fc);
+
+/**
+ * Decrement reference count of a request.  If count goes to zero put
+ * on unused list (preallocated) or free request (not preallocated).
+ */
+void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request (synchronous)
+ */
+void request_send(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request with no reply
+ */
+void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request in the background
+ */
+void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Release inodes and file associated with background request
+ */
+void fuse_release_background(struct fuse_req *req);
+
+/**
+ * Get the attributes of a file
+ */
+int fuse_do_getattr(struct inode *inode);
+
+/**
+ * Invalidate inode attributes
+ */
+void fuse_invalidate_attr(struct inode *inode);
+
+/**
+ * Send the INIT message
+ */
+void fuse_send_init(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
new file mode 100644
index 00000000000..e69a546844d
--- /dev/null
+++ b/fs/fuse/inode.c
@@ -0,0 +1,591 @@
+/*
+  FUSE: Filesystem in Userspace
+  Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>
+
+  This program can be distributed under the terms of the GNU GPL.
+  See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/parser.h>
+#include <linux/statfs.h>
+
+MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
+MODULE_DESCRIPTION("Filesystem in Userspace");
+MODULE_LICENSE("GPL");
+
+spinlock_t fuse_lock;
+static kmem_cache_t *fuse_inode_cachep;
+
+#define FUSE_SUPER_MAGIC 0x65735546
+
+struct fuse_mount_data {
+	int fd;
+	unsigned rootmode;
+	unsigned user_id;
+	unsigned group_id;
+	unsigned fd_present : 1;
+	unsigned rootmode_present : 1;
+	unsigned user_id_present : 1;
+	unsigned group_id_present : 1;
+	unsigned flags;
+	unsigned max_read;
+};
+
+static struct inode *fuse_alloc_inode(struct super_block *sb)
+{
+	struct inode *inode;
+	struct fuse_inode *fi;
+
+	inode = kmem_cache_alloc(fuse_inode_cachep, SLAB_KERNEL);
+	if (!inode)
+		return NULL;
+
+	fi = get_fuse_inode(inode);
+	fi->i_time = jiffies - 1;
+	fi->nodeid = 0;
+	fi->nlookup = 0;
+	fi->forget_req = fuse_request_alloc();
+	if (!fi->forget_req) {
+		kmem_cache_free(fuse_inode_cachep, inode);
+		return NULL;
+	}
+
+	return inode;
+}
+
+static void fuse_destroy_inode(struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	if (fi->forget_req)
+		fuse_request_free(fi->forget_req);
+	kmem_cache_free(fuse_inode_cachep, inode);
+}
+
+static void fuse_read_inode(struct inode *inode)
+{
+	/* No op */
+}
+
+void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
+		      unsigned long nodeid, u64 nlookup)
+{
+	struct fuse_forget_in *inarg = &req->misc.forget_in;
+	inarg->nlookup = nlookup;
+	req->in.h.opcode = FUSE_FORGET;
+	req->in.h.nodeid = nodeid;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(struct fuse_forget_in);
+	req->in.args[0].value = inarg;
+	request_send_noreply(fc, req);
+}
+
+static void fuse_clear_inode(struct inode *inode)
+{
+	if (inode->i_sb->s_flags & MS_ACTIVE) {
+		struct fuse_conn *fc = get_fuse_conn(inode);
+		struct fuse_inode *fi = get_fuse_inode(inode);
+		fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup);
+		fi->forget_req = NULL;
+	}
+}
+
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
+{
+	if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size)
+		invalidate_inode_pages(inode->i_mapping);
+
+	inode->i_ino     = attr->ino;
+	inode->i_mode    = (inode->i_mode & S_IFMT) + (attr->mode & 07777);
+	inode->i_nlink   = attr->nlink;
+	inode->i_uid     = attr->uid;
+	inode->i_gid     = attr->gid;
+	i_size_write(inode, attr->size);
+	inode->i_blksize = PAGE_CACHE_SIZE;
+	inode->i_blocks  = attr->blocks;
+	inode->i_atime.tv_sec   = attr->atime;
+	inode->i_atime.tv_nsec  = attr->atimensec;
+	inode->i_mtime.tv_sec   = attr->mtime;
+	inode->i_mtime.tv_nsec  = attr->mtimensec;
+	inode->i_ctime.tv_sec   = attr->ctime;
+	inode->i_ctime.tv_nsec  = attr->ctimensec;
+}
+
+static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
+{
+	inode->i_mode = attr->mode & S_IFMT;
+	i_size_write(inode, attr->size);
+	if (S_ISREG(inode->i_mode)) {
+		fuse_init_common(inode);
+		fuse_init_file_inode(inode);
+	} else if (S_ISDIR(inode->i_mode))
+		fuse_init_dir(inode);
+	else if (S_ISLNK(inode->i_mode))
+		fuse_init_symlink(inode);
+	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+		 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+		fuse_init_common(inode);
+		init_special_inode(inode, inode->i_mode,
+				   new_decode_dev(attr->rdev));
+	} else {
+		/* Don't let user create weird files */
+		inode->i_mode = S_IFREG;
+		fuse_init_common(inode);
+		fuse_init_file_inode(inode);
+	}
+}
+
+static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
+{
+	unsigned long nodeid = *(unsigned long *) _nodeidp;
+	if (get_node_id(inode) == nodeid)
+		return 1;
+	else
+		return 0;
+}
+
+static int fuse_inode_set(struct inode *inode, void *_nodeidp)
+{
+	unsigned long nodeid = *(unsigned long *) _nodeidp;
+	get_fuse_inode(inode)->nodeid = nodeid;
+	return 0;
+}
+
+struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
+			int generation, struct fuse_attr *attr)
+{
+	struct inode *inode;
+	struct fuse_inode *fi;
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+	int retried = 0;
+
+ retry:
+	inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
+	if (!inode)
+		return NULL;
+
+	if ((inode->i_state & I_NEW)) {
+		inode->i_flags |= S_NOATIME|S_NOCMTIME;
+		inode->i_generation = generation;
+		inode->i_data.backing_dev_info = &fc->bdi;
+		fuse_init_inode(inode, attr);
+		unlock_new_inode(inode);
+	} else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
+		BUG_ON(retried);
+		/* Inode has changed type, any I/O on the old should fail */
+		make_bad_inode(inode);
+		iput(inode);
+		retried = 1;
+		goto retry;
+	}
+
+	fi = get_fuse_inode(inode);
+	fi->nlookup ++;
+	fuse_change_attributes(inode, attr);
+	return inode;
+}
+
+static void fuse_put_super(struct super_block *sb)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+	down_write(&fc->sbput_sem);
+	while (!list_empty(&fc->background))
+		fuse_release_background(list_entry(fc->background.next,
+						   struct fuse_req, bg_entry));
+
+	spin_lock(&fuse_lock);
+	fc->mounted = 0;
+	fc->user_id = 0;
+	fc->group_id = 0;
+	fc->flags = 0;
+	/* Flush all readers on this fs */
+	wake_up_all(&fc->waitq);
+	up_write(&fc->sbput_sem);
+	fuse_release_conn(fc);
+	spin_unlock(&fuse_lock);
+}
+
+static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
+{
+	stbuf->f_type    = FUSE_SUPER_MAGIC;
+	stbuf->f_bsize   = attr->bsize;
+	stbuf->f_blocks  = attr->blocks;
+	stbuf->f_bfree   = attr->bfree;
+	stbuf->f_bavail  = attr->bavail;
+	stbuf->f_files   = attr->files;
+	stbuf->f_ffree   = attr->ffree;
+	stbuf->f_namelen = attr->namelen;
+	/* fsid is left zero */
+}
+
+static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+	struct fuse_req *req;
+	struct fuse_statfs_out outarg;
+	int err;
+
+        req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	req->in.numargs = 0;
+	req->in.h.opcode = FUSE_STATFS;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	if (!err)
+		convert_fuse_statfs(buf, &outarg.st);
+	fuse_put_request(fc, req);
+	return err;
+}
+
+enum {
+	OPT_FD,
+	OPT_ROOTMODE,
+	OPT_USER_ID,
+	OPT_GROUP_ID,
+	OPT_DEFAULT_PERMISSIONS,
+	OPT_ALLOW_OTHER,
+	OPT_MAX_READ,
+	OPT_ERR
+};
+
+static match_table_t tokens = {
+	{OPT_FD,			"fd=%u"},
+	{OPT_ROOTMODE,			"rootmode=%o"},
+	{OPT_USER_ID,			"user_id=%u"},
+	{OPT_GROUP_ID,			"group_id=%u"},
+	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
+	{OPT_ALLOW_OTHER,		"allow_other"},
+	{OPT_MAX_READ,			"max_read=%u"},
+	{OPT_ERR,			NULL}
+};
+
+static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
+{
+	char *p;
+	memset(d, 0, sizeof(struct fuse_mount_data));
+	d->max_read = ~0;
+
+	while ((p = strsep(&opt, ",")) != NULL) {
+		int token;
+		int value;
+		substring_t args[MAX_OPT_ARGS];
+		if (!*p)
+			continue;
+
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case OPT_FD:
+			if (match_int(&args[0], &value))
+				return 0;
+			d->fd = value;
+			d->fd_present = 1;
+			break;
+
+		case OPT_ROOTMODE:
+			if (match_octal(&args[0], &value))
+				return 0;
+			d->rootmode = value;
+			d->rootmode_present = 1;
+			break;
+
+		case OPT_USER_ID:
+			if (match_int(&args[0], &value))
+				return 0;
+			d->user_id = value;
+			d->user_id_present = 1;
+			break;
+
+		case OPT_GROUP_ID:
+			if (match_int(&args[0], &value))
+				return 0;
+			d->group_id = value;
+			d->group_id_present = 1;
+			break;
+
+		case OPT_DEFAULT_PERMISSIONS:
+			d->flags |= FUSE_DEFAULT_PERMISSIONS;
+			break;
+
+		case OPT_ALLOW_OTHER:
+			d->flags |= FUSE_ALLOW_OTHER;
+			break;
+
+		case OPT_MAX_READ:
+			if (match_int(&args[0], &value))
+				return 0;
+			d->max_read = value;
+			break;
+
+		default:
+			return 0;
+		}
+	}
+
+	if (!d->fd_present || !d->rootmode_present ||
+	    !d->user_id_present || !d->group_id_present)
+		return 0;
+
+	return 1;
+}
+
+static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb);
+
+	seq_printf(m, ",user_id=%u", fc->user_id);
+	seq_printf(m, ",group_id=%u", fc->group_id);
+	if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
+		seq_puts(m, ",default_permissions");
+	if (fc->flags & FUSE_ALLOW_OTHER)
+		seq_puts(m, ",allow_other");
+	if (fc->max_read != ~0)
+		seq_printf(m, ",max_read=%u", fc->max_read);
+	return 0;
+}
+
+static void free_conn(struct fuse_conn *fc)
+{
+	while (!list_empty(&fc->unused_list)) {
+		struct fuse_req *req;
+		req = list_entry(fc->unused_list.next, struct fuse_req, list);
+		list_del(&req->list);
+		fuse_request_free(req);
+	}
+	kfree(fc);
+}
+
+/* Must be called with the fuse lock held */
+void fuse_release_conn(struct fuse_conn *fc)
+{
+	fc->count--;
+	if (!fc->count)
+		free_conn(fc);
+}
+
+static struct fuse_conn *new_conn(void)
+{
+	struct fuse_conn *fc;
+
+	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
+	if (fc != NULL) {
+		int i;
+		memset(fc, 0, sizeof(*fc));
+		init_waitqueue_head(&fc->waitq);
+		INIT_LIST_HEAD(&fc->pending);
+		INIT_LIST_HEAD(&fc->processing);
+		INIT_LIST_HEAD(&fc->unused_list);
+		INIT_LIST_HEAD(&fc->background);
+		sema_init(&fc->outstanding_sem, 0);
+		init_rwsem(&fc->sbput_sem);
+		for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
+			struct fuse_req *req = fuse_request_alloc();
+			if (!req) {
+				free_conn(fc);
+				return NULL;
+			}
+			list_add(&req->list, &fc->unused_list);
+		}
+		fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+		fc->bdi.unplug_io_fn = default_unplug_io_fn;
+		fc->reqctr = 0;
+	}
+	return fc;
+}
+
+static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
+{
+	struct fuse_conn *fc;
+
+	if (file->f_op != &fuse_dev_operations)
+		return ERR_PTR(-EINVAL);
+	fc = new_conn();
+	if (fc == NULL)
+		return ERR_PTR(-ENOMEM);
+	spin_lock(&fuse_lock);
+	if (file->private_data) {
+		free_conn(fc);
+		fc = ERR_PTR(-EINVAL);
+	} else {
+		file->private_data = fc;
+		*get_fuse_conn_super_p(sb) = fc;
+		fc->mounted = 1;
+		fc->connected = 1;
+		fc->count = 2;
+	}
+	spin_unlock(&fuse_lock);
+	return fc;
+}
+
+static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
+{
+	struct fuse_attr attr;
+	memset(&attr, 0, sizeof(attr));
+
+	attr.mode = mode;
+	attr.ino = FUSE_ROOT_ID;
+	return fuse_iget(sb, 1, 0, &attr);
+}
+
+static struct super_operations fuse_super_operations = {
+	.alloc_inode    = fuse_alloc_inode,
+	.destroy_inode  = fuse_destroy_inode,
+	.read_inode	= fuse_read_inode,
+	.clear_inode	= fuse_clear_inode,
+	.put_super	= fuse_put_super,
+	.statfs		= fuse_statfs,
+	.show_options	= fuse_show_options,
+};
+
+static int fuse_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct fuse_conn *fc;
+	struct inode *root;
+	struct fuse_mount_data d;
+	struct file *file;
+	int err;
+
+	if (!parse_fuse_opt((char *) data, &d))
+		return -EINVAL;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = FUSE_SUPER_MAGIC;
+	sb->s_op = &fuse_super_operations;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+
+	file = fget(d.fd);
+	if (!file)
+		return -EINVAL;
+
+	fc = get_conn(file, sb);
+	fput(file);
+	if (IS_ERR(fc))
+		return PTR_ERR(fc);
+
+	fc->flags = d.flags;
+	fc->user_id = d.user_id;
+	fc->group_id = d.group_id;
+	fc->max_read = d.max_read;
+	if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
+		fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
+	fc->max_write = FUSE_MAX_IN / 2;
+
+	err = -ENOMEM;
+	root = get_root_inode(sb, d.rootmode);
+	if (root == NULL)
+		goto err;
+
+	sb->s_root = d_alloc_root(root);
+	if (!sb->s_root) {
+		iput(root);
+		goto err;
+	}
+	fuse_send_init(fc);
+	return 0;
+
+ err:
+	spin_lock(&fuse_lock);
+	fuse_release_conn(fc);
+	spin_unlock(&fuse_lock);
+	return err;
+}
+
+static struct super_block *fuse_get_sb(struct file_system_type *fs_type,
+				       int flags, const char *dev_name,
+				       void *raw_data)
+{
+	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super);
+}
+
+static struct file_system_type fuse_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "fuse",
+	.get_sb		= fuse_get_sb,
+	.kill_sb	= kill_anon_super,
+};
+
+static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep,
+				 unsigned long flags)
+{
+	struct inode * inode = foo;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR)
+		inode_init_once(inode);
+}
+
+static int __init fuse_fs_init(void)
+{
+	int err;
+
+	err = register_filesystem(&fuse_fs_type);
+	if (err)
+		printk("fuse: failed to register filesystem\n");
+	else {
+		fuse_inode_cachep = kmem_cache_create("fuse_inode",
+						      sizeof(struct fuse_inode),
+						      0, SLAB_HWCACHE_ALIGN,
+						      fuse_inode_init_once, NULL);
+		if (!fuse_inode_cachep) {
+			unregister_filesystem(&fuse_fs_type);
+			err = -ENOMEM;
+		}
+	}
+
+	return err;
+}
+
+static void fuse_fs_cleanup(void)
+{
+	unregister_filesystem(&fuse_fs_type);
+	kmem_cache_destroy(fuse_inode_cachep);
+}
+
+static int __init fuse_init(void)
+{
+	int res;
+
+	printk("fuse init (API version %i.%i)\n",
+	       FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
+
+	spin_lock_init(&fuse_lock);
+	res = fuse_fs_init();
+	if (res)
+		goto err;
+
+	res = fuse_dev_init();
+	if (res)
+		goto err_fs_cleanup;
+
+	return 0;
+
+ err_fs_cleanup:
+	fuse_fs_cleanup();
+ err:
+	return res;
+}
+
+static void __exit fuse_exit(void)
+{
+	printk(KERN_DEBUG "fuse exit\n");
+
+	fuse_fs_cleanup();
+	fuse_dev_cleanup();
+}
+
+module_init(fuse_init);
+module_exit(fuse_exit);
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index a096c5a5666..3d5cdc6847c 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -13,8 +13,6 @@
 
 #include "btree.h"
 
-#define REF_PAGES	0
-
 void hfs_bnode_read(struct hfs_bnode *node, void *buf,
 		int off, int len)
 {
@@ -289,9 +287,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
 			page_cache_release(page);
 			goto fail;
 		}
-#if !REF_PAGES
 		page_cache_release(page);
-#endif
 		node->page[i] = page;
 	}
 
@@ -449,13 +445,6 @@ void hfs_bnode_get(struct hfs_bnode *node)
 {
 	if (node) {
 		atomic_inc(&node->refcnt);
-#if REF_PAGES
-		{
-		int i;
-		for (i = 0; i < node->tree->pages_per_bnode; i++)
-			get_page(node->page[i]);
-		}
-#endif
 		dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n",
 		       node->tree->cnid, node->this, atomic_read(&node->refcnt));
 	}
@@ -472,20 +461,12 @@ void hfs_bnode_put(struct hfs_bnode *node)
 		       node->tree->cnid, node->this, atomic_read(&node->refcnt));
 		if (!atomic_read(&node->refcnt))
 			BUG();
-		if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) {
-#if REF_PAGES
-			for (i = 0; i < tree->pages_per_bnode; i++)
-				put_page(node->page[i]);
-#endif
+		if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock))
 			return;
-		}
 		for (i = 0; i < tree->pages_per_bnode; i++) {
 			if (!node->page[i])
 				continue;
 			mark_page_accessed(node->page[i]);
-#if REF_PAGES
-			put_page(node->page[i]);
-#endif
 		}
 
 		if (test_bit(HFS_BNODE_DELETED, &node->flags)) {
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c
index 65dedefcabf..2fcd679f023 100644
--- a/fs/hfs/catalog.c
+++ b/fs/hfs/catalog.c
@@ -20,12 +20,12 @@
  *
  * Given the ID of the parent and the name build a search key.
  */
-void hfs_cat_build_key(btree_key *key, u32 parent, struct qstr *name)
+void hfs_cat_build_key(struct super_block *sb, btree_key *key, u32 parent, struct qstr *name)
 {
 	key->cat.reserved = 0;
 	key->cat.ParID = cpu_to_be32(parent);
 	if (name) {
-		hfs_triv2mac(&key->cat.CName, name);
+		hfs_asc2mac(sb, &key->cat.CName, name);
 		key->key_len = 6 + key->cat.CName.len;
 	} else {
 		memset(&key->cat.CName, 0, sizeof(struct hfs_name));
@@ -62,13 +62,14 @@ static int hfs_cat_build_record(hfs_cat_rec *rec, u32 cnid, struct inode *inode)
 	}
 }
 
-static int hfs_cat_build_thread(hfs_cat_rec *rec, int type,
+static int hfs_cat_build_thread(struct super_block *sb,
+				hfs_cat_rec *rec, int type,
 				u32 parentid, struct qstr *name)
 {
 	rec->type = type;
 	memset(rec->thread.reserved, 0, sizeof(rec->thread.reserved));
 	rec->thread.ParID = cpu_to_be32(parentid);
-	hfs_triv2mac(&rec->thread.CName, name);
+	hfs_asc2mac(sb, &rec->thread.CName, name);
 	return sizeof(struct hfs_cat_thread);
 }
 
@@ -93,8 +94,8 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode *
 	sb = dir->i_sb;
 	hfs_find_init(HFS_SB(sb)->cat_tree, &fd);
 
-	hfs_cat_build_key(fd.search_key, cnid, NULL);
-	entry_size = hfs_cat_build_thread(&entry, S_ISDIR(inode->i_mode) ?
+	hfs_cat_build_key(sb, fd.search_key, cnid, NULL);
+	entry_size = hfs_cat_build_thread(sb, &entry, S_ISDIR(inode->i_mode) ?
 			HFS_CDR_THD : HFS_CDR_FTH,
 			dir->i_ino, str);
 	err = hfs_brec_find(&fd);
@@ -107,7 +108,7 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode *
 	if (err)
 		goto err2;
 
-	hfs_cat_build_key(fd.search_key, dir->i_ino, str);
+	hfs_cat_build_key(sb, fd.search_key, dir->i_ino, str);
 	entry_size = hfs_cat_build_record(&entry, cnid, inode);
 	err = hfs_brec_find(&fd);
 	if (err != -ENOENT) {
@@ -127,7 +128,7 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode *
 	return 0;
 
 err1:
-	hfs_cat_build_key(fd.search_key, cnid, NULL);
+	hfs_cat_build_key(sb, fd.search_key, cnid, NULL);
 	if (!hfs_brec_find(&fd))
 		hfs_brec_remove(&fd);
 err2:
@@ -176,7 +177,7 @@ int hfs_cat_find_brec(struct super_block *sb, u32 cnid,
 	hfs_cat_rec rec;
 	int res, len, type;
 
-	hfs_cat_build_key(fd->search_key, cnid, NULL);
+	hfs_cat_build_key(sb, fd->search_key, cnid, NULL);
 	res = hfs_brec_read(fd, &rec, sizeof(rec));
 	if (res)
 		return res;
@@ -211,7 +212,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str)
 	sb = dir->i_sb;
 	hfs_find_init(HFS_SB(sb)->cat_tree, &fd);
 
-	hfs_cat_build_key(fd.search_key, dir->i_ino, str);
+	hfs_cat_build_key(sb, fd.search_key, dir->i_ino, str);
 	res = hfs_brec_find(&fd);
 	if (res)
 		goto out;
@@ -239,7 +240,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str)
 	if (res)
 		goto out;
 
-	hfs_cat_build_key(fd.search_key, cnid, NULL);
+	hfs_cat_build_key(sb, fd.search_key, cnid, NULL);
 	res = hfs_brec_find(&fd);
 	if (!res) {
 		res = hfs_brec_remove(&fd);
@@ -280,7 +281,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
 	dst_fd = src_fd;
 
 	/* find the old dir entry and read the data */
-	hfs_cat_build_key(src_fd.search_key, src_dir->i_ino, src_name);
+	hfs_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
 	err = hfs_brec_find(&src_fd);
 	if (err)
 		goto out;
@@ -289,7 +290,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
 			    src_fd.entrylength);
 
 	/* create new dir entry with the data from the old entry */
-	hfs_cat_build_key(dst_fd.search_key, dst_dir->i_ino, dst_name);
+	hfs_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
 	err = hfs_brec_find(&dst_fd);
 	if (err != -ENOENT) {
 		if (!err)
@@ -305,7 +306,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
 	mark_inode_dirty(dst_dir);
 
 	/* finally remove the old entry */
-	hfs_cat_build_key(src_fd.search_key, src_dir->i_ino, src_name);
+	hfs_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
 	err = hfs_brec_find(&src_fd);
 	if (err)
 		goto out;
@@ -321,7 +322,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
 		goto out;
 
 	/* remove old thread entry */
-	hfs_cat_build_key(src_fd.search_key, cnid, NULL);
+	hfs_cat_build_key(sb, src_fd.search_key, cnid, NULL);
 	err = hfs_brec_find(&src_fd);
 	if (err)
 		goto out;
@@ -330,8 +331,8 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
 		goto out;
 
 	/* create new thread entry */
-	hfs_cat_build_key(dst_fd.search_key, cnid, NULL);
-	entry_size = hfs_cat_build_thread(&entry, type == HFS_CDR_FIL ? HFS_CDR_FTH : HFS_CDR_THD,
+	hfs_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
+	entry_size = hfs_cat_build_thread(sb, &entry, type == HFS_CDR_FIL ? HFS_CDR_FTH : HFS_CDR_THD,
 					dst_dir->i_ino, dst_name);
 	err = hfs_brec_find(&dst_fd);
 	if (err != -ENOENT) {
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index c55998262ae..e1f24befba5 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -28,7 +28,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
 	dentry->d_op = &hfs_dentry_operations;
 
 	hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd);
-	hfs_cat_build_key(fd.search_key, dir->i_ino, &dentry->d_name);
+	hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name);
 	res = hfs_brec_read(&fd, &rec, sizeof(rec));
 	if (res) {
 		hfs_find_exit(&fd);
@@ -56,7 +56,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
 	int len, err;
-	char strbuf[HFS_NAMELEN + 1];
+	char strbuf[HFS_MAX_NAMELEN];
 	union hfs_cat_rec entry;
 	struct hfs_find_data fd;
 	struct hfs_readdir_data *rd;
@@ -66,7 +66,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		return 0;
 
 	hfs_find_init(HFS_SB(sb)->cat_tree, &fd);
-	hfs_cat_build_key(fd.search_key, inode->i_ino, NULL);
+	hfs_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
 	err = hfs_brec_find(&fd);
 	if (err)
 		goto out;
@@ -111,7 +111,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		}
 		hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
 		type = entry.type;
-		len = hfs_mac2triv(strbuf, &fd.key->cat.CName);
+		len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName);
 		if (type == HFS_CDR_DIR) {
 			if (fd.entrylength < sizeof(struct hfs_cat_dir)) {
 				printk("HFS: small dir entry\n");
@@ -307,7 +307,8 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			   old_dir, &old_dentry->d_name,
 			   new_dir, &new_dentry->d_name);
 	if (!res)
-		hfs_cat_build_key((btree_key *)&HFS_I(old_dentry->d_inode)->cat_key,
+		hfs_cat_build_key(old_dir->i_sb,
+				  (btree_key *)&HFS_I(old_dentry->d_inode)->cat_key,
 				  new_dir->i_ino, &new_dentry->d_name);
 	return res;
 }
diff --git a/fs/hfs/hfs.h b/fs/hfs/hfs.h
index df6b33adee3..88099ab1a18 100644
--- a/fs/hfs/hfs.h
+++ b/fs/hfs/hfs.h
@@ -25,6 +25,7 @@
 #define HFS_SECTOR_SIZE		512    /* size of an HFS sector */
 #define HFS_SECTOR_SIZE_BITS	9      /* log_2(HFS_SECTOR_SIZE) */
 #define HFS_NAMELEN		31     /* maximum length of an HFS filename */
+#define HFS_MAX_NAMELEN		128
 #define HFS_MAX_VALENCE		32767U
 
 /* Meanings of the drAtrb field of the MDB,
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 0dc8ef8e14d..aae019aadf8 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -141,6 +141,8 @@ struct hfs_sb_info {
 
 	int session, part;
 
+	struct nls_table *nls_io, *nls_disk;
+
 	struct semaphore bitmap_lock;
 
 	unsigned long flags;
@@ -168,7 +170,7 @@ extern int hfs_cat_create(u32, struct inode *, struct qstr *, struct inode *);
 extern int hfs_cat_delete(u32, struct inode *, struct qstr *);
 extern int hfs_cat_move(u32, struct inode *, struct qstr *,
 			struct inode *, struct qstr *);
-extern void hfs_cat_build_key(btree_key *, u32, struct qstr *);
+extern void hfs_cat_build_key(struct super_block *, btree_key *, u32, struct qstr *);
 
 /* dir.c */
 extern struct file_operations hfs_dir_operations;
@@ -222,8 +224,8 @@ extern int hfs_strcmp(const unsigned char *, unsigned int,
 extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
 
 /* trans.c */
-extern void hfs_triv2mac(struct hfs_name *, struct qstr *);
-extern int hfs_mac2triv(char *, const struct hfs_name *);
+extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *);
+extern int hfs_mac2asc(struct super_block *, char *, const struct hfs_name *);
 
 extern struct timezone sys_tz;
 
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 75191232609..3f680c5675b 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -46,7 +46,7 @@ static sector_t hfs_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping, block, hfs_get_block);
 }
 
-static int hfs_releasepage(struct page *page, int mask)
+static int hfs_releasepage(struct page *page, gfp_t mask)
 {
 	struct inode *inode = page->mapping->host;
 	struct super_block *sb = inode->i_sb;
@@ -160,7 +160,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
 
 	init_MUTEX(&HFS_I(inode)->extents_lock);
 	INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
-	hfs_cat_build_key((btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
+	hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
 	inode->i_ino = HFS_SB(sb)->next_id++;
 	inode->i_mode = mode;
 	inode->i_uid = current->fsuid;
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 217e32f37e0..0a473f79c89 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -10,6 +10,7 @@
 
 #include <linux/cdrom.h>
 #include <linux/genhd.h>
+#include <linux/nls.h>
 
 #include "hfs_fs.h"
 #include "btree.h"
@@ -343,6 +344,11 @@ void hfs_mdb_put(struct super_block *sb)
 	brelse(HFS_SB(sb)->mdb_bh);
 	brelse(HFS_SB(sb)->alt_mdb_bh);
 
+	if (HFS_SB(sb)->nls_io)
+		unload_nls(HFS_SB(sb)->nls_io);
+	if (HFS_SB(sb)->nls_disk)
+		unload_nls(HFS_SB(sb)->nls_disk);
+
 	kfree(HFS_SB(sb));
 	sb->s_fs_info = NULL;
 }
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index ab783f6afa3..c5074aeafca 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -15,8 +15,11 @@
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/blkdev.h>
+#include <linux/mount.h>
 #include <linux/init.h>
+#include <linux/nls.h>
 #include <linux/parser.h>
+#include <linux/seq_file.h>
 #include <linux/vfs.h>
 
 #include "hfs_fs.h"
@@ -111,6 +114,32 @@ static int hfs_remount(struct super_block *sb, int *flags, char *data)
 	return 0;
 }
 
+static int hfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
+{
+	struct hfs_sb_info *sbi = HFS_SB(mnt->mnt_sb);
+
+	if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f))
+		seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator);
+	if (sbi->s_type != cpu_to_be32(0x3f3f3f3f))
+		seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type);
+	seq_printf(seq, ",uid=%u,gid=%u", sbi->s_uid, sbi->s_gid);
+	if (sbi->s_file_umask != 0133)
+		seq_printf(seq, ",file_umask=%o", sbi->s_file_umask);
+	if (sbi->s_dir_umask != 0022)
+		seq_printf(seq, ",dir_umask=%o", sbi->s_dir_umask);
+	if (sbi->part >= 0)
+		seq_printf(seq, ",part=%u", sbi->part);
+	if (sbi->session >= 0)
+		seq_printf(seq, ",session=%u", sbi->session);
+	if (sbi->nls_disk)
+		seq_printf(seq, ",codepage=%s", sbi->nls_disk->charset);
+	if (sbi->nls_io)
+		seq_printf(seq, ",iocharset=%s", sbi->nls_io->charset);
+	if (sbi->s_quiet)
+		seq_printf(seq, ",quiet");
+	return 0;
+}
+
 static struct inode *hfs_alloc_inode(struct super_block *sb)
 {
 	struct hfs_inode_info *i;
@@ -133,11 +162,13 @@ static struct super_operations hfs_super_operations = {
 	.write_super	= hfs_write_super,
 	.statfs		= hfs_statfs,
 	.remount_fs     = hfs_remount,
+	.show_options	= hfs_show_options,
 };
 
 enum {
 	opt_uid, opt_gid, opt_umask, opt_file_umask, opt_dir_umask,
 	opt_part, opt_session, opt_type, opt_creator, opt_quiet,
+	opt_codepage, opt_iocharset,
 	opt_err
 };
 
@@ -152,6 +183,8 @@ static match_table_t tokens = {
 	{ opt_type, "type=%s" },
 	{ opt_creator, "creator=%s" },
 	{ opt_quiet, "quiet" },
+	{ opt_codepage, "codepage=%s" },
+	{ opt_iocharset, "iocharset=%s" },
 	{ opt_err, NULL }
 };
 
@@ -257,11 +290,46 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
 		case opt_quiet:
 			hsb->s_quiet = 1;
 			break;
+		case opt_codepage:
+			if (hsb->nls_disk) {
+				printk("HFS+-fs: unable to change codepage\n");
+				return 0;
+			}
+			p = match_strdup(&args[0]);
+			hsb->nls_disk = load_nls(p);
+			if (!hsb->nls_disk) {
+				printk("HFS+-fs: unable to load codepage \"%s\"\n", p);
+				kfree(p);
+				return 0;
+			}
+			kfree(p);
+			break;
+		case opt_iocharset:
+			if (hsb->nls_io) {
+				printk("HFS: unable to change iocharset\n");
+				return 0;
+			}
+			p = match_strdup(&args[0]);
+			hsb->nls_io = load_nls(p);
+			if (!hsb->nls_io) {
+				printk("HFS: unable to load iocharset \"%s\"\n", p);
+				kfree(p);
+				return 0;
+			}
+			kfree(p);
+			break;
 		default:
 			return 0;
 		}
 	}
 
+	if (hsb->nls_disk && !hsb->nls_io) {
+		hsb->nls_io = load_nls_default();
+		if (!hsb->nls_io) {
+			printk("HFS: unable to load default iocharset\n");
+			return 0;
+		}
+	}
 	hsb->s_dir_umask &= 0777;
 	hsb->s_file_umask &= 0577;
 
diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
index fb9720abbad..e673a88b8ae 100644
--- a/fs/hfs/trans.c
+++ b/fs/hfs/trans.c
@@ -9,12 +9,15 @@
  * with ':' vs. '/' as the path-element separator.
  */
 
+#include <linux/types.h>
+#include <linux/nls.h>
+
 #include "hfs_fs.h"
 
 /*================ Global functions ================*/
 
 /*
- * hfs_mac2triv()
+ * hfs_mac2asc()
  *
  * Given a 'Pascal String' (a string preceded by a length byte) in
  * the Macintosh character set produce the corresponding filename using
@@ -27,23 +30,58 @@
  * by ':' which never appears in HFS filenames.	 All other characters
  * are passed unchanged from input to output.
  */
-int hfs_mac2triv(char *out, const struct hfs_name *in)
+int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)
 {
-	const char *p;
-	char c;
-	int i, len;
+	struct nls_table *nls_disk = HFS_SB(sb)->nls_disk;
+	struct nls_table *nls_io = HFS_SB(sb)->nls_io;
+	const char *src;
+	char *dst;
+	int srclen, dstlen, size;
+
+	src = in->name;
+	srclen = in->len;
+	dst = out;
+	dstlen = HFS_MAX_NAMELEN;
+	if (nls_io) {
+		wchar_t ch;
 
-	len = in->len;
-	p = in->name;
-	for (i = 0; i < len; i++) {
-		c = *p++;
-		*out++ = c == '/' ? ':' : c;
+		while (srclen > 0) {
+			if (nls_disk) {
+				size = nls_disk->char2uni(src, srclen, &ch);
+				if (size <= 0) {
+					ch = '?';
+					size = 1;
+				}
+				src += size;
+				srclen -= size;
+			} else {
+				ch = *src++;
+				srclen--;
+			}
+			if (ch == '/')
+				ch = ':';
+			size = nls_io->uni2char(ch, dst, dstlen);
+			if (size < 0) {
+				if (size == -ENAMETOOLONG)
+					goto out;
+				*dst = '?';
+				size = 1;
+			}
+			dst += size;
+			dstlen -= size;
+		}
+	} else {
+		char ch;
+
+		while (--srclen >= 0)
+			*dst++ = (ch = *src++) == '/' ? ':' : ch;
 	}
-	return i;
+out:
+	return dst - out;
 }
 
 /*
- * hfs_triv2mac()
+ * hfs_asc2mac()
  *
  * Given an ASCII string (not null-terminated) and its length,
  * generate the corresponding filename in the Macintosh character set
@@ -54,19 +92,57 @@ int hfs_mac2triv(char *out, const struct hfs_name *in)
  * This routine is a inverse to hfs_mac2triv().
  * A ':' is replaced by a '/'.
  */
-void hfs_triv2mac(struct hfs_name *out, struct qstr *in)
+void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, struct qstr *in)
 {
+	struct nls_table *nls_disk = HFS_SB(sb)->nls_disk;
+	struct nls_table *nls_io = HFS_SB(sb)->nls_io;
 	const char *src;
-	char *dst, c;
-	int i, len;
+	char *dst;
+	int srclen, dstlen, size;
 
-	out->len = len = min((unsigned int)HFS_NAMELEN, in->len);
 	src = in->name;
+	srclen = in->len;
 	dst = out->name;
-	for (i = 0; i < len; i++) {
-		c = *src++;
-		*dst++ = c == ':' ? '/' : c;
+	dstlen = HFS_NAMELEN;
+	if (nls_io) {
+		wchar_t ch;
+
+		while (srclen > 0) {
+			size = nls_io->char2uni(src, srclen, &ch);
+			if (size < 0) {
+				ch = '?';
+				size = 1;
+			}
+			src += size;
+			srclen -= size;
+			if (ch == ':')
+				ch = '/';
+			if (nls_disk) {
+				size = nls_disk->uni2char(ch, dst, dstlen);
+				if (size < 0) {
+					if (size == -ENAMETOOLONG)
+						goto out;
+					*dst = '?';
+					size = 1;
+				}
+				dst += size;
+				dstlen -= size;
+			} else {
+				*dst++ = ch > 0xff ? '?' : ch;
+				dstlen--;
+			}
+		}
+	} else {
+		char ch;
+
+		if (dstlen > srclen)
+			dstlen = srclen;
+		while (--dstlen >= 0)
+			*dst++ = (ch = *src++) == ':' ? '/' : ch;
 	}
-	for (; i < HFS_NAMELEN; i++)
+out:
+	out->len = dst - (char *)out->name;
+	dstlen = HFS_NAMELEN - out->len;
+	while (--dstlen >= 0)
 		*dst++ = 0;
 }
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 8868d3b766f..b85abc6e6f8 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -18,8 +18,6 @@
 #include "hfsplus_fs.h"
 #include "hfsplus_raw.h"
 
-#define REF_PAGES	0
-
 /* Copy a specified range of bytes from the raw data of a node */
 void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
 {
@@ -450,9 +448,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
 			page_cache_release(page);
 			goto fail;
 		}
-#if !REF_PAGES
 		page_cache_release(page);
-#endif
 		node->page[i] = page;
 	}
 
@@ -612,13 +608,6 @@ void hfs_bnode_get(struct hfs_bnode *node)
 {
 	if (node) {
 		atomic_inc(&node->refcnt);
-#if REF_PAGES
-		{
-		int i;
-		for (i = 0; i < node->tree->pages_per_bnode; i++)
-			get_page(node->page[i]);
-		}
-#endif
 		dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n",
 		       node->tree->cnid, node->this, atomic_read(&node->refcnt));
 	}
@@ -635,20 +624,12 @@ void hfs_bnode_put(struct hfs_bnode *node)
 		       node->tree->cnid, node->this, atomic_read(&node->refcnt));
 		if (!atomic_read(&node->refcnt))
 			BUG();
-		if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) {
-#if REF_PAGES
-			for (i = 0; i < tree->pages_per_bnode; i++)
-				put_page(node->page[i]);
-#endif
+		if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock))
 			return;
-		}
 		for (i = 0; i < tree->pages_per_bnode; i++) {
 			if (!node->page[i])
 				continue;
 			mark_page_accessed(node->page[i]);
-#if REF_PAGES
-			put_page(node->page[i]);
-#endif
 		}
 
 		if (test_bit(HFS_BNODE_DELETED, &node->flags)) {
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 533094a570d..2bc0cdd30e5 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -343,8 +343,9 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
 ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
 
 /* options.c */
-int parse_options(char *, struct hfsplus_sb_info *);
-void fill_defaults(struct hfsplus_sb_info *);
+int hfsplus_parse_options(char *, struct hfsplus_sb_info *);
+void hfsplus_fill_defaults(struct hfsplus_sb_info *);
+int hfsplus_show_options(struct seq_file *, struct vfsmount *);
 
 /* tables.c */
 extern u16 hfsplus_case_fold_table[];
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index d5642705f63..f205773ddfb 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -40,7 +40,7 @@ static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping, block, hfsplus_get_block);
 }
 
-static int hfsplus_releasepage(struct page *page, int mask)
+static int hfsplus_releasepage(struct page *page, gfp_t mask)
 {
 	struct inode *inode = page->mapping->host;
 	struct super_block *sb = inode->i_sb;
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 1cca0102c98..cca0818aa4c 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -13,6 +13,8 @@
 #include <linux/sched.h>
 #include <linux/parser.h>
 #include <linux/nls.h>
+#include <linux/mount.h>
+#include <linux/seq_file.h>
 #include "hfsplus_fs.h"
 
 enum {
@@ -38,7 +40,7 @@ static match_table_t tokens = {
 };
 
 /* Initialize an options object to reasonable defaults */
-void fill_defaults(struct hfsplus_sb_info *opts)
+void hfsplus_fill_defaults(struct hfsplus_sb_info *opts)
 {
 	if (!opts)
 		return;
@@ -63,7 +65,7 @@ static inline int match_fourchar(substring_t *arg, u32 *result)
 
 /* Parse options from mount. Returns 0 on failure */
 /* input is the options passed to mount() as a string */
-int parse_options(char *input, struct hfsplus_sb_info *sbi)
+int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
 {
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
@@ -160,3 +162,23 @@ done:
 
 	return 1;
 }
+
+int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt)
+{
+	struct hfsplus_sb_info *sbi = &HFSPLUS_SB(mnt->mnt_sb);
+
+	if (sbi->creator != HFSPLUS_DEF_CR_TYPE)
+		seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator);
+	if (sbi->type != HFSPLUS_DEF_CR_TYPE)
+		seq_printf(seq, ",type=%.4s", (char *)&sbi->type);
+	seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, sbi->uid, sbi->gid);
+	if (sbi->part >= 0)
+		seq_printf(seq, ",part=%u", sbi->part);
+	if (sbi->session >= 0)
+		seq_printf(seq, ",session=%u", sbi->session);
+	if (sbi->nls)
+		seq_printf(seq, ",nls=%s", sbi->nls->charset);
+	if (sbi->flags & HFSPLUS_SB_NODECOMPOSE)
+		seq_printf(seq, ",nodecompose");
+	return 0;
+}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index d55ad67b8e4..452fc1fdbd3 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -50,6 +50,7 @@ static void hfsplus_read_inode(struct inode *inode)
 	init_MUTEX(&HFSPLUS_I(inode).extents_lock);
 	HFSPLUS_I(inode).flags = 0;
 	HFSPLUS_I(inode).rsrc_inode = NULL;
+	atomic_set(&HFSPLUS_I(inode).opencnt, 0);
 
 	if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) {
 	read_inode:
@@ -217,8 +218,7 @@ static void hfsplus_put_super(struct super_block *sb)
 		vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT);
 		vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT);
 		mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
-		ll_rw_block(WRITE, 1, &HFSPLUS_SB(sb).s_vhbh);
-		wait_on_buffer(HFSPLUS_SB(sb).s_vhbh);
+		sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh);
 	}
 
 	hfs_btree_close(HFSPLUS_SB(sb).cat_tree);
@@ -277,6 +277,7 @@ static struct super_operations hfsplus_sops = {
 	.write_super	= hfsplus_write_super,
 	.statfs		= hfsplus_statfs,
 	.remount_fs	= hfsplus_remount,
+	.show_options	= hfsplus_show_options,
 };
 
 static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
@@ -297,8 +298,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 	memset(sbi, 0, sizeof(HFSPLUS_SB(sb)));
 	sb->s_fs_info = sbi;
 	INIT_HLIST_HEAD(&sbi->rsrc_inodes);
-	fill_defaults(sbi);
-	if (!parse_options(data, sbi)) {
+	hfsplus_fill_defaults(sbi);
+	if (!hfsplus_parse_options(data, sbi)) {
 		if (!silent)
 			printk("HFS+-fs: unable to parse mount options\n");
 		err = -EINVAL;
@@ -415,8 +416,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 	vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
 	vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
 	mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
-	ll_rw_block(WRITE, 1, &HFSPLUS_SB(sb).s_vhbh);
-	wait_on_buffer(HFSPLUS_SB(sb).s_vhbh);
+	sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh);
 
 	if (!HFSPLUS_SB(sb).hidden_dir) {
 		printk("HFS+: create hidden dir...\n");
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 67bca0d4a33..cca3fb693f9 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -49,7 +49,6 @@ struct hostfs_iattr {
 	struct timespec	ia_atime;
 	struct timespec	ia_mtime;
 	struct timespec	ia_ctime;
-	unsigned int	ia_attr_flags;
 };
 
 extern int stat_file(const char *path, unsigned long long *inode_out,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index b2d18200a00..dd711310626 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -284,6 +284,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
 
 static void hostfs_delete_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
 	if(HOSTFS_I(inode)->fd != -1) {
 		close_file(&HOSTFS_I(inode)->fd);
 		HOSTFS_I(inode)->fd = -1;
@@ -792,11 +793,6 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
 	return(err);
 }
 
-void hostfs_truncate(struct inode *ino)
-{
-	not_implemented();
-}
-
 int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
 {
 	char *name;
@@ -893,7 +889,6 @@ static struct inode_operations hostfs_iops = {
 	.rmdir		= hostfs_rmdir,
 	.mknod		= hostfs_mknod,
 	.rename		= hostfs_rename,
-	.truncate	= hostfs_truncate,
 	.permission	= hostfs_permission,
 	.setattr	= hostfs_setattr,
 	.getattr	= hostfs_getattr,
@@ -909,7 +904,6 @@ static struct inode_operations hostfs_dir_iops = {
 	.rmdir		= hostfs_rmdir,
 	.mknod		= hostfs_mknod,
 	.rename		= hostfs_rename,
-	.truncate	= hostfs_truncate,
 	.permission	= hostfs_permission,
 	.setattr	= hostfs_setattr,
 	.getattr	= hostfs_getattr,
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 38b1741fa53..e3d17e9ea6c 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -284,6 +284,7 @@ void hpfs_write_if_changed(struct inode *inode)
 
 void hpfs_delete_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
 	lock_kernel();
 	hpfs_remove_fnode(inode->i_sb, inode->i_ino);
 	unlock_kernel();
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index ff150fedb98..52930915bad 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -38,7 +38,7 @@ struct hppfs_inode_info {
 
 static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode)
 {
-	return(list_entry(inode, struct hppfs_inode_info, vfs_inode));
+	return container_of(inode, struct hppfs_inode_info, vfs_inode);
 }
 
 #define HPPFS_SUPER_MAGIC 0xb00000ee
@@ -662,42 +662,36 @@ static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen)
 {
 	struct file *proc_file;
 	struct dentry *proc_dentry;
-	int (*readlink)(struct dentry *, char *, int);
-	int err, n;
+	int ret;
 
 	proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
 	proc_file = dentry_open(dget(proc_dentry), NULL, O_RDONLY);
-	err = PTR_ERR(proc_dentry);
-	if(IS_ERR(proc_dentry))
-		return(err);
+	if (IS_ERR(proc_file))
+		return PTR_ERR(proc_file);
 
-	readlink = proc_dentry->d_inode->i_op->readlink;
-	n = (*readlink)(proc_dentry, buffer, buflen);
+	ret = proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, buflen);
 
 	fput(proc_file);
 
-	return(n);
+	return ret;
 }
 
-static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void* hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct file *proc_file;
 	struct dentry *proc_dentry;
-	int (*follow_link)(struct dentry *, struct nameidata *);
-	int err, n;
+	void *ret;
 
 	proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
 	proc_file = dentry_open(dget(proc_dentry), NULL, O_RDONLY);
-	err = PTR_ERR(proc_dentry);
-	if(IS_ERR(proc_dentry))
-		return(err);
+	if (IS_ERR(proc_file))
+		return proc_file;
 
-	follow_link = proc_dentry->d_inode->i_op->follow_link;
-	n = (*follow_link)(proc_dentry, nd);
+	ret = proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
 
 	fput(proc_file);
 
-	return(n);
+	return ret;
 }
 
 static struct inode_operations hppfs_dir_iops = {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 3a9b6d179cb..e026c807e6b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -45,10 +45,58 @@ static struct backing_dev_info hugetlbfs_backing_dev_info = {
 
 int sysctl_hugetlb_shm_group;
 
+static void huge_pagevec_release(struct pagevec *pvec)
+{
+	int i;
+
+	for (i = 0; i < pagevec_count(pvec); ++i)
+		put_page(pvec->pages[i]);
+
+	pagevec_reinit(pvec);
+}
+
+/*
+ * huge_pages_needed tries to determine the number of new huge pages that
+ * will be required to fully populate this VMA.  This will be equal to
+ * the size of the VMA in huge pages minus the number of huge pages
+ * (covered by this VMA) that are found in the page cache.
+ *
+ * Result is in bytes to be compatible with is_hugepage_mem_enough()
+ */
+unsigned long
+huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma)
+{
+	int i;
+	struct pagevec pvec;
+	unsigned long start = vma->vm_start;
+	unsigned long end = vma->vm_end;
+	unsigned long hugepages = (end - start) >> HPAGE_SHIFT;
+	pgoff_t next = vma->vm_pgoff;
+	pgoff_t endpg = next + ((end - start) >> PAGE_SHIFT);
+
+	pagevec_init(&pvec, 0);
+	while (next < endpg) {
+		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE))
+			break;
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+			if (page->index > next)
+				next = page->index;
+			if (page->index >= endpg)
+				break;
+			next++;
+			hugepages--;
+		}
+		huge_pagevec_release(&pvec);
+	}
+	return hugepages << HPAGE_SHIFT;
+}
+
 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
+	unsigned long bytes;
 	loff_t len, vma_len;
 	int ret;
 
@@ -67,6 +115,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if (vma->vm_end - vma->vm_start < HPAGE_SIZE)
 		return -EINVAL;
 
+	bytes = huge_pages_needed(mapping, vma);
+	if (!is_hugepage_mem_enough(bytes))
+		return -ENOMEM;
+
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
 
 	down(&inode->i_sem);
@@ -79,10 +131,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size)
 		goto out;
 
-	ret = hugetlb_prefault(mapping, vma);
-	if (ret)
-		goto out;
-
+	ret = 0;
+	hugetlb_prefault_arch_hook(vma->vm_mm);
 	if (inode->i_size < len)
 		inode->i_size = len;
 out:
@@ -92,7 +142,7 @@ out:
 }
 
 /*
- * Called under down_write(mmap_sem), page_table_lock is not held
+ * Called under down_write(mmap_sem).
  */
 
 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
@@ -171,16 +221,6 @@ static int hugetlbfs_commit_write(struct file *file,
 	return -EINVAL;
 }
 
-static void huge_pagevec_release(struct pagevec *pvec)
-{
-	int i;
-
-	for (i = 0; i < pagevec_count(pvec); ++i)
-		put_page(pvec->pages[i]);
-
-	pagevec_reinit(pvec);
-}
-
 static void truncate_huge_page(struct page *page)
 {
 	clear_page_dirty(page);
@@ -224,52 +264,35 @@ static void truncate_hugepages(struct address_space *mapping, loff_t lstart)
 
 static void hugetlbfs_delete_inode(struct inode *inode)
 {
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(inode->i_sb);
-
-	hlist_del_init(&inode->i_hash);
-	list_del_init(&inode->i_list);
-	list_del_init(&inode->i_sb_list);
-	inode->i_state |= I_FREEING;
-	inodes_stat.nr_inodes--;
-	spin_unlock(&inode_lock);
-
 	if (inode->i_data.nrpages)
 		truncate_hugepages(&inode->i_data, 0);
-
-	security_inode_delete(inode);
-
-	if (sbinfo->free_inodes >= 0) {
-		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_inodes++;
-		spin_unlock(&sbinfo->stat_lock);
-	}
-
 	clear_inode(inode);
-	destroy_inode(inode);
 }
 
 static void hugetlbfs_forget_inode(struct inode *inode)
 {
-	struct super_block *super_block = inode->i_sb;
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(super_block);
+	struct super_block *sb = inode->i_sb;
 
-	if (hlist_unhashed(&inode->i_hash))
-		goto out_truncate;
-
-	if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
-		list_del(&inode->i_list);
-		list_add(&inode->i_list, &inode_unused);
-	}
-	inodes_stat.nr_unused++;
-	if (!super_block || (super_block->s_flags & MS_ACTIVE)) {
+	if (!hlist_unhashed(&inode->i_hash)) {
+		if (!(inode->i_state & (I_DIRTY|I_LOCK)))
+			list_move(&inode->i_list, &inode_unused);
+		inodes_stat.nr_unused++;
+		if (!sb || (sb->s_flags & MS_ACTIVE)) {
+			spin_unlock(&inode_lock);
+			return;
+		}
+		inode->i_state |= I_WILL_FREE;
 		spin_unlock(&inode_lock);
-		return;
+		/*
+		 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
+		 * in our backing_dev_info.
+		 */
+		write_inode_now(inode, 1);
+		spin_lock(&inode_lock);
+		inode->i_state &= ~I_WILL_FREE;
+		inodes_stat.nr_unused--;
+		hlist_del_init(&inode->i_hash);
 	}
-
-	/* write_inode_now() ? */
-	inodes_stat.nr_unused--;
-	hlist_del_init(&inode->i_hash);
-out_truncate:
 	list_del_init(&inode->i_list);
 	list_del_init(&inode->i_sb_list);
 	inode->i_state |= I_FREEING;
@@ -277,13 +300,6 @@ out_truncate:
 	spin_unlock(&inode_lock);
 	if (inode->i_data.nrpages)
 		truncate_hugepages(&inode->i_data, 0);
-
-	if (sbinfo->free_inodes >= 0) {
-		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_inodes++;
-		spin_unlock(&sbinfo->stat_lock);
-	}
-
 	clear_inode(inode);
 	destroy_inode(inode);
 }
@@ -291,7 +307,7 @@ out_truncate:
 static void hugetlbfs_drop_inode(struct inode *inode)
 {
 	if (!inode->i_nlink)
-		hugetlbfs_delete_inode(inode);
+		generic_delete_inode(inode);
 	else
 		hugetlbfs_forget_inode(inode);
 }
@@ -308,7 +324,6 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
 
 	vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) {
 		unsigned long h_vm_pgoff;
-		unsigned long v_length;
 		unsigned long v_offset;
 
 		h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
@@ -319,11 +334,8 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
 		if (h_vm_pgoff >= h_pgoff)
 			v_offset = 0;
 
-		v_length = vma->vm_end - vma->vm_start;
-
-		zap_hugepage_range(vma,
-				vma->vm_start + v_offset,
-				v_length - v_offset);
+		unmap_hugepage_range(vma,
+				vma->vm_start + v_offset, vma->vm_end);
 	}
 }
 
@@ -379,17 +391,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
 					gid_t gid, int mode, dev_t dev)
 {
 	struct inode *inode;
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
-
-	if (sbinfo->free_inodes >= 0) {
-		spin_lock(&sbinfo->stat_lock);
-		if (!sbinfo->free_inodes) {
-			spin_unlock(&sbinfo->stat_lock);
-			return NULL;
-		}
-		sbinfo->free_inodes--;
-		spin_unlock(&sbinfo->stat_lock);
-	}
 
 	inode = new_inode(sb);
 	if (inode) {
@@ -531,29 +532,51 @@ static void hugetlbfs_put_super(struct super_block *sb)
 	}
 }
 
+static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo)
+{
+	if (sbinfo->free_inodes >= 0) {
+		spin_lock(&sbinfo->stat_lock);
+		if (unlikely(!sbinfo->free_inodes)) {
+			spin_unlock(&sbinfo->stat_lock);
+			return 0;
+		}
+		sbinfo->free_inodes--;
+		spin_unlock(&sbinfo->stat_lock);
+	}
+
+	return 1;
+}
+
+static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo)
+{
+	if (sbinfo->free_inodes >= 0) {
+		spin_lock(&sbinfo->stat_lock);
+		sbinfo->free_inodes++;
+		spin_unlock(&sbinfo->stat_lock);
+	}
+}
+
+
 static kmem_cache_t *hugetlbfs_inode_cachep;
 
 static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 {
+	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
 	struct hugetlbfs_inode_info *p;
 
+	if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
+		return NULL;
 	p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL);
-	if (!p)
+	if (unlikely(!p)) {
+		hugetlbfs_inc_free_inodes(sbinfo);
 		return NULL;
+	}
 	return &p->vfs_inode;
 }
 
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
-{
-	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
-
-	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-	    SLAB_CTOR_CONSTRUCTOR)
-		inode_init_once(&ei->vfs_inode);
-}
-
 static void hugetlbfs_destroy_inode(struct inode *inode)
 {
+	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
 	mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
 	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
 }
@@ -565,6 +588,16 @@ static struct address_space_operations hugetlbfs_aops = {
 	.set_page_dirty	= hugetlbfs_set_page_dirty,
 };
 
+
+static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+{
+	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR)
+		inode_init_once(&ei->vfs_inode);
+}
+
 struct file_operations hugetlbfs_file_operations = {
 	.mmap			= hugetlbfs_file_mmap,
 	.fsync			= simple_sync_file,
@@ -592,6 +625,7 @@ static struct super_operations hugetlbfs_ops = {
 	.alloc_inode    = hugetlbfs_alloc_inode,
 	.destroy_inode  = hugetlbfs_destroy_inode,
 	.statfs		= hugetlbfs_statfs,
+	.delete_inode	= hugetlbfs_delete_inode,
 	.drop_inode	= hugetlbfs_drop_inode,
 	.put_super	= hugetlbfs_put_super,
 };
diff --git a/fs/inode.c b/fs/inode.c
index e57f1724db3..d8d04bd72b5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -475,7 +475,7 @@ static void prune_icache(int nr_to_scan)
  * This function is passed the number of inodes to scan, and it returns the
  * total number of remaining possibly-reclaimable inodes.
  */
-static int shrink_icache_memory(int nr, unsigned int gfp_mask)
+static int shrink_icache_memory(int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		/*
@@ -1034,19 +1034,21 @@ void generic_delete_inode(struct inode *inode)
 	inodes_stat.nr_inodes--;
 	spin_unlock(&inode_lock);
 
-	if (inode->i_data.nrpages)
-		truncate_inode_pages(&inode->i_data, 0);
-
 	security_inode_delete(inode);
 
 	if (op->delete_inode) {
 		void (*delete)(struct inode *) = op->delete_inode;
 		if (!is_bad_inode(inode))
 			DQUOT_INIT(inode);
-		/* s_op->delete_inode internally recalls clear_inode() */
+		/* Filesystems implementing their own
+		 * s_op->delete_inode are required to call
+		 * truncate_inode_pages and clear_inode()
+		 * internally */
 		delete(inode);
-	} else
+	} else {
+		truncate_inode_pages(&inode->i_data, 0);
 		clear_inode(inode);
+	}
 	spin_lock(&inode_lock);
 	hlist_del_init(&inode->i_hash);
 	spin_unlock(&inode_lock);
@@ -1086,6 +1088,7 @@ static void generic_forget_inode(struct inode *inode)
 	if (inode->i_data.nrpages)
 		truncate_inode_pages(&inode->i_data, 0);
 	clear_inode(inode);
+	wake_up_inode(inode);
 	destroy_inode(inode);
 }
 
@@ -1195,9 +1198,6 @@ void update_atime(struct inode *inode)
 	if (!timespec_equal(&inode->i_atime, &now)) {
 		inode->i_atime = now;
 		mark_inode_dirty_sync(inode);
-	} else {
-		if (!timespec_equal(&inode->i_atime, &now))
-			inode->i_atime = now;
 	}
 }
 
diff --git a/fs/inotify.c b/fs/inotify.c
index 27ebcac5e07..9fbaebfdf40 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -37,6 +37,7 @@
 #include <asm/ioctls.h>
 
 static atomic_t inotify_cookie;
+static atomic_t inotify_watches;
 
 static kmem_cache_t *watch_cachep;
 static kmem_cache_t *event_cachep;
@@ -175,6 +176,7 @@ static inline void put_inotify_dev(struct inotify_device *dev)
 	if (atomic_dec_and_test(&dev->count)) {
 		atomic_dec(&dev->user->inotify_devs);
 		free_uid(dev->user);
+		idr_destroy(&dev->idr);
 		kfree(dev);
 	}
 }
@@ -353,7 +355,7 @@ static int inotify_dev_get_wd(struct inotify_device *dev,
 	do {
 		if (unlikely(!idr_pre_get(&dev->idr, GFP_KERNEL)))
 			return -ENOSPC;
-		ret = idr_get_new_above(&dev->idr, watch, dev->last_wd, &watch->wd);
+		ret = idr_get_new_above(&dev->idr, watch, dev->last_wd+1, &watch->wd);
 	} while (ret == -EAGAIN);
 
 	return ret;
@@ -402,7 +404,7 @@ static struct inotify_watch *create_watch(struct inotify_device *dev,
 		return ERR_PTR(ret);
 	}
 
-	dev->last_wd = ret;
+	dev->last_wd = watch->wd;
 	watch->mask = mask;
 	atomic_set(&watch->count, 0);
 	INIT_LIST_HEAD(&watch->d_list);
@@ -422,6 +424,7 @@ static struct inotify_watch *create_watch(struct inotify_device *dev,
 	get_inotify_watch(watch);
 
 	atomic_inc(&dev->user->inotify_watches);
+	atomic_inc(&inotify_watches);
 
 	return watch;
 }
@@ -454,6 +457,7 @@ static void remove_watch_no_event(struct inotify_watch *watch,
 	list_del(&watch->d_list);
 
 	atomic_dec(&dev->user->inotify_watches);
+	atomic_dec(&inotify_watches);
 	idr_remove(&dev->idr, watch->wd);
 	put_inotify_watch(watch);
 }
@@ -532,6 +536,9 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
 	struct dentry *parent;
 	struct inode *inode;
 
+	if (!atomic_read (&inotify_watches))
+		return;
+
 	spin_lock(&dentry->d_lock);
 	parent = dentry->d_parent;
 	inode = parent->d_inode;
@@ -925,6 +932,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 	struct nameidata nd;
 	struct file *filp;
 	int ret, fput_needed;
+	int mask_add = 0;
 
 	filp = fget_light(fd, &fput_needed);
 	if (unlikely(!filp))
@@ -947,6 +955,9 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 	down(&inode->inotify_sem);
 	down(&dev->sem);
 
+	if (mask & IN_MASK_ADD)
+		mask_add = 1;
+
 	/* don't let user-space set invalid bits: we don't want flags set */
 	mask &= IN_ALL_EVENTS;
 	if (unlikely(!mask)) {
@@ -960,7 +971,10 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 	 */
 	old = inode_find_dev(inode, dev);
 	if (unlikely(old)) {
-		old->mask = mask;
+		if (mask_add)
+			old->mask |= mask;
+		else
+			old->mask = mask;
 		ret = old->wd;
 		goto out;
 	}
@@ -1043,6 +1057,7 @@ static int __init inotify_setup(void)
 	inotify_max_user_watches = 8192;
 
 	atomic_set(&inotify_cookie, 0);
+	atomic_set(&inotify_watches, 0);
 
 	watch_cachep = kmem_cache_create("inotify_watch_cache",
 					 sizeof(struct inotify_watch),
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 97e1f088ba0..d1c1f2b2c9d 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -62,6 +62,8 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
 
 			break;
 		case IOPRIO_CLASS_IDLE:
+			if (!capable(CAP_SYS_ADMIN))
+				return -EPERM;
 			break;
 		default:
 			return -EINVAL;
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 34a44e45168..4917315db73 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -129,8 +129,14 @@ static int zisofs_readpage(struct file *file, struct page *page)
 	cend = le32_to_cpu(*(__le32 *)(bh->b_data + (blockendptr & bufmask)));
 	brelse(bh);
 
+	if (cstart > cend)
+		goto eio;
+		
 	csize = cend-cstart;
 
+	if (csize > deflateBound(1UL << zisofs_block_shift))
+		goto eio;
+
 	/* Now page[] contains an array of pages, any of which can be NULL,
 	   and the locks on which we hold.  We should now read the data and
 	   release the pages.  If the pages are NULL the decompressed data
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 5a97e346bd9..014a51fd00d 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -204,7 +204,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
 	int i;
 
 	spin_unlock(&journal->j_list_lock);
-	ll_rw_block(WRITE, *batch_count, bhs);
+	ll_rw_block(SWRITE, *batch_count, bhs);
 	spin_lock(&journal->j_list_lock);
 	for (i = 0; i < *batch_count; i++) {
 		struct buffer_head *bh = bhs[i];
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index dac720c837a..2a3e310f79e 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -358,7 +358,7 @@ write_out_data:
 					jbd_debug(2, "submit %d writes\n",
 							bufs);
 					spin_unlock(&journal->j_list_lock);
-					ll_rw_block(WRITE, bufs, wbuf);
+					ll_rw_block(SWRITE, bufs, wbuf);
 					journal_brelse_array(wbuf, bufs);
 					bufs = 0;
 					goto write_out_data;
@@ -381,7 +381,7 @@ write_out_data:
 
 	if (bufs) {
 		spin_unlock(&journal->j_list_lock);
-		ll_rw_block(WRITE, bufs, wbuf);
+		ll_rw_block(SWRITE, bufs, wbuf);
 		journal_brelse_array(wbuf, bufs);
 		spin_lock(&journal->j_list_lock);
 	}
@@ -720,11 +720,17 @@ wait_for_iobuf:
 	J_ASSERT(commit_transaction->t_log_list == NULL);
 
 restart_loop:
+	/*
+	 * As there are other places (journal_unmap_buffer()) adding buffers
+	 * to this list we have to be careful and hold the j_list_lock.
+	 */
+	spin_lock(&journal->j_list_lock);
 	while (commit_transaction->t_forget) {
 		transaction_t *cp_transaction;
 		struct buffer_head *bh;
 
 		jh = commit_transaction->t_forget;
+		spin_unlock(&journal->j_list_lock);
 		bh = jh2bh(jh);
 		jbd_lock_bh_state(bh);
 		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction ||
@@ -792,9 +798,25 @@ restart_loop:
 			journal_remove_journal_head(bh);  /* needs a brelse */
 			release_buffer_page(bh);
 		}
+		cond_resched_lock(&journal->j_list_lock);
+	}
+	spin_unlock(&journal->j_list_lock);
+	/*
+	 * This is a bit sleazy.  We borrow j_list_lock to protect
+	 * journal->j_committing_transaction in __journal_remove_checkpoint.
+	 * Really, __journal_remove_checkpoint should be using j_state_lock but
+	 * it's a bit hassle to hold that across __journal_remove_checkpoint
+	 */
+	spin_lock(&journal->j_state_lock);
+	spin_lock(&journal->j_list_lock);
+	/*
+	 * Now recheck if some buffers did not get attached to the transaction
+	 * while the lock was dropped...
+	 */
+	if (commit_transaction->t_forget) {
 		spin_unlock(&journal->j_list_lock);
-		if (cond_resched())
-			goto restart_loop;
+		spin_unlock(&journal->j_state_lock);
+		goto restart_loop;
 	}
 
 	/* Done with this transaction! */
@@ -803,14 +825,6 @@ restart_loop:
 
 	J_ASSERT(commit_transaction->t_state == T_COMMIT);
 
-	/*
-	 * This is a bit sleazy.  We borrow j_list_lock to protect
-	 * journal->j_committing_transaction in __journal_remove_checkpoint.
-	 * Really, __jornal_remove_checkpoint should be using j_state_lock but
-	 * it's a bit hassle to hold that across __journal_remove_checkpoint
-	 */
-	spin_lock(&journal->j_state_lock);
-	spin_lock(&journal->j_list_lock);
 	commit_transaction->t_state = T_FINISHED;
 	J_ASSERT(commit_transaction == journal->j_committing_transaction);
 	journal->j_commit_sequence = commit_transaction->t_tid;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 5e7b4394951..e4b516ac498 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -65,7 +65,6 @@ EXPORT_SYMBOL(journal_set_features);
 EXPORT_SYMBOL(journal_create);
 EXPORT_SYMBOL(journal_load);
 EXPORT_SYMBOL(journal_destroy);
-EXPORT_SYMBOL(journal_recover);
 EXPORT_SYMBOL(journal_update_superblock);
 EXPORT_SYMBOL(journal_abort);
 EXPORT_SYMBOL(journal_errno);
@@ -81,6 +80,7 @@ EXPORT_SYMBOL(journal_try_to_free_buffers);
 EXPORT_SYMBOL(journal_force_commit);
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
+static void __journal_abort_soft (journal_t *journal, int errno);
 
 /*
  * Helper function used to manage commit timeouts
@@ -93,16 +93,6 @@ static void commit_timeout(unsigned long __data)
 	wake_up_process(p);
 }
 
-/* Static check for data structure consistency.  There's no code
- * invoked --- we'll just get a linker failure if things aren't right.
- */
-void __journal_internal_check(void)
-{
-	extern void journal_bad_superblock_size(void);
-	if (sizeof(struct journal_superblock_s) != 1024)
-		journal_bad_superblock_size();
-}
-
 /*
  * kjournald: The main thread function used to manage a logging device
  * journal.
@@ -119,16 +109,12 @@ void __journal_internal_check(void)
  *    known as checkpointing, and this thread is responsible for that job.
  */
 
-journal_t *current_journal;		// AKPM: debug
-
-int kjournald(void *arg)
+static int kjournald(void *arg)
 {
 	journal_t *journal = (journal_t *) arg;
 	transaction_t *transaction;
 	struct timer_list timer;
 
-	current_journal = journal;
-
 	daemonize("kjournald");
 
 	/* Set up an interval timer which can be used to trigger a
@@ -193,6 +179,8 @@ loop:
 		if (transaction && time_after_eq(jiffies,
 						transaction->t_expires))
 			should_sleep = 0;
+		if (journal->j_flags & JFS_UNMOUNT)
+ 			should_sleep = 0;
 		if (should_sleep) {
 			spin_unlock(&journal->j_state_lock);
 			schedule();
@@ -969,7 +957,7 @@ void journal_update_superblock(journal_t *journal, int wait)
 	if (wait)
 		sync_dirty_buffer(bh);
 	else
-		ll_rw_block(WRITE, 1, &bh);
+		ll_rw_block(SWRITE, 1, &bh);
 
 out:
 	/* If we have just flushed the log (by marking s_start==0), then
@@ -1439,7 +1427,7 @@ int journal_wipe(journal_t *journal, int write)
  * device this journal is present.
  */
 
-const char *journal_dev_name(journal_t *journal, char *buffer)
+static const char *journal_dev_name(journal_t *journal, char *buffer)
 {
 	struct block_device *bdev;
 
@@ -1485,7 +1473,7 @@ void __journal_abort_hard(journal_t *journal)
 
 /* Soft abort: record the abort error status in the journal superblock,
  * but don't do any other IO. */
-void __journal_abort_soft (journal_t *journal, int errno)
+static void __journal_abort_soft (journal_t *journal, int errno)
 {
 	if (journal->j_flags & JFS_ABORT)
 		return;
@@ -1618,7 +1606,7 @@ int journal_blocks_per_page(struct inode *inode)
  * Simple support for retrying memory allocations.  Introduced to help to
  * debug different VM deadlock avoidance strategies. 
  */
-void * __jbd_kmalloc (const char *where, size_t size, int flags, int retry)
+void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 {
 	return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
 }
@@ -1880,7 +1868,7 @@ EXPORT_SYMBOL(journal_enable_debug);
 
 static struct proc_dir_entry *proc_jbd_debug;
 
-int read_jbd_debug(char *page, char **start, off_t off,
+static int read_jbd_debug(char *page, char **start, off_t off,
 			  int count, int *eof, void *data)
 {
 	int ret;
@@ -1890,7 +1878,7 @@ int read_jbd_debug(char *page, char **start, off_t off,
 	return ret;
 }
 
-int write_jbd_debug(struct file *file, const char __user *buffer,
+static int write_jbd_debug(struct file *file, const char __user *buffer,
 			   unsigned long count, void *data)
 {
 	char buf[32];
@@ -1979,6 +1967,14 @@ static int __init journal_init(void)
 {
 	int ret;
 
+/* Static check for data structure consistency.  There's no code
+ * invoked --- we'll just get a linker failure if things aren't right.
+ */
+	extern void journal_bad_superblock_size(void);
+	if (sizeof(struct journal_superblock_s) != 1024)
+		journal_bad_superblock_size();
+
+
 	ret = journal_init_caches();
 	if (ret != 0)
 		journal_destroy_caches();
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index d327a598f86..a5614418346 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -116,7 +116,8 @@ static inline int hash(journal_t *journal, unsigned long block)
 		(block << (hash_shift - 12))) & (table->hash_size - 1);
 }
 
-int insert_revoke_hash(journal_t *journal, unsigned long blocknr, tid_t seq)
+static int insert_revoke_hash(journal_t *journal, unsigned long blocknr,
+			      tid_t seq)
 {
 	struct list_head *hash_list;
 	struct jbd_revoke_record_s *record;
@@ -613,7 +614,7 @@ static void flush_descriptor(journal_t *journal,
 	set_buffer_jwrite(bh);
 	BUFFER_TRACE(bh, "write");
 	set_buffer_dirty(bh);
-	ll_rw_block(WRITE, 1, &bh);
+	ll_rw_block(SWRITE, 1, &bh);
 }
 #endif
 
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 77b7662b840..13cb05bf604 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -490,23 +490,21 @@ void journal_unlock_updates (journal_t *journal)
  */
 static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
 {
-	struct buffer_head *bh = jh2bh(jh);
 	int jlist;
 
-	if (buffer_dirty(bh)) {
-		/* If this buffer is one which might reasonably be dirty
-		 * --- ie. data, or not part of this journal --- then
-		 * we're OK to leave it alone, but otherwise we need to
-		 * move the dirty bit to the journal's own internal
-		 * JBDDirty bit. */
-		jlist = jh->b_jlist;
-
-		if (jlist == BJ_Metadata || jlist == BJ_Reserved || 
-		    jlist == BJ_Shadow || jlist == BJ_Forget) {
-			if (test_clear_buffer_dirty(jh2bh(jh))) {
-				set_bit(BH_JBDDirty, &jh2bh(jh)->b_state);
-			}
-		}
+	/* If this buffer is one which might reasonably be dirty
+	 * --- ie. data, or not part of this journal --- then
+	 * we're OK to leave it alone, but otherwise we need to
+	 * move the dirty bit to the journal's own internal
+	 * JBDDirty bit. */
+	jlist = jh->b_jlist;
+
+	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
+	    jlist == BJ_Shadow || jlist == BJ_Forget) {
+		struct buffer_head *bh = jh2bh(jh);
+
+		if (test_clear_buffer_dirty(bh))
+			set_buffer_jbddirty(bh);
 	}
 }
 
@@ -574,9 +572,14 @@ repeat:
 			if (jh->b_next_transaction)
 				J_ASSERT_JH(jh, jh->b_next_transaction ==
 							transaction);
-			JBUFFER_TRACE(jh, "Unexpected dirty buffer");
-			jbd_unexpected_dirty_buffer(jh);
- 		}
+		}
+		/*
+		 * In any case we need to clean the dirty flag and we must
+		 * do it under the buffer lock to be sure we don't race
+		 * with running write-out.
+		 */
+		JBUFFER_TRACE(jh, "Unexpected dirty buffer");
+		jbd_unexpected_dirty_buffer(jh);
  	}
 
 	unlock_buffer(bh);
@@ -1337,8 +1340,7 @@ int journal_stop(handle_t *handle)
 	if (handle->h_sync) {
 		do {
 			old_handle_count = transaction->t_handle_count;
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(1);
+			schedule_timeout_uninterruptible(1);
 		} while (old_handle_count != transaction->t_handle_count);
 	}
 
@@ -1619,7 +1621,7 @@ out:
  * while the data is part of a transaction.  Yes?
  */
 int journal_try_to_free_buffers(journal_t *journal, 
-				struct page *page, int unused_gfp_mask)
+				struct page *page, gfp_t unused_gfp_mask)
 {
 	struct buffer_head *head;
 	struct buffer_head *bh;
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index bfbeb4c86e0..3dcc6d2162c 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -1629,9 +1629,6 @@ static int jffs_fsync(struct file *f, struct dentry *d, int datasync)
 }
 
 
-extern int generic_file_open(struct inode *, struct file *) __attribute__((weak));
-extern loff_t generic_file_llseek(struct file *, loff_t, int) __attribute__((weak));
-
 static struct file_operations jffs_file_operations =
 {
 	.open		= generic_file_open,
@@ -1747,6 +1744,7 @@ jffs_delete_inode(struct inode *inode)
 	D3(printk("jffs_delete_inode(): inode->i_ino == %lu\n",
 		  inode->i_ino));
 
+	truncate_inode_pages(&inode->i_data, 0);
 	lock_kernel();
 	inode->i_size = 0;
 	inode->i_blocks = 0;
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 456d7e6e29c..27f199e94cf 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -1701,12 +1701,10 @@ jffs_find_file(struct jffs_control *c, __u32 ino)
 {
 	struct jffs_file *f;
 	int i = ino % c->hash_len;
-	struct list_head *tmp;
 
 	D3(printk("jffs_find_file(): ino: %u\n", ino));
 
-	for (tmp = c->hash[i].next; tmp != &c->hash[i]; tmp = tmp->next) {
-		f = list_entry(tmp, struct jffs_file, hash);
+	list_for_each_entry(f, &c->hash[i], hash) {
 		if (ino != f->ino)
 			continue;
 		D3(printk("jffs_find_file(): Found file with ino "
@@ -2102,13 +2100,12 @@ jffs_foreach_file(struct jffs_control *c, int (*func)(struct jffs_file *))
 	int result = 0;
 
 	for (pos = 0; pos < c->hash_len; pos++) {
-		struct list_head *p, *next;
-		for (p = c->hash[pos].next; p != &c->hash[pos]; p = next) {
-			/* We need a reference to the next file in the
-			   list because `func' might remove the current
-			   file `f'.  */
-			next = p->next;
-			r = func(list_entry(p, struct jffs_file, hash));
+		struct jffs_file *f, *next;
+
+		/* We must do _safe, because 'func' might remove the
+		   current file 'f' from the list.  */
+		list_for_each_entry_safe(f, next, &c->hash[pos], hash) {
+			r = func(f);
 			if (r < 0)
 				return r;
 			result += r;
@@ -2613,9 +2610,8 @@ jffs_print_hash_table(struct jffs_control *c)
 
 	printk("JFFS: Dumping the file system's hash table...\n");
 	for (i = 0; i < c->hash_len; i++) {
-		struct list_head *p;
-		for (p = c->hash[i].next; p != &c->hash[i]; p = p->next) {
-			struct jffs_file *f=list_entry(p,struct jffs_file,hash);
+		struct jffs_file *f;
+		list_for_each_entry(f, &c->hash[i], hash) {
 			printk("*** c->hash[%u]: \"%s\" "
 			       "(ino: %u, pino: %u)\n",
 			       i, (f->name ? f->name : ""),
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 0f224384f17..8210ac16a36 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -15,6 +15,7 @@
 #include <linux/jffs2.h>
 #include <linux/mtd/mtd.h>
 #include <linux/completion.h>
+#include <linux/sched.h>
 #include "nodelist.h"
 
 
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index bd9ed9b0247..8279bf0133f 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -21,9 +21,6 @@
 #include <linux/jffs2.h>
 #include "nodelist.h"
 
-extern int generic_file_open(struct inode *, struct file *) __attribute__((weak));
-extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) __attribute__((weak));
-
 static int jffs2_commit_write (struct file *filp, struct page *pg,
 			       unsigned start, unsigned end);
 static int jffs2_prepare_write (struct file *filp, struct page *pg,
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index 65ab6b001dc..82ef484f5e1 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -18,7 +18,7 @@
 #include <linux/namei.h>
 #include "nodelist.h"
 
-static int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd);
+static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd);
 
 struct inode_operations jffs2_symlink_inode_operations =
 {	
@@ -27,9 +27,10 @@ struct inode_operations jffs2_symlink_inode_operations =
 	.setattr =	jffs2_setattr
 };
 
-static int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode);
+	char *p = (char *)f->dents;
 	
 	/*
 	 * We don't acquire the f->sem mutex here since the only data we
@@ -45,19 +46,20 @@ static int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
 	 * nd_set_link() call.
 	 */
 	
-	if (!f->dents) {
+	if (!p) {
 		printk(KERN_ERR "jffs2_follow_link(): can't find symlink taerget\n");
-		return -EIO;
+		p = ERR_PTR(-EIO);
+	} else {
+		D1(printk(KERN_DEBUG "jffs2_follow_link(): target path is '%s'\n", (char *) f->dents));
 	}
-	D1(printk(KERN_DEBUG "jffs2_follow_link(): target path is '%s'\n", (char *) f->dents));
 
-	nd_set_link(nd, (char *)f->dents);
+	nd_set_link(nd, p);
 	
 	/*
 	 * We unlock the f->sem mutex but VFS will use the f->dents string. This is safe
 	 * since the only way that may cause f->dents to be changed is iput() operation.
 	 * But VFS will not use f->dents after iput() has been called.
 	 */
-	return 0;
+	return NULL;
 }
 
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 996d922e503..316133c626b 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -18,6 +18,8 @@
 #include <linux/mtd/mtd.h>
 #include <linux/crc32.h>
 #include <linux/mtd/nand.h>
+#include <linux/jiffies.h>
+
 #include "nodelist.h"
 
 /* For testing write failures */
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index e892dab40c2..461e4934ca7 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -23,6 +23,7 @@
 #include <linux/quotaops.h>
 #include <linux/posix_acl_xattr.h>
 #include "jfs_incore.h"
+#include "jfs_txnmgr.h"
 #include "jfs_xattr.h"
 #include "jfs_acl.h"
 
@@ -75,7 +76,8 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 	return acl;
 }
 
-static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
+		       struct posix_acl *acl)
 {
 	char *ea_name;
 	struct jfs_inode_info *ji = JFS_IP(inode);
@@ -110,7 +112,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 		if (rc < 0)
 			goto out;
 	}
-	rc = __jfs_setxattr(inode, ea_name, value, size, 0);
+	rc = __jfs_setxattr(tid, inode, ea_name, value, size, 0);
 out:
 	kfree(value);
 
@@ -143,7 +145,7 @@ int jfs_permission(struct inode *inode, int mask, struct nameidata *nd)
 	return generic_permission(inode, mask, jfs_check_acl);
 }
 
-int jfs_init_acl(struct inode *inode, struct inode *dir)
+int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir)
 {
 	struct posix_acl *acl = NULL;
 	struct posix_acl *clone;
@@ -159,7 +161,7 @@ int jfs_init_acl(struct inode *inode, struct inode *dir)
 
 	if (acl) {
 		if (S_ISDIR(inode->i_mode)) {
-			rc = jfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
+			rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl);
 			if (rc)
 				goto cleanup;
 		}
@@ -173,7 +175,8 @@ int jfs_init_acl(struct inode *inode, struct inode *dir)
 		if (rc >= 0) {
 			inode->i_mode = mode;
 			if (rc > 0)
-				rc = jfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+				rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS,
+						 clone);
 		}
 		posix_acl_release(clone);
 cleanup:
@@ -202,8 +205,15 @@ static int jfs_acl_chmod(struct inode *inode)
 		return -ENOMEM;
 
 	rc = posix_acl_chmod_masq(clone, inode->i_mode);
-	if (!rc)
-		rc = jfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+	if (!rc) {
+		tid_t tid = txBegin(inode->i_sb, 0);
+		down(&JFS_IP(inode)->commit_sem);
+		rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, clone);
+		if (!rc)
+			rc = txCommit(tid, 1, &inode, 0);
+		txEnd(tid);
+		up(&JFS_IP(inode)->commit_sem);
+	}
 
 	posix_acl_release(clone);
 	return rc;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 2137138c59b..9f942ca8e4e 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -128,17 +128,22 @@ void jfs_delete_inode(struct inode *inode)
 {
 	jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
 
-	if (test_cflag(COMMIT_Freewmap, inode))
-		jfs_free_zero_link(inode);
+	if (!is_bad_inode(inode) &&
+	    (JFS_IP(inode)->fileset == FILESYSTEM_I)) {
+		truncate_inode_pages(&inode->i_data, 0);
 
-	diFree(inode);
+		if (test_cflag(COMMIT_Freewmap, inode))
+			jfs_free_zero_link(inode);
 
-	/*
-	 * Free the inode from the quota allocation.
-	 */
-	DQUOT_INIT(inode);
-	DQUOT_FREE_INODE(inode);
-	DQUOT_DROP(inode);
+		diFree(inode);
+
+		/*
+		 * Free the inode from the quota allocation.
+		 */
+		DQUOT_INIT(inode);
+		DQUOT_FREE_INODE(inode);
+		DQUOT_DROP(inode);
+	}
 
 	clear_inode(inode);
 }
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index a3acd3eec05..a76293767c7 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -21,8 +21,16 @@
 #ifdef CONFIG_JFS_POSIX_ACL
 
 int jfs_permission(struct inode *, int, struct nameidata *);
-int jfs_init_acl(struct inode *, struct inode *);
+int jfs_init_acl(tid_t, struct inode *, struct inode *);
 int jfs_setattr(struct dentry *, struct iattr *);
 
-#endif		/* CONFIG_JFS_POSIX_ACL */
+#else
+
+static inline int jfs_init_acl(tid_t tid, struct inode *inode,
+			       struct inode *dir)
+{
+	return 0;
+}
+
+#endif
 #endif		/* _H_JFS_ACL */
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index c739626f5bf..68000a50ceb 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -74,7 +74,7 @@
 static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
 			int nblocks);
 static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval);
-static void dbBackSplit(dmtree_t * tp, int leafno);
+static int dbBackSplit(dmtree_t * tp, int leafno);
 static int dbJoin(dmtree_t * tp, int leafno, int newval);
 static void dbAdjTree(dmtree_t * tp, int leafno, int newval);
 static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc,
@@ -305,7 +305,6 @@ int dbSync(struct inode *ipbmap)
 	filemap_fdatawrite(ipbmap->i_mapping);
 	filemap_fdatawait(ipbmap->i_mapping);
 
-	ipbmap->i_state |= I_DIRTY;
 	diWriteSpecial(ipbmap, 0);
 
 	return (0);
@@ -2467,7 +2466,9 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
 		 * that it is at the front of a binary buddy system.
 		 */
 		if (oldval == NOFREE) {
-			dbBackSplit((dmtree_t *) dcp, leafno);
+			rc = dbBackSplit((dmtree_t *) dcp, leafno);
+			if (rc)
+				return rc;
 			oldval = dcp->stree[ti];
 		}
 		dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval);
@@ -2627,7 +2628,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
  *
  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
  */
-static void dbBackSplit(dmtree_t * tp, int leafno)
+static int dbBackSplit(dmtree_t * tp, int leafno)
 {
 	int budsz, bud, w, bsz, size;
 	int cursz;
@@ -2662,7 +2663,10 @@ static void dbBackSplit(dmtree_t * tp, int leafno)
 		 */
 		for (w = leafno, bsz = budsz;; bsz <<= 1,
 		     w = (w < bud) ? w : bud) {
-			assert(bsz < le32_to_cpu(tp->dmt_nleafs));
+			if (bsz >= le32_to_cpu(tp->dmt_nleafs)) {
+				jfs_err("JFS: block map error in dbBackSplit");
+				return -EIO;
+			}
 
 			/* determine the buddy.
 			 */
@@ -2681,7 +2685,11 @@ static void dbBackSplit(dmtree_t * tp, int leafno)
 		}
 	}
 
-	assert(leaf[leafno] == size);
+	if (leaf[leafno] != size) {
+		jfs_err("JFS: wrong leaf value in dbBackSplit");
+		return -EIO;
+	}
+	return 0;
 }
 
 
@@ -3055,7 +3063,7 @@ static int cntlz(u32 value)
  * RETURN VALUES:
  *      log2 number of blocks
  */
-int blkstol2(s64 nb)
+static int blkstol2(s64 nb)
 {
 	int l2nb;
 	s64 mask;		/* meant to be signed */
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index 86ccac80f0a..72a5588faec 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -37,6 +37,9 @@
 #define JFS_ERR_CONTINUE   0x00000004   /* continue */
 #define JFS_ERR_PANIC      0x00000008   /* panic */
 
+#define	JFS_USRQUOTA	0x00000010
+#define	JFS_GRPQUOTA	0x00000020
+
 /* platform option (conditional compilation) */
 #define JFS_AIX		0x80000000	/* AIX support */
 /*	POSIX name/directory  support */
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 4021d46da7e..28201b194f5 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -57,6 +57,12 @@
 #include "jfs_debug.h"
 
 /*
+ * __mark_inode_dirty expects inodes to be hashed.  Since we don't want
+ * special inodes in the fileset inode space, we hash them to a dummy head
+ */
+static HLIST_HEAD(aggregate_hash);
+
+/*
  * imap locks
  */
 /* iag free list lock */
@@ -491,6 +497,8 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 	/* release the page */
 	release_metapage(mp);
 
+	hlist_add_head(&ip->i_hash, &aggregate_hash);
+
 	return (ip);
 }
 
@@ -514,8 +522,6 @@ void diWriteSpecial(struct inode *ip, int secondary)
 	ino_t inum = ip->i_ino;
 	struct metapage *mp;
 
-	ip->i_state &= ~I_DIRTY;
-
 	if (secondary)
 		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
 	else
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 22815e88e7c..d27bac6acaa 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -191,7 +191,7 @@ static int lbmIOWait(struct lbuf * bp, int flag);
 static bio_end_io_t lbmIODone;
 static void lbmStartIO(struct lbuf * bp);
 static void lmGCwrite(struct jfs_log * log, int cant_block);
-static int lmLogSync(struct jfs_log * log, int nosyncwait);
+static int lmLogSync(struct jfs_log * log, int hard_sync);
 
 
 
@@ -915,19 +915,17 @@ static void lmPostGC(struct lbuf * bp)
  *	if new sync address is available
  *	(normally the case if sync() is executed by back-ground
  *	process).
- *	if not, explicitly run jfs_blogsync() to initiate
- *	getting of new sync address.
  *	calculate new value of i_nextsync which determines when
  *	this code is called again.
  *
  * PARAMETERS:	log	- log structure
- * 		nosyncwait - 1 if called asynchronously
+ * 		hard_sync - 1 to force all metadata to be written
  *
  * RETURN:	0
  *			
  * serialization: LOG_LOCK() held on entry/exit
  */
-static int lmLogSync(struct jfs_log * log, int nosyncwait)
+static int lmLogSync(struct jfs_log * log, int hard_sync)
 {
 	int logsize;
 	int written;		/* written since last syncpt */
@@ -941,11 +939,18 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
 	unsigned long flags;
 
 	/* push dirty metapages out to disk */
-	list_for_each_entry(sbi, &log->sb_list, log_list) {
-		filemap_flush(sbi->ipbmap->i_mapping);
-		filemap_flush(sbi->ipimap->i_mapping);
-		filemap_flush(sbi->direct_inode->i_mapping);
-	}
+	if (hard_sync)
+		list_for_each_entry(sbi, &log->sb_list, log_list) {
+			filemap_fdatawrite(sbi->ipbmap->i_mapping);
+			filemap_fdatawrite(sbi->ipimap->i_mapping);
+			filemap_fdatawrite(sbi->direct_inode->i_mapping);
+		}
+	else
+		list_for_each_entry(sbi, &log->sb_list, log_list) {
+			filemap_flush(sbi->ipbmap->i_mapping);
+			filemap_flush(sbi->ipimap->i_mapping);
+			filemap_flush(sbi->direct_inode->i_mapping);
+		}
 
 	/*
 	 *      forward syncpt
@@ -1021,10 +1026,6 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
 		/* next syncpt trigger = written + more */
 		log->nextsync = written + more;
 
-	/* return if lmLogSync() from outside of transaction, e.g., sync() */
-	if (nosyncwait)
-		return lsn;
-
 	/* if number of bytes written from last sync point is more
 	 * than 1/4 of the log size, stop new transactions from
 	 * starting until all current transactions are completed
@@ -1049,11 +1050,12 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
  *
  * FUNCTION:	write log SYNCPT record for specified log
  *
- * PARAMETERS:	log	- log structure
+ * PARAMETERS:	log	  - log structure
+ * 		hard_sync - set to 1 to force metadata to be written
  */
-void jfs_syncpt(struct jfs_log *log)
+void jfs_syncpt(struct jfs_log *log, int hard_sync)
 {	LOG_LOCK(log);
-	lmLogSync(log, 1);
+	lmLogSync(log, hard_sync);
 	LOG_UNLOCK(log);
 }
 
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index 747114cd38b..e4978b5b65e 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -510,6 +510,6 @@ extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
 extern int lmGroupCommit(struct jfs_log *, struct tblock *);
 extern int jfsIOWait(void *);
 extern void jfs_flush_journal(struct jfs_log * log, int wait);
-extern void jfs_syncpt(struct jfs_log *log);
+extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
 
 #endif				/* _H_JFS_LOGMGR */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 13d7e3f1feb..8a53981f9f2 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -86,7 +86,7 @@ struct meta_anchor {
 	atomic_t io_count;
 	struct metapage *mp[MPS_PER_PAGE];
 };
-#define mp_anchor(page) ((struct meta_anchor *)page->private)
+#define mp_anchor(page) ((struct meta_anchor *)page_private(page))
 
 static inline struct metapage *page_to_mp(struct page *page, uint offset)
 {
@@ -108,7 +108,7 @@ static inline int insert_metapage(struct page *page, struct metapage *mp)
 		if (!a)
 			return -ENOMEM;
 		memset(a, 0, sizeof(struct meta_anchor));
-		page->private = (unsigned long)a;
+		set_page_private(page, (unsigned long)a);
 		SetPagePrivate(page);
 		kmap(page);
 	}
@@ -136,7 +136,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
 	a->mp[index] = NULL;
 	if (--a->mp_count == 0) {
 		kfree(a);
-		page->private = 0;
+		set_page_private(page, 0);
 		ClearPagePrivate(page);
 		kunmap(page);
 	}
@@ -156,13 +156,13 @@ static inline void dec_io(struct page *page, void (*handler) (struct page *))
 #else
 static inline struct metapage *page_to_mp(struct page *page, uint offset)
 {
-	return PagePrivate(page) ? (struct metapage *)page->private : NULL;
+	return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL;
 }
 
 static inline int insert_metapage(struct page *page, struct metapage *mp)
 {
 	if (mp) {
-		page->private = (unsigned long)mp;
+		set_page_private(page, (unsigned long)mp);
 		SetPagePrivate(page);
 		kmap(page);
 	}
@@ -171,7 +171,7 @@ static inline int insert_metapage(struct page *page, struct metapage *mp)
 
 static inline void remove_metapage(struct page *page, struct metapage *mp)
 {
-	page->private = 0;
+	set_page_private(page, 0);
 	ClearPagePrivate(page);
 	kunmap(page);
 }
@@ -198,7 +198,7 @@ static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 	}
 }
 
-static inline struct metapage *alloc_metapage(unsigned int gfp_mask)
+static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
 {
 	return mempool_alloc(metapage_mempool, gfp_mask);
 }
@@ -395,6 +395,12 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
 
 		if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) {
 			redirty = 1;
+			/*
+			 * Make sure this page isn't blocked indefinitely.
+			 * If the journal isn't undergoing I/O, push it
+			 */
+			if (mp->log && !(mp->log->cflag & logGC_PAGEOUT))
+				jfs_flush_journal(mp->log, 0);
 			continue;
 		}
 
@@ -534,7 +540,7 @@ add_failed:
 	return -EIO;
 }
 
-static int metapage_releasepage(struct page *page, int gfp_mask)
+static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
 {
 	struct metapage *mp;
 	int busy = 0;
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 121c981ff45..b660c93c92d 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -552,6 +552,11 @@ void txEnd(tid_t tid)
 		 * synchronize with logsync barrier
 		 */
 		if (test_bit(log_SYNCBARRIER, &log->flag)) {
+			TXN_UNLOCK();
+
+			/* write dirty metadata & forward log syncpt */
+			jfs_syncpt(log, 1);
+
 			jfs_info("log barrier off: 0x%x", log->lsn);
 
 			/* enable new transactions start */
@@ -560,11 +565,6 @@ void txEnd(tid_t tid)
 			/* wakeup all waitors for logsync barrier */
 			TXN_WAKEUP(&log->syncwait);
 
-			TXN_UNLOCK();
-
-			/* forward log syncpt */
-			jfs_syncpt(log);
-
 			goto wakeup;
 		}
 	}
@@ -657,7 +657,9 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 				/* only anonymous txn.
 				 * Remove from anon_list
 				 */
+				TXN_LOCK();
 				list_del_init(&jfs_ip->anon_inode_list);
+				TXN_UNLOCK();
 			}
 			jfs_ip->atlhead = tlck->next;
 		} else {
@@ -723,6 +725,9 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 	else
 		tlck->flag = tlckINODELOCK;
 
+	if (S_ISDIR(ip->i_mode))
+		tlck->flag |= tlckDIRECTORY;
+
 	tlck->type = 0;
 
 	/* bind the tlock and the page */
@@ -1007,6 +1012,8 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 
 	/* bind the tlock and the object */
 	tlck->flag = tlckINODELOCK;
+	if (S_ISDIR(ip->i_mode))
+		tlck->flag |= tlckDIRECTORY;
 	tlck->ip = ip;
 	tlck->mp = NULL;
 
@@ -1075,6 +1082,8 @@ struct linelock *txLinelock(struct linelock * tlock)
 	linelock->flag = tlckLINELOCK;
 	linelock->maxcnt = TLOCKLONG;
 	linelock->index = 0;
+	if (tlck->flag & tlckDIRECTORY)
+		linelock->flag |= tlckDIRECTORY;
 
 	/* append linelock after tlock */
 	linelock->next = tlock->next;
@@ -2068,8 +2077,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
  *
  * function:    log from maplock of freed data extents;
  */
-void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
-	    struct tlock * tlck)
+static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
+		   struct tlock * tlck)
 {
 	struct pxd_lock *pxdlock;
 	int i, nlock;
@@ -2207,7 +2216,7 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
  * function: synchronously write pages locked by transaction
  *              after txLog() but before txUpdateMap();
  */
-void txForce(struct tblock * tblk)
+static void txForce(struct tblock * tblk)
 {
 	struct tlock *tlck;
 	lid_t lid, next;
@@ -2356,7 +2365,7 @@ static void txUpdateMap(struct tblock * tblk)
 			 */
 			else {	/* (maplock->flag & mlckFREE) */
 
-				if (S_ISDIR(tlck->ip->i_mode))
+				if (tlck->flag & tlckDIRECTORY)
 					txFreeMap(ipimap, maplock,
 						  tblk, COMMIT_PWMAP);
 				else
@@ -2387,7 +2396,6 @@ static void txUpdateMap(struct tblock * tblk)
 	 */
 	if (tblk->xflag & COMMIT_CREATE) {
 		diUpdatePMap(ipimap, tblk->ino, FALSE, tblk);
-		ipimap->i_state |= I_DIRTY;
 		/* update persistent block allocation map
 		 * for the allocation of inode extent;
 		 */
@@ -2398,7 +2406,6 @@ static void txUpdateMap(struct tblock * tblk)
 	} else if (tblk->xflag & COMMIT_DELETE) {
 		ip = tblk->u.ip;
 		diUpdatePMap(ipimap, ip->i_ino, TRUE, tblk);
-		ipimap->i_state |= I_DIRTY;
 		iput(ip);
 	}
 }
diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h
index 59ad0f6b723..0e4dc4514c4 100644
--- a/fs/jfs/jfs_txnmgr.h
+++ b/fs/jfs/jfs_txnmgr.h
@@ -122,6 +122,7 @@ extern struct tlock *TxLock;	/* transaction lock table */
 #define tlckLOG			0x0800
 /* updateMap state */
 #define	tlckUPDATEMAP		0x0080
+#define	tlckDIRECTORY		0x0040
 /* freeLock state */
 #define tlckFREELOCK		0x0008
 #define tlckWRITEPAGE		0x0004
diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h
index a1052f3f0be..25e9990bccd 100644
--- a/fs/jfs/jfs_xattr.h
+++ b/fs/jfs/jfs_xattr.h
@@ -52,8 +52,8 @@ struct jfs_ea_list {
 #define	END_EALIST(ealist) \
 	((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist)))
 
-extern int __jfs_setxattr(struct inode *, const char *, const void *, size_t,
-			  int);
+extern int __jfs_setxattr(tid_t, struct inode *, const char *, const void *,
+			  size_t, int);
 extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t,
 			int);
 extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t);
@@ -61,4 +61,14 @@ extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
 extern int jfs_removexattr(struct dentry *, const char *);
 
+#ifdef CONFIG_JFS_SECURITY
+extern int jfs_init_security(tid_t, struct inode *, struct inode *);
+#else
+static inline int jfs_init_security(tid_t tid, struct inode *inode,
+				    struct inode *dir)
+{
+	return 0;
+}
+#endif
+
 #endif	/* H_JFS_XATTR */
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index a7fe2f2b969..e72f4ebb6e9 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -3516,16 +3516,10 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
 	/* process entries backward from last index */
 	index = le16_to_cpu(p->header.nextindex) - 1;
 
-	if (p->header.flag & BT_INTERNAL)
-		goto getChild;
-
-	/*
-	 *      leaf page
-	 */
 
-	/* Since this is the rightmost leaf, and we may have already freed
-	 * a page that was formerly to the right, let's make sure that the
-	 * next pointer is zero.
+	/* Since this is the rightmost page at this level, and we may have
+	 * already freed a page that was formerly to the right, let's make
+	 * sure that the next pointer is zero.
 	 */
 	if (p->header.next) {
 		if (log)
@@ -3539,6 +3533,12 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
 		p->header.next = 0;
 	}
 
+	if (p->header.flag & BT_INTERNAL)
+		goto getChild;
+
+	/*
+	 *      leaf page
+	 */
 	freed = 0;
 
 	/* does region covered by leaf page precede Teof ? */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 1cae14e741e..1abe7343f92 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -39,6 +39,24 @@ struct dentry_operations jfs_ci_dentry_operations;
 static s64 commitZeroLink(tid_t, struct inode *);
 
 /*
+ * NAME:	free_ea_wmap(inode)
+ *
+ * FUNCTION:	free uncommitted extended attributes from working map 
+ *
+ */
+static inline void free_ea_wmap(struct inode *inode)
+{
+	dxd_t *ea = &JFS_IP(inode)->ea;
+
+	if (ea->flag & DXD_EXTENT) {
+		/* free EA pages from cache */
+		invalidate_dxd_metapages(inode, *ea);
+		dbFree(inode, addressDXD(ea), lengthDXD(ea));
+	}
+	ea->flag = 0;
+}
+
+/*
  * NAME:	jfs_create(dip, dentry, mode)
  *
  * FUNCTION:	create a regular file in the parent directory <dip>
@@ -89,8 +107,19 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
 	down(&JFS_IP(dip)->commit_sem);
 	down(&JFS_IP(ip)->commit_sem);
 
+	rc = jfs_init_acl(tid, ip, dip);
+	if (rc)
+		goto out3;
+
+	rc = jfs_init_security(tid, ip, dip);
+	if (rc) {
+		txAbort(tid, 0);
+		goto out3;
+	}
+
 	if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) {
 		jfs_err("jfs_create: dtSearch returned %d", rc);
+		txAbort(tid, 0);
 		goto out3;
 	}
 
@@ -139,6 +168,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
 	up(&JFS_IP(dip)->commit_sem);
 	up(&JFS_IP(ip)->commit_sem);
 	if (rc) {
+		free_ea_wmap(ip);
 		ip->i_nlink = 0;
 		iput(ip);
 	} else
@@ -147,11 +177,6 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
       out2:
 	free_UCSname(&dname);
 
-#ifdef CONFIG_JFS_POSIX_ACL
-	if (rc == 0)
-		jfs_init_acl(ip, dip);
-#endif
-
       out1:
 
 	jfs_info("jfs_create: rc:%d", rc);
@@ -216,8 +241,19 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 	down(&JFS_IP(dip)->commit_sem);
 	down(&JFS_IP(ip)->commit_sem);
 
+	rc = jfs_init_acl(tid, ip, dip);
+	if (rc)
+		goto out3;
+
+	rc = jfs_init_security(tid, ip, dip);
+	if (rc) {
+		txAbort(tid, 0);
+		goto out3;
+	}
+
 	if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) {
 		jfs_err("jfs_mkdir: dtSearch returned %d", rc);
+		txAbort(tid, 0);
 		goto out3;
 	}
 
@@ -267,6 +303,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 	up(&JFS_IP(dip)->commit_sem);
 	up(&JFS_IP(ip)->commit_sem);
 	if (rc) {
+		free_ea_wmap(ip);
 		ip->i_nlink = 0;
 		iput(ip);
 	} else
@@ -275,10 +312,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
       out2:
 	free_UCSname(&dname);
 
-#ifdef CONFIG_JFS_POSIX_ACL
-	if (rc == 0)
-		jfs_init_acl(ip, dip);
-#endif
 
       out1:
 
@@ -885,6 +918,10 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 	down(&JFS_IP(dip)->commit_sem);
 	down(&JFS_IP(ip)->commit_sem);
 
+	rc = jfs_init_security(tid, ip, dip);
+	if (rc)
+		goto out3;
+
 	tblk = tid_to_tblock(tid);
 	tblk->xflag |= COMMIT_CREATE;
 	tblk->ino = ip->i_ino;
@@ -1000,6 +1037,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 	up(&JFS_IP(dip)->commit_sem);
 	up(&JFS_IP(ip)->commit_sem);
 	if (rc) {
+		free_ea_wmap(ip);
 		ip->i_nlink = 0;
 		iput(ip);
 	} else
@@ -1008,11 +1046,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
       out2:
 	free_UCSname(&dname);
 
-#ifdef CONFIG_JFS_POSIX_ACL
-	if (rc == 0)
-		jfs_init_acl(ip, dip);
-#endif
-
       out1:
 	jfs_info("jfs_symlink: rc:%d", rc);
 	return rc;
@@ -1328,8 +1361,20 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 	down(&JFS_IP(dir)->commit_sem);
 	down(&JFS_IP(ip)->commit_sem);
 
-	if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE)))
+	rc = jfs_init_acl(tid, ip, dir);
+	if (rc)
+		goto out3;
+
+	rc = jfs_init_security(tid, ip, dir);
+	if (rc) {
+		txAbort(tid, 0);
+		goto out3;
+	}
+
+	if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE))) {
+		txAbort(tid, 0);
 		goto out3;
+	}
 
 	tblk = tid_to_tblock(tid);
 	tblk->xflag |= COMMIT_CREATE;
@@ -1337,8 +1382,10 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 	tblk->u.ixpxd = JFS_IP(ip)->ixpxd;
 
 	ino = ip->i_ino;
-	if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack)))
+	if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack))) {
+		txAbort(tid, 0);
 		goto out3;
+	}
 
 	ip->i_op = &jfs_file_inode_operations;
 	jfs_ip->dev = new_encode_dev(rdev);
@@ -1360,6 +1407,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 	up(&JFS_IP(ip)->commit_sem);
 	up(&JFS_IP(dir)->commit_sem);
 	if (rc) {
+		free_ea_wmap(ip);
 		ip->i_nlink = 0;
 		iput(ip);
 	} else
@@ -1368,11 +1416,6 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
       out1:
 	free_UCSname(&dname);
 
-#ifdef CONFIG_JFS_POSIX_ACL
-	if (rc == 0)
-		jfs_init_acl(ip, dir);
-#endif
-
       out:
 	jfs_info("jfs_mknod: returning %d", rc);
 	return rc;
@@ -1390,6 +1433,8 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
 
 	jfs_info("jfs_lookup: name = %s", name);
 
+	if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)
+		dentry->d_op = &jfs_ci_dentry_operations;
 
 	if ((name[0] == '.') && (len == 1))
 		inum = dip->i_ino;
@@ -1417,9 +1462,6 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
 		return ERR_PTR(-EACCES);
 	}
 
-	if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)
-		dentry->d_op = &jfs_ci_dentry_operations;
-
 	dentry = d_splice_alias(ip, dentry);
 
 	if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2))
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index ee32211288c..4226af3ea91 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -23,9 +23,11 @@
 #include <linux/parser.h>
 #include <linux/completion.h>
 #include <linux/vfs.h>
+#include <linux/mount.h>
 #include <linux/moduleparam.h>
 #include <linux/posix_acl.h>
 #include <asm/uaccess.h>
+#include <linux/seq_file.h>
 
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
@@ -114,6 +116,8 @@ static void jfs_destroy_inode(struct inode *inode)
 {
 	struct jfs_inode_info *ji = JFS_IP(inode);
 
+	BUG_ON(!list_empty(&ji->anon_inode_list));
+
 	spin_lock_irq(&ji->ag_lock);
 	if (ji->active_ag != -1) {
 		struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap;
@@ -190,7 +194,8 @@ static void jfs_put_super(struct super_block *sb)
 
 enum {
 	Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
-	Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err,
+	Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
+	Opt_usrquota, Opt_grpquota
 };
 
 static match_table_t tokens = {
@@ -202,8 +207,8 @@ static match_table_t tokens = {
 	{Opt_errors, "errors=%s"},
 	{Opt_ignore, "noquota"},
 	{Opt_ignore, "quota"},
-	{Opt_ignore, "usrquota"},
-	{Opt_ignore, "grpquota"},
+	{Opt_usrquota, "usrquota"},
+	{Opt_grpquota, "grpquota"},
 	{Opt_err, NULL}
 };
 
@@ -291,6 +296,24 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 			}
 			break;
 		}
+
+#if defined(CONFIG_QUOTA)
+		case Opt_quota:
+		case Opt_usrquota:
+			*flag |= JFS_USRQUOTA;
+			break;
+		case Opt_grpquota:
+			*flag |= JFS_GRPQUOTA;
+			break;
+#else
+		case Opt_usrquota:
+		case Opt_grpquota:
+		case Opt_quota:
+			printk(KERN_ERR
+			       "JFS: quota operations not supported\n");
+			break;
+#endif
+
 		default:
 			printk("jfs: Unrecognized mount option \"%s\" "
 					" or missing value\n", p);
@@ -419,6 +442,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 	inode->i_nlink = 1;
 	inode->i_size = sb->s_bdev->bd_inode->i_size;
 	inode->i_mapping->a_ops = &jfs_metapage_aops;
+	insert_inode_hash(inode);
 	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
 
 	sbi->direct_inode = inode;
@@ -531,12 +555,32 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
 	/* log == NULL indicates read-only mount */
 	if (log) {
 		jfs_flush_journal(log, wait);
-		jfs_syncpt(log);
+		jfs_syncpt(log, 0);
 	}
 
 	return 0;
 }
 
+static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
+{
+	struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb);
+
+	if (sbi->flag & JFS_NOINTEGRITY)
+		seq_puts(seq, ",nointegrity");
+	else
+		seq_puts(seq, ",integrity");
+
+#if defined(CONFIG_QUOTA)
+	if (sbi->flag & JFS_USRQUOTA)
+		seq_puts(seq, ",usrquota");
+
+	if (sbi->flag & JFS_GRPQUOTA)
+		seq_puts(seq, ",grpquota");
+#endif
+
+	return 0;
+}
+
 static struct super_operations jfs_super_operations = {
 	.alloc_inode	= jfs_alloc_inode,
 	.destroy_inode	= jfs_destroy_inode,
@@ -550,6 +594,7 @@ static struct super_operations jfs_super_operations = {
 	.unlockfs       = jfs_unlockfs,
 	.statfs		= jfs_statfs,
 	.remount_fs	= jfs_remount,
+	.show_options	= jfs_show_options
 };
 
 static struct export_operations jfs_export_operations = {
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index 287d8d6c3cf..16477b3835e 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -22,11 +22,11 @@
 #include "jfs_inode.h"
 #include "jfs_xattr.h"
 
-static int jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	char *s = JFS_IP(dentry->d_inode)->i_inline;
 	nd_set_link(nd, s);
-	return 0;
+	return NULL;
 }
 
 struct inode_operations jfs_symlink_inode_operations = {
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 554ec739e49..23aa5066b5a 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -21,6 +21,7 @@
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
 #include <linux/quotaops.h>
+#include <linux/security.h>
 #include "jfs_incore.h"
 #include "jfs_superblock.h"
 #include "jfs_dmap.h"
@@ -633,12 +634,12 @@ static void ea_release(struct inode *inode, struct ea_buffer *ea_buf)
 	}
 }
 
-static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size)
+static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf,
+		  int new_size)
 {
 	struct jfs_inode_info *ji = JFS_IP(inode);
 	unsigned long old_blocks, new_blocks;
 	int rc = 0;
-	tid_t tid;
 
 	if (new_size == 0) {
 		ea_release(inode, ea_buf);
@@ -664,9 +665,6 @@ static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size)
 	if (rc)
 		return rc;
 
-	tid = txBegin(inode->i_sb, 0);
-	down(&ji->commit_sem);
-
 	old_blocks = new_blocks = 0;
 
 	if (ji->ea.flag & DXD_EXTENT) {
@@ -695,11 +693,8 @@ static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size)
 		DQUOT_FREE_BLOCK(inode, old_blocks);
 
 	inode->i_ctime = CURRENT_TIME;
-	rc = txCommit(tid, 1, &inode, 0);
-	txEnd(tid);
-	up(&ji->commit_sem);
 
-	return rc;
+	return 0;
 }
 
 /*
@@ -810,8 +805,8 @@ static int can_set_xattr(struct inode *inode, const char *name,
 	return permission(inode, MAY_WRITE, NULL);
 }
 
-int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
-		   size_t value_len, int flags)
+int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name,
+		   const void *value, size_t value_len, int flags)
 {
 	struct jfs_ea_list *ealist;
 	struct jfs_ea *ea, *old_ea = NULL, *next_ea = NULL;
@@ -825,9 +820,6 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
 	int rc;
 	int length;
 
-	if ((rc = can_set_xattr(inode, name, value, value_len)))
-		return rc;
-
 	if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
 		os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
 				  GFP_KERNEL);
@@ -939,7 +931,7 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
 
 	ealist->size = cpu_to_le32(new_size);
 
-	rc = ea_put(inode, &ea_buf, new_size);
+	rc = ea_put(tid, inode, &ea_buf, new_size);
 
 	goto out;
       release:
@@ -955,12 +947,29 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
 int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 		 size_t value_len, int flags)
 {
+	struct inode *inode = dentry->d_inode;
+	struct jfs_inode_info *ji = JFS_IP(inode);
+	int rc;
+	tid_t tid;
+
+	if ((rc = can_set_xattr(inode, name, value, value_len)))
+		return rc;
+
 	if (value == NULL) {	/* empty EA, do not remove */
 		value = "";
 		value_len = 0;
 	}
 
-	return __jfs_setxattr(dentry->d_inode, name, value, value_len, flags);
+	tid = txBegin(inode->i_sb, 0);
+	down(&ji->commit_sem);
+	rc = __jfs_setxattr(tid, dentry->d_inode, name, value, value_len,
+			    flags);
+	if (!rc)
+		rc = txCommit(tid, 1, &inode, 0);
+	txEnd(tid);
+	up(&ji->commit_sem);
+
+	return rc;
 }
 
 static int can_get_xattr(struct inode *inode, const char *name)
@@ -1122,5 +1131,56 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
 
 int jfs_removexattr(struct dentry *dentry, const char *name)
 {
-	return __jfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
+	struct inode *inode = dentry->d_inode;
+	struct jfs_inode_info *ji = JFS_IP(inode);
+	int rc;
+	tid_t tid;
+
+	if ((rc = can_set_xattr(inode, name, NULL, 0)))
+		return rc;
+
+	tid = txBegin(inode->i_sb, 0);
+	down(&ji->commit_sem);
+	rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
+	if (!rc)
+		rc = txCommit(tid, 1, &inode, 0);
+	txEnd(tid);
+	up(&ji->commit_sem);
+
+	return rc;
+}
+
+#ifdef CONFIG_JFS_SECURITY
+int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir)
+{
+	int rc;
+	size_t len;
+	void *value;
+	char *suffix;
+	char *name;
+
+	rc = security_inode_init_security(inode, dir, &suffix, &value, &len);
+	if (rc) {
+		if (rc == -EOPNOTSUPP)
+			return 0;
+		return rc;
+	}
+	name = kmalloc(XATTR_SECURITY_PREFIX_LEN + 1 + strlen(suffix),
+		       GFP_NOFS);
+	if (!name) {
+		rc = -ENOMEM;
+		goto kmalloc_failed;
+	}
+	strcpy(name, XATTR_SECURITY_PREFIX);
+	strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
+
+	rc = __jfs_setxattr(tid, inode, name, value, len, 0);
+
+	kfree(name);
+kmalloc_failed:
+	kfree(suffix);
+	kfree(value);
+
+	return rc;
 }
+#endif
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 14b3ce87fa2..87332f30141 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -299,8 +299,7 @@ nlmclnt_alloc_call(void)
 			return call;
 		}
 		printk("nlmclnt_alloc_call: failed, waiting for memory\n");
-		current->state = TASK_INTERRUPTIBLE;
-		schedule_timeout(5*HZ);
+		schedule_timeout_interruptible(5*HZ);
 	}
 	return NULL;
 }
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 82c77df81c5..c4c8601096e 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -173,11 +173,10 @@ nlm_bind_host(struct nlm_host *host)
 
 	/* If we've already created an RPC client, check whether
 	 * RPC rebind is required
-	 * Note: why keep rebinding if we're on a tcp connection?
 	 */
 	if ((clnt = host->h_rpcclnt) != NULL) {
 		xprt = clnt->cl_xprt;
-		if (!xprt->stream && time_after_eq(jiffies, host->h_nextrebind)) {
+		if (time_after_eq(jiffies, host->h_nextrebind)) {
 			clnt->cl_port = 0;
 			host->h_nextrebind = jiffies + NLM_HOST_REBIND;
 			dprintk("lockd: next rebind in %ld jiffies\n",
@@ -189,7 +188,6 @@ nlm_bind_host(struct nlm_host *host)
 			goto forgetit;
 
 		xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout);
-		xprt->nocong = 1;	/* No congestion control for NLM */
 		xprt->resvport = 1;	/* NLM requires a reserved port */
 
 		/* Existing NLM servers accept AUTH_UNIX only */
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index de7536358c7..62f4a385177 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -30,6 +30,36 @@
 static struct nlm_file *	nlm_files[FILE_NRHASH];
 static DECLARE_MUTEX(nlm_file_sema);
 
+#ifdef NFSD_DEBUG
+static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
+{
+	u32 *fhp = (u32*)f->data;
+
+	/* print the first 32 bytes of the fh */
+	dprintk("lockd: %s (%08x %08x %08x %08x %08x %08x %08x %08x)\n",
+		msg, fhp[0], fhp[1], fhp[2], fhp[3],
+		fhp[4], fhp[5], fhp[6], fhp[7]);
+}
+
+static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
+{
+	struct inode *inode = file->f_file->f_dentry->d_inode;
+
+	dprintk("lockd: %s %s/%ld\n",
+		msg, inode->i_sb->s_id, inode->i_ino);
+}
+#else
+static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
+{
+	return;
+}
+
+static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
+{
+	return;
+}
+#endif
+
 static inline unsigned int file_hash(struct nfs_fh *f)
 {
 	unsigned int tmp=0;
@@ -55,11 +85,8 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
 	struct nlm_file	*file;
 	unsigned int	hash;
 	u32		nfserr;
-	u32		*fhp = (u32*)f->data;
-
-	dprintk("lockd: nlm_file_lookup(%08x %08x %08x %08x %08x %08x)\n",
-		fhp[0], fhp[1], fhp[2], fhp[3], fhp[4], fhp[5]);
 
+	nlm_debug_print_fh("nlm_file_lookup", f);
 
 	hash = file_hash(f);
 
@@ -70,8 +97,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
 		if (!nfs_compare_fh(&file->f_handle, f))
 			goto found;
 
-	dprintk("lockd: creating file for (%08x %08x %08x %08x %08x %08x)\n",
-		fhp[0], fhp[1], fhp[2], fhp[3], fhp[4], fhp[5]);
+	nlm_debug_print_fh("creating file for", f);
 
 	nfserr = nlm_lck_denied_nolocks;
 	file = (struct nlm_file *) kmalloc(sizeof(*file), GFP_KERNEL);
@@ -124,11 +150,10 @@ out_free:
 static inline void
 nlm_delete_file(struct nlm_file *file)
 {
-	struct inode *inode = file->f_file->f_dentry->d_inode;
 	struct nlm_file	**fp, *f;
 
-	dprintk("lockd: closing file %s/%ld\n",
-		inode->i_sb->s_id, inode->i_ino);
+	nlm_debug_print_file("closing file", file);
+
 	fp = nlm_files + file->f_hash;
 	while ((f = *fp) != NULL) {
 		if (f == file) {
diff --git a/fs/locks.c b/fs/locks.c
index 11956b6179f..a1e8b224801 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -124,6 +124,7 @@
 #include <linux/smp_lock.h>
 #include <linux/syscalls.h>
 #include <linux/time.h>
+#include <linux/rcupdate.h>
 
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
@@ -315,21 +316,22 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
 	/* POSIX-1996 leaves the case l->l_len < 0 undefined;
 	   POSIX-2001 defines it. */
 	start += l->l_start;
-	end = start + l->l_len - 1;
-	if (l->l_len < 0) {
+	if (start < 0)
+		return -EINVAL;
+	fl->fl_end = OFFSET_MAX;
+	if (l->l_len > 0) {
+		end = start + l->l_len - 1;
+		fl->fl_end = end;
+	} else if (l->l_len < 0) {
 		end = start - 1;
+		fl->fl_end = end;
 		start += l->l_len;
+		if (start < 0)
+			return -EINVAL;
 	}
-
-	if (start < 0)
-		return -EINVAL;
-	if (l->l_len > 0 && end < 0)
-		return -EOVERFLOW;
-
 	fl->fl_start = start;	/* we record the absolute position */
-	fl->fl_end = end;
-	if (l->l_len == 0)
-		fl->fl_end = OFFSET_MAX;
+	if (fl->fl_end < fl->fl_start)
+		return -EOVERFLOW;
 	
 	fl->fl_owner = current->files;
 	fl->fl_pid = current->tgid;
@@ -361,14 +363,21 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
 		return -EINVAL;
 	}
 
-	if (((start += l->l_start) < 0) || (l->l_len < 0))
+	start += l->l_start;
+	if (start < 0)
 		return -EINVAL;
-	fl->fl_end = start + l->l_len - 1;
-	if (l->l_len > 0 && fl->fl_end < 0)
-		return -EOVERFLOW;
+	fl->fl_end = OFFSET_MAX;
+	if (l->l_len > 0) {
+		fl->fl_end = start + l->l_len - 1;
+	} else if (l->l_len < 0) {
+		fl->fl_end = start - 1;
+		start += l->l_len;
+		if (start < 0)
+			return -EINVAL;
+	}
 	fl->fl_start = start;	/* we record the absolute position */
-	if (l->l_len == 0)
-		fl->fl_end = OFFSET_MAX;
+	if (fl->fl_end < fl->fl_start)
+		return -EOVERFLOW;
 	
 	fl->fl_owner = current->files;
 	fl->fl_pid = current->tgid;
@@ -828,12 +837,16 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request)
 		/* Detect adjacent or overlapping regions (if same lock type)
 		 */
 		if (request->fl_type == fl->fl_type) {
+			/* In all comparisons of start vs end, use
+			 * "start - 1" rather than "end + 1". If end
+			 * is OFFSET_MAX, end + 1 will become negative.
+			 */
 			if (fl->fl_end < request->fl_start - 1)
 				goto next_lock;
 			/* If the next lock in the list has entirely bigger
 			 * addresses than the new one, insert the lock here.
 			 */
-			if (fl->fl_start > request->fl_end + 1)
+			if (fl->fl_start - 1 > request->fl_end)
 				break;
 
 			/* If we come here, the new and old lock are of the
@@ -2198,21 +2211,24 @@ void steal_locks(fl_owner_t from)
 {
 	struct files_struct *files = current->files;
 	int i, j;
+	struct fdtable *fdt;
 
 	if (from == files)
 		return;
 
 	lock_kernel();
 	j = 0;
+	rcu_read_lock();
+	fdt = files_fdtable(files);
 	for (;;) {
 		unsigned long set;
 		i = j * __NFDBITS;
-		if (i >= files->max_fdset || i >= files->max_fds)
+		if (i >= fdt->max_fdset || i >= fdt->max_fds)
 			break;
-		set = files->open_fds->fds_bits[j++];
+		set = fdt->open_fds->fds_bits[j++];
 		while (set) {
 			if (set & 1) {
-				struct file *file = files->fd[i];
+				struct file *file = fdt->fd[i];
 				if (file)
 					__steal_locks(file, from);
 			}
@@ -2220,6 +2236,7 @@ void steal_locks(fl_owner_t from)
 			set >>= 1;
 		}
 	}
+	rcu_read_unlock();
 	unlock_kernel();
 }
 EXPORT_SYMBOL(steal_locks);
diff --git a/fs/mbcache.c b/fs/mbcache.c
index b002a088857..298997f1747 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -116,7 +116,7 @@ mb_cache_indexes(struct mb_cache *cache)
  * What the mbcache registers as to get shrunk dynamically.
  */
 
-static int mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask);
+static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask);
 
 
 static inline int
@@ -140,7 +140,7 @@ __mb_cache_entry_unhash(struct mb_cache_entry *ce)
 
 
 static inline void
-__mb_cache_entry_forget(struct mb_cache_entry *ce, int gfp_mask)
+__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
 {
 	struct mb_cache *cache = ce->e_cache;
 
@@ -193,7 +193,7 @@ forget:
  * Returns the number of objects which are present in the cache.
  */
 static int
-mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask)
+mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask)
 {
 	LIST_HEAD(free_list);
 	struct list_head *l, *ltmp;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 3f18c21198d..790cc0d0e97 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -24,6 +24,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data);
 
 static void minix_delete_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
 	inode->i_size = 0;
 	minix_truncate(inode);
 	minix_free_inode(inode);
diff --git a/fs/mpage.c b/fs/mpage.c
index bb9aebe9386..c5adcdddf3c 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -102,7 +102,7 @@ static struct bio *mpage_bio_submit(int rw, struct bio *bio)
 static struct bio *
 mpage_alloc(struct block_device *bdev,
 		sector_t first_sector, int nr_vecs,
-		unsigned int __nocast gfp_flags)
+		gfp_t gfp_flags)
 {
 	struct bio *bio;
 
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 154f511c724..626a367bcd8 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -454,10 +454,10 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name,
 {
 	struct buffer_head *dotdot_bh;
 	struct msdos_dir_entry *dotdot_de;
-	loff_t dotdot_i_pos;
 	struct inode *old_inode, *new_inode;
 	struct fat_slot_info old_sinfo, sinfo;
 	struct timespec ts;
+	loff_t dotdot_i_pos, new_i_pos;
 	int err, old_attrs, is_dir, update_dotdot, corrupt = 0;
 
 	old_sinfo.bh = sinfo.bh = dotdot_bh = NULL;
@@ -516,28 +516,24 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name,
 	if (new_inode) {
 		if (err)
 			goto out;
-		if (MSDOS_I(new_inode)->i_pos != sinfo.i_pos) {
-			/* WTF??? Cry and fail. */
-			printk(KERN_WARNING "msdos_rename: fs corrupted\n");
-			goto out;
-		}
-
 		if (is_dir) {
 			err = fat_dir_empty(new_inode);
 			if (err)
 				goto out;
 		}
+		new_i_pos = MSDOS_I(new_inode)->i_pos;
 		fat_detach(new_inode);
 	} else {
 		err = msdos_add_entry(new_dir, new_name, is_dir, is_hid, 0,
 				      &ts, &sinfo);
 		if (err)
 			goto out;
+		new_i_pos = sinfo.i_pos;
 	}
 	new_dir->i_version++;
 
 	fat_detach(old_inode);
-	fat_attach(old_inode, sinfo.i_pos);
+	fat_attach(old_inode, new_i_pos);
 	if (is_hid)
 		MSDOS_I(old_inode)->i_attrs |= ATTR_HIDDEN;
 	else
@@ -604,7 +600,7 @@ error_inode:
 	fat_attach(old_inode, old_sinfo.i_pos);
 	MSDOS_I(old_inode)->i_attrs = old_attrs;
 	if (new_inode) {
-		fat_attach(new_inode, sinfo.i_pos);
+		fat_attach(new_inode, new_i_pos);
 		if (corrupt)
 			corrupt |= fat_sync_inode(new_inode);
 	} else {
diff --git a/fs/namei.c b/fs/namei.c
index 4a27eb79811..c5769c4fcab 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -28,6 +28,7 @@
 #include <linux/syscalls.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
+#include <linux/file.h>
 #include <asm/namei.h>
 #include <asm/uaccess.h>
 
@@ -317,6 +318,18 @@ void path_release_on_umount(struct nameidata *nd)
 	mntput_no_expire(nd->mnt);
 }
 
+/**
+ * release_open_intent - free up open intent resources
+ * @nd: pointer to nameidata
+ */
+void release_open_intent(struct nameidata *nd)
+{
+	if (nd->intent.open.file->f_dentry == NULL)
+		put_filp(nd->intent.open.file);
+	else
+		fput(nd->intent.open.file);
+}
+
 /*
  * Internal lookup() using the new generic dcache.
  * SMP-safe
@@ -501,6 +514,7 @@ struct path {
 static inline int __do_follow_link(struct path *path, struct nameidata *nd)
 {
 	int error;
+	void *cookie;
 	struct dentry *dentry = path->dentry;
 
 	touch_atime(path->mnt, dentry);
@@ -508,13 +522,15 @@ static inline int __do_follow_link(struct path *path, struct nameidata *nd)
 
 	if (path->mnt == nd->mnt)
 		mntget(path->mnt);
-	error = dentry->d_inode->i_op->follow_link(dentry, nd);
-	if (!error) {
+	cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
+	error = PTR_ERR(cookie);
+	if (!IS_ERR(cookie)) {
 		char *s = nd_get_link(nd);
+		error = 0;
 		if (s)
 			error = __vfs_follow_link(nd, s);
 		if (dentry->d_inode->i_op->put_link)
-			dentry->d_inode->i_op->put_link(dentry, nd);
+			dentry->d_inode->i_op->put_link(dentry, nd, cookie);
 	}
 	dput(dentry);
 	mntput(path->mnt);
@@ -522,6 +538,22 @@ static inline int __do_follow_link(struct path *path, struct nameidata *nd)
 	return error;
 }
 
+static inline void dput_path(struct path *path, struct nameidata *nd)
+{
+	dput(path->dentry);
+	if (path->mnt != nd->mnt)
+		mntput(path->mnt);
+}
+
+static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
+{
+	dput(nd->dentry);
+	if (nd->mnt != path->mnt)
+		mntput(nd->mnt);
+	nd->mnt = path->mnt;
+	nd->dentry = path->dentry;
+}
+
 /*
  * This limits recursive symlink follows to 8, while
  * limiting consecutive symlinks to 40.
@@ -549,9 +581,7 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
 	nd->depth--;
 	return err;
 loop:
-	dput(path->dentry);
-	if (path->mnt != nd->mnt)
-		mntput(path->mnt);
+	dput_path(path, nd);
 	path_release(nd);
 	return err;
 }
@@ -733,6 +763,7 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
 		struct qstr this;
 		unsigned int c;
 
+		nd->flags |= LOOKUP_CONTINUE;
 		err = exec_permission_lite(inode, nd);
 		if (err == -EAGAIN) { 
 			err = permission(inode, MAY_EXEC, nd);
@@ -785,7 +816,6 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
 			if (err < 0)
 				break;
 		}
-		nd->flags |= LOOKUP_CONTINUE;
 		/* This does the actual lookups.. */
 		err = do_lookup(nd, &this, &next);
 		if (err)
@@ -810,13 +840,8 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
 			err = -ENOTDIR; 
 			if (!inode->i_op)
 				break;
-		} else {
-			dput(nd->dentry);
-			if (nd->mnt != next.mnt)
-				mntput(nd->mnt);
-			nd->mnt = next.mnt;
-			nd->dentry = next.dentry;
-		}
+		} else
+			path_to_nameidata(&next, nd);
 		err = -ENOTDIR; 
 		if (!inode->i_op->lookup)
 			break;
@@ -856,13 +881,8 @@ last_component:
 			if (err)
 				goto return_err;
 			inode = nd->dentry->d_inode;
-		} else {
-			dput(nd->dentry);
-			if (nd->mnt != next.mnt)
-				mntput(nd->mnt);
-			nd->mnt = next.mnt;
-			nd->dentry = next.dentry;
-		}
+		} else
+			path_to_nameidata(&next, nd);
 		err = -ENOENT;
 		if (!inode)
 			break;
@@ -898,9 +918,7 @@ return_reval:
 return_base:
 		return 0;
 out_dput:
-		dput(next.dentry);
-		if (nd->mnt != next.mnt)
-			mntput(next.mnt);
+		dput_path(&next, nd);
 		break;
 	}
 	path_release(nd);
@@ -1043,10 +1061,74 @@ int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata
 out:
 	if (unlikely(current->audit_context
 		     && nd && nd->dentry && nd->dentry->d_inode))
-		audit_inode(name, nd->dentry->d_inode);
+		audit_inode(name, nd->dentry->d_inode, flags);
 	return retval;
 }
 
+static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags, int create_mode)
+{
+	struct file *filp = get_empty_filp();
+	int err;
+
+	if (filp == NULL)
+		return -ENFILE;
+	nd->intent.open.file = filp;
+	nd->intent.open.flags = open_flags;
+	nd->intent.open.create_mode = create_mode;
+	err = path_lookup(name, lookup_flags|LOOKUP_OPEN, nd);
+	if (IS_ERR(nd->intent.open.file)) {
+		if (err == 0) {
+			err = PTR_ERR(nd->intent.open.file);
+			path_release(nd);
+		}
+	} else if (err != 0)
+		release_open_intent(nd);
+	return err;
+}
+
+/**
+ * path_lookup_open - lookup a file path with open intent
+ * @name: pointer to file name
+ * @lookup_flags: lookup intent flags
+ * @nd: pointer to nameidata
+ * @open_flags: open intent flags
+ */
+int path_lookup_open(const char *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags)
+{
+	return __path_lookup_intent_open(name, lookup_flags, nd,
+			open_flags, 0);
+}
+
+/**
+ * path_lookup_create - lookup a file path with open + create intent
+ * @name: pointer to file name
+ * @lookup_flags: lookup intent flags
+ * @nd: pointer to nameidata
+ * @open_flags: open intent flags
+ * @create_mode: create intent flags
+ */
+int path_lookup_create(const char *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags, int create_mode)
+{
+	return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd,
+			open_flags, create_mode);
+}
+
+int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags)
+{
+	char *tmp = getname(name);
+	int err = PTR_ERR(tmp);
+
+	if (!IS_ERR(tmp)) {
+		err = __path_lookup_intent_open(tmp, lookup_flags, nd, open_flags, 0);
+		putname(tmp);
+	}
+	return err;
+}
+
 /*
  * Restricted form of lookup. Doesn't follow links, single-component only,
  * needs parent already locked. Doesn't follow mounts.
@@ -1229,9 +1311,6 @@ static inline int may_create(struct inode *dir, struct dentry *child,
 }
 
 /* 
- * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
- * reasons.
- *
  * O_DIRECTORY translates into forcing a directory lookup.
  */
 static inline int lookup_flags(unsigned int f)
@@ -1241,9 +1320,6 @@ static inline int lookup_flags(unsigned int f)
 	if (f & O_NOFOLLOW)
 		retval &= ~LOOKUP_FOLLOW;
 	
-	if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
-		retval &= ~LOOKUP_FOLLOW;
-	
 	if (f & O_DIRECTORY)
 		retval |= LOOKUP_DIRECTORY;
 
@@ -1311,10 +1387,8 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 		return error;
 	DQUOT_INIT(dir);
 	error = dir->i_op->create(dir, dentry, mode, nd);
-	if (!error) {
+	if (!error)
 		fsnotify_create(dir, dentry->d_name.name);
-		security_inode_post_create(dir, dentry, mode);
-	}
 	return error;
 }
 
@@ -1413,27 +1487,27 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
  */
 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 {
-	int acc_mode, error = 0;
+	int acc_mode, error;
 	struct path path;
 	struct dentry *dir;
 	int count = 0;
 
 	acc_mode = ACC_MODE(flag);
 
+	/* O_TRUNC implies we need access checks for write permissions */
+	if (flag & O_TRUNC)
+		acc_mode |= MAY_WRITE;
+
 	/* Allow the LSM permission hook to distinguish append 
 	   access from general write access. */
 	if (flag & O_APPEND)
 		acc_mode |= MAY_APPEND;
 
-	/* Fill in the open() intent data */
-	nd->intent.open.flags = flag;
-	nd->intent.open.create_mode = mode;
-
 	/*
 	 * The simplest case - just a plain lookup.
 	 */
 	if (!(flag & O_CREAT)) {
-		error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd);
+		error = path_lookup_open(pathname, lookup_flags(flag), nd, flag);
 		if (error)
 			return error;
 		goto ok;
@@ -1442,7 +1516,7 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 	/*
 	 * Create - we need to know the parent.
 	 */
-	error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
+	error = path_lookup_create(pathname, LOOKUP_PARENT, nd, flag, mode);
 	if (error)
 		return error;
 
@@ -1504,11 +1578,7 @@ do_last:
 	if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
 		goto do_link;
 
-	dput(nd->dentry);
-	nd->dentry = path.dentry;
-	if (nd->mnt != path.mnt)
-		mntput(nd->mnt);
-	nd->mnt = path.mnt;
+	path_to_nameidata(&path, nd);
 	error = -EISDIR;
 	if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
 		goto exit;
@@ -1519,10 +1589,10 @@ ok:
 	return 0;
 
 exit_dput:
-	dput(path.dentry);
-	if (nd->mnt != path.mnt)
-		mntput(path.mnt);
+	dput_path(&path, nd);
 exit:
+	if (!IS_ERR(nd->intent.open.file))
+		release_open_intent(nd);
 	path_release(nd);
 	return error;
 
@@ -1554,19 +1624,19 @@ do_link:
 	if (nd->last_type != LAST_NORM)
 		goto exit;
 	if (nd->last.name[nd->last.len]) {
-		putname(nd->last.name);
+		__putname(nd->last.name);
 		goto exit;
 	}
 	error = -ELOOP;
 	if (count++==32) {
-		putname(nd->last.name);
+		__putname(nd->last.name);
 		goto exit;
 	}
 	dir = nd->dentry;
 	down(&dir->d_inode->i_sem);
 	path.dentry = __lookup_hash(&nd->last, nd->dentry, nd);
 	path.mnt = nd->mnt;
-	putname(nd->last.name);
+	__putname(nd->last.name);
 	goto do_last;
 }
 
@@ -1636,10 +1706,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 
 	DQUOT_INIT(dir);
 	error = dir->i_op->mknod(dir, dentry, mode, dev);
-	if (!error) {
+	if (!error)
 		fsnotify_create(dir, dentry->d_name.name);
-		security_inode_post_mknod(dir, dentry, mode, dev);
-	}
 	return error;
 }
 
@@ -1709,10 +1777,8 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	DQUOT_INIT(dir);
 	error = dir->i_op->mkdir(dir, dentry, mode);
-	if (!error) {
+	if (!error)
 		fsnotify_mkdir(dir, dentry->d_name.name);
-		security_inode_post_mkdir(dir,dentry, mode);
-	}
 	return error;
 }
 
@@ -1801,7 +1867,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 	}
 	up(&dentry->d_inode->i_sem);
 	if (!error) {
-		fsnotify_rmdir(dentry, dentry->d_inode, dir);
 		d_delete(dentry);
 	}
 	dput(dentry);
@@ -1874,7 +1939,6 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 
 	/* We don't d_delete() NFS sillyrenamed files--they still exist. */
 	if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
-		fsnotify_unlink(dentry, dir);
 		d_delete(dentry);
 	}
 
@@ -1950,10 +2014,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
 
 	DQUOT_INIT(dir);
 	error = dir->i_op->symlink(dir, dentry, oldname);
-	if (!error) {
+	if (!error)
 		fsnotify_create(dir, dentry->d_name.name);
-		security_inode_post_symlink(dir, dentry, oldname);
-	}
 	return error;
 }
 
@@ -2023,10 +2085,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
 	DQUOT_INIT(dir);
 	error = dir->i_op->link(old_dentry, dir, new_dentry);
 	up(&old_dentry->d_inode->i_sem);
-	if (!error) {
+	if (!error)
 		fsnotify_create(dir, new_dentry->d_name.name);
-		security_inode_post_link(old_dentry, dir, new_dentry);
-	}
 	return error;
 }
 
@@ -2145,11 +2205,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 			d_rehash(new_dentry);
 		dput(new_dentry);
 	}
-	if (!error) {
+	if (!error)
 		d_move(old_dentry,new_dentry);
-		security_inode_post_rename(old_dir, old_dentry,
-					   new_dir, new_dentry);
-	}
 	return error;
 }
 
@@ -2175,7 +2232,6 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 		/* The following d_move() should become unconditional */
 		if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
 			d_move(old_dentry, new_dentry);
-		security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry);
 	}
 	if (target)
 		up(&target->i_sem);
@@ -2218,7 +2274,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
 	if (!error) {
 		const char *new_name = old_dentry->d_name.name;
-		fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir, new_dentry->d_inode);
+		fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir,
+			      new_dentry->d_inode, old_dentry->d_inode);
 	}
 	fsnotify_oldname_free(old_name);
 
@@ -2345,15 +2402,17 @@ out:
 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 {
 	struct nameidata nd;
-	int res;
+	void *cookie;
+
 	nd.depth = 0;
-	res = dentry->d_inode->i_op->follow_link(dentry, &nd);
-	if (!res) {
-		res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
+	cookie = dentry->d_inode->i_op->follow_link(dentry, &nd);
+	if (!IS_ERR(cookie)) {
+		int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
 		if (dentry->d_inode->i_op->put_link)
-			dentry->d_inode->i_op->put_link(dentry, &nd);
+			dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
+		cookie = ERR_PTR(res);
 	}
-	return res;
+	return PTR_ERR(cookie);
 }
 
 int vfs_follow_link(struct nameidata *nd, const char *link)
@@ -2396,23 +2455,20 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 	return res;
 }
 
-int page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
+void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
 {
-	struct page *page;
+	struct page *page = NULL;
 	nd_set_link(nd, page_getlink(dentry, &page));
-	return 0;
+	return page;
 }
 
-void page_put_link(struct dentry *dentry, struct nameidata *nd)
+void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
 {
-	if (!IS_ERR(nd_get_link(nd))) {
-		struct page *page;
-		page = find_get_page(dentry->d_inode->i_mapping, 0);
-		if (!page)
-			BUG();
+	struct page *page = cookie;
+
+	if (page) {
 		kunmap(page);
 		page_cache_release(page);
-		page_cache_release(page);
 	}
 }
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 587eb0d707e..2fa9fdf7d6f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -40,7 +40,7 @@ static inline int sysfs_init(void)
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
 
 static struct list_head *mount_hashtable;
-static int hash_mask, hash_bits;
+static int hash_mask __read_mostly, hash_bits __read_mostly;
 static kmem_cache_t *mnt_cache; 
 
 static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
@@ -160,7 +160,7 @@ clone_mnt(struct vfsmount *old, struct dentry *root)
 		mnt->mnt_root = dget(root);
 		mnt->mnt_mountpoint = mnt->mnt_root;
 		mnt->mnt_parent = mnt;
-		mnt->mnt_namespace = old->mnt_namespace;
+		mnt->mnt_namespace = current->namespace;
 
 		/* stick the duplicate mount on the same expiry list
 		 * as the original if that was on one */
@@ -537,7 +537,6 @@ lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
 static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct vfsmount *res, *p, *q, *r, *s;
-	struct list_head *h;
 	struct nameidata nd;
 
 	res = q = clone_mnt(mnt, dentry);
@@ -546,8 +545,7 @@ static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
 	q->mnt_mountpoint = mnt->mnt_mountpoint;
 
 	p = mnt;
-	for (h = mnt->mnt_mounts.next; h != &mnt->mnt_mounts; h = h->next) {
-		r = list_entry(h, struct vfsmount, mnt_child);
+	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
 		if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry))
 			continue;
 
@@ -1334,8 +1332,12 @@ asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *p
 	error = -EINVAL;
 	if (user_nd.mnt->mnt_root != user_nd.dentry)
 		goto out2; /* not a mountpoint */
+	if (user_nd.mnt->mnt_parent == user_nd.mnt)
+		goto out2; /* not attached */
 	if (new_nd.mnt->mnt_root != new_nd.dentry)
 		goto out2; /* not a mountpoint */
+	if (new_nd.mnt->mnt_parent == new_nd.mnt)
+		goto out2; /* not attached */
 	tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
 	spin_lock(&vfsmount_lock);
 	if (tmp != new_nd.mnt) {
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 44795d2f4b3..8c8839203cd 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -286,6 +286,8 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
 static void
 ncp_delete_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+
 	if (S_ISDIR(inode->i_mode)) {
 		DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino);
 	}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index d7f7eb669d0..3976c177a7d 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -31,11 +31,42 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
 	kfree(delegation);
 }
 
+static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
+{
+	struct inode *inode = state->inode;
+	struct file_lock *fl;
+	int status;
+
+	for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
+		if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
+			continue;
+		if ((struct nfs_open_context *)fl->fl_file->private_data != ctx)
+			continue;
+		status = nfs4_lock_delegation_recall(state, fl);
+		if (status >= 0)
+			continue;
+		switch (status) {
+			default:
+				printk(KERN_ERR "%s: unhandled error %d.\n",
+						__FUNCTION__, status);
+			case -NFS4ERR_EXPIRED:
+				/* kill_proc(fl->fl_pid, SIGLOST, 1); */
+			case -NFS4ERR_STALE_CLIENTID:
+				nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs4_state);
+				goto out_err;
+		}
+	}
+	return 0;
+out_err:
+	return status;
+}
+
 static void nfs_delegation_claim_opens(struct inode *inode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_open_context *ctx;
 	struct nfs4_state *state;
+	int err;
 
 again:
 	spin_lock(&inode->i_lock);
@@ -47,9 +78,12 @@ again:
 			continue;
 		get_nfs_open_context(ctx);
 		spin_unlock(&inode->i_lock);
-		if (nfs4_open_delegation_recall(ctx->dentry, state) < 0)
-			return;
+		err = nfs4_open_delegation_recall(ctx->dentry, state);
+		if (err >= 0)
+			err = nfs_delegation_claim_locks(ctx, state);
 		put_nfs_open_context(ctx);
+		if (err != 0)
+			return;
 		goto again;
 	}
 	spin_unlock(&inode->i_lock);
@@ -85,6 +119,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	struct nfs_delegation *delegation;
 	int status = 0;
 
+	/* Ensure we first revalidate the attributes and page cache! */
+	if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)))
+		__nfs_revalidate_inode(NFS_SERVER(inode), inode);
+
 	delegation = nfs_alloc_delegation();
 	if (delegation == NULL)
 		return -ENOMEM;
@@ -138,7 +176,7 @@ static void nfs_msync_inode(struct inode *inode)
 /*
  * Basic procedure for returning a delegation to the server
  */
-int nfs_inode_return_delegation(struct inode *inode)
+int __nfs_inode_return_delegation(struct inode *inode)
 {
 	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
 	struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 3f6c45a29d6..2fcc30de924 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -25,7 +25,7 @@ struct nfs_delegation {
 
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
-int nfs_inode_return_delegation(struct inode *inode);
+int __nfs_inode_return_delegation(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 
 struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
@@ -38,6 +38,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
 /* NFSv4 delegation-related procedures */
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
 int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
+int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
 
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
@@ -47,11 +48,25 @@ static inline int nfs_have_delegation(struct inode *inode, int flags)
 		return 1;
 	return 0;
 }
+
+static inline int nfs_inode_return_delegation(struct inode *inode)
+{
+	int err = 0;
+
+	if (NFS_I(inode)->delegation != NULL)
+		err = __nfs_inode_return_delegation(inode);
+	return err;
+}
 #else
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
 	return 0;
 }
+
+static inline int nfs_inode_return_delegation(struct inode *inode)
+{
+	return 0;
+}
 #endif
 
 #endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b38a57e78a6..7370583b61e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -182,14 +182,16 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 		/* We requested READDIRPLUS, but the server doesn't grok it */
 		if (error == -ENOTSUPP && desc->plus) {
 			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
-			NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS;
+			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
 			desc->plus = 0;
 			goto again;
 		}
 		goto error;
 	}
 	SetPageUptodate(page);
-	NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+	spin_lock(&inode->i_lock);
+	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+	spin_unlock(&inode->i_lock);
 	/* Ensure consistent page alignment of the data.
 	 * Note: assumes we have exclusive access to this mapping either
 	 *	 through inode->i_sem or some other mechanism.
@@ -462,7 +464,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 						page,
 						NFS_SERVER(inode)->dtsize,
 						desc->plus);
-	NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+	spin_lock(&inode->i_lock);
+	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+	spin_unlock(&inode->i_lock);
 	desc->page = page;
 	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
 	if (desc->error >= 0) {
@@ -528,6 +532,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	my_entry.eof = 0;
 	my_entry.fh = &fh;
 	my_entry.fattr = &fattr;
+	nfs_fattr_init(&fattr);
 	desc->entry = &my_entry;
 
 	while(!desc->entry->eof) {
@@ -545,7 +550,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 			break;
 		}
 		if (res == -ETOOSMALL && desc->plus) {
-			NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS;
+			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
 			nfs_zap_caches(inode);
 			desc->plus = 0;
 			desc->entry->eof = 0;
@@ -561,8 +566,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		}
 	}
 	unlock_kernel();
-	if (desc->error < 0)
-		return desc->error;
 	if (res < 0)
 		return res;
 	return 0;
@@ -608,7 +611,7 @@ static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
 {
 	if (IS_ROOT(dentry))
 		return 1;
-	if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0
+	if ((NFS_I(dir)->cache_validity & NFS_INO_INVALID_ATTR) != 0
 			|| nfs_attribute_timeout(dir))
 		return 0;
 	return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata);
@@ -799,6 +802,7 @@ static int nfs_dentry_delete(struct dentry *dentry)
  */
 static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
 {
+	nfs_inode_return_delegation(inode);
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
 		lock_kernel();
 		inode->i_nlink--;
@@ -849,12 +853,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
 
 	lock_kernel();
-	/* Revalidate parent directory attribute cache */
-	error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
-	if (error < 0) {
-		res = ERR_PTR(error);
-		goto out_unlock;
-	}
 
 	/* If we're doing an exclusive create, optimize away the lookup */
 	if (nfs_is_exclusive_create(dir, nd))
@@ -912,7 +910,6 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd)
 static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct dentry *res = NULL;
-	struct inode *inode = NULL;
 	int error;
 
 	/* Check that we are indeed trying to open this file */
@@ -926,8 +923,10 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
 
 	/* Let vfs_create() deal with O_EXCL */
-	if (nd->intent.open.flags & O_EXCL)
-		goto no_entry;
+	if (nd->intent.open.flags & O_EXCL) {
+		d_add(dentry, NULL);
+		goto out;
+	}
 
 	/* Open the file on the server */
 	lock_kernel();
@@ -935,37 +934,36 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 	error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
 	if (error < 0) {
 		res = ERR_PTR(error);
+		unlock_kernel();
 		goto out;
 	}
 
 	if (nd->intent.open.flags & O_CREAT) {
 		nfs_begin_data_update(dir);
-		inode = nfs4_atomic_open(dir, dentry, nd);
+		res = nfs4_atomic_open(dir, dentry, nd);
 		nfs_end_data_update(dir);
 	} else
-		inode = nfs4_atomic_open(dir, dentry, nd);
+		res = nfs4_atomic_open(dir, dentry, nd);
 	unlock_kernel();
-	if (IS_ERR(inode)) {
-		error = PTR_ERR(inode);
+	if (IS_ERR(res)) {
+		error = PTR_ERR(res);
 		switch (error) {
 			/* Make a negative dentry */
 			case -ENOENT:
-				inode = NULL;
-				break;
+				res = NULL;
+				goto out;
 			/* This turned out not to be a regular file */
+			case -EISDIR:
+			case -ENOTDIR:
+				goto no_open;
 			case -ELOOP:
 				if (!(nd->intent.open.flags & O_NOFOLLOW))
 					goto no_open;
-			/* case -EISDIR: */
 			/* case -EINVAL: */
 			default:
-				res = ERR_PTR(error);
 				goto out;
 		}
-	}
-no_entry:
-	res = d_add_unique(dentry, inode);
-	if (res != NULL)
+	} else if (res != NULL)
 		dentry = res;
 	nfs_renew_times(dentry);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -1009,7 +1007,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
 	 */
 	lock_kernel();
 	verifier = nfs_save_change_attribute(dir);
-	ret = nfs4_open_revalidate(dir, dentry, openflags);
+	ret = nfs4_open_revalidate(dir, dentry, openflags, nd);
 	if (!ret)
 		nfs_set_verifier(dentry, verifier);
 	unlock_kernel();
@@ -1132,7 +1130,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
 
 	lock_kernel();
 	nfs_begin_data_update(dir);
-	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
+	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
 	nfs_end_data_update(dir);
 	if (error != 0)
 		goto out_err;
@@ -1259,6 +1257,9 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
 	sprintf(silly, ".nfs%*.*lx",
 		i_inosize, i_inosize, dentry->d_inode->i_ino);
 
+	/* Return delegation in anticipation of the rename */
+	nfs_inode_return_delegation(dentry->d_inode);
+
 	sdentry = NULL;
 	do {
 		char *suffix = silly + slen - countersize;
@@ -1327,6 +1328,7 @@ static int nfs_safe_remove(struct dentry *dentry)
 
 	nfs_begin_data_update(dir);
 	if (inode != NULL) {
+		nfs_inode_return_delegation(inode);
 		nfs_begin_data_update(inode);
 		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
 		/* The VFS may want to delete this inode */
@@ -1433,17 +1435,14 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 		old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
-	/*
-	 * Drop the dentry in advance to force a new lookup.
-	 * Since nfs_proc_link doesn't return a file handle,
-	 * we can't use the existing dentry.
-	 */
 	lock_kernel();
-	d_drop(dentry);
-
 	nfs_begin_data_update(dir);
 	nfs_begin_data_update(inode);
 	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
+	if (error == 0) {
+		atomic_inc(&inode->i_count);
+		d_instantiate(dentry, inode);
+	}
 	nfs_end_data_update(inode);
 	nfs_end_data_update(dir);
 	unlock_kernel();
@@ -1507,9 +1506,11 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 */
 	if (!new_inode)
 		goto go_ahead;
-	if (S_ISDIR(new_inode->i_mode))
-		goto out;
-	else if (atomic_read(&new_dentry->d_count) > 2) {
+	if (S_ISDIR(new_inode->i_mode)) {
+		error = -EISDIR;
+		if (!S_ISDIR(old_inode->i_mode))
+			goto out;
+	} else if (atomic_read(&new_dentry->d_count) > 2) {
 		int err;
 		/* copy the target dentry's name */
 		dentry = d_alloc(new_dentry->d_parent,
@@ -1534,7 +1535,8 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 #endif
 			goto out;
 		}
-	}
+	} else
+		new_inode->i_nlink--;
 
 go_ahead:
 	/*
@@ -1544,6 +1546,7 @@ go_ahead:
 		nfs_wb_all(old_inode);
 		shrink_dcache_parent(old_dentry);
 	}
+	nfs_inode_return_delegation(old_inode);
 
 	if (new_inode)
 		d_delete(new_dentry);
@@ -1575,11 +1578,12 @@ out:
 
 int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
 {
-	struct nfs_access_entry *cache = &NFS_I(inode)->cache_access;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_access_entry *cache = &nfsi->cache_access;
 
 	if (cache->cred != cred
 			|| time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
-			|| (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS))
+			|| (nfsi->cache_validity & NFS_INO_INVALID_ACCESS))
 		return -ENOENT;
 	memcpy(res, cache, sizeof(*res));
 	return 0;
@@ -1587,14 +1591,18 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs
 
 void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
 {
-	struct nfs_access_entry *cache = &NFS_I(inode)->cache_access;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_access_entry *cache = &nfsi->cache_access;
 
 	if (cache->cred != set->cred) {
 		if (cache->cred)
 			put_rpccred(cache->cred);
 		cache->cred = get_rpccred(set->cred);
 	}
-	NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS;
+	/* FIXME: replace current access_cache BKL reliance with inode->i_lock */
+	spin_lock(&inode->i_lock);
+	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+	spin_unlock(&inode->i_lock);
 	cache->jiffies = set->jiffies;
 	cache->mask = set->mask;
 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 6537f2c4ae4..b497c71384e 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -655,7 +655,6 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
 	struct file *file = iocb->ki_filp;
 	struct nfs_open_context *ctx =
 			(struct nfs_open_context *) file->private_data;
-	struct dentry *dentry = file->f_dentry;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
 	struct iovec iov = {
@@ -664,7 +663,8 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
 	};
 
 	dprintk("nfs: direct read(%s/%s, %lu@%lu)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
+		file->f_dentry->d_parent->d_name.name,
+		file->f_dentry->d_name.name,
 		(unsigned long) count, (unsigned long) pos);
 
 	if (!is_sync_kiocb(iocb))
@@ -730,7 +730,6 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
 	struct file *file = iocb->ki_filp;
 	struct nfs_open_context *ctx =
 			(struct nfs_open_context *) file->private_data;
-	struct dentry *dentry = file->f_dentry;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
 	struct iovec iov = {
@@ -739,8 +738,9 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
 	};
 
 	dfprintk(VFS, "nfs: direct write(%s/%s(%ld), %lu@%lu)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
-		inode->i_ino, (unsigned long) count, (unsigned long) pos);
+		file->f_dentry->d_parent->d_name.name,
+		file->f_dentry->d_name.name, inode->i_ino,
+		(unsigned long) count, (unsigned long) pos);
 
 	if (!is_sync_kiocb(iocb))
 		goto out;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5621ba9885f..57d3e77d97e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -134,9 +134,11 @@ nfs_file_release(struct inode *inode, struct file *filp)
  */
 static int nfs_revalidate_file(struct inode *inode, struct file *filp)
 {
+	struct nfs_inode *nfsi = NFS_I(inode);
 	int retval = 0;
 
-	if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode))
+	if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR))
+			|| nfs_attribute_timeout(inode))
 		retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	nfs_revalidate_mapping(inode, filp->f_mapping);
 	return 0;
@@ -164,7 +166,7 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
 		goto force_reval;
 	if (nfsi->npages != 0)
 		return 0;
-	if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode))
+	if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode))
 		return 0;
 force_reval:
 	return __nfs_revalidate_inode(server, inode);
@@ -203,8 +205,8 @@ nfs_file_flush(struct file *file)
 	if (!status) {
 		status = ctx->error;
 		ctx->error = 0;
-		if (!status && !nfs_have_delegation(inode, FMODE_READ))
-			__nfs_revalidate_inode(NFS_SERVER(inode), inode);
+		if (!status)
+			nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	}
 	unlock_kernel();
 	return status;
@@ -374,22 +376,31 @@ out_swapfile:
 
 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
 {
+	struct file_lock *cfl;
 	struct inode *inode = filp->f_mapping->host;
 	int status = 0;
 
 	lock_kernel();
-	/* Use local locking if mounted with "-onolock" */
-	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
-		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
-	else {
-		struct file_lock *cfl = posix_test_lock(filp, fl);
-
-		fl->fl_type = F_UNLCK;
-		if (cfl != NULL)
-			memcpy(fl, cfl, sizeof(*fl));
+	/* Try local locking first */
+	cfl = posix_test_lock(filp, fl);
+	if (cfl != NULL) {
+		locks_copy_lock(fl, cfl);
+		goto out;
 	}
+
+	if (nfs_have_delegation(inode, FMODE_READ))
+		goto out_noconflict;
+
+	if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
+		goto out_noconflict;
+
+	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+out:
 	unlock_kernel();
 	return status;
+out_noconflict:
+	fl->fl_type = F_UNLCK;
+	goto out;
 }
 
 static int do_vfs_lock(struct file *file, struct file_lock *fl)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 4845911f1c6..24d2fbf549b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -146,6 +146,8 @@ nfs_delete_inode(struct inode * inode)
 {
 	dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
 
+	truncate_inode_pages(&inode->i_data, 0);
+
 	nfs_wb_all(inode);
 	/*
 	 * The following should never happen...
@@ -356,6 +358,35 @@ out_no_root:
 	return no_root_error;
 }
 
+static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans)
+{
+	to->to_initval = timeo * HZ / 10;
+	to->to_retries = retrans;
+	if (!to->to_retries)
+		to->to_retries = 2;
+
+	switch (proto) {
+	case IPPROTO_TCP:
+		if (!to->to_initval)
+			to->to_initval = 60 * HZ;
+		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
+			to->to_initval = NFS_MAX_TCP_TIMEOUT;
+		to->to_increment = to->to_initval;
+		to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
+		to->to_exponential = 0;
+		break;
+	case IPPROTO_UDP:
+	default:
+		if (!to->to_initval)
+			to->to_initval = 11 * HZ / 10;
+		if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
+			to->to_initval = NFS_MAX_UDP_TIMEOUT;
+		to->to_maxval = NFS_MAX_UDP_TIMEOUT;
+		to->to_exponential = 1;
+		break;
+	}
+}
+
 /*
  * Create an RPC client handle.
  */
@@ -365,22 +396,12 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	struct rpc_timeout	timeparms;
 	struct rpc_xprt		*xprt = NULL;
 	struct rpc_clnt		*clnt = NULL;
-	int			tcp   = (data->flags & NFS_MOUNT_TCP);
-
-	/* Initialize timeout values */
-	timeparms.to_initval = data->timeo * HZ / 10;
-	timeparms.to_retries = data->retrans;
-	timeparms.to_maxval  = tcp ? RPC_MAX_TCP_TIMEOUT : RPC_MAX_UDP_TIMEOUT;
-	timeparms.to_exponential = 1;
+	int			proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
 
-	if (!timeparms.to_initval)
-		timeparms.to_initval = (tcp ? 600 : 11) * HZ / 10;
-	if (!timeparms.to_retries)
-		timeparms.to_retries = 5;
+	nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
 
 	/* create transport and client */
-	xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP,
-				 &server->addr, &timeparms);
+	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
 	if (IS_ERR(xprt)) {
 		dprintk("%s: cannot create RPC transport. Error = %ld\n",
 				__FUNCTION__, PTR_ERR(xprt));
@@ -574,7 +595,6 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
 		{ NFS_MOUNT_INTR, ",intr", "" },
 		{ NFS_MOUNT_POSIX, ",posix", "" },
-		{ NFS_MOUNT_TCP, ",tcp", ",udp" },
 		{ NFS_MOUNT_NOCTO, ",nocto", "" },
 		{ NFS_MOUNT_NOAC, ",noac", "" },
 		{ NFS_MOUNT_NONLM, ",nolock", ",lock" },
@@ -583,6 +603,8 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 	};
 	struct proc_nfs_info *nfs_infop;
 	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+	char buf[12];
+	char *proto;
 
 	seq_printf(m, ",v%d", nfss->rpc_ops->version);
 	seq_printf(m, ",rsize=%d", nfss->rsize);
@@ -601,6 +623,18 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 		else
 			seq_puts(m, nfs_infop->nostr);
 	}
+	switch (nfss->client->cl_xprt->prot) {
+		case IPPROTO_TCP:
+			proto = "tcp";
+			break;
+		case IPPROTO_UDP:
+			proto = "udp";
+			break;
+		default:
+			snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot);
+			proto = buf;
+	}
+	seq_printf(m, ",proto=%s", proto);
 	seq_puts(m, ",addr=");
 	seq_escape(m, nfss->hostname, " \t\n\\");
 	return 0;
@@ -615,14 +649,18 @@ nfs_zap_caches(struct inode *inode)
 	struct nfs_inode *nfsi = NFS_I(inode);
 	int mode = inode->i_mode;
 
+	spin_lock(&inode->i_lock);
+
 	NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
 	NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
 
 	memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
-		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
+		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
 	else
-		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
+		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
+
+	spin_unlock(&inode->i_lock);
 }
 
 static void nfs_zap_acl_cache(struct inode *inode)
@@ -632,7 +670,9 @@ static void nfs_zap_acl_cache(struct inode *inode)
 	clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache;
 	if (clear_acl_cache != NULL)
 		clear_acl_cache(inode);
-	NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL;
+	spin_lock(&inode->i_lock);
+	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL;
+	spin_unlock(&inode->i_lock);
 }
 
 /*
@@ -739,13 +779,14 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 			inode->i_fop = &nfs_dir_operations;
 			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
 			    && fattr->size <= NFS_LIMIT_READDIRPLUS)
-				NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS;
+				set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
 		} else if (S_ISLNK(inode->i_mode))
 			inode->i_op = &nfs_symlink_inode_operations;
 		else
 			init_special_inode(inode, inode->i_mode, fattr->rdev);
 
-		nfsi->read_cache_jiffies = fattr->timestamp;
+		nfsi->read_cache_jiffies = fattr->time_start;
+		nfsi->last_updated = jiffies;
 		inode->i_atime = fattr->atime;
 		inode->i_mtime = fattr->mtime;
 		inode->i_ctime = fattr->ctime;
@@ -813,56 +854,88 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 			filemap_fdatawait(inode->i_mapping);
 		nfs_wb_all(inode);
 	}
+	/*
+	 * Return any delegations if we're going to change ACLs
+	 */
+	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
+		nfs_inode_return_delegation(inode);
 	error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
-	if (error == 0) {
+	if (error == 0)
 		nfs_refresh_inode(inode, &fattr);
+	nfs_end_data_update(inode);
+	unlock_kernel();
+	return error;
+}
+
+/**
+ * nfs_setattr_update_inode - Update inode metadata after a setattr call.
+ * @inode: pointer to struct inode
+ * @attr: pointer to struct iattr
+ *
+ * Note: we do this in the *proc.c in order to ensure that
+ *       it works for things like exclusive creates too.
+ */
+void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
+{
+	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
 		if ((attr->ia_valid & ATTR_MODE) != 0) {
-			int mode;
-			mode = inode->i_mode & ~S_IALLUGO;
-			mode |= attr->ia_mode & S_IALLUGO;
+			int mode = attr->ia_mode & S_IALLUGO;
+			mode |= inode->i_mode & ~S_IALLUGO;
 			inode->i_mode = mode;
 		}
 		if ((attr->ia_valid & ATTR_UID) != 0)
 			inode->i_uid = attr->ia_uid;
 		if ((attr->ia_valid & ATTR_GID) != 0)
 			inode->i_gid = attr->ia_gid;
-		if ((attr->ia_valid & ATTR_SIZE) != 0) {
-			inode->i_size = attr->ia_size;
-			vmtruncate(inode, attr->ia_size);
-		}
+		spin_lock(&inode->i_lock);
+		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+		spin_unlock(&inode->i_lock);
 	}
-	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
-		NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
-	nfs_end_data_update(inode);
-	unlock_kernel();
-	return error;
+	if ((attr->ia_valid & ATTR_SIZE) != 0) {
+		inode->i_size = attr->ia_size;
+		vmtruncate(inode, attr->ia_size);
+	}
+}
+
+static int nfs_wait_schedule(void *word)
+{
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+	schedule();
+	return 0;
 }
 
 /*
  * Wait for the inode to get unlocked.
- * (Used for NFS_INO_LOCKED and NFS_INO_REVALIDATING).
  */
-static int
-nfs_wait_on_inode(struct inode *inode, int flag)
+static int nfs_wait_on_inode(struct inode *inode)
 {
 	struct rpc_clnt	*clnt = NFS_CLIENT(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
-
+	sigset_t oldmask;
 	int error;
-	if (!(NFS_FLAGS(inode) & flag))
-		return 0;
-	atomic_inc(&inode->i_count);
-	error = nfs_wait_event(clnt, nfsi->nfs_i_wait,
-				!(NFS_FLAGS(inode) & flag));
-	iput(inode);
+
+	rpc_clnt_sigmask(clnt, &oldmask);
+	error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING,
+					nfs_wait_schedule, TASK_INTERRUPTIBLE);
+	rpc_clnt_sigunmask(clnt, &oldmask);
+
 	return error;
 }
 
+static void nfs_wake_up_inode(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	clear_bit(NFS_INO_REVALIDATING, &nfsi->flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING);
+}
+
 int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
 	struct inode *inode = dentry->d_inode;
-	struct nfs_inode *nfsi = NFS_I(inode);
-	int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME;
+	int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
 	int err;
 
 	if (__IS_FLG(inode, MS_NOATIME))
@@ -936,13 +1009,18 @@ void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
 	spin_unlock(&inode->i_lock);
 }
 
-struct nfs_open_context *nfs_find_open_context(struct inode *inode, int mode)
+/*
+ * Given an inode, search for an open context with the desired characteristics
+ */
+struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_open_context *pos, *ctx = NULL;
 
 	spin_lock(&inode->i_lock);
 	list_for_each_entry(pos, &nfsi->open_files, list) {
+		if (cred != NULL && pos->cred != cred)
+			continue;
 		if ((pos->mode & mode) == mode) {
 			ctx = get_nfs_open_context(pos);
 			break;
@@ -984,15 +1062,11 @@ int nfs_open(struct inode *inode, struct file *filp)
 	ctx->mode = filp->f_mode;
 	nfs_file_set_open_context(filp, ctx);
 	put_nfs_open_context(ctx);
-	if ((filp->f_mode & FMODE_WRITE) != 0)
-		nfs_begin_data_update(inode);
 	return 0;
 }
 
 int nfs_release(struct inode *inode, struct file *filp)
 {
-	if ((filp->f_mode & FMODE_WRITE) != 0)
-		nfs_end_data_update(inode);
 	nfs_file_clear_open_context(filp);
 	return 0;
 }
@@ -1008,7 +1082,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	struct nfs_fattr fattr;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	unsigned long verifier;
-	unsigned int flags;
+	unsigned long cache_validity;
 
 	dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
 		inode->i_sb->s_id, (long long)NFS_FILEID(inode));
@@ -1019,18 +1093,19 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	if (NFS_STALE(inode))
  		goto out_nowait;
 
-	while (NFS_REVALIDATING(inode)) {
-		status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING);
-		if (status < 0)
-			goto out_nowait;
-		if (NFS_ATTRTIMEO(inode) == 0)
-			continue;
-		if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME))
-			continue;
-		status = NFS_STALE(inode) ? -ESTALE : 0;
-		goto out_nowait;
+	status = nfs_wait_on_inode(inode);
+	if (status < 0)
+		goto out;
+	if (NFS_STALE(inode)) {
+		status = -ESTALE;
+		/* Do we trust the cached ESTALE? */
+		if (NFS_ATTRTIMEO(inode) != 0) {
+			if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) {
+				/* no */
+			} else
+				goto out;
+		}
 	}
-	NFS_FLAGS(inode) |= NFS_INO_REVALIDATING;
 
 	/* Protect against RPC races by saving the change attribute */
 	verifier = nfs_save_change_attribute(inode);
@@ -1042,37 +1117,43 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 		if (status == -ESTALE) {
 			nfs_zap_caches(inode);
 			if (!S_ISDIR(inode->i_mode))
-				NFS_FLAGS(inode) |= NFS_INO_STALE;
+				set_bit(NFS_INO_STALE, &NFS_FLAGS(inode));
 		}
 		goto out;
 	}
 
+	spin_lock(&inode->i_lock);
 	status = nfs_update_inode(inode, &fattr, verifier);
 	if (status) {
+		spin_unlock(&inode->i_lock);
 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
 			 inode->i_sb->s_id,
 			 (long long)NFS_FILEID(inode), status);
 		goto out;
 	}
-	flags = nfsi->flags;
-	nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE;
+	cache_validity = nfsi->cache_validity;
+	nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
+
 	/*
 	 * We may need to keep the attributes marked as invalid if
 	 * we raced with nfs_end_attr_update().
 	 */
-	if (verifier == nfsi->cache_change_attribute)
-		nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
-	/* Do the page cache invalidation */
+	if (time_after_eq(verifier, nfsi->cache_change_attribute))
+		nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
+	spin_unlock(&inode->i_lock);
+
 	nfs_revalidate_mapping(inode, inode->i_mapping);
-	if (flags & NFS_INO_INVALID_ACL)
+
+	if (cache_validity & NFS_INO_INVALID_ACL)
 		nfs_zap_acl_cache(inode);
+
 	dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
 		inode->i_sb->s_id,
 		(long long)NFS_FILEID(inode));
 
-out:
-	NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING;
-	wake_up(&nfsi->nfs_i_wait);
+ out:
+	nfs_wake_up_inode(inode);
+
  out_nowait:
 	unlock_kernel();
 	return status;
@@ -1096,7 +1177,7 @@ int nfs_attribute_timeout(struct inode *inode)
  */
 int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 {
-	if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
+	if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
 			&& !nfs_attribute_timeout(inode))
 		return NFS_STALE(inode) ? -ESTALE : 0;
 	return __nfs_revalidate_inode(server, inode);
@@ -1111,19 +1192,23 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 
-	if (nfsi->flags & NFS_INO_INVALID_DATA) {
+	if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
 		if (S_ISREG(inode->i_mode)) {
 			if (filemap_fdatawrite(mapping) == 0)
 				filemap_fdatawait(mapping);
 			nfs_wb_all(inode);
 		}
 		invalidate_inode_pages2(mapping);
-		nfsi->flags &= ~NFS_INO_INVALID_DATA;
+
+		spin_lock(&inode->i_lock);
+		nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
 		if (S_ISDIR(inode->i_mode)) {
 			memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
 			/* This ensures we revalidate child dentries */
-			nfsi->cache_change_attribute++;
+			nfsi->cache_change_attribute = jiffies;
 		}
+		spin_unlock(&inode->i_lock);
+
 		dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
 				inode->i_sb->s_id,
 				(long long)NFS_FILEID(inode));
@@ -1152,18 +1237,19 @@ void nfs_end_data_update(struct inode *inode)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	if (!nfs_have_delegation(inode, FMODE_READ)) {
-		/* Mark the attribute cache for revalidation */
-		nfsi->flags |= NFS_INO_INVALID_ATTR;
-		/* Directories and symlinks: invalidate page cache too */
-		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-			nfsi->flags |= NFS_INO_INVALID_DATA;
+		/* Directories and symlinks: invalidate page cache */
+		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
+			spin_lock(&inode->i_lock);
+			nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+			spin_unlock(&inode->i_lock);
+		}
 	}
-	nfsi->cache_change_attribute ++;
+	nfsi->cache_change_attribute = jiffies;
 	atomic_dec(&nfsi->data_updates);
 }
 
 /**
- * nfs_refresh_inode - verify consistency of the inode attribute cache
+ * nfs_check_inode_attributes - verify consistency of the inode attribute cache
  * @inode - pointer to inode
  * @fattr - updated attributes
  *
@@ -1171,15 +1257,12 @@ void nfs_end_data_update(struct inode *inode)
  * so that fattr carries weak cache consistency data, then it may
  * also update the ctime/mtime/change_attribute.
  */
-int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fattr)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	loff_t cur_size, new_isize;
 	int data_unstable;
 
-	/* Do we hold a delegation? */
-	if (nfs_have_delegation(inode, FMODE_READ))
-		return 0;
 
 	/* Are we in the process of updating data on the server? */
 	data_unstable = nfs_caches_unstable(inode);
@@ -1189,19 +1272,21 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 				&& nfsi->change_attr == fattr->pre_change_attr)
 			nfsi->change_attr = fattr->change_attr;
 		if (nfsi->change_attr != fattr->change_attr) {
-			nfsi->flags |= NFS_INO_INVALID_ATTR;
+			nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 			if (!data_unstable)
-				nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+				nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
 		}
 	}
 
-	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+	if ((fattr->valid & NFS_ATTR_FATTR) == 0) {
 		return 0;
+	}
 
 	/* Has the inode gone and changed behind our back? */
 	if (nfsi->fileid != fattr->fileid
-			|| (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+			|| (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
 		return -EIO;
+	}
 
 	cur_size = i_size_read(inode);
  	new_isize = nfs_size_to_loff_t(fattr->size);
@@ -1216,33 +1301,90 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 
 	/* Verify a few of the more important attributes */
 	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
-		nfsi->flags |= NFS_INO_INVALID_ATTR;
+		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 		if (!data_unstable)
-			nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+			nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
 	}
 	if (cur_size != new_isize) {
-		nfsi->flags |= NFS_INO_INVALID_ATTR;
+		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 		if (nfsi->npages == 0)
-			nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+			nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
 	}
 
 	/* Have any file permissions changed? */
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
 			|| inode->i_uid != fattr->uid
 			|| inode->i_gid != fattr->gid)
-		nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
+		nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
 
 	/* Has the link count changed? */
 	if (inode->i_nlink != fattr->nlink)
-		nfsi->flags |= NFS_INO_INVALID_ATTR;
+		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 
 	if (!timespec_equal(&inode->i_atime, &fattr->atime))
-		nfsi->flags |= NFS_INO_INVALID_ATIME;
+		nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
 
-	nfsi->read_cache_jiffies = fattr->timestamp;
+	nfsi->read_cache_jiffies = fattr->time_start;
 	return 0;
 }
 
+/**
+ * nfs_refresh_inode - try to update the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * Check that an RPC call that returned attributes has not overlapped with
+ * other recent updates of the inode metadata, then decide whether it is
+ * safe to do a full update of the inode attributes, or whether just to
+ * call nfs_check_inode_attributes.
+ */
+int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int status;
+
+	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+		return 0;
+	spin_lock(&inode->i_lock);
+	nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
+	if (nfs_verify_change_attribute(inode, fattr->time_start))
+		nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
+	if (time_after(fattr->time_start, nfsi->last_updated))
+		status = nfs_update_inode(inode, fattr, fattr->time_start);
+	else
+		status = nfs_check_inode_attributes(inode, fattr);
+
+	spin_unlock(&inode->i_lock);
+	return status;
+}
+
+/**
+ * nfs_post_op_update_inode - try to update the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * After an operation that has changed the inode metadata, mark the
+ * attribute cache as being invalid, then try to update it.
+ */
+int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int status = 0;
+
+	spin_lock(&inode->i_lock);
+	if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) {
+		nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
+		goto out;
+	}
+	status = nfs_update_inode(inode, fattr, fattr->time_start);
+	if (time_after_eq(fattr->time_start, nfsi->cache_change_attribute))
+		nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
+	nfsi->cache_change_attribute = jiffies;
+out:
+	spin_unlock(&inode->i_lock);
+	return status;
+}
+
 /*
  * Many nfs protocol calls return the new file attributes after
  * an operation.  Here we update the inode to reflect the state
@@ -1287,10 +1429,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 	/*
 	 * Update the read time so we don't revalidate too often.
 	 */
-	nfsi->read_cache_jiffies = fattr->timestamp;
+	nfsi->read_cache_jiffies = fattr->time_start;
+	nfsi->last_updated = jiffies;
 
 	/* Are we racing with known updates of the metadata on the server? */
-	data_unstable = ! nfs_verify_change_attribute(inode, verifier);
+	data_unstable = ! (nfs_verify_change_attribute(inode, verifier) ||
+		(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE));
 
 	/* Check if our cached file size is stale */
  	new_isize = nfs_size_to_loff_t(fattr->size);
@@ -1299,7 +1443,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 		/* Do we perhaps have any outstanding writes? */
 		if (nfsi->npages == 0) {
 			/* No, but did we race with nfs_end_data_update()? */
-			if (verifier  ==  nfsi->cache_change_attribute) {
+			if (time_after_eq(verifier,  nfsi->cache_change_attribute)) {
 				inode->i_size = new_isize;
 				invalid |= NFS_INO_INVALID_DATA;
 			}
@@ -1373,7 +1517,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 				|| S_ISLNK(inode->i_mode)))
 		invalid &= ~NFS_INO_INVALID_DATA;
 	if (!nfs_have_delegation(inode, FMODE_READ))
-		nfsi->flags |= invalid;
+		nfsi->cache_validity |= invalid;
 
 	return 0;
  out_changed:
@@ -1391,7 +1535,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 	 */
 	nfs_invalidate_inode(inode);
  out_err:
-	NFS_FLAGS(inode) |= NFS_INO_STALE;
+	set_bit(NFS_INO_STALE, &NFS_FLAGS(inode));
 	return -ESTALE;
 }
 
@@ -1583,8 +1727,7 @@ static void nfs4_clear_inode(struct inode *inode)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	/* If we are holding a delegation, return it! */
-	if (nfsi->delegation != NULL)
-		nfs_inode_return_delegation(inode);
+	nfs_inode_return_delegation(inode);
 	/* First call standard NFS clear_inode() code */
 	nfs_clear_inode(inode);
 	/* Now clear out any remaining state */
@@ -1613,7 +1756,7 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	struct rpc_clnt *clnt = NULL;
 	struct rpc_timeout timeparms;
 	rpc_authflavor_t authflavour;
-	int proto, err = -EIO;
+	int err = -EIO;
 
 	sb->s_blocksize_bits = 0;
 	sb->s_blocksize = 0;
@@ -1631,30 +1774,8 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	server->acdirmax = data->acdirmax*HZ;
 
 	server->rpc_ops = &nfs_v4_clientops;
-	/* Initialize timeout values */
 
-	timeparms.to_initval = data->timeo * HZ / 10;
-	timeparms.to_retries = data->retrans;
-	timeparms.to_exponential = 1;
-	if (!timeparms.to_retries)
-		timeparms.to_retries = 5;
-
-	proto = data->proto;
-	/* Which IP protocol do we use? */
-	switch (proto) {
-	case IPPROTO_TCP:
-		timeparms.to_maxval  = RPC_MAX_TCP_TIMEOUT;
-		if (!timeparms.to_initval)
-			timeparms.to_initval = 600 * HZ / 10;
-		break;
-	case IPPROTO_UDP:
-		timeparms.to_maxval  = RPC_MAX_UDP_TIMEOUT;
-		if (!timeparms.to_initval)
-			timeparms.to_initval = 11 * HZ / 10;
-		break;
-	default:
-		return -EINVAL;
-	}
+	nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
 
 	clp = nfs4_get_client(&server->addr.sin_addr);
 	if (!clp) {
@@ -1679,7 +1800,7 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 
 	down_write(&clp->cl_sem);
 	if (IS_ERR(clp->cl_rpcclient)) {
-		xprt = xprt_create_proto(proto, &server->addr, &timeparms);
+		xprt = xprt_create_proto(data->proto, &server->addr, &timeparms);
 		if (IS_ERR(xprt)) {
 			up_write(&clp->cl_sem);
 			err = PTR_ERR(xprt);
@@ -1950,7 +2071,8 @@ static struct inode *nfs_alloc_inode(struct super_block *sb)
 	nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL);
 	if (!nfsi)
 		return NULL;
-	nfsi->flags = 0;
+	nfsi->flags = 0UL;
+	nfsi->cache_validity = 0UL;
 #ifdef CONFIG_NFS_V3_ACL
 	nfsi->acl_access = ERR_PTR(-EAGAIN);
 	nfsi->acl_default = ERR_PTR(-EAGAIN);
@@ -1982,7 +2104,6 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 		nfsi->ndirty = 0;
 		nfsi->ncommit = 0;
 		nfsi->npages = 0;
-		init_waitqueue_head(&nfsi->nfs_i_wait);
 		nfs4_init_once(nfsi);
 	}
 }
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index d91b69044a4..59049e864ca 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -143,7 +143,6 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
 		fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
 		fattr->rdev = 0;
 	}
-	fattr->timestamp = jiffies;
 	return p;
 }
 
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 1b7a3ef2f81..6a5bbc0ae94 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -308,7 +308,9 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
 	nfs_begin_data_update(inode);
 	status = rpc_call(server->client_acl, ACLPROC3_SETACL,
 			  &args, &fattr, 0);
-	NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
+	spin_lock(&inode->i_lock);
+	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
+	spin_unlock(&inode->i_lock);
 	nfs_end_data_update(inode);
 	dprintk("NFS reply setacl: %d\n", status);
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7851569b31c..92c870d19cc 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -34,8 +34,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 		res = rpc_call_sync(clnt, msg, flags);
 		if (res != -EJUKEBOX)
 			break;
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
+		schedule_timeout_interruptible(NFS_JUKEBOX_RETRY_TIME);
 		res = -ERESTARTSYS;
 	} while (!signalled());
 	rpc_clnt_sigunmask(clnt, &oldset);
@@ -79,7 +78,7 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("%s: call  fsinfo\n", __FUNCTION__);
-	info->fattr->valid = 0;
+	nfs_fattr_init(info->fattr);
 	status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
 	dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
 	if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
@@ -99,7 +98,7 @@ nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("NFS call  getattr\n");
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call(server->client, NFS3PROC_GETATTR,
 			  fhandle, fattr, 0);
 	dprintk("NFS reply getattr: %d\n", status);
@@ -118,8 +117,10 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	int	status;
 
 	dprintk("NFS call  setattr\n");
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
+	if (status == 0)
+		nfs_setattr_update_inode(inode, sattr);
 	dprintk("NFS reply setattr: %d\n", status);
 	return status;
 }
@@ -142,8 +143,8 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
 	int			status;
 
 	dprintk("NFS call  lookup %s\n", name->name);
-	dir_attr.valid = 0;
-	fattr->valid = 0;
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
 	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
 		status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
@@ -173,7 +174,6 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 	int status;
 
 	dprintk("NFS call  access\n");
-	fattr.valid = 0;
 
 	if (mode & MAY_READ)
 		arg.access |= NFS3_ACCESS_READ;
@@ -188,6 +188,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 		if (mode & MAY_EXEC)
 			arg.access |= NFS3_ACCESS_EXECUTE;
 	}
+	nfs_fattr_init(&fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_refresh_inode(inode, &fattr);
 	if (status == 0) {
@@ -216,7 +217,7 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
 	int			status;
 
 	dprintk("NFS call  readlink\n");
-	fattr.valid = 0;
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
 			  &args, &fattr, 0);
 	nfs_refresh_inode(inode, &fattr);
@@ -239,7 +240,7 @@ static int nfs3_proc_read(struct nfs_read_data *rdata)
 
 	dprintk("NFS call  read %d @ %Ld\n", rdata->args.count,
 			(long long) rdata->args.offset);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
 	if (status >= 0)
 		nfs_refresh_inode(inode, fattr);
@@ -262,10 +263,10 @@ static int nfs3_proc_write(struct nfs_write_data *wdata)
 
 	dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
 			(long long) wdata->args.offset);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
 	if (status >= 0)
-		nfs_refresh_inode(inode, fattr);
+		nfs_post_op_update_inode(inode, fattr);
 	dprintk("NFS reply write: %d\n", status);
 	return status < 0? status : wdata->res.count;
 }
@@ -284,10 +285,10 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata)
 
 	dprintk("NFS call  commit %d @ %Ld\n", cdata->args.count,
 			(long long) cdata->args.offset);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status >= 0)
-		nfs_refresh_inode(inode, fattr);
+		nfs_post_op_update_inode(inode, fattr);
 	dprintk("NFS reply commit: %d\n", status);
 	return status;
 }
@@ -298,7 +299,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata)
  */
 static int
 nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-		 int flags)
+		 int flags, struct nameidata *nd)
 {
 	struct nfs_fh		fhandle;
 	struct nfs_fattr	fattr;
@@ -328,10 +329,10 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	sattr->ia_mode &= ~current->fs->umask;
 
 again:
-	dir_attr.valid = 0;
-	fattr.valid = 0;
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
-	nfs_refresh_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(dir, &dir_attr);
 
 	/* If the server doesn't support the exclusive creation semantics,
 	 * try again with simple 'guarded' mode. */
@@ -370,6 +371,8 @@ again:
 		 * not sure this buys us anything (and I'd have
 		 * to revamp the NFSv3 XDR code) */
 		status = nfs3_proc_setattr(dentry, &fattr, sattr);
+		if (status == 0)
+			nfs_setattr_update_inode(dentry->d_inode, sattr);
 		nfs_refresh_inode(dentry->d_inode, &fattr);
 		dprintk("NFS reply setattr (post-create): %d\n", status);
 	}
@@ -398,9 +401,9 @@ nfs3_proc_remove(struct inode *dir, struct qstr *name)
 	int			status;
 
 	dprintk("NFS call  remove %s\n", name->name);
-	dir_attr.valid = 0;
+	nfs_fattr_init(&dir_attr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-	nfs_refresh_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(dir, &dir_attr);
 	dprintk("NFS reply remove: %d\n", status);
 	return status;
 }
@@ -419,7 +422,7 @@ nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr
 	ptr->arg.fh = NFS_FH(dir->d_inode);
 	ptr->arg.name = name->name;
 	ptr->arg.len = name->len;
-	ptr->res.valid = 0;
+	nfs_fattr_init(&ptr->res);
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
 	msg->rpc_argp = &ptr->arg;
 	msg->rpc_resp = &ptr->res;
@@ -436,7 +439,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
 		return 1;
 	if (msg->rpc_argp) {
 		dir_attr = (struct nfs_fattr*)msg->rpc_resp;
-		nfs_refresh_inode(dir->d_inode, dir_attr);
+		nfs_post_op_update_inode(dir->d_inode, dir_attr);
 		kfree(msg->rpc_argp);
 	}
 	return 0;
@@ -462,11 +465,11 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
 	int			status;
 
 	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
-	old_dir_attr.valid = 0;
-	new_dir_attr.valid = 0;
+	nfs_fattr_init(&old_dir_attr);
+	nfs_fattr_init(&new_dir_attr);
 	status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
-	nfs_refresh_inode(old_dir, &old_dir_attr);
-	nfs_refresh_inode(new_dir, &new_dir_attr);
+	nfs_post_op_update_inode(old_dir, &old_dir_attr);
+	nfs_post_op_update_inode(new_dir, &new_dir_attr);
 	dprintk("NFS reply rename: %d\n", status);
 	return status;
 }
@@ -488,11 +491,11 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 	int			status;
 
 	dprintk("NFS call  link %s\n", name->name);
-	dir_attr.valid = 0;
-	fattr.valid = 0;
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
-	nfs_refresh_inode(dir, &dir_attr);
-	nfs_refresh_inode(inode, &fattr);
+	nfs_post_op_update_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(inode, &fattr);
 	dprintk("NFS reply link: %d\n", status);
 	return status;
 }
@@ -521,10 +524,10 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 	if (path->len > NFS3_MAXPATHLEN)
 		return -ENAMETOOLONG;
 	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
-	dir_attr.valid = 0;
-	fattr->valid = 0;
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
-	nfs_refresh_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(dir, &dir_attr);
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
 }
@@ -549,13 +552,13 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 	int status;
 
 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
-	dir_attr.valid = 0;
-	fattr.valid = 0;
 
 	sattr->ia_mode &= ~current->fs->umask;
 
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
-	nfs_refresh_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(dir, &dir_attr);
 	if (status != 0)
 		goto out;
 	status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -579,9 +582,9 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
 	int			status;
 
 	dprintk("NFS call  rmdir %s\n", name->name);
-	dir_attr.valid = 0;
+	nfs_fattr_init(&dir_attr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
-	nfs_refresh_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(dir, &dir_attr);
 	dprintk("NFS reply rmdir: %d\n", status);
 	return status;
 }
@@ -631,7 +634,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	dprintk("NFS call  readdir%s %d\n",
 			plus? "plus" : "", (unsigned int) cookie);
 
-	dir_attr.valid = 0;
+	nfs_fattr_init(&dir_attr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_refresh_inode(dir, &dir_attr);
 	dprintk("NFS reply readdir: %d\n", status);
@@ -673,10 +676,10 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 
 	sattr->ia_mode &= ~current->fs->umask;
 
-	dir_attr.valid = 0;
-	fattr.valid = 0;
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
-	nfs_refresh_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(dir, &dir_attr);
 	if (status != 0)
 		goto out;
 	status = nfs_instantiate(dentry, &fh, &fattr);
@@ -695,7 +698,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("NFS call  fsstat\n");
-	stat->fattr->valid = 0;
+	nfs_fattr_init(stat->fattr);
 	status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
 	dprintk("NFS reply statfs: %d\n", status);
 	return status;
@@ -708,7 +711,7 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("NFS call  fsinfo\n");
-	info->fattr->valid = 0;
+	nfs_fattr_init(info->fattr);
 	status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
 	dprintk("NFS reply fsinfo: %d\n", status);
 	return status;
@@ -721,7 +724,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("NFS call  pathconf\n");
-	info->fattr->valid = 0;
+	nfs_fattr_init(info->fattr);
 	status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
 	dprintk("NFS reply pathconf: %d\n", status);
 	return status;
@@ -732,7 +735,7 @@ extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
 static void
 nfs3_read_done(struct rpc_task *task)
 {
-	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+	struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
 
 	if (nfs3_async_handle_jukebox(task))
 		return;
@@ -772,7 +775,7 @@ nfs3_write_done(struct rpc_task *task)
 		return;
 	data = (struct nfs_write_data *)task->tk_calldata;
 	if (task->tk_status >= 0)
-		nfs_refresh_inode(data->inode, data->res.fattr);
+		nfs_post_op_update_inode(data->inode, data->res.fattr);
 	nfs_writeback_done(task);
 }
 
@@ -816,7 +819,7 @@ nfs3_commit_done(struct rpc_task *task)
 		return;
 	data = (struct nfs_write_data *)task->tk_calldata;
 	if (task->tk_status >= 0)
-		nfs_refresh_inode(data->inode, data->res.fattr);
+		nfs_post_op_update_inode(data->inode, data->res.fattr);
 	nfs_commit_done(task);
 }
 
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index db4a904810a..0498bd36602 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -174,7 +174,6 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
 
 	/* Update the mode bits */
 	fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3);
-	fattr->timestamp = jiffies;
 	return p;
 }
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ec1a22d7b87..b7f262dcb6e 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -93,25 +93,50 @@ struct nfs4_client {
 };
 
 /*
+ * struct rpc_sequence ensures that RPC calls are sent in the exact
+ * order that they appear on the list.
+ */
+struct rpc_sequence {
+	struct rpc_wait_queue	wait;	/* RPC call delay queue */
+	spinlock_t lock;		/* Protects the list */
+	struct list_head list;		/* Defines sequence of RPC calls */
+};
+
+#define NFS_SEQID_CONFIRMED 1
+struct nfs_seqid_counter {
+	struct rpc_sequence *sequence;
+	int flags;
+	u32 counter;
+};
+
+struct nfs_seqid {
+	struct nfs_seqid_counter *sequence;
+	struct list_head list;
+};
+
+static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status)
+{
+	if (seqid_mutating_err(-status))
+		seqid->flags |= NFS_SEQID_CONFIRMED;
+}
+
+/*
  * NFS4 state_owners and lock_owners are simply labels for ordered
  * sequences of RPC calls. Their sole purpose is to provide once-only
  * semantics by allowing the server to identify replayed requests.
- *
- * The ->so_sema is held during all state_owner seqid-mutating operations:
- * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize
- * so_seqid.
  */
 struct nfs4_state_owner {
+	spinlock_t	     so_lock;
 	struct list_head     so_list;	 /* per-clientid list of state_owners */
 	struct nfs4_client   *so_client;
 	u32                  so_id;      /* 32-bit identifier, unique */
-	struct semaphore     so_sema;
-	u32                  so_seqid;   /* protected by so_sema */
 	atomic_t	     so_count;
 
 	struct rpc_cred	     *so_cred;	 /* Associated cred */
 	struct list_head     so_states;
 	struct list_head     so_delegations;
+	struct nfs_seqid_counter so_seqid;
+	struct rpc_sequence  so_sequence;
 };
 
 /*
@@ -132,7 +157,7 @@ struct nfs4_lock_state {
 	fl_owner_t		ls_owner;	/* POSIX lock owner */
 #define NFS_LOCK_INITIALIZED 1
 	int			ls_flags;
-	u32			ls_seqid;
+	struct nfs_seqid_counter	ls_seqid;
 	u32			ls_id;
 	nfs4_stateid		ls_stateid;
 	atomic_t		ls_count;
@@ -153,7 +178,6 @@ struct nfs4_state {
 	struct inode *inode;		/* Pointer to the inode */
 
 	unsigned long flags;		/* Do we hold any locks? */
-	struct semaphore lock_sema;	/* Serializes file locking operations */
 	spinlock_t state_lock;		/* Protects the lock_states list */
 
 	nfs4_stateid stateid;
@@ -190,9 +214,9 @@ extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
 extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
 extern int nfs4_proc_async_renew(struct nfs4_client *);
 extern int nfs4_proc_renew(struct nfs4_client *);
-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
-extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
-extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
+extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
+extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
+extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
 
 extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
 extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops;
@@ -223,13 +247,18 @@ extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, mode_t);
-extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
-extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
+extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
 extern void nfs4_schedule_state_recovery(struct nfs4_client *);
+extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
-extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
 
+extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter);
+extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
+extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
+extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
+extern void nfs_free_seqid(struct nfs_seqid *seqid);
+
 extern const nfs4_stateid zero_stateid;
 
 /* nfs4xdr.c */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1b76f80aedb..21482b2518f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -47,6 +47,7 @@
 #include <linux/nfs_page.h>
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
+#include <linux/mount.h>
 
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -56,10 +57,11 @@
 #define NFS4_POLL_RETRY_MIN	(1*HZ)
 #define NFS4_POLL_RETRY_MAX	(15*HZ)
 
+static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
-static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *);
+static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
-static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
+static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
 extern struct rpc_procinfo nfs4_procedures[];
 
@@ -185,8 +187,26 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 
+	spin_lock(&inode->i_lock);
+	nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 	if (cinfo->before == nfsi->change_attr && cinfo->atomic)
 		nfsi->change_attr = cinfo->after;
+	spin_unlock(&inode->i_lock);
+}
+
+/* Helper for asynchronous RPC calls */
+static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action tk_begin,
+		rpc_action tk_exit, void *calldata)
+{
+	struct rpc_task *task;
+
+	if (!(task = rpc_new_task(clnt, tk_exit, RPC_TASK_ASYNC)))
+		return -ENOMEM;
+
+	task->tk_calldata = calldata;
+	task->tk_action = tk_begin;
+	rpc_execute(task);
+	return 0;
 }
 
 static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
@@ -194,22 +214,22 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid,
 	struct inode *inode = state->inode;
 
 	open_flags &= (FMODE_READ|FMODE_WRITE);
-	/* Protect against nfs4_find_state() */
+	/* Protect against nfs4_find_state_byowner() */
+	spin_lock(&state->owner->so_lock);
 	spin_lock(&inode->i_lock);
-	state->state |= open_flags;
-	/* NB! List reordering - see the reclaim code for why.  */
-	if ((open_flags & FMODE_WRITE) && 0 == state->nwriters++)
-		list_move(&state->open_states, &state->owner->so_states);
+	memcpy(&state->stateid, stateid, sizeof(state->stateid));
+	if ((open_flags & FMODE_WRITE))
+		state->nwriters++;
 	if (open_flags & FMODE_READ)
 		state->nreaders++;
-	memcpy(&state->stateid, stateid, sizeof(state->stateid));
+	nfs4_state_set_mode_locked(state, state->state | open_flags);
 	spin_unlock(&inode->i_lock);
+	spin_unlock(&state->owner->so_lock);
 }
 
 /*
  * OPEN_RECLAIM:
  * 	reclaim state on the server after a reboot.
- * 	Assumes caller is holding the sp->so_sem
  */
 static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
 {
@@ -218,7 +238,6 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st
 	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
 	struct nfs_openargs o_arg = {
 		.fh = NFS_FH(inode),
-		.seqid = sp->so_seqid,
 		.id = sp->so_id,
 		.open_flags = state->state,
 		.clientid = server->nfs4_state->cl_clientid,
@@ -245,8 +264,13 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st
 		}
 		o_arg.u.delegation_type = delegation->type;
 	}
+	o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+	if (o_arg.seqid == NULL)
+		return -ENOMEM;
 	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_seqid(status, sp);
+	/* Confirm the sequence as being established */
+	nfs_confirm_seqid(&sp->so_seqid, status);
+	nfs_increment_open_seqid(status, o_arg.seqid);
 	if (status == 0) {
 		memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
 		if (o_res.delegation_type != 0) {
@@ -256,6 +280,7 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st
 				nfs_async_inode_return_delegation(inode, &o_res.stateid);
 		}
 	}
+	nfs_free_seqid(o_arg.seqid);
 	clear_bit(NFS_DELEGATED_STATE, &state->flags);
 	/* Ensure we update the inode attributes */
 	NFS_CACHEINV(inode);
@@ -302,23 +327,35 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state
 	};
 	int status = 0;
 
-	down(&sp->so_sema);
 	if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
 		goto out;
 	if (state->state == 0)
 		goto out;
-	arg.seqid = sp->so_seqid;
+	arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+	status = -ENOMEM;
+	if (arg.seqid == NULL)
+		goto out;
 	arg.open_flags = state->state;
 	memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data));
 	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_seqid(status, sp);
+	nfs_increment_open_seqid(status, arg.seqid);
+	if (status != 0)
+		goto out_free;
+	if(res.rflags & NFS4_OPEN_RESULT_CONFIRM) {
+		status = _nfs4_proc_open_confirm(server->client, NFS_FH(inode),
+				sp, &res.stateid, arg.seqid);
+		if (status != 0)
+			goto out_free;
+	}
+	nfs_confirm_seqid(&sp->so_seqid, 0);
 	if (status >= 0) {
 		memcpy(state->stateid.data, res.stateid.data,
 				sizeof(state->stateid.data));
 		clear_bit(NFS_DELEGATED_STATE, &state->flags);
 	}
+out_free:
+	nfs_free_seqid(arg.seqid);
 out:
-	up(&sp->so_sema);
 	dput(parent);
 	return status;
 }
@@ -345,11 +382,11 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
 	return err;
 }
 
-static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid)
+static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid)
 {
 	struct nfs_open_confirmargs arg = {
 		.fh             = fh,
-		.seqid          = sp->so_seqid,
+		.seqid          = seqid,
 		.stateid	= *stateid,
 	};
 	struct nfs_open_confirmres res;
@@ -362,7 +399,9 @@ static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nf
 	int status;
 
 	status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_seqid(status, sp);
+	/* Confirm the sequence as being established */
+	nfs_confirm_seqid(&sp->so_seqid, status);
+	nfs_increment_open_seqid(status, seqid);
 	if (status >= 0)
 		memcpy(stateid, &res.stateid, sizeof(*stateid));
 	return status;
@@ -380,21 +419,41 @@ static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner  *sp, stru
 	int status;
 
 	/* Update sequence id. The caller must serialize! */
-	o_arg->seqid = sp->so_seqid;
 	o_arg->id = sp->so_id;
 	o_arg->clientid = sp->so_client->cl_clientid;
 
 	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_seqid(status, sp);
+	if (status == 0) {
+		/* OPEN on anything except a regular file is disallowed in NFSv4 */
+		switch (o_res->f_attr->mode & S_IFMT) {
+			case S_IFREG:
+				break;
+			case S_IFLNK:
+				status = -ELOOP;
+				break;
+			case S_IFDIR:
+				status = -EISDIR;
+				break;
+			default:
+				status = -ENOTDIR;
+		}
+	}
+
+	nfs_increment_open_seqid(status, o_arg->seqid);
 	if (status != 0)
 		goto out;
-	update_changeattr(dir, &o_res->cinfo);
+	if (o_arg->open_flags & O_CREAT) {
+		update_changeattr(dir, &o_res->cinfo);
+		nfs_post_op_update_inode(dir, o_res->dir_attr);
+	} else
+		nfs_refresh_inode(dir, o_res->dir_attr);
 	if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
 		status = _nfs4_proc_open_confirm(server->client, &o_res->fh,
-				sp, &o_res->stateid);
+				sp, &o_res->stateid, o_arg->seqid);
 		if (status != 0)
 			goto out;
 	}
+	nfs_confirm_seqid(&sp->so_seqid, 0);
 	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
 		status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
 out:
@@ -441,9 +500,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
-	struct nfs_fattr        f_attr = {
-		.valid = 0,
-	};
+	struct nfs_fattr f_attr, dir_attr;
 	struct nfs_openargs o_arg = {
 		.fh = NFS_FH(dir),
 		.open_flags = state->state,
@@ -453,6 +510,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
 	};
 	struct nfs_openres o_res = {
 		.f_attr = &f_attr,
+		.dir_attr = &dir_attr,
 		.server = server,
 	};
 	int status = 0;
@@ -465,6 +523,12 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
 		set_bit(NFS_DELEGATED_STATE, &state->flags);
 		goto out;
 	}
+	o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+	status = -ENOMEM;
+	if (o_arg.seqid == NULL)
+		goto out;
+	nfs_fattr_init(&f_attr);
+	nfs_fattr_init(&dir_attr);
 	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
 	if (status != 0)
 		goto out_nodeleg;
@@ -490,6 +554,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
 			nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res);
 	}
 out_nodeleg:
+	nfs_free_seqid(o_arg.seqid);
 	clear_bit(NFS_DELEGATED_STATE, &state->flags);
 out:
 	dput(parent);
@@ -564,7 +629,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
 		dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
 		goto out_err;
 	}
-	down(&sp->so_sema);
 	state = nfs4_get_open_state(inode, sp);
 	if (state == NULL)
 		goto out_err;
@@ -589,7 +653,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
 	set_bit(NFS_DELEGATED_STATE, &state->flags);
 	update_open_stateid(state, &delegation->stateid, open_flags);
 out_ok:
-	up(&sp->so_sema);
 	nfs4_put_state_owner(sp);
 	up_read(&nfsi->rwsem);
 	up_read(&clp->cl_sem);
@@ -600,11 +663,12 @@ out_err:
 	if (sp != NULL) {
 		if (state != NULL)
 			nfs4_put_open_state(state);
-		up(&sp->so_sema);
 		nfs4_put_state_owner(sp);
 	}
 	up_read(&nfsi->rwsem);
 	up_read(&clp->cl_sem);
+	if (err != -EACCES)
+		nfs_inode_return_delegation(inode);
 	return err;
 }
 
@@ -635,9 +699,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	struct nfs4_client *clp = server->nfs4_state;
 	struct inode *inode = NULL;
 	int                     status;
-	struct nfs_fattr        f_attr = {
-		.valid          = 0,
-	};
+	struct nfs_fattr f_attr, dir_attr;
 	struct nfs_openargs o_arg = {
 		.fh             = NFS_FH(dir),
 		.open_flags	= flags,
@@ -648,6 +710,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	};
 	struct nfs_openres o_res = {
 		.f_attr         = &f_attr,
+		.dir_attr	= &dir_attr,
 		.server         = server,
 	};
 
@@ -665,8 +728,12 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	} else
 		o_arg.u.attrs = sattr;
 	/* Serialization for the sequence id */
-	down(&sp->so_sema);
 
+	o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+	if (o_arg.seqid == NULL)
+		return -ENOMEM;
+	nfs_fattr_init(&f_attr);
+	nfs_fattr_init(&dir_attr);
 	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
 	if (status != 0)
 		goto out_err;
@@ -681,7 +748,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	update_open_stateid(state, &o_res.stateid, flags);
 	if (o_res.delegation_type != 0)
 		nfs_inode_set_delegation(inode, cred, &o_res);
-	up(&sp->so_sema);
+	nfs_free_seqid(o_arg.seqid);
 	nfs4_put_state_owner(sp);
 	up_read(&clp->cl_sem);
 	*res = state;
@@ -690,7 +757,7 @@ out_err:
 	if (sp != NULL) {
 		if (state != NULL)
 			nfs4_put_open_state(state);
-		up(&sp->so_sema);
+		nfs_free_seqid(o_arg.seqid);
 		nfs4_put_state_owner(sp);
 	}
 	/* Note: clp->cl_sem must be released before nfs4_put_open_state()! */
@@ -718,7 +785,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
 		 * It is actually a sign of a bug on the client or on the server.
 		 *
 		 * If we receive a BAD_SEQID error in the particular case of
-		 * doing an OPEN, we assume that nfs4_increment_seqid() will
+		 * doing an OPEN, we assume that nfs_increment_open_seqid() will
 		 * have unhashed the old state_owner for us, and that we can
 		 * therefore safely retry using a new one. We should still warn
 		 * the user though...
@@ -728,6 +795,16 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
 			exception.retry = 1;
 			continue;
 		}
+		/*
+		 * BAD_STATEID on OPEN means that the server cancelled our
+		 * state before it received the OPEN_CONFIRM.
+		 * Recover by retrying the request as per the discussion
+		 * on Page 181 of RFC3530.
+		 */
+		if (status == -NFS4ERR_BAD_STATEID) {
+			exception.retry = 1;
+			continue;
+		}
 		res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
 					status, &exception));
 	} while (exception.retry);
@@ -753,8 +830,9 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
                 .rpc_argp       = &arg,
                 .rpc_resp       = &res,
         };
+	int status;
 
-        fattr->valid = 0;
+	nfs_fattr_init(fattr);
 
 	if (state != NULL) {
 		msg.rpc_cred = state->owner->so_cred;
@@ -762,7 +840,8 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
 	} else
 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
 
-	return rpc_call_sync(server->client, &msg, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
+	return status;
 }
 
 static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
@@ -785,19 +864,30 @@ struct nfs4_closedata {
 	struct nfs4_state *state;
 	struct nfs_closeargs arg;
 	struct nfs_closeres res;
+	struct nfs_fattr fattr;
 };
 
+static void nfs4_free_closedata(struct nfs4_closedata *calldata)
+{
+	struct nfs4_state *state = calldata->state;
+	struct nfs4_state_owner *sp = state->owner;
+
+	nfs4_put_open_state(calldata->state);
+	nfs_free_seqid(calldata->arg.seqid);
+	nfs4_put_state_owner(sp);
+	kfree(calldata);
+}
+
 static void nfs4_close_done(struct rpc_task *task)
 {
 	struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
 	struct nfs4_state *state = calldata->state;
-	struct nfs4_state_owner *sp = state->owner;
 	struct nfs_server *server = NFS_SERVER(calldata->inode);
 
         /* hmm. we are done with the inode, and in the process of freeing
 	 * the state_owner. we keep this around to process errors
 	 */
-	nfs4_increment_seqid(task->tk_status, sp);
+	nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
 	switch (task->tk_status) {
 		case 0:
 			memcpy(&state->stateid, &calldata->res.stateid,
@@ -805,7 +895,6 @@ static void nfs4_close_done(struct rpc_task *task)
 			break;
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
-			state->state = calldata->arg.open_flags;
 			nfs4_schedule_state_recovery(server->nfs4_state);
 			break;
 		default:
@@ -814,25 +903,50 @@ static void nfs4_close_done(struct rpc_task *task)
 				return;
 			}
 	}
-	state->state = calldata->arg.open_flags;
-	nfs4_put_open_state(state);
-	up(&sp->so_sema);
-	nfs4_put_state_owner(sp);
-	up_read(&server->nfs4_state->cl_sem);
-	kfree(calldata);
+	nfs_refresh_inode(calldata->inode, calldata->res.fattr);
+	nfs4_free_closedata(calldata);
 }
 
-static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata)
+static void nfs4_close_begin(struct rpc_task *task)
 {
+	struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
+	struct nfs4_state *state = calldata->state;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
 		.rpc_argp = &calldata->arg,
 		.rpc_resp = &calldata->res,
-		.rpc_cred = calldata->state->owner->so_cred,
+		.rpc_cred = state->owner->so_cred,
 	};
-	if (calldata->arg.open_flags != 0)
+	int mode = 0, old_mode;
+	int status;
+
+	status = nfs_wait_on_sequence(calldata->arg.seqid, task);
+	if (status != 0)
+		return;
+	/* Recalculate the new open mode in case someone reopened the file
+	 * while we were waiting in line to be scheduled.
+	 */
+	spin_lock(&state->owner->so_lock);
+	spin_lock(&calldata->inode->i_lock);
+	mode = old_mode = state->state;
+	if (state->nreaders == 0)
+		mode &= ~FMODE_READ;
+	if (state->nwriters == 0)
+		mode &= ~FMODE_WRITE;
+	nfs4_state_set_mode_locked(state, mode);
+	spin_unlock(&calldata->inode->i_lock);
+	spin_unlock(&state->owner->so_lock);
+	if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+		nfs4_free_closedata(calldata);
+		task->tk_exit = NULL;
+		rpc_exit(task, 0);
+		return;
+	}
+	nfs_fattr_init(calldata->res.fattr);
+	if (mode != 0)
 		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
-	return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata);
+	calldata->arg.open_flags = mode;
+	rpc_call_setup(task, &msg, 0);
 }
 
 /* 
@@ -846,42 +960,59 @@ static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *
  *
  * NOTE: Caller must be holding the sp->so_owner semaphore!
  */
-int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) 
+int nfs4_do_close(struct inode *inode, struct nfs4_state *state) 
 {
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_closedata *calldata;
-	int status;
+	int status = -ENOMEM;
 
-	/* Tell caller we're done */
-	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
-		state->state = mode;
-		return 0;
-	}
-	calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL);
+	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
 	if (calldata == NULL)
-		return -ENOMEM;
+		goto out;
 	calldata->inode = inode;
 	calldata->state = state;
 	calldata->arg.fh = NFS_FH(inode);
+	calldata->arg.stateid = &state->stateid;
 	/* Serialization for the sequence id */
-	calldata->arg.seqid = state->owner->so_seqid;
-	calldata->arg.open_flags = mode;
-	memcpy(&calldata->arg.stateid, &state->stateid,
-			sizeof(calldata->arg.stateid));
-	status = nfs4_close_call(NFS_SERVER(inode)->client, calldata);
-	/*
-	 * Return -EINPROGRESS on success in order to indicate to the
-	 * caller that an asynchronous RPC call has been launched, and
-	 * that it will release the semaphores on completion.
-	 */
-	return (status == 0) ? -EINPROGRESS : status;
+	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
+	if (calldata->arg.seqid == NULL)
+		goto out_free_calldata;
+	calldata->arg.bitmask = server->attr_bitmask;
+	calldata->res.fattr = &calldata->fattr;
+	calldata->res.server = server;
+
+	status = nfs4_call_async(server->client, nfs4_close_begin,
+			nfs4_close_done, calldata);
+	if (status == 0)
+		goto out;
+
+	nfs_free_seqid(calldata->arg.seqid);
+out_free_calldata:
+	kfree(calldata);
+out:
+	return status;
+}
+
+static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
+{
+	struct file *filp;
+
+	filp = lookup_instantiate_filp(nd, dentry, NULL);
+	if (!IS_ERR(filp)) {
+		struct nfs_open_context *ctx;
+		ctx = (struct nfs_open_context *)filp->private_data;
+		ctx->state = state;
+	} else
+		nfs4_close_state(state, nd->intent.open.flags);
 }
 
-struct inode *
+struct dentry *
 nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct iattr attr;
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
+	struct dentry *res;
 
 	if (nd->flags & LOOKUP_CREATE) {
 		attr.ia_mode = nd->intent.open.create_mode;
@@ -895,16 +1026,23 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 
 	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
 	if (IS_ERR(cred))
-		return (struct inode *)cred;
+		return (struct dentry *)cred;
 	state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
 	put_rpccred(cred);
-	if (IS_ERR(state))
-		return (struct inode *)state;
-	return state->inode;
+	if (IS_ERR(state)) {
+		if (PTR_ERR(state) == -ENOENT)
+			d_add(dentry, NULL);
+		return (struct dentry *)state;
+	}
+	res = d_add_unique(dentry, state->inode);
+	if (res != NULL)
+		dentry = res;
+	nfs4_intent_set_file(nd, dentry, state);
+	return res;
 }
 
 int
-nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
+nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
 {
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
@@ -917,18 +1055,30 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
 	if (IS_ERR(state))
 		state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
 	put_rpccred(cred);
-	if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0)
-		return 1;
-	if (IS_ERR(state))
-		return 0;
+	if (IS_ERR(state)) {
+		switch (PTR_ERR(state)) {
+			case -EPERM:
+			case -EACCES:
+			case -EDQUOT:
+			case -ENOSPC:
+			case -EROFS:
+				lookup_instantiate_filp(nd, (struct dentry *)state, NULL);
+				return 1;
+			case -ENOENT:
+				if (dentry->d_inode == NULL)
+					return 1;
+		}
+		goto out_drop;
+	}
 	inode = state->inode;
+	iput(inode);
 	if (inode == dentry->d_inode) {
-		iput(inode);
+		nfs4_intent_set_file(nd, dentry, state);
 		return 1;
 	}
-	d_drop(dentry);
 	nfs4_close_state(state, openflags);
-	iput(inode);
+out_drop:
+	d_drop(dentry);
 	return 0;
 }
 
@@ -972,13 +1122,12 @@ static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fh
 static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
 		struct nfs_fsinfo *info)
 {
-	struct nfs_fattr *	fattr = info->fattr;
 	struct nfs4_lookup_root_arg args = {
 		.bitmask = nfs4_fattr_bitmap,
 	};
 	struct nfs4_lookup_res res = {
 		.server = server,
-		.fattr = fattr,
+		.fattr = info->fattr,
 		.fh = fhandle,
 	};
 	struct rpc_message msg = {
@@ -986,7 +1135,7 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
-	fattr->valid = 0;
+	nfs_fattr_init(info->fattr);
 	return rpc_call_sync(server->client, &msg, 0);
 }
 
@@ -1049,7 +1198,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 		q.len = p - q.name;
 
 		do {
-			fattr->valid = 0;
+			nfs_fattr_init(fattr);
 			status = nfs4_handle_exception(server,
 					rpc_call_sync(server->client, &msg, 0),
 					&exception);
@@ -1086,7 +1235,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 		.rpc_resp = &res,
 	};
 	
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	return rpc_call_sync(server->client, &msg, 0);
 }
 
@@ -1125,28 +1274,27 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 {
 	struct rpc_cred *cred;
 	struct inode *inode = dentry->d_inode;
-	struct nfs4_state *state;
+	struct nfs_open_context *ctx;
+	struct nfs4_state *state = NULL;
 	int status;
 
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	
 	cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
-	/* Search for an existing WRITE delegation first */
-	state = nfs4_open_delegated(inode, FMODE_WRITE, cred);
-	if (!IS_ERR(state)) {
-		/* NB: nfs4_open_delegated() bumps the inode->i_count */
-		iput(inode);
-	} else {
-		/* Search for an existing open(O_WRITE) stateid */
-		state = nfs4_find_state(inode, cred, FMODE_WRITE);
-	}
+
+	/* Search for an existing open(O_WRITE) file */
+	ctx = nfs_find_open_context(inode, cred, FMODE_WRITE);
+	if (ctx != NULL)
+		state = ctx->state;
 
 	status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
 			NFS_FH(inode), sattr, state);
-	if (state != NULL)
-		nfs4_close_state(state, FMODE_WRITE);
+	if (status == 0)
+		nfs_setattr_update_inode(inode, sattr);
+	if (ctx != NULL)
+		put_nfs_open_context(ctx);
 	put_rpccred(cred);
 	return status;
 }
@@ -1172,7 +1320,7 @@ static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
 		.rpc_resp = &res,
 	};
 	
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	
 	dprintk("NFS call  lookup %s\n", name->name);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
@@ -1321,7 +1469,7 @@ static int _nfs4_proc_read(struct nfs_read_data *rdata)
 	dprintk("NFS call  read %d @ %Ld\n", rdata->args.count,
 			(long long) rdata->args.offset);
 
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(server->client, &msg, flags);
 	if (!status)
 		renew_lease(server, timestamp);
@@ -1358,7 +1506,7 @@ static int _nfs4_proc_write(struct nfs_write_data *wdata)
 	dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
 			(long long) wdata->args.offset);
 
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(server->client, &msg, rpcflags);
 	dprintk("NFS reply write: %d\n", status);
 	return status;
@@ -1392,7 +1540,7 @@ static int _nfs4_proc_commit(struct nfs_write_data *cdata)
 	dprintk("NFS call  commit %d @ %Ld\n", cdata->args.count,
 			(long long) cdata->args.offset);
 
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply commit: %d\n", status);
 	return status;
@@ -1427,7 +1575,7 @@ static int nfs4_proc_commit(struct nfs_write_data *cdata)
 
 static int
 nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-                 int flags)
+                 int flags, struct nameidata *nd)
 {
 	struct nfs4_state *state;
 	struct rpc_cred *cred;
@@ -1450,21 +1598,29 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
 		                     NFS_FH(state->inode), sattr, state);
 		if (status == 0)
-			goto out;
-	} else if (flags != 0)
-		goto out;
-	nfs4_close_state(state, flags);
+			nfs_setattr_update_inode(state->inode, sattr);
+	}
+	if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
+		nfs4_intent_set_file(nd, dentry, state);
+	else
+		nfs4_close_state(state, flags);
 out:
 	return status;
 }
 
 static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
 {
+	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs4_remove_arg args = {
 		.fh = NFS_FH(dir),
 		.name = name,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs_fattr dir_attr;
+	struct nfs4_remove_res	res = {
+		.server = server,
+		.dir_attr = &dir_attr,
 	};
-	struct nfs4_change_info	res;
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_REMOVE],
 		.rpc_argp	= &args,
@@ -1472,9 +1628,12 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
 	};
 	int			status;
 
-	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-	if (status == 0)
-		update_changeattr(dir, &res);
+	nfs_fattr_init(res.dir_attr);
+	status = rpc_call_sync(server->client, &msg, 0);
+	if (status == 0) {
+		update_changeattr(dir, &res.cinfo);
+		nfs_post_op_update_inode(dir, res.dir_attr);
+	}
 	return status;
 }
 
@@ -1492,12 +1651,14 @@ static int nfs4_proc_remove(struct inode *dir, struct qstr *name)
 
 struct unlink_desc {
 	struct nfs4_remove_arg	args;
-	struct nfs4_change_info	res;
+	struct nfs4_remove_res	res;
+	struct nfs_fattr dir_attr;
 };
 
 static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir,
 		struct qstr *name)
 {
+	struct nfs_server *server = NFS_SERVER(dir->d_inode);
 	struct unlink_desc *up;
 
 	up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL);
@@ -1506,6 +1667,9 @@ static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir,
 	
 	up->args.fh = NFS_FH(dir->d_inode);
 	up->args.name = name;
+	up->args.bitmask = server->attr_bitmask;
+	up->res.server = server;
+	up->res.dir_attr = &up->dir_attr;
 	
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
 	msg->rpc_argp = &up->args;
@@ -1520,7 +1684,8 @@ static int nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
 	
 	if (msg->rpc_resp != NULL) {
 		up = container_of(msg->rpc_resp, struct unlink_desc, res);
-		update_changeattr(dir->d_inode, &up->res);
+		update_changeattr(dir->d_inode, &up->res.cinfo);
+		nfs_post_op_update_inode(dir->d_inode, up->res.dir_attr);
 		kfree(up);
 		msg->rpc_resp = NULL;
 		msg->rpc_argp = NULL;
@@ -1531,13 +1696,20 @@ static int nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
 static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
 		struct inode *new_dir, struct qstr *new_name)
 {
+	struct nfs_server *server = NFS_SERVER(old_dir);
 	struct nfs4_rename_arg arg = {
 		.old_dir = NFS_FH(old_dir),
 		.new_dir = NFS_FH(new_dir),
 		.old_name = old_name,
 		.new_name = new_name,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs_fattr old_fattr, new_fattr;
+	struct nfs4_rename_res res = {
+		.server = server,
+		.old_fattr = &old_fattr,
+		.new_fattr = &new_fattr,
 	};
-	struct nfs4_rename_res res = { };
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
 		.rpc_argp = &arg,
@@ -1545,11 +1717,15 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
 	};
 	int			status;
 	
-	status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
+	nfs_fattr_init(res.old_fattr);
+	nfs_fattr_init(res.new_fattr);
+	status = rpc_call_sync(server->client, &msg, 0);
 
 	if (!status) {
 		update_changeattr(old_dir, &res.old_cinfo);
+		nfs_post_op_update_inode(old_dir, res.old_fattr);
 		update_changeattr(new_dir, &res.new_cinfo);
+		nfs_post_op_update_inode(new_dir, res.new_fattr);
 	}
 	return status;
 }
@@ -1570,22 +1746,34 @@ static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
 
 static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 {
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_link_arg arg = {
 		.fh     = NFS_FH(inode),
 		.dir_fh = NFS_FH(dir),
 		.name   = name,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs_fattr fattr, dir_attr;
+	struct nfs4_link_res res = {
+		.server = server,
+		.fattr = &fattr,
+		.dir_attr = &dir_attr,
 	};
-	struct nfs4_change_info	cinfo = { };
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK],
 		.rpc_argp = &arg,
-		.rpc_resp = &cinfo,
+		.rpc_resp = &res,
 	};
 	int			status;
 
-	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
-	if (!status)
-		update_changeattr(dir, &cinfo);
+	nfs_fattr_init(res.fattr);
+	nfs_fattr_init(res.dir_attr);
+	status = rpc_call_sync(server->client, &msg, 0);
+	if (!status) {
+		update_changeattr(dir, &res.cinfo);
+		nfs_post_op_update_inode(dir, res.dir_attr);
+		nfs_refresh_inode(inode, res.fattr);
+	}
 
 	return status;
 }
@@ -1607,6 +1795,7 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
 		struct nfs_fattr *fattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs_fattr dir_fattr;
 	struct nfs4_create_arg arg = {
 		.dir_fh = NFS_FH(dir),
 		.server = server,
@@ -1619,6 +1808,7 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
 		.server = server,
 		.fh = fhandle,
 		.fattr = fattr,
+		.dir_fattr = &dir_fattr,
 	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK],
@@ -1630,11 +1820,13 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
 	if (path->len > NFS4_MAXPATHLEN)
 		return -ENAMETOOLONG;
 	arg.u.symlink = path;
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
+	nfs_fattr_init(&dir_fattr);
 	
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	if (!status)
 		update_changeattr(dir, &res.dir_cinfo);
+	nfs_post_op_update_inode(dir, res.dir_fattr);
 	return status;
 }
 
@@ -1658,7 +1850,7 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 {
 	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs_fh fhandle;
-	struct nfs_fattr fattr;
+	struct nfs_fattr fattr, dir_fattr;
 	struct nfs4_create_arg arg = {
 		.dir_fh = NFS_FH(dir),
 		.server = server,
@@ -1671,6 +1863,7 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 		.server = server,
 		.fh = &fhandle,
 		.fattr = &fattr,
+		.dir_fattr = &dir_fattr,
 	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
@@ -1679,11 +1872,13 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 	};
 	int			status;
 
-	fattr.valid = 0;
+	nfs_fattr_init(&fattr);
+	nfs_fattr_init(&dir_fattr);
 	
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	if (!status) {
 		update_changeattr(dir, &res.dir_cinfo);
+		nfs_post_op_update_inode(dir, res.dir_fattr);
 		status = nfs_instantiate(dentry, &fhandle, &fattr);
 	}
 	return status;
@@ -1756,7 +1951,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
 {
 	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs_fh fh;
-	struct nfs_fattr fattr;
+	struct nfs_fattr fattr, dir_fattr;
 	struct nfs4_create_arg arg = {
 		.dir_fh = NFS_FH(dir),
 		.server = server,
@@ -1768,6 +1963,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
 		.server = server,
 		.fh = &fh,
 		.fattr = &fattr,
+		.dir_fattr = &dir_fattr,
 	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
@@ -1777,7 +1973,8 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
 	int			status;
 	int                     mode = sattr->ia_mode;
 
-	fattr.valid = 0;
+	nfs_fattr_init(&fattr);
+	nfs_fattr_init(&dir_fattr);
 
 	BUG_ON(!(sattr->ia_valid & ATTR_MODE));
 	BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode));
@@ -1799,6 +1996,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	if (status == 0) {
 		update_changeattr(dir, &res.dir_cinfo);
+		nfs_post_op_update_inode(dir, res.dir_fattr);
 		status = nfs_instantiate(dentry, &fh, &fattr);
 	}
 	return status;
@@ -1830,7 +2028,7 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 		.rpc_resp = fsstat,
 	};
 
-	fsstat->fattr->valid = 0;
+	nfs_fattr_init(fsstat->fattr);
 	return rpc_call_sync(server->client, &msg, 0);
 }
 
@@ -1877,7 +2075,7 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str
 
 static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
 {
-	fsinfo->fattr->valid = 0;
+	nfs_fattr_init(fsinfo->fattr);
 	return nfs4_do_fsinfo(server, fhandle, fsinfo);
 }
 
@@ -1900,7 +2098,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
 		return 0;
 	}
 
-	pathconf->fattr->valid = 0;
+	nfs_fattr_init(pathconf->fattr);
 	return rpc_call_sync(server->client, &msg, 0);
 }
 
@@ -1967,8 +2165,10 @@ nfs4_write_done(struct rpc_task *task)
 		rpc_restart_call(task);
 		return;
 	}
-	if (task->tk_status >= 0)
+	if (task->tk_status >= 0) {
 		renew_lease(NFS_SERVER(inode), data->timestamp);
+		nfs_post_op_update_inode(inode, data->res.fattr);
+	}
 	/* Call back common NFS writeback processing */
 	nfs_writeback_done(task);
 }
@@ -1984,6 +2184,7 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 		.rpc_cred = data->cred,
 	};
 	struct inode *inode = data->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
 	int stable;
 	int flags;
 	
@@ -1995,6 +2196,8 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 	} else
 		stable = NFS_UNSTABLE;
 	data->args.stable = stable;
+	data->args.bitmask = server->attr_bitmask;
+	data->res.server = server;
 
 	data->timestamp   = jiffies;
 
@@ -2016,6 +2219,8 @@ nfs4_commit_done(struct rpc_task *task)
 		rpc_restart_call(task);
 		return;
 	}
+	if (task->tk_status >= 0)
+		nfs_post_op_update_inode(inode, data->res.fattr);
 	/* Call back common NFS writeback processing */
 	nfs_commit_done(task);
 }
@@ -2031,8 +2236,12 @@ nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
 		.rpc_cred = data->cred,
 	};	
 	struct inode *inode = data->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
 	int flags;
 	
+	data->args.bitmask = server->attr_bitmask;
+	data->res.server = server;
+
 	/* Set the initial flags for the task.  */
 	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 
@@ -2100,65 +2309,6 @@ nfs4_proc_renew(struct nfs4_client *clp)
 	return 0;
 }
 
-/*
- * We will need to arrange for the VFS layer to provide an atomic open.
- * Until then, this open method is prone to inefficiency and race conditions
- * due to the lookup, potential create, and open VFS calls from sys_open()
- * placed on the wire.
- */
-static int
-nfs4_proc_file_open(struct inode *inode, struct file *filp)
-{
-	struct dentry *dentry = filp->f_dentry;
-	struct nfs_open_context *ctx;
-	struct nfs4_state *state = NULL;
-	struct rpc_cred *cred;
-	int status = -ENOMEM;
-
-	dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n",
-	                       (int)dentry->d_parent->d_name.len,
-	                       dentry->d_parent->d_name.name,
-	                       (int)dentry->d_name.len, dentry->d_name.name);
-
-
-	/* Find our open stateid */
-	cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
-	if (IS_ERR(cred))
-		return PTR_ERR(cred);
-	ctx = alloc_nfs_open_context(dentry, cred);
-	put_rpccred(cred);
-	if (unlikely(ctx == NULL))
-		return -ENOMEM;
-	status = -EIO; /* ERACE actually */
-	state = nfs4_find_state(inode, cred, filp->f_mode);
-	if (unlikely(state == NULL))
-		goto no_state;
-	ctx->state = state;
-	nfs4_close_state(state, filp->f_mode);
-	ctx->mode = filp->f_mode;
-	nfs_file_set_open_context(filp, ctx);
-	put_nfs_open_context(ctx);
-	if (filp->f_mode & FMODE_WRITE)
-		nfs_begin_data_update(inode);
-	return 0;
-no_state:
-	printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__);
-	put_nfs_open_context(ctx);
-	return status;
-}
-
-/*
- * Release our state
- */
-static int
-nfs4_proc_file_release(struct inode *inode, struct file *filp)
-{
-	if (filp->f_mode & FMODE_WRITE)
-		nfs_end_data_update(inode);
-	nfs_file_clear_open_context(filp);
-	return 0;
-}
-
 static inline int nfs4_server_supports_acls(struct nfs_server *server)
 {
 	return (server->caps & NFS_CAP_ACLS)
@@ -2279,7 +2429,7 @@ static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size
 			return -ENOMEM;
 		args.acl_pages[0] = localpage;
 		args.acl_pgbase = 0;
-		args.acl_len = PAGE_SIZE;
+		resp_len = args.acl_len = PAGE_SIZE;
 	} else {
 		resp_buf = buf;
 		buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
@@ -2339,6 +2489,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 
 	if (!nfs4_server_supports_acls(server))
 		return -EOPNOTSUPP;
+	nfs_inode_return_delegation(inode);
 	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
 	ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
 	if (ret == 0)
@@ -2347,7 +2498,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 }
 
 static int
-nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
+nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 {
 	struct nfs4_client *clp = server->nfs4_state;
 
@@ -2412,14 +2563,11 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
 		*timeout = NFS4_POLL_RETRY_MAX;
 	rpc_clnt_sigmask(clnt, &oldset);
 	if (clnt->cl_intr) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(*timeout);
+		schedule_timeout_interruptible(*timeout);
 		if (signalled())
 			res = -ERESTARTSYS;
-	} else {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(*timeout);
-	}
+	} else
+		schedule_timeout_uninterruptible(*timeout);
 	rpc_clnt_sigunmask(clnt, &oldset);
 	*timeout <<= 1;
 	return res;
@@ -2428,7 +2576,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
 /* This is the error handling routine for processes that are allowed
  * to sleep.
  */
-int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
+int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
 {
 	struct nfs4_client *clp = server->nfs4_state;
 	int ret = errorcode;
@@ -2447,12 +2595,10 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
 		case -NFS4ERR_GRACE:
 		case -NFS4ERR_DELAY:
 			ret = nfs4_delay(server->client, &exception->timeout);
-			if (ret == 0)
-				exception->retry = 1;
-			break;
+			if (ret != 0)
+				break;
 		case -NFS4ERR_OLD_STATEID:
-			if (ret == 0)
-				exception->retry = 1;
+			exception->retry = 1;
 	}
 	/* We failed to handle the error */
 	return nfs4_map_errors(ret);
@@ -2572,8 +2718,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
 static unsigned long
 nfs4_set_lock_task_retry(unsigned long timeout)
 {
-	current->state = TASK_INTERRUPTIBLE;
-	schedule_timeout(timeout);
+	schedule_timeout_interruptible(timeout);
 	timeout <<= 1;
 	if (timeout > NFS4_LOCK_MAXTIMEOUT)
 		return NFS4_LOCK_MAXTIMEOUT;
@@ -2630,7 +2775,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 
 	down_read(&clp->cl_sem);
 	nlo.clientid = clp->cl_clientid;
-	down(&state->lock_sema);
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
 		goto out;
@@ -2657,7 +2801,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 		status = 0;
 	}
 out:
-	up(&state->lock_sema);
 	up_read(&clp->cl_sem);
 	return status;
 }
@@ -2694,79 +2837,153 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
 	return res;
 }
 
-static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
+struct nfs4_unlockdata {
+	struct nfs_lockargs arg;
+	struct nfs_locku_opargs luargs;
+	struct nfs_lockres res;
+	struct nfs4_lock_state *lsp;
+	struct nfs_open_context *ctx;
+	atomic_t refcount;
+	struct completion completion;
+};
+
+static void nfs4_locku_release_calldata(struct nfs4_unlockdata *calldata)
 {
-	struct inode *inode = state->inode;
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_client *clp = server->nfs4_state;
-	struct nfs_lockargs arg = {
-		.fh = NFS_FH(inode),
-		.type = nfs4_lck_type(cmd, request),
-		.offset = request->fl_start,
-		.length = nfs4_lck_length(request),
-	};
-	struct nfs_lockres res = {
-		.server = server,
-	};
+	if (atomic_dec_and_test(&calldata->refcount)) {
+		nfs_free_seqid(calldata->luargs.seqid);
+		nfs4_put_lock_state(calldata->lsp);
+		put_nfs_open_context(calldata->ctx);
+		kfree(calldata);
+	}
+}
+
+static void nfs4_locku_complete(struct nfs4_unlockdata *calldata)
+{
+	complete(&calldata->completion);
+	nfs4_locku_release_calldata(calldata);
+}
+
+static void nfs4_locku_done(struct rpc_task *task)
+{
+	struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata;
+
+	nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid);
+	switch (task->tk_status) {
+		case 0:
+			memcpy(calldata->lsp->ls_stateid.data,
+					calldata->res.u.stateid.data,
+					sizeof(calldata->lsp->ls_stateid.data));
+			break;
+		case -NFS4ERR_STALE_STATEID:
+		case -NFS4ERR_EXPIRED:
+			nfs4_schedule_state_recovery(calldata->res.server->nfs4_state);
+			break;
+		default:
+			if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) {
+				rpc_restart_call(task);
+				return;
+			}
+	}
+	nfs4_locku_complete(calldata);
+}
+
+static void nfs4_locku_begin(struct rpc_task *task)
+{
+	struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata;
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
-		.rpc_argp       = &arg,
-		.rpc_resp       = &res,
-		.rpc_cred	= state->owner->so_cred,
+		.rpc_argp       = &calldata->arg,
+		.rpc_resp       = &calldata->res,
+		.rpc_cred	= calldata->lsp->ls_state->owner->so_cred,
 	};
+	int status;
+
+	status = nfs_wait_on_sequence(calldata->luargs.seqid, task);
+	if (status != 0)
+		return;
+	if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) {
+		nfs4_locku_complete(calldata);
+		task->tk_exit = NULL;
+		rpc_exit(task, 0);
+		return;
+	}
+	rpc_call_setup(task, &msg, 0);
+}
+
+static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+	struct nfs4_unlockdata *calldata;
+	struct inode *inode = state->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_lock_state *lsp;
-	struct nfs_locku_opargs luargs;
 	int status;
-			
-	down_read(&clp->cl_sem);
-	down(&state->lock_sema);
+
+	/* Is this a delegated lock? */
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
+		return do_vfs_lock(request->fl_file, request);
+
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
-		goto out;
+		return status;
 	lsp = request->fl_u.nfs4_fl.owner;
 	/* We might have lost the locks! */
 	if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0)
-		goto out;
-	luargs.seqid = lsp->ls_seqid;
-	memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid));
-	arg.u.locku = &luargs;
-	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_lock_seqid(status, lsp);
-
-	if (status == 0)
-		memcpy(&lsp->ls_stateid,  &res.u.stateid, 
-				sizeof(lsp->ls_stateid));
-out:
-	up(&state->lock_sema);
+		return 0;
+	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
+	if (calldata == NULL)
+		return -ENOMEM;
+	calldata->luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+	if (calldata->luargs.seqid == NULL) {
+		kfree(calldata);
+		return -ENOMEM;
+	}
+	calldata->luargs.stateid = &lsp->ls_stateid;
+	calldata->arg.fh = NFS_FH(inode);
+	calldata->arg.type = nfs4_lck_type(cmd, request);
+	calldata->arg.offset = request->fl_start;
+	calldata->arg.length = nfs4_lck_length(request);
+	calldata->arg.u.locku = &calldata->luargs;
+	calldata->res.server = server;
+	calldata->lsp = lsp;
+	atomic_inc(&lsp->ls_count);
+
+	/* Ensure we don't close file until we're done freeing locks! */
+	calldata->ctx = get_nfs_open_context((struct nfs_open_context*)request->fl_file->private_data);
+
+	atomic_set(&calldata->refcount, 2);
+	init_completion(&calldata->completion);
+
+	status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin,
+			nfs4_locku_done, calldata);
 	if (status == 0)
-		do_vfs_lock(request->fl_file, request);
-	up_read(&clp->cl_sem);
+		wait_for_completion_interruptible(&calldata->completion);
+	do_vfs_lock(request->fl_file, request);
+	nfs4_locku_release_calldata(calldata);
 	return status;
 }
 
-static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
-{
-	struct nfs4_exception exception = { };
-	int err;
-
-	do {
-		err = nfs4_handle_exception(NFS_SERVER(state->inode),
-				_nfs4_proc_unlck(state, cmd, request),
-				&exception);
-	} while (exception.retry);
-	return err;
-}
-
 static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim)
 {
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
+	struct nfs_lock_opargs largs = {
+		.lock_stateid = &lsp->ls_stateid,
+		.open_stateid = &state->stateid,
+		.lock_owner = {
+			.clientid = server->nfs4_state->cl_clientid,
+			.id = lsp->ls_id,
+		},
+		.reclaim = reclaim,
+	};
 	struct nfs_lockargs arg = {
 		.fh = NFS_FH(inode),
 		.type = nfs4_lck_type(cmd, request),
 		.offset = request->fl_start,
 		.length = nfs4_lck_length(request),
+		.u = {
+			.lock = &largs,
+		},
 	};
 	struct nfs_lockres res = {
 		.server = server,
@@ -2777,53 +2994,39 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 		.rpc_resp       = &res,
 		.rpc_cred	= state->owner->so_cred,
 	};
-	struct nfs_lock_opargs largs = {
-		.reclaim = reclaim,
-		.new_lock_owner = 0,
-	};
-	int status;
+	int status = -ENOMEM;
 
-	if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) {
+	largs.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+	if (largs.lock_seqid == NULL)
+		return -ENOMEM;
+	if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) {
 		struct nfs4_state_owner *owner = state->owner;
-		struct nfs_open_to_lock otl = {
-			.lock_owner = {
-				.clientid = server->nfs4_state->cl_clientid,
-			},
-		};
-
-		otl.lock_seqid = lsp->ls_seqid;
-		otl.lock_owner.id = lsp->ls_id;
-		memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid));
-		largs.u.open_lock = &otl;
+
+		largs.open_seqid = nfs_alloc_seqid(&owner->so_seqid);
+		if (largs.open_seqid == NULL)
+			goto out;
 		largs.new_lock_owner = 1;
-		arg.u.lock = &largs;
-		down(&owner->so_sema);
-		otl.open_seqid = owner->so_seqid;
 		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-		/* increment open_owner seqid on success, and 
-		* seqid mutating errors */
-		nfs4_increment_seqid(status, owner);
-		up(&owner->so_sema);
-		if (status == 0) {
-			lsp->ls_flags |= NFS_LOCK_INITIALIZED;
-			lsp->ls_seqid++;
+		/* increment open seqid on success, and seqid mutating errors */
+		if (largs.new_lock_owner != 0) {
+			nfs_increment_open_seqid(status, largs.open_seqid);
+			if (status == 0)
+				nfs_confirm_seqid(&lsp->ls_seqid, 0);
 		}
-	} else {
-		struct nfs_exist_lock el = {
-			.seqid = lsp->ls_seqid,
-		};
-		memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid));
-		largs.u.exist_lock = &el;
-		arg.u.lock = &largs;
+		nfs_free_seqid(largs.open_seqid);
+	} else
 		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-		/* increment seqid on success, and * seqid mutating errors*/
-		nfs4_increment_lock_seqid(status, lsp);
-	}
+	/* increment lock seqid on success, and seqid mutating errors*/
+	nfs_increment_lock_seqid(status, largs.lock_seqid);
 	/* save the returned stateid. */
-	if (status == 0)
-		memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid));
-	else if (status == -NFS4ERR_DENIED)
+	if (status == 0) {
+		memcpy(lsp->ls_stateid.data, res.u.stateid.data,
+				sizeof(lsp->ls_stateid.data));
+		lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+	} else if (status == -NFS4ERR_DENIED)
 		status = -EAGAIN;
+out:
+	nfs_free_seqid(largs.lock_seqid);
 	return status;
 }
 
@@ -2833,6 +3036,9 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
 	struct nfs4_exception exception = { };
 	int err;
 
+	/* Cache the lock if possible... */
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
+		return 0;
 	do {
 		err = _nfs4_do_setlk(state, F_SETLK, request, 1);
 		if (err != -NFS4ERR_DELAY)
@@ -2848,6 +3054,9 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
 	struct nfs4_exception exception = { };
 	int err;
 
+	err = nfs4_set_lock_state(state, request);
+	if (err != 0)
+		return err;
 	do {
 		err = _nfs4_do_setlk(state, F_SETLK, request, 0);
 		if (err != -NFS4ERR_DELAY)
@@ -2863,17 +3072,25 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 	int status;
 
 	down_read(&clp->cl_sem);
-	down(&state->lock_sema);
-	status = nfs4_set_lock_state(state, request);
-	if (status == 0)
-		status = _nfs4_do_setlk(state, cmd, request, 0);
-	up(&state->lock_sema);
-	if (status == 0) {
-		/* Note: we always want to sleep here! */
-		request->fl_flags |= FL_SLEEP;
-		if (do_vfs_lock(request->fl_file, request) < 0)
-			printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__);
+	/* Is this a delegated open? */
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+		/* Yes: cache locks! */
+		status = do_vfs_lock(request->fl_file, request);
+		/* ...but avoid races with delegation recall... */
+		if (status < 0 || test_bit(NFS_DELEGATED_STATE, &state->flags))
+			goto out;
 	}
+	status = nfs4_set_lock_state(state, request);
+	if (status != 0)
+		goto out;
+	status = _nfs4_do_setlk(state, cmd, request, 0);
+	if (status != 0)
+		goto out;
+	/* Note: we always want to sleep here! */
+	request->fl_flags |= FL_SLEEP;
+	if (do_vfs_lock(request->fl_file, request) < 0)
+		printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__);
+out:
 	up_read(&clp->cl_sem);
 	return status;
 }
@@ -2927,6 +3144,24 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
 	return status;
 }
 
+int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
+{
+	struct nfs_server *server = NFS_SERVER(state->inode);
+	struct nfs4_exception exception = { };
+	int err;
+
+	err = nfs4_set_lock_state(state, fl);
+	if (err != 0)
+		goto out;
+	do {
+		err = _nfs4_do_setlk(state, F_SETLK, fl, 0);
+		if (err != -NFS4ERR_DELAY)
+			break;
+		err = nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+out:
+	return err;
+}
 
 #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
 
@@ -3022,8 +3257,8 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.read_setup	= nfs4_proc_read_setup,
 	.write_setup	= nfs4_proc_write_setup,
 	.commit_setup	= nfs4_proc_commit_setup,
-	.file_open      = nfs4_proc_file_open,
-	.file_release   = nfs4_proc_file_release,
+	.file_open      = nfs_open,
+	.file_release   = nfs_release,
 	.lock		= nfs4_proc_lock,
 	.clear_acl_cache = nfs4_zap_acl_attr,
 };
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index afe587d82f1..52a26baa114 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -264,13 +264,16 @@ nfs4_alloc_state_owner(void)
 {
 	struct nfs4_state_owner *sp;
 
-	sp = kmalloc(sizeof(*sp),GFP_KERNEL);
+	sp = kzalloc(sizeof(*sp),GFP_KERNEL);
 	if (!sp)
 		return NULL;
-	init_MUTEX(&sp->so_sema);
-	sp->so_seqid = 0;                 /* arbitrary */
+	spin_lock_init(&sp->so_lock);
 	INIT_LIST_HEAD(&sp->so_states);
 	INIT_LIST_HEAD(&sp->so_delegations);
+	rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
+	sp->so_seqid.sequence = &sp->so_sequence;
+	spin_lock_init(&sp->so_sequence.lock);
+	INIT_LIST_HEAD(&sp->so_sequence.list);
 	atomic_set(&sp->so_count, 1);
 	return sp;
 }
@@ -359,35 +362,25 @@ nfs4_alloc_open_state(void)
 	memset(state->stateid.data, 0, sizeof(state->stateid.data));
 	atomic_set(&state->count, 1);
 	INIT_LIST_HEAD(&state->lock_states);
-	init_MUTEX(&state->lock_sema);
 	spin_lock_init(&state->state_lock);
 	return state;
 }
 
-static struct nfs4_state *
-__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
+void
+nfs4_state_set_mode_locked(struct nfs4_state *state, mode_t mode)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs4_state *state;
-
-	mode &= (FMODE_READ|FMODE_WRITE);
-	list_for_each_entry(state, &nfsi->open_states, inode_states) {
-		if (state->owner->so_cred != cred)
-			continue;
-		if ((mode & FMODE_READ) != 0 && state->nreaders == 0)
-			continue;
-		if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0)
-			continue;
-		if ((state->state & mode) != mode)
-			continue;
-		atomic_inc(&state->count);
-		if (mode & FMODE_READ)
-			state->nreaders++;
+	if (state->state == mode)
+		return;
+	/* NB! List reordering - see the reclaim code for why.  */
+	if ((mode & FMODE_WRITE) != (state->state & FMODE_WRITE)) {
 		if (mode & FMODE_WRITE)
-			state->nwriters++;
-		return state;
+			list_move(&state->open_states, &state->owner->so_states);
+		else
+			list_move_tail(&state->open_states, &state->owner->so_states);
 	}
-	return NULL;
+	if (mode == 0)
+		list_del_init(&state->inode_states);
+	state->state = mode;
 }
 
 static struct nfs4_state *
@@ -398,7 +391,7 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
 
 	list_for_each_entry(state, &nfsi->open_states, inode_states) {
 		/* Is this in the process of being freed? */
-		if (state->nreaders == 0 && state->nwriters == 0)
+		if (state->state == 0)
 			continue;
 		if (state->owner == owner) {
 			atomic_inc(&state->count);
@@ -408,17 +401,6 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
 	return NULL;
 }
 
-struct nfs4_state *
-nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
-{
-	struct nfs4_state *state;
-
-	spin_lock(&inode->i_lock);
-	state = __nfs4_find_state(inode, cred, mode);
-	spin_unlock(&inode->i_lock);
-	return state;
-}
-
 static void
 nfs4_free_open_state(struct nfs4_state *state)
 {
@@ -437,21 +419,23 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
 	if (state)
 		goto out;
 	new = nfs4_alloc_open_state();
+	spin_lock(&owner->so_lock);
 	spin_lock(&inode->i_lock);
 	state = __nfs4_find_state_byowner(inode, owner);
 	if (state == NULL && new != NULL) {
 		state = new;
-		/* Caller *must* be holding owner->so_sem */
-		/* Note: The reclaim code dictates that we add stateless
-		 * and read-only stateids to the end of the list */
-		list_add_tail(&state->open_states, &owner->so_states);
 		state->owner = owner;
 		atomic_inc(&owner->so_count);
 		list_add(&state->inode_states, &nfsi->open_states);
 		state->inode = igrab(inode);
 		spin_unlock(&inode->i_lock);
+		/* Note: The reclaim code dictates that we add stateless
+		 * and read-only stateids to the end of the list */
+		list_add_tail(&state->open_states, &owner->so_states);
+		spin_unlock(&owner->so_lock);
 	} else {
 		spin_unlock(&inode->i_lock);
+		spin_unlock(&owner->so_lock);
 		if (new)
 			nfs4_free_open_state(new);
 	}
@@ -461,68 +445,59 @@ out:
 
 /*
  * Beware! Caller must be holding exactly one
- * reference to clp->cl_sem and owner->so_sema!
+ * reference to clp->cl_sem!
  */
 void nfs4_put_open_state(struct nfs4_state *state)
 {
 	struct inode *inode = state->inode;
 	struct nfs4_state_owner *owner = state->owner;
 
-	if (!atomic_dec_and_lock(&state->count, &inode->i_lock))
+	if (!atomic_dec_and_lock(&state->count, &owner->so_lock))
 		return;
+	spin_lock(&inode->i_lock);
 	if (!list_empty(&state->inode_states))
 		list_del(&state->inode_states);
-	spin_unlock(&inode->i_lock);
 	list_del(&state->open_states);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&owner->so_lock);
 	iput(inode);
-	BUG_ON (state->state != 0);
 	nfs4_free_open_state(state);
 	nfs4_put_state_owner(owner);
 }
 
 /*
- * Beware! Caller must be holding no references to clp->cl_sem!
- * of owner->so_sema!
+ * Close the current file.
  */
 void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 {
 	struct inode *inode = state->inode;
 	struct nfs4_state_owner *owner = state->owner;
-	struct nfs4_client *clp = owner->so_client;
-	int newstate;
+	int oldstate, newstate = 0;
 
 	atomic_inc(&owner->so_count);
-	down_read(&clp->cl_sem);
-	down(&owner->so_sema);
 	/* Protect against nfs4_find_state() */
+	spin_lock(&owner->so_lock);
 	spin_lock(&inode->i_lock);
 	if (mode & FMODE_READ)
 		state->nreaders--;
 	if (mode & FMODE_WRITE)
 		state->nwriters--;
-	if (state->nwriters == 0) {
-		if (state->nreaders == 0)
-			list_del_init(&state->inode_states);
-		/* See reclaim code */
-		list_move_tail(&state->open_states, &owner->so_states);
+	oldstate = newstate = state->state;
+	if (state->nreaders == 0)
+		newstate &= ~FMODE_READ;
+	if (state->nwriters == 0)
+		newstate &= ~FMODE_WRITE;
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+		nfs4_state_set_mode_locked(state, newstate);
+		oldstate = newstate;
 	}
 	spin_unlock(&inode->i_lock);
-	newstate = 0;
-	if (state->state != 0) {
-		if (state->nreaders)
-			newstate |= FMODE_READ;
-		if (state->nwriters)
-			newstate |= FMODE_WRITE;
-		if (state->state == newstate)
-			goto out;
-		if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS)
-			return;
-	}
-out:
+	spin_unlock(&owner->so_lock);
+
+	if (oldstate != newstate && nfs4_do_close(inode, state) == 0)
+		return;
 	nfs4_put_open_state(state);
-	up(&owner->so_sema);
 	nfs4_put_state_owner(owner);
-	up_read(&clp->cl_sem);
 }
 
 /*
@@ -546,19 +521,16 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
  * Return a compatible lock_state. If no initialized lock_state structure
  * exists, return an uninitialized one.
  *
- * The caller must be holding state->lock_sema
  */
 static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 {
 	struct nfs4_lock_state *lsp;
 	struct nfs4_client *clp = state->owner->so_client;
 
-	lsp = kmalloc(sizeof(*lsp), GFP_KERNEL);
+	lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
 	if (lsp == NULL)
 		return NULL;
-	lsp->ls_flags = 0;
-	lsp->ls_seqid = 0;	/* arbitrary */
-	memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data));
+	lsp->ls_seqid.sequence = &state->owner->so_sequence;
 	atomic_set(&lsp->ls_count, 1);
 	lsp->ls_owner = fl_owner;
 	spin_lock(&clp->cl_lock);
@@ -572,7 +544,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
  * Return a compatible lock_state. If no initialized lock_state structure
  * exists, return an uninitialized one.
  *
- * The caller must be holding state->lock_sema and clp->cl_sem
+ * The caller must be holding clp->cl_sem
  */
 static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
 {
@@ -605,7 +577,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
  * Release reference to lock_state, and free it if we see that
  * it is no longer in use
  */
-static void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
+void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
 {
 	struct nfs4_state *state;
 
@@ -673,29 +645,94 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f
 	nfs4_put_lock_state(lsp);
 }
 
-/*
-* Called with state->lock_sema and clp->cl_sem held.
-*/
-void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
+struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
 {
-	if (status == NFS_OK || seqid_mutating_err(-status))
-		lsp->ls_seqid++;
+	struct nfs_seqid *new;
+
+	new = kmalloc(sizeof(*new), GFP_KERNEL);
+	if (new != NULL) {
+		new->sequence = counter;
+		INIT_LIST_HEAD(&new->list);
+	}
+	return new;
+}
+
+void nfs_free_seqid(struct nfs_seqid *seqid)
+{
+	struct rpc_sequence *sequence = seqid->sequence->sequence;
+
+	if (!list_empty(&seqid->list)) {
+		spin_lock(&sequence->lock);
+		list_del(&seqid->list);
+		spin_unlock(&sequence->lock);
+	}
+	rpc_wake_up_next(&sequence->wait);
+	kfree(seqid);
 }
 
 /*
-* Called with sp->so_sema and clp->cl_sem held.
-*
-* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
-* failed with a seqid incrementing error -
-* see comments nfs_fs.h:seqid_mutating_error()
-*/
-void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp)
-{
-	if (status == NFS_OK || seqid_mutating_err(-status))
-		sp->so_seqid++;
-	/* If the server returns BAD_SEQID, unhash state_owner here */
-	if (status == -NFS4ERR_BAD_SEQID)
+ * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
+ * failed with a seqid incrementing error -
+ * see comments nfs_fs.h:seqid_mutating_error()
+ */
+static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
+{
+	switch (status) {
+		case 0:
+			break;
+		case -NFS4ERR_BAD_SEQID:
+		case -NFS4ERR_STALE_CLIENTID:
+		case -NFS4ERR_STALE_STATEID:
+		case -NFS4ERR_BAD_STATEID:
+		case -NFS4ERR_BADXDR:
+		case -NFS4ERR_RESOURCE:
+		case -NFS4ERR_NOFILEHANDLE:
+			/* Non-seqid mutating errors */
+			return;
+	};
+	/*
+	 * Note: no locking needed as we are guaranteed to be first
+	 * on the sequence list
+	 */
+	seqid->sequence->counter++;
+}
+
+void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
+{
+	if (status == -NFS4ERR_BAD_SEQID) {
+		struct nfs4_state_owner *sp = container_of(seqid->sequence,
+				struct nfs4_state_owner, so_seqid);
 		nfs4_drop_state_owner(sp);
+	}
+	return nfs_increment_seqid(status, seqid);
+}
+
+/*
+ * Increment the seqid if the LOCK/LOCKU succeeded, or
+ * failed with a seqid incrementing error -
+ * see comments nfs_fs.h:seqid_mutating_error()
+ */
+void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
+{
+	return nfs_increment_seqid(status, seqid);
+}
+
+int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
+{
+	struct rpc_sequence *sequence = seqid->sequence->sequence;
+	int status = 0;
+
+	if (sequence->list.next == &seqid->list)
+		goto out;
+	spin_lock(&sequence->lock);
+	if (!list_empty(&sequence->list)) {
+		rpc_sleep_on(&sequence->wait, task, NULL, NULL);
+		status = -EAGAIN;
+	} else
+		list_add(&seqid->list, &sequence->list);
+	spin_unlock(&sequence->lock);
+out:
+	return status;
 }
 
 static int reclaimer(void *);
@@ -747,7 +784,7 @@ static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_s
 	int status = 0;
 
 	for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
-		if (!(fl->fl_flags & FL_POSIX))
+		if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
 			continue;
 		if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state)
 			continue;
@@ -762,7 +799,7 @@ static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_s
 			case -NFS4ERR_NO_GRACE:
 			case -NFS4ERR_RECLAIM_BAD:
 			case -NFS4ERR_RECLAIM_CONFLICT:
-				/* kill_proc(fl->fl_owner, SIGLOST, 1); */
+				/* kill_proc(fl->fl_pid, SIGLOST, 1); */
 				break;
 			case -NFS4ERR_STALE_CLIENTID:
 				goto out_err;
@@ -791,8 +828,6 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n
 		if (state->state == 0)
 			continue;
 		status = ops->recover_open(sp, state);
-		list_for_each_entry(lock, &state->lock_states, ls_locks)
-			lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
 		if (status >= 0) {
 			status = nfs4_reclaim_locks(ops, state);
 			if (status < 0)
@@ -831,6 +866,28 @@ out_err:
 	return status;
 }
 
+static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
+{
+	struct nfs4_state_owner *sp;
+	struct nfs4_state *state;
+	struct nfs4_lock_state *lock;
+
+	/* Reset all sequence ids to zero */
+	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+		sp->so_seqid.counter = 0;
+		sp->so_seqid.flags = 0;
+		spin_lock(&sp->so_lock);
+		list_for_each_entry(state, &sp->so_states, open_states) {
+			list_for_each_entry(lock, &state->lock_states, ls_locks) {
+				lock->ls_seqid.counter = 0;
+				lock->ls_seqid.flags = 0;
+				lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
+			}
+		}
+		spin_unlock(&sp->so_lock);
+	}
+}
+
 static int reclaimer(void *ptr)
 {
 	struct reclaimer_args *args = (struct reclaimer_args *)ptr;
@@ -864,6 +921,7 @@ restart_loop:
 		default:
 			ops = &nfs4_network_partition_recovery_ops;
 	};
+	nfs4_state_mark_reclaim(clp);
 	status = __nfs4_init_client(clp);
 	if (status)
 		goto out_error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6c564ef9489..fbbace8a30c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -95,6 +95,8 @@ static int nfs_stat_to_errno(int);
 #define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
 #define encode_savefh_maxsz     (op_encode_hdr_maxsz)
 #define decode_savefh_maxsz     (op_decode_hdr_maxsz)
+#define encode_restorefh_maxsz  (op_encode_hdr_maxsz)
+#define decode_restorefh_maxsz  (op_decode_hdr_maxsz)
 #define encode_fsinfo_maxsz	(op_encode_hdr_maxsz + 2)
 #define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + 11)
 #define encode_renew_maxsz	(op_encode_hdr_maxsz + 3)
@@ -157,16 +159,20 @@ static int nfs_stat_to_errno(int);
 				op_decode_hdr_maxsz + 2)
 #define NFS4_enc_write_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
-				op_encode_hdr_maxsz + 8)
+				op_encode_hdr_maxsz + 8 + \
+				encode_getattr_maxsz)
 #define NFS4_dec_write_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
-				op_decode_hdr_maxsz + 4)
+				op_decode_hdr_maxsz + 4 + \
+				decode_getattr_maxsz)
 #define NFS4_enc_commit_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
-				op_encode_hdr_maxsz + 3)
+				op_encode_hdr_maxsz + 3 + \
+				encode_getattr_maxsz)
 #define NFS4_dec_commit_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
-				op_decode_hdr_maxsz + 2)
+				op_decode_hdr_maxsz + 2 + \
+				decode_getattr_maxsz)
 #define NFS4_enc_open_sz        (compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
                                 op_encode_hdr_maxsz + \
@@ -196,17 +202,21 @@ static int nfs_stat_to_errno(int);
 #define NFS4_enc_open_downgrade_sz \
 				(compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
-                                op_encode_hdr_maxsz + 7)
+                                op_encode_hdr_maxsz + 7 + \
+				encode_getattr_maxsz)
 #define NFS4_dec_open_downgrade_sz \
 				(compound_decode_hdr_maxsz + \
                                 decode_putfh_maxsz + \
-                                op_decode_hdr_maxsz + 4)
+                                op_decode_hdr_maxsz + 4 + \
+				decode_getattr_maxsz)
 #define NFS4_enc_close_sz       (compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
-                                op_encode_hdr_maxsz + 5)
+                                op_encode_hdr_maxsz + 5 + \
+				encode_getattr_maxsz)
 #define NFS4_dec_close_sz       (compound_decode_hdr_maxsz + \
                                 decode_putfh_maxsz + \
-                                op_decode_hdr_maxsz + 4)
+                                op_decode_hdr_maxsz + 4 + \
+				decode_getattr_maxsz)
 #define NFS4_enc_setattr_sz     (compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
                                 op_encode_hdr_maxsz + 4 + \
@@ -300,30 +310,44 @@ static int nfs_stat_to_errno(int);
 				decode_getfh_maxsz)
 #define NFS4_enc_remove_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
-				encode_remove_maxsz)
+				encode_remove_maxsz + \
+				encode_getattr_maxsz)
 #define NFS4_dec_remove_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
-				op_decode_hdr_maxsz + 5)
+				op_decode_hdr_maxsz + 5 + \
+				decode_getattr_maxsz)
 #define NFS4_enc_rename_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
 				encode_savefh_maxsz + \
 				encode_putfh_maxsz + \
-				encode_rename_maxsz)
+				encode_rename_maxsz + \
+				encode_getattr_maxsz + \
+				encode_restorefh_maxsz + \
+				encode_getattr_maxsz)
 #define NFS4_dec_rename_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
 				decode_savefh_maxsz + \
 				decode_putfh_maxsz + \
-				decode_rename_maxsz)
+				decode_rename_maxsz + \
+				decode_getattr_maxsz + \
+				decode_restorefh_maxsz + \
+				decode_getattr_maxsz)
 #define NFS4_enc_link_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
 				encode_savefh_maxsz + \
 				encode_putfh_maxsz + \
-				encode_link_maxsz)
+				encode_link_maxsz + \
+				decode_getattr_maxsz + \
+				encode_restorefh_maxsz + \
+				decode_getattr_maxsz)
 #define NFS4_dec_link_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
 				decode_savefh_maxsz + \
 				decode_putfh_maxsz + \
-				decode_link_maxsz)
+				decode_link_maxsz + \
+				decode_getattr_maxsz + \
+				decode_restorefh_maxsz + \
+				decode_getattr_maxsz)
 #define NFS4_enc_symlink_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
 				encode_symlink_maxsz + \
@@ -336,14 +360,20 @@ static int nfs_stat_to_errno(int);
 				decode_getfh_maxsz)
 #define NFS4_enc_create_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
+				encode_savefh_maxsz + \
 				encode_create_maxsz + \
+				encode_getfh_maxsz + \
 				encode_getattr_maxsz + \
-				encode_getfh_maxsz)
+				encode_restorefh_maxsz + \
+				encode_getattr_maxsz)
 #define NFS4_dec_create_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
+				decode_savefh_maxsz + \
 				decode_create_maxsz + \
+				decode_getfh_maxsz + \
 				decode_getattr_maxsz + \
-				decode_getfh_maxsz)
+				decode_restorefh_maxsz + \
+				decode_getattr_maxsz)
 #define NFS4_enc_pathconf_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
 				encode_getattr_maxsz)
@@ -602,10 +632,10 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
 {
 	uint32_t *p;
 
-	RESERVE_SPACE(8+sizeof(arg->stateid.data));
+	RESERVE_SPACE(8+sizeof(arg->stateid->data));
 	WRITE32(OP_CLOSE);
-	WRITE32(arg->seqid);
-	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+	WRITE32(arg->seqid->sequence->counter);
+	WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data));
 	
 	return 0;
 }
@@ -729,22 +759,18 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
 	WRITE64(arg->length);
 	WRITE32(opargs->new_lock_owner);
 	if (opargs->new_lock_owner){
-		struct nfs_open_to_lock *ol = opargs->u.open_lock;
-
 		RESERVE_SPACE(40);
-		WRITE32(ol->open_seqid);
-		WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid));
-		WRITE32(ol->lock_seqid);
-		WRITE64(ol->lock_owner.clientid);
+		WRITE32(opargs->open_seqid->sequence->counter);
+		WRITEMEM(opargs->open_stateid->data, sizeof(opargs->open_stateid->data));
+		WRITE32(opargs->lock_seqid->sequence->counter);
+		WRITE64(opargs->lock_owner.clientid);
 		WRITE32(4);
-		WRITE32(ol->lock_owner.id);
+		WRITE32(opargs->lock_owner.id);
 	}
 	else {
-		struct nfs_exist_lock *el = opargs->u.exist_lock;
-
 		RESERVE_SPACE(20);
-		WRITEMEM(&el->stateid, sizeof(el->stateid));
-		WRITE32(el->seqid);
+		WRITEMEM(opargs->lock_stateid->data, sizeof(opargs->lock_stateid->data));
+		WRITE32(opargs->lock_seqid->sequence->counter);
 	}
 
 	return 0;
@@ -775,8 +801,8 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
 	RESERVE_SPACE(44);
 	WRITE32(OP_LOCKU);
 	WRITE32(arg->type);
-	WRITE32(opargs->seqid);
-	WRITEMEM(&opargs->stateid, sizeof(opargs->stateid));
+	WRITE32(opargs->seqid->sequence->counter);
+	WRITEMEM(opargs->stateid->data, sizeof(opargs->stateid->data));
 	WRITE64(arg->offset);
 	WRITE64(arg->length);
 
@@ -826,7 +852,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
  */
 	RESERVE_SPACE(8);
 	WRITE32(OP_OPEN);
-	WRITE32(arg->seqid);
+	WRITE32(arg->seqid->sequence->counter);
 	encode_share_access(xdr, arg->open_flags);
 	RESERVE_SPACE(16);
 	WRITE64(arg->clientid);
@@ -941,7 +967,7 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con
 	RESERVE_SPACE(8+sizeof(arg->stateid.data));
 	WRITE32(OP_OPEN_CONFIRM);
 	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
-	WRITE32(arg->seqid);
+	WRITE32(arg->seqid->sequence->counter);
 
 	return 0;
 }
@@ -950,10 +976,10 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea
 {
 	uint32_t *p;
 
-	RESERVE_SPACE(8+sizeof(arg->stateid.data));
+	RESERVE_SPACE(8+sizeof(arg->stateid->data));
 	WRITE32(OP_OPEN_DOWNGRADE);
-	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
-	WRITE32(arg->seqid);
+	WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data));
+	WRITE32(arg->seqid->sequence->counter);
 	encode_share_access(xdr, arg->open_flags);
 	return 0;
 }
@@ -1117,6 +1143,17 @@ static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client
 }
 
 static int
+encode_restorefh(struct xdr_stream *xdr)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_RESTOREFH);
+
+	return 0;
+}
+
+static int
 encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
 {
 	uint32_t *p;
@@ -1296,14 +1333,18 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, uint32_t *p, const struct n
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 2,
+		.nops = 3,
 	};
 	int status;
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
-	if ((status = encode_putfh(&xdr, args->fh)) == 0)
-		status = encode_remove(&xdr, args->name);
+	if ((status = encode_putfh(&xdr, args->fh)) != 0)
+		goto out;
+	if ((status = encode_remove(&xdr, args->name)) != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
+out:
 	return status;
 }
 
@@ -1314,7 +1355,7 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, const struct n
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 4,
+		.nops = 7,
 	};
 	int status;
 
@@ -1326,7 +1367,13 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, const struct n
 		goto out;
 	if ((status = encode_putfh(&xdr, args->new_dir)) != 0)
 		goto out;
-	status = encode_rename(&xdr, args->old_name, args->new_name);
+	if ((status = encode_rename(&xdr, args->old_name, args->new_name)) != 0)
+		goto out;
+	if ((status = encode_getfattr(&xdr, args->bitmask)) != 0)
+		goto out;
+	if ((status = encode_restorefh(&xdr)) != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
 }
@@ -1338,7 +1385,7 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, const struct nfs
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 4,
+		.nops = 7,
 	};
 	int status;
 
@@ -1350,7 +1397,13 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, const struct nfs
 		goto out;
 	if ((status = encode_putfh(&xdr, args->dir_fh)) != 0)
 		goto out;
-	status = encode_link(&xdr, args->name);
+	if ((status = encode_link(&xdr, args->name)) != 0)
+		goto out;
+	if ((status = encode_getfattr(&xdr, args->bitmask)) != 0)
+		goto out;
+	if ((status = encode_restorefh(&xdr)) != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
 }
@@ -1362,7 +1415,7 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, const struct n
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 4,
+		.nops = 7,
 	};
 	int status;
 
@@ -1370,10 +1423,16 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, const struct n
 	encode_compound_hdr(&xdr, &hdr);
 	if ((status = encode_putfh(&xdr, args->dir_fh)) != 0)
 		goto out;
+	if ((status = encode_savefh(&xdr)) != 0)
+		goto out;
 	if ((status = encode_create(&xdr, args)) != 0)
 		goto out;
 	if ((status = encode_getfh(&xdr)) != 0)
 		goto out;
+	if ((status = encode_getfattr(&xdr, args->bitmask)) != 0)
+		goto out;
+	if ((status = encode_restorefh(&xdr)) != 0)
+		goto out;
 	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
@@ -1412,7 +1471,7 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos
 {
         struct xdr_stream xdr;
         struct compound_hdr hdr = {
-                .nops   = 2,
+                .nops   = 3,
         };
         int status;
 
@@ -1422,6 +1481,9 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos
         if(status)
                 goto out;
         status = encode_close(&xdr, args);
+	if (status != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
         return status;
 }
@@ -1433,15 +1495,21 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 4,
+		.nops = 7,
 	};
 	int status;
 
+	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
+	if (status != 0)
+		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
 	if (status)
 		goto out;
+	status = encode_savefh(&xdr);
+	if (status)
+		goto out;
 	status = encode_open(&xdr, args);
 	if (status)
 		goto out;
@@ -1449,6 +1517,12 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena
 	if (status)
 		goto out;
 	status = encode_getfattr(&xdr, args->bitmask);
+	if (status)
+		goto out;
+	status = encode_restorefh(&xdr);
+	if (status)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
 }
@@ -1464,6 +1538,9 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct n
 	};
 	int status;
 
+	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
+	if (status != 0)
+		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1485,6 +1562,9 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nf
 	};
 	int status;
 
+	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
+	if (status != 0)
+		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1502,7 +1582,7 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops	= 2,
+		.nops	= 3,
 	};
 	int status;
 
@@ -1512,6 +1592,9 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct
 	if (status)
 		goto out;
 	status = encode_open_downgrade(&xdr, args);
+	if (status != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
 }
@@ -1525,8 +1608,15 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_locka
 	struct compound_hdr hdr = {
 		.nops   = 2,
 	};
+	struct nfs_lock_opargs *opargs = args->u.lock;
 	int status;
 
+	status = nfs_wait_on_sequence(opargs->lock_seqid, req->rq_task);
+	if (status != 0)
+		goto out;
+	/* Do we need to do an open_to_lock_owner? */
+	if (opargs->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)
+		opargs->new_lock_owner = 0;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1713,7 +1803,7 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writ
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 2,
+		.nops = 3,
 	};
 	int status;
 
@@ -1723,6 +1813,9 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writ
 	if (status)
 		goto out;
 	status = encode_write(&xdr, args);
+	if (status)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
 }
@@ -1734,7 +1827,7 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, uint32_t *p, struct nfs_wri
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 2,
+		.nops = 3,
 	};
 	int status;
 
@@ -1744,6 +1837,9 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, uint32_t *p, struct nfs_wri
 	if (status)
 		goto out;
 	status = encode_commit(&xdr, args);
+	if (status)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
 }
@@ -2670,8 +2766,7 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re
 		goto xdr_error;
 	status = verify_attr_len(xdr, savep, attrlen);
 xdr_error:
-	if (status != 0)
-		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status);
 	return status;
 }
 	
@@ -2704,8 +2799,7 @@ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
 
 	status = verify_attr_len(xdr, savep, attrlen);
 xdr_error:
-	if (status != 0)
-		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status);
 	return status;
 }
 
@@ -2730,8 +2824,7 @@ static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf
 
 	status = verify_attr_len(xdr, savep, attrlen);
 xdr_error:
-	if (status != 0)
-		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status);
 	return status;
 }
 
@@ -2787,13 +2880,10 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
 		goto xdr_error;
 	if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0)
 		goto xdr_error;
-	if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) {
+	if ((status = verify_attr_len(xdr, savep, attrlen)) == 0)
 		fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
-		fattr->timestamp = jiffies;
-	}
 xdr_error:
-	if (status != 0)
-		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	dprintk("%s: xdr returned %d\n", __FUNCTION__, -status);
 	return status;
 }
 
@@ -2826,8 +2916,7 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
 
 	status = verify_attr_len(xdr, savep, attrlen);
 xdr_error:
-	if (status != 0)
-		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status);
 	return status;
 }
 
@@ -2890,8 +2979,8 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res)
 
 	status = decode_op_hdr(xdr, OP_LOCK);
 	if (status == 0) {
-		READ_BUF(sizeof(nfs4_stateid));
-		COPYMEM(&res->u.stateid, sizeof(res->u.stateid));
+		READ_BUF(sizeof(res->u.stateid.data));
+		COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data));
 	} else if (status == -NFS4ERR_DENIED)
 		return decode_lock_denied(xdr, &res->u.denied);
 	return status;
@@ -2913,8 +3002,8 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res)
 
 	status = decode_op_hdr(xdr, OP_LOCKU);
 	if (status == 0) {
-		READ_BUF(sizeof(nfs4_stateid));
-		COPYMEM(&res->u.stateid, sizeof(res->u.stateid));
+		READ_BUF(sizeof(res->u.stateid.data));
+		COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data));
 	}
 	return status;
 }
@@ -2994,7 +3083,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
         p += bmlen;
 	return decode_delegation(xdr, res);
 xdr_error:
-	printk(KERN_NOTICE "%s: xdr error!\n", __FUNCTION__);
+	dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen);
 	return -EIO;
 }
 
@@ -3208,6 +3297,12 @@ static int decode_renew(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_RENEW);
 }
 
+static int
+decode_restorefh(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_RESTOREFH);
+}
+
 static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
 		size_t *acl_len)
 {
@@ -3243,7 +3338,8 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
 		if (attrlen <= *acl_len)
 			xdr_read_pages(xdr, attrlen);
 		*acl_len = attrlen;
-	}
+	} else
+		status = -EOPNOTSUPP;
 
 out:
 	return status;
@@ -3352,6 +3448,9 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, stru
         if (status)
                 goto out;
         status = decode_open_downgrade(&xdr, res);
+	if (status != 0)
+		goto out;
+	decode_getfattr(&xdr, res->fattr, res->server);
 out:
         return status;
 }
@@ -3424,7 +3523,7 @@ out:
 /*
  * Decode REMOVE response
  */
-static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo)
+static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_remove_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3433,8 +3532,11 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_
 	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
 	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
 		goto out;
-	if ((status = decode_putfh(&xdr)) == 0)
-		status = decode_remove(&xdr, cinfo);
+	if ((status = decode_putfh(&xdr)) != 0)
+		goto out;
+	if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
+		goto out;
+	decode_getfattr(&xdr, res->dir_attr, res->server);
 out:
 	return status;
 }
@@ -3457,7 +3559,14 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_
 		goto out;
 	if ((status = decode_putfh(&xdr)) != 0)
 		goto out;
-	status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo);
+	if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0)
+		goto out;
+	/* Current FH is target directory */
+	if (decode_getfattr(&xdr, res->new_fattr, res->server) != 0)
+		goto out;
+	if ((status = decode_restorefh(&xdr)) != 0)
+		goto out;
+	decode_getfattr(&xdr, res->old_fattr, res->server);
 out:
 	return status;
 }
@@ -3465,7 +3574,7 @@ out:
 /*
  * Decode LINK response
  */
-static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo)
+static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_link_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3480,7 +3589,17 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_ch
 		goto out;
 	if ((status = decode_putfh(&xdr)) != 0)
 		goto out;
-	status = decode_link(&xdr, cinfo);
+	if ((status = decode_link(&xdr, &res->cinfo)) != 0)
+		goto out;
+	/*
+	 * Note order: OP_LINK leaves the directory as the current
+	 *             filehandle.
+	 */
+	if (decode_getfattr(&xdr, res->dir_attr, res->server) != 0)
+		goto out;
+	if ((status = decode_restorefh(&xdr)) != 0)
+		goto out;
+	decode_getfattr(&xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -3499,13 +3618,17 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_
 		goto out;
 	if ((status = decode_putfh(&xdr)) != 0)
 		goto out;
+	if ((status = decode_savefh(&xdr)) != 0)
+		goto out;
 	if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0)
 		goto out;
 	if ((status = decode_getfh(&xdr, res->fh)) != 0)
 		goto out;
-	status = decode_getfattr(&xdr, res->fattr, res->server);
-	if (status == NFS4ERR_DELAY)
-		status = 0;
+	if (decode_getfattr(&xdr, res->fattr, res->server) != 0)
+		goto out;
+	if ((status = decode_restorefh(&xdr)) != 0)
+		goto out;
+	decode_getfattr(&xdr, res->dir_fattr, res->server);
 out:
 	return status;
 }
@@ -3623,6 +3746,15 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_cl
         if (status)
                 goto out;
         status = decode_close(&xdr, res);
+	if (status != 0)
+		goto out;
+	/*
+	 * Note: Server may do delete on close for this file
+	 * 	in which case the getattr call will fail with
+	 * 	an ESTALE error. Shouldn't be a problem,
+	 * 	though, since fattr->valid will remain unset.
+	 */
+	decode_getfattr(&xdr, res->fattr, res->server);
 out:
         return status;
 }
@@ -3643,15 +3775,20 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_ope
         status = decode_putfh(&xdr);
         if (status)
                 goto out;
+        status = decode_savefh(&xdr);
+	if (status)
+		goto out;
         status = decode_open(&xdr, res);
         if (status)
                 goto out;
 	status = decode_getfh(&xdr, &res->fh);
         if (status)
 		goto out;
-	status = decode_getfattr(&xdr, res->f_attr, res->server);
-	if (status == NFS4ERR_DELAY)
-		status = 0;
+	if (decode_getfattr(&xdr, res->f_attr, res->server) != 0)
+		goto out;
+	if ((status = decode_restorefh(&xdr)) != 0)
+		goto out;
+	decode_getfattr(&xdr, res->dir_attr, res->server);
 out:
         return status;
 }
@@ -3869,6 +4006,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_wr
 	if (status)
 		goto out;
 	status = decode_write(&xdr, res);
+	if (status)
+		goto out;
+	decode_getfattr(&xdr, res->fattr, res->server);
 	if (!status)
 		status = res->count;
 out:
@@ -3892,6 +4032,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_w
 	if (status)
 		goto out;
 	status = decode_commit(&xdr, res);
+	if (status)
+		goto out;
+	decode_getfattr(&xdr, res->fattr, res->server);
 out:
 	return status;
 }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index cedf636bcf3..a48a003242c 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -61,7 +61,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 	int status;
 
 	dprintk("%s: call getattr\n", __FUNCTION__);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
 	dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	if (status)
@@ -93,7 +93,7 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("NFS call  getattr\n");
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call(server->client, NFSPROC_GETATTR,
 				fhandle, fattr, 0);
 	dprintk("NFS reply getattr: %d\n", status);
@@ -112,8 +112,10 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	int	status;
 
 	dprintk("NFS call  setattr\n");
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
+	if (status == 0)
+		nfs_setattr_update_inode(inode, sattr);
 	dprintk("NFS reply setattr: %d\n", status);
 	return status;
 }
@@ -134,7 +136,7 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name,
 	int			status;
 
 	dprintk("NFS call  lookup %s\n", name->name);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
 	dprintk("NFS reply lookup: %d\n", status);
 	return status;
@@ -172,7 +174,7 @@ static int nfs_proc_read(struct nfs_read_data *rdata)
 
 	dprintk("NFS call  read %d @ %Ld\n", rdata->args.count,
 			(long long) rdata->args.offset);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
 	if (status >= 0) {
 		nfs_refresh_inode(inode, fattr);
@@ -201,10 +203,10 @@ static int nfs_proc_write(struct nfs_write_data *wdata)
 
 	dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
 			(long long) wdata->args.offset);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
 	if (status >= 0) {
-		nfs_refresh_inode(inode, fattr);
+		nfs_post_op_update_inode(inode, fattr);
 		wdata->res.count = wdata->args.count;
 		wdata->verf.committed = NFS_FILE_SYNC;
 	}
@@ -214,7 +216,7 @@ static int nfs_proc_write(struct nfs_write_data *wdata)
 
 static int
 nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-		int flags)
+		int flags, struct nameidata *nd)
 {
 	struct nfs_fh		fhandle;
 	struct nfs_fattr	fattr;
@@ -230,7 +232,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	};
 	int			status;
 
-	fattr.valid = 0;
+	nfs_fattr_init(&fattr);
 	dprintk("NFS call  create %s\n", dentry->d_name.name);
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
 	if (status == 0)
@@ -271,12 +273,13 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */
 	}
 
-	fattr.valid = 0;
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+	nfs_mark_for_revalidate(dir);
 
 	if (status == -EINVAL && S_ISFIFO(mode)) {
 		sattr->ia_mode = mode;
-		fattr.valid = 0;
+		nfs_fattr_init(&fattr);
 		status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
 	}
 	if (status == 0)
@@ -303,6 +306,7 @@ nfs_proc_remove(struct inode *dir, struct qstr *name)
 
 	dprintk("NFS call  remove %s\n", name->name);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	nfs_mark_for_revalidate(dir);
 
 	dprintk("NFS reply remove: %d\n", status);
 	return status;
@@ -329,8 +333,10 @@ nfs_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
 {
 	struct rpc_message *msg = &task->tk_msg;
 	
-	if (msg->rpc_argp)
+	if (msg->rpc_argp) {
+		nfs_mark_for_revalidate(dir->d_inode);
 		kfree(msg->rpc_argp);
+	}
 	return 0;
 }
 
@@ -350,6 +356,8 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
 
 	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
 	status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
+	nfs_mark_for_revalidate(old_dir);
+	nfs_mark_for_revalidate(new_dir);
 	dprintk("NFS reply rename: %d\n", status);
 	return status;
 }
@@ -367,6 +375,7 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 
 	dprintk("NFS call  link %s\n", name->name);
 	status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
+	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply link: %d\n", status);
 	return status;
 }
@@ -389,9 +398,10 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 	if (path->len > NFS2_MAXPATHLEN)
 		return -ENAMETOOLONG;
 	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	fhandle->size = 0;
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
+	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
 }
@@ -414,8 +424,9 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 	int			status;
 
 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
-	fattr.valid = 0;
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
+	nfs_mark_for_revalidate(dir);
 	if (status == 0)
 		status = nfs_instantiate(dentry, &fhandle, &fattr);
 	dprintk("NFS reply mkdir: %d\n", status);
@@ -434,6 +445,7 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
 
 	dprintk("NFS call  rmdir %s\n", name->name);
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
+	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply rmdir: %d\n", status);
 	return status;
 }
@@ -482,7 +494,7 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("NFS call  statfs\n");
-	stat->fattr->valid = 0;
+	nfs_fattr_init(stat->fattr);
 	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
 	dprintk("NFS reply statfs: %d\n", status);
 	if (status)
@@ -505,7 +517,7 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("NFS call  fsinfo\n");
-	info->fattr->valid = 0;
+	nfs_fattr_init(info->fattr);
 	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
 	dprintk("NFS reply fsinfo: %d\n", status);
 	if (status)
@@ -577,7 +589,7 @@ nfs_write_done(struct rpc_task *task)
 	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
 
 	if (task->tk_status >= 0)
-		nfs_refresh_inode(data->inode, data->res.fattr);
+		nfs_post_op_update_inode(data->inode, data->res.fattr);
 	nfs_writeback_done(task);
 }
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 6f866b8aa2d..5f20eafba8e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -140,7 +140,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 		if (rdata->res.eof != 0 || result == 0)
 			break;
 	} while (count);
-	NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+	spin_lock(&inode->i_lock);
+	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+	spin_unlock(&inode->i_lock);
 
 	if (count)
 		memclear_highpage_flush(page, rdata->args.pgbase, count);
@@ -182,14 +184,13 @@ static void nfs_readpage_release(struct nfs_page *req)
 {
 	unlock_page(req->wb_page);
 
-	nfs_clear_request(req);
-	nfs_release_request(req);
-
 	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
 			req->wb_context->dentry->d_inode->i_sb->s_id,
 			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
 			req->wb_bytes,
 			(long long)req_offset(req));
+	nfs_clear_request(req);
+	nfs_release_request(req);
 }
 
 /*
@@ -214,6 +215,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	data->res.fattr   = &data->fattr;
 	data->res.count   = count;
 	data->res.eof     = 0;
+	nfs_fattr_init(&data->fattr);
 
 	NFS_PROTO(inode)->read_setup(data);
 
@@ -473,7 +475,9 @@ void nfs_readpage_result(struct rpc_task *task)
 		}
 		task->tk_status = -EIO;
 	}
-	NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME;
+	spin_lock(&data->inode->i_lock);
+	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+	spin_unlock(&data->inode->i_lock);
 	data->complete(data, status);
 }
 
@@ -503,7 +507,7 @@ int nfs_readpage(struct file *file, struct page *page)
 		goto out_error;
 
 	if (file == NULL) {
-		ctx = nfs_find_open_context(inode, FMODE_READ);
+		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (ctx == NULL)
 			return -EBADF;
 	} else
@@ -572,7 +576,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 			nr_pages);
 
 	if (filp == NULL) {
-		desc.ctx = nfs_find_open_context(inode, FMODE_READ);
+		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (desc.ctx == NULL)
 			return -EBADF;
 	} else
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 35f10659914..18dc95b0b64 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -27,26 +27,14 @@
 
 /* Symlink caching in the page cache is even more simplistic
  * and straight-forward than readdir caching.
- *
- * At the beginning of the page we store pointer to struct page in question,
- * simplifying nfs_put_link() (if inode got invalidated we can't find the page
- * to be freed via pagecache lookup).
- * The NUL-terminated string follows immediately thereafter.
  */
 
-struct nfs_symlink {
-	struct page *page;
-	char body[0];
-};
-
 static int nfs_symlink_filler(struct inode *inode, struct page *page)
 {
-	const unsigned int pgbase = offsetof(struct nfs_symlink, body);
-	const unsigned int pglen = PAGE_SIZE - pgbase;
 	int error;
 
 	lock_kernel();
-	error = NFS_PROTO(inode)->readlink(inode, page, pgbase, pglen);
+	error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
 	unlock_kernel();
 	if (error < 0)
 		goto error;
@@ -60,11 +48,10 @@ error:
 	return -EIO;
 }
 
-static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;
 	struct page *page;
-	struct nfs_symlink *p;
 	void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode));
 	if (err)
 		goto read_failed;
@@ -78,28 +65,20 @@ static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 		err = ERR_PTR(-EIO);
 		goto getlink_read_error;
 	}
-	p = kmap(page);
-	p->page = page;
-	nd_set_link(nd, p->body);
-	return 0;
+	nd_set_link(nd, kmap(page));
+	return page;
 
 getlink_read_error:
 	page_cache_release(page);
 read_failed:
 	nd_set_link(nd, err);
-	return 0;
+	return NULL;
 }
 
-static void nfs_put_link(struct dentry *dentry, struct nameidata *nd)
+static void nfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
 {
-	char *s = nd_get_link(nd);
-	if (!IS_ERR(s)) {
-		struct nfs_symlink *p;
-		struct page *page;
-
-		p = container_of(s, struct nfs_symlink, body[0]);
-		page = p->page;
-
+	if (cookie) {
+		struct page *page = cookie;
 		kunmap(page);
 		page_cache_release(page);
 	}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5130eda231d..8f71e766cc5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -294,7 +294,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 	if (page->index >= end_index+1 || !offset)
 		goto out;
 do_it:
-	ctx = nfs_find_open_context(inode, FMODE_WRITE);
+	ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE);
 	if (ctx == NULL) {
 		err = -EBADF;
 		goto out;
@@ -734,14 +734,14 @@ int nfs_updatepage(struct file *file, struct page *page,
 		unsigned int offset, unsigned int count)
 {
 	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
-	struct dentry	*dentry = file->f_dentry;
 	struct inode	*inode = page->mapping->host;
 	struct nfs_page	*req;
 	int		status = 0;
 
 	dprintk("NFS:      nfs_updatepage(%s/%s %d@%Ld)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
-		count, (long long)(page_offset(page) +offset));
+		file->f_dentry->d_parent->d_name.name,
+		file->f_dentry->d_name.name, count,
+		(long long)(page_offset(page) +offset));
 
 	if (IS_SYNC(inode)) {
 		status = nfs_writepage_sync(ctx, inode, page, offset, count, 0);
@@ -850,7 +850,6 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 		unsigned int count, unsigned int offset,
 		int how)
 {
-	struct rpc_task		*task = &data->task;
 	struct inode		*inode;
 
 	/* Set up the RPC argument and reply structs
@@ -870,6 +869,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	data->res.fattr   = &data->fattr;
 	data->res.count   = count;
 	data->res.verf    = &data->verf;
+	nfs_fattr_init(&data->fattr);
 
 	NFS_PROTO(inode)->write_setup(data, how);
 
@@ -880,7 +880,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	data->task.tk_release = nfs_writedata_release;
 
 	dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
-		task->tk_pid,
+		data->task.tk_pid,
 		inode->i_sb->s_id,
 		(long long)NFS_FILEID(inode),
 		count,
@@ -1216,7 +1216,6 @@ static void nfs_commit_release(struct rpc_task *task)
 static void nfs_commit_rpcsetup(struct list_head *head,
 		struct nfs_write_data *data, int how)
 {
-	struct rpc_task		*task = &data->task;
 	struct nfs_page		*first;
 	struct inode		*inode;
 
@@ -1237,6 +1236,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	data->res.count   = 0;
 	data->res.fattr   = &data->fattr;
 	data->res.verf    = &data->verf;
+	nfs_fattr_init(&data->fattr);
 	
 	NFS_PROTO(inode)->commit_setup(data, how);
 
@@ -1246,7 +1246,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	/* Release requests */
 	data->task.tk_release = nfs_commit_release;
 	
-	dprintk("NFS: %4d initiated commit call\n", task->tk_pid);
+	dprintk("NFS: %4d initiated commit call\n", data->task.tk_pid);
 }
 
 /*
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 18c58c32e32..0c2be8c0307 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -48,43 +48,26 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
 		(struct nfsacl_encode_desc *) desc;
 	u32 *p = (u32 *) elem;
 
-	if (nfsacl_desc->count < nfsacl_desc->acl->a_count) {
-		struct posix_acl_entry *entry =
-			&nfsacl_desc->acl->a_entries[nfsacl_desc->count++];
+	struct posix_acl_entry *entry =
+		&nfsacl_desc->acl->a_entries[nfsacl_desc->count++];
 
-		*p++ = htonl(entry->e_tag | nfsacl_desc->typeflag);
-		switch(entry->e_tag) {
-			case ACL_USER_OBJ:
-				*p++ = htonl(nfsacl_desc->uid);
-				break;
-			case ACL_GROUP_OBJ:
-				*p++ = htonl(nfsacl_desc->gid);
-				break;
-			case ACL_USER:
-			case ACL_GROUP:
-				*p++ = htonl(entry->e_id);
-				break;
-			default:  /* Solaris depends on that! */
-				*p++ = 0;
-				break;
-		}
-		*p++ = htonl(entry->e_perm & S_IRWXO);
-	} else {
-		const struct posix_acl_entry *pa, *pe;
-		int group_obj_perm = ACL_READ|ACL_WRITE|ACL_EXECUTE;
-
-		FOREACH_ACL_ENTRY(pa, nfsacl_desc->acl, pe) {
-			if (pa->e_tag == ACL_GROUP_OBJ) {
-				group_obj_perm = pa->e_perm & S_IRWXO;
-				break;
-			}
-		}
-		/* fake up ACL_MASK entry */
-		*p++ = htonl(ACL_MASK | nfsacl_desc->typeflag);
-		*p++ = htonl(0);
-		*p++ = htonl(group_obj_perm);
+	*p++ = htonl(entry->e_tag | nfsacl_desc->typeflag);
+	switch(entry->e_tag) {
+		case ACL_USER_OBJ:
+			*p++ = htonl(nfsacl_desc->uid);
+			break;
+		case ACL_GROUP_OBJ:
+			*p++ = htonl(nfsacl_desc->gid);
+			break;
+		case ACL_USER:
+		case ACL_GROUP:
+			*p++ = htonl(entry->e_id);
+			break;
+		default:  /* Solaris depends on that! */
+			*p++ = 0;
+			break;
 	}
-
+	*p++ = htonl(entry->e_perm & S_IRWXO);
 	return 0;
 }
 
@@ -105,11 +88,28 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
 		.gid = inode->i_gid,
 	};
 	int err;
+	struct posix_acl *acl2 = NULL;
 
 	if (entries > NFS_ACL_MAX_ENTRIES ||
 	    xdr_encode_word(buf, base, entries))
 		return -EINVAL;
+	if (encode_entries && acl && acl->a_count == 3) {
+		/* Fake up an ACL_MASK entry. */
+		acl2 = posix_acl_alloc(4, GFP_KERNEL);
+		if (!acl2)
+			return -ENOMEM;
+		/* Insert entries in canonical order: other orders seem
+		 to confuse Solaris VxFS. */
+		acl2->a_entries[0] = acl->a_entries[0];  /* ACL_USER_OBJ */
+		acl2->a_entries[1] = acl->a_entries[1];  /* ACL_GROUP_OBJ */
+		acl2->a_entries[2] = acl->a_entries[1];  /* ACL_MASK */
+		acl2->a_entries[2].e_tag = ACL_MASK;
+		acl2->a_entries[3] = acl->a_entries[2];  /* ACL_OTHER */
+		nfsacl_desc.acl = acl2;
+	}
 	err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc);
+	if (acl2)
+		posix_acl_release(acl2);
 	if (!err)
 		err = 8 + nfsacl_desc.desc.elem_size *
 			  nfsacl_desc.desc.array_len;
@@ -239,6 +239,7 @@ nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
 	if (xdr_decode_word(buf, base, &entries) ||
 	    entries > NFS_ACL_MAX_ENTRIES)
 		return -EINVAL;
+	nfsacl_desc.desc.array_maxlen = entries;
 	err = xdr_decode_array2(buf, base + 4, &nfsacl_desc.desc);
 	if (err)
 		return err;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 9a11aa39e2e..057aff74550 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -26,6 +26,7 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/hash.h>
+#include <linux/module.h>
 
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
@@ -221,6 +222,7 @@ static int expkey_show(struct seq_file *m,
 }
 	
 struct cache_detail svc_expkey_cache = {
+	.owner		= THIS_MODULE,
 	.hash_size	= EXPKEY_HASHMAX,
 	.hash_table	= expkey_table,
 	.name		= "nfsd.fh",
@@ -456,6 +458,7 @@ static int svc_export_show(struct seq_file *m,
 	return 0;
 }
 struct cache_detail svc_export_cache = {
+	.owner		= THIS_MODULE,
 	.hash_size	= EXPORT_HASHMAX,
 	.hash_table	= export_table,
 	.name		= "nfsd.export",
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 5605a26efc5..13369650cdf 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -187,6 +187,7 @@ static int         idtoname_parse(struct cache_detail *, char *, int);
 static struct ent *idtoname_lookup(struct ent *, int);
 
 static struct cache_detail idtoname_cache = {
+	.owner		= THIS_MODULE,
 	.hash_size	= ENT_HASHMAX,
 	.hash_table	= idtoname_table,
 	.name		= "nfs4.idtoname",
@@ -320,6 +321,7 @@ static struct ent *nametoid_lookup(struct ent *, int);
 static int         nametoid_parse(struct cache_detail *, char *, int);
 
 static struct cache_detail nametoid_cache = {
+	.owner		= THIS_MODULE,
 	.hash_size	= ENT_HASHMAX,
 	.hash_table	= nametoid_table,
 	.name		= "nfs4.nametoid",
@@ -404,8 +406,10 @@ nfsd_idmap_init(void)
 void
 nfsd_idmap_shutdown(void)
 {
-	cache_unregister(&idtoname_cache);
-	cache_unregister(&nametoid_cache);
+	if (cache_unregister(&idtoname_cache))
+		printk(KERN_ERR "nfsd: failed to unregister idtoname cache\n");
+	if (cache_unregister(&nametoid_cache))
+		printk(KERN_ERR "nfsd: failed to unregister nametoid cache\n");
 }
 
 /*
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e08edc17c6a..361b4007d4a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -162,7 +162,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
 
 
 static inline int
-nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, struct nfs4_stateowner **replay_owner)
 {
 	int status;
 	dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n",
@@ -238,8 +238,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 	 */
 	status = nfsd4_process_open2(rqstp, current_fh, open);
 out:
-	if (open->op_stateowner)
+	if (open->op_stateowner) {
 		nfs4_get_stateowner(open->op_stateowner);
+		*replay_owner = open->op_stateowner;
+	}
 	nfs4_unlock_state();
 	return status;
 }
@@ -809,8 +811,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 			op->status = nfsd4_access(rqstp, current_fh, &op->u.access);
 			break;
 		case OP_CLOSE:
-			op->status = nfsd4_close(rqstp, current_fh, &op->u.close);
-			replay_owner = op->u.close.cl_stateowner;
+			op->status = nfsd4_close(rqstp, current_fh, &op->u.close, &replay_owner);
 			break;
 		case OP_COMMIT:
 			op->status = nfsd4_commit(rqstp, current_fh, &op->u.commit);
@@ -831,15 +832,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 			op->status = nfsd4_link(rqstp, current_fh, save_fh, &op->u.link);
 			break;
 		case OP_LOCK:
-			op->status = nfsd4_lock(rqstp, current_fh, &op->u.lock);
-			replay_owner = op->u.lock.lk_stateowner;
+			op->status = nfsd4_lock(rqstp, current_fh, &op->u.lock, &replay_owner);
 			break;
 		case OP_LOCKT:
 			op->status = nfsd4_lockt(rqstp, current_fh, &op->u.lockt);
 			break;
 		case OP_LOCKU:
-			op->status = nfsd4_locku(rqstp, current_fh, &op->u.locku);
-			replay_owner = op->u.locku.lu_stateowner;
+			op->status = nfsd4_locku(rqstp, current_fh, &op->u.locku, &replay_owner);
 			break;
 		case OP_LOOKUP:
 			op->status = nfsd4_lookup(rqstp, current_fh, &op->u.lookup);
@@ -853,16 +852,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 				op->status = nfs_ok;
 			break;
 		case OP_OPEN:
-			op->status = nfsd4_open(rqstp, current_fh, &op->u.open);
-			replay_owner = op->u.open.op_stateowner;
+			op->status = nfsd4_open(rqstp, current_fh, &op->u.open, &replay_owner);
 			break;
 		case OP_OPEN_CONFIRM:
-			op->status = nfsd4_open_confirm(rqstp, current_fh, &op->u.open_confirm);
-			replay_owner = op->u.open_confirm.oc_stateowner;
+			op->status = nfsd4_open_confirm(rqstp, current_fh, &op->u.open_confirm, &replay_owner);
 			break;
 		case OP_OPEN_DOWNGRADE:
-			op->status = nfsd4_open_downgrade(rqstp, current_fh, &op->u.open_downgrade);
-			replay_owner = op->u.open_downgrade.od_stateowner;
+			op->status = nfsd4_open_downgrade(rqstp, current_fh, &op->u.open_downgrade, &replay_owner);
 			break;
 		case OP_PUTFH:
 			op->status = nfsd4_putfh(rqstp, current_fh, &op->u.putfh);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 57ed50fe7f8..954cf893d50 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -93,7 +93,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
 
 	dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
 			clname->len, clname->data);
-	tfm = crypto_alloc_tfm("md5", 0);
+	tfm = crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP);
 	if (tfm == NULL)
 		goto out;
 	cksum.len = crypto_tfm_alg_digestsize(tfm);
@@ -114,8 +114,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
 	kfree(cksum.data);
 	status = nfs_ok;
 out:
-	if (tfm)
-		crypto_free_tfm(tfm);
+	crypto_free_tfm(tfm);
 	return status;
 }
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b83f8fb441e..6bbefd06f10 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -624,7 +624,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 	cb->cb_ident = se->se_callback_ident;
 	return;
 out_err:
-	printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
+	dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
 		"will not receive delegations\n",
 		clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
 
@@ -678,13 +678,12 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 	int 			status;
 	char                    dname[HEXDIR_LEN];
 	
-	status = nfserr_inval;
 	if (!check_name(clname))
-		goto out;
+		return nfserr_inval;
 
 	status = nfs4_make_rec_clidname(dname, &clname);
 	if (status)
-		goto out;
+		return status;
 
 	/* 
 	 * XXX The Duplicate Request Cache (DRC) has been checked (??)
@@ -2014,7 +2013,7 @@ STALE_STATEID(stateid_t *stateid)
 {
 	if (stateid->si_boot == boot_time)
 		return 0;
-	printk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
+	dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
 		stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
 		stateid->si_generation);
 	return 1;
@@ -2275,7 +2274,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
 
 check_replay:
 	if (seqid == sop->so_seqid - 1) {
-		printk("NFSD: preprocess_seqid_op: retransmission?\n");
+		dprintk("NFSD: preprocess_seqid_op: retransmission?\n");
 		/* indicate replay to calling function */
 		return NFSERR_REPLAY_ME;
 	}
@@ -2286,7 +2285,7 @@ check_replay:
 }
 
 int
-nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc)
+nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc, struct nfs4_stateowner **replay_owner)
 {
 	int status;
 	struct nfs4_stateowner *sop;
@@ -2320,8 +2319,10 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
 
 	nfsd4_create_clid_dir(sop->so_client);
 out:
-	if (oc->oc_stateowner)
+	if (oc->oc_stateowner) {
 		nfs4_get_stateowner(oc->oc_stateowner);
+		*replay_owner = oc->oc_stateowner;
+	}
 	nfs4_unlock_state();
 	return status;
 }
@@ -2352,7 +2353,7 @@ reset_union_bmap_deny(unsigned long deny, unsigned long *bmap)
 }
 
 int
-nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od)
+nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od, struct nfs4_stateowner **replay_owner)
 {
 	int status;
 	struct nfs4_stateid *stp;
@@ -2394,8 +2395,10 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct n
 	memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t));
 	status = nfs_ok;
 out:
-	if (od->od_stateowner)
+	if (od->od_stateowner) {
 		nfs4_get_stateowner(od->od_stateowner);
+		*replay_owner = od->od_stateowner;
+	}
 	nfs4_unlock_state();
 	return status;
 }
@@ -2404,7 +2407,7 @@ out:
  * nfs4_unlock_state() called after encode
  */
 int
-nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close)
+nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close, struct nfs4_stateowner **replay_owner)
 {
 	int status;
 	struct nfs4_stateid *stp;
@@ -2430,8 +2433,10 @@ nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_clos
 	/* release_state_owner() calls nfsd_close() if needed */
 	release_state_owner(stp, OPEN_STATE);
 out:
-	if (close->cl_stateowner)
+	if (close->cl_stateowner) {
 		nfs4_get_stateowner(close->cl_stateowner);
+		*replay_owner = close->cl_stateowner;
+	}
 	nfs4_unlock_state();
 	return status;
 }
@@ -2500,8 +2505,7 @@ find_stateid(stateid_t *stid, int flags)
 			    (local->st_stateid.si_fileid == f_id))
 				return local;
 		}
-	} else
-		printk("NFSD: find_stateid: ERROR: no state flag\n");
+	}
 	return NULL;
 }
 
@@ -2624,7 +2628,9 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 	sop->so_is_open_owner = 0;
 	sop->so_id = current_ownerid++;
 	sop->so_client = clp;
-	sop->so_seqid = lock->lk_new_lock_seqid;
+	/* It is the openowner seqid that will be incremented in encode in the
+	 * case of new lockowners; so increment the lock seqid manually: */
+	sop->so_seqid = lock->lk_new_lock_seqid + 1;
 	sop->so_confirmed = 1;
 	rp = &sop->so_replay;
 	rp->rp_status = NFSERR_SERVERFAULT;
@@ -2676,9 +2682,10 @@ check_lock_length(u64 offset, u64 length)
  *  LOCK operation 
  */
 int
-nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock)
+nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock, struct nfs4_stateowner **replay_owner)
 {
 	struct nfs4_stateowner *open_sop = NULL;
+	struct nfs4_stateowner *lock_sop = NULL;
 	struct nfs4_stateid *lock_stp;
 	struct file *filp;
 	struct file_lock file_lock;
@@ -2705,19 +2712,19 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 		struct nfs4_file *fp;
 		
 		status = nfserr_stale_clientid;
-		if (STALE_CLIENTID(&lock->lk_new_clientid)) {
-			printk("NFSD: nfsd4_lock: clientid is stale!\n");
+		if (STALE_CLIENTID(&lock->lk_new_clientid))
 			goto out;
-		}
 
 		/* validate and update open stateid and open seqid */
 		status = nfs4_preprocess_seqid_op(current_fh, 
 				        lock->lk_new_open_seqid,
 		                        &lock->lk_new_open_stateid,
 		                        CHECK_FH | OPEN_STATE,
-		                        &open_sop, &open_stp, lock);
+		                        &lock->lk_stateowner, &open_stp,
+					lock);
 		if (status)
 			goto out;
+		open_sop = lock->lk_stateowner;
 		/* create lockowner and lock stateid */
 		fp = open_stp->st_file;
 		strhashval = lock_ownerstr_hashval(fp->fi_inode, 
@@ -2727,16 +2734,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 		 * the same file, or should they just be allowed (and
 		 * create new stateids)? */
 		status = nfserr_resource;
-		if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock)))
+		lock_sop = alloc_init_lock_stateowner(strhashval,
+				open_sop->so_client, open_stp, lock);
+		if (lock_sop == NULL)
 			goto out;
-		if ((lock_stp = alloc_init_lock_stateid(lock->lk_stateowner, 
-						fp, open_stp)) == NULL) {
-			release_stateowner(lock->lk_stateowner);
-			lock->lk_stateowner = NULL;
+		lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp);
+		if (lock_stp == NULL) {
+			release_stateowner(lock_sop);
 			goto out;
 		}
-		/* bump the open seqid used to create the lock */
-		open_sop->so_seqid++;
 	} else {
 		/* lock (lock owner + lock stateid) already exists */
 		status = nfs4_preprocess_seqid_op(current_fh,
@@ -2746,12 +2752,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 				       &lock->lk_stateowner, &lock_stp, lock);
 		if (status)
 			goto out;
+		lock_sop = lock->lk_stateowner;
 	}
 	/* lock->lk_stateowner and lock_stp have been created or found */
 	filp = lock_stp->st_vfs_file;
 
 	if ((status = fh_verify(rqstp, current_fh, S_IFREG, MAY_LOCK))) {
-		printk("NFSD: nfsd4_lock: permission denied!\n");
+		dprintk("NFSD: nfsd4_lock: permission denied!\n");
 		goto out;
 	}
 
@@ -2776,7 +2783,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 			status = nfserr_inval;
 		goto out;
 	}
-	file_lock.fl_owner = (fl_owner_t) lock->lk_stateowner;
+	file_lock.fl_owner = (fl_owner_t)lock_sop;
 	file_lock.fl_pid = current->tgid;
 	file_lock.fl_file = filp;
 	file_lock.fl_flags = FL_POSIX;
@@ -2832,14 +2839,13 @@ out_destroy_new_stateid:
 		 * An error encountered after instantiation of the new
 		 * stateid has forced us to destroy it.
 		 */
-		if (!seqid_mutating_err(status))
-			open_sop->so_seqid--;
-
 		release_state_owner(lock_stp, LOCK_STATE);
 	}
 out:
-	if (lock->lk_stateowner)
+	if (lock->lk_stateowner) {
 		nfs4_get_stateowner(lock->lk_stateowner);
+		*replay_owner = lock->lk_stateowner;
+	}
 	nfs4_unlock_state();
 	return status;
 }
@@ -2866,13 +2872,11 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	nfs4_lock_state();
 
 	status = nfserr_stale_clientid;
-	if (STALE_CLIENTID(&lockt->lt_clientid)) {
-		printk("NFSD: nfsd4_lockt: clientid is stale!\n");
+	if (STALE_CLIENTID(&lockt->lt_clientid))
 		goto out;
-	}
 
 	if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) {
-		printk("NFSD: nfsd4_lockt: fh_verify() failed!\n");
+		dprintk("NFSD: nfsd4_lockt: fh_verify() failed!\n");
 		if (status == nfserr_symlink)
 			status = nfserr_inval;
 		goto out;
@@ -2930,7 +2934,7 @@ out:
 }
 
 int
-nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku)
+nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku, struct nfs4_stateowner **replay_owner)
 {
 	struct nfs4_stateid *stp;
 	struct file *filp = NULL;
@@ -2976,7 +2980,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
 		file_lock.fl_ops->fl_release_private(&file_lock);
 	if (status) {
-		printk("NFSD: nfs4_locku: posix_lock_file failed!\n");
+		dprintk("NFSD: nfs4_locku: posix_lock_file failed!\n");
 		goto out_nfserr;
 	}
 	/*
@@ -2986,8 +2990,10 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t));
 
 out:
-	if (locku->lu_stateowner)
+	if (locku->lu_stateowner) {
 		nfs4_get_stateowner(locku->lu_stateowner);
+		*replay_owner = locku->lu_stateowner;
+	}
 	nfs4_unlock_state();
 	return status;
 
@@ -3036,10 +3042,8 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
 	/* XXX check for lease expiration */
 
 	status = nfserr_stale_clientid;
-	if (STALE_CLIENTID(clid)) {
-		printk("NFSD: nfsd4_release_lockowner: clientid is stale!\n");
+	if (STALE_CLIENTID(clid))
 		return status;
-	}
 
 	nfs4_lock_state();
 
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 07b9a065e9d..1697539a717 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -287,6 +287,7 @@ out:
 	svc_exit_thread(rqstp);
 
 	/* Release module */
+	unlock_kernel();
 	module_put_and_exit(0);
 }
 
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 9709fac6531..50a7749cfca 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -1,18 +1,15 @@
 ToDo/Notes:
 	- Find and fix bugs.
-	- In between ntfs_prepare/commit_write, need exclusion between
-	  simultaneous file extensions.  This is given to us by holding i_sem
-	  on the inode.  The only places in the kernel when a file is resized
-	  are prepare/commit write and truncate for both of which i_sem is
-	  held.  Just have to be careful in readpage/writepage and all other
-	  helpers not running under i_sem that we play nice...
-	  Also need to be careful with initialized_size extention in
-	  ntfs_prepare_write. Basically, just be _very_ careful in this code...
-	  UPDATE: The only things that need to be checked are read/writepage
-	  which do not hold i_sem.  Note writepage cannot change i_size but it
-	  needs to cope with a concurrent i_size change, just like readpage.
-	  Also both need to cope with concurrent changes to the other sizes,
-	  i.e. initialized/allocated/compressed size, as well.
+	- The only places in the kernel where a file is resized are
+	  ntfs_file_write*() and ntfs_truncate() for both of which i_sem is
+	  held.  Just have to be careful in read-/writepage and other helpers
+	  not running under i_sem that we play nice...  Also need to be careful
+	  with initialized_size extension in ntfs_file_write*() and writepage.
+	  UPDATE: The only things that need to be checked are the compressed
+	  write and the other attribute resize/write cases like index
+	  attributes, etc.  For now none of these are implemented so are safe.
+	- Implement filling in of holes in aops.c::ntfs_writepage() and its
+	  helpers.
 	- Implement mft.c::sync_mft_mirror_umount().  We currently will just
 	  leave the volume dirty on umount if the final iput(vol->mft_ino)
 	  causes a write of any mirrored mft records due to the mft mirror
@@ -22,6 +19,152 @@ ToDo/Notes:
 	- Enable the code for setting the NT4 compatibility flag when we start
 	  making NTFS 1.2 specific modifications.
 
+2.1.25 - (Almost) fully implement write(2) and truncate(2).
+
+	- Change ntfs_map_runlist_nolock(), ntfs_attr_find_vcn_nolock() and
+	  {__,}ntfs_cluster_free() to also take an optional attribute search
+	  context as argument.  This allows calling these functions with the
+	  mft record mapped.  Update all callers.
+	- Fix potential deadlock in ntfs_mft_data_extend_allocation_nolock()
+	  error handling by passing in the active search context when calling
+	  ntfs_cluster_free().
+	- Change ntfs_cluster_alloc() to take an extra boolean parameter
+	  specifying whether the cluster are being allocated to extend an
+	  attribute or to fill a hole.
+	- Change ntfs_attr_make_non_resident() to call ntfs_cluster_alloc()
+	  with @is_extension set to TRUE and remove the runlist terminator
+	  fixup code as this is now done by ntfs_cluster_alloc().
+	- Change ntfs_attr_make_non_resident to take the attribute value size
+	  as an extra parameter.  This is needed since we need to know the size
+	  before we can map the mft record and our callers always know it.  The
+	  reason we cannot simply read the size from the vfs inode i_size is
+	  that this is not necessarily uptodate.  This happens when
+	  ntfs_attr_make_non_resident() is called in the ->truncate call path.
+	- Fix ntfs_attr_make_non_resident() to update the vfs inode i_blocks
+	  which is zero for a resident attribute but should no longer be zero
+	  once the attribute is non-resident as it then has real clusters
+	  allocated.
+	- Add fs/ntfs/attrib.[hc]::ntfs_attr_extend_allocation(), a function to
+	  extend the allocation of an attributes.  Optionally, the data size,
+	  but not the initialized size can be extended, too.
+	- Implement fs/ntfs/inode.[hc]::ntfs_truncate().  It only supports
+	  uncompressed and unencrypted files and it never creates sparse files
+	  at least for the moment (making a file sparse requires us to modify
+	  its directory entries and we do not support directory operations at
+	  the moment).  Also, support for highly fragmented files, i.e. ones
+	  whose data attribute is split across multiple extents, is severly
+	  limited.  When such a case is encountered, EOPNOTSUPP is returned.
+	- Enable ATTR_SIZE attribute changes in ntfs_setattr().  This completes
+	  the initial implementation of file truncation.  Now both open(2)ing
+	  a file with the O_TRUNC flag and the {,f}truncate(2) system calls
+	  will resize a file appropriately.  The limitations are that only
+	  uncompressed and unencrypted files are supported.  Also, there is
+	  only very limited support for highly fragmented files (the ones whose
+	  $DATA attribute is split into multiple attribute extents).
+	- In attrib.c::ntfs_attr_set() call balance_dirty_pages_ratelimited()
+	  and cond_resched() in the main loop as we could be dirtying a lot of
+	  pages and this ensures we play nice with the VM and the system as a
+	  whole.
+	- Implement file operations ->write, ->aio_write, ->writev for regular
+	  files.  This replaces the old use of generic_file_write(), et al and
+	  the address space operations ->prepare_write and ->commit_write.
+	  This means that both sparse and non-sparse (unencrypted and
+	  uncompressed) files can now be extended using the normal write(2)
+	  code path.  There are two limitations at present and these are that
+	  we never create sparse files and that we only have limited support
+	  for highly fragmented files, i.e. ones whose data attribute is split
+	  across multiple extents.   When such a case is encountered,
+	  EOPNOTSUPP is returned.
+	- $EA attributes can be both resident and non-resident.
+	- Use %z for size_t to fix compilation warnings.  (Andrew Morton)
+	- Fix compilation warnings with gcc-4.0.2 on SUSE 10.0.
+	- Document extended attribute ($EA) NEED_EA flag.  (Based on libntfs
+	  patch by Yura Pakhuchiy.)
+
+2.1.24 - Lots of bug fixes and support more clean journal states.
+
+	- Support journals ($LogFile) which have been modified by chkdsk.  This
+	  means users can boot into Windows after we marked the volume dirty.
+	  The Windows boot will run chkdsk and then reboot.  The user can then
+	  immediately boot into Linux rather than having to do a full Windows
+	  boot first before rebooting into Linux and we will recognize such a
+	  journal and empty it as it is clean by definition.  Note, this only
+	  works if chkdsk left the journal in an obviously clean state.
+	- Support journals ($LogFile) with only one restart page as well as
+	  journals with two different restart pages.  We sanity check both and
+	  either use the only sane one or the more recent one of the two in the
+	  case that both are valid.
+	- Add fs/ntfs/malloc.h::ntfs_malloc_nofs_nofail() which is analogous to
+	  ntfs_malloc_nofs() but it performs allocations with __GFP_NOFAIL and
+	  hence cannot fail.
+	- Use ntfs_malloc_nofs_nofail() in the two critical regions in
+	  fs/ntfs/runlist.c::ntfs_runlists_merge().  This means we no longer
+	  need to panic() if the allocation fails as it now cannot fail.
+	- Fix two nasty runlist merging bugs that had gone unnoticed so far.
+	  Thanks to Stefano Picerno for the bug report.
+	- Remove two bogus BUG_ON()s from fs/ntfs/mft.c.
+	- Fix handling of valid but empty mapping pairs array in
+	  fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress().
+	- Report unrepresentable inodes during ntfs_readdir() as KERN_WARNING
+	  messages and include the inode number.  Thanks to Yura Pakhuchiy for
+	  pointing this out.
+	- Change ntfs_rl_truncate_nolock() to throw away the runlist if the new
+	  length is zero.
+	- Add runlist.[hc]::ntfs_rl_punch_nolock() which punches a caller
+	  specified hole into a runlist.
+	- Fix a bug in fs/ntfs/index.c::ntfs_index_lookup().  When the returned
+	  index entry is in the index root, we forgot to set the @ir pointer in
+	  the index context.  Thanks to Yura Pakhuchiy for finding this bug.
+	- Remove bogus setting of PageError in ntfs_read_compressed_block().
+	- Add fs/ntfs/attrib.[hc]::ntfs_resident_attr_value_resize().
+	- Fix a bug in ntfs_map_runlist_nolock() where we forgot to protect
+	  access to the allocated size in the ntfs inode with the size lock.
+	- Fix ntfs_attr_vcn_to_lcn_nolock() and ntfs_attr_find_vcn_nolock() to
+	  return LCN_ENOENT when there is no runlist and the allocated size is
+	  zero.
+	- Fix load_attribute_list() to handle the case of a NULL runlist.
+	- Fix handling of sparse attributes in ntfs_attr_make_non_resident().
+	- Add BUG() checks to ntfs_attr_make_non_resident() and ntfs_attr_set()
+	  to ensure that these functions are never called for compressed or
+	  encrypted attributes.
+	- Fix cluster (de)allocators to work when the runlist is NULL and more
+	  importantly to take a locked runlist rather than them locking it
+	  which leads to lock reversal.
+	- Truncate {a,c,m}time to the ntfs supported time granularity when
+	  updating the times in the inode in ntfs_setattr().
+	- Fixup handling of sparse, compressed, and encrypted attributes in
+	  fs/ntfs/inode.c::ntfs_read_locked_{,attr_,index_}inode(),
+	  fs/ntfs/aops.c::ntfs_{read,write}page().
+	- Make ntfs_write_block() not instantiate sparse blocks if they contain
+	  only zeroes.
+	- Optimize fs/ntfs/aops.c::ntfs_write_block() by extending the page
+	  lock protection over the buffer submission for i/o which allows the
+	  removal of the get_bh()/put_bh() pairs for each buffer.
+	- Fix fs/ntfs/aops.c::ntfs_{read,write}_block() to handle the case
+	  where a concurrent truncate has truncated the runlist under our feet.
+	- Fix page_has_buffers()/page_buffers() handling in fs/ntfs/aops.c.
+	- In fs/ntfs/aops.c::ntfs_end_buffer_async_read(), use a bit spin lock
+	  in the first buffer head instead of a driver global spin lock to
+	  improve scalability.
+	- Minor fix to error handling and error message display in
+	  fs/ntfs/aops.c::ntfs_prepare_nonresident_write().
+	- Change the mount options {u,f,d}mask to always parse the number as
+	  an octal number to conform to how chmod(1) works, too.  Thanks to
+	  Giuseppe Bilotta and Horst von Brand for pointing out the errors of
+	  my ways.
+	- Fix various bugs in the runlist merging code.  (Based on libntfs
+	  changes by Richard Russon.)
+	- Fix sparse warnings that have crept in over time.
+	- Change ntfs_cluster_free() to require a write locked runlist on entry
+	  since we otherwise get into a lock reversal deadlock if a read locked
+	  runlist is passed in. In the process also change it to take an ntfs
+	  inode instead of a vfs inode as parameter.
+	- Fix the definition of the CHKD ntfs record magic.  It had an off by
+	  two error causing it to be CHKB instead of CHKD.
+	- Fix a stupid bug in __ntfs_bitmap_set_bits_in_run() which caused the
+	  count to become negative and hence we had a wild memset() scribbling
+	  all over the system's ram.
+
 2.1.23 - Implement extension of resident files and make writing safe as well as
 	 many bug fixes, cleanups, and enhancements...
 
@@ -174,6 +317,9 @@ ToDo/Notes:
 	  fact that the vfs and ntfs inodes are one struct in memory to find
 	  the ntfs inode in memory if present.  Also, the ntfs inode has its
 	  own locking so it does not matter if the vfs inode is locked.
+	- Fix bug in mft record writing where we forgot to set the device in
+	  the buffers when mapping them after the VM had discarded them.
+	  Thanks to Martin MOKREJŠ for the bug report.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index f083f27d8b6..d0d45d1c853 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
 	     index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
 	     unistr.o upcase.o
 
-EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23\"
+EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.25\"
 
 ifeq ($(CONFIG_NTFS_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 3f43bfe6184..1c0a4315876 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -27,6 +27,7 @@
 #include <linux/swap.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
+#include <linux/bit_spinlock.h>
 
 #include "aops.h"
 #include "attrib.h"
@@ -55,45 +56,56 @@
  */
 static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 {
-	static DEFINE_SPINLOCK(page_uptodate_lock);
 	unsigned long flags;
-	struct buffer_head *tmp;
+	struct buffer_head *first, *tmp;
 	struct page *page;
+	struct inode *vi;
 	ntfs_inode *ni;
 	int page_uptodate = 1;
 
 	page = bh->b_page;
-	ni = NTFS_I(page->mapping->host);
+	vi = page->mapping->host;
+	ni = NTFS_I(vi);
 
 	if (likely(uptodate)) {
-		s64 file_ofs, initialized_size;
+		loff_t i_size;
+		s64 file_ofs, init_size;
 
 		set_buffer_uptodate(bh);
 
 		file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
 				bh_offset(bh);
 		read_lock_irqsave(&ni->size_lock, flags);
-		initialized_size = ni->initialized_size;
+		init_size = ni->initialized_size;
+		i_size = i_size_read(vi);
 		read_unlock_irqrestore(&ni->size_lock, flags);
+		if (unlikely(init_size > i_size)) {
+			/* Race with shrinking truncate. */
+			init_size = i_size;
+		}
 		/* Check for the current buffer head overflowing. */
-		if (file_ofs + bh->b_size > initialized_size) {
-			char *addr;
-			int ofs = 0;
-
-			if (file_ofs < initialized_size)
-				ofs = initialized_size - file_ofs;
-			addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
-			memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
+		if (unlikely(file_ofs + bh->b_size > init_size)) {
+			u8 *kaddr;
+			int ofs;
+
+			ofs = 0;
+			if (file_ofs < init_size)
+				ofs = init_size - file_ofs;
+			kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+			memset(kaddr + bh_offset(bh) + ofs, 0,
+					bh->b_size - ofs);
+			kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
 			flush_dcache_page(page);
-			kunmap_atomic(addr, KM_BIO_SRC_IRQ);
 		}
 	} else {
 		clear_buffer_uptodate(bh);
-		ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
-				(unsigned long long)bh->b_blocknr);
 		SetPageError(page);
+		ntfs_error(ni->vol->sb, "Buffer I/O error, logical block "
+				"0x%llx.", (unsigned long long)bh->b_blocknr);
 	}
-	spin_lock_irqsave(&page_uptodate_lock, flags);
+	first = page_buffers(page);
+	local_irq_save(flags);
+	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
 	clear_buffer_async_read(bh);
 	unlock_buffer(bh);
 	tmp = bh;
@@ -108,7 +120,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 		}
 		tmp = tmp->b_this_page;
 	} while (tmp != bh);
-	spin_unlock_irqrestore(&page_uptodate_lock, flags);
+	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+	local_irq_restore(flags);
 	/*
 	 * If none of the buffers had errors then we can set the page uptodate,
 	 * but we first have to perform the post read mst fixups, if the
@@ -121,7 +134,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 		if (likely(page_uptodate && !PageError(page)))
 			SetPageUptodate(page);
 	} else {
-		char *addr;
+		u8 *kaddr;
 		unsigned int i, recs;
 		u32 rec_size;
 
@@ -129,19 +142,20 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 		recs = PAGE_CACHE_SIZE / rec_size;
 		/* Should have been verified before we got here... */
 		BUG_ON(!recs);
-		addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+		kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
 		for (i = 0; i < recs; i++)
-			post_read_mst_fixup((NTFS_RECORD*)(addr +
+			post_read_mst_fixup((NTFS_RECORD*)(kaddr +
 					i * rec_size), rec_size);
+		kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
 		flush_dcache_page(page);
-		kunmap_atomic(addr, KM_BIO_SRC_IRQ);
 		if (likely(page_uptodate && !PageError(page)))
 			SetPageUptodate(page);
 	}
 	unlock_page(page);
 	return;
 still_busy:
-	spin_unlock_irqrestore(&page_uptodate_lock, flags);
+	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+	local_irq_restore(flags);
 	return;
 }
 
@@ -164,8 +178,11 @@ still_busy:
  */
 static int ntfs_read_block(struct page *page)
 {
+	loff_t i_size;
 	VCN vcn;
 	LCN lcn;
+	s64 init_size;
+	struct inode *vi;
 	ntfs_inode *ni;
 	ntfs_volume *vol;
 	runlist_element *rl;
@@ -176,7 +193,8 @@ static int ntfs_read_block(struct page *page)
 	int i, nr;
 	unsigned char blocksize_bits;
 
-	ni = NTFS_I(page->mapping->host);
+	vi = page->mapping->host;
+	ni = NTFS_I(vi);
 	vol = ni->vol;
 
 	/* $MFT/$DATA must have its complete runlist in memory at all times. */
@@ -185,25 +203,45 @@ static int ntfs_read_block(struct page *page)
 	blocksize_bits = VFS_I(ni)->i_blkbits;
 	blocksize = 1 << blocksize_bits;
 
-	if (!page_has_buffers(page))
+	if (!page_has_buffers(page)) {
 		create_empty_buffers(page, blocksize, 0);
-	bh = head = page_buffers(page);
-	if (unlikely(!bh)) {
-		unlock_page(page);
-		return -ENOMEM;
+		if (unlikely(!page_has_buffers(page))) {
+			unlock_page(page);
+			return -ENOMEM;
+		}
 	}
+	bh = head = page_buffers(page);
+	BUG_ON(!bh);
 
+	/*
+	 * We may be racing with truncate.  To avoid some of the problems we
+	 * now take a snapshot of the various sizes and use those for the whole
+	 * of the function.  In case of an extending truncate it just means we
+	 * may leave some buffers unmapped which are now allocated.  This is
+	 * not a problem since these buffers will just get mapped when a write
+	 * occurs.  In case of a shrinking truncate, we will detect this later
+	 * on due to the runlist being incomplete and if the page is being
+	 * fully truncated, truncate will throw it away as soon as we unlock
+	 * it so no need to worry what we do with it.
+	 */
 	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
 	read_lock_irqsave(&ni->size_lock, flags);
 	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
-	zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
+	init_size = ni->initialized_size;
+	i_size = i_size_read(vi);
 	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (unlikely(init_size > i_size)) {
+		/* Race with shrinking truncate. */
+		init_size = i_size;
+	}
+	zblock = (init_size + blocksize - 1) >> blocksize_bits;
 
 	/* Loop through all the buffers in the page. */
 	rl = NULL;
 	nr = i = 0;
 	do {
 		u8 *kaddr;
+		int err;
 
 		if (unlikely(buffer_uptodate(bh)))
 			continue;
@@ -211,6 +249,7 @@ static int ntfs_read_block(struct page *page)
 			arr[nr++] = bh;
 			continue;
 		}
+		err = 0;
 		bh->b_bdev = vol->sb->s_bdev;
 		/* Is the block within the allowed limits? */
 		if (iblock < lblock) {
@@ -252,7 +291,6 @@ lock_retry_remap:
 				goto handle_hole;
 			/* If first try and runlist unmapped, map and retry. */
 			if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
-				int err;
 				is_retry = TRUE;
 				/*
 				 * Attempt to map runlist, dropping lock for
@@ -263,20 +301,30 @@ lock_retry_remap:
 				if (likely(!err))
 					goto lock_retry_remap;
 				rl = NULL;
-				lcn = err;
 			} else if (!rl)
 				up_read(&ni->runlist.lock);
+			/*
+			 * If buffer is outside the runlist, treat it as a
+			 * hole.  This can happen due to concurrent truncate
+			 * for example.
+			 */
+			if (err == -ENOENT || lcn == LCN_ENOENT) {
+				err = 0;
+				goto handle_hole;
+			}
 			/* Hard error, zero out region. */
+			if (!err)
+				err = -EIO;
 			bh->b_blocknr = -1;
 			SetPageError(page);
 			ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
 					"attribute type 0x%x, vcn 0x%llx, "
 					"offset 0x%x because its location on "
 					"disk could not be determined%s "
-					"(error code %lli).", ni->mft_no,
+					"(error code %i).", ni->mft_no,
 					ni->type, (unsigned long long)vcn,
 					vcn_ofs, is_retry ? " even after "
-					"retrying" : "", (long long)lcn);
+					"retrying" : "", err);
 		}
 		/*
 		 * Either iblock was outside lblock limits or
@@ -289,9 +337,10 @@ handle_hole:
 handle_zblock:
 		kaddr = kmap_atomic(page, KM_USER0);
 		memset(kaddr + i * blocksize, 0, blocksize);
-		flush_dcache_page(page);
 		kunmap_atomic(kaddr, KM_USER0);
-		set_buffer_uptodate(bh);
+		flush_dcache_page(page);
+		if (likely(!err))
+			set_buffer_uptodate(bh);
 	} while (i++, iblock++, (bh = bh->b_this_page) != head);
 
 	/* Release the lock if we took it. */
@@ -348,6 +397,8 @@ handle_zblock:
  */
 static int ntfs_readpage(struct file *file, struct page *page)
 {
+	loff_t i_size;
+	struct inode *vi;
 	ntfs_inode *ni, *base_ni;
 	u8 *kaddr;
 	ntfs_attr_search_ctx *ctx;
@@ -366,32 +417,42 @@ retry_readpage:
 		unlock_page(page);
 		return 0;
 	}
-	ni = NTFS_I(page->mapping->host);
-
+	vi = page->mapping->host;
+	ni = NTFS_I(vi);
+	/*
+	 * Only $DATA attributes can be encrypted and only unnamed $DATA
+	 * attributes can be compressed.  Index root can have the flags set but
+	 * this means to create compressed/encrypted files, not that the
+	 * attribute is compressed/encrypted.  Note we need to check for
+	 * AT_INDEX_ALLOCATION since this is the type of both directory and
+	 * index inodes.
+	 */
+	if (ni->type != AT_INDEX_ALLOCATION) {
+		/* If attribute is encrypted, deny access, just like NT4. */
+		if (NInoEncrypted(ni)) {
+			BUG_ON(ni->type != AT_DATA);
+			err = -EACCES;
+			goto err_out;
+		}
+		/* Compressed data streams are handled in compress.c. */
+		if (NInoNonResident(ni) && NInoCompressed(ni)) {
+			BUG_ON(ni->type != AT_DATA);
+			BUG_ON(ni->name_len);
+			return ntfs_read_compressed_block(page);
+		}
+	}
 	/* NInoNonResident() == NInoIndexAllocPresent() */
 	if (NInoNonResident(ni)) {
-		/*
-		 * Only unnamed $DATA attributes can be compressed or
-		 * encrypted.
-		 */
-		if (ni->type == AT_DATA && !ni->name_len) {
-			/* If file is encrypted, deny access, just like NT4. */
-			if (NInoEncrypted(ni)) {
-				err = -EACCES;
-				goto err_out;
-			}
-			/* Compressed data streams are handled in compress.c. */
-			if (NInoCompressed(ni))
-				return ntfs_read_compressed_block(page);
-		}
-		/* Normal data stream. */
+		/* Normal, non-resident data stream. */
 		return ntfs_read_block(page);
 	}
 	/*
 	 * Attribute is resident, implying it is not compressed or encrypted.
 	 * This also means the attribute is smaller than an mft record and
 	 * hence smaller than a page, so can simply zero out any pages with
-	 * index above 0.
+	 * index above 0.  Note the attribute can actually be marked compressed
+	 * but if it is resident the actual data is not compressed so we are
+	 * ok to ignore the compressed flag here.
 	 */
 	if (unlikely(page->index > 0)) {
 		kaddr = kmap_atomic(page, KM_USER0);
@@ -431,7 +492,12 @@ retry_readpage:
 	read_lock_irqsave(&ni->size_lock, flags);
 	if (unlikely(attr_len > ni->initialized_size))
 		attr_len = ni->initialized_size;
+	i_size = i_size_read(vi);
 	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (unlikely(attr_len > i_size)) {
+		/* Race with shrinking truncate. */
+		attr_len = i_size;
+	}
 	kaddr = kmap_atomic(page, KM_USER0);
 	/* Copy the data to the page. */
 	memcpy(kaddr, (u8*)ctx->attr +
@@ -511,19 +577,21 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
 		BUG_ON(!PageUptodate(page));
 		create_empty_buffers(page, blocksize,
 				(1 << BH_Uptodate) | (1 << BH_Dirty));
+		if (unlikely(!page_has_buffers(page))) {
+			ntfs_warning(vol->sb, "Error allocating page "
+					"buffers.  Redirtying page so we try "
+					"again later.");
+			/*
+			 * Put the page back on mapping->dirty_pages, but leave
+			 * its buffers' dirty state as-is.
+			 */
+			redirty_page_for_writepage(wbc, page);
+			unlock_page(page);
+			return 0;
+		}
 	}
 	bh = head = page_buffers(page);
-	if (unlikely(!bh)) {
-		ntfs_warning(vol->sb, "Error allocating page buffers. "
-				"Redirtying page so we try again later.");
-		/*
-		 * Put the page back on mapping->dirty_pages, but leave its
-		 * buffer's dirty state as-is.
-		 */
-		redirty_page_for_writepage(wbc, page);
-		unlock_page(page);
-		return 0;
-	}
+	BUG_ON(!bh);
 
 	/* NOTE: Different naming scheme to ntfs_read_block()! */
 
@@ -670,6 +738,27 @@ lock_retry_remap:
 		}
 		/* It is a hole, need to instantiate it. */
 		if (lcn == LCN_HOLE) {
+			u8 *kaddr;
+			unsigned long *bpos, *bend;
+
+			/* Check if the buffer is zero. */
+			kaddr = kmap_atomic(page, KM_USER0);
+			bpos = (unsigned long *)(kaddr + bh_offset(bh));
+			bend = (unsigned long *)((u8*)bpos + blocksize);
+			do {
+				if (unlikely(*bpos))
+					break;
+			} while (likely(++bpos < bend));
+			kunmap_atomic(kaddr, KM_USER0);
+			if (bpos == bend) {
+				/*
+				 * Buffer is zero and sparse, no need to write
+				 * it.
+				 */
+				bh->b_blocknr = -1;
+				clear_buffer_dirty(bh);
+				continue;
+			}
 			// TODO: Instantiate the hole.
 			// clear_buffer_new(bh);
 			// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
@@ -690,20 +779,37 @@ lock_retry_remap:
 			if (likely(!err))
 				goto lock_retry_remap;
 			rl = NULL;
-			lcn = err;
 		} else if (!rl)
 			up_read(&ni->runlist.lock);
+		/*
+		 * If buffer is outside the runlist, truncate has cut it out
+		 * of the runlist.  Just clean and clear the buffer and set it
+		 * uptodate so it can get discarded by the VM.
+		 */
+		if (err == -ENOENT || lcn == LCN_ENOENT) {
+			u8 *kaddr;
+
+			bh->b_blocknr = -1;
+			clear_buffer_dirty(bh);
+			kaddr = kmap_atomic(page, KM_USER0);
+			memset(kaddr + bh_offset(bh), 0, blocksize);
+			kunmap_atomic(kaddr, KM_USER0);
+			flush_dcache_page(page);
+			set_buffer_uptodate(bh);
+			err = 0;
+			continue;
+		}
 		/* Failed to map the buffer, even after retrying. */
+		if (!err)
+			err = -EIO;
 		bh->b_blocknr = -1;
 		ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
 				"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
 				"because its location on disk could not be "
-				"determined%s (error code %lli).", ni->mft_no,
+				"determined%s (error code %i).", ni->mft_no,
 				ni->type, (unsigned long long)vcn,
 				vcn_ofs, is_retry ? " even after "
-				"retrying" : "", (long long)lcn);
-		if (!err)
-			err = -EIO;
+				"retrying" : "", err);
 		break;
 	} while (block++, (bh = bh->b_this_page) != head);
 
@@ -714,7 +820,7 @@ lock_retry_remap:
 	/* For the error case, need to reset bh to the beginning. */
 	bh = head;
 
-	/* Just an optimization, so ->readpage() isn't called later. */
+	/* Just an optimization, so ->readpage() is not called later. */
 	if (unlikely(!PageUptodate(page))) {
 		int uptodate = 1;
 		do {
@@ -730,7 +836,6 @@ lock_retry_remap:
 
 	/* Setup all mapped, dirty buffers for async write i/o. */
 	do {
-		get_bh(bh);
 		if (buffer_mapped(bh) && buffer_dirty(bh)) {
 			lock_buffer(bh);
 			if (test_clear_buffer_dirty(bh)) {
@@ -768,14 +873,8 @@ lock_retry_remap:
 
 	BUG_ON(PageWriteback(page));
 	set_page_writeback(page);	/* Keeps try_to_free_buffers() away. */
-	unlock_page(page);
 
-	/*
-	 * Submit the prepared buffers for i/o. Note the page is unlocked,
-	 * and the async write i/o completion handler can end_page_writeback()
-	 * at any time after the *first* submit_bh(). So the buffers can then
-	 * disappear...
-	 */
+	/* Submit the prepared buffers for i/o. */
 	need_end_writeback = TRUE;
 	do {
 		struct buffer_head *next = bh->b_this_page;
@@ -783,9 +882,9 @@ lock_retry_remap:
 			submit_bh(WRITE, bh);
 			need_end_writeback = FALSE;
 		}
-		put_bh(bh);
 		bh = next;
 	} while (bh != head);
+	unlock_page(page);
 
 	/* If no i/o was started, need to end_page_writeback(). */
 	if (unlikely(need_end_writeback))
@@ -860,7 +959,6 @@ static int ntfs_write_mst_block(struct page *page,
 	sync = (wbc->sync_mode == WB_SYNC_ALL);
 
 	/* Make sure we have mapped buffers. */
-	BUG_ON(!page_has_buffers(page));
 	bh = head = page_buffers(page);
 	BUG_ON(!bh);
 
@@ -924,6 +1022,7 @@ static int ntfs_write_mst_block(struct page *page,
 			LCN lcn;
 			unsigned int vcn_ofs;
 
+			bh->b_bdev = vol->sb->s_bdev;
 			/* Obtain the vcn and offset of the current block. */
 			vcn = (VCN)block << bh_size_bits;
 			vcn_ofs = vcn & vol->cluster_size_mask;
@@ -1279,58 +1378,65 @@ retry_writepage:
 		ntfs_debug("Write outside i_size - truncated?");
 		return 0;
 	}
+	/*
+	 * Only $DATA attributes can be encrypted and only unnamed $DATA
+	 * attributes can be compressed.  Index root can have the flags set but
+	 * this means to create compressed/encrypted files, not that the
+	 * attribute is compressed/encrypted.  Note we need to check for
+	 * AT_INDEX_ALLOCATION since this is the type of both directory and
+	 * index inodes.
+	 */
+	if (ni->type != AT_INDEX_ALLOCATION) {
+		/* If file is encrypted, deny access, just like NT4. */
+		if (NInoEncrypted(ni)) {
+			unlock_page(page);
+			BUG_ON(ni->type != AT_DATA);
+			ntfs_debug("Denying write access to encrypted file.");
+			return -EACCES;
+		}
+		/* Compressed data streams are handled in compress.c. */
+		if (NInoNonResident(ni) && NInoCompressed(ni)) {
+			BUG_ON(ni->type != AT_DATA);
+			BUG_ON(ni->name_len);
+			// TODO: Implement and replace this with
+			// return ntfs_write_compressed_block(page);
+			unlock_page(page);
+			ntfs_error(vi->i_sb, "Writing to compressed files is "
+					"not supported yet.  Sorry.");
+			return -EOPNOTSUPP;
+		}
+		// TODO: Implement and remove this check.
+		if (NInoNonResident(ni) && NInoSparse(ni)) {
+			unlock_page(page);
+			ntfs_error(vi->i_sb, "Writing to sparse files is not "
+					"supported yet.  Sorry.");
+			return -EOPNOTSUPP;
+		}
+	}
 	/* NInoNonResident() == NInoIndexAllocPresent() */
 	if (NInoNonResident(ni)) {
-		/*
-		 * Only unnamed $DATA attributes can be compressed, encrypted,
-		 * and/or sparse.
-		 */
-		if (ni->type == AT_DATA && !ni->name_len) {
-			/* If file is encrypted, deny access, just like NT4. */
-			if (NInoEncrypted(ni)) {
-				unlock_page(page);
-				ntfs_debug("Denying write access to encrypted "
-						"file.");
-				return -EACCES;
-			}
-			/* Compressed data streams are handled in compress.c. */
-			if (NInoCompressed(ni)) {
-				// TODO: Implement and replace this check with
-				// return ntfs_write_compressed_block(page);
-				unlock_page(page);
-				ntfs_error(vi->i_sb, "Writing to compressed "
-						"files is not supported yet. "
-						"Sorry.");
-				return -EOPNOTSUPP;
-			}
-			// TODO: Implement and remove this check.
-			if (NInoSparse(ni)) {
-				unlock_page(page);
-				ntfs_error(vi->i_sb, "Writing to sparse files "
-						"is not supported yet. Sorry.");
-				return -EOPNOTSUPP;
-			}
-		}
 		/* We have to zero every time due to mmap-at-end-of-file. */
 		if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
 			/* The page straddles i_size. */
 			unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
 			kaddr = kmap_atomic(page, KM_USER0);
 			memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
-			flush_dcache_page(page);
 			kunmap_atomic(kaddr, KM_USER0);
+			flush_dcache_page(page);
 		}
 		/* Handle mst protected attributes. */
 		if (NInoMstProtected(ni))
 			return ntfs_write_mst_block(page, wbc);
-		/* Normal data stream. */
+		/* Normal, non-resident data stream. */
 		return ntfs_write_block(page, wbc);
 	}
 	/*
-	 * Attribute is resident, implying it is not compressed, encrypted,
-	 * sparse, or mst protected.  This also means the attribute is smaller
-	 * than an mft record and hence smaller than a page, so can simply
-	 * return error on any pages with index above 0.
+	 * Attribute is resident, implying it is not compressed, encrypted, or
+	 * mst protected.  This also means the attribute is smaller than an mft
+	 * record and hence smaller than a page, so can simply return error on
+	 * any pages with index above 0.  Note the attribute can actually be
+	 * marked compressed but if it is resident the actual data is not
+	 * compressed so we are ok to ignore the compressed flag here.
 	 */
 	BUG_ON(page_has_buffers(page));
 	BUG_ON(!PageUptodate(page));
@@ -1379,50 +1485,33 @@ retry_writepage:
 	BUG_ON(PageWriteback(page));
 	set_page_writeback(page);
 	unlock_page(page);
-
-	/*
-	 * Here, we don't need to zero the out of bounds area everytime because
-	 * the below memcpy() already takes care of the mmap-at-end-of-file
-	 * requirements. If the file is converted to a non-resident one, then
-	 * the code path use is switched to the non-resident one where the
-	 * zeroing happens on each ntfs_writepage() invocation.
-	 *
-	 * The above also applies nicely when i_size is decreased.
-	 *
-	 * When i_size is increased, the memory between the old and new i_size
-	 * _must_ be zeroed (or overwritten with new data). Otherwise we will
-	 * expose data to userspace/disk which should never have been exposed.
-	 *
-	 * FIXME: Ensure that i_size increases do the zeroing/overwriting and
-	 * if we cannot guarantee that, then enable the zeroing below.  If the
-	 * zeroing below is enabled, we MUST move the unlock_page() from above
-	 * to after the kunmap_atomic(), i.e. just before the
-	 * end_page_writeback().
-	 * UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size
-	 * increases for resident attributes so those are ok.
-	 * TODO: ntfs_truncate(), others?
-	 */
-
 	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
 	i_size = i_size_read(vi);
 	if (unlikely(attr_len > i_size)) {
+		/* Race with shrinking truncate or a failed truncate. */
 		attr_len = i_size;
-		ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
+		/*
+		 * If the truncate failed, fix it up now.  If a concurrent
+		 * truncate, we do its job, so it does not have to do anything.
+		 */
+		err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr,
+				attr_len);
+		/* Shrinking cannot fail. */
+		BUG_ON(err);
 	}
 	kaddr = kmap_atomic(page, KM_USER0);
 	/* Copy the data from the page to the mft record. */
 	memcpy((u8*)ctx->attr +
 			le16_to_cpu(ctx->attr->data.resident.value_offset),
 			kaddr, attr_len);
-	flush_dcache_mft_record_page(ctx->ntfs_ino);
 	/* Zero out of bounds area in the page cache page. */
 	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
-	flush_dcache_page(page);
 	kunmap_atomic(kaddr, KM_USER0);
-
+	flush_dcache_page(page);
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	/* We are done with the page. */
 	end_page_writeback(page);
-
-	/* Mark the mft record dirty, so it gets written back. */
+	/* Finally, mark the mft record dirty, so it gets written back. */
 	mark_mft_record_dirty(ctx->ntfs_ino);
 	ntfs_attr_put_search_ctx(ctx);
 	unmap_mft_record(base_ni);
@@ -1452,832 +1541,6 @@ err_out:
 	return err;
 }
 
-/**
- * ntfs_prepare_nonresident_write -
- *
- */
-static int ntfs_prepare_nonresident_write(struct page *page,
-		unsigned from, unsigned to)
-{
-	VCN vcn;
-	LCN lcn;
-	s64 initialized_size;
-	loff_t i_size;
-	sector_t block, ablock, iblock;
-	struct inode *vi;
-	ntfs_inode *ni;
-	ntfs_volume *vol;
-	runlist_element *rl;
-	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
-	unsigned long flags;
-	unsigned int vcn_ofs, block_start, block_end, blocksize;
-	int err;
-	BOOL is_retry;
-	unsigned char blocksize_bits;
-
-	vi = page->mapping->host;
-	ni = NTFS_I(vi);
-	vol = ni->vol;
-
-	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
-			"0x%lx, from = %u, to = %u.", ni->mft_no, ni->type,
-			page->index, from, to);
-
-	BUG_ON(!NInoNonResident(ni));
-
-	blocksize_bits = vi->i_blkbits;
-	blocksize = 1 << blocksize_bits;
-
-	/*
-	 * create_empty_buffers() will create uptodate/dirty buffers if the
-	 * page is uptodate/dirty.
-	 */
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, blocksize, 0);
-	bh = head = page_buffers(page);
-	if (unlikely(!bh))
-		return -ENOMEM;
-
-	/* The first block in the page. */
-	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
-
-	read_lock_irqsave(&ni->size_lock, flags);
-	/*
-	 * The first out of bounds block for the allocated size.  No need to
-	 * round up as allocated_size is in multiples of cluster size and the
-	 * minimum cluster size is 512 bytes, which is equal to the smallest
-	 * blocksize.
-	 */
-	ablock = ni->allocated_size >> blocksize_bits;
-	i_size = i_size_read(vi);
-	initialized_size = ni->initialized_size;
-	read_unlock_irqrestore(&ni->size_lock, flags);
-
-	/* The last (fully or partially) initialized block. */
-	iblock = initialized_size >> blocksize_bits;
-
-	/* Loop through all the buffers in the page. */
-	block_start = 0;
-	rl = NULL;
-	err = 0;
-	do {
-		block_end = block_start + blocksize;
-		/*
-		 * If buffer @bh is outside the write, just mark it uptodate
-		 * if the page is uptodate and continue with the next buffer.
-		 */
-		if (block_end <= from || block_start >= to) {
-			if (PageUptodate(page)) {
-				if (!buffer_uptodate(bh))
-					set_buffer_uptodate(bh);
-			}
-			continue;
-		}
-		/*
-		 * @bh is at least partially being written to.
-		 * Make sure it is not marked as new.
-		 */
-		//if (buffer_new(bh))
-		//	clear_buffer_new(bh);
-
-		if (block >= ablock) {
-			// TODO: block is above allocated_size, need to
-			// allocate it. Best done in one go to accommodate not
-			// only block but all above blocks up to and including:
-			// ((page->index << PAGE_CACHE_SHIFT) + to + blocksize
-			// - 1) >> blobksize_bits. Obviously will need to round
-			// up to next cluster boundary, too. This should be
-			// done with a helper function, so it can be reused.
-			ntfs_error(vol->sb, "Writing beyond allocated size "
-					"is not supported yet. Sorry.");
-			err = -EOPNOTSUPP;
-			goto err_out;
-			// Need to update ablock.
-			// Need to set_buffer_new() on all block bhs that are
-			// newly allocated.
-		}
-		/*
-		 * Now we have enough allocated size to fulfill the whole
-		 * request, i.e. block < ablock is true.
-		 */
-		if (unlikely((block >= iblock) &&
-				(initialized_size < i_size))) {
-			/*
-			 * If this page is fully outside initialized size, zero
-			 * out all pages between the current initialized size
-			 * and the current page. Just use ntfs_readpage() to do
-			 * the zeroing transparently.
-			 */
-			if (block > iblock) {
-				// TODO:
-				// For each page do:
-				// - read_cache_page()
-				// Again for each page do:
-				// - wait_on_page_locked()
-				// - Check (PageUptodate(page) &&
-				//			!PageError(page))
-				// Update initialized size in the attribute and
-				// in the inode.
-				// Again, for each page do:
-				//	__set_page_dirty_buffers();
-				// page_cache_release()
-				// We don't need to wait on the writes.
-				// Update iblock.
-			}
-			/*
-			 * The current page straddles initialized size. Zero
-			 * all non-uptodate buffers and set them uptodate (and
-			 * dirty?). Note, there aren't any non-uptodate buffers
-			 * if the page is uptodate.
-			 * FIXME: For an uptodate page, the buffers may need to
-			 * be written out because they were not initialized on
-			 * disk before.
-			 */
-			if (!PageUptodate(page)) {
-				// TODO:
-				// Zero any non-uptodate buffers up to i_size.
-				// Set them uptodate and dirty.
-			}
-			// TODO:
-			// Update initialized size in the attribute and in the
-			// inode (up to i_size).
-			// Update iblock.
-			// FIXME: This is inefficient. Try to batch the two
-			// size changes to happen in one go.
-			ntfs_error(vol->sb, "Writing beyond initialized size "
-					"is not supported yet. Sorry.");
-			err = -EOPNOTSUPP;
-			goto err_out;
-			// Do NOT set_buffer_new() BUT DO clear buffer range
-			// outside write request range.
-			// set_buffer_uptodate() on complete buffers as well as
-			// set_buffer_dirty().
-		}
-
-		/* Need to map unmapped buffers. */
-		if (!buffer_mapped(bh)) {
-			/* Unmapped buffer. Need to map it. */
-			bh->b_bdev = vol->sb->s_bdev;
-
-			/* Convert block into corresponding vcn and offset. */
-			vcn = (VCN)block << blocksize_bits >>
-					vol->cluster_size_bits;
-			vcn_ofs = ((VCN)block << blocksize_bits) &
-					vol->cluster_size_mask;
-
-			is_retry = FALSE;
-			if (!rl) {
-lock_retry_remap:
-				down_read(&ni->runlist.lock);
-				rl = ni->runlist.rl;
-			}
-			if (likely(rl != NULL)) {
-				/* Seek to element containing target vcn. */
-				while (rl->length && rl[1].vcn <= vcn)
-					rl++;
-				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
-			} else
-				lcn = LCN_RL_NOT_MAPPED;
-			if (unlikely(lcn < 0)) {
-				/*
-				 * We extended the attribute allocation above.
-				 * If we hit an ENOENT here it means that the
-				 * allocation was insufficient which is a bug.
-				 */
-				BUG_ON(lcn == LCN_ENOENT);
-
-				/* It is a hole, need to instantiate it. */
-				if (lcn == LCN_HOLE) {
-					// TODO: Instantiate the hole.
-					// clear_buffer_new(bh);
-					// unmap_underlying_metadata(bh->b_bdev,
-					//		bh->b_blocknr);
-					// For non-uptodate buffers, need to
-					// zero out the region outside the
-					// request in this bh or all bhs,
-					// depending on what we implemented
-					// above.
-					// Need to flush_dcache_page().
-					// Or could use set_buffer_new()
-					// instead?
-					ntfs_error(vol->sb, "Writing into "
-							"sparse regions is "
-							"not supported yet. "
-							"Sorry.");
-					err = -EOPNOTSUPP;
-					if (!rl)
-						up_read(&ni->runlist.lock);
-					goto err_out;
-				} else if (!is_retry &&
-						lcn == LCN_RL_NOT_MAPPED) {
-					is_retry = TRUE;
-					/*
-					 * Attempt to map runlist, dropping
-					 * lock for the duration.
-					 */
-					up_read(&ni->runlist.lock);
-					err = ntfs_map_runlist(ni, vcn);
-					if (likely(!err))
-						goto lock_retry_remap;
-					rl = NULL;
-					lcn = err;
-				} else if (!rl)
-					up_read(&ni->runlist.lock);
-				/*
-				 * Failed to map the buffer, even after
-				 * retrying.
-				 */
-				bh->b_blocknr = -1;
-				ntfs_error(vol->sb, "Failed to write to inode "
-						"0x%lx, attribute type 0x%x, "
-						"vcn 0x%llx, offset 0x%x "
-						"because its location on disk "
-						"could not be determined%s "
-						"(error code %lli).",
-						ni->mft_no, ni->type,
-						(unsigned long long)vcn,
-						vcn_ofs, is_retry ? " even "
-						"after retrying" : "",
-						(long long)lcn);
-				if (!err)
-					err = -EIO;
-				goto err_out;
-			}
-			/* We now have a successful remap, i.e. lcn >= 0. */
-
-			/* Setup buffer head to correct block. */
-			bh->b_blocknr = ((lcn << vol->cluster_size_bits)
-					+ vcn_ofs) >> blocksize_bits;
-			set_buffer_mapped(bh);
-
-			// FIXME: Something analogous to this is needed for
-			// each newly allocated block, i.e. BH_New.
-			// FIXME: Might need to take this out of the
-			// if (!buffer_mapped(bh)) {}, depending on how we
-			// implement things during the allocated_size and
-			// initialized_size extension code above.
-			if (buffer_new(bh)) {
-				clear_buffer_new(bh);
-				unmap_underlying_metadata(bh->b_bdev,
-						bh->b_blocknr);
-				if (PageUptodate(page)) {
-					set_buffer_uptodate(bh);
-					continue;
-				}
-				/*
-				 * Page is _not_ uptodate, zero surrounding
-				 * region. NOTE: This is how we decide if to
-				 * zero or not!
-				 */
-				if (block_end > to || block_start < from) {
-					void *kaddr;
-
-					kaddr = kmap_atomic(page, KM_USER0);
-					if (block_end > to)
-						memset(kaddr + to, 0,
-								block_end - to);
-					if (block_start < from)
-						memset(kaddr + block_start, 0,
-								from -
-								block_start);
-					flush_dcache_page(page);
-					kunmap_atomic(kaddr, KM_USER0);
-				}
-				continue;
-			}
-		}
-		/* @bh is mapped, set it uptodate if the page is uptodate. */
-		if (PageUptodate(page)) {
-			if (!buffer_uptodate(bh))
-				set_buffer_uptodate(bh);
-			continue;
-		}
-		/*
-		 * The page is not uptodate. The buffer is mapped. If it is not
-		 * uptodate, and it is only partially being written to, we need
-		 * to read the buffer in before the write, i.e. right now.
-		 */
-		if (!buffer_uptodate(bh) &&
-				(block_start < from || block_end > to)) {
-			ll_rw_block(READ, 1, &bh);
-			*wait_bh++ = bh;
-		}
-	} while (block++, block_start = block_end,
-			(bh = bh->b_this_page) != head);
-
-	/* Release the lock if we took it. */
-	if (rl) {
-		up_read(&ni->runlist.lock);
-		rl = NULL;
-	}
-
-	/* If we issued read requests, let them complete. */
-	while (wait_bh > wait) {
-		wait_on_buffer(*--wait_bh);
-		if (!buffer_uptodate(*wait_bh))
-			return -EIO;
-	}
-
-	ntfs_debug("Done.");
-	return 0;
-err_out:
-	/*
-	 * Zero out any newly allocated blocks to avoid exposing stale data.
-	 * If BH_New is set, we know that the block was newly allocated in the
-	 * above loop.
-	 * FIXME: What about initialized_size increments? Have we done all the
-	 * required zeroing above? If not this error handling is broken, and
-	 * in particular the if (block_end <= from) check is completely bogus.
-	 */
-	bh = head;
-	block_start = 0;
-	is_retry = FALSE;
-	do {
-		block_end = block_start + blocksize;
-		if (block_end <= from)
-			continue;
-		if (block_start >= to)
-			break;
-		if (buffer_new(bh)) {
-			void *kaddr;
-
-			clear_buffer_new(bh);
-			kaddr = kmap_atomic(page, KM_USER0);
-			memset(kaddr + block_start, 0, bh->b_size);
-			kunmap_atomic(kaddr, KM_USER0);
-			set_buffer_uptodate(bh);
-			mark_buffer_dirty(bh);
-			is_retry = TRUE;
-		}
-	} while (block_start = block_end, (bh = bh->b_this_page) != head);
-	if (is_retry)
-		flush_dcache_page(page);
-	if (rl)
-		up_read(&ni->runlist.lock);
-	return err;
-}
-
-/**
- * ntfs_prepare_write - prepare a page for receiving data
- *
- * This is called from generic_file_write() with i_sem held on the inode
- * (@page->mapping->host).  The @page is locked but not kmap()ped.  The source
- * data has not yet been copied into the @page.
- *
- * Need to extend the attribute/fill in holes if necessary, create blocks and
- * make partially overwritten blocks uptodate,
- *
- * i_size is not to be modified yet.
- *
- * Return 0 on success or -errno on error.
- *
- * Should be using block_prepare_write() [support for sparse files] or
- * cont_prepare_write() [no support for sparse files].  Cannot do that due to
- * ntfs specifics but can look at them for implementation guidance.
- *
- * Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is
- * the first byte in the page that will be written to and @to is the first byte
- * after the last byte that will be written to.
- */
-static int ntfs_prepare_write(struct file *file, struct page *page,
-		unsigned from, unsigned to)
-{
-	s64 new_size;
-	loff_t i_size;
-	struct inode *vi = page->mapping->host;
-	ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
-	ntfs_volume *vol = ni->vol;
-	ntfs_attr_search_ctx *ctx = NULL;
-	MFT_RECORD *m = NULL;
-	ATTR_RECORD *a;
-	u8 *kaddr;
-	u32 attr_len;
-	int err;
-
-	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
-			"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
-			page->index, from, to);
-	BUG_ON(!PageLocked(page));
-	BUG_ON(from > PAGE_CACHE_SIZE);
-	BUG_ON(to > PAGE_CACHE_SIZE);
-	BUG_ON(from > to);
-	BUG_ON(NInoMstProtected(ni));
-	/*
-	 * If a previous ntfs_truncate() failed, repeat it and abort if it
-	 * fails again.
-	 */
-	if (unlikely(NInoTruncateFailed(ni))) {
-		down_write(&vi->i_alloc_sem);
-		err = ntfs_truncate(vi);
-		up_write(&vi->i_alloc_sem);
-		if (err || NInoTruncateFailed(ni)) {
-			if (!err)
-				err = -EIO;
-			goto err_out;
-		}
-	}
-	/* If the attribute is not resident, deal with it elsewhere. */
-	if (NInoNonResident(ni)) {
-		/*
-		 * Only unnamed $DATA attributes can be compressed, encrypted,
-		 * and/or sparse.
-		 */
-		if (ni->type == AT_DATA && !ni->name_len) {
-			/* If file is encrypted, deny access, just like NT4. */
-			if (NInoEncrypted(ni)) {
-				ntfs_debug("Denying write access to encrypted "
-						"file.");
-				return -EACCES;
-			}
-			/* Compressed data streams are handled in compress.c. */
-			if (NInoCompressed(ni)) {
-				// TODO: Implement and replace this check with
-				// return ntfs_write_compressed_block(page);
-				ntfs_error(vi->i_sb, "Writing to compressed "
-						"files is not supported yet. "
-						"Sorry.");
-				return -EOPNOTSUPP;
-			}
-			// TODO: Implement and remove this check.
-			if (NInoSparse(ni)) {
-				ntfs_error(vi->i_sb, "Writing to sparse files "
-						"is not supported yet. Sorry.");
-				return -EOPNOTSUPP;
-			}
-		}
-		/* Normal data stream. */
-		return ntfs_prepare_nonresident_write(page, from, to);
-	}
-	/*
-	 * Attribute is resident, implying it is not compressed, encrypted, or
-	 * sparse.
-	 */
-	BUG_ON(page_has_buffers(page));
-	new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
-	/* If we do not need to resize the attribute allocation we are done. */
-	if (new_size <= i_size_read(vi))
-		goto done;
-	/* Map, pin, and lock the (base) mft record. */
-	if (!NInoAttr(ni))
-		base_ni = ni;
-	else
-		base_ni = ni->ext.base_ntfs_ino;
-	m = map_mft_record(base_ni);
-	if (IS_ERR(m)) {
-		err = PTR_ERR(m);
-		m = NULL;
-		ctx = NULL;
-		goto err_out;
-	}
-	ctx = ntfs_attr_get_search_ctx(base_ni, m);
-	if (unlikely(!ctx)) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
-			CASE_SENSITIVE, 0, NULL, 0, ctx);
-	if (unlikely(err)) {
-		if (err == -ENOENT)
-			err = -EIO;
-		goto err_out;
-	}
-	m = ctx->mrec;
-	a = ctx->attr;
-	/* The total length of the attribute value. */
-	attr_len = le32_to_cpu(a->data.resident.value_length);
-	/* Fix an eventual previous failure of ntfs_commit_write(). */
-	i_size = i_size_read(vi);
-	if (unlikely(attr_len > i_size)) {
-		attr_len = i_size;
-		a->data.resident.value_length = cpu_to_le32(attr_len);
-	}
-	/* If we do not need to resize the attribute allocation we are done. */
-	if (new_size <= attr_len)
-		goto done_unm;
-	/* Check if new size is allowed in $AttrDef. */
-	err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
-	if (unlikely(err)) {
-		if (err == -ERANGE) {
-			ntfs_error(vol->sb, "Write would cause the inode "
-					"0x%lx to exceed the maximum size for "
-					"its attribute type (0x%x).  Aborting "
-					"write.", vi->i_ino,
-					le32_to_cpu(ni->type));
-		} else {
-			ntfs_error(vol->sb, "Inode 0x%lx has unknown "
-					"attribute type 0x%x.  Aborting "
-					"write.", vi->i_ino,
-					le32_to_cpu(ni->type));
-			err = -EIO;
-		}
-		goto err_out2;
-	}
-	/*
-	 * Extend the attribute record to be able to store the new attribute
-	 * size.
-	 */
-	if (new_size >= vol->mft_record_size || ntfs_attr_record_resize(m, a,
-			le16_to_cpu(a->data.resident.value_offset) +
-			new_size)) {
-		/* Not enough space in the mft record. */
-		ntfs_error(vol->sb, "Not enough space in the mft record for "
-				"the resized attribute value.  This is not "
-				"supported yet.  Aborting write.");
-		err = -EOPNOTSUPP;
-		goto err_out2;
-	}
-	/*
-	 * We have enough space in the mft record to fit the write.  This
-	 * implies the attribute is smaller than the mft record and hence the
-	 * attribute must be in a single page and hence page->index must be 0.
-	 */
-	BUG_ON(page->index);
-	/*
-	 * If the beginning of the write is past the old size, enlarge the
-	 * attribute value up to the beginning of the write and fill it with
-	 * zeroes.
-	 */
-	if (from > attr_len) {
-		memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
-				attr_len, 0, from - attr_len);
-		a->data.resident.value_length = cpu_to_le32(from);
-		/* Zero the corresponding area in the page as well. */
-		if (PageUptodate(page)) {
-			kaddr = kmap_atomic(page, KM_USER0);
-			memset(kaddr + attr_len, 0, from - attr_len);
-			kunmap_atomic(kaddr, KM_USER0);
-			flush_dcache_page(page);
-		}
-	}
-	flush_dcache_mft_record_page(ctx->ntfs_ino);
-	mark_mft_record_dirty(ctx->ntfs_ino);
-done_unm:
-	ntfs_attr_put_search_ctx(ctx);
-	unmap_mft_record(base_ni);
-	/*
-	 * Because resident attributes are handled by memcpy() to/from the
-	 * corresponding MFT record, and because this form of i/o is byte
-	 * aligned rather than block aligned, there is no need to bring the
-	 * page uptodate here as in the non-resident case where we need to
-	 * bring the buffers straddled by the write uptodate before
-	 * generic_file_write() does the copying from userspace.
-	 *
-	 * We thus defer the uptodate bringing of the page region outside the
-	 * region written to to ntfs_commit_write(), which makes the code
-	 * simpler and saves one atomic kmap which is good.
-	 */
-done:
-	ntfs_debug("Done.");
-	return 0;
-err_out:
-	if (err == -ENOMEM)
-		ntfs_warning(vi->i_sb, "Error allocating memory required to "
-				"prepare the write.");
-	else {
-		ntfs_error(vi->i_sb, "Resident attribute prepare write failed "
-				"with error %i.", err);
-		NVolSetErrors(vol);
-		make_bad_inode(vi);
-	}
-err_out2:
-	if (ctx)
-		ntfs_attr_put_search_ctx(ctx);
-	if (m)
-		unmap_mft_record(base_ni);
-	return err;
-}
-
-/**
- * ntfs_commit_nonresident_write -
- *
- */
-static int ntfs_commit_nonresident_write(struct page *page,
-		unsigned from, unsigned to)
-{
-	s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
-	struct inode *vi = page->mapping->host;
-	struct buffer_head *bh, *head;
-	unsigned int block_start, block_end, blocksize;
-	BOOL partial;
-
-	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
-			"0x%lx, from = %u, to = %u.", vi->i_ino,
-			NTFS_I(vi)->type, page->index, from, to);
-	blocksize = 1 << vi->i_blkbits;
-
-	// FIXME: We need a whole slew of special cases in here for compressed
-	// files for example...
-	// For now, we know ntfs_prepare_write() would have failed so we can't
-	// get here in any of the cases which we have to special case, so we
-	// are just a ripped off, unrolled generic_commit_write().
-
-	bh = head = page_buffers(page);
-	block_start = 0;
-	partial = FALSE;
-	do {
-		block_end = block_start + blocksize;
-		if (block_end <= from || block_start >= to) {
-			if (!buffer_uptodate(bh))
-				partial = TRUE;
-		} else {
-			set_buffer_uptodate(bh);
-			mark_buffer_dirty(bh);
-		}
-	} while (block_start = block_end, (bh = bh->b_this_page) != head);
-	/*
-	 * If this is a partial write which happened to make all buffers
-	 * uptodate then we can optimize away a bogus ->readpage() for the next
-	 * read().  Here we 'discover' whether the page went uptodate as a
-	 * result of this (potentially partial) write.
-	 */
-	if (!partial)
-		SetPageUptodate(page);
-	/*
-	 * Not convinced about this at all.  See disparity comment above.  For
-	 * now we know ntfs_prepare_write() would have failed in the write
-	 * exceeds i_size case, so this will never trigger which is fine.
-	 */
-	if (pos > i_size_read(vi)) {
-		ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
-				"not supported yet.  Sorry.");
-		return -EOPNOTSUPP;
-		// vi->i_size = pos;
-		// mark_inode_dirty(vi);
-	}
-	ntfs_debug("Done.");
-	return 0;
-}
-
-/**
- * ntfs_commit_write - commit the received data
- *
- * This is called from generic_file_write() with i_sem held on the inode
- * (@page->mapping->host).  The @page is locked but not kmap()ped.  The source
- * data has already been copied into the @page.  ntfs_prepare_write() has been
- * called before the data copied and it returned success so we can take the
- * results of various BUG checks and some error handling for granted.
- *
- * Need to mark modified blocks dirty so they get written out later when
- * ntfs_writepage() is invoked by the VM.
- *
- * Return 0 on success or -errno on error.
- *
- * Should be using generic_commit_write().  This marks buffers uptodate and
- * dirty, sets the page uptodate if all buffers in the page are uptodate, and
- * updates i_size if the end of io is beyond i_size.  In that case, it also
- * marks the inode dirty.
- *
- * Cannot use generic_commit_write() due to ntfs specialities but can look at
- * it for implementation guidance.
- *
- * If things have gone as outlined in ntfs_prepare_write(), then we do not
- * need to do any page content modifications here at all, except in the write
- * to resident attribute case, where we need to do the uptodate bringing here
- * which we combine with the copying into the mft record which means we save
- * one atomic kmap.
- */
-static int ntfs_commit_write(struct file *file, struct page *page,
-		unsigned from, unsigned to)
-{
-	struct inode *vi = page->mapping->host;
-	ntfs_inode *base_ni, *ni = NTFS_I(vi);
-	char *kaddr, *kattr;
-	ntfs_attr_search_ctx *ctx;
-	MFT_RECORD *m;
-	ATTR_RECORD *a;
-	u32 attr_len;
-	int err;
-
-	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
-			"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
-			page->index, from, to);
-	/* If the attribute is not resident, deal with it elsewhere. */
-	if (NInoNonResident(ni)) {
-		/* Only unnamed $DATA attributes can be compressed/encrypted. */
-		if (ni->type == AT_DATA && !ni->name_len) {
-			/* Encrypted files need separate handling. */
-			if (NInoEncrypted(ni)) {
-				// We never get here at present!
-				BUG();
-			}
-			/* Compressed data streams are handled in compress.c. */
-			if (NInoCompressed(ni)) {
-				// TODO: Implement this!
-				// return ntfs_write_compressed_block(page);
-				// We never get here at present!
-				BUG();
-			}
-		}
-		/* Normal data stream. */
-		return ntfs_commit_nonresident_write(page, from, to);
-	}
-	/*
-	 * Attribute is resident, implying it is not compressed, encrypted, or
-	 * sparse.
-	 */
-	if (!NInoAttr(ni))
-		base_ni = ni;
-	else
-		base_ni = ni->ext.base_ntfs_ino;
-	/* Map, pin, and lock the mft record. */
-	m = map_mft_record(base_ni);
-	if (IS_ERR(m)) {
-		err = PTR_ERR(m);
-		m = NULL;
-		ctx = NULL;
-		goto err_out;
-	}
-	ctx = ntfs_attr_get_search_ctx(base_ni, m);
-	if (unlikely(!ctx)) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
-			CASE_SENSITIVE, 0, NULL, 0, ctx);
-	if (unlikely(err)) {
-		if (err == -ENOENT)
-			err = -EIO;
-		goto err_out;
-	}
-	a = ctx->attr;
-	/* The total length of the attribute value. */
-	attr_len = le32_to_cpu(a->data.resident.value_length);
-	BUG_ON(from > attr_len);
-	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
-	kaddr = kmap_atomic(page, KM_USER0);
-	/* Copy the received data from the page to the mft record. */
-	memcpy(kattr + from, kaddr + from, to - from);
-	/* Update the attribute length if necessary. */
-	if (to > attr_len) {
-		attr_len = to;
-		a->data.resident.value_length = cpu_to_le32(attr_len);
-	}
-	/*
-	 * If the page is not uptodate, bring the out of bounds area(s)
-	 * uptodate by copying data from the mft record to the page.
-	 */
-	if (!PageUptodate(page)) {
-		if (from > 0)
-			memcpy(kaddr, kattr, from);
-		if (to < attr_len)
-			memcpy(kaddr + to, kattr + to, attr_len - to);
-		/* Zero the region outside the end of the attribute value. */
-		if (attr_len < PAGE_CACHE_SIZE)
-			memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
-		/*
-		 * The probability of not having done any of the above is
-		 * extremely small, so we just flush unconditionally.
-		 */
-		flush_dcache_page(page);
-		SetPageUptodate(page);
-	}
-	kunmap_atomic(kaddr, KM_USER0);
-	/* Update i_size if necessary. */
-	if (i_size_read(vi) < attr_len) {
-		unsigned long flags;
-
-		write_lock_irqsave(&ni->size_lock, flags);
-		ni->allocated_size = ni->initialized_size = attr_len;
-		i_size_write(vi, attr_len);
-		write_unlock_irqrestore(&ni->size_lock, flags);
-	}
-	/* Mark the mft record dirty, so it gets written back. */
-	flush_dcache_mft_record_page(ctx->ntfs_ino);
-	mark_mft_record_dirty(ctx->ntfs_ino);
-	ntfs_attr_put_search_ctx(ctx);
-	unmap_mft_record(base_ni);
-	ntfs_debug("Done.");
-	return 0;
-err_out:
-	if (err == -ENOMEM) {
-		ntfs_warning(vi->i_sb, "Error allocating memory required to "
-				"commit the write.");
-		if (PageUptodate(page)) {
-			ntfs_warning(vi->i_sb, "Page is uptodate, setting "
-					"dirty so the write will be retried "
-					"later on by the VM.");
-			/*
-			 * Put the page on mapping->dirty_pages, but leave its
-			 * buffers' dirty state as-is.
-			 */
-			__set_page_dirty_nobuffers(page);
-			err = 0;
-		} else
-			ntfs_error(vi->i_sb, "Page is not uptodate.  Written "
-					"data has been lost.");
-	} else {
-		ntfs_error(vi->i_sb, "Resident attribute commit write failed "
-				"with error %i.", err);
-		NVolSetErrors(ni->vol);
-		make_bad_inode(vi);
-	}
-	if (ctx)
-		ntfs_attr_put_search_ctx(ctx);
-	if (m)
-		unmap_mft_record(base_ni);
-	return err;
-}
-
 #endif	/* NTFS_RW */
 
 /**
@@ -2289,9 +1552,6 @@ struct address_space_operations ntfs_aops = {
 						   disk request queue. */
 #ifdef NTFS_RW
 	.writepage	= ntfs_writepage,	/* Write dirty page to disk. */
-	.prepare_write	= ntfs_prepare_write,	/* Prepare page and buffers
-						   ready to receive data. */
-	.commit_write	= ntfs_commit_write,	/* Commit received data. */
 #endif /* NTFS_RW */
 };
 
@@ -2356,6 +1616,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
 			buffers_to_free = bh;
 	}
 	bh = head = page_buffers(page);
+	BUG_ON(!bh);
 	do {
 		bh_ofs = bh_offset(bh);
 		if (bh_ofs + bh_size <= ofs)
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index cd0f9e740b1..eda056bac25 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -21,7 +21,9 @@
  */
 
 #include <linux/buffer_head.h>
+#include <linux/sched.h>
 #include <linux/swap.h>
+#include <linux/writeback.h>
 
 #include "attrib.h"
 #include "debug.h"
@@ -36,25 +38,60 @@
  * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode
  * @ni:		ntfs inode for which to map (part of) a runlist
  * @vcn:	map runlist part containing this vcn
+ * @ctx:	active attribute search context if present or NULL if not
  *
  * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
  *
+ * If @ctx is specified, it is an active search context of @ni and its base mft
+ * record.  This is needed when ntfs_map_runlist_nolock() encounters unmapped
+ * runlist fragments and allows their mapping.  If you do not have the mft
+ * record mapped, you can specify @ctx as NULL and ntfs_map_runlist_nolock()
+ * will perform the necessary mapping and unmapping.
+ *
+ * Note, ntfs_map_runlist_nolock() saves the state of @ctx on entry and
+ * restores it before returning.  Thus, @ctx will be left pointing to the same
+ * attribute on return as on entry.  However, the actual pointers in @ctx may
+ * point to different memory locations on return, so you must remember to reset
+ * any cached pointers from the @ctx, i.e. after the call to
+ * ntfs_map_runlist_nolock(), you will probably want to do:
+ *	m = ctx->mrec;
+ *	a = ctx->attr;
+ * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
+ * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
+ *
  * Return 0 on success and -errno on error.  There is one special error code
  * which is not an error as such.  This is -ENOENT.  It means that @vcn is out
  * of bounds of the runlist.
  *
- * Locking: - The runlist must be locked for writing.
- *	    - This function modifies the runlist.
+ * Note the runlist can be NULL after this function returns if @vcn is zero and
+ * the attribute has zero allocated size, i.e. there simply is no runlist.
+ *
+ * WARNING: If @ctx is supplied, regardless of whether success or failure is
+ *	    returned, you need to check IS_ERR(@ctx->mrec) and if TRUE the @ctx
+ *	    is no longer valid, i.e. you need to either call
+ *	    ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
+ *	    In that case PTR_ERR(@ctx->mrec) will give you the error code for
+ *	    why the mapping of the old inode failed.
+ *
+ * Locking: - The runlist described by @ni must be locked for writing on entry
+ *	      and is locked on return.  Note the runlist will be modified.
+ *	    - If @ctx is NULL, the base mft record of @ni must not be mapped on
+ *	      entry and it will be left unmapped on return.
+ *	    - If @ctx is not NULL, the base mft record must be mapped on entry
+ *	      and it will be left mapped on return.
  */
-int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
+int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn, ntfs_attr_search_ctx *ctx)
 {
 	VCN end_vcn;
+	unsigned long flags;
 	ntfs_inode *base_ni;
 	MFT_RECORD *m;
 	ATTR_RECORD *a;
-	ntfs_attr_search_ctx *ctx;
 	runlist_element *rl;
+	struct page *put_this_page = NULL;
 	int err = 0;
+	BOOL ctx_is_temporary, ctx_needs_reset;
+	ntfs_attr_search_ctx old_ctx = { NULL, };
 
 	ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
 			(unsigned long long)vcn);
@@ -62,20 +99,77 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
 		base_ni = ni;
 	else
 		base_ni = ni->ext.base_ntfs_ino;
-	m = map_mft_record(base_ni);
-	if (IS_ERR(m))
-		return PTR_ERR(m);
-	ctx = ntfs_attr_get_search_ctx(base_ni, m);
-	if (unlikely(!ctx)) {
-		err = -ENOMEM;
-		goto err_out;
+	if (!ctx) {
+		ctx_is_temporary = ctx_needs_reset = TRUE;
+		m = map_mft_record(base_ni);
+		if (IS_ERR(m))
+			return PTR_ERR(m);
+		ctx = ntfs_attr_get_search_ctx(base_ni, m);
+		if (unlikely(!ctx)) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+	} else {
+		VCN allocated_size_vcn;
+
+		BUG_ON(IS_ERR(ctx->mrec));
+		a = ctx->attr;
+		BUG_ON(!a->non_resident);
+		ctx_is_temporary = FALSE;
+		end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
+		read_lock_irqsave(&ni->size_lock, flags);
+		allocated_size_vcn = ni->allocated_size >>
+				ni->vol->cluster_size_bits;
+		read_unlock_irqrestore(&ni->size_lock, flags);
+		if (!a->data.non_resident.lowest_vcn && end_vcn <= 0)
+			end_vcn = allocated_size_vcn - 1;
+		/*
+		 * If we already have the attribute extent containing @vcn in
+		 * @ctx, no need to look it up again.  We slightly cheat in
+		 * that if vcn exceeds the allocated size, we will refuse to
+		 * map the runlist below, so there is definitely no need to get
+		 * the right attribute extent.
+		 */
+		if (vcn >= allocated_size_vcn || (a->type == ni->type &&
+				a->name_length == ni->name_len &&
+				!memcmp((u8*)a + le16_to_cpu(a->name_offset),
+				ni->name, ni->name_len) &&
+				sle64_to_cpu(a->data.non_resident.lowest_vcn)
+				<= vcn && end_vcn >= vcn))
+			ctx_needs_reset = FALSE;
+		else {
+			/* Save the old search context. */
+			old_ctx = *ctx;
+			/*
+			 * If the currently mapped (extent) inode is not the
+			 * base inode we will unmap it when we reinitialize the
+			 * search context which means we need to get a
+			 * reference to the page containing the mapped mft
+			 * record so we do not accidentally drop changes to the
+			 * mft record when it has not been marked dirty yet.
+			 */
+			if (old_ctx.base_ntfs_ino && old_ctx.ntfs_ino !=
+					old_ctx.base_ntfs_ino) {
+				put_this_page = old_ctx.ntfs_ino->page;
+				page_cache_get(put_this_page);
+			}
+			/*
+			 * Reinitialize the search context so we can lookup the
+			 * needed attribute extent.
+			 */
+			ntfs_attr_reinit_search_ctx(ctx);
+			ctx_needs_reset = TRUE;
+		}
 	}
-	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
-			CASE_SENSITIVE, vcn, NULL, 0, ctx);
-	if (unlikely(err)) {
-		if (err == -ENOENT)
-			err = -EIO;
-		goto err_out;
+	if (ctx_needs_reset) {
+		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+				CASE_SENSITIVE, vcn, NULL, 0, ctx);
+		if (unlikely(err)) {
+			if (err == -ENOENT)
+				err = -EIO;
+			goto err_out;
+		}
+		BUG_ON(!ctx->attr->non_resident);
 	}
 	a = ctx->attr;
 	/*
@@ -85,8 +179,9 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
 	 * ntfs_mapping_pairs_decompress() fails.
 	 */
 	end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1;
-	if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1))
-		end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits;
+	if (!a->data.non_resident.lowest_vcn && end_vcn == 1)
+		end_vcn = sle64_to_cpu(a->data.non_resident.allocated_size) >>
+				ni->vol->cluster_size_bits;
 	if (unlikely(vcn >= end_vcn)) {
 		err = -ENOENT;
 		goto err_out;
@@ -97,9 +192,93 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
 	else
 		ni->runlist.rl = rl;
 err_out:
-	if (likely(ctx))
-		ntfs_attr_put_search_ctx(ctx);
-	unmap_mft_record(base_ni);
+	if (ctx_is_temporary) {
+		if (likely(ctx))
+			ntfs_attr_put_search_ctx(ctx);
+		unmap_mft_record(base_ni);
+	} else if (ctx_needs_reset) {
+		/*
+		 * If there is no attribute list, restoring the search context
+		 * is acomplished simply by copying the saved context back over
+		 * the caller supplied context.  If there is an attribute list,
+		 * things are more complicated as we need to deal with mapping
+		 * of mft records and resulting potential changes in pointers.
+		 */
+		if (NInoAttrList(base_ni)) {
+			/*
+			 * If the currently mapped (extent) inode is not the
+			 * one we had before, we need to unmap it and map the
+			 * old one.
+			 */
+			if (ctx->ntfs_ino != old_ctx.ntfs_ino) {
+				/*
+				 * If the currently mapped inode is not the
+				 * base inode, unmap it.
+				 */
+				if (ctx->base_ntfs_ino && ctx->ntfs_ino !=
+						ctx->base_ntfs_ino) {
+					unmap_extent_mft_record(ctx->ntfs_ino);
+					ctx->mrec = ctx->base_mrec;
+					BUG_ON(!ctx->mrec);
+				}
+				/*
+				 * If the old mapped inode is not the base
+				 * inode, map it.
+				 */
+				if (old_ctx.base_ntfs_ino &&
+						old_ctx.ntfs_ino !=
+						old_ctx.base_ntfs_ino) {
+retry_map:
+					ctx->mrec = map_mft_record(
+							old_ctx.ntfs_ino);
+					/*
+					 * Something bad has happened.  If out
+					 * of memory retry till it succeeds.
+					 * Any other errors are fatal and we
+					 * return the error code in ctx->mrec.
+					 * Let the caller deal with it...  We
+					 * just need to fudge things so the
+					 * caller can reinit and/or put the
+					 * search context safely.
+					 */
+					if (IS_ERR(ctx->mrec)) {
+						if (PTR_ERR(ctx->mrec) ==
+								-ENOMEM) {
+							schedule();
+							goto retry_map;
+						} else
+							old_ctx.ntfs_ino =
+								old_ctx.
+								base_ntfs_ino;
+					}
+				}
+			}
+			/* Update the changed pointers in the saved context. */
+			if (ctx->mrec != old_ctx.mrec) {
+				if (!IS_ERR(ctx->mrec))
+					old_ctx.attr = (ATTR_RECORD*)(
+							(u8*)ctx->mrec +
+							((u8*)old_ctx.attr -
+							(u8*)old_ctx.mrec));
+				old_ctx.mrec = ctx->mrec;
+			}
+		}
+		/* Restore the search context to the saved one. */
+		*ctx = old_ctx;
+		/*
+		 * We drop the reference on the page we took earlier.  In the
+		 * case that IS_ERR(ctx->mrec) is true this means we might lose
+		 * some changes to the mft record that had been made between
+		 * the last time it was marked dirty/written out and now.  This
+		 * at this stage is not a problem as the mapping error is fatal
+		 * enough that the mft record cannot be written out anyway and
+		 * the caller is very likely to shutdown the whole inode
+		 * immediately and mark the volume dirty for chkdsk to pick up
+		 * the pieces anyway.
+		 */
+		if (put_this_page)
+			page_cache_release(put_this_page);
+	}
 	return err;
 }
 
@@ -115,8 +294,8 @@ err_out:
  * of bounds of the runlist.
  *
  * Locking: - The runlist must be unlocked on entry and is unlocked on return.
- *	    - This function takes the runlist lock for writing and modifies the
- *	      runlist.
+ *	    - This function takes the runlist lock for writing and may modify
+ *	      the runlist.
  */
 int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
 {
@@ -126,7 +305,7 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
 	/* Make sure someone else didn't do the work while we were sleeping. */
 	if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
 			LCN_RL_NOT_MAPPED))
-		err = ntfs_map_runlist_nolock(ni, vcn);
+		err = ntfs_map_runlist_nolock(ni, vcn, NULL);
 	up_write(&ni->runlist.lock);
 	return err;
 }
@@ -165,6 +344,7 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
 		const BOOL write_locked)
 {
 	LCN lcn;
+	unsigned long flags;
 	BOOL is_retry = FALSE;
 
 	ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
@@ -173,6 +353,14 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
 	BUG_ON(!ni);
 	BUG_ON(!NInoNonResident(ni));
 	BUG_ON(vcn < 0);
+	if (!ni->runlist.rl) {
+		read_lock_irqsave(&ni->size_lock, flags);
+		if (!ni->allocated_size) {
+			read_unlock_irqrestore(&ni->size_lock, flags);
+			return LCN_ENOENT;
+		}
+		read_unlock_irqrestore(&ni->size_lock, flags);
+	}
 retry_remap:
 	/* Convert vcn to lcn.  If that fails map the runlist and retry once. */
 	lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn);
@@ -196,7 +384,7 @@ retry_remap:
 				goto retry_remap;
 			}
 		}
-		err = ntfs_map_runlist_nolock(ni, vcn);
+		err = ntfs_map_runlist_nolock(ni, vcn, NULL);
 		if (!write_locked) {
 			up_write(&ni->runlist.lock);
 			down_read(&ni->runlist.lock);
@@ -220,9 +408,9 @@ retry_remap:
 
 /**
  * ntfs_attr_find_vcn_nolock - find a vcn in the runlist of an ntfs inode
- * @ni:			ntfs inode describing the runlist to search
- * @vcn:		vcn to find
- * @write_locked:	true if the runlist is locked for writing
+ * @ni:		ntfs inode describing the runlist to search
+ * @vcn:	vcn to find
+ * @ctx:	active attribute search context if present or NULL if not
  *
  * Find the virtual cluster number @vcn in the runlist described by the ntfs
  * inode @ni and return the address of the runlist element containing the @vcn.
@@ -230,9 +418,22 @@ retry_remap:
  * If the @vcn is not mapped yet, the attempt is made to map the attribute
  * extent containing the @vcn and the vcn to lcn conversion is retried.
  *
- * If @write_locked is true the caller has locked the runlist for writing and
- * if false for reading.
- *
+ * If @ctx is specified, it is an active search context of @ni and its base mft
+ * record.  This is needed when ntfs_attr_find_vcn_nolock() encounters unmapped
+ * runlist fragments and allows their mapping.  If you do not have the mft
+ * record mapped, you can specify @ctx as NULL and ntfs_attr_find_vcn_nolock()
+ * will perform the necessary mapping and unmapping.
+ *
+ * Note, ntfs_attr_find_vcn_nolock() saves the state of @ctx on entry and
+ * restores it before returning.  Thus, @ctx will be left pointing to the same
+ * attribute on return as on entry.  However, the actual pointers in @ctx may
+ * point to different memory locations on return, so you must remember to reset
+ * any cached pointers from the @ctx, i.e. after the call to
+ * ntfs_attr_find_vcn_nolock(), you will probably want to do:
+ *	m = ctx->mrec;
+ *	a = ctx->attr;
+ * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
+ * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
  * Note you need to distinguish between the lcn of the returned runlist element
  * being >= 0 and LCN_HOLE.  In the later case you have to return zeroes on
  * read and allocate clusters on write.
@@ -247,24 +448,42 @@ retry_remap:
  *	-ENOMEM - Not enough memory to map runlist.
  *	-EIO	- Critical error (runlist/file is corrupt, i/o error, etc).
  *
- * Locking: - The runlist must be locked on entry and is left locked on return.
- *	    - If @write_locked is FALSE, i.e. the runlist is locked for reading,
- *	      the lock may be dropped inside the function so you cannot rely on
- *	      the runlist still being the same when this function returns.
+ * WARNING: If @ctx is supplied, regardless of whether success or failure is
+ *	    returned, you need to check IS_ERR(@ctx->mrec) and if TRUE the @ctx
+ *	    is no longer valid, i.e. you need to either call
+ *	    ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
+ *	    In that case PTR_ERR(@ctx->mrec) will give you the error code for
+ *	    why the mapping of the old inode failed.
+ *
+ * Locking: - The runlist described by @ni must be locked for writing on entry
+ *	      and is locked on return.  Note the runlist may be modified when
+ *	      needed runlist fragments need to be mapped.
+ *	    - If @ctx is NULL, the base mft record of @ni must not be mapped on
+ *	      entry and it will be left unmapped on return.
+ *	    - If @ctx is not NULL, the base mft record must be mapped on entry
+ *	      and it will be left mapped on return.
  */
 runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
-		const BOOL write_locked)
+		ntfs_attr_search_ctx *ctx)
 {
+	unsigned long flags;
 	runlist_element *rl;
 	int err = 0;
 	BOOL is_retry = FALSE;
 
-	ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
-			ni->mft_no, (unsigned long long)vcn,
-			write_locked ? "write" : "read");
+	ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.",
+			ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out");
 	BUG_ON(!ni);
 	BUG_ON(!NInoNonResident(ni));
 	BUG_ON(vcn < 0);
+	if (!ni->runlist.rl) {
+		read_lock_irqsave(&ni->size_lock, flags);
+		if (!ni->allocated_size) {
+			read_unlock_irqrestore(&ni->size_lock, flags);
+			return ERR_PTR(-ENOENT);
+		}
+		read_unlock_irqrestore(&ni->size_lock, flags);
+	}
 retry_remap:
 	rl = ni->runlist.rl;
 	if (likely(rl && vcn >= rl[0].vcn)) {
@@ -287,33 +506,22 @@ retry_remap:
 	}
 	if (!err && !is_retry) {
 		/*
-		 * The @vcn is in an unmapped region, map the runlist and
-		 * retry.
+		 * If the search context is invalid we cannot map the unmapped
+		 * region.
 		 */
-		if (!write_locked) {
-			up_read(&ni->runlist.lock);
-			down_write(&ni->runlist.lock);
-			if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) !=
-					LCN_RL_NOT_MAPPED)) {
-				up_write(&ni->runlist.lock);
-				down_read(&ni->runlist.lock);
+		if (IS_ERR(ctx->mrec))
+			err = PTR_ERR(ctx->mrec);
+		else {
+			/*
+			 * The @vcn is in an unmapped region, map the runlist
+			 * and retry.
+			 */
+			err = ntfs_map_runlist_nolock(ni, vcn, ctx);
+			if (likely(!err)) {
+				is_retry = TRUE;
 				goto retry_remap;
 			}
 		}
-		err = ntfs_map_runlist_nolock(ni, vcn);
-		if (!write_locked) {
-			up_write(&ni->runlist.lock);
-			down_read(&ni->runlist.lock);
-		}
-		if (likely(!err)) {
-			is_retry = TRUE;
-			goto retry_remap;
-		}
-		/*
-		 * -EINVAL coming from a failed mapping attempt is equivalent
-		 * to i/o error for us as it should not happen in our code
-		 * paths.
-		 */
 		if (err == -EINVAL)
 			err = -EIO;
 	} else if (!err)
@@ -528,6 +736,11 @@ int load_attribute_list(ntfs_volume *vol, runlist *runlist, u8 *al_start,
 	block_size_bits = sb->s_blocksize_bits;
 	down_read(&runlist->lock);
 	rl = runlist->rl;
+	if (!rl) {
+		ntfs_error(sb, "Cannot read attribute list since runlist is "
+				"missing.");
+		goto err_out;	
+	}
 	/* Read all clusters specified by the runlist one run at a time. */
 	while (rl->length) {
 		lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn);
@@ -981,6 +1194,7 @@ int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
 	ntfs_inode *base_ni;
 
 	ntfs_debug("Entering.");
+	BUG_ON(IS_ERR(ctx->mrec));
 	if (ctx->base_ntfs_ino)
 		base_ni = ctx->base_ntfs_ino;
 	else
@@ -1197,7 +1411,7 @@ int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type)
  */
 int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type)
 {
-	if (type == AT_INDEX_ALLOCATION || type == AT_EA)
+	if (type == AT_INDEX_ALLOCATION)
 		return -EPERM;
 	return 0;
 }
@@ -1247,12 +1461,59 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
 }
 
 /**
+ * ntfs_resident_attr_value_resize - resize the value of a resident attribute
+ * @m:		mft record containing attribute record
+ * @a:		attribute record whose value to resize
+ * @new_size:	new size in bytes to which to resize the attribute value of @a
+ *
+ * Resize the value of the attribute @a in the mft record @m to @new_size bytes.
+ * If the value is made bigger, the newly allocated space is cleared.
+ *
+ * Return 0 on success and -errno on error.  The following error codes are
+ * defined:
+ *	-ENOSPC	- Not enough space in the mft record @m to perform the resize.
+ *
+ * Note: On error, no modifications have been performed whatsoever.
+ *
+ * Warning: If you make a record smaller without having copied all the data you
+ *	    are interested in the data may be overwritten.
+ */
+int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
+		const u32 new_size)
+{
+	u32 old_size;
+
+	/* Resize the resident part of the attribute record. */
+	if (ntfs_attr_record_resize(m, a,
+			le16_to_cpu(a->data.resident.value_offset) + new_size))
+		return -ENOSPC;
+	/*
+	 * The resize succeeded!  If we made the attribute value bigger, clear
+	 * the area between the old size and @new_size.
+	 */
+	old_size = le32_to_cpu(a->data.resident.value_length);
+	if (new_size > old_size)
+		memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
+				old_size, 0, new_size - old_size);
+	/* Finally update the length of the attribute value. */
+	a->data.resident.value_length = cpu_to_le32(new_size);
+	return 0;
+}
+
+/**
  * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute
  * @ni:		ntfs inode describing the attribute to convert
+ * @data_size:	size of the resident data to copy to the non-resident attribute
  *
  * Convert the resident ntfs attribute described by the ntfs inode @ni to a
  * non-resident one.
  *
+ * @data_size must be equal to the attribute value size.  This is needed since
+ * we need to know the size before we can map the mft record and our callers
+ * always know it.  The reason we cannot simply read the size from the vfs
+ * inode i_size is that this is not necessarily uptodate.  This happens when
+ * ntfs_attr_make_non_resident() is called in the ->truncate call path(s).
+ *
  * Return 0 on success and -errno on error.  The following error return codes
  * are defined:
  *	-EPERM	- The attribute is not allowed to be non-resident.
@@ -1273,7 +1534,7 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
  *
  * Locking: - The caller must hold i_sem on the inode.
  */
-int ntfs_attr_make_non_resident(ntfs_inode *ni)
+int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
 {
 	s64 new_size;
 	struct inode *vi = VFS_I(ni);
@@ -1302,14 +1563,18 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 		return err;
 	}
 	/*
+	 * FIXME: Compressed and encrypted attributes are not supported when
+	 * writing and we should never have gotten here for them.
+	 */
+	BUG_ON(NInoCompressed(ni));
+	BUG_ON(NInoEncrypted(ni));
+	/*
 	 * The size needs to be aligned to a cluster boundary for allocation
 	 * purposes.
 	 */
-	new_size = (i_size_read(vi) + vol->cluster_size - 1) &
+	new_size = (data_size + vol->cluster_size - 1) &
 			~(vol->cluster_size - 1);
 	if (new_size > 0) {
-		runlist_element *rl2;
-
 		/*
 		 * Will need the page later and since the page lock nests
 		 * outside all ntfs locks, we need to get the page now.
@@ -1320,7 +1585,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 			return -ENOMEM;
 		/* Start by allocating clusters to hold the attribute value. */
 		rl = ntfs_cluster_alloc(vol, 0, new_size >>
-				vol->cluster_size_bits, -1, DATA_ZONE);
+				vol->cluster_size_bits, -1, DATA_ZONE, TRUE);
 		if (IS_ERR(rl)) {
 			err = PTR_ERR(rl);
 			ntfs_debug("Failed to allocate cluster%s, error code "
@@ -1329,12 +1594,6 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 					err);
 			goto page_err_out;
 		}
-		/* Change the runlist terminator to LCN_ENOENT. */
-		rl2 = rl;
-		while (rl2->length)
-			rl2++;
-		BUG_ON(rl2->lcn != LCN_RL_NOT_MAPPED);
-		rl2->lcn = LCN_ENOENT;
 	} else {
 		rl = NULL;
 		page = NULL;
@@ -1377,10 +1636,15 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 	BUG_ON(a->non_resident);
 	/*
 	 * Calculate new offsets for the name and the mapping pairs array.
-	 * We assume the attribute is not compressed or sparse.
 	 */
-	name_ofs = (offsetof(ATTR_REC,
-			data.non_resident.compressed_size) + 7) & ~7;
+	if (NInoSparse(ni) || NInoCompressed(ni))
+		name_ofs = (offsetof(ATTR_REC,
+				data.non_resident.compressed_size) +
+				sizeof(a->data.non_resident.compressed_size) +
+				7) & ~7;
+	else
+		name_ofs = (offsetof(ATTR_REC,
+				data.non_resident.compressed_size) + 7) & ~7;
 	mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
 	/*
 	 * Determine the size of the resident part of the now non-resident
@@ -1392,7 +1656,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 	 * attribute value.
 	 */
 	attr_size = le32_to_cpu(a->data.resident.value_length);
-	BUG_ON(attr_size != i_size_read(vi));
+	BUG_ON(attr_size != data_size);
 	if (page && !PageUptodate(page)) {
 		kaddr = kmap_atomic(page, KM_USER0);
 		memcpy(kaddr, (u8*)a +
@@ -1419,24 +1683,23 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 		memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
 				a->name_length * sizeof(ntfschar));
 	a->name_offset = cpu_to_le16(name_ofs);
-	/*
-	 * FIXME: For now just clear all of these as we do not support them
-	 * when writing.
-	 */
-	a->flags &= cpu_to_le16(0xffff & ~le16_to_cpu(ATTR_IS_SPARSE |
-			ATTR_IS_ENCRYPTED | ATTR_COMPRESSION_MASK));
 	/* Setup the fields specific to non-resident attributes. */
 	a->data.non_resident.lowest_vcn = 0;
 	a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >>
 			vol->cluster_size_bits);
 	a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs);
-	a->data.non_resident.compression_unit = 0;
 	memset(&a->data.non_resident.reserved, 0,
 			sizeof(a->data.non_resident.reserved));
 	a->data.non_resident.allocated_size = cpu_to_sle64(new_size);
 	a->data.non_resident.data_size =
 			a->data.non_resident.initialized_size =
 			cpu_to_sle64(attr_size);
+	if (NInoSparse(ni) || NInoCompressed(ni)) {
+		a->data.non_resident.compression_unit = 4;
+		a->data.non_resident.compressed_size =
+				a->data.non_resident.allocated_size;
+	} else
+		a->data.non_resident.compression_unit = 0;
 	/* Generate the mapping pairs array into the attribute record. */
 	err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs,
 			arec_size - mp_ofs, rl, 0, -1, NULL);
@@ -1446,16 +1709,21 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 		goto undo_err_out;
 	}
 	/* Setup the in-memory attribute structure to be non-resident. */
-	/*
-	 * FIXME: For now just clear all of these as we do not support them
-	 * when writing.
-	 */
-	NInoClearSparse(ni);
-	NInoClearEncrypted(ni);
-	NInoClearCompressed(ni);
 	ni->runlist.rl = rl;
 	write_lock_irqsave(&ni->size_lock, flags);
 	ni->allocated_size = new_size;
+	if (NInoSparse(ni) || NInoCompressed(ni)) {
+		ni->itype.compressed.size = ni->allocated_size;
+		ni->itype.compressed.block_size = 1U <<
+				(a->data.non_resident.compression_unit +
+				vol->cluster_size_bits);
+		ni->itype.compressed.block_size_bits =
+				ffs(ni->itype.compressed.block_size) - 1;
+		ni->itype.compressed.block_clusters = 1U <<
+				a->data.non_resident.compression_unit;
+		vi->i_blocks = ni->itype.compressed.size >> 9;
+	} else
+		vi->i_blocks = ni->allocated_size >> 9;
 	write_unlock_irqrestore(&ni->size_lock, flags);
 	/*
 	 * This needs to be last since the address space operations ->readpage
@@ -1569,6 +1837,640 @@ page_err_out:
 }
 
 /**
+ * ntfs_attr_extend_allocation - extend the allocated space of an attribute
+ * @ni:			ntfs inode of the attribute whose allocation to extend
+ * @new_alloc_size:	new size in bytes to which to extend the allocation to
+ * @new_data_size:	new size in bytes to which to extend the data to
+ * @data_start:		beginning of region which is required to be non-sparse
+ *
+ * Extend the allocated space of an attribute described by the ntfs inode @ni
+ * to @new_alloc_size bytes.  If @data_start is -1, the whole extension may be
+ * implemented as a hole in the file (as long as both the volume and the ntfs
+ * inode @ni have sparse support enabled).  If @data_start is >= 0, then the
+ * region between the old allocated size and @data_start - 1 may be made sparse
+ * but the regions between @data_start and @new_alloc_size must be backed by
+ * actual clusters.
+ *
+ * If @new_data_size is -1, it is ignored.  If it is >= 0, then the data size
+ * of the attribute is extended to @new_data_size.  Note that the i_size of the
+ * vfs inode is not updated.  Only the data size in the base attribute record
+ * is updated.  The caller has to update i_size separately if this is required.
+ * WARNING: It is a BUG() for @new_data_size to be smaller than the old data
+ * size as well as for @new_data_size to be greater than @new_alloc_size.
+ *
+ * For resident attributes this involves resizing the attribute record and if
+ * necessary moving it and/or other attributes into extent mft records and/or
+ * converting the attribute to a non-resident attribute which in turn involves
+ * extending the allocation of a non-resident attribute as described below.
+ *
+ * For non-resident attributes this involves allocating clusters in the data
+ * zone on the volume (except for regions that are being made sparse) and
+ * extending the run list to describe the allocated clusters as well as
+ * updating the mapping pairs array of the attribute.  This in turn involves
+ * resizing the attribute record and if necessary moving it and/or other
+ * attributes into extent mft records and/or splitting the attribute record
+ * into multiple extent attribute records.
+ *
+ * Also, the attribute list attribute is updated if present and in some of the
+ * above cases (the ones where extent mft records/attributes come into play),
+ * an attribute list attribute is created if not already present.
+ *
+ * Return the new allocated size on success and -errno on error.  In the case
+ * that an error is encountered but a partial extension at least up to
+ * @data_start (if present) is possible, the allocation is partially extended
+ * and this is returned.  This means the caller must check the returned size to
+ * determine if the extension was partial.  If @data_start is -1 then partial
+ * allocations are not performed.
+ *
+ * WARNING: Do not call ntfs_attr_extend_allocation() for $MFT/$DATA.
+ *
+ * Locking: This function takes the runlist lock of @ni for writing as well as
+ * locking the mft record of the base ntfs inode.  These locks are maintained
+ * throughout execution of the function.  These locks are required so that the
+ * attribute can be resized safely and so that it can for example be converted
+ * from resident to non-resident safely.
+ *
+ * TODO: At present attribute list attribute handling is not implemented.
+ *
+ * TODO: At present it is not safe to call this function for anything other
+ * than the $DATA attribute(s) of an uncompressed and unencrypted file.
+ */
+s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size,
+		const s64 new_data_size, const s64 data_start)
+{
+	VCN vcn;
+	s64 ll, allocated_size, start = data_start;
+	struct inode *vi = VFS_I(ni);
+	ntfs_volume *vol = ni->vol;
+	ntfs_inode *base_ni;
+	MFT_RECORD *m;
+	ATTR_RECORD *a;
+	ntfs_attr_search_ctx *ctx;
+	runlist_element *rl, *rl2;
+	unsigned long flags;
+	int err, mp_size;
+	u32 attr_len = 0; /* Silence stupid gcc warning. */
+	BOOL mp_rebuilt;
+
+#ifdef NTFS_DEBUG
+	read_lock_irqsave(&ni->size_lock, flags);
+	allocated_size = ni->allocated_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
+			"old_allocated_size 0x%llx, "
+			"new_allocated_size 0x%llx, new_data_size 0x%llx, "
+			"data_start 0x%llx.", vi->i_ino,
+			(unsigned)le32_to_cpu(ni->type),
+			(unsigned long long)allocated_size,
+			(unsigned long long)new_alloc_size,
+			(unsigned long long)new_data_size,
+			(unsigned long long)start);
+#endif
+retry_extend:
+	/*
+	 * For non-resident attributes, @start and @new_size need to be aligned
+	 * to cluster boundaries for allocation purposes.
+	 */
+	if (NInoNonResident(ni)) {
+		if (start > 0)
+			start &= ~(s64)vol->cluster_size_mask;
+		new_alloc_size = (new_alloc_size + vol->cluster_size - 1) &
+				~(s64)vol->cluster_size_mask;
+	}
+	BUG_ON(new_data_size >= 0 && new_data_size > new_alloc_size);
+	/* Check if new size is allowed in $AttrDef. */
+	err = ntfs_attr_size_bounds_check(vol, ni->type, new_alloc_size);
+	if (unlikely(err)) {
+		/* Only emit errors when the write will fail completely. */
+		read_lock_irqsave(&ni->size_lock, flags);
+		allocated_size = ni->allocated_size;
+		read_unlock_irqrestore(&ni->size_lock, flags);
+		if (start < 0 || start >= allocated_size) {
+			if (err == -ERANGE) {
+				ntfs_error(vol->sb, "Cannot extend allocation "
+						"of inode 0x%lx, attribute "
+						"type 0x%x, because the new "
+						"allocation would exceed the "
+						"maximum allowed size for "
+						"this attribute type.",
+						vi->i_ino, (unsigned)
+						le32_to_cpu(ni->type));
+			} else {
+				ntfs_error(vol->sb, "Cannot extend allocation "
+						"of inode 0x%lx, attribute "
+						"type 0x%x, because this "
+						"attribute type is not "
+						"defined on the NTFS volume.  "
+						"Possible corruption!  You "
+						"should run chkdsk!",
+						vi->i_ino, (unsigned)
+						le32_to_cpu(ni->type));
+			}
+		}
+		/* Translate error code to be POSIX conformant for write(2). */
+		if (err == -ERANGE)
+			err = -EFBIG;
+		else
+			err = -EIO;
+		return err;
+	}
+	if (!NInoAttr(ni))
+		base_ni = ni;
+	else
+		base_ni = ni->ext.base_ntfs_ino;
+	/*
+	 * We will be modifying both the runlist (if non-resident) and the mft
+	 * record so lock them both down.
+	 */
+	down_write(&ni->runlist.lock);
+	m = map_mft_record(base_ni);
+	if (IS_ERR(m)) {
+		err = PTR_ERR(m);
+		m = NULL;
+		ctx = NULL;
+		goto err_out;
+	}
+	ctx = ntfs_attr_get_search_ctx(base_ni, m);
+	if (unlikely(!ctx)) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	read_lock_irqsave(&ni->size_lock, flags);
+	allocated_size = ni->allocated_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	/*
+	 * If non-resident, seek to the last extent.  If resident, there is
+	 * only one extent, so seek to that.
+	 */
+	vcn = NInoNonResident(ni) ? allocated_size >> vol->cluster_size_bits :
+			0;
+	/*
+	 * Abort if someone did the work whilst we waited for the locks.  If we
+	 * just converted the attribute from resident to non-resident it is
+	 * likely that exactly this has happened already.  We cannot quite
+	 * abort if we need to update the data size.
+	 */
+	if (unlikely(new_alloc_size <= allocated_size)) {
+		ntfs_debug("Allocated size already exceeds requested size.");
+		new_alloc_size = allocated_size;
+		if (new_data_size < 0)
+			goto done;
+		/*
+		 * We want the first attribute extent so that we can update the
+		 * data size.
+		 */
+		vcn = 0;
+	}
+	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+			CASE_SENSITIVE, vcn, NULL, 0, ctx);
+	if (unlikely(err)) {
+		if (err == -ENOENT)
+			err = -EIO;
+		goto err_out;
+	}
+	m = ctx->mrec;
+	a = ctx->attr;
+	/* Use goto to reduce indentation. */
+	if (a->non_resident)
+		goto do_non_resident_extend;
+	BUG_ON(NInoNonResident(ni));
+	/* The total length of the attribute value. */
+	attr_len = le32_to_cpu(a->data.resident.value_length);
+	/*
+	 * Extend the attribute record to be able to store the new attribute
+	 * size.  ntfs_attr_record_resize() will not do anything if the size is
+	 * not changing.
+	 */
+	if (new_alloc_size < vol->mft_record_size &&
+			!ntfs_attr_record_resize(m, a,
+			le16_to_cpu(a->data.resident.value_offset) +
+			new_alloc_size)) {
+		/* The resize succeeded! */
+		write_lock_irqsave(&ni->size_lock, flags);
+		ni->allocated_size = le32_to_cpu(a->length) -
+				le16_to_cpu(a->data.resident.value_offset);
+		write_unlock_irqrestore(&ni->size_lock, flags);
+		if (new_data_size >= 0) {
+			BUG_ON(new_data_size < attr_len);
+			a->data.resident.value_length =
+					cpu_to_le32((u32)new_data_size);
+		}
+		goto flush_done;
+	}
+	/*
+	 * We have to drop all the locks so we can call
+	 * ntfs_attr_make_non_resident().  This could be optimised by try-
+	 * locking the first page cache page and only if that fails dropping
+	 * the locks, locking the page, and redoing all the locking and
+	 * lookups.  While this would be a huge optimisation, it is not worth
+	 * it as this is definitely a slow code path.
+	 */
+	ntfs_attr_put_search_ctx(ctx);
+	unmap_mft_record(base_ni);
+	up_write(&ni->runlist.lock);
+	/*
+	 * Not enough space in the mft record, try to make the attribute
+	 * non-resident and if successful restart the extension process.
+	 */
+	err = ntfs_attr_make_non_resident(ni, attr_len);
+	if (likely(!err))
+		goto retry_extend;
+	/*
+	 * Could not make non-resident.  If this is due to this not being
+	 * permitted for this attribute type or there not being enough space,
+	 * try to make other attributes non-resident.  Otherwise fail.
+	 */
+	if (unlikely(err != -EPERM && err != -ENOSPC)) {
+		/* Only emit errors when the write will fail completely. */
+		read_lock_irqsave(&ni->size_lock, flags);
+		allocated_size = ni->allocated_size;
+		read_unlock_irqrestore(&ni->size_lock, flags);
+		if (start < 0 || start >= allocated_size)
+			ntfs_error(vol->sb, "Cannot extend allocation of "
+					"inode 0x%lx, attribute type 0x%x, "
+					"because the conversion from resident "
+					"to non-resident attribute failed "
+					"with error code %i.", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+		if (err != -ENOMEM)
+			err = -EIO;
+		goto conv_err_out;
+	}
+	/* TODO: Not implemented from here, abort. */
+	read_lock_irqsave(&ni->size_lock, flags);
+	allocated_size = ni->allocated_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (start < 0 || start >= allocated_size) {
+		if (err == -ENOSPC)
+			ntfs_error(vol->sb, "Not enough space in the mft "
+					"record/on disk for the non-resident "
+					"attribute value.  This case is not "
+					"implemented yet.");
+		else /* if (err == -EPERM) */
+			ntfs_error(vol->sb, "This attribute type may not be "
+					"non-resident.  This case is not "
+					"implemented yet.");
+	}
+	err = -EOPNOTSUPP;
+	goto conv_err_out;
+#if 0
+	// TODO: Attempt to make other attributes non-resident.
+	if (!err)
+		goto do_resident_extend;
+	/*
+	 * Both the attribute list attribute and the standard information
+	 * attribute must remain in the base inode.  Thus, if this is one of
+	 * these attributes, we have to try to move other attributes out into
+	 * extent mft records instead.
+	 */
+	if (ni->type == AT_ATTRIBUTE_LIST ||
+			ni->type == AT_STANDARD_INFORMATION) {
+		// TODO: Attempt to move other attributes into extent mft
+		// records.
+		err = -EOPNOTSUPP;
+		if (!err)
+			goto do_resident_extend;
+		goto err_out;
+	}
+	// TODO: Attempt to move this attribute to an extent mft record, but
+	// only if it is not already the only attribute in an mft record in
+	// which case there would be nothing to gain.
+	err = -EOPNOTSUPP;
+	if (!err)
+		goto do_resident_extend;
+	/* There is nothing we can do to make enough space. )-: */
+	goto err_out;
+#endif
+do_non_resident_extend:
+	BUG_ON(!NInoNonResident(ni));
+	if (new_alloc_size == allocated_size) {
+		BUG_ON(vcn);
+		goto alloc_done;
+	}
+	/*
+	 * If the data starts after the end of the old allocation, this is a
+	 * $DATA attribute and sparse attributes are enabled on the volume and
+	 * for this inode, then create a sparse region between the old
+	 * allocated size and the start of the data.  Otherwise simply proceed
+	 * with filling the whole space between the old allocated size and the
+	 * new allocated size with clusters.
+	 */
+	if ((start >= 0 && start <= allocated_size) || ni->type != AT_DATA ||
+			!NVolSparseEnabled(vol) || NInoSparseDisabled(ni))
+		goto skip_sparse;
+	// TODO: This is not implemented yet.  We just fill in with real
+	// clusters for now...
+	ntfs_debug("Inserting holes is not-implemented yet.  Falling back to "
+			"allocating real clusters instead.");
+skip_sparse:
+	rl = ni->runlist.rl;
+	if (likely(rl)) {
+		/* Seek to the end of the runlist. */
+		while (rl->length)
+			rl++;
+	}
+	/* If this attribute extent is not mapped, map it now. */
+	if (unlikely(!rl || rl->lcn == LCN_RL_NOT_MAPPED ||
+			(rl->lcn == LCN_ENOENT && rl > ni->runlist.rl &&
+			(rl-1)->lcn == LCN_RL_NOT_MAPPED))) {
+		if (!rl && !allocated_size)
+			goto first_alloc;
+		rl = ntfs_mapping_pairs_decompress(vol, a, ni->runlist.rl);
+		if (IS_ERR(rl)) {
+			err = PTR_ERR(rl);
+			if (start < 0 || start >= allocated_size)
+				ntfs_error(vol->sb, "Cannot extend allocation "
+						"of inode 0x%lx, attribute "
+						"type 0x%x, because the "
+						"mapping of a runlist "
+						"fragment failed with error "
+						"code %i.", vi->i_ino,
+						(unsigned)le32_to_cpu(ni->type),
+						err);
+			if (err != -ENOMEM)
+				err = -EIO;
+			goto err_out;
+		}
+		ni->runlist.rl = rl;
+		/* Seek to the end of the runlist. */
+		while (rl->length)
+			rl++;
+	}
+	/*
+	 * We now know the runlist of the last extent is mapped and @rl is at
+	 * the end of the runlist.  We want to begin allocating clusters
+	 * starting at the last allocated cluster to reduce fragmentation.  If
+	 * there are no valid LCNs in the attribute we let the cluster
+	 * allocator choose the starting cluster.
+	 */
+	/* If the last LCN is a hole or simillar seek back to last real LCN. */
+	while (rl->lcn < 0 && rl > ni->runlist.rl)
+		rl--;
+first_alloc:
+	// FIXME: Need to implement partial allocations so at least part of the
+	// write can be performed when start >= 0.  (Needed for POSIX write(2)
+	// conformance.)
+	rl2 = ntfs_cluster_alloc(vol, allocated_size >> vol->cluster_size_bits,
+			(new_alloc_size - allocated_size) >>
+			vol->cluster_size_bits, (rl && (rl->lcn >= 0)) ?
+			rl->lcn + rl->length : -1, DATA_ZONE, TRUE);
+	if (IS_ERR(rl2)) {
+		err = PTR_ERR(rl2);
+		if (start < 0 || start >= allocated_size)
+			ntfs_error(vol->sb, "Cannot extend allocation of "
+					"inode 0x%lx, attribute type 0x%x, "
+					"because the allocation of clusters "
+					"failed with error code %i.", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+		if (err != -ENOMEM && err != -ENOSPC)
+			err = -EIO;
+		goto err_out;
+	}
+	rl = ntfs_runlists_merge(ni->runlist.rl, rl2);
+	if (IS_ERR(rl)) {
+		err = PTR_ERR(rl);
+		if (start < 0 || start >= allocated_size)
+			ntfs_error(vol->sb, "Cannot extend allocation of "
+					"inode 0x%lx, attribute type 0x%x, "
+					"because the runlist merge failed "
+					"with error code %i.", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+		if (err != -ENOMEM)
+			err = -EIO;
+		if (ntfs_cluster_free_from_rl(vol, rl2)) {
+			ntfs_error(vol->sb, "Failed to release allocated "
+					"cluster(s) in error code path.  Run "
+					"chkdsk to recover the lost "
+					"cluster(s).");
+			NVolSetErrors(vol);
+		}
+		ntfs_free(rl2);
+		goto err_out;
+	}
+	ni->runlist.rl = rl;
+	ntfs_debug("Allocated 0x%llx clusters.", (long long)(new_alloc_size -
+			allocated_size) >> vol->cluster_size_bits);
+	/* Find the runlist element with which the attribute extent starts. */
+	ll = sle64_to_cpu(a->data.non_resident.lowest_vcn);
+	rl2 = ntfs_rl_find_vcn_nolock(rl, ll);
+	BUG_ON(!rl2);
+	BUG_ON(!rl2->length);
+	BUG_ON(rl2->lcn < LCN_HOLE);
+	mp_rebuilt = FALSE;
+	/* Get the size for the new mapping pairs array for this extent. */
+	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
+	if (unlikely(mp_size <= 0)) {
+		err = mp_size;
+		if (start < 0 || start >= allocated_size)
+			ntfs_error(vol->sb, "Cannot extend allocation of "
+					"inode 0x%lx, attribute type 0x%x, "
+					"because determining the size for the "
+					"mapping pairs failed with error code "
+					"%i.", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+		err = -EIO;
+		goto undo_alloc;
+	}
+	/* Extend the attribute record to fit the bigger mapping pairs array. */
+	attr_len = le32_to_cpu(a->length);
+	err = ntfs_attr_record_resize(m, a, mp_size +
+			le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
+	if (unlikely(err)) {
+		BUG_ON(err != -ENOSPC);
+		// TODO: Deal with this by moving this extent to a new mft
+		// record or by starting a new extent in a new mft record,
+		// possibly by extending this extent partially and filling it
+		// and creating a new extent for the remainder, or by making
+		// other attributes non-resident and/or by moving other
+		// attributes out of this mft record.
+		if (start < 0 || start >= allocated_size)
+			ntfs_error(vol->sb, "Not enough space in the mft "
+					"record for the extended attribute "
+					"record.  This case is not "
+					"implemented yet.");
+		err = -EOPNOTSUPP;
+		goto undo_alloc;
+	}
+	mp_rebuilt = TRUE;
+	/* Generate the mapping pairs array directly into the attr record. */
+	err = ntfs_mapping_pairs_build(vol, (u8*)a +
+			le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
+			mp_size, rl2, ll, -1, NULL);
+	if (unlikely(err)) {
+		if (start < 0 || start >= allocated_size)
+			ntfs_error(vol->sb, "Cannot extend allocation of "
+					"inode 0x%lx, attribute type 0x%x, "
+					"because building the mapping pairs "
+					"failed with error code %i.", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+		err = -EIO;
+		goto undo_alloc;
+	}
+	/* Update the highest_vcn. */
+	a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >>
+			vol->cluster_size_bits) - 1);
+	/*
+	 * We now have extended the allocated size of the attribute.  Reflect
+	 * this in the ntfs_inode structure and the attribute record.
+	 */
+	if (a->data.non_resident.lowest_vcn) {
+		/*
+		 * We are not in the first attribute extent, switch to it, but
+		 * first ensure the changes will make it to disk later.
+		 */
+		flush_dcache_mft_record_page(ctx->ntfs_ino);
+		mark_mft_record_dirty(ctx->ntfs_ino);
+		ntfs_attr_reinit_search_ctx(ctx);
+		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+				CASE_SENSITIVE, 0, NULL, 0, ctx);
+		if (unlikely(err))
+			goto restore_undo_alloc;
+		/* @m is not used any more so no need to set it. */
+		a = ctx->attr;
+	}
+	write_lock_irqsave(&ni->size_lock, flags);
+	ni->allocated_size = new_alloc_size;
+	a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size);
+	/*
+	 * FIXME: This would fail if @ni is a directory, $MFT, or an index,
+	 * since those can have sparse/compressed set.  For example can be
+	 * set compressed even though it is not compressed itself and in that
+	 * case the bit means that files are to be created compressed in the
+	 * directory...  At present this is ok as this code is only called for
+	 * regular files, and only for their $DATA attribute(s).
+	 * FIXME: The calculation is wrong if we created a hole above.  For now
+	 * it does not matter as we never create holes.
+	 */
+	if (NInoSparse(ni) || NInoCompressed(ni)) {
+		ni->itype.compressed.size += new_alloc_size - allocated_size;
+		a->data.non_resident.compressed_size =
+				cpu_to_sle64(ni->itype.compressed.size);
+		vi->i_blocks = ni->itype.compressed.size >> 9;
+	} else
+		vi->i_blocks = new_alloc_size >> 9;
+	write_unlock_irqrestore(&ni->size_lock, flags);
+alloc_done:
+	if (new_data_size >= 0) {
+		BUG_ON(new_data_size <
+				sle64_to_cpu(a->data.non_resident.data_size));
+		a->data.non_resident.data_size = cpu_to_sle64(new_data_size);
+	}
+flush_done:
+	/* Ensure the changes make it to disk. */
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	mark_mft_record_dirty(ctx->ntfs_ino);
+done:
+	ntfs_attr_put_search_ctx(ctx);
+	unmap_mft_record(base_ni);
+	up_write(&ni->runlist.lock);
+	ntfs_debug("Done, new_allocated_size 0x%llx.",
+			(unsigned long long)new_alloc_size);
+	return new_alloc_size;
+restore_undo_alloc:
+	if (start < 0 || start >= allocated_size)
+		ntfs_error(vol->sb, "Cannot complete extension of allocation "
+				"of inode 0x%lx, attribute type 0x%x, because "
+				"lookup of first attribute extent failed with "
+				"error code %i.", vi->i_ino,
+				(unsigned)le32_to_cpu(ni->type), err);
+	if (err == -ENOENT)
+		err = -EIO;
+	ntfs_attr_reinit_search_ctx(ctx);
+	if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE,
+			allocated_size >> vol->cluster_size_bits, NULL, 0,
+			ctx)) {
+		ntfs_error(vol->sb, "Failed to find last attribute extent of "
+				"attribute in error code path.  Run chkdsk to "
+				"recover.");
+		write_lock_irqsave(&ni->size_lock, flags);
+		ni->allocated_size = new_alloc_size;
+		/*
+		 * FIXME: This would fail if @ni is a directory...  See above.
+		 * FIXME: The calculation is wrong if we created a hole above.
+		 * For now it does not matter as we never create holes.
+		 */
+		if (NInoSparse(ni) || NInoCompressed(ni)) {
+			ni->itype.compressed.size += new_alloc_size -
+					allocated_size;
+			vi->i_blocks = ni->itype.compressed.size >> 9;
+		} else
+			vi->i_blocks = new_alloc_size >> 9;
+		write_unlock_irqrestore(&ni->size_lock, flags);
+		ntfs_attr_put_search_ctx(ctx);
+		unmap_mft_record(base_ni);
+		up_write(&ni->runlist.lock);
+		/*
+		 * The only thing that is now wrong is the allocated size of the
+		 * base attribute extent which chkdsk should be able to fix.
+		 */
+		NVolSetErrors(vol);
+		return err;
+	}
+	ctx->attr->data.non_resident.highest_vcn = cpu_to_sle64(
+			(allocated_size >> vol->cluster_size_bits) - 1);
+undo_alloc:
+	ll = allocated_size >> vol->cluster_size_bits;
+	if (ntfs_cluster_free(ni, ll, -1, ctx) < 0) {
+		ntfs_error(vol->sb, "Failed to release allocated cluster(s) "
+				"in error code path.  Run chkdsk to recover "
+				"the lost cluster(s).");
+		NVolSetErrors(vol);
+	}
+	m = ctx->mrec;
+	a = ctx->attr;
+	/*
+	 * If the runlist truncation fails and/or the search context is no
+	 * longer valid, we cannot resize the attribute record or build the
+	 * mapping pairs array thus we mark the inode bad so that no access to
+	 * the freed clusters can happen.
+	 */
+	if (ntfs_rl_truncate_nolock(vol, &ni->runlist, ll) || IS_ERR(m)) {
+		ntfs_error(vol->sb, "Failed to %s in error code path.  Run "
+				"chkdsk to recover.", IS_ERR(m) ?
+				"restore attribute search context" :
+				"truncate attribute runlist");
+		make_bad_inode(vi);
+		make_bad_inode(VFS_I(base_ni));
+		NVolSetErrors(vol);
+	} else if (mp_rebuilt) {
+		if (ntfs_attr_record_resize(m, a, attr_len)) {
+			ntfs_error(vol->sb, "Failed to restore attribute "
+					"record in error code path.  Run "
+					"chkdsk to recover.");
+			make_bad_inode(vi);
+			make_bad_inode(VFS_I(base_ni));
+			NVolSetErrors(vol);
+		} else /* if (success) */ {
+			if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
+					a->data.non_resident.
+					mapping_pairs_offset), attr_len -
+					le16_to_cpu(a->data.non_resident.
+					mapping_pairs_offset), rl2, ll, -1,
+					NULL)) {
+				ntfs_error(vol->sb, "Failed to restore "
+						"mapping pairs array in error "
+						"code path.  Run chkdsk to "
+						"recover.");
+				make_bad_inode(vi);
+				make_bad_inode(VFS_I(base_ni));
+				NVolSetErrors(vol);
+			}
+			flush_dcache_mft_record_page(ctx->ntfs_ino);
+			mark_mft_record_dirty(ctx->ntfs_ino);
+		}
+	}
+err_out:
+	if (ctx)
+		ntfs_attr_put_search_ctx(ctx);
+	if (m)
+		unmap_mft_record(base_ni);
+	up_write(&ni->runlist.lock);
+conv_err_out:
+	ntfs_debug("Failed.  Returning error code %i.", err);
+	return err;
+}
+
+/**
  * ntfs_attr_set - fill (a part of) an attribute with a byte
  * @ni:		ntfs inode describing the attribute to fill
  * @ofs:	offset inside the attribute at which to start to fill
@@ -1603,6 +2505,12 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
 	BUG_ON(cnt < 0);
 	if (!cnt)
 		goto done;
+	/*
+	 * FIXME: Compressed and encrypted attributes are not supported when
+	 * writing and we should never have gotten here for them.
+	 */
+	BUG_ON(NInoCompressed(ni));
+	BUG_ON(NInoEncrypted(ni));
 	mapping = VFS_I(ni)->i_mapping;
 	/* Work out the starting index and page offset. */
 	idx = ofs >> PAGE_CACHE_SHIFT;
@@ -1684,6 +2592,8 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
 		/* Finally unlock and release the page. */
 		unlock_page(page);
 		page_cache_release(page);
+		balance_dirty_pages_ratelimited(mapping);
+		cond_resched();
 	}
 	/* If there is a last partial page, need to do it the slow way. */
 	if (end_ofs) {
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index 0e4ac6d3c0e..9074886b44b 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -60,14 +60,15 @@ typedef struct {
 	ATTR_RECORD *base_attr;
 } ntfs_attr_search_ctx;
 
-extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn);
+extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn,
+		ntfs_attr_search_ctx *ctx);
 extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn);
 
 extern LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
 		const BOOL write_locked);
 
 extern runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni,
-		const VCN vcn, const BOOL write_locked);
+		const VCN vcn, ntfs_attr_search_ctx *ctx);
 
 int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
 		const u32 name_len, const IGNORE_CASE_BOOL ic,
@@ -99,8 +100,13 @@ extern int ntfs_attr_can_be_resident(const ntfs_volume *vol,
 		const ATTR_TYPE type);
 
 extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size);
+extern int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
+		const u32 new_size);
+
+extern int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size);
 
-extern int ntfs_attr_make_non_resident(ntfs_inode *ni);
+extern s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size,
+		const s64 new_data_size, const s64 data_start);
 
 extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt,
 		const u8 val);
diff --git a/fs/ntfs/bitmap.c b/fs/ntfs/bitmap.c
index 12cf2e30c7d..7a190cdc60e 100644
--- a/fs/ntfs/bitmap.c
+++ b/fs/ntfs/bitmap.c
@@ -1,7 +1,7 @@
 /*
  * bitmap.c - NTFS kernel bitmap handling.  Part of the Linux-NTFS project.
  *
- * Copyright (c) 2004 Anton Altaparmakov
+ * Copyright (c) 2004-2005 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -90,7 +90,8 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit,
 	/* If the first byte is partial, modify the appropriate bits in it. */
 	if (bit) {
 		u8 *byte = kaddr + pos;
-		while ((bit & 7) && cnt--) {
+		while ((bit & 7) && cnt) {
+			cnt--;
 			if (value)
 				*byte |= 1 << bit++;
 			else
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 6d265cfd49a..25d24106f89 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -539,7 +539,6 @@ int ntfs_read_compressed_block(struct page *page)
 	if (unlikely(!pages || !bhs)) {
 		kfree(bhs);
 		kfree(pages);
-		SetPageError(page);
 		unlock_page(page);
 		ntfs_error(vol->sb, "Failed to allocate internal buffers.");
 		return -ENOMEM;
@@ -871,9 +870,6 @@ lock_retry_remap:
 			for (; prev_cur_page < cur_page; prev_cur_page++) {
 				page = pages[prev_cur_page];
 				if (page) {
-					if (prev_cur_page == xpage &&
-							!xpage_done)
-						SetPageError(page);
 					flush_dcache_page(page);
 					kunmap(page);
 					unlock_page(page);
@@ -904,8 +900,6 @@ lock_retry_remap:
 					"Terminating them with extreme "
 					"prejudice.  Inode 0x%lx, page index "
 					"0x%lx.", ni->mft_no, page->index);
-			if (cur_page == xpage && !xpage_done)
-				SetPageError(page);
 			flush_dcache_page(page);
 			kunmap(page);
 			unlock_page(page);
@@ -953,8 +947,6 @@ err_out:
 	for (i = cur_page; i < max_page; i++) {
 		page = pages[i];
 		if (page) {
-			if (i == xpage && !xpage_done)
-				SetPageError(page);
 			flush_dcache_page(page);
 			kunmap(page);
 			unlock_page(page);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 46779471c54..795c3d1930f 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1051,7 +1051,8 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
 			ie->key.file_name.file_name_length, &name,
 			NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
 	if (name_len <= 0) {
-		ntfs_debug("Skipping unrepresentable file.");
+		ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
+				(long long)MREF_LE(ie->data.dir.indexed_file));
 		return 0;
 	}
 	if (ie->key.file_name.file_attributes &
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index e0f530ce6b9..72753389181 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
 /*
- * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
+ * file.c - NTFS kernel file operations.  Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -19,11 +19,24 @@
  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#include <linux/pagemap.h>
 #include <linux/buffer_head.h>
+#include <linux/pagemap.h>
+#include <linux/pagevec.h>
+#include <linux/sched.h>
+#include <linux/swap.h>
+#include <linux/uio.h>
+#include <linux/writeback.h>
+
+#include <asm/page.h>
+#include <asm/uaccess.h>
 
+#include "attrib.h"
+#include "bitmap.h"
 #include "inode.h"
 #include "debug.h"
+#include "lcnalloc.h"
+#include "malloc.h"
+#include "mft.h"
 #include "ntfs.h"
 
 /**
@@ -56,6 +69,2185 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
 #ifdef NTFS_RW
 
 /**
+ * ntfs_attr_extend_initialized - extend the initialized size of an attribute
+ * @ni:			ntfs inode of the attribute to extend
+ * @new_init_size:	requested new initialized size in bytes
+ * @cached_page:	store any allocated but unused page here
+ * @lru_pvec:		lru-buffering pagevec of the caller
+ *
+ * Extend the initialized size of an attribute described by the ntfs inode @ni
+ * to @new_init_size bytes.  This involves zeroing any non-sparse space between
+ * the old initialized size and @new_init_size both in the page cache and on
+ * disk (if relevant complete pages are already uptodate in the page cache then
+ * these are simply marked dirty).
+ *
+ * As a side-effect, the file size (vfs inode->i_size) may be incremented as,
+ * in the resident attribute case, it is tied to the initialized size and, in
+ * the non-resident attribute case, it may not fall below the initialized size.
+ *
+ * Note that if the attribute is resident, we do not need to touch the page
+ * cache at all.  This is because if the page cache page is not uptodate we
+ * bring it uptodate later, when doing the write to the mft record since we
+ * then already have the page mapped.  And if the page is uptodate, the
+ * non-initialized region will already have been zeroed when the page was
+ * brought uptodate and the region may in fact already have been overwritten
+ * with new data via mmap() based writes, so we cannot just zero it.  And since
+ * POSIX specifies that the behaviour of resizing a file whilst it is mmap()ped
+ * is unspecified, we choose not to do zeroing and thus we do not need to touch
+ * the page at all.  For a more detailed explanation see ntfs_truncate() in
+ * fs/ntfs/inode.c.
+ *
+ * @cached_page and @lru_pvec are just optimizations for dealing with multiple
+ * pages.
+ *
+ * Return 0 on success and -errno on error.  In the case that an error is
+ * encountered it is possible that the initialized size will already have been
+ * incremented some way towards @new_init_size but it is guaranteed that if
+ * this is the case, the necessary zeroing will also have happened and that all
+ * metadata is self-consistent.
+ *
+ * Locking: i_sem on the vfs inode corrseponsind to the ntfs inode @ni must be
+ *	    held by the caller.
+ */
+static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size,
+		struct page **cached_page, struct pagevec *lru_pvec)
+{
+	s64 old_init_size;
+	loff_t old_i_size;
+	pgoff_t index, end_index;
+	unsigned long flags;
+	struct inode *vi = VFS_I(ni);
+	ntfs_inode *base_ni;
+	MFT_RECORD *m = NULL;
+	ATTR_RECORD *a;
+	ntfs_attr_search_ctx *ctx = NULL;
+	struct address_space *mapping;
+	struct page *page = NULL;
+	u8 *kattr;
+	int err;
+	u32 attr_len;
+
+	read_lock_irqsave(&ni->size_lock, flags);
+	old_init_size = ni->initialized_size;
+	old_i_size = i_size_read(vi);
+	BUG_ON(new_init_size > ni->allocated_size);
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
+			"old_initialized_size 0x%llx, "
+			"new_initialized_size 0x%llx, i_size 0x%llx.",
+			vi->i_ino, (unsigned)le32_to_cpu(ni->type),
+			(unsigned long long)old_init_size,
+			(unsigned long long)new_init_size, old_i_size);
+	if (!NInoAttr(ni))
+		base_ni = ni;
+	else
+		base_ni = ni->ext.base_ntfs_ino;
+	/* Use goto to reduce indentation and we need the label below anyway. */
+	if (NInoNonResident(ni))
+		goto do_non_resident_extend;
+	BUG_ON(old_init_size != old_i_size);
+	m = map_mft_record(base_ni);
+	if (IS_ERR(m)) {
+		err = PTR_ERR(m);
+		m = NULL;
+		goto err_out;
+	}
+	ctx = ntfs_attr_get_search_ctx(base_ni, m);
+	if (unlikely(!ctx)) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+			CASE_SENSITIVE, 0, NULL, 0, ctx);
+	if (unlikely(err)) {
+		if (err == -ENOENT)
+			err = -EIO;
+		goto err_out;
+	}
+	m = ctx->mrec;
+	a = ctx->attr;
+	BUG_ON(a->non_resident);
+	/* The total length of the attribute value. */
+	attr_len = le32_to_cpu(a->data.resident.value_length);
+	BUG_ON(old_i_size != (loff_t)attr_len);
+	/*
+	 * Do the zeroing in the mft record and update the attribute size in
+	 * the mft record.
+	 */
+	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
+	memset(kattr + attr_len, 0, new_init_size - attr_len);
+	a->data.resident.value_length = cpu_to_le32((u32)new_init_size);
+	/* Finally, update the sizes in the vfs and ntfs inodes. */
+	write_lock_irqsave(&ni->size_lock, flags);
+	i_size_write(vi, new_init_size);
+	ni->initialized_size = new_init_size;
+	write_unlock_irqrestore(&ni->size_lock, flags);
+	goto done;
+do_non_resident_extend:
+	/*
+	 * If the new initialized size @new_init_size exceeds the current file
+	 * size (vfs inode->i_size), we need to extend the file size to the
+	 * new initialized size.
+	 */
+	if (new_init_size > old_i_size) {
+		m = map_mft_record(base_ni);
+		if (IS_ERR(m)) {
+			err = PTR_ERR(m);
+			m = NULL;
+			goto err_out;
+		}
+		ctx = ntfs_attr_get_search_ctx(base_ni, m);
+		if (unlikely(!ctx)) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+				CASE_SENSITIVE, 0, NULL, 0, ctx);
+		if (unlikely(err)) {
+			if (err == -ENOENT)
+				err = -EIO;
+			goto err_out;
+		}
+		m = ctx->mrec;
+		a = ctx->attr;
+		BUG_ON(!a->non_resident);
+		BUG_ON(old_i_size != (loff_t)
+				sle64_to_cpu(a->data.non_resident.data_size));
+		a->data.non_resident.data_size = cpu_to_sle64(new_init_size);
+		flush_dcache_mft_record_page(ctx->ntfs_ino);
+		mark_mft_record_dirty(ctx->ntfs_ino);
+		/* Update the file size in the vfs inode. */
+		i_size_write(vi, new_init_size);
+		ntfs_attr_put_search_ctx(ctx);
+		ctx = NULL;
+		unmap_mft_record(base_ni);
+		m = NULL;
+	}
+	mapping = vi->i_mapping;
+	index = old_init_size >> PAGE_CACHE_SHIFT;
+	end_index = (new_init_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	do {
+		/*
+		 * Read the page.  If the page is not present, this will zero
+		 * the uninitialized regions for us.
+		 */
+		page = read_cache_page(mapping, index,
+				(filler_t*)mapping->a_ops->readpage, NULL);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto init_err_out;
+		}
+		wait_on_page_locked(page);
+		if (unlikely(!PageUptodate(page) || PageError(page))) {
+			page_cache_release(page);
+			err = -EIO;
+			goto init_err_out;
+		}
+		/*
+		 * Update the initialized size in the ntfs inode.  This is
+		 * enough to make ntfs_writepage() work.
+		 */
+		write_lock_irqsave(&ni->size_lock, flags);
+		ni->initialized_size = (index + 1) << PAGE_CACHE_SHIFT;
+		if (ni->initialized_size > new_init_size)
+			ni->initialized_size = new_init_size;
+		write_unlock_irqrestore(&ni->size_lock, flags);
+		/* Set the page dirty so it gets written out. */
+		set_page_dirty(page);
+		page_cache_release(page);
+		/*
+		 * Play nice with the vm and the rest of the system.  This is
+		 * very much needed as we can potentially be modifying the
+		 * initialised size from a very small value to a really huge
+		 * value, e.g.
+		 *	f = open(somefile, O_TRUNC);
+		 *	truncate(f, 10GiB);
+		 *	seek(f, 10GiB);
+		 *	write(f, 1);
+		 * And this would mean we would be marking dirty hundreds of
+		 * thousands of pages or as in the above example more than
+		 * two and a half million pages!
+		 *
+		 * TODO: For sparse pages could optimize this workload by using
+		 * the FsMisc / MiscFs page bit as a "PageIsSparse" bit.  This
+		 * would be set in readpage for sparse pages and here we would
+		 * not need to mark dirty any pages which have this bit set.
+		 * The only caveat is that we have to clear the bit everywhere
+		 * where we allocate any clusters that lie in the page or that
+		 * contain the page.
+		 *
+		 * TODO: An even greater optimization would be for us to only
+		 * call readpage() on pages which are not in sparse regions as
+		 * determined from the runlist.  This would greatly reduce the
+		 * number of pages we read and make dirty in the case of sparse
+		 * files.
+		 */
+		balance_dirty_pages_ratelimited(mapping);
+		cond_resched();
+	} while (++index < end_index);
+	read_lock_irqsave(&ni->size_lock, flags);
+	BUG_ON(ni->initialized_size != new_init_size);
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	/* Now bring in sync the initialized_size in the mft record. */
+	m = map_mft_record(base_ni);
+	if (IS_ERR(m)) {
+		err = PTR_ERR(m);
+		m = NULL;
+		goto init_err_out;
+	}
+	ctx = ntfs_attr_get_search_ctx(base_ni, m);
+	if (unlikely(!ctx)) {
+		err = -ENOMEM;
+		goto init_err_out;
+	}
+	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+			CASE_SENSITIVE, 0, NULL, 0, ctx);
+	if (unlikely(err)) {
+		if (err == -ENOENT)
+			err = -EIO;
+		goto init_err_out;
+	}
+	m = ctx->mrec;
+	a = ctx->attr;
+	BUG_ON(!a->non_resident);
+	a->data.non_resident.initialized_size = cpu_to_sle64(new_init_size);
+done:
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	mark_mft_record_dirty(ctx->ntfs_ino);
+	if (ctx)
+		ntfs_attr_put_search_ctx(ctx);
+	if (m)
+		unmap_mft_record(base_ni);
+	ntfs_debug("Done, initialized_size 0x%llx, i_size 0x%llx.",
+			(unsigned long long)new_init_size, i_size_read(vi));
+	return 0;
+init_err_out:
+	write_lock_irqsave(&ni->size_lock, flags);
+	ni->initialized_size = old_init_size;
+	write_unlock_irqrestore(&ni->size_lock, flags);
+err_out:
+	if (ctx)
+		ntfs_attr_put_search_ctx(ctx);
+	if (m)
+		unmap_mft_record(base_ni);
+	ntfs_debug("Failed.  Returning error code %i.", err);
+	return err;
+}
+
+/**
+ * ntfs_fault_in_pages_readable -
+ *
+ * Fault a number of userspace pages into pagetables.
+ *
+ * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes
+ * with more than two userspace pages as well as handling the single page case
+ * elegantly.
+ *
+ * If you find this difficult to understand, then think of the while loop being
+ * the following code, except that we do without the integer variable ret:
+ *
+ *	do {
+ *		ret = __get_user(c, uaddr);
+ *		uaddr += PAGE_SIZE;
+ *	} while (!ret && uaddr < end);
+ *
+ * Note, the final __get_user() may well run out-of-bounds of the user buffer,
+ * but _not_ out-of-bounds of the page the user buffer belongs to, and since
+ * this is only a read and not a write, and since it is still in the same page,
+ * it should not matter and this makes the code much simpler.
+ */
+static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
+		int bytes)
+{
+	const char __user *end;
+	volatile char c;
+
+	/* Set @end to the first byte outside the last page we care about. */
+	end = (const char __user*)PAGE_ALIGN((ptrdiff_t __user)uaddr + bytes);
+
+	while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end))
+		;
+}
+
+/**
+ * ntfs_fault_in_pages_readable_iovec -
+ *
+ * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs.
+ */
+static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
+		size_t iov_ofs, int bytes)
+{
+	do {
+		const char __user *buf;
+		unsigned len;
+
+		buf = iov->iov_base + iov_ofs;
+		len = iov->iov_len - iov_ofs;
+		if (len > bytes)
+			len = bytes;
+		ntfs_fault_in_pages_readable(buf, len);
+		bytes -= len;
+		iov++;
+		iov_ofs = 0;
+	} while (bytes);
+}
+
+/**
+ * __ntfs_grab_cache_pages - obtain a number of locked pages
+ * @mapping:	address space mapping from which to obtain page cache pages
+ * @index:	starting index in @mapping at which to begin obtaining pages
+ * @nr_pages:	number of page cache pages to obtain
+ * @pages:	array of pages in which to return the obtained page cache pages
+ * @cached_page: allocated but as yet unused page
+ * @lru_pvec:	lru-buffering pagevec of caller
+ *
+ * Obtain @nr_pages locked page cache pages from the mapping @maping and
+ * starting at index @index.
+ *
+ * If a page is newly created, increment its refcount and add it to the
+ * caller's lru-buffering pagevec @lru_pvec.
+ *
+ * This is the same as mm/filemap.c::__grab_cache_page(), except that @nr_pages
+ * are obtained at once instead of just one page and that 0 is returned on
+ * success and -errno on error.
+ *
+ * Note, the page locks are obtained in ascending page index order.
+ */
+static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
+		pgoff_t index, const unsigned nr_pages, struct page **pages,
+		struct page **cached_page, struct pagevec *lru_pvec)
+{
+	int err, nr;
+
+	BUG_ON(!nr_pages);
+	err = nr = 0;
+	do {
+		pages[nr] = find_lock_page(mapping, index);
+		if (!pages[nr]) {
+			if (!*cached_page) {
+				*cached_page = page_cache_alloc(mapping);
+				if (unlikely(!*cached_page)) {
+					err = -ENOMEM;
+					goto err_out;
+				}
+			}
+			err = add_to_page_cache(*cached_page, mapping, index,
+					GFP_KERNEL);
+			if (unlikely(err)) {
+				if (err == -EEXIST)
+					continue;
+				goto err_out;
+			}
+			pages[nr] = *cached_page;
+			page_cache_get(*cached_page);
+			if (unlikely(!pagevec_add(lru_pvec, *cached_page)))
+				__pagevec_lru_add(lru_pvec);
+			*cached_page = NULL;
+		}
+		index++;
+		nr++;
+	} while (nr < nr_pages);
+out:
+	return err;
+err_out:
+	while (nr > 0) {
+		unlock_page(pages[--nr]);
+		page_cache_release(pages[nr]);
+	}
+	goto out;
+}
+
+static inline int ntfs_submit_bh_for_read(struct buffer_head *bh)
+{
+	lock_buffer(bh);
+	get_bh(bh);
+	bh->b_end_io = end_buffer_read_sync;
+	return submit_bh(READ, bh);
+}
+
+/**
+ * ntfs_prepare_pages_for_non_resident_write - prepare pages for receiving data
+ * @pages:	array of destination pages
+ * @nr_pages:	number of pages in @pages
+ * @pos:	byte position in file at which the write begins
+ * @bytes:	number of bytes to be written
+ *
+ * This is called for non-resident attributes from ntfs_file_buffered_write()
+ * with i_sem held on the inode (@pages[0]->mapping->host).  There are
+ * @nr_pages pages in @pages which are locked but not kmap()ped.  The source
+ * data has not yet been copied into the @pages.
+ * 
+ * Need to fill any holes with actual clusters, allocate buffers if necessary,
+ * ensure all the buffers are mapped, and bring uptodate any buffers that are
+ * only partially being written to.
+ *
+ * If @nr_pages is greater than one, we are guaranteed that the cluster size is
+ * greater than PAGE_CACHE_SIZE, that all pages in @pages are entirely inside
+ * the same cluster and that they are the entirety of that cluster, and that
+ * the cluster is sparse, i.e. we need to allocate a cluster to fill the hole.
+ *
+ * i_size is not to be modified yet.
+ *
+ * Return 0 on success or -errno on error.
+ */
+static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
+		unsigned nr_pages, s64 pos, size_t bytes)
+{
+	VCN vcn, highest_vcn = 0, cpos, cend, bh_cpos, bh_cend;
+	LCN lcn;
+	s64 bh_pos, vcn_len, end, initialized_size;
+	sector_t lcn_block;
+	struct page *page;
+	struct inode *vi;
+	ntfs_inode *ni, *base_ni = NULL;
+	ntfs_volume *vol;
+	runlist_element *rl, *rl2;
+	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
+	ntfs_attr_search_ctx *ctx = NULL;
+	MFT_RECORD *m = NULL;
+	ATTR_RECORD *a = NULL;
+	unsigned long flags;
+	u32 attr_rec_len = 0;
+	unsigned blocksize, u;
+	int err, mp_size;
+	BOOL rl_write_locked, was_hole, is_retry;
+	unsigned char blocksize_bits;
+	struct {
+		u8 runlist_merged:1;
+		u8 mft_attr_mapped:1;
+		u8 mp_rebuilt:1;
+		u8 attr_switched:1;
+	} status = { 0, 0, 0, 0 };
+
+	BUG_ON(!nr_pages);
+	BUG_ON(!pages);
+	BUG_ON(!*pages);
+	vi = pages[0]->mapping->host;
+	ni = NTFS_I(vi);
+	vol = ni->vol;
+	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
+			"index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
+			vi->i_ino, ni->type, pages[0]->index, nr_pages,
+			(long long)pos, bytes);
+	blocksize_bits = vi->i_blkbits;
+	blocksize = 1 << blocksize_bits;
+	u = 0;
+	do {
+		struct page *page = pages[u];
+		/*
+		 * create_empty_buffers() will create uptodate/dirty buffers if
+		 * the page is uptodate/dirty.
+		 */
+		if (!page_has_buffers(page)) {
+			create_empty_buffers(page, blocksize, 0);
+			if (unlikely(!page_has_buffers(page)))
+				return -ENOMEM;
+		}
+	} while (++u < nr_pages);
+	rl_write_locked = FALSE;
+	rl = NULL;
+	err = 0;
+	vcn = lcn = -1;
+	vcn_len = 0;
+	lcn_block = -1;
+	was_hole = FALSE;
+	cpos = pos >> vol->cluster_size_bits;
+	end = pos + bytes;
+	cend = (end + vol->cluster_size - 1) >> vol->cluster_size_bits;
+	/*
+	 * Loop over each page and for each page over each buffer.  Use goto to
+	 * reduce indentation.
+	 */
+	u = 0;
+do_next_page:
+	page = pages[u];
+	bh_pos = (s64)page->index << PAGE_CACHE_SHIFT;
+	bh = head = page_buffers(page);
+	do {
+		VCN cdelta;
+		s64 bh_end;
+		unsigned bh_cofs;
+
+		/* Clear buffer_new on all buffers to reinitialise state. */
+		if (buffer_new(bh))
+			clear_buffer_new(bh);
+		bh_end = bh_pos + blocksize;
+		bh_cpos = bh_pos >> vol->cluster_size_bits;
+		bh_cofs = bh_pos & vol->cluster_size_mask;
+		if (buffer_mapped(bh)) {
+			/*
+			 * The buffer is already mapped.  If it is uptodate,
+			 * ignore it.
+			 */
+			if (buffer_uptodate(bh))
+				continue;
+			/*
+			 * The buffer is not uptodate.  If the page is uptodate
+			 * set the buffer uptodate and otherwise ignore it.
+			 */
+			if (PageUptodate(page)) {
+				set_buffer_uptodate(bh);
+				continue;
+			}
+			/*
+			 * Neither the page nor the buffer are uptodate.  If
+			 * the buffer is only partially being written to, we
+			 * need to read it in before the write, i.e. now.
+			 */
+			if ((bh_pos < pos && bh_end > pos) ||
+					(bh_pos < end && bh_end > end)) {
+				/*
+				 * If the buffer is fully or partially within
+				 * the initialized size, do an actual read.
+				 * Otherwise, simply zero the buffer.
+				 */
+				read_lock_irqsave(&ni->size_lock, flags);
+				initialized_size = ni->initialized_size;
+				read_unlock_irqrestore(&ni->size_lock, flags);
+				if (bh_pos < initialized_size) {
+					ntfs_submit_bh_for_read(bh);
+					*wait_bh++ = bh;
+				} else {
+					u8 *kaddr = kmap_atomic(page, KM_USER0);
+					memset(kaddr + bh_offset(bh), 0,
+							blocksize);
+					kunmap_atomic(kaddr, KM_USER0);
+					flush_dcache_page(page);
+					set_buffer_uptodate(bh);
+				}
+			}
+			continue;
+		}
+		/* Unmapped buffer.  Need to map it. */
+		bh->b_bdev = vol->sb->s_bdev;
+		/*
+		 * If the current buffer is in the same clusters as the map
+		 * cache, there is no need to check the runlist again.  The
+		 * map cache is made up of @vcn, which is the first cached file
+		 * cluster, @vcn_len which is the number of cached file
+		 * clusters, @lcn is the device cluster corresponding to @vcn,
+		 * and @lcn_block is the block number corresponding to @lcn.
+		 */
+		cdelta = bh_cpos - vcn;
+		if (likely(!cdelta || (cdelta > 0 && cdelta < vcn_len))) {
+map_buffer_cached:
+			BUG_ON(lcn < 0);
+			bh->b_blocknr = lcn_block +
+					(cdelta << (vol->cluster_size_bits -
+					blocksize_bits)) +
+					(bh_cofs >> blocksize_bits);
+			set_buffer_mapped(bh);
+			/*
+			 * If the page is uptodate so is the buffer.  If the
+			 * buffer is fully outside the write, we ignore it if
+			 * it was already allocated and we mark it dirty so it
+			 * gets written out if we allocated it.  On the other
+			 * hand, if we allocated the buffer but we are not
+			 * marking it dirty we set buffer_new so we can do
+			 * error recovery.
+			 */
+			if (PageUptodate(page)) {
+				if (!buffer_uptodate(bh))
+					set_buffer_uptodate(bh);
+				if (unlikely(was_hole)) {
+					/* We allocated the buffer. */
+					unmap_underlying_metadata(bh->b_bdev,
+							bh->b_blocknr);
+					if (bh_end <= pos || bh_pos >= end)
+						mark_buffer_dirty(bh);
+					else
+						set_buffer_new(bh);
+				}
+				continue;
+			}
+			/* Page is _not_ uptodate. */
+			if (likely(!was_hole)) {
+				/*
+				 * Buffer was already allocated.  If it is not
+				 * uptodate and is only partially being written
+				 * to, we need to read it in before the write,
+				 * i.e. now.
+				 */
+				if (!buffer_uptodate(bh) && bh_pos < end &&
+						bh_end > pos &&
+						(bh_pos < pos ||
+						bh_end > end)) {
+					/*
+					 * If the buffer is fully or partially
+					 * within the initialized size, do an
+					 * actual read.  Otherwise, simply zero
+					 * the buffer.
+					 */
+					read_lock_irqsave(&ni->size_lock,
+							flags);
+					initialized_size = ni->initialized_size;
+					read_unlock_irqrestore(&ni->size_lock,
+							flags);
+					if (bh_pos < initialized_size) {
+						ntfs_submit_bh_for_read(bh);
+						*wait_bh++ = bh;
+					} else {
+						u8 *kaddr = kmap_atomic(page,
+								KM_USER0);
+						memset(kaddr + bh_offset(bh),
+								0, blocksize);
+						kunmap_atomic(kaddr, KM_USER0);
+						flush_dcache_page(page);
+						set_buffer_uptodate(bh);
+					}
+				}
+				continue;
+			}
+			/* We allocated the buffer. */
+			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+			/*
+			 * If the buffer is fully outside the write, zero it,
+			 * set it uptodate, and mark it dirty so it gets
+			 * written out.  If it is partially being written to,
+			 * zero region surrounding the write but leave it to
+			 * commit write to do anything else.  Finally, if the
+			 * buffer is fully being overwritten, do nothing.
+			 */
+			if (bh_end <= pos || bh_pos >= end) {
+				if (!buffer_uptodate(bh)) {
+					u8 *kaddr = kmap_atomic(page, KM_USER0);
+					memset(kaddr + bh_offset(bh), 0,
+							blocksize);
+					kunmap_atomic(kaddr, KM_USER0);
+					flush_dcache_page(page);
+					set_buffer_uptodate(bh);
+				}
+				mark_buffer_dirty(bh);
+				continue;
+			}
+			set_buffer_new(bh);
+			if (!buffer_uptodate(bh) &&
+					(bh_pos < pos || bh_end > end)) {
+				u8 *kaddr;
+				unsigned pofs;
+					
+				kaddr = kmap_atomic(page, KM_USER0);
+				if (bh_pos < pos) {
+					pofs = bh_pos & ~PAGE_CACHE_MASK;
+					memset(kaddr + pofs, 0, pos - bh_pos);
+				}
+				if (bh_end > end) {
+					pofs = end & ~PAGE_CACHE_MASK;
+					memset(kaddr + pofs, 0, bh_end - end);
+				}
+				kunmap_atomic(kaddr, KM_USER0);
+				flush_dcache_page(page);
+			}
+			continue;
+		}
+		/*
+		 * Slow path: this is the first buffer in the cluster.  If it
+		 * is outside allocated size and is not uptodate, zero it and
+		 * set it uptodate.
+		 */
+		read_lock_irqsave(&ni->size_lock, flags);
+		initialized_size = ni->allocated_size;
+		read_unlock_irqrestore(&ni->size_lock, flags);
+		if (bh_pos > initialized_size) {
+			if (PageUptodate(page)) {
+				if (!buffer_uptodate(bh))
+					set_buffer_uptodate(bh);
+			} else if (!buffer_uptodate(bh)) {
+				u8 *kaddr = kmap_atomic(page, KM_USER0);
+				memset(kaddr + bh_offset(bh), 0, blocksize);
+				kunmap_atomic(kaddr, KM_USER0);
+				flush_dcache_page(page);
+				set_buffer_uptodate(bh);
+			}
+			continue;
+		}
+		is_retry = FALSE;
+		if (!rl) {
+			down_read(&ni->runlist.lock);
+retry_remap:
+			rl = ni->runlist.rl;
+		}
+		if (likely(rl != NULL)) {
+			/* Seek to element containing target cluster. */
+			while (rl->length && rl[1].vcn <= bh_cpos)
+				rl++;
+			lcn = ntfs_rl_vcn_to_lcn(rl, bh_cpos);
+			if (likely(lcn >= 0)) {
+				/*
+				 * Successful remap, setup the map cache and
+				 * use that to deal with the buffer.
+				 */
+				was_hole = FALSE;
+				vcn = bh_cpos;
+				vcn_len = rl[1].vcn - vcn;
+				lcn_block = lcn << (vol->cluster_size_bits -
+						blocksize_bits);
+				cdelta = 0;
+				/*
+				 * If the number of remaining clusters touched
+				 * by the write is smaller or equal to the
+				 * number of cached clusters, unlock the
+				 * runlist as the map cache will be used from
+				 * now on.
+				 */
+				if (likely(vcn + vcn_len >= cend)) {
+					if (rl_write_locked) {
+						up_write(&ni->runlist.lock);
+						rl_write_locked = FALSE;
+					} else
+						up_read(&ni->runlist.lock);
+					rl = NULL;
+				}
+				goto map_buffer_cached;
+			}
+		} else
+			lcn = LCN_RL_NOT_MAPPED;
+		/*
+		 * If it is not a hole and not out of bounds, the runlist is
+		 * probably unmapped so try to map it now.
+		 */
+		if (unlikely(lcn != LCN_HOLE && lcn != LCN_ENOENT)) {
+			if (likely(!is_retry && lcn == LCN_RL_NOT_MAPPED)) {
+				/* Attempt to map runlist. */
+				if (!rl_write_locked) {
+					/*
+					 * We need the runlist locked for
+					 * writing, so if it is locked for
+					 * reading relock it now and retry in
+					 * case it changed whilst we dropped
+					 * the lock.
+					 */
+					up_read(&ni->runlist.lock);
+					down_write(&ni->runlist.lock);
+					rl_write_locked = TRUE;
+					goto retry_remap;
+				}
+				err = ntfs_map_runlist_nolock(ni, bh_cpos,
+						NULL);
+				if (likely(!err)) {
+					is_retry = TRUE;
+					goto retry_remap;
+				}
+				/*
+				 * If @vcn is out of bounds, pretend @lcn is
+				 * LCN_ENOENT.  As long as the buffer is out
+				 * of bounds this will work fine.
+				 */
+				if (err == -ENOENT) {
+					lcn = LCN_ENOENT;
+					err = 0;
+					goto rl_not_mapped_enoent;
+				}
+			} else
+				err = -EIO;
+			/* Failed to map the buffer, even after retrying. */
+			bh->b_blocknr = -1;
+			ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
+					"attribute type 0x%x, vcn 0x%llx, "
+					"vcn offset 0x%x, because its "
+					"location on disk could not be "
+					"determined%s (error code %i).",
+					ni->mft_no, ni->type,
+					(unsigned long long)bh_cpos,
+					(unsigned)bh_pos &
+					vol->cluster_size_mask,
+					is_retry ? " even after retrying" : "",
+					err);
+			break;
+		}
+rl_not_mapped_enoent:
+		/*
+		 * The buffer is in a hole or out of bounds.  We need to fill
+		 * the hole, unless the buffer is in a cluster which is not
+		 * touched by the write, in which case we just leave the buffer
+		 * unmapped.  This can only happen when the cluster size is
+		 * less than the page cache size.
+		 */
+		if (unlikely(vol->cluster_size < PAGE_CACHE_SIZE)) {
+			bh_cend = (bh_end + vol->cluster_size - 1) >>
+					vol->cluster_size_bits;
+			if ((bh_cend <= cpos || bh_cpos >= cend)) {
+				bh->b_blocknr = -1;
+				/*
+				 * If the buffer is uptodate we skip it.  If it
+				 * is not but the page is uptodate, we can set
+				 * the buffer uptodate.  If the page is not
+				 * uptodate, we can clear the buffer and set it
+				 * uptodate.  Whether this is worthwhile is
+				 * debatable and this could be removed.
+				 */
+				if (PageUptodate(page)) {
+					if (!buffer_uptodate(bh))
+						set_buffer_uptodate(bh);
+				} else if (!buffer_uptodate(bh)) {
+					u8 *kaddr = kmap_atomic(page, KM_USER0);
+					memset(kaddr + bh_offset(bh), 0,
+							blocksize);
+					kunmap_atomic(kaddr, KM_USER0);
+					flush_dcache_page(page);
+					set_buffer_uptodate(bh);
+				}
+				continue;
+			}
+		}
+		/*
+		 * Out of bounds buffer is invalid if it was not really out of
+		 * bounds.
+		 */
+		BUG_ON(lcn != LCN_HOLE);
+		/*
+		 * We need the runlist locked for writing, so if it is locked
+		 * for reading relock it now and retry in case it changed
+		 * whilst we dropped the lock.
+		 */
+		BUG_ON(!rl);
+		if (!rl_write_locked) {
+			up_read(&ni->runlist.lock);
+			down_write(&ni->runlist.lock);
+			rl_write_locked = TRUE;
+			goto retry_remap;
+		}
+		/* Find the previous last allocated cluster. */
+		BUG_ON(rl->lcn != LCN_HOLE);
+		lcn = -1;
+		rl2 = rl;
+		while (--rl2 >= ni->runlist.rl) {
+			if (rl2->lcn >= 0) {
+				lcn = rl2->lcn + rl2->length;
+				break;
+			}
+		}
+		rl2 = ntfs_cluster_alloc(vol, bh_cpos, 1, lcn, DATA_ZONE,
+				FALSE);
+		if (IS_ERR(rl2)) {
+			err = PTR_ERR(rl2);
+			ntfs_debug("Failed to allocate cluster, error code %i.",
+					err);
+			break;
+		}
+		lcn = rl2->lcn;
+		rl = ntfs_runlists_merge(ni->runlist.rl, rl2);
+		if (IS_ERR(rl)) {
+			err = PTR_ERR(rl);
+			if (err != -ENOMEM)
+				err = -EIO;
+			if (ntfs_cluster_free_from_rl(vol, rl2)) {
+				ntfs_error(vol->sb, "Failed to release "
+						"allocated cluster in error "
+						"code path.  Run chkdsk to "
+						"recover the lost cluster.");
+				NVolSetErrors(vol);
+			}
+			ntfs_free(rl2);
+			break;
+		}
+		ni->runlist.rl = rl;
+		status.runlist_merged = 1;
+		ntfs_debug("Allocated cluster, lcn 0x%llx.", lcn);
+		/* Map and lock the mft record and get the attribute record. */
+		if (!NInoAttr(ni))
+			base_ni = ni;
+		else
+			base_ni = ni->ext.base_ntfs_ino;
+		m = map_mft_record(base_ni);
+		if (IS_ERR(m)) {
+			err = PTR_ERR(m);
+			break;
+		}
+		ctx = ntfs_attr_get_search_ctx(base_ni, m);
+		if (unlikely(!ctx)) {
+			err = -ENOMEM;
+			unmap_mft_record(base_ni);
+			break;
+		}
+		status.mft_attr_mapped = 1;
+		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+				CASE_SENSITIVE, bh_cpos, NULL, 0, ctx);
+		if (unlikely(err)) {
+			if (err == -ENOENT)
+				err = -EIO;
+			break;
+		}
+		m = ctx->mrec;
+		a = ctx->attr;
+		/*
+		 * Find the runlist element with which the attribute extent
+		 * starts.  Note, we cannot use the _attr_ version because we
+		 * have mapped the mft record.  That is ok because we know the
+		 * runlist fragment must be mapped already to have ever gotten
+		 * here, so we can just use the _rl_ version.
+		 */
+		vcn = sle64_to_cpu(a->data.non_resident.lowest_vcn);
+		rl2 = ntfs_rl_find_vcn_nolock(rl, vcn);
+		BUG_ON(!rl2);
+		BUG_ON(!rl2->length);
+		BUG_ON(rl2->lcn < LCN_HOLE);
+		highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
+		/*
+		 * If @highest_vcn is zero, calculate the real highest_vcn
+		 * (which can really be zero).
+		 */
+		if (!highest_vcn)
+			highest_vcn = (sle64_to_cpu(
+					a->data.non_resident.allocated_size) >>
+					vol->cluster_size_bits) - 1;
+		/*
+		 * Determine the size of the mapping pairs array for the new
+		 * extent, i.e. the old extent with the hole filled.
+		 */
+		mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, vcn,
+				highest_vcn);
+		if (unlikely(mp_size <= 0)) {
+			if (!(err = mp_size))
+				err = -EIO;
+			ntfs_debug("Failed to get size for mapping pairs "
+					"array, error code %i.", err);
+			break;
+		}
+		/*
+		 * Resize the attribute record to fit the new mapping pairs
+		 * array.
+		 */
+		attr_rec_len = le32_to_cpu(a->length);
+		err = ntfs_attr_record_resize(m, a, mp_size + le16_to_cpu(
+				a->data.non_resident.mapping_pairs_offset));
+		if (unlikely(err)) {
+			BUG_ON(err != -ENOSPC);
+			// TODO: Deal with this by using the current attribute
+			// and fill it with as much of the mapping pairs
+			// array as possible.  Then loop over each attribute
+			// extent rewriting the mapping pairs arrays as we go
+			// along and if when we reach the end we have not
+			// enough space, try to resize the last attribute
+			// extent and if even that fails, add a new attribute
+			// extent.
+			// We could also try to resize at each step in the hope
+			// that we will not need to rewrite every single extent.
+			// Note, we may need to decompress some extents to fill
+			// the runlist as we are walking the extents...
+			ntfs_error(vol->sb, "Not enough space in the mft "
+					"record for the extended attribute "
+					"record.  This case is not "
+					"implemented yet.");
+			err = -EOPNOTSUPP;
+			break ;
+		}
+		status.mp_rebuilt = 1;
+		/*
+		 * Generate the mapping pairs array directly into the attribute
+		 * record.
+		 */
+		err = ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
+				a->data.non_resident.mapping_pairs_offset),
+				mp_size, rl2, vcn, highest_vcn, NULL);
+		if (unlikely(err)) {
+			ntfs_error(vol->sb, "Cannot fill hole in inode 0x%lx, "
+					"attribute type 0x%x, because building "
+					"the mapping pairs failed with error "
+					"code %i.", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+			err = -EIO;
+			break;
+		}
+		/* Update the highest_vcn but only if it was not set. */
+		if (unlikely(!a->data.non_resident.highest_vcn))
+			a->data.non_resident.highest_vcn =
+					cpu_to_sle64(highest_vcn);
+		/*
+		 * If the attribute is sparse/compressed, update the compressed
+		 * size in the ntfs_inode structure and the attribute record.
+		 */
+		if (likely(NInoSparse(ni) || NInoCompressed(ni))) {
+			/*
+			 * If we are not in the first attribute extent, switch
+			 * to it, but first ensure the changes will make it to
+			 * disk later.
+			 */
+			if (a->data.non_resident.lowest_vcn) {
+				flush_dcache_mft_record_page(ctx->ntfs_ino);
+				mark_mft_record_dirty(ctx->ntfs_ino);
+				ntfs_attr_reinit_search_ctx(ctx);
+				err = ntfs_attr_lookup(ni->type, ni->name,
+						ni->name_len, CASE_SENSITIVE,
+						0, NULL, 0, ctx);
+				if (unlikely(err)) {
+					status.attr_switched = 1;
+					break;
+				}
+				/* @m is not used any more so do not set it. */
+				a = ctx->attr;
+			}
+			write_lock_irqsave(&ni->size_lock, flags);
+			ni->itype.compressed.size += vol->cluster_size;
+			a->data.non_resident.compressed_size =
+					cpu_to_sle64(ni->itype.compressed.size);
+			write_unlock_irqrestore(&ni->size_lock, flags);
+		}
+		/* Ensure the changes make it to disk. */
+		flush_dcache_mft_record_page(ctx->ntfs_ino);
+		mark_mft_record_dirty(ctx->ntfs_ino);
+		ntfs_attr_put_search_ctx(ctx);
+		unmap_mft_record(base_ni);
+		/* Successfully filled the hole. */
+		status.runlist_merged = 0;
+		status.mft_attr_mapped = 0;
+		status.mp_rebuilt = 0;
+		/* Setup the map cache and use that to deal with the buffer. */
+		was_hole = TRUE;
+		vcn = bh_cpos;
+		vcn_len = 1;
+		lcn_block = lcn << (vol->cluster_size_bits - blocksize_bits);
+		cdelta = 0;
+		/*
+		 * If the number of remaining clusters in the @pages is smaller
+		 * or equal to the number of cached clusters, unlock the
+		 * runlist as the map cache will be used from now on.
+		 */
+		if (likely(vcn + vcn_len >= cend)) {
+			up_write(&ni->runlist.lock);
+			rl_write_locked = FALSE;
+			rl = NULL;
+		}
+		goto map_buffer_cached;
+	} while (bh_pos += blocksize, (bh = bh->b_this_page) != head);
+	/* If there are no errors, do the next page. */
+	if (likely(!err && ++u < nr_pages))
+		goto do_next_page;
+	/* If there are no errors, release the runlist lock if we took it. */
+	if (likely(!err)) {
+		if (unlikely(rl_write_locked)) {
+			up_write(&ni->runlist.lock);
+			rl_write_locked = FALSE;
+		} else if (unlikely(rl))
+			up_read(&ni->runlist.lock);
+		rl = NULL;
+	}
+	/* If we issued read requests, let them complete. */
+	read_lock_irqsave(&ni->size_lock, flags);
+	initialized_size = ni->initialized_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	while (wait_bh > wait) {
+		bh = *--wait_bh;
+		wait_on_buffer(bh);
+		if (likely(buffer_uptodate(bh))) {
+			page = bh->b_page;
+			bh_pos = ((s64)page->index << PAGE_CACHE_SHIFT) +
+					bh_offset(bh);
+			/*
+			 * If the buffer overflows the initialized size, need
+			 * to zero the overflowing region.
+			 */
+			if (unlikely(bh_pos + blocksize > initialized_size)) {
+				u8 *kaddr;
+				int ofs = 0;
+
+				if (likely(bh_pos < initialized_size))
+					ofs = initialized_size - bh_pos;
+				kaddr = kmap_atomic(page, KM_USER0);
+				memset(kaddr + bh_offset(bh) + ofs, 0,
+						blocksize - ofs);
+				kunmap_atomic(kaddr, KM_USER0);
+				flush_dcache_page(page);
+			}
+		} else /* if (unlikely(!buffer_uptodate(bh))) */
+			err = -EIO;
+	}
+	if (likely(!err)) {
+		/* Clear buffer_new on all buffers. */
+		u = 0;
+		do {
+			bh = head = page_buffers(pages[u]);
+			do {
+				if (buffer_new(bh))
+					clear_buffer_new(bh);
+			} while ((bh = bh->b_this_page) != head);
+		} while (++u < nr_pages);
+		ntfs_debug("Done.");
+		return err;
+	}
+	if (status.attr_switched) {
+		/* Get back to the attribute extent we modified. */
+		ntfs_attr_reinit_search_ctx(ctx);
+		if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+				CASE_SENSITIVE, bh_cpos, NULL, 0, ctx)) {
+			ntfs_error(vol->sb, "Failed to find required "
+					"attribute extent of attribute in "
+					"error code path.  Run chkdsk to "
+					"recover.");
+			write_lock_irqsave(&ni->size_lock, flags);
+			ni->itype.compressed.size += vol->cluster_size;
+			write_unlock_irqrestore(&ni->size_lock, flags);
+			flush_dcache_mft_record_page(ctx->ntfs_ino);
+			mark_mft_record_dirty(ctx->ntfs_ino);
+			/*
+			 * The only thing that is now wrong is the compressed
+			 * size of the base attribute extent which chkdsk
+			 * should be able to fix.
+			 */
+			NVolSetErrors(vol);
+		} else {
+			m = ctx->mrec;
+			a = ctx->attr;
+			status.attr_switched = 0;
+		}
+	}
+	/*
+	 * If the runlist has been modified, need to restore it by punching a
+	 * hole into it and we then need to deallocate the on-disk cluster as
+	 * well.  Note, we only modify the runlist if we are able to generate a
+	 * new mapping pairs array, i.e. only when the mapped attribute extent
+	 * is not switched.
+	 */
+	if (status.runlist_merged && !status.attr_switched) {
+		BUG_ON(!rl_write_locked);
+		/* Make the file cluster we allocated sparse in the runlist. */
+		if (ntfs_rl_punch_nolock(vol, &ni->runlist, bh_cpos, 1)) {
+			ntfs_error(vol->sb, "Failed to punch hole into "
+					"attribute runlist in error code "
+					"path.  Run chkdsk to recover the "
+					"lost cluster.");
+			make_bad_inode(vi);
+			make_bad_inode(VFS_I(base_ni));
+			NVolSetErrors(vol);
+		} else /* if (success) */ {
+			status.runlist_merged = 0;
+			/*
+			 * Deallocate the on-disk cluster we allocated but only
+			 * if we succeeded in punching its vcn out of the
+			 * runlist.
+			 */
+			down_write(&vol->lcnbmp_lock);
+			if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) {
+				ntfs_error(vol->sb, "Failed to release "
+						"allocated cluster in error "
+						"code path.  Run chkdsk to "
+						"recover the lost cluster.");
+				NVolSetErrors(vol);
+			}
+			up_write(&vol->lcnbmp_lock);
+		}
+	}
+	/*
+	 * Resize the attribute record to its old size and rebuild the mapping
+	 * pairs array.  Note, we only can do this if the runlist has been
+	 * restored to its old state which also implies that the mapped
+	 * attribute extent is not switched.
+	 */
+	if (status.mp_rebuilt && !status.runlist_merged) {
+		if (ntfs_attr_record_resize(m, a, attr_rec_len)) {
+			ntfs_error(vol->sb, "Failed to restore attribute "
+					"record in error code path.  Run "
+					"chkdsk to recover.");
+			make_bad_inode(vi);
+			make_bad_inode(VFS_I(base_ni));
+			NVolSetErrors(vol);
+		} else /* if (success) */ {
+			if (ntfs_mapping_pairs_build(vol, (u8*)a +
+					le16_to_cpu(a->data.non_resident.
+					mapping_pairs_offset), attr_rec_len -
+					le16_to_cpu(a->data.non_resident.
+					mapping_pairs_offset), ni->runlist.rl,
+					vcn, highest_vcn, NULL)) {
+				ntfs_error(vol->sb, "Failed to restore "
+						"mapping pairs array in error "
+						"code path.  Run chkdsk to "
+						"recover.");
+				make_bad_inode(vi);
+				make_bad_inode(VFS_I(base_ni));
+				NVolSetErrors(vol);
+			}
+			flush_dcache_mft_record_page(ctx->ntfs_ino);
+			mark_mft_record_dirty(ctx->ntfs_ino);
+		}
+	}
+	/* Release the mft record and the attribute. */
+	if (status.mft_attr_mapped) {
+		ntfs_attr_put_search_ctx(ctx);
+		unmap_mft_record(base_ni);
+	}
+	/* Release the runlist lock. */
+	if (rl_write_locked)
+		up_write(&ni->runlist.lock);
+	else if (rl)
+		up_read(&ni->runlist.lock);
+	/*
+	 * Zero out any newly allocated blocks to avoid exposing stale data.
+	 * If BH_New is set, we know that the block was newly allocated above
+	 * and that it has not been fully zeroed and marked dirty yet.
+	 */
+	nr_pages = u;
+	u = 0;
+	end = bh_cpos << vol->cluster_size_bits;
+	do {
+		page = pages[u];
+		bh = head = page_buffers(page);
+		do {
+			if (u == nr_pages &&
+					((s64)page->index << PAGE_CACHE_SHIFT) +
+					bh_offset(bh) >= end)
+				break;
+			if (!buffer_new(bh))
+				continue;
+			clear_buffer_new(bh);
+			if (!buffer_uptodate(bh)) {
+				if (PageUptodate(page))
+					set_buffer_uptodate(bh);
+				else {
+					u8 *kaddr = kmap_atomic(page, KM_USER0);
+					memset(kaddr + bh_offset(bh), 0,
+							blocksize);
+					kunmap_atomic(kaddr, KM_USER0);
+					flush_dcache_page(page);
+					set_buffer_uptodate(bh);
+				}
+			}
+			mark_buffer_dirty(bh);
+		} while ((bh = bh->b_this_page) != head);
+	} while (++u <= nr_pages);
+	ntfs_error(vol->sb, "Failed.  Returning error code %i.", err);
+	return err;
+}
+
+/*
+ * Copy as much as we can into the pages and return the number of bytes which
+ * were sucessfully copied.  If a fault is encountered then clear the pages
+ * out to (ofs + bytes) and return the number of bytes which were copied.
+ */
+static inline size_t ntfs_copy_from_user(struct page **pages,
+		unsigned nr_pages, unsigned ofs, const char __user *buf,
+		size_t bytes)
+{
+	struct page **last_page = pages + nr_pages;
+	char *kaddr;
+	size_t total = 0;
+	unsigned len;
+	int left;
+
+	do {
+		len = PAGE_CACHE_SIZE - ofs;
+		if (len > bytes)
+			len = bytes;
+		kaddr = kmap_atomic(*pages, KM_USER0);
+		left = __copy_from_user_inatomic(kaddr + ofs, buf, len);
+		kunmap_atomic(kaddr, KM_USER0);
+		if (unlikely(left)) {
+			/* Do it the slow way. */
+			kaddr = kmap(*pages);
+			left = __copy_from_user(kaddr + ofs, buf, len);
+			kunmap(*pages);
+			if (unlikely(left))
+				goto err_out;
+		}
+		total += len;
+		bytes -= len;
+		if (!bytes)
+			break;
+		buf += len;
+		ofs = 0;
+	} while (++pages < last_page);
+out:
+	return total;
+err_out:
+	total += len - left;
+	/* Zero the rest of the target like __copy_from_user(). */
+	while (++pages < last_page) {
+		bytes -= len;
+		if (!bytes)
+			break;
+		len = PAGE_CACHE_SIZE;
+		if (len > bytes)
+			len = bytes;
+		kaddr = kmap_atomic(*pages, KM_USER0);
+		memset(kaddr, 0, len);
+		kunmap_atomic(kaddr, KM_USER0);
+	}
+	goto out;
+}
+
+static size_t __ntfs_copy_from_user_iovec(char *vaddr,
+		const struct iovec *iov, size_t iov_ofs, size_t bytes)
+{
+	size_t total = 0;
+
+	while (1) {
+		const char __user *buf = iov->iov_base + iov_ofs;
+		unsigned len;
+		size_t left;
+
+		len = iov->iov_len - iov_ofs;
+		if (len > bytes)
+			len = bytes;
+		left = __copy_from_user_inatomic(vaddr, buf, len);
+		total += len;
+		bytes -= len;
+		vaddr += len;
+		if (unlikely(left)) {
+			/*
+			 * Zero the rest of the target like __copy_from_user().
+			 */
+			memset(vaddr, 0, bytes);
+			total -= left;
+			break;
+		}
+		if (!bytes)
+			break;
+		iov++;
+		iov_ofs = 0;
+	}
+	return total;
+}
+
+static inline void ntfs_set_next_iovec(const struct iovec **iovp,
+		size_t *iov_ofsp, size_t bytes)
+{
+	const struct iovec *iov = *iovp;
+	size_t iov_ofs = *iov_ofsp;
+
+	while (bytes) {
+		unsigned len;
+
+		len = iov->iov_len - iov_ofs;
+		if (len > bytes)
+			len = bytes;
+		bytes -= len;
+		iov_ofs += len;
+		if (iov->iov_len == iov_ofs) {
+			iov++;
+			iov_ofs = 0;
+		}
+	}
+	*iovp = iov;
+	*iov_ofsp = iov_ofs;
+}
+
+/*
+ * This has the same side-effects and return value as ntfs_copy_from_user().
+ * The difference is that on a fault we need to memset the remainder of the
+ * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
+ * single-segment behaviour.
+ *
+ * We call the same helper (__ntfs_copy_from_user_iovec()) both when atomic and
+ * when not atomic.  This is ok because __ntfs_copy_from_user_iovec() calls
+ * __copy_from_user_inatomic() and it is ok to call this when non-atomic.  In
+ * fact, the only difference between __copy_from_user_inatomic() and
+ * __copy_from_user() is that the latter calls might_sleep().  And on many
+ * architectures __copy_from_user_inatomic() is just defined to
+ * __copy_from_user() so it makes no difference at all on those architectures.
+ */
+static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
+		unsigned nr_pages, unsigned ofs, const struct iovec **iov,
+		size_t *iov_ofs, size_t bytes)
+{
+	struct page **last_page = pages + nr_pages;
+	char *kaddr;
+	size_t copied, len, total = 0;
+
+	do {
+		len = PAGE_CACHE_SIZE - ofs;
+		if (len > bytes)
+			len = bytes;
+		kaddr = kmap_atomic(*pages, KM_USER0);
+		copied = __ntfs_copy_from_user_iovec(kaddr + ofs,
+				*iov, *iov_ofs, len);
+		kunmap_atomic(kaddr, KM_USER0);
+		if (unlikely(copied != len)) {
+			/* Do it the slow way. */
+			kaddr = kmap(*pages);
+			copied = __ntfs_copy_from_user_iovec(kaddr + ofs,
+					*iov, *iov_ofs, len);
+			kunmap(*pages);
+			if (unlikely(copied != len))
+				goto err_out;
+		}
+		total += len;
+		bytes -= len;
+		if (!bytes)
+			break;
+		ntfs_set_next_iovec(iov, iov_ofs, len);
+		ofs = 0;
+	} while (++pages < last_page);
+out:
+	return total;
+err_out:
+	total += copied;
+	/* Zero the rest of the target like __copy_from_user(). */
+	while (++pages < last_page) {
+		bytes -= len;
+		if (!bytes)
+			break;
+		len = PAGE_CACHE_SIZE;
+		if (len > bytes)
+			len = bytes;
+		kaddr = kmap_atomic(*pages, KM_USER0);
+		memset(kaddr, 0, len);
+		kunmap_atomic(kaddr, KM_USER0);
+	}
+	goto out;
+}
+
+static inline void ntfs_flush_dcache_pages(struct page **pages,
+		unsigned nr_pages)
+{
+	BUG_ON(!nr_pages);
+	do {
+		/*
+		 * Warning: Do not do the decrement at the same time as the
+		 * call because flush_dcache_page() is a NULL macro on i386
+		 * and hence the decrement never happens.
+		 */
+		flush_dcache_page(pages[nr_pages]);
+	} while (--nr_pages > 0);
+}
+
+/**
+ * ntfs_commit_pages_after_non_resident_write - commit the received data
+ * @pages:	array of destination pages
+ * @nr_pages:	number of pages in @pages
+ * @pos:	byte position in file at which the write begins
+ * @bytes:	number of bytes to be written
+ *
+ * See description of ntfs_commit_pages_after_write(), below.
+ */
+static inline int ntfs_commit_pages_after_non_resident_write(
+		struct page **pages, const unsigned nr_pages,
+		s64 pos, size_t bytes)
+{
+	s64 end, initialized_size;
+	struct inode *vi;
+	ntfs_inode *ni, *base_ni;
+	struct buffer_head *bh, *head;
+	ntfs_attr_search_ctx *ctx;
+	MFT_RECORD *m;
+	ATTR_RECORD *a;
+	unsigned long flags;
+	unsigned blocksize, u;
+	int err;
+
+	vi = pages[0]->mapping->host;
+	ni = NTFS_I(vi);
+	blocksize = 1 << vi->i_blkbits;
+	end = pos + bytes;
+	u = 0;
+	do {
+		s64 bh_pos;
+		struct page *page;
+		BOOL partial;
+
+		page = pages[u];
+		bh_pos = (s64)page->index << PAGE_CACHE_SHIFT;
+		bh = head = page_buffers(page);
+		partial = FALSE;
+		do {
+			s64 bh_end;
+
+			bh_end = bh_pos + blocksize;
+			if (bh_end <= pos || bh_pos >= end) {
+				if (!buffer_uptodate(bh))
+					partial = TRUE;
+			} else {
+				set_buffer_uptodate(bh);
+				mark_buffer_dirty(bh);
+			}
+		} while (bh_pos += blocksize, (bh = bh->b_this_page) != head);
+		/*
+		 * If all buffers are now uptodate but the page is not, set the
+		 * page uptodate.
+		 */
+		if (!partial && !PageUptodate(page))
+			SetPageUptodate(page);
+	} while (++u < nr_pages);
+	/*
+	 * Finally, if we do not need to update initialized_size or i_size we
+	 * are finished.
+	 */
+	read_lock_irqsave(&ni->size_lock, flags);
+	initialized_size = ni->initialized_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (end <= initialized_size) {
+		ntfs_debug("Done.");
+		return 0;
+	}
+	/*
+	 * Update initialized_size/i_size as appropriate, both in the inode and
+	 * the mft record.
+	 */
+	if (!NInoAttr(ni))
+		base_ni = ni;
+	else
+		base_ni = ni->ext.base_ntfs_ino;
+	/* Map, pin, and lock the mft record. */
+	m = map_mft_record(base_ni);
+	if (IS_ERR(m)) {
+		err = PTR_ERR(m);
+		m = NULL;
+		ctx = NULL;
+		goto err_out;
+	}
+	BUG_ON(!NInoNonResident(ni));
+	ctx = ntfs_attr_get_search_ctx(base_ni, m);
+	if (unlikely(!ctx)) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+			CASE_SENSITIVE, 0, NULL, 0, ctx);
+	if (unlikely(err)) {
+		if (err == -ENOENT)
+			err = -EIO;
+		goto err_out;
+	}
+	a = ctx->attr;
+	BUG_ON(!a->non_resident);
+	write_lock_irqsave(&ni->size_lock, flags);
+	BUG_ON(end > ni->allocated_size);
+	ni->initialized_size = end;
+	a->data.non_resident.initialized_size = cpu_to_sle64(end);
+	if (end > i_size_read(vi)) {
+		i_size_write(vi, end);
+		a->data.non_resident.data_size =
+				a->data.non_resident.initialized_size;
+	}
+	write_unlock_irqrestore(&ni->size_lock, flags);
+	/* Mark the mft record dirty, so it gets written back. */
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	mark_mft_record_dirty(ctx->ntfs_ino);
+	ntfs_attr_put_search_ctx(ctx);
+	unmap_mft_record(base_ni);
+	ntfs_debug("Done.");
+	return 0;
+err_out:
+	if (ctx)
+		ntfs_attr_put_search_ctx(ctx);
+	if (m)
+		unmap_mft_record(base_ni);
+	ntfs_error(vi->i_sb, "Failed to update initialized_size/i_size (error "
+			"code %i).", err);
+	if (err != -ENOMEM) {
+		NVolSetErrors(ni->vol);
+		make_bad_inode(VFS_I(base_ni));
+		make_bad_inode(vi);
+	}
+	return err;
+}
+
+/**
+ * ntfs_commit_pages_after_write - commit the received data
+ * @pages:	array of destination pages
+ * @nr_pages:	number of pages in @pages
+ * @pos:	byte position in file at which the write begins
+ * @bytes:	number of bytes to be written
+ *
+ * This is called from ntfs_file_buffered_write() with i_sem held on the inode
+ * (@pages[0]->mapping->host).  There are @nr_pages pages in @pages which are
+ * locked but not kmap()ped.  The source data has already been copied into the
+ * @page.  ntfs_prepare_pages_for_non_resident_write() has been called before
+ * the data was copied (for non-resident attributes only) and it returned
+ * success.
+ *
+ * Need to set uptodate and mark dirty all buffers within the boundary of the
+ * write.  If all buffers in a page are uptodate we set the page uptodate, too.
+ *
+ * Setting the buffers dirty ensures that they get written out later when
+ * ntfs_writepage() is invoked by the VM.
+ *
+ * Finally, we need to update i_size and initialized_size as appropriate both
+ * in the inode and the mft record.
+ *
+ * This is modelled after fs/buffer.c::generic_commit_write(), which marks
+ * buffers uptodate and dirty, sets the page uptodate if all buffers in the
+ * page are uptodate, and updates i_size if the end of io is beyond i_size.  In
+ * that case, it also marks the inode dirty.
+ *
+ * If things have gone as outlined in
+ * ntfs_prepare_pages_for_non_resident_write(), we do not need to do any page
+ * content modifications here for non-resident attributes.  For resident
+ * attributes we need to do the uptodate bringing here which we combine with
+ * the copying into the mft record which means we save one atomic kmap.
+ *
+ * Return 0 on success or -errno on error.
+ */
+static int ntfs_commit_pages_after_write(struct page **pages,
+		const unsigned nr_pages, s64 pos, size_t bytes)
+{
+	s64 end, initialized_size;
+	loff_t i_size;
+	struct inode *vi;
+	ntfs_inode *ni, *base_ni;
+	struct page *page;
+	ntfs_attr_search_ctx *ctx;
+	MFT_RECORD *m;
+	ATTR_RECORD *a;
+	char *kattr, *kaddr;
+	unsigned long flags;
+	u32 attr_len;
+	int err;
+
+	BUG_ON(!nr_pages);
+	BUG_ON(!pages);
+	page = pages[0];
+	BUG_ON(!page);
+	vi = page->mapping->host;
+	ni = NTFS_I(vi);
+	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
+			"index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
+			vi->i_ino, ni->type, page->index, nr_pages,
+			(long long)pos, bytes);
+	if (NInoNonResident(ni))
+		return ntfs_commit_pages_after_non_resident_write(pages,
+				nr_pages, pos, bytes);
+	BUG_ON(nr_pages > 1);
+	/*
+	 * Attribute is resident, implying it is not compressed, encrypted, or
+	 * sparse.
+	 */
+	if (!NInoAttr(ni))
+		base_ni = ni;
+	else
+		base_ni = ni->ext.base_ntfs_ino;
+	BUG_ON(NInoNonResident(ni));
+	/* Map, pin, and lock the mft record. */
+	m = map_mft_record(base_ni);
+	if (IS_ERR(m)) {
+		err = PTR_ERR(m);
+		m = NULL;
+		ctx = NULL;
+		goto err_out;
+	}
+	ctx = ntfs_attr_get_search_ctx(base_ni, m);
+	if (unlikely(!ctx)) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+			CASE_SENSITIVE, 0, NULL, 0, ctx);
+	if (unlikely(err)) {
+		if (err == -ENOENT)
+			err = -EIO;
+		goto err_out;
+	}
+	a = ctx->attr;
+	BUG_ON(a->non_resident);
+	/* The total length of the attribute value. */
+	attr_len = le32_to_cpu(a->data.resident.value_length);
+	i_size = i_size_read(vi);
+	BUG_ON(attr_len != i_size);
+	BUG_ON(pos > attr_len);
+	end = pos + bytes;
+	BUG_ON(end > le32_to_cpu(a->length) -
+			le16_to_cpu(a->data.resident.value_offset));
+	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
+	kaddr = kmap_atomic(page, KM_USER0);
+	/* Copy the received data from the page to the mft record. */
+	memcpy(kattr + pos, kaddr + pos, bytes);
+	/* Update the attribute length if necessary. */
+	if (end > attr_len) {
+		attr_len = end;
+		a->data.resident.value_length = cpu_to_le32(attr_len);
+	}
+	/*
+	 * If the page is not uptodate, bring the out of bounds area(s)
+	 * uptodate by copying data from the mft record to the page.
+	 */
+	if (!PageUptodate(page)) {
+		if (pos > 0)
+			memcpy(kaddr, kattr, pos);
+		if (end < attr_len)
+			memcpy(kaddr + end, kattr + end, attr_len - end);
+		/* Zero the region outside the end of the attribute value. */
+		memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+	}
+	kunmap_atomic(kaddr, KM_USER0);
+	/* Update initialized_size/i_size if necessary. */
+	read_lock_irqsave(&ni->size_lock, flags);
+	initialized_size = ni->initialized_size;
+	BUG_ON(end > ni->allocated_size);
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	BUG_ON(initialized_size != i_size);
+	if (end > initialized_size) {
+		unsigned long flags;
+
+		write_lock_irqsave(&ni->size_lock, flags);
+		ni->initialized_size = end;
+		i_size_write(vi, end);
+		write_unlock_irqrestore(&ni->size_lock, flags);
+	}
+	/* Mark the mft record dirty, so it gets written back. */
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	mark_mft_record_dirty(ctx->ntfs_ino);
+	ntfs_attr_put_search_ctx(ctx);
+	unmap_mft_record(base_ni);
+	ntfs_debug("Done.");
+	return 0;
+err_out:
+	if (err == -ENOMEM) {
+		ntfs_warning(vi->i_sb, "Error allocating memory required to "
+				"commit the write.");
+		if (PageUptodate(page)) {
+			ntfs_warning(vi->i_sb, "Page is uptodate, setting "
+					"dirty so the write will be retried "
+					"later on by the VM.");
+			/*
+			 * Put the page on mapping->dirty_pages, but leave its
+			 * buffers' dirty state as-is.
+			 */
+			__set_page_dirty_nobuffers(page);
+			err = 0;
+		} else
+			ntfs_error(vi->i_sb, "Page is not uptodate.  Written "
+					"data has been lost.");
+	} else {
+		ntfs_error(vi->i_sb, "Resident attribute commit write failed "
+				"with error %i.", err);
+		NVolSetErrors(ni->vol);
+		make_bad_inode(VFS_I(base_ni));
+		make_bad_inode(vi);
+	}
+	if (ctx)
+		ntfs_attr_put_search_ctx(ctx);
+	if (m)
+		unmap_mft_record(base_ni);
+	return err;
+}
+
+/**
+ * ntfs_file_buffered_write -
+ *
+ * Locking: The vfs is holding ->i_sem on the inode.
+ */
+static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
+		const struct iovec *iov, unsigned long nr_segs,
+		loff_t pos, loff_t *ppos, size_t count)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *vi = mapping->host;
+	ntfs_inode *ni = NTFS_I(vi);
+	ntfs_volume *vol = ni->vol;
+	struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
+	struct page *cached_page = NULL;
+	char __user *buf = NULL;
+	s64 end, ll;
+	VCN last_vcn;
+	LCN lcn;
+	unsigned long flags;
+	size_t bytes, iov_ofs = 0;	/* Offset in the current iovec. */
+	ssize_t status, written;
+	unsigned nr_pages;
+	int err;
+	struct pagevec lru_pvec;
+
+	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
+			"pos 0x%llx, count 0x%lx.",
+			vi->i_ino, (unsigned)le32_to_cpu(ni->type),
+			(unsigned long long)pos, (unsigned long)count);
+	if (unlikely(!count))
+		return 0;
+	BUG_ON(NInoMstProtected(ni));
+	/*
+	 * If the attribute is not an index root and it is encrypted or
+	 * compressed, we cannot write to it yet.  Note we need to check for
+	 * AT_INDEX_ALLOCATION since this is the type of both directory and
+	 * index inodes.
+	 */
+	if (ni->type != AT_INDEX_ALLOCATION) {
+		/* If file is encrypted, deny access, just like NT4. */
+		if (NInoEncrypted(ni)) {
+			/*
+			 * Reminder for later: Encrypted files are _always_
+			 * non-resident so that the content can always be
+			 * encrypted.
+			 */
+			ntfs_debug("Denying write access to encrypted file.");
+			return -EACCES;
+		}
+		if (NInoCompressed(ni)) {
+			/* Only unnamed $DATA attribute can be compressed. */
+			BUG_ON(ni->type != AT_DATA);
+			BUG_ON(ni->name_len);
+			/*
+			 * Reminder for later: If resident, the data is not
+			 * actually compressed.  Only on the switch to non-
+			 * resident does compression kick in.  This is in
+			 * contrast to encrypted files (see above).
+			 */
+			ntfs_error(vi->i_sb, "Writing to compressed files is "
+					"not implemented yet.  Sorry.");
+			return -EOPNOTSUPP;
+		}
+	}
+	/*
+	 * If a previous ntfs_truncate() failed, repeat it and abort if it
+	 * fails again.
+	 */
+	if (unlikely(NInoTruncateFailed(ni))) {
+		down_write(&vi->i_alloc_sem);
+		err = ntfs_truncate(vi);
+		up_write(&vi->i_alloc_sem);
+		if (err || NInoTruncateFailed(ni)) {
+			if (!err)
+				err = -EIO;
+			ntfs_error(vol->sb, "Cannot perform write to inode "
+					"0x%lx, attribute type 0x%x, because "
+					"ntfs_truncate() failed (error code "
+					"%i).", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+			return err;
+		}
+	}
+	/* The first byte after the write. */
+	end = pos + count;
+	/*
+	 * If the write goes beyond the allocated size, extend the allocation
+	 * to cover the whole of the write, rounded up to the nearest cluster.
+	 */
+	read_lock_irqsave(&ni->size_lock, flags);
+	ll = ni->allocated_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (end > ll) {
+		/* Extend the allocation without changing the data size. */
+		ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
+		if (likely(ll >= 0)) {
+			BUG_ON(pos >= ll);
+			/* If the extension was partial truncate the write. */
+			if (end > ll) {
+				ntfs_debug("Truncating write to inode 0x%lx, "
+						"attribute type 0x%x, because "
+						"the allocation was only "
+						"partially extended.",
+						vi->i_ino, (unsigned)
+						le32_to_cpu(ni->type));
+				end = ll;
+				count = ll - pos;
+			}
+		} else {
+			err = ll;
+			read_lock_irqsave(&ni->size_lock, flags);
+			ll = ni->allocated_size;
+			read_unlock_irqrestore(&ni->size_lock, flags);
+			/* Perform a partial write if possible or fail. */
+			if (pos < ll) {
+				ntfs_debug("Truncating write to inode 0x%lx, "
+						"attribute type 0x%x, because "
+						"extending the allocation "
+						"failed (error code %i).",
+						vi->i_ino, (unsigned)
+						le32_to_cpu(ni->type), err);
+				end = ll;
+				count = ll - pos;
+			} else {
+				ntfs_error(vol->sb, "Cannot perform write to "
+						"inode 0x%lx, attribute type "
+						"0x%x, because extending the "
+						"allocation failed (error "
+						"code %i).", vi->i_ino,
+						(unsigned)
+						le32_to_cpu(ni->type), err);
+				return err;
+			}
+		}
+	}
+	pagevec_init(&lru_pvec, 0);
+	written = 0;
+	/*
+	 * If the write starts beyond the initialized size, extend it up to the
+	 * beginning of the write and initialize all non-sparse space between
+	 * the old initialized size and the new one.  This automatically also
+	 * increments the vfs inode->i_size to keep it above or equal to the
+	 * initialized_size.
+	 */
+	read_lock_irqsave(&ni->size_lock, flags);
+	ll = ni->initialized_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (pos > ll) {
+		err = ntfs_attr_extend_initialized(ni, pos, &cached_page,
+				&lru_pvec);
+		if (err < 0) {
+			ntfs_error(vol->sb, "Cannot perform write to inode "
+					"0x%lx, attribute type 0x%x, because "
+					"extending the initialized size "
+					"failed (error code %i).", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type), err);
+			status = err;
+			goto err_out;
+		}
+	}
+	/*
+	 * Determine the number of pages per cluster for non-resident
+	 * attributes.
+	 */
+	nr_pages = 1;
+	if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
+		nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
+	/* Finally, perform the actual write. */
+	last_vcn = -1;
+	if (likely(nr_segs == 1))
+		buf = iov->iov_base;
+	do {
+		VCN vcn;
+		pgoff_t idx, start_idx;
+		unsigned ofs, do_pages, u;
+		size_t copied;
+
+		start_idx = idx = pos >> PAGE_CACHE_SHIFT;
+		ofs = pos & ~PAGE_CACHE_MASK;
+		bytes = PAGE_CACHE_SIZE - ofs;
+		do_pages = 1;
+		if (nr_pages > 1) {
+			vcn = pos >> vol->cluster_size_bits;
+			if (vcn != last_vcn) {
+				last_vcn = vcn;
+				/*
+				 * Get the lcn of the vcn the write is in.  If
+				 * it is a hole, need to lock down all pages in
+				 * the cluster.
+				 */
+				down_read(&ni->runlist.lock);
+				lcn = ntfs_attr_vcn_to_lcn_nolock(ni, pos >>
+						vol->cluster_size_bits, FALSE);
+				up_read(&ni->runlist.lock);
+				if (unlikely(lcn < LCN_HOLE)) {
+					status = -EIO;
+					if (lcn == LCN_ENOMEM)
+						status = -ENOMEM;
+					else
+						ntfs_error(vol->sb, "Cannot "
+							"perform write to "
+							"inode 0x%lx, "
+							"attribute type 0x%x, "
+							"because the attribute "
+							"is corrupt.",
+							vi->i_ino, (unsigned)
+							le32_to_cpu(ni->type));
+					break;
+				}
+				if (lcn == LCN_HOLE) {
+					start_idx = (pos & ~(s64)
+							vol->cluster_size_mask)
+							>> PAGE_CACHE_SHIFT;
+					bytes = vol->cluster_size - (pos &
+							vol->cluster_size_mask);
+					do_pages = nr_pages;
+				}
+			}
+		}
+		if (bytes > count)
+			bytes = count;
+		/*
+		 * Bring in the user page(s) that we will copy from _first_.
+		 * Otherwise there is a nasty deadlock on copying from the same
+		 * page(s) as we are writing to, without it/them being marked
+		 * up-to-date.  Note, at present there is nothing to stop the
+		 * pages being swapped out between us bringing them into memory
+		 * and doing the actual copying.
+		 */
+		if (likely(nr_segs == 1))
+			ntfs_fault_in_pages_readable(buf, bytes);
+		else
+			ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes);
+		/* Get and lock @do_pages starting at index @start_idx. */
+		status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
+				pages, &cached_page, &lru_pvec);
+		if (unlikely(status))
+			break;
+		/*
+		 * For non-resident attributes, we need to fill any holes with
+		 * actual clusters and ensure all bufferes are mapped.  We also
+		 * need to bring uptodate any buffers that are only partially
+		 * being written to.
+		 */
+		if (NInoNonResident(ni)) {
+			status = ntfs_prepare_pages_for_non_resident_write(
+					pages, do_pages, pos, bytes);
+			if (unlikely(status)) {
+				loff_t i_size;
+
+				do {
+					unlock_page(pages[--do_pages]);
+					page_cache_release(pages[do_pages]);
+				} while (do_pages);
+				/*
+				 * The write preparation may have instantiated
+				 * allocated space outside i_size.  Trim this
+				 * off again.  We can ignore any errors in this
+				 * case as we will just be waisting a bit of
+				 * allocated space, which is not a disaster.
+				 */
+				i_size = i_size_read(vi);
+				if (pos + bytes > i_size)
+					vmtruncate(vi, i_size);
+				break;
+			}
+		}
+		u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
+		if (likely(nr_segs == 1)) {
+			copied = ntfs_copy_from_user(pages + u, do_pages - u,
+					ofs, buf, bytes);
+			buf += copied;
+		} else
+			copied = ntfs_copy_from_user_iovec(pages + u,
+					do_pages - u, ofs, &iov, &iov_ofs,
+					bytes);
+		ntfs_flush_dcache_pages(pages + u, do_pages - u);
+		status = ntfs_commit_pages_after_write(pages, do_pages, pos,
+				bytes);
+		if (likely(!status)) {
+			written += copied;
+			count -= copied;
+			pos += copied;
+			if (unlikely(copied != bytes))
+				status = -EFAULT;
+		}
+		do {
+			unlock_page(pages[--do_pages]);
+			mark_page_accessed(pages[do_pages]);
+			page_cache_release(pages[do_pages]);
+		} while (do_pages);
+		if (unlikely(status))
+			break;
+		balance_dirty_pages_ratelimited(mapping);
+		cond_resched();
+	} while (count);
+err_out:
+	*ppos = pos;
+	if (cached_page)
+		page_cache_release(cached_page);
+	/* For now, when the user asks for O_SYNC, we actually give O_DSYNC. */
+	if (likely(!status)) {
+		if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(vi))) {
+			if (!mapping->a_ops->writepage || !is_sync_kiocb(iocb))
+				status = generic_osync_inode(vi, mapping,
+						OSYNC_METADATA|OSYNC_DATA);
+		}
+  	}
+	pagevec_lru_add(&lru_pvec);
+	ntfs_debug("Done.  Returning %s (written 0x%lx, status %li).",
+			written ? "written" : "status", (unsigned long)written,
+			(long)status);
+	return written ? written : status;
+}
+
+/**
+ * ntfs_file_aio_write_nolock -
+ */
+static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
+		const struct iovec *iov, unsigned long nr_segs, loff_t *ppos)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	loff_t pos;
+	unsigned long seg;
+	size_t count;		/* after file limit checks */
+	ssize_t written, err;
+
+	count = 0;
+	for (seg = 0; seg < nr_segs; seg++) {
+		const struct iovec *iv = &iov[seg];
+		/*
+		 * If any segment has a negative length, or the cumulative
+		 * length ever wraps negative then return -EINVAL.
+		 */
+		count += iv->iov_len;
+		if (unlikely((ssize_t)(count|iv->iov_len) < 0))
+			return -EINVAL;
+		if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
+			continue;
+		if (!seg)
+			return -EFAULT;
+		nr_segs = seg;
+		count -= iv->iov_len;	/* This segment is no good */
+		break;
+	}
+	pos = *ppos;
+	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+	/* We can write back this queue in page reclaim. */
+	current->backing_dev_info = mapping->backing_dev_info;
+	written = 0;
+	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+	if (err)
+		goto out;
+	if (!count)
+		goto out;
+	err = remove_suid(file->f_dentry);
+	if (err)
+		goto out;
+	inode_update_time(inode, 1);
+	written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
+			count);
+out:
+	current->backing_dev_info = NULL;
+	return written ? written : err;
+}
+
+/**
+ * ntfs_file_aio_write -
+ */
+static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const char __user *buf,
+		size_t count, loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	ssize_t ret;
+	struct iovec local_iov = { .iov_base = (void __user *)buf,
+				   .iov_len = count };
+
+	BUG_ON(iocb->ki_pos != pos);
+
+	down(&inode->i_sem);
+	ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
+	up(&inode->i_sem);
+	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
+		int err = sync_page_range(inode, mapping, pos, ret);
+		if (err < 0)
+			ret = err;
+	}
+	return ret;
+}
+
+/**
+ * ntfs_file_writev -
+ *
+ * Basically the same as generic_file_writev() except that it ends up calling
+ * ntfs_file_aio_write_nolock() instead of __generic_file_aio_write_nolock().
+ */
+static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
+		unsigned long nr_segs, loff_t *ppos)
+{
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	struct kiocb kiocb;
+	ssize_t ret;
+
+	down(&inode->i_sem);
+	init_sync_kiocb(&kiocb, file);
+	ret = ntfs_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
+	if (ret == -EIOCBQUEUED)
+		ret = wait_on_sync_kiocb(&kiocb);
+	up(&inode->i_sem);
+	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
+		int err = sync_page_range(inode, mapping, *ppos - ret, ret);
+		if (err < 0)
+			ret = err;
+	}
+	return ret;
+}
+
+/**
+ * ntfs_file_write - simple wrapper for ntfs_file_writev()
+ */
+static ssize_t ntfs_file_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	struct iovec local_iov = { .iov_base = (void __user *)buf,
+				   .iov_len = count };
+
+	return ntfs_file_writev(file, &local_iov, 1, ppos);
+}
+
+/**
  * ntfs_file_fsync - sync a file to disk
  * @filp:	file to be synced
  * @dentry:	dentry describing the file to sync
@@ -94,6 +2286,11 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
 	if (!datasync || !NInoNonResident(NTFS_I(vi)))
 		ret = ntfs_write_inode(vi, 1);
 	write_inode_now(vi, !datasync);
+	/*
+	 * NOTE: If we were to use mapping->private_list (see ext2 and
+	 * fs/buffer.c) for dirty blocks then we could optimize the below to be
+	 * sync_mapping_buffers(vi->i_mapping).
+	 */
 	err = sync_blockdev(vi->i_sb->s_bdev);
 	if (unlikely(err && !ret))
 		ret = err;
@@ -108,39 +2305,39 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
 #endif /* NTFS_RW */
 
 struct file_operations ntfs_file_ops = {
-	.llseek		= generic_file_llseek,	  /* Seek inside file. */
-	.read		= generic_file_read,	  /* Read from file. */
-	.aio_read	= generic_file_aio_read,  /* Async read from file. */
-	.readv		= generic_file_readv,	  /* Read from file. */
+	.llseek		= generic_file_llseek,	 /* Seek inside file. */
+	.read		= generic_file_read,	 /* Read from file. */
+	.aio_read	= generic_file_aio_read, /* Async read from file. */
+	.readv		= generic_file_readv,	 /* Read from file. */
 #ifdef NTFS_RW
-	.write		= generic_file_write,	  /* Write to file. */
-	.aio_write	= generic_file_aio_write, /* Async write to file. */
-	.writev		= generic_file_writev,	  /* Write to file. */
-	/*.release	= ,*/			  /* Last file is closed.  See
-						     fs/ext2/file.c::
-						     ext2_release_file() for
-						     how to use this to discard
-						     preallocated space for
-						     write opened files. */
-	.fsync		= ntfs_file_fsync,	  /* Sync a file to disk. */
-	/*.aio_fsync	= ,*/			  /* Sync all outstanding async
-						     i/o operations on a
-						     kiocb. */
+	.write		= ntfs_file_write,	 /* Write to file. */
+	.aio_write	= ntfs_file_aio_write,	 /* Async write to file. */
+	.writev		= ntfs_file_writev,	 /* Write to file. */
+	/*.release	= ,*/			 /* Last file is closed.  See
+						    fs/ext2/file.c::
+						    ext2_release_file() for
+						    how to use this to discard
+						    preallocated space for
+						    write opened files. */
+	.fsync		= ntfs_file_fsync,	 /* Sync a file to disk. */
+	/*.aio_fsync	= ,*/			 /* Sync all outstanding async
+						    i/o operations on a
+						    kiocb. */
 #endif /* NTFS_RW */
-	/*.ioctl	= ,*/			  /* Perform function on the
-						     mounted filesystem. */
-	.mmap		= generic_file_mmap,	  /* Mmap file. */
-	.open		= ntfs_file_open,	  /* Open file. */
-	.sendfile	= generic_file_sendfile,  /* Zero-copy data send with
-						     the data source being on
-						     the ntfs partition.  We
-						     do not need to care about
-						     the data destination. */
-	/*.sendpage	= ,*/			  /* Zero-copy data send with
-						     the data destination being
-						     on the ntfs partition.  We
-						     do not need to care about
-						     the data source. */
+	/*.ioctl	= ,*/			 /* Perform function on the
+						    mounted filesystem. */
+	.mmap		= generic_file_mmap,	 /* Mmap file. */
+	.open		= ntfs_file_open,	 /* Open file. */
+	.sendfile	= generic_file_sendfile, /* Zero-copy data send with
+						    the data source being on
+						    the ntfs partition.  We do
+						    not need to care about the
+						    data destination. */
+	/*.sendpage	= ,*/			 /* Zero-copy data send with
+						    the data destination being
+						    on the ntfs partition.  We
+						    do not need to care about
+						    the data source. */
 };
 
 struct inode_operations ntfs_file_inode_ops = {
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index 11fd5307d78..8f2d5727546 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -205,6 +205,7 @@ int ntfs_index_lookup(const void *key, const int key_len,
 				&ie->key, key_len)) {
 ir_done:
 			ictx->is_in_root = TRUE;
+			ictx->ir = ir;
 			ictx->actx = actx;
 			ictx->base_ni = base_ni;
 			ictx->ia = NULL;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 886214a77f9..b24f4c4b2c5 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -30,6 +30,7 @@
 #include "debug.h"
 #include "inode.h"
 #include "attrib.h"
+#include "lcnalloc.h"
 #include "malloc.h"
 #include "mft.h"
 #include "time.h"
@@ -1013,41 +1014,50 @@ skip_large_dir_stuff:
 		}
 		a = ctx->attr;
 		/* Setup the state. */
-		if (a->non_resident) {
-			NInoSetNonResident(ni);
-			if (a->flags & (ATTR_COMPRESSION_MASK |
-					ATTR_IS_SPARSE)) {
-				if (a->flags & ATTR_COMPRESSION_MASK) {
-					NInoSetCompressed(ni);
-					if (vol->cluster_size > 4096) {
-						ntfs_error(vi->i_sb, "Found "
+		if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
+			if (a->flags & ATTR_COMPRESSION_MASK) {
+				NInoSetCompressed(ni);
+				if (vol->cluster_size > 4096) {
+					ntfs_error(vi->i_sb, "Found "
 							"compressed data but "
 							"compression is "
 							"disabled due to "
 							"cluster size (%i) > "
 							"4kiB.",
 							vol->cluster_size);
-						goto unm_err_out;
-					}
-					if ((a->flags & ATTR_COMPRESSION_MASK)
-							!= ATTR_IS_COMPRESSED) {
-						ntfs_error(vi->i_sb, "Found "
-							"unknown compression "
-							"method or corrupt "
-							"file.");
-						goto unm_err_out;
-					}
+					goto unm_err_out;
 				}
-				if (a->flags & ATTR_IS_SPARSE)
-					NInoSetSparse(ni);
+				if ((a->flags & ATTR_COMPRESSION_MASK)
+						!= ATTR_IS_COMPRESSED) {
+					ntfs_error(vi->i_sb, "Found unknown "
+							"compression method "
+							"or corrupt file.");
+					goto unm_err_out;
+				}
+			}
+			if (a->flags & ATTR_IS_SPARSE)
+				NInoSetSparse(ni);
+		}
+		if (a->flags & ATTR_IS_ENCRYPTED) {
+			if (NInoCompressed(ni)) {
+				ntfs_error(vi->i_sb, "Found encrypted and "
+						"compressed data.");
+				goto unm_err_out;
+			}
+			NInoSetEncrypted(ni);
+		}
+		if (a->non_resident) {
+			NInoSetNonResident(ni);
+			if (NInoCompressed(ni) || NInoSparse(ni)) {
 				if (a->data.non_resident.compression_unit !=
 						4) {
 					ntfs_error(vi->i_sb, "Found "
-						"nonstandard compression unit "
-						"(%u instead of 4).  Cannot "
-						"handle this.",
-						a->data.non_resident.
-						compression_unit);
+							"nonstandard "
+							"compression unit (%u "
+							"instead of 4).  "
+							"Cannot handle this.",
+							a->data.non_resident.
+							compression_unit);
 					err = -EOPNOTSUPP;
 					goto unm_err_out;
 				}
@@ -1065,14 +1075,6 @@ skip_large_dir_stuff:
 						a->data.non_resident.
 						compressed_size);
 			}
-			if (a->flags & ATTR_IS_ENCRYPTED) {
-				if (a->flags & ATTR_COMPRESSION_MASK) {
-					ntfs_error(vi->i_sb, "Found encrypted "
-							"and compressed data.");
-					goto unm_err_out;
-				}
-				NInoSetEncrypted(ni);
-			}
 			if (a->data.non_resident.lowest_vcn) {
 				ntfs_error(vi->i_sb, "First extent of $DATA "
 						"attribute has non zero "
@@ -1165,6 +1167,8 @@ err_out:
  *
  * Return 0 on success and -errno on error.  In the error case, the inode will
  * have had make_bad_inode() executed on it.
+ *
+ * Note this cannot be called for AT_INDEX_ALLOCATION.
  */
 static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 {
@@ -1212,6 +1216,75 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 	if (unlikely(err))
 		goto unm_err_out;
 	a = ctx->attr;
+	if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
+		if (a->flags & ATTR_COMPRESSION_MASK) {
+			NInoSetCompressed(ni);
+			if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
+					ni->name_len)) {
+				ntfs_error(vi->i_sb, "Found compressed "
+						"non-data or named data "
+						"attribute.  Please report "
+						"you saw this message to "
+						"linux-ntfs-dev@lists."
+						"sourceforge.net");
+				goto unm_err_out;
+			}
+			if (vol->cluster_size > 4096) {
+				ntfs_error(vi->i_sb, "Found compressed "
+						"attribute but compression is "
+						"disabled due to cluster size "
+						"(%i) > 4kiB.",
+						vol->cluster_size);
+				goto unm_err_out;
+			}
+			if ((a->flags & ATTR_COMPRESSION_MASK) !=
+					ATTR_IS_COMPRESSED) {
+				ntfs_error(vi->i_sb, "Found unknown "
+						"compression method.");
+				goto unm_err_out;
+			}
+		}
+		/*
+		 * The compressed/sparse flag set in an index root just means
+		 * to compress all files.
+		 */
+		if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
+			ntfs_error(vi->i_sb, "Found mst protected attribute "
+					"but the attribute is %s.  Please "
+					"report you saw this message to "
+					"linux-ntfs-dev@lists.sourceforge.net",
+					NInoCompressed(ni) ? "compressed" :
+					"sparse");
+			goto unm_err_out;
+		}
+		if (a->flags & ATTR_IS_SPARSE)
+			NInoSetSparse(ni);
+	}
+	if (a->flags & ATTR_IS_ENCRYPTED) {
+		if (NInoCompressed(ni)) {
+			ntfs_error(vi->i_sb, "Found encrypted and compressed "
+					"data.");
+			goto unm_err_out;
+		}
+		/*
+		 * The encryption flag set in an index root just means to
+		 * encrypt all files.
+		 */
+		if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
+			ntfs_error(vi->i_sb, "Found mst protected attribute "
+					"but the attribute is encrypted.  "
+					"Please report you saw this message "
+					"to linux-ntfs-dev@lists.sourceforge."
+					"net");
+			goto unm_err_out;
+		}
+		if (ni->type != AT_DATA) {
+			ntfs_error(vi->i_sb, "Found encrypted non-data "
+					"attribute.");
+			goto unm_err_out;
+		}
+		NInoSetEncrypted(ni);
+	}
 	if (!a->non_resident) {
 		/* Ensure the attribute name is placed before the value. */
 		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
@@ -1220,11 +1293,10 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 					"the attribute value.");
 			goto unm_err_out;
 		}
-		if (NInoMstProtected(ni) || a->flags) {
+		if (NInoMstProtected(ni)) {
 			ntfs_error(vi->i_sb, "Found mst protected attribute "
-					"or attribute with non-zero flags but "
-					"the attribute is resident.  Please "
-					"report you saw this message to "
+					"but the attribute is resident.  "
+					"Please report you saw this message to "
 					"linux-ntfs-dev@lists.sourceforge.net");
 			goto unm_err_out;
 		}
@@ -1250,50 +1322,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 					"the mapping pairs array.");
 			goto unm_err_out;
 		}
-		if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
-			if (a->flags & ATTR_COMPRESSION_MASK) {
-				NInoSetCompressed(ni);
-				if ((ni->type != AT_DATA) || (ni->type ==
-						AT_DATA && ni->name_len)) {
-					ntfs_error(vi->i_sb, "Found compressed "
-							"non-data or named "
-							"data attribute.  "
-							"Please report you "
-							"saw this message to "
-							"linux-ntfs-dev@lists."
-							"sourceforge.net");
-					goto unm_err_out;
-				}
-				if (vol->cluster_size > 4096) {
-					ntfs_error(vi->i_sb, "Found compressed "
-							"attribute but "
-							"compression is "
-							"disabled due to "
-							"cluster size (%i) > "
-							"4kiB.",
-							vol->cluster_size);
-					goto unm_err_out;
-				}
-				if ((a->flags & ATTR_COMPRESSION_MASK) !=
-						ATTR_IS_COMPRESSED) {
-					ntfs_error(vi->i_sb, "Found unknown "
-							"compression method.");
-					goto unm_err_out;
-				}
-			}
-			if (NInoMstProtected(ni)) {
-				ntfs_error(vi->i_sb, "Found mst protected "
-						"attribute but the attribute "
-						"is %s.  Please report you "
-						"saw this message to "
-						"linux-ntfs-dev@lists."
-						"sourceforge.net",
-						NInoCompressed(ni) ?
-						"compressed" : "sparse");
-				goto unm_err_out;
-			}
-			if (a->flags & ATTR_IS_SPARSE)
-				NInoSetSparse(ni);
+		if (NInoCompressed(ni) || NInoSparse(ni)) {
 			if (a->data.non_resident.compression_unit != 4) {
 				ntfs_error(vi->i_sb, "Found nonstandard "
 						"compression unit (%u instead "
@@ -1313,23 +1342,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 			ni->itype.compressed.size = sle64_to_cpu(
 					a->data.non_resident.compressed_size);
 		}
-		if (a->flags & ATTR_IS_ENCRYPTED) {
-			if (a->flags & ATTR_COMPRESSION_MASK) {
-				ntfs_error(vi->i_sb, "Found encrypted and "
-						"compressed data.");
-				goto unm_err_out;
-			}
-			if (NInoMstProtected(ni)) {
-				ntfs_error(vi->i_sb, "Found mst protected "
-						"attribute but the attribute "
-						"is encrypted.  Please report "
-						"you saw this message to "
-						"linux-ntfs-dev@lists."
-						"sourceforge.net");
-				goto unm_err_out;
-			}
-			NInoSetEncrypted(ni);
-		}
 		if (a->data.non_resident.lowest_vcn) {
 			ntfs_error(vi->i_sb, "First extent of attribute has "
 					"non-zero lowest_vcn.");
@@ -1348,12 +1360,12 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 		vi->i_mapping->a_ops = &ntfs_mst_aops;
 	else
 		vi->i_mapping->a_ops = &ntfs_aops;
-	if (NInoCompressed(ni) || NInoSparse(ni))
+	if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT)
 		vi->i_blocks = ni->itype.compressed.size >> 9;
 	else
 		vi->i_blocks = ni->allocated_size >> 9;
 	/*
-	 * Make sure the base inode doesn't go away and attach it to the
+	 * Make sure the base inode does not go away and attach it to the
 	 * attribute inode.
 	 */
 	igrab(base_vi);
@@ -1480,7 +1492,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
 				"after the attribute value.");
 		goto unm_err_out;
 	}
-	/* Compressed/encrypted/sparse index root is not allowed. */
+	/*
+	 * Compressed/encrypted/sparse index root is not allowed, except for
+	 * directories of course but those are not dealt with here.
+	 */
 	if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED |
 			ATTR_IS_SPARSE)) {
 		ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index "
@@ -2277,11 +2292,16 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
 
 #ifdef NTFS_RW
 
+static const char *es = "  Leaving inconsistent metadata.  Unmount and run "
+		"chkdsk.";
+
 /**
  * ntfs_truncate - called when the i_size of an ntfs inode is changed
  * @vi:		inode for which the i_size was changed
  *
- * We do not support i_size changes yet.
+ * We only support i_size changes for normal files at present, i.e. not
+ * compressed and not encrypted.  This is enforced in ntfs_setattr(), see
+ * below.
  *
  * The kernel guarantees that @vi is a regular file (S_ISREG() is true) and
  * that the change is allowed.
@@ -2292,80 +2312,499 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
  * Returns 0 on success or -errno on error.
  *
  * Called with ->i_sem held.  In all but one case ->i_alloc_sem is held for
- * writing.  The only case where ->i_alloc_sem is not held is
+ * writing.  The only case in the kernel where ->i_alloc_sem is not held is
  * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
- * with the current i_size as the offset which means that it is a noop as far
- * as ntfs_truncate() is concerned.
+ * with the current i_size as the offset.  The analogous place in NTFS is in
+ * fs/ntfs/file.c::ntfs_file_buffered_write() where we call vmtruncate() again
+ * without holding ->i_alloc_sem.
  */
 int ntfs_truncate(struct inode *vi)
 {
-	ntfs_inode *ni = NTFS_I(vi);
+	s64 new_size, old_size, nr_freed, new_alloc_size, old_alloc_size;
+	VCN highest_vcn;
+	unsigned long flags;
+	ntfs_inode *base_ni, *ni = NTFS_I(vi);
 	ntfs_volume *vol = ni->vol;
 	ntfs_attr_search_ctx *ctx;
 	MFT_RECORD *m;
 	ATTR_RECORD *a;
 	const char *te = "  Leaving file length out of sync with i_size.";
-	int err;
+	int err, mp_size, size_change, alloc_change;
+	u32 attr_len;
 
 	ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
 	BUG_ON(NInoAttr(ni));
+	BUG_ON(S_ISDIR(vi->i_mode));
+	BUG_ON(NInoMstProtected(ni));
 	BUG_ON(ni->nr_extents < 0);
-	m = map_mft_record(ni);
+retry_truncate:
+	/*
+	 * Lock the runlist for writing and map the mft record to ensure it is
+	 * safe to mess with the attribute runlist and sizes.
+	 */
+	down_write(&ni->runlist.lock);
+	if (!NInoAttr(ni))
+		base_ni = ni;
+	else
+		base_ni = ni->ext.base_ntfs_ino;
+	m = map_mft_record(base_ni);
 	if (IS_ERR(m)) {
 		err = PTR_ERR(m);
 		ntfs_error(vi->i_sb, "Failed to map mft record for inode 0x%lx "
 				"(error code %d).%s", vi->i_ino, err, te);
 		ctx = NULL;
 		m = NULL;
-		goto err_out;
+		goto old_bad_out;
 	}
-	ctx = ntfs_attr_get_search_ctx(ni, m);
+	ctx = ntfs_attr_get_search_ctx(base_ni, m);
 	if (unlikely(!ctx)) {
 		ntfs_error(vi->i_sb, "Failed to allocate a search context for "
 				"inode 0x%lx (not enough memory).%s",
 				vi->i_ino, te);
 		err = -ENOMEM;
-		goto err_out;
+		goto old_bad_out;
 	}
 	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
 			CASE_SENSITIVE, 0, NULL, 0, ctx);
 	if (unlikely(err)) {
-		if (err == -ENOENT)
+		if (err == -ENOENT) {
 			ntfs_error(vi->i_sb, "Open attribute is missing from "
 					"mft record.  Inode 0x%lx is corrupt.  "
-					"Run chkdsk.", vi->i_ino);
-		else
+					"Run chkdsk.%s", vi->i_ino, te);
+			err = -EIO;
+		} else
 			ntfs_error(vi->i_sb, "Failed to lookup attribute in "
-					"inode 0x%lx (error code %d).",
-					vi->i_ino, err);
-		goto err_out;
+					"inode 0x%lx (error code %d).%s",
+					vi->i_ino, err, te);
+		goto old_bad_out;
 	}
+	m = ctx->mrec;
 	a = ctx->attr;
-	/* If the size has not changed there is nothing to do. */
-	if (ntfs_attr_size(a) == i_size_read(vi))
-		goto done;
-	// TODO: Implement the truncate...
-	ntfs_error(vi->i_sb, "Inode size has changed but this is not "
-			"implemented yet.  Resetting inode size to old value. "
-			" This is most likely a bug in the ntfs driver!");
-	i_size_write(vi, ntfs_attr_size(a)); 
-done:
+	/*
+	 * The i_size of the vfs inode is the new size for the attribute value.
+	 */
+	new_size = i_size_read(vi);
+	/* The current size of the attribute value is the old size. */
+	old_size = ntfs_attr_size(a);
+	/* Calculate the new allocated size. */
+	if (NInoNonResident(ni))
+		new_alloc_size = (new_size + vol->cluster_size - 1) &
+				~(s64)vol->cluster_size_mask;
+	else
+		new_alloc_size = (new_size + 7) & ~7;
+	/* The current allocated size is the old allocated size. */
+	read_lock_irqsave(&ni->size_lock, flags);
+	old_alloc_size = ni->allocated_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	/*
+	 * The change in the file size.  This will be 0 if no change, >0 if the
+	 * size is growing, and <0 if the size is shrinking.
+	 */
+	size_change = -1;
+	if (new_size - old_size >= 0) {
+		size_change = 1;
+		if (new_size == old_size)
+			size_change = 0;
+	}
+	/* As above for the allocated size. */
+	alloc_change = -1;
+	if (new_alloc_size - old_alloc_size >= 0) {
+		alloc_change = 1;
+		if (new_alloc_size == old_alloc_size)
+			alloc_change = 0;
+	}
+	/*
+	 * If neither the size nor the allocation are being changed there is
+	 * nothing to do.
+	 */
+	if (!size_change && !alloc_change)
+		goto unm_done;
+	/* If the size is changing, check if new size is allowed in $AttrDef. */
+	if (size_change) {
+		err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
+		if (unlikely(err)) {
+			if (err == -ERANGE) {
+				ntfs_error(vol->sb, "Truncate would cause the "
+						"inode 0x%lx to %simum size "
+						"for its attribute type "
+						"(0x%x).  Aborting truncate.",
+						vi->i_ino,
+						new_size > old_size ? "exceed "
+						"the max" : "go under the min",
+						le32_to_cpu(ni->type));
+				err = -EFBIG;
+			} else {
+				ntfs_error(vol->sb, "Inode 0x%lx has unknown "
+						"attribute type 0x%x.  "
+						"Aborting truncate.",
+						vi->i_ino,
+						le32_to_cpu(ni->type));
+				err = -EIO;
+			}
+			/* Reset the vfs inode size to the old size. */
+			i_size_write(vi, old_size);
+			goto err_out;
+		}
+	}
+	if (NInoCompressed(ni) || NInoEncrypted(ni)) {
+		ntfs_warning(vi->i_sb, "Changes in inode size are not "
+				"supported yet for %s files, ignoring.",
+				NInoCompressed(ni) ? "compressed" :
+				"encrypted");
+		err = -EOPNOTSUPP;
+		goto bad_out;
+	}
+	if (a->non_resident)
+		goto do_non_resident_truncate;
+	BUG_ON(NInoNonResident(ni));
+	/* Resize the attribute record to best fit the new attribute size. */
+	if (new_size < vol->mft_record_size &&
+			!ntfs_resident_attr_value_resize(m, a, new_size)) {
+		unsigned long flags;
+
+		/* The resize succeeded! */
+		flush_dcache_mft_record_page(ctx->ntfs_ino);
+		mark_mft_record_dirty(ctx->ntfs_ino);
+		write_lock_irqsave(&ni->size_lock, flags);
+		/* Update the sizes in the ntfs inode and all is done. */
+		ni->allocated_size = le32_to_cpu(a->length) -
+				le16_to_cpu(a->data.resident.value_offset);
+		/*
+		 * Note ntfs_resident_attr_value_resize() has already done any
+		 * necessary data clearing in the attribute record.  When the
+		 * file is being shrunk vmtruncate() will already have cleared
+		 * the top part of the last partial page, i.e. since this is
+		 * the resident case this is the page with index 0.  However,
+		 * when the file is being expanded, the page cache page data
+		 * between the old data_size, i.e. old_size, and the new_size
+		 * has not been zeroed.  Fortunately, we do not need to zero it
+		 * either since on one hand it will either already be zero due
+		 * to both readpage and writepage clearing partial page data
+		 * beyond i_size in which case there is nothing to do or in the
+		 * case of the file being mmap()ped at the same time, POSIX
+		 * specifies that the behaviour is unspecified thus we do not
+		 * have to do anything.  This means that in our implementation
+		 * in the rare case that the file is mmap()ped and a write
+		 * occured into the mmap()ped region just beyond the file size
+		 * and writepage has not yet been called to write out the page
+		 * (which would clear the area beyond the file size) and we now
+		 * extend the file size to incorporate this dirty region
+		 * outside the file size, a write of the page would result in
+		 * this data being written to disk instead of being cleared.
+		 * Given both POSIX and the Linux mmap(2) man page specify that
+		 * this corner case is undefined, we choose to leave it like
+		 * that as this is much simpler for us as we cannot lock the
+		 * relevant page now since we are holding too many ntfs locks
+		 * which would result in a lock reversal deadlock.
+		 */
+		ni->initialized_size = new_size;
+		write_unlock_irqrestore(&ni->size_lock, flags);
+		goto unm_done;
+	}
+	/* If the above resize failed, this must be an attribute extension. */
+	BUG_ON(size_change < 0);
+	/*
+	 * We have to drop all the locks so we can call
+	 * ntfs_attr_make_non_resident().  This could be optimised by try-
+	 * locking the first page cache page and only if that fails dropping
+	 * the locks, locking the page, and redoing all the locking and
+	 * lookups.  While this would be a huge optimisation, it is not worth
+	 * it as this is definitely a slow code path as it only ever can happen
+	 * once for any given file.
+	 */
 	ntfs_attr_put_search_ctx(ctx);
-	unmap_mft_record(ni);
-	NInoClearTruncateFailed(ni);
-	ntfs_debug("Done.");
-	return 0;
-err_out:
-	if (err != -ENOMEM) {
+	unmap_mft_record(base_ni);
+	up_write(&ni->runlist.lock);
+	/*
+	 * Not enough space in the mft record, try to make the attribute
+	 * non-resident and if successful restart the truncation process.
+	 */
+	err = ntfs_attr_make_non_resident(ni, old_size);
+	if (likely(!err))
+		goto retry_truncate;
+	/*
+	 * Could not make non-resident.  If this is due to this not being
+	 * permitted for this attribute type or there not being enough space,
+	 * try to make other attributes non-resident.  Otherwise fail.
+	 */
+	if (unlikely(err != -EPERM && err != -ENOSPC)) {
+		ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, attribute "
+				"type 0x%x, because the conversion from "
+				"resident to non-resident attribute failed "
+				"with error code %i.", vi->i_ino,
+				(unsigned)le32_to_cpu(ni->type), err);
+		if (err != -ENOMEM)
+			err = -EIO;
+		goto conv_err_out;
+	}
+	/* TODO: Not implemented from here, abort. */
+	if (err == -ENOSPC)
+		ntfs_error(vol->sb, "Not enough space in the mft record/on "
+				"disk for the non-resident attribute value.  "
+				"This case is not implemented yet.");
+	else /* if (err == -EPERM) */
+		ntfs_error(vol->sb, "This attribute type may not be "
+				"non-resident.  This case is not implemented "
+				"yet.");
+	err = -EOPNOTSUPP;
+	goto conv_err_out;
+#if 0
+	// TODO: Attempt to make other attributes non-resident.
+	if (!err)
+		goto do_resident_extend;
+	/*
+	 * Both the attribute list attribute and the standard information
+	 * attribute must remain in the base inode.  Thus, if this is one of
+	 * these attributes, we have to try to move other attributes out into
+	 * extent mft records instead.
+	 */
+	if (ni->type == AT_ATTRIBUTE_LIST ||
+			ni->type == AT_STANDARD_INFORMATION) {
+		// TODO: Attempt to move other attributes into extent mft
+		// records.
+		err = -EOPNOTSUPP;
+		if (!err)
+			goto do_resident_extend;
+		goto err_out;
+	}
+	// TODO: Attempt to move this attribute to an extent mft record, but
+	// only if it is not already the only attribute in an mft record in
+	// which case there would be nothing to gain.
+	err = -EOPNOTSUPP;
+	if (!err)
+		goto do_resident_extend;
+	/* There is nothing we can do to make enough space. )-: */
+	goto err_out;
+#endif
+do_non_resident_truncate:
+	BUG_ON(!NInoNonResident(ni));
+	if (alloc_change < 0) {
+		highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
+		if (highest_vcn > 0 &&
+				old_alloc_size >> vol->cluster_size_bits >
+				highest_vcn + 1) {
+			/*
+			 * This attribute has multiple extents.  Not yet
+			 * supported.
+			 */
+			ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, "
+					"attribute type 0x%x, because the "
+					"attribute is highly fragmented (it "
+					"consists of multiple extents) and "
+					"this case is not implemented yet.",
+					vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type));
+			err = -EOPNOTSUPP;
+			goto bad_out;
+		}
+	}
+	/*
+	 * If the size is shrinking, need to reduce the initialized_size and
+	 * the data_size before reducing the allocation.
+	 */
+	if (size_change < 0) {
+		/*
+		 * Make the valid size smaller (i_size is already up-to-date).
+		 */
+		write_lock_irqsave(&ni->size_lock, flags);
+		if (new_size < ni->initialized_size) {
+			ni->initialized_size = new_size;
+			a->data.non_resident.initialized_size =
+					cpu_to_sle64(new_size);
+		}
+		a->data.non_resident.data_size = cpu_to_sle64(new_size);
+		write_unlock_irqrestore(&ni->size_lock, flags);
+		flush_dcache_mft_record_page(ctx->ntfs_ino);
+		mark_mft_record_dirty(ctx->ntfs_ino);
+		/* If the allocated size is not changing, we are done. */
+		if (!alloc_change)
+			goto unm_done;
+		/*
+		 * If the size is shrinking it makes no sense for the
+		 * allocation to be growing.
+		 */
+		BUG_ON(alloc_change > 0);
+	} else /* if (size_change >= 0) */ {
+		/*
+		 * The file size is growing or staying the same but the
+		 * allocation can be shrinking, growing or staying the same.
+		 */
+		if (alloc_change > 0) {
+			/*
+			 * We need to extend the allocation and possibly update
+			 * the data size.  If we are updating the data size,
+			 * since we are not touching the initialized_size we do
+			 * not need to worry about the actual data on disk.
+			 * And as far as the page cache is concerned, there
+			 * will be no pages beyond the old data size and any
+			 * partial region in the last page between the old and
+			 * new data size (or the end of the page if the new
+			 * data size is outside the page) does not need to be
+			 * modified as explained above for the resident
+			 * attribute truncate case.  To do this, we simply drop
+			 * the locks we hold and leave all the work to our
+			 * friendly helper ntfs_attr_extend_allocation().
+			 */
+			ntfs_attr_put_search_ctx(ctx);
+			unmap_mft_record(base_ni);
+			up_write(&ni->runlist.lock);
+			err = ntfs_attr_extend_allocation(ni, new_size,
+					size_change > 0 ? new_size : -1, -1);
+			/*
+			 * ntfs_attr_extend_allocation() will have done error
+			 * output already.
+			 */
+			goto done;
+		}
+		if (!alloc_change)
+			goto alloc_done;
+	}
+	/* alloc_change < 0 */
+	/* Free the clusters. */
+	nr_freed = ntfs_cluster_free(ni, new_alloc_size >>
+			vol->cluster_size_bits, -1, ctx);
+	m = ctx->mrec;
+	a = ctx->attr;
+	if (unlikely(nr_freed < 0)) {
+		ntfs_error(vol->sb, "Failed to release cluster(s) (error code "
+				"%lli).  Unmount and run chkdsk to recover "
+				"the lost cluster(s).", (long long)nr_freed);
 		NVolSetErrors(vol);
+		nr_freed = 0;
+	}
+	/* Truncate the runlist. */
+	err = ntfs_rl_truncate_nolock(vol, &ni->runlist,
+			new_alloc_size >> vol->cluster_size_bits);
+	/*
+	 * If the runlist truncation failed and/or the search context is no
+	 * longer valid, we cannot resize the attribute record or build the
+	 * mapping pairs array thus we mark the inode bad so that no access to
+	 * the freed clusters can happen.
+	 */
+	if (unlikely(err || IS_ERR(m))) {
+		ntfs_error(vol->sb, "Failed to %s (error code %li).%s",
+				IS_ERR(m) ?
+				"restore attribute search context" :
+				"truncate attribute runlist",
+				IS_ERR(m) ? PTR_ERR(m) : err, es);
+		err = -EIO;
+		goto bad_out;
+	}
+	/* Get the size for the shrunk mapping pairs array for the runlist. */
+	mp_size = ntfs_get_size_for_mapping_pairs(vol, ni->runlist.rl, 0, -1);
+	if (unlikely(mp_size <= 0)) {
+		ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
+				"attribute type 0x%x, because determining the "
+				"size for the mapping pairs failed with error "
+				"code %i.%s", vi->i_ino,
+				(unsigned)le32_to_cpu(ni->type), mp_size, es);
+		err = -EIO;
+		goto bad_out;
+	}
+	/*
+	 * Shrink the attribute record for the new mapping pairs array.  Note,
+	 * this cannot fail since we are making the attribute smaller thus by
+	 * definition there is enough space to do so.
+	 */
+	attr_len = le32_to_cpu(a->length);
+	err = ntfs_attr_record_resize(m, a, mp_size +
+			le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
+	BUG_ON(err);
+	/*
+	 * Generate the mapping pairs array directly into the attribute record.
+	 */
+	err = ntfs_mapping_pairs_build(vol, (u8*)a +
+			le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
+			mp_size, ni->runlist.rl, 0, -1, NULL);
+	if (unlikely(err)) {
+		ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
+				"attribute type 0x%x, because building the "
+				"mapping pairs failed with error code %i.%s",
+				vi->i_ino, (unsigned)le32_to_cpu(ni->type),
+				err, es);
+		err = -EIO;
+		goto bad_out;
+	}
+	/* Update the allocated/compressed size as well as the highest vcn. */
+	a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >>
+			vol->cluster_size_bits) - 1);
+	write_lock_irqsave(&ni->size_lock, flags);
+	ni->allocated_size = new_alloc_size;
+	a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size);
+	if (NInoSparse(ni) || NInoCompressed(ni)) {
+		if (nr_freed) {
+			ni->itype.compressed.size -= nr_freed <<
+					vol->cluster_size_bits;
+			BUG_ON(ni->itype.compressed.size < 0);
+			a->data.non_resident.compressed_size = cpu_to_sle64(
+					ni->itype.compressed.size);
+			vi->i_blocks = ni->itype.compressed.size >> 9;
+		}
+	} else
+		vi->i_blocks = new_alloc_size >> 9;
+	write_unlock_irqrestore(&ni->size_lock, flags);
+	/*
+	 * We have shrunk the allocation.  If this is a shrinking truncate we
+	 * have already dealt with the initialized_size and the data_size above
+	 * and we are done.  If the truncate is only changing the allocation
+	 * and not the data_size, we are also done.  If this is an extending
+	 * truncate, need to extend the data_size now which is ensured by the
+	 * fact that @size_change is positive.
+	 */
+alloc_done:
+	/*
+	 * If the size is growing, need to update it now.  If it is shrinking,
+	 * we have already updated it above (before the allocation change).
+	 */
+	if (size_change > 0)
+		a->data.non_resident.data_size = cpu_to_sle64(new_size);
+	/* Ensure the modified mft record is written out. */
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	mark_mft_record_dirty(ctx->ntfs_ino);
+unm_done:
+	ntfs_attr_put_search_ctx(ctx);
+	unmap_mft_record(base_ni);
+	up_write(&ni->runlist.lock);
+done:
+	/* Update the mtime and ctime on the base inode. */
+	inode_update_time(VFS_I(base_ni), 1);
+	if (likely(!err)) {
+		NInoClearTruncateFailed(ni);
+		ntfs_debug("Done.");
+	}
+	return err;
+old_bad_out:
+	old_size = -1;
+bad_out:
+	if (err != -ENOMEM && err != -EOPNOTSUPP) {
 		make_bad_inode(vi);
+		make_bad_inode(VFS_I(base_ni));
+		NVolSetErrors(vol);
 	}
+	if (err != -EOPNOTSUPP)
+		NInoSetTruncateFailed(ni);
+	else if (old_size >= 0)
+		i_size_write(vi, old_size);
+err_out:
 	if (ctx)
 		ntfs_attr_put_search_ctx(ctx);
 	if (m)
-		unmap_mft_record(ni);
-	NInoSetTruncateFailed(ni);
+		unmap_mft_record(base_ni);
+	up_write(&ni->runlist.lock);
+out:
+	ntfs_debug("Failed.  Returning error code %i.", err);
 	return err;
+conv_err_out:
+	if (err != -ENOMEM && err != -EOPNOTSUPP) {
+		make_bad_inode(vi);
+		make_bad_inode(VFS_I(base_ni));
+		NVolSetErrors(vol);
+	}
+	if (err != -EOPNOTSUPP)
+		NInoSetTruncateFailed(ni);
+	else
+		i_size_write(vi, old_size);
+	goto out;
 }
 
 /**
@@ -2406,8 +2845,7 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	err = inode_change_ok(vi, attr);
 	if (err)
-		return err;
-
+		goto out;
 	/* We do not support NTFS ACLs yet. */
 	if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE)) {
 		ntfs_warning(vi->i_sb, "Changes in user/group/mode are not "
@@ -2415,14 +2853,22 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
 		err = -EOPNOTSUPP;
 		goto out;
 	}
-
 	if (ia_valid & ATTR_SIZE) {
 		if (attr->ia_size != i_size_read(vi)) {
-			ntfs_warning(vi->i_sb, "Changes in inode size are not "
-					"supported yet, ignoring.");
-			err = -EOPNOTSUPP;
-			// TODO: Implement...
-			// err = vmtruncate(vi, attr->ia_size);
+			ntfs_inode *ni = NTFS_I(vi);
+			/*
+			 * FIXME: For now we do not support resizing of
+			 * compressed or encrypted files yet.
+			 */
+			if (NInoCompressed(ni) || NInoEncrypted(ni)) {
+				ntfs_warning(vi->i_sb, "Changes in inode size "
+						"are not supported yet for "
+						"%s files, ignoring.",
+						NInoCompressed(ni) ?
+						"compressed" : "encrypted");
+				err = -EOPNOTSUPP;
+			} else
+				err = vmtruncate(vi, attr->ia_size);
 			if (err || ia_valid == ATTR_SIZE)
 				goto out;
 		} else {
@@ -2430,16 +2876,18 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
 			 * We skipped the truncate but must still update
 			 * timestamps.
 			 */
-			ia_valid |= ATTR_MTIME|ATTR_CTIME;
+			ia_valid |= ATTR_MTIME | ATTR_CTIME;
 		}
 	}
-
 	if (ia_valid & ATTR_ATIME)
-		vi->i_atime = attr->ia_atime;
+		vi->i_atime = timespec_trunc(attr->ia_atime,
+				vi->i_sb->s_time_gran);
 	if (ia_valid & ATTR_MTIME)
-		vi->i_mtime = attr->ia_mtime;
+		vi->i_mtime = timespec_trunc(attr->ia_mtime,
+				vi->i_sb->s_time_gran);
 	if (ia_valid & ATTR_CTIME)
-		vi->i_ctime = attr->ia_ctime;
+		vi->i_ctime = timespec_trunc(attr->ia_ctime,
+				vi->i_sb->s_time_gran);
 	mark_inode_dirty(vi);
 out:
 	return err;
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 609ad1728ce..f5678d5d791 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -123,7 +123,7 @@ enum {
 	magic_RCRD = const_cpu_to_le32(0x44524352), /* Log record page. */
 
 	/* Found in $LogFile/$DATA.  (May be found in $MFT/$DATA, also?) */
-	magic_CHKD = const_cpu_to_le32(0x424b4843), /* Modified by chkdsk. */
+	magic_CHKD = const_cpu_to_le32(0x444b4843), /* Modified by chkdsk. */
 
 	/* Found in all ntfs record containing records. */
 	magic_BAAD = const_cpu_to_le32(0x44414142), /* Failed multi sector
@@ -308,10 +308,8 @@ typedef le16 MFT_RECORD_FLAGS;
  * The _LE versions are to be applied on little endian MFT_REFs.
  * Note: The _LE versions will return a CPU endian formatted value!
  */
-typedef enum {
-	MFT_REF_MASK_CPU	= 0x0000ffffffffffffULL,
-	MFT_REF_MASK_LE		= const_cpu_to_le64(0x0000ffffffffffffULL),
-} MFT_REF_CONSTS;
+#define MFT_REF_MASK_CPU 0x0000ffffffffffffULL
+#define MFT_REF_MASK_LE const_cpu_to_le64(MFT_REF_MASK_CPU)
 
 typedef u64 MFT_REF;
 typedef le64 leMFT_REF;
@@ -1023,10 +1021,17 @@ enum {
 	FILE_NAME_POSIX		= 0x00,
 	/* This is the largest namespace. It is case sensitive and allows all
 	   Unicode characters except for: '\0' and '/'.  Beware that in
-	   WinNT/2k files which eg have the same name except for their case
-	   will not be distinguished by the standard utilities and thus a "del
-	   filename" will delete both "filename" and "fileName" without
-	   warning. */
+	   WinNT/2k/2003 by default files which eg have the same name except
+	   for their case will not be distinguished by the standard utilities
+	   and thus a "del filename" will delete both "filename" and "fileName"
+	   without warning.  However if for example Services For Unix (SFU) are
+	   installed and the case sensitive option was enabled at installation
+	   time, then you can create/access/delete such files.
+	   Note that even SFU places restrictions on the filenames beyond the
+	   '\0' and '/' and in particular the following set of characters is
+	   not allowed: '"', '/', '<', '>', '\'.  All other characters,
+	   including the ones no allowed in WIN32 namespace are allowed.
+	   Tested with SFU 3.5 (this is now free) running on Windows XP. */
 	FILE_NAME_WIN32		= 0x01,
 	/* The standard WinNT/2k NTFS long filenames. Case insensitive.  All
 	   Unicode chars except: '\0', '"', '*', '/', ':', '<', '>', '?', '\',
@@ -2369,7 +2374,9 @@ typedef struct {
  * Extended attribute flags (8-bit).
  */
 enum {
-	NEED_EA	= 0x80
+	NEED_EA	= 0x80		/* If set the file to which the EA belongs
+				   cannot be interpreted without understanding
+				   the associates extended attributes. */
 } __attribute__ ((__packed__));
 
 typedef u8 EA_FLAGS;
@@ -2377,20 +2384,20 @@ typedef u8 EA_FLAGS;
 /*
  * Attribute: Extended attribute (EA) (0xe0).
  *
- * NOTE: Always non-resident. (Is this true?)
+ * NOTE: Can be resident or non-resident.
  *
  * Like the attribute list and the index buffer list, the EA attribute value is
  * a sequence of EA_ATTR variable length records.
- *
- * FIXME: It appears weird that the EA name is not unicode. Is it true?
  */
 typedef struct {
 	le32 next_entry_offset;	/* Offset to the next EA_ATTR. */
 	EA_FLAGS flags;		/* Flags describing the EA. */
-	u8 ea_name_length;	/* Length of the name of the EA in bytes. */
+	u8 ea_name_length;	/* Length of the name of the EA in bytes
+				   excluding the '\0' byte terminator. */
 	le16 ea_value_length;	/* Byte size of the EA's value. */
-	u8 ea_name[0];		/* Name of the EA. */
-	u8 ea_value[0];		/* The value of the EA. Immediately follows
+	u8 ea_name[0];		/* Name of the EA.  Note this is ASCII, not
+				   Unicode and it is zero terminated. */
+	u8 ea_value[0];		/* The value of the EA.  Immediately follows
 				   the name. */
 } __attribute__ ((__packed__)) EA_ATTR;
 
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index a4bc07616e5..29cabf93d2d 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -54,6 +54,8 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
 	int ret = 0;
 
 	ntfs_debug("Entering.");
+	if (!rl)
+		return 0;
 	for (; rl->length; rl++) {
 		int err;
 
@@ -74,6 +76,7 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
  * @count:	number of clusters to allocate
  * @start_lcn:	starting lcn at which to allocate the clusters (or -1 if none)
  * @zone:	zone from which to allocate the clusters
+ * @is_extension:	if TRUE, this is an attribute extension
  *
  * Allocate @count clusters preferably starting at cluster @start_lcn or at the
  * current allocator position if @start_lcn is -1, on the mounted ntfs volume
@@ -84,6 +87,13 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
  * @start_vcn specifies the vcn of the first allocated cluster.  This makes
  * merging the resulting runlist with the old runlist easier.
  *
+ * If @is_extension is TRUE, the caller is allocating clusters to extend an
+ * attribute and if it is FALSE, the caller is allocating clusters to fill a
+ * hole in an attribute.  Practically the difference is that if @is_extension
+ * is TRUE the returned runlist will be terminated with LCN_ENOENT and if
+ * @is_extension is FALSE the runlist will be terminated with
+ * LCN_RL_NOT_MAPPED.
+ *
  * You need to check the return value with IS_ERR().  If this is false, the
  * function was successful and the return value is a runlist describing the
  * allocated cluster(s).  If IS_ERR() is true, the function failed and
@@ -135,7 +145,8 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
  */
 runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
 		const s64 count, const LCN start_lcn,
-		const NTFS_CLUSTER_ALLOCATION_ZONES zone)
+		const NTFS_CLUSTER_ALLOCATION_ZONES zone,
+		const BOOL is_extension)
 {
 	LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
 	LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size;
@@ -163,17 +174,9 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
 	BUG_ON(zone < FIRST_ZONE);
 	BUG_ON(zone > LAST_ZONE);
 
-	/* Return empty runlist if @count == 0 */
-	// FIXME: Do we want to just return NULL instead? (AIA)
-	if (!count) {
-		rl = ntfs_malloc_nofs(PAGE_SIZE);
-		if (!rl)
-			return ERR_PTR(-ENOMEM);
-		rl[0].vcn = start_vcn;
-		rl[0].lcn = LCN_RL_NOT_MAPPED;
-		rl[0].length = 0;
-		return rl;
-	}
+	/* Return NULL if @count is zero. */
+	if (!count)
+		return NULL;
 	/* Take the lcnbmp lock for writing. */
 	down_write(&vol->lcnbmp_lock);
 	/*
@@ -316,7 +319,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
 				continue;
 			}
 			bit = 1 << (lcn & 7);
-			ntfs_debug("bit %i.", bit);
+			ntfs_debug("bit 0x%x.", bit);
 			/* If the bit is already set, go onto the next one. */
 			if (*byte & bit) {
 				lcn++;
@@ -735,7 +738,7 @@ out:
 	/* Add runlist terminator element. */
 	if (likely(rl)) {
 		rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
-		rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
+		rl[rlpos].lcn = is_extension ? LCN_ENOENT : LCN_RL_NOT_MAPPED;
 		rl[rlpos].length = 0;
 	}
 	if (likely(page && !IS_ERR(page))) {
@@ -785,52 +788,78 @@ out:
 
 /**
  * __ntfs_cluster_free - free clusters on an ntfs volume
- * @vi:		vfs inode whose runlist describes the clusters to free
- * @start_vcn:	vcn in the runlist of @vi at which to start freeing clusters
+ * @ni:		ntfs inode whose runlist describes the clusters to free
+ * @start_vcn:	vcn in the runlist of @ni at which to start freeing clusters
  * @count:	number of clusters to free or -1 for all clusters
- * @is_rollback:	if TRUE this is a rollback operation
+ * @ctx:	active attribute search context if present or NULL if not
+ * @is_rollback:	true if this is a rollback operation
  *
  * Free @count clusters starting at the cluster @start_vcn in the runlist
- * described by the vfs inode @vi.
+ * described by the vfs inode @ni.
  *
  * If @count is -1, all clusters from @start_vcn to the end of the runlist are
  * deallocated.  Thus, to completely free all clusters in a runlist, use
  * @start_vcn = 0 and @count = -1.
  *
+ * If @ctx is specified, it is an active search context of @ni and its base mft
+ * record.  This is needed when __ntfs_cluster_free() encounters unmapped
+ * runlist fragments and allows their mapping.  If you do not have the mft
+ * record mapped, you can specify @ctx as NULL and __ntfs_cluster_free() will
+ * perform the necessary mapping and unmapping.
+ *
+ * Note, __ntfs_cluster_free() saves the state of @ctx on entry and restores it
+ * before returning.  Thus, @ctx will be left pointing to the same attribute on
+ * return as on entry.  However, the actual pointers in @ctx may point to
+ * different memory locations on return, so you must remember to reset any
+ * cached pointers from the @ctx, i.e. after the call to __ntfs_cluster_free(),
+ * you will probably want to do:
+ *	m = ctx->mrec;
+ *	a = ctx->attr;
+ * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
+ * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
+ *
  * @is_rollback should always be FALSE, it is for internal use to rollback
  * errors.  You probably want to use ntfs_cluster_free() instead.
  *
- * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller
- * has to deal with it later.
+ * Note, __ntfs_cluster_free() does not modify the runlist, so you have to
+ * remove from the runlist or mark sparse the freed runs later.
  *
  * Return the number of deallocated clusters (not counting sparse ones) on
  * success and -errno on error.
  *
- * Locking: - The runlist described by @vi must be unlocked on entry and is
- *	      unlocked on return.
- *	    - This function takes the runlist lock of @vi for reading and
- *	      sometimes for writing and sometimes modifies the runlist.
+ * WARNING: If @ctx is supplied, regardless of whether success or failure is
+ *	    returned, you need to check IS_ERR(@ctx->mrec) and if TRUE the @ctx
+ *	    is no longer valid, i.e. you need to either call
+ *	    ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
+ *	    In that case PTR_ERR(@ctx->mrec) will give you the error code for
+ *	    why the mapping of the old inode failed.
+ *
+ * Locking: - The runlist described by @ni must be locked for writing on entry
+ *	      and is locked on return.  Note the runlist may be modified when
+ *	      needed runlist fragments need to be mapped.
  *	    - The volume lcn bitmap must be unlocked on entry and is unlocked
  *	      on return.
  *	    - This function takes the volume lcn bitmap lock for writing and
  *	      modifies the bitmap contents.
+ *	    - If @ctx is NULL, the base mft record of @ni must not be mapped on
+ *	      entry and it will be left unmapped on return.
+ *	    - If @ctx is not NULL, the base mft record must be mapped on entry
+ *	      and it will be left mapped on return.
  */
-s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
-		const BOOL is_rollback)
+s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
+		ntfs_attr_search_ctx *ctx, const BOOL is_rollback)
 {
 	s64 delta, to_free, total_freed, real_freed;
-	ntfs_inode *ni;
 	ntfs_volume *vol;
 	struct inode *lcnbmp_vi;
 	runlist_element *rl;
 	int err;
 
-	BUG_ON(!vi);
+	BUG_ON(!ni);
 	ntfs_debug("Entering for i_ino 0x%lx, start_vcn 0x%llx, count "
-			"0x%llx.%s", vi->i_ino, (unsigned long long)start_vcn,
+			"0x%llx.%s", ni->mft_no, (unsigned long long)start_vcn,
 			(unsigned long long)count,
 			is_rollback ? " (rollback)" : "");
-	ni = NTFS_I(vi);
 	vol = ni->vol;
 	lcnbmp_vi = vol->lcnbmp_ino;
 	BUG_ON(!lcnbmp_vi);
@@ -848,8 +877,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 
 	total_freed = real_freed = 0;
 
-	down_read(&ni->runlist.lock);
-	rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, FALSE);
+	rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx);
 	if (IS_ERR(rl)) {
 		if (!is_rollback)
 			ntfs_error(vol->sb, "Failed to find first runlist "
@@ -903,7 +931,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 
 			/* Attempt to map runlist. */
 			vcn = rl->vcn;
-			rl = ntfs_attr_find_vcn_nolock(ni, vcn, FALSE);
+			rl = ntfs_attr_find_vcn_nolock(ni, vcn, ctx);
 			if (IS_ERR(rl)) {
 				err = PTR_ERR(rl);
 				if (!is_rollback)
@@ -950,7 +978,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 		/* Update the total done clusters. */
 		total_freed += to_free;
 	}
-	up_read(&ni->runlist.lock);
 	if (likely(!is_rollback))
 		up_write(&vol->lcnbmp_lock);
 
@@ -960,7 +987,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 	ntfs_debug("Done.");
 	return real_freed;
 err_out:
-	up_read(&ni->runlist.lock);
 	if (is_rollback)
 		return err;
 	/* If no real clusters were freed, no need to rollback. */
@@ -973,7 +999,7 @@ err_out:
 	 * If rollback fails, set the volume errors flag, emit an error
 	 * message, and return the error code.
 	 */
-	delta = __ntfs_cluster_free(vi, start_vcn, total_freed, TRUE);
+	delta = __ntfs_cluster_free(ni, start_vcn, total_freed, ctx, TRUE);
 	if (delta < 0) {
 		ntfs_error(vol->sb, "Failed to rollback (error %i).  Leaving "
 				"inconsistent metadata!  Unmount and run "
diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h
index 4cac1c024af..72cbca7003b 100644
--- a/fs/ntfs/lcnalloc.h
+++ b/fs/ntfs/lcnalloc.h
@@ -2,7 +2,7 @@
  * lcnalloc.h - Exports for NTFS kernel cluster (de)allocation.  Part of the
  *		Linux-NTFS project.
  *
- * Copyright (c) 2004 Anton Altaparmakov
+ * Copyright (c) 2004-2005 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -27,7 +27,9 @@
 
 #include <linux/fs.h>
 
+#include "attrib.h"
 #include "types.h"
+#include "inode.h"
 #include "runlist.h"
 #include "volume.h"
 
@@ -40,43 +42,72 @@ typedef enum {
 
 extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol,
 		const VCN start_vcn, const s64 count, const LCN start_lcn,
-		const NTFS_CLUSTER_ALLOCATION_ZONES zone);
+		const NTFS_CLUSTER_ALLOCATION_ZONES zone,
+		const BOOL is_extension);
 
-extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
-		s64 count, const BOOL is_rollback);
+extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
+		s64 count, ntfs_attr_search_ctx *ctx, const BOOL is_rollback);
 
 /**
  * ntfs_cluster_free - free clusters on an ntfs volume
- * @vi:		vfs inode whose runlist describes the clusters to free
- * @start_vcn:	vcn in the runlist of @vi at which to start freeing clusters
+ * @ni:		ntfs inode whose runlist describes the clusters to free
+ * @start_vcn:	vcn in the runlist of @ni at which to start freeing clusters
  * @count:	number of clusters to free or -1 for all clusters
+ * @ctx:	active attribute search context if present or NULL if not
  *
  * Free @count clusters starting at the cluster @start_vcn in the runlist
- * described by the vfs inode @vi.
+ * described by the ntfs inode @ni.
  *
  * If @count is -1, all clusters from @start_vcn to the end of the runlist are
  * deallocated.  Thus, to completely free all clusters in a runlist, use
  * @start_vcn = 0 and @count = -1.
  *
- * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller
- * has to deal with it later.
+ * If @ctx is specified, it is an active search context of @ni and its base mft
+ * record.  This is needed when ntfs_cluster_free() encounters unmapped runlist
+ * fragments and allows their mapping.  If you do not have the mft record
+ * mapped, you can specify @ctx as NULL and ntfs_cluster_free() will perform
+ * the necessary mapping and unmapping.
+ *
+ * Note, ntfs_cluster_free() saves the state of @ctx on entry and restores it
+ * before returning.  Thus, @ctx will be left pointing to the same attribute on
+ * return as on entry.  However, the actual pointers in @ctx may point to
+ * different memory locations on return, so you must remember to reset any
+ * cached pointers from the @ctx, i.e. after the call to ntfs_cluster_free(),
+ * you will probably want to do:
+ *	m = ctx->mrec;
+ *	a = ctx->attr;
+ * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
+ * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
+ *
+ * Note, ntfs_cluster_free() does not modify the runlist, so you have to remove
+ * from the runlist or mark sparse the freed runs later.
  *
  * Return the number of deallocated clusters (not counting sparse ones) on
  * success and -errno on error.
  *
- * Locking: - The runlist described by @vi must be unlocked on entry and is
- *	      unlocked on return.
- *	    - This function takes the runlist lock of @vi for reading and
- *	      sometimes for writing and sometimes modifies the runlist.
+ * WARNING: If @ctx is supplied, regardless of whether success or failure is
+ *	    returned, you need to check IS_ERR(@ctx->mrec) and if TRUE the @ctx
+ *	    is no longer valid, i.e. you need to either call
+ *	    ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
+ *	    In that case PTR_ERR(@ctx->mrec) will give you the error code for
+ *	    why the mapping of the old inode failed.
+ *
+ * Locking: - The runlist described by @ni must be locked for writing on entry
+ *	      and is locked on return.  Note the runlist may be modified when
+ *	      needed runlist fragments need to be mapped.
  *	    - The volume lcn bitmap must be unlocked on entry and is unlocked
  *	      on return.
  *	    - This function takes the volume lcn bitmap lock for writing and
  *	      modifies the bitmap contents.
+ *	    - If @ctx is NULL, the base mft record of @ni must not be mapped on
+ *	      entry and it will be left unmapped on return.
+ *	    - If @ctx is not NULL, the base mft record must be mapped on entry
+ *	      and it will be left mapped on return.
  */
-static inline s64 ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
-		s64 count)
+static inline s64 ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
+		s64 count, ntfs_attr_search_ctx *ctx)
 {
-	return __ntfs_cluster_free(vi, start_vcn, count, FALSE);
+	return __ntfs_cluster_free(ni, start_vcn, count, ctx, FALSE);
 }
 
 extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
@@ -93,8 +124,10 @@ extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
  *
  * Return 0 on success and -errno on error.
  *
- * Locking: This function takes the volume lcn bitmap lock for writing and
- *	    modifies the bitmap contents.
+ * Locking: - This function takes the volume lcn bitmap lock for writing and
+ *	      modifies the bitmap contents.
+ *	    - The caller must have locked the runlist @rl for reading or
+ *	      writing.
  */
 static inline int ntfs_cluster_free_from_rl(ntfs_volume *vol,
 		const runlist_element *rl)
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 8edb8e20fb0..0fd70295cca 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -51,7 +51,8 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
 		RESTART_PAGE_HEADER *rp, s64 pos)
 {
 	u32 logfile_system_page_size, logfile_log_page_size;
-	u16 usa_count, usa_ofs, usa_end, ra_ofs;
+	u16 ra_ofs, usa_count, usa_ofs, usa_end = 0;
+	BOOL have_usa = TRUE;
 
 	ntfs_debug("Entering.");
 	/*
@@ -86,6 +87,14 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
 				(int)sle16_to_cpu(rp->minor_ver));
 		return FALSE;
 	}
+	/*
+	 * If chkdsk has been run the restart page may not be protected by an
+	 * update sequence array.
+	 */
+	if (ntfs_is_chkd_record(rp->magic) && !le16_to_cpu(rp->usa_count)) {
+		have_usa = FALSE;
+		goto skip_usa_checks;
+	}
 	/* Verify the size of the update sequence array. */
 	usa_count = 1 + (logfile_system_page_size >> NTFS_BLOCK_SIZE_BITS);
 	if (usa_count != le16_to_cpu(rp->usa_count)) {
@@ -102,6 +111,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
 				"inconsistent update sequence array offset.");
 		return FALSE;
 	}
+skip_usa_checks:
 	/*
 	 * Verify the position of the restart area.  It must be:
 	 *	- aligned to 8-byte boundary,
@@ -109,7 +119,8 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
 	 *	- within the system page size.
 	 */
 	ra_ofs = le16_to_cpu(rp->restart_area_offset);
-	if (ra_ofs & 7 || ra_ofs < usa_end ||
+	if (ra_ofs & 7 || (have_usa ? ra_ofs < usa_end :
+			ra_ofs < sizeof(RESTART_PAGE_HEADER)) ||
 			ra_ofs > logfile_system_page_size) {
 		ntfs_error(vi->i_sb, "$LogFile restart page specifies "
 				"inconsistent restart area offset.");
@@ -121,7 +132,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
 	 */
 	if (!ntfs_is_chkd_record(rp->magic) && sle64_to_cpu(rp->chkdsk_lsn)) {
 		ntfs_error(vi->i_sb, "$LogFile restart page is not modified "
-				"chkdsk but a chkdsk LSN is specified.");
+				"by chkdsk but a chkdsk LSN is specified.");
 		return FALSE;
 	}
 	ntfs_debug("Done.");
@@ -312,10 +323,12 @@ err_out:
  * @vi:		$LogFile inode to which the restart page belongs
  * @rp:		restart page to check
  * @pos:	position in @vi at which the restart page resides
- * @wrp:	copy of the multi sector transfer deprotected restart page
+ * @wrp:	[OUT] copy of the multi sector transfer deprotected restart page
+ * @lsn:	[OUT] set to the current logfile lsn on success
  *
- * Check the restart page @rp for consistency and return TRUE if it is
- * consistent and FALSE otherwise.
+ * Check the restart page @rp for consistency and return 0 if it is consistent
+ * and -errno otherwise.  The restart page may have been modified by chkdsk in
+ * which case its magic is CHKD instead of RSTR.
  *
  * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not
  * require the full restart page.
@@ -323,25 +336,33 @@ err_out:
  * If @wrp is not NULL, on success, *@wrp will point to a buffer containing a
  * copy of the complete multi sector transfer deprotected page.  On failure,
  * *@wrp is undefined.
+ *
+ * Simillarly, if @lsn is not NULL, on succes *@lsn will be set to the current
+ * logfile lsn according to this restart page.  On failure, *@lsn is undefined.
+ *
+ * The following error codes are defined:
+ *	-EINVAL	- The restart page is inconsistent.
+ *	-ENOMEM	- Not enough memory to load the restart page.
+ *	-EIO	- Failed to reading from $LogFile.
  */
-static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
-		RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp)
+static int ntfs_check_and_load_restart_page(struct inode *vi,
+		RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp,
+		LSN *lsn)
 {
 	RESTART_AREA *ra;
 	RESTART_PAGE_HEADER *trp;
-	int size;
-	BOOL ret;
+	int size, err;
 
 	ntfs_debug("Entering.");
 	/* Check the restart page header for consistency. */
 	if (!ntfs_check_restart_page_header(vi, rp, pos)) {
 		/* Error output already done inside the function. */
-		return FALSE;
+		return -EINVAL;
 	}
 	/* Check the restart area for consistency. */
 	if (!ntfs_check_restart_area(vi, rp)) {
 		/* Error output already done inside the function. */
-		return FALSE;
+		return -EINVAL;
 	}
 	ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset));
 	/*
@@ -352,7 +373,7 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
 	if (!trp) {
 		ntfs_error(vi->i_sb, "Failed to allocate memory for $LogFile "
 				"restart page buffer.");
-		return FALSE;
+		return -ENOMEM;
 	}
 	/*
 	 * Read the whole of the restart page into the buffer.  If it fits
@@ -379,6 +400,9 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
 			if (IS_ERR(page)) {
 				ntfs_error(vi->i_sb, "Error mapping $LogFile "
 						"page (index %lu).", idx);
+				err = PTR_ERR(page);
+				if (err != -EIO && err != -ENOMEM)
+					err = -EIO;
 				goto err_out;
 			}
 			size = min_t(int, to_read, PAGE_CACHE_SIZE);
@@ -389,32 +413,64 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
 			idx++;
 		} while (to_read > 0);
 	}
-	/* Perform the multi sector transfer deprotection on the buffer. */
-	if (post_read_mst_fixup((NTFS_RECORD*)trp,
+	/*
+	 * Perform the multi sector transfer deprotection on the buffer if the
+	 * restart page is protected.
+	 */
+	if ((!ntfs_is_chkd_record(trp->magic) || le16_to_cpu(trp->usa_count))
+			&& post_read_mst_fixup((NTFS_RECORD*)trp,
 			le32_to_cpu(rp->system_page_size))) {
-		ntfs_error(vi->i_sb, "Multi sector transfer error detected in "
-				"$LogFile restart page.");
-		goto err_out;
+		/*
+		 * A multi sector tranfer error was detected.  We only need to
+		 * abort if the restart page contents exceed the multi sector
+		 * transfer fixup of the first sector.
+		 */
+		if (le16_to_cpu(rp->restart_area_offset) +
+				le16_to_cpu(ra->restart_area_length) >
+				NTFS_BLOCK_SIZE - sizeof(u16)) {
+			ntfs_error(vi->i_sb, "Multi sector transfer error "
+					"detected in $LogFile restart page.");
+			err = -EINVAL;
+			goto err_out;
+		}
+	}
+	/*
+	 * If the restart page is modified by chkdsk or there are no active
+	 * logfile clients, the logfile is consistent.  Otherwise, need to
+	 * check the log client records for consistency, too.
+	 */
+	err = 0;
+	if (ntfs_is_rstr_record(rp->magic) &&
+			ra->client_in_use_list != LOGFILE_NO_CLIENT) {
+		if (!ntfs_check_log_client_array(vi, trp)) {
+			err = -EINVAL;
+			goto err_out;
+		}
+	}
+	if (lsn) {
+		if (ntfs_is_rstr_record(rp->magic))
+			*lsn = sle64_to_cpu(ra->current_lsn);
+		else /* if (ntfs_is_chkd_record(rp->magic)) */
+			*lsn = sle64_to_cpu(rp->chkdsk_lsn);
 	}
-	/* Check the log client records for consistency. */
-	ret = ntfs_check_log_client_array(vi, trp);
-	if (ret && wrp)
-		*wrp = trp;
-	else
-		ntfs_free(trp);
 	ntfs_debug("Done.");
-	return ret;
+	if (wrp)
+		*wrp = trp;
+	else {
 err_out:
-	ntfs_free(trp);
-	return FALSE;
+		ntfs_free(trp);
+	}
+	return err;
 }
 
 /**
  * ntfs_check_logfile - check the journal for consistency
  * @log_vi:	struct inode of loaded journal $LogFile to check
+ * @rp:		[OUT] on success this is a copy of the current restart page
  *
  * Check the $LogFile journal for consistency and return TRUE if it is
- * consistent and FALSE if not.
+ * consistent and FALSE if not.  On success, the current restart page is
+ * returned in *@rp.  Caller must call ntfs_free(*@rp) when finished with it.
  *
  * At present we only check the two restart pages and ignore the log record
  * pages.
@@ -424,19 +480,18 @@ err_out:
  * if the $LogFile was created on a system with a different page size to ours
  * yet and mst deprotection would fail if our page size is smaller.
  */
-BOOL ntfs_check_logfile(struct inode *log_vi)
+BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp)
 {
-	s64 size, pos, rstr1_pos, rstr2_pos;
+	s64 size, pos;
+	LSN rstr1_lsn, rstr2_lsn;
 	ntfs_volume *vol = NTFS_SB(log_vi->i_sb);
 	struct address_space *mapping = log_vi->i_mapping;
 	struct page *page = NULL;
 	u8 *kaddr = NULL;
 	RESTART_PAGE_HEADER *rstr1_ph = NULL;
 	RESTART_PAGE_HEADER *rstr2_ph = NULL;
-	int log_page_size, log_page_mask, ofs;
+	int log_page_size, log_page_mask, err;
 	BOOL logfile_is_empty = TRUE;
-	BOOL rstr1_found = FALSE;
-	BOOL rstr2_found = FALSE;
 	u8 log_page_bits;
 
 	ntfs_debug("Entering.");
@@ -491,7 +546,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
 			if (IS_ERR(page)) {
 				ntfs_error(vol->sb, "Error mapping $LogFile "
 						"page (index %lu).", idx);
-				return FALSE;
+				goto err_out;
 			}
 		}
 		kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK);
@@ -510,99 +565,100 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
 		 */
 		if (ntfs_is_rcrd_recordp((le32*)kaddr))
 			break;
-		/*
-		 * A modified by chkdsk restart page means we cannot handle
-		 * this log file.
-		 */
-		if (ntfs_is_chkd_recordp((le32*)kaddr)) {
-			ntfs_error(vol->sb, "$LogFile has been modified by "
-					"chkdsk.  Mount this volume in "
-					"Windows.");
-			goto err_out;
-		}
-		/* If not a restart page, continue. */
-		if (!ntfs_is_rstr_recordp((le32*)kaddr)) {
-			/* Skip to the minimum page size for the next one. */
+		/* If not a (modified by chkdsk) restart page, continue. */
+		if (!ntfs_is_rstr_recordp((le32*)kaddr) &&
+				!ntfs_is_chkd_recordp((le32*)kaddr)) {
 			if (!pos)
 				pos = NTFS_BLOCK_SIZE >> 1;
 			continue;
 		}
-		/* We now know we have a restart page. */
-		if (!pos) {
-			rstr1_found = TRUE;
-			rstr1_pos = pos;
-		} else {
-			if (rstr2_found) {
-				ntfs_error(vol->sb, "Found more than two "
-						"restart pages in $LogFile.");
-				goto err_out;
-			}
-			rstr2_found = TRUE;
-			rstr2_pos = pos;
-		}
 		/*
-		 * Check the restart page for consistency and get a copy of the
-		 * complete multi sector transfer deprotected restart page.
+		 * Check the (modified by chkdsk) restart page for consistency
+		 * and get a copy of the complete multi sector transfer
+		 * deprotected restart page.
 		 */
-		if (!ntfs_check_and_load_restart_page(log_vi,
+		err = ntfs_check_and_load_restart_page(log_vi,
 				(RESTART_PAGE_HEADER*)kaddr, pos,
-				!pos ? &rstr1_ph : &rstr2_ph)) {
-			/* Error output already done inside the function. */
-			goto err_out;
+				!rstr1_ph ? &rstr1_ph : &rstr2_ph,
+				!rstr1_ph ? &rstr1_lsn : &rstr2_lsn);
+		if (!err) {
+			/*
+			 * If we have now found the first (modified by chkdsk)
+			 * restart page, continue looking for the second one.
+			 */
+			if (!pos) {
+				pos = NTFS_BLOCK_SIZE >> 1;
+				continue;
+			}
+			/*
+			 * We have now found the second (modified by chkdsk)
+			 * restart page, so we can stop looking.
+			 */
+			break;
 		}
 		/*
-		 * We have a valid restart page.  The next one must be after
-		 * a whole system page size as specified by the valid restart
-		 * page.
+		 * Error output already done inside the function.  Note, we do
+		 * not abort if the restart page was invalid as we might still
+		 * find a valid one further in the file.
 		 */
+		if (err != -EINVAL) {
+			ntfs_unmap_page(page);
+			goto err_out;
+		}
+		/* Continue looking. */
 		if (!pos)
-			pos = le32_to_cpu(rstr1_ph->system_page_size) >> 1;
+			pos = NTFS_BLOCK_SIZE >> 1;
 	}
-	if (page) {
+	if (page)
 		ntfs_unmap_page(page);
-		page = NULL;
-	}
 	if (logfile_is_empty) {
 		NVolSetLogFileEmpty(vol);
 is_empty:
 		ntfs_debug("Done.  ($LogFile is empty.)");
 		return TRUE;
 	}
-	if (!rstr1_found || !rstr2_found) {
-		ntfs_error(vol->sb, "Did not find two restart pages in "
-				"$LogFile.");
-		goto err_out;
+	if (!rstr1_ph) {
+		BUG_ON(rstr2_ph);
+		ntfs_error(vol->sb, "Did not find any restart pages in "
+				"$LogFile and it was not empty.");
+		return FALSE;
+	}
+	/* If both restart pages were found, use the more recent one. */
+	if (rstr2_ph) {
+		/*
+		 * If the second restart area is more recent, switch to it.
+		 * Otherwise just throw it away.
+		 */
+		if (rstr2_lsn > rstr1_lsn) {
+			ntfs_debug("Using second restart page as it is more "
+					"recent.");
+			ntfs_free(rstr1_ph);
+			rstr1_ph = rstr2_ph;
+			/* rstr1_lsn = rstr2_lsn; */
+		} else {
+			ntfs_debug("Using first restart page as it is more "
+					"recent.");
+			ntfs_free(rstr2_ph);
+		}
+		rstr2_ph = NULL;
 	}
-	/*
-	 * The two restart areas must be identical except for the update
-	 * sequence number.
-	 */
-	ofs = le16_to_cpu(rstr1_ph->usa_ofs);
-	if (memcmp(rstr1_ph, rstr2_ph, ofs) || (ofs += sizeof(u16),
-			memcmp((u8*)rstr1_ph + ofs, (u8*)rstr2_ph + ofs,
-			le32_to_cpu(rstr1_ph->system_page_size) - ofs))) {
-		ntfs_error(vol->sb, "The two restart pages in $LogFile do not "
-				"match.");
-		goto err_out;
-	}
-	ntfs_free(rstr1_ph);
-	ntfs_free(rstr2_ph);
 	/* All consistency checks passed. */
+	if (rp)
+		*rp = rstr1_ph;
+	else
+		ntfs_free(rstr1_ph);
 	ntfs_debug("Done.");
 	return TRUE;
 err_out:
-	if (page)
-		ntfs_unmap_page(page);
 	if (rstr1_ph)
 		ntfs_free(rstr1_ph);
-	if (rstr2_ph)
-		ntfs_free(rstr2_ph);
 	return FALSE;
 }
 
 /**
  * ntfs_is_logfile_clean - check in the journal if the volume is clean
  * @log_vi:	struct inode of loaded journal $LogFile to check
+ * @rp:		copy of the current restart page
  *
  * Analyze the $LogFile journal and return TRUE if it indicates the volume was
  * shutdown cleanly and FALSE if not.
@@ -619,11 +675,9 @@ err_out:
  * is empty this function requires that NVolLogFileEmpty() is true otherwise an
  * empty volume will be reported as dirty.
  */
-BOOL ntfs_is_logfile_clean(struct inode *log_vi)
+BOOL ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp)
 {
 	ntfs_volume *vol = NTFS_SB(log_vi->i_sb);
-	struct page *page;
-	RESTART_PAGE_HEADER *rp;
 	RESTART_AREA *ra;
 
 	ntfs_debug("Entering.");
@@ -632,24 +686,15 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi)
 		ntfs_debug("Done.  ($LogFile is empty.)");
 		return TRUE;
 	}
-	/*
-	 * Read the first restart page.  It will be possibly incomplete and
-	 * will not be multi sector transfer deprotected but we only need the
-	 * first NTFS_BLOCK_SIZE bytes so it does not matter.
-	 */
-	page = ntfs_map_page(log_vi->i_mapping, 0);
-	if (IS_ERR(page)) {
-		ntfs_error(vol->sb, "Error mapping $LogFile page (index 0).");
+	BUG_ON(!rp);
+	if (!ntfs_is_rstr_record(rp->magic) &&
+			!ntfs_is_chkd_record(rp->magic)) {
+		ntfs_error(vol->sb, "Restart page buffer is invalid.  This is "
+				"probably a bug in that the $LogFile should "
+				"have been consistency checked before calling "
+				"this function.");
 		return FALSE;
 	}
-	rp = (RESTART_PAGE_HEADER*)page_address(page);
-	if (!ntfs_is_rstr_record(rp->magic)) {
-		ntfs_error(vol->sb, "No restart page found at offset zero in "
-				"$LogFile.  This is probably a bug in that "
-				"the $LogFile should have been consistency "
-				"checked before calling this function.");
-		goto err_out;
-	}
 	ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset));
 	/*
 	 * If the $LogFile has active clients, i.e. it is open, and we do not
@@ -659,15 +704,11 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi)
 	if (ra->client_in_use_list != LOGFILE_NO_CLIENT &&
 			!(ra->flags & RESTART_VOLUME_IS_CLEAN)) {
 		ntfs_debug("Done.  $LogFile indicates a dirty shutdown.");
-		goto err_out;
+		return FALSE;
 	}
-	ntfs_unmap_page(page);
 	/* $LogFile indicates a clean shutdown. */
 	ntfs_debug("Done.  $LogFile indicates a clean shutdown.");
 	return TRUE;
-err_out:
-	ntfs_unmap_page(page);
-	return FALSE;
 }
 
 /**
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h
index 4ee4378de06..a51f3dd0e9e 100644
--- a/fs/ntfs/logfile.h
+++ b/fs/ntfs/logfile.h
@@ -2,7 +2,7 @@
  * logfile.h - Defines for NTFS kernel journal ($LogFile) handling.  Part of
  *	       the Linux-NTFS project.
  *
- * Copyright (c) 2000-2004 Anton Altaparmakov
+ * Copyright (c) 2000-2005 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -113,7 +113,7 @@ typedef struct {
  */
 enum {
 	RESTART_VOLUME_IS_CLEAN	= const_cpu_to_le16(0x0002),
-	RESTART_SPACE_FILLER	= 0xffff, /* gcc: Force enum bit width to 16. */
+	RESTART_SPACE_FILLER	= const_cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */
 } __attribute__ ((__packed__));
 
 typedef le16 RESTART_AREA_FLAGS;
@@ -296,9 +296,11 @@ typedef struct {
 /* sizeof() = 160 (0xa0) bytes */
 } __attribute__ ((__packed__)) LOG_CLIENT_RECORD;
 
-extern BOOL ntfs_check_logfile(struct inode *log_vi);
+extern BOOL ntfs_check_logfile(struct inode *log_vi,
+		RESTART_PAGE_HEADER **rp);
 
-extern BOOL ntfs_is_logfile_clean(struct inode *log_vi);
+extern BOOL ntfs_is_logfile_clean(struct inode *log_vi,
+		const RESTART_PAGE_HEADER *rp);
 
 extern BOOL ntfs_empty_logfile(struct inode *log_vi);
 
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index fac5944df6d..e38e402e410 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -1,7 +1,7 @@
 /*
  * malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -27,27 +27,62 @@
 #include <linux/highmem.h>
 
 /**
- * ntfs_malloc_nofs - allocate memory in multiples of pages
- * @size	number of bytes to allocate
+ * __ntfs_malloc - allocate memory in multiples of pages
+ * @size:	number of bytes to allocate
+ * @gfp_mask:	extra flags for the allocator
+ *
+ * Internal function.  You probably want ntfs_malloc_nofs()...
  *
  * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
  * returns a pointer to the allocated memory.
  *
  * If there was insufficient memory to complete the request, return NULL.
+ * Depending on @gfp_mask the allocation may be guaranteed to succeed.
  */
-static inline void *ntfs_malloc_nofs(unsigned long size)
+static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask)
 {
 	if (likely(size <= PAGE_SIZE)) {
 		BUG_ON(!size);
 		/* kmalloc() has per-CPU caches so is faster for now. */
-		return kmalloc(PAGE_SIZE, GFP_NOFS);
-		/* return (void *)__get_free_page(GFP_NOFS | __GFP_HIGHMEM); */
+		return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM);
+		/* return (void *)__get_free_page(gfp_mask); */
 	}
 	if (likely(size >> PAGE_SHIFT < num_physpages))
-		return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL);
+		return __vmalloc(size, gfp_mask, PAGE_KERNEL);
 	return NULL;
 }
 
+/**
+ * ntfs_malloc_nofs - allocate memory in multiples of pages
+ * @size:	number of bytes to allocate
+ *
+ * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
+ * returns a pointer to the allocated memory.
+ *
+ * If there was insufficient memory to complete the request, return NULL.
+ */
+static inline void *ntfs_malloc_nofs(unsigned long size)
+{
+	return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM);
+}
+
+/**
+ * ntfs_malloc_nofs_nofail - allocate memory in multiples of pages
+ * @size:	number of bytes to allocate
+ *
+ * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
+ * returns a pointer to the allocated memory.
+ *
+ * This function guarantees that the allocation will succeed.  It will sleep
+ * for as long as it takes to complete the allocation.
+ *
+ * If there was insufficient memory to complete the request, return NULL.
+ */
+static inline void *ntfs_malloc_nofs_nofail(unsigned long size)
+{
+	return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM | __GFP_NOFAIL);
+}
+
 static inline void ntfs_free(void *addr)
 {
 	if (likely(((unsigned long)addr < VMALLOC_START) ||
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index ac9ff39aa83..0c65cbb8c5c 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -49,7 +49,8 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
 	ntfs_volume *vol = ni->vol;
 	struct inode *mft_vi = vol->mft_ino;
 	struct page *page;
-	unsigned long index, ofs, end_index;
+	unsigned long index, end_index;
+	unsigned ofs;
 
 	BUG_ON(ni->page);
 	/*
@@ -58,7 +59,8 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
 	 * overflowing the unsigned long, but I don't think we would ever get
 	 * here if the volume was that big...
 	 */
-	index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT;
+	index = (u64)ni->mft_no << vol->mft_record_size_bits >>
+			PAGE_CACHE_SHIFT;
 	ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
 
 	i_size = i_size_read(mft_vi);
@@ -511,7 +513,6 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
 		} while (bh);
 		tail->b_this_page = head;
 		attach_page_buffers(page, head);
-		BUG_ON(!page_has_buffers(page));
 	}
 	bh = head = page_buffers(page);
 	BUG_ON(!bh);
@@ -533,6 +534,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
 			LCN lcn;
 			unsigned int vcn_ofs;
 
+			bh->b_bdev = vol->sb->s_bdev;
 			/* Obtain the vcn and offset of the current block. */
 			vcn = ((VCN)mft_no << vol->mft_record_size_bits) +
 					(block_start - m_start);
@@ -691,7 +693,6 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
 	 */
 	if (!NInoTestClearDirty(ni))
 		goto done;
-	BUG_ON(!page_has_buffers(page));
 	bh = head = page_buffers(page);
 	BUG_ON(!bh);
 	rl = NULL;
@@ -725,6 +726,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
 			LCN lcn;
 			unsigned int vcn_ofs;
 
+			bh->b_bdev = vol->sb->s_bdev;
 			/* Obtain the vcn and offset of the current block. */
 			vcn = ((VCN)ni->mft_no << vol->mft_record_size_bits) +
 					(block_start - m_start);
@@ -1307,7 +1309,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 	ll = mftbmp_ni->allocated_size;
 	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
 	rl = ntfs_attr_find_vcn_nolock(mftbmp_ni,
-			(ll - 1) >> vol->cluster_size_bits, TRUE);
+			(ll - 1) >> vol->cluster_size_bits, NULL);
 	if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
 		up_write(&mftbmp_ni->runlist.lock);
 		ntfs_error(vol->sb, "Failed to determine last allocated "
@@ -1353,7 +1355,8 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 		up_write(&vol->lcnbmp_lock);
 		ntfs_unmap_page(page);
 		/* Allocate a cluster from the DATA_ZONE. */
-		rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE);
+		rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE,
+				TRUE);
 		if (IS_ERR(rl2)) {
 			up_write(&mftbmp_ni->runlist.lock);
 			ntfs_error(vol->sb, "Failed to allocate a cluster for "
@@ -1737,7 +1740,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 	ll = mft_ni->allocated_size;
 	read_unlock_irqrestore(&mft_ni->size_lock, flags);
 	rl = ntfs_attr_find_vcn_nolock(mft_ni,
-			(ll - 1) >> vol->cluster_size_bits, TRUE);
+			(ll - 1) >> vol->cluster_size_bits, NULL);
 	if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
 		up_write(&mft_ni->runlist.lock);
 		ntfs_error(vol->sb, "Failed to determine last allocated "
@@ -1778,7 +1781,8 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 			nr > min_nr ? "default" : "minimal", (long long)nr);
 	old_last_vcn = rl[1].vcn;
 	do {
-		rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE);
+		rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE,
+				TRUE);
 		if (likely(!IS_ERR(rl2)))
 			break;
 		if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) {
@@ -1950,20 +1954,21 @@ restore_undo_alloc:
 		NVolSetErrors(vol);
 		return ret;
 	}
-	a = ctx->attr;
-	a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1);
+	ctx->attr->data.non_resident.highest_vcn =
+			cpu_to_sle64(old_last_vcn - 1);
 undo_alloc:
-	if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1) < 0) {
+	if (ntfs_cluster_free(mft_ni, old_last_vcn, -1, ctx) < 0) {
 		ntfs_error(vol->sb, "Failed to free clusters from mft data "
 				"attribute.%s", es);
 		NVolSetErrors(vol);
 	}
+	a = ctx->attr;
 	if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) {
 		ntfs_error(vol->sb, "Failed to truncate mft data attribute "
 				"runlist.%s", es);
 		NVolSetErrors(vol);
 	}
-	if (mp_rebuilt) {
+	if (mp_rebuilt && !IS_ERR(ctx->mrec)) {
 		if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
 				a->data.non_resident.mapping_pairs_offset),
 				old_alen - le16_to_cpu(
@@ -1980,6 +1985,10 @@ undo_alloc:
 		}
 		flush_dcache_mft_record_page(ctx->ntfs_ino);
 		mark_mft_record_dirty(ctx->ntfs_ino);
+	} else if (IS_ERR(ctx->mrec)) {
+		ntfs_error(vol->sb, "Failed to restore attribute search "
+				"context.%s", es);
+		NVolSetErrors(vol);
 	}
 	if (ctx)
 		ntfs_attr_put_search_ctx(ctx);
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 758855b0414..061b5ff6b73 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -2,7 +2,7 @@
  * runlist.c - NTFS runlist handling code.  Part of the Linux-NTFS project.
  *
  * Copyright (c) 2001-2005 Anton Altaparmakov
- * Copyright (c) 2002 Richard Russon
+ * Copyright (c) 2002-2005 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -35,7 +35,7 @@ static inline void ntfs_rl_mm(runlist_element *base, int dst, int src,
 		int size)
 {
 	if (likely((dst != src) && (size > 0)))
-		memmove(base + dst, base + src, size * sizeof (*base));
+		memmove(base + dst, base + src, size * sizeof(*base));
 }
 
 /**
@@ -95,6 +95,51 @@ static inline runlist_element *ntfs_rl_realloc(runlist_element *rl,
 }
 
 /**
+ * ntfs_rl_realloc_nofail - Reallocate memory for runlists
+ * @rl:		original runlist
+ * @old_size:	number of runlist elements in the original runlist @rl
+ * @new_size:	number of runlist elements we need space for
+ *
+ * As the runlists grow, more memory will be required.  To prevent the
+ * kernel having to allocate and reallocate large numbers of small bits of
+ * memory, this function returns an entire page of memory.
+ *
+ * This function guarantees that the allocation will succeed.  It will sleep
+ * for as long as it takes to complete the allocation.
+ *
+ * It is up to the caller to serialize access to the runlist @rl.
+ *
+ * N.B.  If the new allocation doesn't require a different number of pages in
+ *       memory, the function will return the original pointer.
+ *
+ * On success, return a pointer to the newly allocated, or recycled, memory.
+ * On error, return -errno. The following error codes are defined:
+ *	-ENOMEM	- Not enough memory to allocate runlist array.
+ *	-EINVAL	- Invalid parameters were passed in.
+ */
+static inline runlist_element *ntfs_rl_realloc_nofail(runlist_element *rl,
+		int old_size, int new_size)
+{
+	runlist_element *new_rl;
+
+	old_size = PAGE_ALIGN(old_size * sizeof(*rl));
+	new_size = PAGE_ALIGN(new_size * sizeof(*rl));
+	if (old_size == new_size)
+		return rl;
+
+	new_rl = ntfs_malloc_nofs_nofail(new_size);
+	BUG_ON(!new_rl);
+
+	if (likely(rl != NULL)) {
+		if (unlikely(old_size > new_size))
+			old_size = new_size;
+		memcpy(new_rl, rl, old_size);
+		ntfs_free(rl);
+	}
+	return new_rl;
+}
+
+/**
  * ntfs_are_rl_mergeable - test if two runlists can be joined together
  * @dst:	original runlist
  * @src:	new runlist to test for mergeability with @dst
@@ -113,17 +158,21 @@ static inline BOOL ntfs_are_rl_mergeable(runlist_element *dst,
 	BUG_ON(!dst);
 	BUG_ON(!src);
 
-	if ((dst->lcn < 0) || (src->lcn < 0)) {   /* Are we merging holes? */
-		if (dst->lcn == LCN_HOLE && src->lcn == LCN_HOLE)
-			return TRUE;
-		return FALSE;
-	}
-	if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */
-		return FALSE;
-	if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */
+	/* We can merge unmapped regions even if they are misaligned. */
+	if ((dst->lcn == LCN_RL_NOT_MAPPED) && (src->lcn == LCN_RL_NOT_MAPPED))
+		return TRUE;
+	/* If the runs are misaligned, we cannot merge them. */
+	if ((dst->vcn + dst->length) != src->vcn)
 		return FALSE;
-
-	return TRUE;
+	/* If both runs are non-sparse and contiguous, we can merge them. */
+	if ((dst->lcn >= 0) && (src->lcn >= 0) &&
+			((dst->lcn + dst->length) == src->lcn))
+		return TRUE;
+	/* If we are merging two holes, we can merge them. */
+	if ((dst->lcn == LCN_HOLE) && (src->lcn == LCN_HOLE))
+		return TRUE;
+	/* Cannot merge. */
+	return FALSE;
 }
 
 /**
@@ -169,14 +218,15 @@ static inline void __ntfs_rl_merge(runlist_element *dst, runlist_element *src)
 static inline runlist_element *ntfs_rl_append(runlist_element *dst,
 		int dsize, runlist_element *src, int ssize, int loc)
 {
-	BOOL right;
-	int magic;
+	BOOL right = FALSE;	/* Right end of @src needs merging. */
+	int marker;		/* End of the inserted runs. */
 
 	BUG_ON(!dst);
 	BUG_ON(!src);
 
 	/* First, check if the right hand end needs merging. */
-	right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
+	if ((loc + 1) < dsize)
+		right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
 
 	/* Space required: @dst size + @src size, less one if we merged. */
 	dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - right);
@@ -191,18 +241,19 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst,
 	if (right)
 		__ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
 
-	magic = loc + ssize;
+	/* First run after the @src runs that have been inserted. */
+	marker = loc + ssize + 1;
 
 	/* Move the tail of @dst out of the way, then copy in @src. */
-	ntfs_rl_mm(dst, magic + 1, loc + 1 + right, dsize - loc - 1 - right);
+	ntfs_rl_mm(dst, marker, loc + 1 + right, dsize - (loc + 1 + right));
 	ntfs_rl_mc(dst, loc + 1, src, 0, ssize);
 
 	/* Adjust the size of the preceding hole. */
 	dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
 
 	/* We may have changed the length of the file, so fix the end marker */
-	if (dst[magic + 1].lcn == LCN_ENOENT)
-		dst[magic + 1].vcn = dst[magic].vcn + dst[magic].length;
+	if (dst[marker].lcn == LCN_ENOENT)
+		dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
 
 	return dst;
 }
@@ -234,18 +285,17 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst,
 static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
 		int dsize, runlist_element *src, int ssize, int loc)
 {
-	BOOL left = FALSE;
-	BOOL disc = FALSE;	/* Discontinuity */
-	BOOL hole = FALSE;	/* Following a hole */
-	int magic;
+	BOOL left = FALSE;	/* Left end of @src needs merging. */
+	BOOL disc = FALSE;	/* Discontinuity between @dst and @src. */
+	int marker;		/* End of the inserted runs. */
 
 	BUG_ON(!dst);
 	BUG_ON(!src);
 
-	/* disc => Discontinuity between the end of @dst and the start of @src.
-	 *	   This means we might need to insert a hole.
-	 * hole => @dst ends with a hole or an unmapped region which we can
-	 *	   extend to match the discontinuity. */
+	/*
+	 * disc => Discontinuity between the end of @dst and the start of @src.
+	 *	   This means we might need to insert a "not mapped" run.
+	 */
 	if (loc == 0)
 		disc = (src[0].vcn > 0);
 	else {
@@ -258,58 +308,49 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
 			merged_length += src->length;
 
 		disc = (src[0].vcn > dst[loc - 1].vcn + merged_length);
-		if (disc)
-			hole = (dst[loc - 1].lcn == LCN_HOLE);
 	}
-
-	/* Space required: @dst size + @src size, less one if we merged, plus
-	 * one if there was a discontinuity, less one for a trailing hole. */
-	dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc - hole);
+	/*
+	 * Space required: @dst size + @src size, less one if we merged, plus
+	 * one if there was a discontinuity.
+	 */
+	dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc);
 	if (IS_ERR(dst))
 		return dst;
 	/*
 	 * We are guaranteed to succeed from here so can start modifying the
 	 * original runlist.
 	 */
-
 	if (left)
 		__ntfs_rl_merge(dst + loc - 1, src);
-
-	magic = loc + ssize - left + disc - hole;
+	/*
+	 * First run after the @src runs that have been inserted.
+	 * Nominally,  @marker equals @loc + @ssize, i.e. location + number of
+	 * runs in @src.  However, if @left, then the first run in @src has
+	 * been merged with one in @dst.  And if @disc, then @dst and @src do
+	 * not meet and we need an extra run to fill the gap.
+	 */
+	marker = loc + ssize - left + disc;
 
 	/* Move the tail of @dst out of the way, then copy in @src. */
-	ntfs_rl_mm(dst, magic, loc, dsize - loc);
-	ntfs_rl_mc(dst, loc + disc - hole, src, left, ssize - left);
+	ntfs_rl_mm(dst, marker, loc, dsize - loc);
+	ntfs_rl_mc(dst, loc + disc, src, left, ssize - left);
 
-	/* Adjust the VCN of the last run ... */
-	if (dst[magic].lcn <= LCN_HOLE)
-		dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length;
+	/* Adjust the VCN of the first run after the insertion... */
+	dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
 	/* ... and the length. */
-	if (dst[magic].lcn == LCN_HOLE || dst[magic].lcn == LCN_RL_NOT_MAPPED)
-		dst[magic].length = dst[magic + 1].vcn - dst[magic].vcn;
+	if (dst[marker].lcn == LCN_HOLE || dst[marker].lcn == LCN_RL_NOT_MAPPED)
+		dst[marker].length = dst[marker + 1].vcn - dst[marker].vcn;
 
-	/* Writing beyond the end of the file and there's a discontinuity. */
+	/* Writing beyond the end of the file and there is a discontinuity. */
 	if (disc) {
-		if (hole)
-			dst[loc - 1].length = dst[loc].vcn - dst[loc - 1].vcn;
-		else {
-			if (loc > 0) {
-				dst[loc].vcn = dst[loc - 1].vcn +
-						dst[loc - 1].length;
-				dst[loc].length = dst[loc + 1].vcn -
-						dst[loc].vcn;
-			} else {
-				dst[loc].vcn = 0;
-				dst[loc].length = dst[loc + 1].vcn;
-			}
-			dst[loc].lcn = LCN_RL_NOT_MAPPED;
+		if (loc > 0) {
+			dst[loc].vcn = dst[loc - 1].vcn + dst[loc - 1].length;
+			dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
+		} else {
+			dst[loc].vcn = 0;
+			dst[loc].length = dst[loc + 1].vcn;
 		}
-
-		magic += hole;
-
-		if (dst[magic].lcn == LCN_ENOENT)
-			dst[magic].vcn = dst[magic - 1].vcn +
-					dst[magic - 1].length;
+		dst[loc].lcn = LCN_RL_NOT_MAPPED;
 	}
 	return dst;
 }
@@ -340,20 +381,23 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
 static inline runlist_element *ntfs_rl_replace(runlist_element *dst,
 		int dsize, runlist_element *src, int ssize, int loc)
 {
-	BOOL left = FALSE;
-	BOOL right;
-	int magic;
+	BOOL left = FALSE;	/* Left end of @src needs merging. */
+	BOOL right = FALSE;	/* Right end of @src needs merging. */
+	int tail;		/* Start of tail of @dst. */
+	int marker;		/* End of the inserted runs. */
 
 	BUG_ON(!dst);
 	BUG_ON(!src);
 
-	/* First, merge the left and right ends, if necessary. */
-	right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
+	/* First, see if the left and right ends need merging. */
+	if ((loc + 1) < dsize)
+		right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
 	if (loc > 0)
 		left = ntfs_are_rl_mergeable(dst + loc - 1, src);
-
-	/* Allocate some space. We'll need less if the left, right, or both
-	 * ends were merged. */
+	/*
+	 * Allocate some space.  We will need less if the left, right, or both
+	 * ends get merged.
+	 */
 	dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left - right);
 	if (IS_ERR(dst))
 		return dst;
@@ -361,21 +405,37 @@ static inline runlist_element *ntfs_rl_replace(runlist_element *dst,
 	 * We are guaranteed to succeed from here so can start modifying the
 	 * original runlists.
 	 */
+
+	/* First, merge the left and right ends, if necessary. */
 	if (right)
 		__ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
 	if (left)
 		__ntfs_rl_merge(dst + loc - 1, src);
-
-	/* FIXME: What does this mean? (AIA) */
-	magic = loc + ssize - left;
+	/*
+	 * Offset of the tail of @dst.  This needs to be moved out of the way
+	 * to make space for the runs to be copied from @src, i.e. the first
+	 * run of the tail of @dst.
+	 * Nominally, @tail equals @loc + 1, i.e. location, skipping the
+	 * replaced run.  However, if @right, then one of @dst's runs is
+	 * already merged into @src.
+	 */
+	tail = loc + right + 1;
+	/*
+	 * First run after the @src runs that have been inserted, i.e. where
+	 * the tail of @dst needs to be moved to.
+	 * Nominally, @marker equals @loc + @ssize, i.e. location + number of
+	 * runs in @src.  However, if @left, then the first run in @src has
+	 * been merged with one in @dst.
+	 */
+	marker = loc + ssize - left;
 
 	/* Move the tail of @dst out of the way, then copy in @src. */
-	ntfs_rl_mm(dst, magic, loc + right + 1, dsize - loc - right - 1);
+	ntfs_rl_mm(dst, marker, tail, dsize - tail);
 	ntfs_rl_mc(dst, loc, src, left, ssize - left);
 
-	/* We may have changed the length of the file, so fix the end marker */
-	if (dst[magic].lcn == LCN_ENOENT)
-		dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length;
+	/* We may have changed the length of the file, so fix the end marker. */
+	if (dsize - tail > 0 && dst[marker].lcn == LCN_ENOENT)
+		dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
 	return dst;
 }
 
@@ -497,6 +557,7 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
 			/* Scan to the end of the source runlist. */
 			for (dend = 0; likely(drl[dend].length); dend++)
 				;
+			dend++;
 			drl = ntfs_rl_realloc(drl, dend, dend + 1);
 			if (IS_ERR(drl))
 				return drl;
@@ -566,8 +627,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
 		 ((drl[dins].vcn + drl[dins].length) <=      /* End of hole   */
 		  (srl[send - 1].vcn + srl[send - 1].length)));
 
-	/* Or we'll lose an end marker */
-	if (start && finish && (drl[dins].length == 0))
+	/* Or we will lose an end marker. */
+	if (finish && !drl[dins].length)
 		ss++;
 	if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn))
 		finish = FALSE;
@@ -621,11 +682,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
 			if (drl[ds].lcn != LCN_RL_NOT_MAPPED) {
 				/* Add an unmapped runlist element. */
 				if (!slots) {
-					/* FIXME/TODO: We need to have the
-					 * extra memory already! (AIA) */
-					drl = ntfs_rl_realloc(drl, ds, ds + 2);
-					if (!drl)
-						goto critical_error;
+					drl = ntfs_rl_realloc_nofail(drl, ds,
+							ds + 2);
 					slots = 2;
 				}
 				ds++;
@@ -640,13 +698,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
 			drl[ds].length = marker_vcn - drl[ds].vcn;
 			/* Finally add the ENOENT terminator. */
 			ds++;
-			if (!slots) {
-				/* FIXME/TODO: We need to have the extra
-				 * memory already! (AIA) */
-				drl = ntfs_rl_realloc(drl, ds, ds + 1);
-				if (!drl)
-					goto critical_error;
-			}
+			if (!slots)
+				drl = ntfs_rl_realloc_nofail(drl, ds, ds + 1);
 			drl[ds].vcn = marker_vcn;
 			drl[ds].lcn = LCN_ENOENT;
 			drl[ds].length = (s64)0;
@@ -659,11 +712,6 @@ finished:
 	ntfs_debug("Merged runlist:");
 	ntfs_debug_dump_runlist(drl);
 	return drl;
-
-critical_error:
-	/* Critical error! We cannot afford to fail here. */
-	ntfs_error(NULL, "Critical error! Not enough memory.");
-	panic("NTFS: Cannot continue.");
 }
 
 /**
@@ -727,6 +775,9 @@ runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol,
 		ntfs_error(vol->sb, "Corrupt attribute.");
 		return ERR_PTR(-EIO);
 	}
+	/* If the mapping pairs array is valid but empty, nothing to do. */
+	if (!vcn && !*buf)
+		return old_rl;
 	/* Current position in runlist array. */
 	rlpos = 0;
 	/* Allocate first page and set current runlist size to one page. */
@@ -1419,6 +1470,7 @@ err_out:
 
 /**
  * ntfs_rl_truncate_nolock - truncate a runlist starting at a specified vcn
+ * @vol:	ntfs volume (needed for error output)
  * @runlist:	runlist to truncate
  * @new_length:	the new length of the runlist in VCNs
  *
@@ -1426,12 +1478,16 @@ err_out:
  * holding the runlist elements to a length of @new_length VCNs.
  *
  * If @new_length lies within the runlist, the runlist elements with VCNs of
- * @new_length and above are discarded.
+ * @new_length and above are discarded.  As a special case if @new_length is
+ * zero, the runlist is discarded and set to NULL.
  *
  * If @new_length lies beyond the runlist, a sparse runlist element is added to
  * the end of the runlist @runlist or if the last runlist element is a sparse
  * one already, this is extended.
  *
+ * Note, no checking is done for unmapped runlist elements.  It is assumed that
+ * the caller has mapped any elements that need to be mapped already.
+ *
  * Return 0 on success and -errno on error.
  *
  * Locking: The caller must hold @runlist->lock for writing.
@@ -1446,6 +1502,13 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
 	BUG_ON(!runlist);
 	BUG_ON(new_length < 0);
 	rl = runlist->rl;
+	if (!new_length) {
+		ntfs_debug("Freeing runlist.");
+		runlist->rl = NULL;
+		if (rl)
+			ntfs_free(rl);
+		return 0;
+	}
 	if (unlikely(!rl)) {
 		/*
 		 * Create a runlist consisting of a sparse runlist element of
@@ -1553,4 +1616,288 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
 	return 0;
 }
 
+/**
+ * ntfs_rl_punch_nolock - punch a hole into a runlist
+ * @vol:	ntfs volume (needed for error output)
+ * @runlist:	runlist to punch a hole into
+ * @start:	starting VCN of the hole to be created
+ * @length:	size of the hole to be created in units of clusters
+ *
+ * Punch a hole into the runlist @runlist starting at VCN @start and of size
+ * @length clusters.
+ *
+ * Return 0 on success and -errno on error, in which case @runlist has not been
+ * modified.
+ *
+ * If @start and/or @start + @length are outside the runlist return error code
+ * -ENOENT.
+ *
+ * If the runlist contains unmapped or error elements between @start and @start
+ * + @length return error code -EINVAL.
+ *
+ * Locking: The caller must hold @runlist->lock for writing.
+ */
+int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist,
+		const VCN start, const s64 length)
+{
+	const VCN end = start + length;
+	s64 delta;
+	runlist_element *rl, *rl_end, *rl_real_end, *trl;
+	int old_size;
+	BOOL lcn_fixup = FALSE;
+
+	ntfs_debug("Entering for start 0x%llx, length 0x%llx.",
+			(long long)start, (long long)length);
+	BUG_ON(!runlist);
+	BUG_ON(start < 0);
+	BUG_ON(length < 0);
+	BUG_ON(end < 0);
+	rl = runlist->rl;
+	if (unlikely(!rl)) {
+		if (likely(!start && !length))
+			return 0;
+		return -ENOENT;
+	}
+	/* Find @start in the runlist. */
+	while (likely(rl->length && start >= rl[1].vcn))
+		rl++;
+	rl_end = rl;
+	/* Find @end in the runlist. */
+	while (likely(rl_end->length && end >= rl_end[1].vcn)) {
+		/* Verify there are no unmapped or error elements. */
+		if (unlikely(rl_end->lcn < LCN_HOLE))
+			return -EINVAL;
+		rl_end++;
+	}
+	/* Check the last element. */
+	if (unlikely(rl_end->length && rl_end->lcn < LCN_HOLE))
+		return -EINVAL;
+	/* This covers @start being out of bounds, too. */
+	if (!rl_end->length && end > rl_end->vcn)
+		return -ENOENT;
+	if (!length)
+		return 0;
+	if (!rl->length)
+		return -ENOENT;
+	rl_real_end = rl_end;
+	/* Determine the runlist size. */
+	while (likely(rl_real_end->length))
+		rl_real_end++;
+	old_size = rl_real_end - runlist->rl + 1;
+	/* If @start is in a hole simply extend the hole. */
+	if (rl->lcn == LCN_HOLE) {
+		/*
+		 * If both @start and @end are in the same sparse run, we are
+		 * done.
+		 */
+		if (end <= rl[1].vcn) {
+			ntfs_debug("Done (requested hole is already sparse).");
+			return 0;
+		}
+extend_hole:
+		/* Extend the hole. */
+		rl->length = end - rl->vcn;
+		/* If @end is in a hole, merge it with the current one. */
+		if (rl_end->lcn == LCN_HOLE) {
+			rl_end++;
+			rl->length = rl_end->vcn - rl->vcn;
+		}
+		/* We have done the hole.  Now deal with the remaining tail. */
+		rl++;
+		/* Cut out all runlist elements up to @end. */
+		if (rl < rl_end)
+			memmove(rl, rl_end, (rl_real_end - rl_end + 1) *
+					sizeof(*rl));
+		/* Adjust the beginning of the tail if necessary. */
+		if (end > rl->vcn) {
+			s64 delta = end - rl->vcn;
+			rl->vcn = end;
+			rl->length -= delta;
+			/* Only adjust the lcn if it is real. */
+			if (rl->lcn >= 0)
+				rl->lcn += delta;
+		}
+shrink_allocation:
+		/* Reallocate memory if the allocation changed. */
+		if (rl < rl_end) {
+			rl = ntfs_rl_realloc(runlist->rl, old_size,
+					old_size - (rl_end - rl));
+			if (IS_ERR(rl))
+				ntfs_warning(vol->sb, "Failed to shrink "
+						"runlist buffer.  This just "
+						"wastes a bit of memory "
+						"temporarily so we ignore it "
+						"and return success.");
+			else
+				runlist->rl = rl;
+		}
+		ntfs_debug("Done (extend hole).");
+		return 0;
+	}
+	/*
+	 * If @start is at the beginning of a run things are easier as there is
+	 * no need to split the first run.
+	 */
+	if (start == rl->vcn) {
+		/*
+		 * @start is at the beginning of a run.
+		 *
+		 * If the previous run is sparse, extend its hole.
+		 *
+		 * If @end is not in the same run, switch the run to be sparse
+		 * and extend the newly created hole.
+		 *
+		 * Thus both of these cases reduce the problem to the above
+		 * case of "@start is in a hole".
+		 */
+		if (rl > runlist->rl && (rl - 1)->lcn == LCN_HOLE) {
+			rl--;
+			goto extend_hole;
+		}
+		if (end >= rl[1].vcn) {
+			rl->lcn = LCN_HOLE;
+			goto extend_hole;
+		}
+		/*
+		 * The final case is when @end is in the same run as @start.
+		 * For this need to split the run into two.  One run for the
+		 * sparse region between the beginning of the old run, i.e.
+		 * @start, and @end and one for the remaining non-sparse
+		 * region, i.e. between @end and the end of the old run.
+		 */
+		trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1);
+		if (IS_ERR(trl))
+			goto enomem_out;
+		old_size++;
+		if (runlist->rl != trl) {
+			rl = trl + (rl - runlist->rl);
+			rl_end = trl + (rl_end - runlist->rl);
+			rl_real_end = trl + (rl_real_end - runlist->rl);
+			runlist->rl = trl;
+		}
+split_end:
+		/* Shift all the runs up by one. */
+		memmove(rl + 1, rl, (rl_real_end - rl + 1) * sizeof(*rl));
+		/* Finally, setup the two split runs. */
+		rl->lcn = LCN_HOLE;
+		rl->length = length;
+		rl++;
+		rl->vcn += length;
+		/* Only adjust the lcn if it is real. */
+		if (rl->lcn >= 0 || lcn_fixup)
+			rl->lcn += length;
+		rl->length -= length;
+		ntfs_debug("Done (split one).");
+		return 0;
+	}
+	/*
+	 * @start is neither in a hole nor at the beginning of a run.
+	 *
+	 * If @end is in a hole, things are easier as simply truncating the run
+	 * @start is in to end at @start - 1, deleting all runs after that up
+	 * to @end, and finally extending the beginning of the run @end is in
+	 * to be @start is all that is needed.
+	 */
+	if (rl_end->lcn == LCN_HOLE) {
+		/* Truncate the run containing @start. */
+		rl->length = start - rl->vcn;
+		rl++;
+		/* Cut out all runlist elements up to @end. */
+		if (rl < rl_end)
+			memmove(rl, rl_end, (rl_real_end - rl_end + 1) *
+					sizeof(*rl));
+		/* Extend the beginning of the run @end is in to be @start. */
+		rl->vcn = start;
+		rl->length = rl[1].vcn - start;
+		goto shrink_allocation;
+	}
+	/* 
+	 * If @end is not in a hole there are still two cases to distinguish.
+	 * Either @end is or is not in the same run as @start.
+	 *
+	 * The second case is easier as it can be reduced to an already solved
+	 * problem by truncating the run @start is in to end at @start - 1.
+	 * Then, if @end is in the next run need to split the run into a sparse
+	 * run followed by a non-sparse run (already covered above) and if @end
+	 * is not in the next run switching it to be sparse, again reduces the
+	 * problem to the already covered case of "@start is in a hole".
+	 */
+	if (end >= rl[1].vcn) {
+		/*
+		 * If @end is not in the next run, reduce the problem to the
+		 * case of "@start is in a hole".
+		 */
+		if (rl[1].length && end >= rl[2].vcn) {
+			/* Truncate the run containing @start. */
+			rl->length = start - rl->vcn;
+			rl++;
+			rl->vcn = start;
+			rl->lcn = LCN_HOLE;
+			goto extend_hole;
+		}
+		trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1);
+		if (IS_ERR(trl))
+			goto enomem_out;
+		old_size++;
+		if (runlist->rl != trl) {
+			rl = trl + (rl - runlist->rl);
+			rl_end = trl + (rl_end - runlist->rl);
+			rl_real_end = trl + (rl_real_end - runlist->rl);
+			runlist->rl = trl;
+		}
+		/* Truncate the run containing @start. */
+		rl->length = start - rl->vcn;
+		rl++;
+		/*
+		 * @end is in the next run, reduce the problem to the case
+		 * where "@start is at the beginning of a run and @end is in
+		 * the same run as @start".
+		 */
+		delta = rl->vcn - start;
+		rl->vcn = start;
+		if (rl->lcn >= 0) {
+			rl->lcn -= delta;
+			/* Need this in case the lcn just became negative. */
+			lcn_fixup = TRUE;
+		}
+		rl->length += delta;
+		goto split_end;
+	}
+	/*
+	 * The first case from above, i.e. @end is in the same run as @start.
+	 * We need to split the run into three.  One run for the non-sparse
+	 * region between the beginning of the old run and @start, one for the
+	 * sparse region between @start and @end, and one for the remaining
+	 * non-sparse region, i.e. between @end and the end of the old run.
+	 */
+	trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 2);
+	if (IS_ERR(trl))
+		goto enomem_out;
+	old_size += 2;
+	if (runlist->rl != trl) {
+		rl = trl + (rl - runlist->rl);
+		rl_end = trl + (rl_end - runlist->rl);
+		rl_real_end = trl + (rl_real_end - runlist->rl);
+		runlist->rl = trl;
+	}
+	/* Shift all the runs up by two. */
+	memmove(rl + 2, rl, (rl_real_end - rl + 1) * sizeof(*rl));
+	/* Finally, setup the three split runs. */
+	rl->length = start - rl->vcn;
+	rl++;
+	rl->vcn = start;
+	rl->lcn = LCN_HOLE;
+	rl->length = length;
+	rl++;
+	delta = end - rl->vcn;
+	rl->vcn = end;
+	rl->lcn += delta;
+	rl->length -= delta;
+	ntfs_debug("Done (split both).");
+	return 0;
+enomem_out:
+	ntfs_error(vol->sb, "Not enough memory to extend runlist buffer.");
+	return -ENOMEM;
+}
+
 #endif /* NTFS_RW */
diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h
index aa0ee6540e7..47728fbb610 100644
--- a/fs/ntfs/runlist.h
+++ b/fs/ntfs/runlist.h
@@ -94,6 +94,9 @@ extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
 extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol,
 		runlist *const runlist, const s64 new_length);
 
+int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist,
+		const VCN start, const s64 length);
+
 #endif /* NTFS_RW */
 
 #endif /* _LINUX_NTFS_RUNLIST_H */
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 41aa8eb6755..6c16db9e1a8 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -126,6 +126,14 @@ static BOOL parse_options(ntfs_volume *vol, char *opt)
 		if (*v)							\
 			goto needs_val;					\
 	}
+#define NTFS_GETOPT_OCTAL(option, variable)				\
+	if (!strcmp(p, option)) {					\
+		if (!v || !*v)						\
+			goto needs_arg;					\
+		variable = simple_strtoul(ov = v, &v, 8);		\
+		if (*v)							\
+			goto needs_val;					\
+	}
 #define NTFS_GETOPT_BOOL(option, variable)				\
 	if (!strcmp(p, option)) {					\
 		BOOL val;						\
@@ -157,9 +165,9 @@ static BOOL parse_options(ntfs_volume *vol, char *opt)
 			*v++ = 0;
 		NTFS_GETOPT("uid", uid)
 		else NTFS_GETOPT("gid", gid)
-		else NTFS_GETOPT("umask", fmask = dmask)
-		else NTFS_GETOPT("fmask", fmask)
-		else NTFS_GETOPT("dmask", dmask)
+		else NTFS_GETOPT_OCTAL("umask", fmask = dmask)
+		else NTFS_GETOPT_OCTAL("fmask", fmask)
+		else NTFS_GETOPT_OCTAL("dmask", dmask)
 		else NTFS_GETOPT("mft_zone_multiplier", mft_zone_multiplier)
 		else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE)
 		else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files)
@@ -1133,7 +1141,8 @@ mft_unmap_out:
  *
  * Return TRUE on success or FALSE on error.
  */
-static BOOL load_and_check_logfile(ntfs_volume *vol)
+static BOOL load_and_check_logfile(ntfs_volume *vol,
+		RESTART_PAGE_HEADER **rp)
 {
 	struct inode *tmp_ino;
 
@@ -1145,7 +1154,7 @@ static BOOL load_and_check_logfile(ntfs_volume *vol)
 		/* Caller will display error message. */
 		return FALSE;
 	}
-	if (!ntfs_check_logfile(tmp_ino)) {
+	if (!ntfs_check_logfile(tmp_ino, rp)) {
 		iput(tmp_ino);
 		/* ntfs_check_logfile() will have displayed error output. */
 		return FALSE;
@@ -1438,7 +1447,7 @@ not_enabled:
 	if (unlikely(i_size_read(tmp_ino) < sizeof(USN_HEADER))) {
 		ntfs_error(vol->sb, "Found corrupt $UsnJrnl/$DATA/$Max "
 				"attribute (size is 0x%llx but should be at "
-				"least 0x%x bytes).", i_size_read(tmp_ino),
+				"least 0x%zx bytes).", i_size_read(tmp_ino),
 				sizeof(USN_HEADER));
 		return FALSE;
 	}
@@ -1689,6 +1698,7 @@ static BOOL load_system_files(ntfs_volume *vol)
 	VOLUME_INFORMATION *vi;
 	ntfs_attr_search_ctx *ctx;
 #ifdef NTFS_RW
+	RESTART_PAGE_HEADER *rp;
 	int err;
 #endif /* NTFS_RW */
 
@@ -1841,8 +1851,9 @@ get_ctx_vol_failed:
 	 * Get the inode for the logfile, check it and determine if the volume
 	 * was shutdown cleanly.
 	 */
-	if (!load_and_check_logfile(vol) ||
-			!ntfs_is_logfile_clean(vol->logfile_ino)) {
+	rp = NULL;
+	if (!load_and_check_logfile(vol, &rp) ||
+			!ntfs_is_logfile_clean(vol->logfile_ino, rp)) {
 		static const char *es1a = "Failed to load $LogFile";
 		static const char *es1b = "$LogFile is not clean";
 		static const char *es2 = ".  Mount in Windows.";
@@ -1857,6 +1868,10 @@ get_ctx_vol_failed:
 						"continue nor on_errors="
 						"remount-ro was specified%s",
 						es1, es2);
+				if (vol->logfile_ino) {
+					BUG_ON(!rp);
+					ntfs_free(rp);
+				}
 				goto iput_logfile_err_out;
 			}
 			sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
@@ -1867,6 +1882,7 @@ get_ctx_vol_failed:
 		/* This will prevent a read-write remount. */
 		NVolSetErrors(vol);
 	}
+	ntfs_free(rp);
 #endif /* NTFS_RW */
 	/* Get the root directory inode so we can do path lookups. */
 	vol->root_ino = ntfs_iget(sb, FILE_root);
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
index 19c42e231b4..0ea887fc859 100644
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
@@ -1,7 +1,7 @@
 /*
  * unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -372,7 +372,8 @@ retry:			wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
 	return -EINVAL;
 conversion_err:
 	ntfs_error(vol->sb, "Unicode name contains characters that cannot be "
-			"converted to character set %s.", nls->charset);
+			"converted to character set %s.  You might want to "
+			"try to use the mount option nls=utf8.", nls->charset);
 	if (ns != *outs)
 		kfree(ns);
 	if (wc != -ENAMETOOLONG)
diff --git a/fs/open.c b/fs/open.c
index 32bf05e2996..8d06ec911fd 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -24,6 +24,7 @@
 #include <linux/personality.h>
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
+#include <linux/rcupdate.h>
 
 #include <asm/unistd.h>
 
@@ -737,52 +738,16 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
 	return error;
 }
 
-/*
- * Note that while the flag value (low two bits) for sys_open means:
- *	00 - read-only
- *	01 - write-only
- *	10 - read-write
- *	11 - special
- * it is changed into
- *	00 - no permissions needed
- *	01 - read-permission
- *	10 - write-permission
- *	11 - read-write
- * for the internal routines (ie open_namei()/follow_link() etc). 00 is
- * used by symlinks.
- */
-struct file *filp_open(const char * filename, int flags, int mode)
-{
-	int namei_flags, error;
-	struct nameidata nd;
-
-	namei_flags = flags;
-	if ((namei_flags+1) & O_ACCMODE)
-		namei_flags++;
-	if (namei_flags & O_TRUNC)
-		namei_flags |= 2;
-
-	error = open_namei(filename, namei_flags, mode, &nd);
-	if (!error)
-		return dentry_open(nd.dentry, nd.mnt, flags);
-
-	return ERR_PTR(error);
-}
-
-EXPORT_SYMBOL(filp_open);
-
-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
+static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
+					int flags, struct file *f,
+					int (*open)(struct inode *, struct file *))
 {
-	struct file * f;
 	struct inode *inode;
 	int error;
 
-	error = -ENFILE;
-	f = get_empty_filp();
-	if (!f)
-		goto cleanup_dentry;
 	f->f_flags = flags;
-	f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
+	f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
+				FMODE_PREAD | FMODE_PWRITE;
 	inode = dentry->d_inode;
 	if (f->f_mode & FMODE_WRITE) {
 		error = get_write_access(inode);
@@ -797,11 +762,14 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
 	f->f_op = fops_get(inode->i_fop);
 	file_move(f, &inode->i_sb->s_files);
 
-	if (f->f_op && f->f_op->open) {
-		error = f->f_op->open(inode,f);
+	if (!open && f->f_op)
+		open = f->f_op->open;
+	if (open) {
+		error = open(inode, f);
 		if (error)
 			goto cleanup_all;
 	}
+
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
@@ -827,12 +795,110 @@ cleanup_all:
 	f->f_vfsmnt = NULL;
 cleanup_file:
 	put_filp(f);
-cleanup_dentry:
 	dput(dentry);
 	mntput(mnt);
 	return ERR_PTR(error);
 }
 
+/*
+ * Note that while the flag value (low two bits) for sys_open means:
+ *	00 - read-only
+ *	01 - write-only
+ *	10 - read-write
+ *	11 - special
+ * it is changed into
+ *	00 - no permissions needed
+ *	01 - read-permission
+ *	10 - write-permission
+ *	11 - read-write
+ * for the internal routines (ie open_namei()/follow_link() etc). 00 is
+ * used by symlinks.
+ */
+struct file *filp_open(const char * filename, int flags, int mode)
+{
+	int namei_flags, error;
+	struct nameidata nd;
+
+	namei_flags = flags;
+	if ((namei_flags+1) & O_ACCMODE)
+		namei_flags++;
+
+	error = open_namei(filename, namei_flags, mode, &nd);
+	if (!error)
+		return nameidata_to_filp(&nd, flags);
+
+	return ERR_PTR(error);
+}
+EXPORT_SYMBOL(filp_open);
+
+/**
+ * lookup_instantiate_filp - instantiates the open intent filp
+ * @nd: pointer to nameidata
+ * @dentry: pointer to dentry
+ * @open: open callback
+ *
+ * Helper for filesystems that want to use lookup open intents and pass back
+ * a fully instantiated struct file to the caller.
+ * This function is meant to be called from within a filesystem's
+ * lookup method.
+ * Note that in case of error, nd->intent.open.file is destroyed, but the
+ * path information remains valid.
+ * If the open callback is set to NULL, then the standard f_op->open()
+ * filesystem callback is substituted.
+ */
+struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
+		int (*open)(struct inode *, struct file *))
+{
+	if (IS_ERR(nd->intent.open.file))
+		goto out;
+	if (IS_ERR(dentry))
+		goto out_err;
+	nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt),
+					     nd->intent.open.flags - 1,
+					     nd->intent.open.file,
+					     open);
+out:
+	return nd->intent.open.file;
+out_err:
+	release_open_intent(nd);
+	nd->intent.open.file = (struct file *)dentry;
+	goto out;
+}
+EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
+
+/**
+ * nameidata_to_filp - convert a nameidata to an open filp.
+ * @nd: pointer to nameidata
+ * @flags: open flags
+ *
+ * Note that this function destroys the original nameidata
+ */
+struct file *nameidata_to_filp(struct nameidata *nd, int flags)
+{
+	struct file *filp;
+
+	/* Pick up the filp from the open intent */
+	filp = nd->intent.open.file;
+	/* Has the filesystem initialised the file for us? */
+	if (filp->f_dentry == NULL)
+		filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL);
+	else
+		path_release(nd);
+	return filp;
+}
+
+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
+{
+	int error;
+	struct file *f;
+
+	error = -ENFILE;
+	f = get_empty_filp();
+	if (f == NULL)
+		return ERR_PTR(error);
+
+	return __dentry_open(dentry, mnt, flags, f, NULL);
+}
 EXPORT_SYMBOL(dentry_open);
 
 /*
@@ -842,14 +908,16 @@ int get_unused_fd(void)
 {
 	struct files_struct * files = current->files;
 	int fd, error;
+	struct fdtable *fdt;
 
   	error = -EMFILE;
 	spin_lock(&files->file_lock);
 
 repeat:
- 	fd = find_next_zero_bit(files->open_fds->fds_bits, 
-				files->max_fdset, 
-				files->next_fd);
+	fdt = files_fdtable(files);
+ 	fd = find_next_zero_bit(fdt->open_fds->fds_bits,
+				fdt->max_fdset,
+				fdt->next_fd);
 
 	/*
 	 * N.B. For clone tasks sharing a files structure, this test
@@ -872,14 +940,14 @@ repeat:
 		goto repeat;
 	}
 
-	FD_SET(fd, files->open_fds);
-	FD_CLR(fd, files->close_on_exec);
-	files->next_fd = fd + 1;
+	FD_SET(fd, fdt->open_fds);
+	FD_CLR(fd, fdt->close_on_exec);
+	fdt->next_fd = fd + 1;
 #if 1
 	/* Sanity check */
-	if (files->fd[fd] != NULL) {
+	if (fdt->fd[fd] != NULL) {
 		printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
-		files->fd[fd] = NULL;
+		fdt->fd[fd] = NULL;
 	}
 #endif
 	error = fd;
@@ -893,9 +961,10 @@ EXPORT_SYMBOL(get_unused_fd);
 
 static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
 {
-	__FD_CLR(fd, files->open_fds);
-	if (fd < files->next_fd)
-		files->next_fd = fd;
+	struct fdtable *fdt = files_fdtable(files);
+	__FD_CLR(fd, fdt->open_fds);
+	if (fd < fdt->next_fd)
+		fdt->next_fd = fd;
 }
 
 void fastcall put_unused_fd(unsigned int fd)
@@ -924,25 +993,21 @@ EXPORT_SYMBOL(put_unused_fd);
 void fastcall fd_install(unsigned int fd, struct file * file)
 {
 	struct files_struct *files = current->files;
+	struct fdtable *fdt;
 	spin_lock(&files->file_lock);
-	if (unlikely(files->fd[fd] != NULL))
-		BUG();
-	files->fd[fd] = file;
+	fdt = files_fdtable(files);
+	BUG_ON(fdt->fd[fd] != NULL);
+	rcu_assign_pointer(fdt->fd[fd], file);
 	spin_unlock(&files->file_lock);
 }
 
 EXPORT_SYMBOL(fd_install);
 
-asmlinkage long sys_open(const char __user * filename, int flags, int mode)
+long do_sys_open(const char __user *filename, int flags, int mode)
 {
-	char * tmp;
-	int fd;
-
-	if (force_o_largefile())
-		flags |= O_LARGEFILE;
+	char *tmp = getname(filename);
+	int fd = PTR_ERR(tmp);
 
-	tmp = getname(filename);
-	fd = PTR_ERR(tmp);
 	if (!IS_ERR(tmp)) {
 		fd = get_unused_fd();
 		if (fd >= 0) {
@@ -959,6 +1024,14 @@ asmlinkage long sys_open(const char __user * filename, int flags, int mode)
 	}
 	return fd;
 }
+
+asmlinkage long sys_open(const char __user *filename, int flags, int mode)
+{
+	if (force_o_largefile())
+		flags |= O_LARGEFILE;
+
+	return do_sys_open(filename, flags, mode);
+}
 EXPORT_SYMBOL_GPL(sys_open);
 
 #ifndef __alpha__
@@ -1007,15 +1080,17 @@ asmlinkage long sys_close(unsigned int fd)
 {
 	struct file * filp;
 	struct files_struct *files = current->files;
+	struct fdtable *fdt;
 
 	spin_lock(&files->file_lock);
-	if (fd >= files->max_fds)
+	fdt = files_fdtable(files);
+	if (fd >= fdt->max_fds)
 		goto out_unlock;
-	filp = files->fd[fd];
+	filp = fdt->fd[fd];
 	if (!filp)
 		goto out_unlock;
-	files->fd[fd] = NULL;
-	FD_CLR(fd, files->close_on_exec);
+	rcu_assign_pointer(fdt->fd[fd], NULL);
+	FD_CLR(fd, fdt->close_on_exec);
 	__put_unused_fd(files, fd);
 	spin_unlock(&files->file_lock);
 	return filp_close(filp, files);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 77e178f1316..8dc1822a702 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -192,6 +192,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
 struct part_attribute {
 	struct attribute attr;
 	ssize_t (*show)(struct hd_struct *,char *);
+	ssize_t (*store)(struct hd_struct *,const char *, size_t);
 };
 
 static ssize_t 
@@ -201,14 +202,33 @@ part_attr_show(struct kobject * kobj, struct attribute * attr, char * page)
 	struct part_attribute * part_attr = container_of(attr,struct part_attribute,attr);
 	ssize_t ret = 0;
 	if (part_attr->show)
-		ret = part_attr->show(p,page);
+		ret = part_attr->show(p, page);
+	return ret;
+}
+static ssize_t
+part_attr_store(struct kobject * kobj, struct attribute * attr,
+		const char *page, size_t count)
+{
+	struct hd_struct * p = container_of(kobj,struct hd_struct,kobj);
+	struct part_attribute * part_attr = container_of(attr,struct part_attribute,attr);
+	ssize_t ret = 0;
+
+	if (part_attr->store)
+		ret = part_attr->store(p, page, count);
 	return ret;
 }
 
 static struct sysfs_ops part_sysfs_ops = {
 	.show	=	part_attr_show,
+	.store	=	part_attr_store,
 };
 
+static ssize_t part_uevent_store(struct hd_struct * p,
+				 const char *page, size_t count)
+{
+	kobject_hotplug(&p->kobj, KOBJ_ADD);
+	return count;
+}
 static ssize_t part_dev_read(struct hd_struct * p, char *page)
 {
 	struct gendisk *disk = container_of(p->kobj.parent,struct gendisk,kobj);
@@ -226,9 +246,13 @@ static ssize_t part_size_read(struct hd_struct * p, char *page)
 static ssize_t part_stat_read(struct hd_struct * p, char *page)
 {
 	return sprintf(page, "%8u %8llu %8u %8llu\n",
-		       p->reads, (unsigned long long)p->read_sectors,
-		       p->writes, (unsigned long long)p->write_sectors);
+		       p->ios[0], (unsigned long long)p->sectors[0],
+		       p->ios[1], (unsigned long long)p->sectors[1]);
 }
+static struct part_attribute part_attr_uevent = {
+	.attr = {.name = "uevent", .mode = S_IWUSR },
+	.store	= part_uevent_store
+};
 static struct part_attribute part_attr_dev = {
 	.attr = {.name = "dev", .mode = S_IRUGO },
 	.show	= part_dev_read
@@ -247,6 +271,7 @@ static struct part_attribute part_attr_stat = {
 };
 
 static struct attribute * default_attrs[] = {
+	&part_attr_uevent.attr,
 	&part_attr_dev.attr,
 	&part_attr_start.attr,
 	&part_attr_size.attr,
@@ -278,7 +303,8 @@ void delete_partition(struct gendisk *disk, int part)
 	disk->part[part-1] = NULL;
 	p->start_sect = 0;
 	p->nr_sects = 0;
-	p->reads = p->writes = p->read_sectors = p->write_sectors = 0;
+	p->ios[0] = p->ios[1] = 0;
+	p->sectors[0] = p->sectors[1] = 0;
 	devfs_remove("%s/part%d", disk->devfs_name, part);
 	kobject_unregister(&p->kobj);
 }
@@ -430,7 +456,7 @@ void del_gendisk(struct gendisk *disk)
 	disk->flags &= ~GENHD_FL_UP;
 	unlink_gendisk(disk);
 	disk_stat_set_all(disk, 0);
-	disk->stamp = disk->stamp_idle = 0;
+	disk->stamp = 0;
 
 	devfs_remove_disk(disk);
 
diff --git a/fs/pipe.c b/fs/pipe.c
index 25aa09f9d09..66aa0b938d6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -39,7 +39,11 @@ void pipe_wait(struct inode * inode)
 {
 	DEFINE_WAIT(wait);
 
-	prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE);
+	/*
+	 * Pipes are system-local resources, so sleeping on them
+	 * is considered a noninteractive wait:
+	 */
+	prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
 	up(PIPE_SEM(*inode));
 	schedule();
 	finish_wait(PIPE_WAIT(*inode), &wait);
@@ -415,6 +419,10 @@ pipe_poll(struct file *filp, poll_table *wait)
 
 	if (filp->f_mode & FMODE_WRITE) {
 		mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
+		/*
+		 * Most Unices do not set POLLERR for FIFOs but on Linux they
+		 * behave exactly like pipes for poll().
+		 */
 		if (!PIPE_READERS(*inode))
 			mask |= POLLERR;
 	}
@@ -422,9 +430,6 @@ pipe_poll(struct file *filp, poll_table *wait)
 	return mask;
 }
 
-/* FIXME: most Unices do not set POLLERR for fifos */
-#define fifo_poll pipe_poll
-
 static int
 pipe_release(struct inode *inode, int decr, int decw)
 {
@@ -568,7 +573,7 @@ struct file_operations read_fifo_fops = {
 	.read		= pipe_read,
 	.readv		= pipe_readv,
 	.write		= bad_pipe_w,
-	.poll		= fifo_poll,
+	.poll		= pipe_poll,
 	.ioctl		= pipe_ioctl,
 	.open		= pipe_read_open,
 	.release	= pipe_read_release,
@@ -580,7 +585,7 @@ struct file_operations write_fifo_fops = {
 	.read		= bad_pipe_r,
 	.write		= pipe_write,
 	.writev		= pipe_writev,
-	.poll		= fifo_poll,
+	.poll		= pipe_poll,
 	.ioctl		= pipe_ioctl,
 	.open		= pipe_write_open,
 	.release	= pipe_write_release,
@@ -593,7 +598,7 @@ struct file_operations rdwr_fifo_fops = {
 	.readv		= pipe_readv,
 	.write		= pipe_write,
 	.writev		= pipe_writev,
-	.poll		= fifo_poll,
+	.poll		= pipe_poll,
 	.ioctl		= pipe_ioctl,
 	.open		= pipe_rdwr_open,
 	.release	= pipe_rdwr_release,
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 296480e96dd..6c8dcf7613f 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -35,7 +35,7 @@ EXPORT_SYMBOL(posix_acl_permission);
  * Allocate a new ACL with the specified number of entries.
  */
 struct posix_acl *
-posix_acl_alloc(int count, unsigned int __nocast flags)
+posix_acl_alloc(int count, gfp_t flags)
 {
 	const size_t size = sizeof(struct posix_acl) +
 	                    count * sizeof(struct posix_acl_entry);
@@ -51,7 +51,7 @@ posix_acl_alloc(int count, unsigned int __nocast flags)
  * Clone an ACL.
  */
 struct posix_acl *
-posix_acl_clone(const struct posix_acl *acl, unsigned int __nocast flags)
+posix_acl_clone(const struct posix_acl *acl, gfp_t flags)
 {
 	struct posix_acl *clone = NULL;
 
@@ -185,7 +185,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, mode_t *mode_p)
  * Create an ACL representing the file mode permission bits of an inode.
  */
 struct posix_acl *
-posix_acl_from_mode(mode_t mode, unsigned int __nocast flags)
+posix_acl_from_mode(mode_t mode, gfp_t flags)
 {
 	struct posix_acl *acl = posix_acl_alloc(3, flags);
 	if (!acl)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 37668fe998a..3e1239e4b30 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -74,6 +74,7 @@
 #include <linux/file.h>
 #include <linux/times.h>
 #include <linux/cpuset.h>
+#include <linux/rcupdate.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -159,6 +160,7 @@ static inline char * task_state(struct task_struct *p, char *buffer)
 {
 	struct group_info *group_info;
 	int g;
+	struct fdtable *fdt = NULL;
 
 	read_lock(&tasklist_lock);
 	buffer += sprintf(buffer,
@@ -179,10 +181,14 @@ static inline char * task_state(struct task_struct *p, char *buffer)
 		p->gid, p->egid, p->sgid, p->fsgid);
 	read_unlock(&tasklist_lock);
 	task_lock(p);
+	rcu_read_lock();
+	if (p->files)
+		fdt = files_fdtable(p->files);
 	buffer += sprintf(buffer,
 		"FDSize:\t%d\n"
 		"Groups:\t",
-		p->files ? p->files->max_fds : 0);
+		fdt ? fdt->max_fds : 0);
+	rcu_read_unlock();
 
 	group_info = p->group_info;
 	get_group_info(group_info);
@@ -432,7 +438,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 		jiffies_to_clock_t(it_real_value),
 		start_time,
 		vsize,
-		mm ? get_mm_counter(mm, rss) : 0, /* you might want to shift this left 3 */
+		mm ? get_mm_rss(mm) : 0,
 	        rsslim,
 		mm ? mm->start_code : 0,
 		mm ? mm->end_code : 0,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ace151fa487..a170450aadb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -11,6 +11,40 @@
  *  go into icache. We cache the reference to task_struct upon lookup too.
  *  Eventually it should become a filesystem in its own. We don't use the
  *  rest of procfs anymore.
+ *
+ *
+ *  Changelog:
+ *  17-Jan-2005
+ *  Allan Bezerra
+ *  Bruna Moreira <bruna.moreira@indt.org.br>
+ *  Edjard Mota <edjard.mota@indt.org.br>
+ *  Ilias Biris <ilias.biris@indt.org.br>
+ *  Mauricio Lin <mauricio.lin@indt.org.br>
+ *
+ *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
+ *
+ *  A new process specific entry (smaps) included in /proc. It shows the
+ *  size of rss for each memory area. The maps entry lacks information
+ *  about physical memory size (rss) for each mapped file, i.e.,
+ *  rss information for executables and library files.
+ *  This additional information is useful for any tools that need to know
+ *  about physical memory consumption for a process specific library.
+ *
+ *  Changelog:
+ *  21-Feb-2005
+ *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
+ *  Pud inclusion in the page table walking.
+ *
+ *  ChangeLog:
+ *  10-Mar-2005
+ *  10LE Instituto Nokia de Tecnologia - INdT:
+ *  A better way to walks through the page table as suggested by Hugh Dickins.
+ *
+ *  Simo Piiroinen <simo.piiroinen@nokia.com>:
+ *  Smaps information related to shared, private, clean and dirty pages.
+ *
+ *  Paul Mundt <paul.mundt@nokia.com>:
+ *  Overall revision about smaps.
  */
 
 #include <asm/uaccess.h>
@@ -28,6 +62,7 @@
 #include <linux/namespace.h>
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
+#include <linux/rcupdate.h>
 #include <linux/kallsyms.h>
 #include <linux/mount.h>
 #include <linux/security.h>
@@ -65,8 +100,12 @@ enum pid_directory_inos {
 	PROC_TGID_STAT,
 	PROC_TGID_STATM,
 	PROC_TGID_MAPS,
+	PROC_TGID_NUMA_MAPS,
 	PROC_TGID_MOUNTS,
 	PROC_TGID_WCHAN,
+#ifdef CONFIG_MMU
+	PROC_TGID_SMAPS,
+#endif
 #ifdef CONFIG_SCHEDSTATS
 	PROC_TGID_SCHEDSTAT,
 #endif
@@ -83,7 +122,6 @@ enum pid_directory_inos {
 #ifdef CONFIG_AUDITSYSCALL
 	PROC_TGID_LOGINUID,
 #endif
-	PROC_TGID_FD_DIR,
 	PROC_TGID_OOM_SCORE,
 	PROC_TGID_OOM_ADJUST,
 	PROC_TID_INO,
@@ -102,8 +140,12 @@ enum pid_directory_inos {
 	PROC_TID_STAT,
 	PROC_TID_STATM,
 	PROC_TID_MAPS,
+	PROC_TID_NUMA_MAPS,
 	PROC_TID_MOUNTS,
 	PROC_TID_WCHAN,
+#ifdef CONFIG_MMU
+	PROC_TID_SMAPS,
+#endif
 #ifdef CONFIG_SCHEDSTATS
 	PROC_TID_SCHEDSTAT,
 #endif
@@ -120,9 +162,11 @@ enum pid_directory_inos {
 #ifdef CONFIG_AUDITSYSCALL
 	PROC_TID_LOGINUID,
 #endif
-	PROC_TID_FD_DIR = 0x8000,	/* 0x8000-0xffff */
 	PROC_TID_OOM_SCORE,
 	PROC_TID_OOM_ADJUST,
+
+	/* Add new entries before this */
+	PROC_TID_FD_DIR = 0x8000,	/* 0x8000-0xffff */
 };
 
 struct pid_entry {
@@ -144,6 +188,9 @@ static struct pid_entry tgid_base_stuff[] = {
 	E(PROC_TGID_STAT,      "stat",    S_IFREG|S_IRUGO),
 	E(PROC_TGID_STATM,     "statm",   S_IFREG|S_IRUGO),
 	E(PROC_TGID_MAPS,      "maps",    S_IFREG|S_IRUGO),
+#ifdef CONFIG_NUMA
+	E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
+#endif
 	E(PROC_TGID_MEM,       "mem",     S_IFREG|S_IRUSR|S_IWUSR),
 #ifdef CONFIG_SECCOMP
 	E(PROC_TGID_SECCOMP,   "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
@@ -152,6 +199,9 @@ static struct pid_entry tgid_base_stuff[] = {
 	E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO),
+#ifdef CONFIG_MMU
+	E(PROC_TGID_SMAPS,     "smaps",   S_IFREG|S_IRUGO),
+#endif
 #ifdef CONFIG_SECURITY
 	E(PROC_TGID_ATTR,      "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
 #endif
@@ -180,6 +230,9 @@ static struct pid_entry tid_base_stuff[] = {
 	E(PROC_TID_STAT,       "stat",    S_IFREG|S_IRUGO),
 	E(PROC_TID_STATM,      "statm",   S_IFREG|S_IRUGO),
 	E(PROC_TID_MAPS,       "maps",    S_IFREG|S_IRUGO),
+#ifdef CONFIG_NUMA
+	E(PROC_TID_NUMA_MAPS,  "numa_maps",    S_IFREG|S_IRUGO),
+#endif
 	E(PROC_TID_MEM,        "mem",     S_IFREG|S_IRUSR|S_IWUSR),
 #ifdef CONFIG_SECCOMP
 	E(PROC_TID_SECCOMP,    "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
@@ -188,6 +241,9 @@ static struct pid_entry tid_base_stuff[] = {
 	E(PROC_TID_ROOT,       "root",    S_IFLNK|S_IRWXUGO),
 	E(PROC_TID_EXE,        "exe",     S_IFLNK|S_IRWXUGO),
 	E(PROC_TID_MOUNTS,     "mounts",  S_IFREG|S_IRUGO),
+#ifdef CONFIG_MMU
+	E(PROC_TID_SMAPS,      "smaps",   S_IFREG|S_IRUGO),
+#endif
 #ifdef CONFIG_SECURITY
 	E(PROC_TID_ATTR,       "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
 #endif
@@ -236,30 +292,36 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
 
 	files = get_files_struct(task);
 	if (files) {
-		spin_lock(&files->file_lock);
+		rcu_read_lock();
 		file = fcheck_files(files, fd);
 		if (file) {
 			*mnt = mntget(file->f_vfsmnt);
 			*dentry = dget(file->f_dentry);
-			spin_unlock(&files->file_lock);
+			rcu_read_unlock();
 			put_files_struct(files);
 			return 0;
 		}
-		spin_unlock(&files->file_lock);
+		rcu_read_unlock();
 		put_files_struct(files);
 	}
 	return -ENOENT;
 }
 
-static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+static struct fs_struct *get_fs_struct(struct task_struct *task)
 {
 	struct fs_struct *fs;
-	int result = -ENOENT;
-	task_lock(proc_task(inode));
-	fs = proc_task(inode)->fs;
+	task_lock(task);
+	fs = task->fs;
 	if(fs)
 		atomic_inc(&fs->count);
-	task_unlock(proc_task(inode));
+	task_unlock(task);
+	return fs;
+}
+
+static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+{
+	struct fs_struct *fs = get_fs_struct(proc_task(inode));
+	int result = -ENOENT;
 	if (fs) {
 		read_lock(&fs->lock);
 		*mnt = mntget(fs->pwdmnt);
@@ -273,13 +335,55 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs
 
 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
 {
+	struct fs_struct *fs = get_fs_struct(proc_task(inode));
+	int result = -ENOENT;
+	if (fs) {
+		read_lock(&fs->lock);
+		*mnt = mntget(fs->rootmnt);
+		*dentry = dget(fs->root);
+		read_unlock(&fs->lock);
+		result = 0;
+		put_fs_struct(fs);
+	}
+	return result;
+}
+
+
+/* Same as proc_root_link, but this addionally tries to get fs from other
+ * threads in the group */
+static int proc_task_root_link(struct inode *inode, struct dentry **dentry,
+				struct vfsmount **mnt)
+{
 	struct fs_struct *fs;
 	int result = -ENOENT;
-	task_lock(proc_task(inode));
-	fs = proc_task(inode)->fs;
-	if(fs)
+	struct task_struct *leader = proc_task(inode);
+
+	task_lock(leader);
+	fs = leader->fs;
+	if (fs) {
 		atomic_inc(&fs->count);
-	task_unlock(proc_task(inode));
+		task_unlock(leader);
+	} else {
+		/* Try to get fs from other threads */
+		task_unlock(leader);
+		read_lock(&tasklist_lock);
+		if (pid_alive(leader)) {
+			struct task_struct *task = leader;
+
+			while ((task = next_thread(task)) != leader) {
+				task_lock(task);
+				fs = task->fs;
+				if (fs) {
+					atomic_inc(&fs->count);
+					task_unlock(task);
+					break;
+				}
+				task_unlock(task);
+			}
+		}
+		read_unlock(&tasklist_lock);
+	}
+
 	if (fs) {
 		read_lock(&fs->lock);
 		*mnt = mntget(fs->rootmnt);
@@ -291,6 +395,7 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
 	return result;
 }
 
+
 #define MAY_PTRACE(task) \
 	(task == current || \
 	(task->parent == current && \
@@ -298,33 +403,6 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
 	 (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \
 	 security_ptrace(current,task) == 0))
 
-static int may_ptrace_attach(struct task_struct *task)
-{
-	int retval = 0;
-
-	task_lock(task);
-
-	if (!task->mm)
-		goto out;
-	if (((current->uid != task->euid) ||
-	     (current->uid != task->suid) ||
-	     (current->uid != task->uid) ||
-	     (current->gid != task->egid) ||
-	     (current->gid != task->sgid) ||
-	     (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
-		goto out;
-	rmb();
-	if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE))
-		goto out;
-	if (security_ptrace(current, task))
-		goto out;
-
-	retval = 1;
-out:
-	task_unlock(task);
-	return retval;
-}
-
 static int proc_pid_environ(struct task_struct *task, char * buffer)
 {
 	int res = 0;
@@ -334,7 +412,7 @@ static int proc_pid_environ(struct task_struct *task, char * buffer)
 		if (len > PAGE_SIZE)
 			len = PAGE_SIZE;
 		res = access_process_vm(task, mm->env_start, buffer, len, 0);
-		if (!may_ptrace_attach(task))
+		if (!ptrace_may_attach(task))
 			res = -ESRCH;
 		mmput(mm);
 	}
@@ -449,14 +527,14 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 
 /* permission checks */
 
-static int proc_check_root(struct inode *inode)
+/* If the process being read is separated by chroot from the reading process,
+ * don't let the reader access the threads.
+ */
+static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt)
 {
-	struct dentry *de, *base, *root;
-	struct vfsmount *our_vfsmnt, *vfsmnt, *mnt;
+	struct dentry *de, *base;
+	struct vfsmount *our_vfsmnt, *mnt;
 	int res = 0;
-
-	if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
-		return -ENOENT;
 	read_lock(&current->fs->lock);
 	our_vfsmnt = mntget(current->fs->rootmnt);
 	base = dget(current->fs->root);
@@ -489,6 +567,16 @@ out:
 	goto exit;
 }
 
+static int proc_check_root(struct inode *inode)
+{
+	struct dentry *root;
+	struct vfsmount *vfsmnt;
+
+	if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
+		return -ENOENT;
+	return proc_check_chroot(root, vfsmnt);
+}
+
 static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
 	if (generic_permission(inode, mask, NULL) != 0)
@@ -496,6 +584,20 @@ static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
 	return proc_check_root(inode);
 }
 
+static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	struct dentry *root;
+	struct vfsmount *vfsmnt;
+
+	if (generic_permission(inode, mask, NULL) != 0)
+		return -EACCES;
+
+	if (proc_task_root_link(inode, &root, &vfsmnt))
+		return -ENOENT;
+
+	return proc_check_chroot(root, vfsmnt);
+}
+
 extern struct seq_operations proc_pid_maps_op;
 static int maps_open(struct inode *inode, struct file *file)
 {
@@ -515,6 +617,48 @@ static struct file_operations proc_maps_operations = {
 	.release	= seq_release,
 };
 
+#ifdef CONFIG_NUMA
+extern struct seq_operations proc_pid_numa_maps_op;
+static int numa_maps_open(struct inode *inode, struct file *file)
+{
+	struct task_struct *task = proc_task(inode);
+	int ret = seq_open(file, &proc_pid_numa_maps_op);
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		m->private = task;
+	}
+	return ret;
+}
+
+static struct file_operations proc_numa_maps_operations = {
+	.open		= numa_maps_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+#endif
+
+#ifdef CONFIG_MMU
+extern struct seq_operations proc_pid_smaps_op;
+static int smaps_open(struct inode *inode, struct file *file)
+{
+	struct task_struct *task = proc_task(inode);
+	int ret = seq_open(file, &proc_pid_smaps_op);
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		m->private = task;
+	}
+	return ret;
+}
+
+static struct file_operations proc_smaps_operations = {
+	.open		= smaps_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+#endif
+
 extern struct seq_operations mounts_op;
 static int mounts_open(struct inode *inode, struct file *file)
 {
@@ -597,7 +741,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
 	int ret = -ESRCH;
 	struct mm_struct *mm;
 
-	if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
+	if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
 		goto out;
 
 	ret = -ENOMEM;
@@ -623,7 +767,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
 
 		this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
 		retval = access_process_vm(task, src, page, this_len, 0);
-		if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) {
+		if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) {
 			if (!ret)
 				ret = -EIO;
 			break;
@@ -661,7 +805,7 @@ static ssize_t mem_write(struct file * file, const char * buf,
 	struct task_struct *task = proc_task(file->f_dentry->d_inode);
 	unsigned long dst = *ppos;
 
-	if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
+	if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
 		return -ESRCH;
 
 	page = (char *)__get_free_page(GFP_USER);
@@ -890,7 +1034,7 @@ static struct file_operations proc_seccomp_operations = {
 };
 #endif /* CONFIG_SECCOMP */
 
-static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;
 	int error = -EACCES;
@@ -907,7 +1051,7 @@ static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 	error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
 	nd->last_type = LAST_BIND;
 out:
-	return error;
+	return ERR_PTR(error);
 }
 
 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
@@ -978,6 +1122,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
 	int retval;
 	char buf[NUMBUF];
 	struct files_struct * files;
+	struct fdtable *fdt;
 
 	retval = -ENOENT;
 	if (!pid_alive(p))
@@ -1000,15 +1145,16 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
 			files = get_files_struct(p);
 			if (!files)
 				goto out;
-			spin_lock(&files->file_lock);
+			rcu_read_lock();
+			fdt = files_fdtable(files);
 			for (fd = filp->f_pos-2;
-			     fd < files->max_fds;
+			     fd < fdt->max_fds;
 			     fd++, filp->f_pos++) {
 				unsigned int i,j;
 
 				if (!fcheck_files(files, fd))
 					continue;
-				spin_unlock(&files->file_lock);
+				rcu_read_unlock();
 
 				j = NUMBUF;
 				i = fd;
@@ -1020,12 +1166,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
 
 				ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
 				if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
-					spin_lock(&files->file_lock);
+					rcu_read_lock();
 					break;
 				}
-				spin_lock(&files->file_lock);
+				rcu_read_lock();
 			}
-			spin_unlock(&files->file_lock);
+			rcu_read_unlock();
 			put_files_struct(files);
 	}
 out:
@@ -1200,9 +1346,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 
 	files = get_files_struct(task);
 	if (files) {
-		spin_lock(&files->file_lock);
+		rcu_read_lock();
 		if (fcheck_files(files, fd)) {
-			spin_unlock(&files->file_lock);
+			rcu_read_unlock();
 			put_files_struct(files);
 			if (task_dumpable(task)) {
 				inode->i_uid = task->euid;
@@ -1214,7 +1360,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 			security_task_to_inode(task, inode);
 			return 1;
 		}
-		spin_unlock(&files->file_lock);
+		rcu_read_unlock();
 		put_files_struct(files);
 	}
 	d_drop(dentry);
@@ -1306,7 +1452,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
 	if (!files)
 		goto out_unlock;
 	inode->i_mode = S_IFLNK;
-	spin_lock(&files->file_lock);
+	rcu_read_lock();
 	file = fcheck_files(files, fd);
 	if (!file)
 		goto out_unlock2;
@@ -1314,7 +1460,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
 		inode->i_mode |= S_IRUSR | S_IXUSR;
 	if (file->f_mode & 2)
 		inode->i_mode |= S_IWUSR | S_IXUSR;
-	spin_unlock(&files->file_lock);
+	rcu_read_unlock();
 	put_files_struct(files);
 	inode->i_op = &proc_pid_link_inode_operations;
 	inode->i_size = 64;
@@ -1324,7 +1470,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
 	return NULL;
 
 out_unlock2:
-	spin_unlock(&files->file_lock);
+	rcu_read_unlock();
 	put_files_struct(files);
 out_unlock:
 	iput(inode);
@@ -1355,7 +1501,7 @@ static struct inode_operations proc_fd_inode_operations = {
 
 static struct inode_operations proc_task_inode_operations = {
 	.lookup		= proc_task_lookup,
-	.permission	= proc_permission,
+	.permission	= proc_task_permission,
 };
 
 #ifdef CONFIG_SECURITY
@@ -1524,6 +1670,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
 		case PROC_TGID_MAPS:
 			inode->i_fop = &proc_maps_operations;
 			break;
+#ifdef CONFIG_NUMA
+		case PROC_TID_NUMA_MAPS:
+		case PROC_TGID_NUMA_MAPS:
+			inode->i_fop = &proc_numa_maps_operations;
+			break;
+#endif
 		case PROC_TID_MEM:
 		case PROC_TGID_MEM:
 			inode->i_op = &proc_mem_inode_operations;
@@ -1539,6 +1691,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
 		case PROC_TGID_MOUNTS:
 			inode->i_fop = &proc_mounts_operations;
 			break;
+#ifdef CONFIG_MMU
+		case PROC_TID_SMAPS:
+		case PROC_TGID_SMAPS:
+			inode->i_fop = &proc_smaps_operations;
+			break;
+#endif
 #ifdef CONFIG_SECURITY
 		case PROC_TID_ATTR:
 			inode->i_nlink = 2;
@@ -1692,11 +1850,11 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
 	return vfs_readlink(dentry,buffer,buflen,tmp);
 }
 
-static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	char tmp[30];
 	sprintf(tmp, "%d", current->tgid);
-	return vfs_follow_link(nd,tmp);
+	return ERR_PTR(vfs_follow_link(nd,tmp));
 }	
 
 static struct inode_operations proc_self_inode_operations = {
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 6c6315d0402..b638fb50074 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -249,6 +249,18 @@ out:
 	return error;
 }
 
+static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
+			struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	struct proc_dir_entry *de = PROC_I(inode)->pde;
+	if (de && de->nlink)
+		inode->i_nlink = de->nlink;
+
+	generic_fillattr(inode, stat);
+	return 0;
+}
+
 static struct inode_operations proc_file_inode_operations = {
 	.setattr	= proc_notify_change,
 };
@@ -329,10 +341,10 @@ static void release_inode_number(unsigned int inum)
 	spin_unlock(&proc_inum_lock);
 }
 
-static int proc_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	nd_set_link(nd, PDE(dentry->d_inode)->data);
-	return 0;
+	return NULL;
 }
 
 static struct inode_operations proc_link_inode_operations = {
@@ -475,6 +487,7 @@ static struct file_operations proc_dir_operations = {
  */
 static struct inode_operations proc_dir_inode_operations = {
 	.lookup		= proc_lookup,
+	.getattr	= proc_getattr,
 	.setattr	= proc_notify_change,
 };
 
@@ -520,7 +533,7 @@ static void proc_kill_inodes(struct proc_dir_entry *de)
 	 */
 	file_list_lock();
 	list_for_each(p, &sb->s_files) {
-		struct file * filp = list_entry(p, struct file, f_list);
+		struct file * filp = list_entry(p, struct file, f_u.fu_list);
 		struct dentry * dentry = filp->f_dentry;
 		struct inode * inode;
 		struct file_operations *fops;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 133c2868510..e6a818a93f3 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -60,6 +60,8 @@ static void proc_delete_inode(struct inode *inode)
 	struct proc_dir_entry *de;
 	struct task_struct *tsk;
 
+	truncate_inode_pages(&inode->i_data, 0);
+
 	/* Let go of any associated process */
 	tsk = PROC_I(inode)->task;
 	if (tsk)
@@ -154,10 +156,13 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
 
 	WARN_ON(de && de->deleted);
 
+	if (de != NULL && !try_module_get(de->owner))
+		goto out_mod;
+
 	inode = iget(sb, ino);
 	if (!inode)
-		goto out_fail;
-	
+		goto out_ino;
+
 	PROC_I(inode)->pde = de;
 	if (de) {
 		if (de->mode) {
@@ -169,20 +174,20 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
 			inode->i_size = de->size;
 		if (de->nlink)
 			inode->i_nlink = de->nlink;
-		if (!try_module_get(de->owner))
-			goto out_fail;
 		if (de->proc_iops)
 			inode->i_op = de->proc_iops;
 		if (de->proc_fops)
 			inode->i_fop = de->proc_fops;
 	}
 
-out:
 	return inode;
 
-out_fail:
+out_ino:
+	if (de != NULL)
+		module_put(de->owner);
+out_mod:
 	de_put(de);
-	goto out;
+	return NULL;
 }			
 
 int proc_fill_super(struct super_block *s, void *data, int silent)
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index f3bf016d5ee..cff10ab1af6 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -91,6 +91,7 @@ static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos)
 			next = _rb;
 			break;
 		}
+		pos--;
 	}
 
 	return next;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index a3453555a94..5b6b0b6038a 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -629,12 +629,4 @@ void __init proc_misc_init(void)
 	if (entry)
 		entry->proc_fops = &proc_sysrq_trigger_operations;
 #endif
-#ifdef CONFIG_PPC32
-	{
-		extern struct file_operations ppc_htab_operations;
-		entry = create_proc_entry("ppc_htab", S_IRUGO|S_IWUSR, NULL);
-		if (entry)
-			entry->proc_fops = &ppc_htab_operations;
-	}
-#endif
 }
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 28b4a0253a9..d2fa42006d8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -2,29 +2,53 @@
 #include <linux/hugetlb.h>
 #include <linux/mount.h>
 #include <linux/seq_file.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/mempolicy.h>
+
 #include <asm/elf.h>
 #include <asm/uaccess.h>
+#include <asm/tlbflush.h>
 #include "internal.h"
 
 char *task_mem(struct mm_struct *mm, char *buffer)
 {
 	unsigned long data, text, lib;
+	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
+
+	/*
+	 * Note: to minimize their overhead, mm maintains hiwater_vm and
+	 * hiwater_rss only when about to *lower* total_vm or rss.  Any
+	 * collector of these hiwater stats must therefore get total_vm
+	 * and rss too, which will usually be the higher.  Barriers? not
+	 * worth the effort, such snapshots can always be inconsistent.
+	 */
+	hiwater_vm = total_vm = mm->total_vm;
+	if (hiwater_vm < mm->hiwater_vm)
+		hiwater_vm = mm->hiwater_vm;
+	hiwater_rss = total_rss = get_mm_rss(mm);
+	if (hiwater_rss < mm->hiwater_rss)
+		hiwater_rss = mm->hiwater_rss;
 
 	data = mm->total_vm - mm->shared_vm - mm->stack_vm;
 	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
 	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
 	buffer += sprintf(buffer,
+		"VmPeak:\t%8lu kB\n"
 		"VmSize:\t%8lu kB\n"
 		"VmLck:\t%8lu kB\n"
+		"VmHWM:\t%8lu kB\n"
 		"VmRSS:\t%8lu kB\n"
 		"VmData:\t%8lu kB\n"
 		"VmStk:\t%8lu kB\n"
 		"VmExe:\t%8lu kB\n"
 		"VmLib:\t%8lu kB\n"
 		"VmPTE:\t%8lu kB\n",
-		(mm->total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
+		hiwater_vm << (PAGE_SHIFT-10),
+		(total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
 		mm->locked_vm << (PAGE_SHIFT-10),
-		get_mm_counter(mm, rss) << (PAGE_SHIFT-10),
+		hiwater_rss << (PAGE_SHIFT-10),
+		total_rss << (PAGE_SHIFT-10),
 		data << (PAGE_SHIFT-10),
 		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
 		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
@@ -39,13 +63,11 @@ unsigned long task_vsize(struct mm_struct *mm)
 int task_statm(struct mm_struct *mm, int *shared, int *text,
 	       int *data, int *resident)
 {
-	int rss = get_mm_counter(mm, rss);
-
-	*shared = rss - get_mm_counter(mm, anon_rss);
+	*shared = get_mm_counter(mm, file_rss);
 	*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
 								>> PAGE_SHIFT;
 	*data = mm->total_vm - mm->shared_vm;
-	*resident = rss;
+	*resident = *shared + get_mm_counter(mm, anon_rss);
 	return mm->total_vm;
 }
 
@@ -87,49 +109,58 @@ static void pad_len_spaces(struct seq_file *m, int len)
 	seq_printf(m, "%*c", len, ' ');
 }
 
-static int show_map(struct seq_file *m, void *v)
+struct mem_size_stats
+{
+	unsigned long resident;
+	unsigned long shared_clean;
+	unsigned long shared_dirty;
+	unsigned long private_clean;
+	unsigned long private_dirty;
+};
+
+static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
 {
 	struct task_struct *task = m->private;
-	struct vm_area_struct *map = v;
-	struct mm_struct *mm = map->vm_mm;
-	struct file *file = map->vm_file;
-	int flags = map->vm_flags;
+	struct vm_area_struct *vma = v;
+	struct mm_struct *mm = vma->vm_mm;
+	struct file *file = vma->vm_file;
+	int flags = vma->vm_flags;
 	unsigned long ino = 0;
 	dev_t dev = 0;
 	int len;
 
 	if (file) {
-		struct inode *inode = map->vm_file->f_dentry->d_inode;
+		struct inode *inode = vma->vm_file->f_dentry->d_inode;
 		dev = inode->i_sb->s_dev;
 		ino = inode->i_ino;
 	}
 
 	seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
-			map->vm_start,
-			map->vm_end,
+			vma->vm_start,
+			vma->vm_end,
 			flags & VM_READ ? 'r' : '-',
 			flags & VM_WRITE ? 'w' : '-',
 			flags & VM_EXEC ? 'x' : '-',
 			flags & VM_MAYSHARE ? 's' : 'p',
-			map->vm_pgoff << PAGE_SHIFT,
+			vma->vm_pgoff << PAGE_SHIFT,
 			MAJOR(dev), MINOR(dev), ino, &len);
 
 	/*
 	 * Print the dentry name for named mappings, and a
 	 * special [heap] marker for the heap:
 	 */
-	if (map->vm_file) {
+	if (file) {
 		pad_len_spaces(m, len);
-		seq_path(m, file->f_vfsmnt, file->f_dentry, "");
+		seq_path(m, file->f_vfsmnt, file->f_dentry, "\n");
 	} else {
 		if (mm) {
-			if (map->vm_start <= mm->start_brk &&
-						map->vm_end >= mm->brk) {
+			if (vma->vm_start <= mm->start_brk &&
+						vma->vm_end >= mm->brk) {
 				pad_len_spaces(m, len);
 				seq_puts(m, "[heap]");
 			} else {
-				if (map->vm_start <= mm->start_stack &&
-					map->vm_end >= mm->start_stack) {
+				if (vma->vm_start <= mm->start_stack &&
+					vma->vm_end >= mm->start_stack) {
 
 					pad_len_spaces(m, len);
 					seq_puts(m, "[stack]");
@@ -141,24 +172,141 @@ static int show_map(struct seq_file *m, void *v)
 		}
 	}
 	seq_putc(m, '\n');
-	if (m->count < m->size)  /* map is copied successfully */
-		m->version = (map != get_gate_vma(task))? map->vm_start: 0;
+
+	if (mss)
+		seq_printf(m,
+			   "Size:          %8lu kB\n"
+			   "Rss:           %8lu kB\n"
+			   "Shared_Clean:  %8lu kB\n"
+			   "Shared_Dirty:  %8lu kB\n"
+			   "Private_Clean: %8lu kB\n"
+			   "Private_Dirty: %8lu kB\n",
+			   (vma->vm_end - vma->vm_start) >> 10,
+			   mss->resident >> 10,
+			   mss->shared_clean  >> 10,
+			   mss->shared_dirty  >> 10,
+			   mss->private_clean >> 10,
+			   mss->private_dirty >> 10);
+
+	if (m->count < m->size)  /* vma is copied successfully */
+		m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
 	return 0;
 }
 
+static int show_map(struct seq_file *m, void *v)
+{
+	return show_map_internal(m, v, 0);
+}
+
+static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+				unsigned long addr, unsigned long end,
+				struct mem_size_stats *mss)
+{
+	pte_t *pte, ptent;
+	spinlock_t *ptl;
+	unsigned long pfn;
+	struct page *page;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	do {
+		ptent = *pte;
+		if (!pte_present(ptent))
+			continue;
+
+		mss->resident += PAGE_SIZE;
+		pfn = pte_pfn(ptent);
+		if (!pfn_valid(pfn))
+			continue;
+
+		page = pfn_to_page(pfn);
+		if (page_count(page) >= 2) {
+			if (pte_dirty(ptent))
+				mss->shared_dirty += PAGE_SIZE;
+			else
+				mss->shared_clean += PAGE_SIZE;
+		} else {
+			if (pte_dirty(ptent))
+				mss->private_dirty += PAGE_SIZE;
+			else
+				mss->private_clean += PAGE_SIZE;
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
+}
+
+static inline void smaps_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+				unsigned long addr, unsigned long end,
+				struct mem_size_stats *mss)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		smaps_pte_range(vma, pmd, addr, next, mss);
+	} while (pmd++, addr = next, addr != end);
+}
+
+static inline void smaps_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+				unsigned long addr, unsigned long end,
+				struct mem_size_stats *mss)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		smaps_pmd_range(vma, pud, addr, next, mss);
+	} while (pud++, addr = next, addr != end);
+}
+
+static inline void smaps_pgd_range(struct vm_area_struct *vma,
+				unsigned long addr, unsigned long end,
+				struct mem_size_stats *mss)
+{
+	pgd_t *pgd;
+	unsigned long next;
+
+	pgd = pgd_offset(vma->vm_mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		smaps_pud_range(vma, pgd, addr, next, mss);
+	} while (pgd++, addr = next, addr != end);
+}
+
+static int show_smap(struct seq_file *m, void *v)
+{
+	struct vm_area_struct *vma = v;
+	struct mem_size_stats mss;
+
+	memset(&mss, 0, sizeof mss);
+	if (vma->vm_mm)
+		smaps_pgd_range(vma, vma->vm_start, vma->vm_end, &mss);
+	return show_map_internal(m, v, &mss);
+}
+
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
 	struct task_struct *task = m->private;
 	unsigned long last_addr = m->version;
 	struct mm_struct *mm;
-	struct vm_area_struct *map, *tail_map;
+	struct vm_area_struct *vma, *tail_vma;
 	loff_t l = *pos;
 
 	/*
 	 * We remember last_addr rather than next_addr to hit with
 	 * mmap_cache most of the time. We have zero last_addr at
-	 * the begining and also after lseek. We will have -1 last_addr
-	 * after the end of the maps.
+	 * the beginning and also after lseek. We will have -1 last_addr
+	 * after the end of the vmas.
 	 */
 
 	if (last_addr == -1UL)
@@ -168,47 +316,47 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 	if (!mm)
 		return NULL;
 
-	tail_map = get_gate_vma(task);
+	tail_vma = get_gate_vma(task);
 	down_read(&mm->mmap_sem);
 
 	/* Start with last addr hint */
-	if (last_addr && (map = find_vma(mm, last_addr))) {
-		map = map->vm_next;
+	if (last_addr && (vma = find_vma(mm, last_addr))) {
+		vma = vma->vm_next;
 		goto out;
 	}
 
 	/*
-	 * Check the map index is within the range and do
+	 * Check the vma index is within the range and do
 	 * sequential scan until m_index.
 	 */
-	map = NULL;
+	vma = NULL;
 	if ((unsigned long)l < mm->map_count) {
-		map = mm->mmap;
-		while (l-- && map)
-			map = map->vm_next;
+		vma = mm->mmap;
+		while (l-- && vma)
+			vma = vma->vm_next;
 		goto out;
 	}
 
 	if (l != mm->map_count)
-		tail_map = NULL; /* After gate map */
+		tail_vma = NULL; /* After gate vma */
 
 out:
-	if (map)
-		return map;
+	if (vma)
+		return vma;
 
-	/* End of maps has reached */
-	m->version = (tail_map != NULL)? 0: -1UL;
+	/* End of vmas has been reached */
+	m->version = (tail_vma != NULL)? 0: -1UL;
 	up_read(&mm->mmap_sem);
 	mmput(mm);
-	return tail_map;
+	return tail_vma;
 }
 
 static void m_stop(struct seq_file *m, void *v)
 {
 	struct task_struct *task = m->private;
-	struct vm_area_struct *map = v;
-	if (map && map != get_gate_vma(task)) {
-		struct mm_struct *mm = map->vm_mm;
+	struct vm_area_struct *vma = v;
+	if (vma && vma != get_gate_vma(task)) {
+		struct mm_struct *mm = vma->vm_mm;
 		up_read(&mm->mmap_sem);
 		mmput(mm);
 	}
@@ -217,14 +365,14 @@ static void m_stop(struct seq_file *m, void *v)
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct task_struct *task = m->private;
-	struct vm_area_struct *map = v;
-	struct vm_area_struct *tail_map = get_gate_vma(task);
+	struct vm_area_struct *vma = v;
+	struct vm_area_struct *tail_vma = get_gate_vma(task);
 
 	(*pos)++;
-	if (map && (map != tail_map) && map->vm_next)
-		return map->vm_next;
+	if (vma && (vma != tail_vma) && vma->vm_next)
+		return vma->vm_next;
 	m_stop(m, v);
-	return (map != tail_map)? tail_map: NULL;
+	return (vma != tail_vma)? tail_vma: NULL;
 }
 
 struct seq_operations proc_pid_maps_op = {
@@ -233,3 +381,139 @@ struct seq_operations proc_pid_maps_op = {
 	.stop	= m_stop,
 	.show	= show_map
 };
+
+struct seq_operations proc_pid_smaps_op = {
+	.start	= m_start,
+	.next	= m_next,
+	.stop	= m_stop,
+	.show	= show_smap
+};
+
+#ifdef CONFIG_NUMA
+
+struct numa_maps {
+	unsigned long pages;
+	unsigned long anon;
+	unsigned long mapped;
+	unsigned long mapcount_max;
+	unsigned long node[MAX_NUMNODES];
+};
+
+/*
+ * Calculate numa node maps for a vma
+ */
+static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma)
+{
+	struct page *page;
+	unsigned long vaddr;
+	struct mm_struct *mm = vma->vm_mm;
+	int i;
+	struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL);
+
+	if (!md)
+		return NULL;
+	md->pages = 0;
+	md->anon = 0;
+	md->mapped = 0;
+	md->mapcount_max = 0;
+	for_each_node(i)
+		md->node[i] =0;
+
+ 	for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
+		page = follow_page(mm, vaddr, 0);
+		if (page) {
+			int count = page_mapcount(page);
+
+			if (count)
+				md->mapped++;
+			if (count > md->mapcount_max)
+				md->mapcount_max = count;
+			md->pages++;
+			if (PageAnon(page))
+				md->anon++;
+			md->node[page_to_nid(page)]++;
+		}
+		cond_resched();
+	}
+	return md;
+}
+
+static int show_numa_map(struct seq_file *m, void *v)
+{
+	struct task_struct *task = m->private;
+	struct vm_area_struct *vma = v;
+	struct mempolicy *pol;
+	struct numa_maps *md;
+	struct zone **z;
+	int n;
+	int first;
+
+	if (!vma->vm_mm)
+		return 0;
+
+	md = get_numa_maps(vma);
+	if (!md)
+		return 0;
+
+	seq_printf(m, "%08lx", vma->vm_start);
+	pol = get_vma_policy(task, vma, vma->vm_start);
+	/* Print policy */
+	switch (pol->policy) {
+	case MPOL_PREFERRED:
+		seq_printf(m, " prefer=%d", pol->v.preferred_node);
+		break;
+	case MPOL_BIND:
+		seq_printf(m, " bind={");
+		first = 1;
+		for (z = pol->v.zonelist->zones; *z; z++) {
+
+			if (!first)
+				seq_putc(m, ',');
+			else
+				first = 0;
+			seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id,
+					(*z)->name);
+		}
+		seq_putc(m, '}');
+		break;
+	case MPOL_INTERLEAVE:
+		seq_printf(m, " interleave={");
+		first = 1;
+		for_each_node(n) {
+			if (node_isset(n, pol->v.nodes)) {
+				if (!first)
+					seq_putc(m,',');
+				else
+					first = 0;
+				seq_printf(m, "%d",n);
+			}
+		}
+		seq_putc(m, '}');
+		break;
+	default:
+		seq_printf(m," default");
+		break;
+	}
+	seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu",
+			md->mapcount_max, md->pages, md->mapped);
+	if (md->anon)
+		seq_printf(m," Anon=%lu",md->anon);
+
+	for_each_online_node(n) {
+		if (md->node[n])
+			seq_printf(m, " N%d=%lu", n, md->node[n]);
+	}
+	seq_putc(m, '\n');
+	kfree(md);
+	if (m->count < m->size)  /* vma is copied successfully */
+		m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
+	return 0;
+}
+
+struct seq_operations proc_pid_numa_maps_op = {
+	.start	= m_start,
+	.next	= m_next,
+	.stop	= m_stop,
+	.show	= show_numa_map
+};
+#endif
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index b79162a3547..80f32911c0c 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -63,6 +63,7 @@ int qnx4_sync_inode(struct inode *inode)
 static void qnx4_delete_inode(struct inode *inode)
 {
 	QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino));
+	truncate_inode_pages(&inode->i_data, 0);
 	inode->i_size = 0;
 	qnx4_truncate(inode);
 	lock_kernel();
diff --git a/fs/quota.c b/fs/quota.c
index f5d1cff5519..1df7832b4e0 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -118,6 +118,10 @@ static int xqm_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t i
 			if (!sb->s_qcop->get_xquota)
 				return -ENOSYS;
 			break;
+		case Q_XQUOTASYNC:
+			if (!sb->s_qcop->quota_sync)
+				return -ENOSYS;
+			break;
 		default:
 			return -EINVAL;
 	}
@@ -128,7 +132,7 @@ static int xqm_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t i
 		     (type == XQM_GRPQUOTA && !in_egroup_p(id))) &&
 		     !capable(CAP_SYS_ADMIN))
 			return -EPERM;
-	} else if (cmd != Q_XGETQSTAT) {
+	} else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) {
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 	}
@@ -322,6 +326,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void
 				return -EFAULT;
 			return 0;
 		}
+		case Q_XQUOTASYNC:
+			return sb->s_qcop->quota_sync(sb, type);
 		/* We never reach here unless validity check is broken */
 		default:
 			BUG();
diff --git a/fs/read_write.c b/fs/read_write.c
index 563abd09b5c..a091ee4f430 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -188,7 +188,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
 	struct inode *inode;
 	loff_t pos;
 
-	if (unlikely(count > file->f_maxcount))
+	if (unlikely(count > INT_MAX))
 		goto Einval;
 	pos = *ppos;
 	if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
@@ -499,6 +499,9 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	ret = rw_verify_area(type, file, pos, tot_len);
 	if (ret)
 		goto out;
+	ret = security_file_permission(file, type == READ ? MAY_READ : MAY_WRITE);
+	if (ret)
+		goto out;
 
 	fnv = NULL;
 	if (type == READ) {
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index c9f178fb494..c20babd6216 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -667,7 +667,7 @@ static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handl
 	if (th->t_trans_id) {
 		int err;
 		// update any changes we made to blk count
-		reiserfs_update_sd(th, inode);
+		mark_inode_dirty(inode);
 		err =
 		    journal_end(th, inode->i_sb,
 				JOURNAL_PER_BALANCE_CNT * 3 + 1 +
@@ -855,17 +855,18 @@ static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_han
 
 		if (th->t_trans_id) {
 			reiserfs_write_lock(inode->i_sb);
-			reiserfs_update_sd(th, inode);	// And update on-disk metadata
+			// this sets the proper flags for O_SYNC to trigger a commit
+			mark_inode_dirty(inode);
 			reiserfs_write_unlock(inode->i_sb);
 		} else
-			inode->i_sb->s_op->dirty_inode(inode);
+			mark_inode_dirty(inode);
 
 		sd_update = 1;
 	}
 	if (th->t_trans_id) {
 		reiserfs_write_lock(inode->i_sb);
 		if (!sd_update)
-			reiserfs_update_sd(th, inode);
+			mark_inode_dirty(inode);
 		status = journal_end(th, th->t_super, th->t_blocks_allocated);
 		if (status)
 			retval = status;
@@ -1320,7 +1321,7 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 				return err;
 			}
 			reiserfs_update_inode_transaction(inode);
-			reiserfs_update_sd(&th, inode);
+			mark_inode_dirty(inode);
 			err = journal_end(&th, inode->i_sb, 1);
 			if (err) {
 				reiserfs_write_unlock(inode->i_sb);
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 2706e2adffa..45829889dcd 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -2022,7 +2022,7 @@ static int get_neighbors(struct tree_balance *p_s_tb, int n_h)
 }
 
 #ifdef CONFIG_REISERFS_CHECK
-void *reiserfs_kmalloc(size_t size, int flags, struct super_block *s)
+void *reiserfs_kmalloc(size_t size, gfp_t flags, struct super_block *s)
 {
 	void *vp;
 	static size_t malloced;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index d9f614a5773..5f82352b97e 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -33,6 +33,8 @@ void reiserfs_delete_inode(struct inode *inode)
 	    2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
 	struct reiserfs_transaction_handle th;
 
+	truncate_inode_pages(&inode->i_data, 0);
+
 	reiserfs_write_lock(inode->i_sb);
 
 	/* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
@@ -1985,7 +1987,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
 	 * code really needs to be reworked, but this will take care of it
 	 * for now. -jeffm */
-	if (REISERFS_I(dir)->i_acl_default) {
+	if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) {
 		reiserfs_write_unlock_xattrs(dir->i_sb);
 		iput(inode);
 		reiserfs_write_lock_xattrs(dir->i_sb);
@@ -2637,6 +2639,12 @@ static int reiserfs_commit_write(struct file *f, struct page *page,
 		}
 		reiserfs_update_inode_transaction(inode);
 		inode->i_size = pos;
+		/*
+		 * this will just nest into our transaction.  It's important
+		 * to use mark_inode_dirty so the inode gets pushed around on the
+		 * dirty lists, and so that O_SYNC works as expected
+		 */
+		mark_inode_dirty(inode);
 		reiserfs_update_sd(&myth, inode);
 		update_sd = 1;
 		ret = journal_end(&myth, inode->i_sb, 1);
@@ -2647,21 +2655,13 @@ static int reiserfs_commit_write(struct file *f, struct page *page,
 	if (th) {
 		reiserfs_write_lock(inode->i_sb);
 		if (!update_sd)
-			reiserfs_update_sd(th, inode);
+			mark_inode_dirty(inode);
 		ret = reiserfs_end_persistent_transaction(th);
 		reiserfs_write_unlock(inode->i_sb);
 		if (ret)
 			goto out;
 	}
 
-	/* we test for O_SYNC here so we can commit the transaction
-	 ** for any packed tails the file might have had
-	 */
-	if (f && (f->f_flags & O_SYNC)) {
-		reiserfs_write_lock(inode->i_sb);
-		ret = reiserfs_commit_for_inode(inode);
-		reiserfs_write_unlock(inode->i_sb);
-	}
       out:
 	return ret;
 
@@ -2842,7 +2842,7 @@ static int reiserfs_set_page_dirty(struct page *page)
  * even in -o notail mode, we can't be sure an old mount without -o notail
  * didn't create files with tails.
  */
-static int reiserfs_releasepage(struct page *page, int unused_gfp_flags)
+static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
 {
 	struct inode *inode = page->mapping->host;
 	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index ca7989b04be..4b15761434b 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1034,7 +1034,7 @@ static int flush_commit_list(struct super_block *s,
 		    SB_ONDISK_JOURNAL_SIZE(s);
 		tbh = journal_find_get_block(s, bn);
 		if (buffer_dirty(tbh))	/* redundant, ll_rw_block() checks */
-			ll_rw_block(WRITE, 1, &tbh);
+			ll_rw_block(SWRITE, 1, &tbh);
 		put_bh(tbh);
 	}
 	atomic_dec(&journal->j_async_throttle);
@@ -2172,7 +2172,7 @@ static int journal_read_transaction(struct super_block *p_s_sb,
 	/* flush out the real blocks */
 	for (i = 0; i < get_desc_trans_len(desc); i++) {
 		set_buffer_dirty(real_blocks[i]);
-		ll_rw_block(WRITE, 1, real_blocks + i);
+		ll_rw_block(SWRITE, 1, real_blocks + i);
 	}
 	for (i = 0; i < get_desc_trans_len(desc); i++) {
 		wait_on_buffer(real_blocks[i]);
@@ -2868,8 +2868,7 @@ static void let_transaction_grow(struct super_block *sb, unsigned long trans_id)
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	unsigned long bcount = journal->j_bcount;
 	while (1) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(1);
+		schedule_timeout_uninterruptible(1);
 		journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
 		while ((atomic_read(&journal->j_wcount) > 0 ||
 			atomic_read(&journal->j_jlock)) &&
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index a20bbc1642d..3549067c42d 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -593,6 +593,9 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
 	 */
 	inode->i_uid = current->fsuid;
 	inode->i_mode = mode;
+	/* Make inode invalid - just in case we are going to drop it before
+	 * the initialization happens */
+	INODE_PKEY(inode)->k_objectid = 0;
 
 	if (dir->i_mode & S_ISGID) {
 		inode->i_gid = dir->i_gid;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 6951c35755b..42afb5bef11 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1024,12 +1024,8 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 				strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg);
 				*mount_options |= 1 << REISERFS_QUOTA;
 			} else {
-				if (REISERFS_SB(s)->s_qf_names[qtype]) {
-					kfree(REISERFS_SB(s)->
-					      s_qf_names[qtype]);
-					REISERFS_SB(s)->s_qf_names[qtype] =
-					    NULL;
-				}
+				kfree(REISERFS_SB(s)->s_qf_names[qtype]);
+				REISERFS_SB(s)->s_qf_names[qtype] = NULL;
 			}
 		}
 		if (c == 'f') {
@@ -1158,11 +1154,10 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 	if (!reiserfs_parse_options
 	    (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) {
 #ifdef CONFIG_QUOTA
-		for (i = 0; i < MAXQUOTAS; i++)
-			if (REISERFS_SB(s)->s_qf_names[i]) {
-				kfree(REISERFS_SB(s)->s_qf_names[i]);
-				REISERFS_SB(s)->s_qf_names[i] = NULL;
-			}
+		for (i = 0; i < MAXQUOTAS; i++) {
+			kfree(REISERFS_SB(s)->s_qf_names[i]);
+			REISERFS_SB(s)->s_qf_names[i] = NULL;
+		}
 #endif
 		return -EINVAL;
 	}
@@ -1934,20 +1929,17 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 			if (SB_AP_BITMAP(s))
 				brelse(SB_AP_BITMAP(s)[j].bh);
 		}
-		if (SB_AP_BITMAP(s))
-			vfree(SB_AP_BITMAP(s));
+		vfree(SB_AP_BITMAP(s));
 	}
 	if (SB_BUFFER_WITH_SB(s))
 		brelse(SB_BUFFER_WITH_SB(s));
 #ifdef CONFIG_QUOTA
 	for (j = 0; j < MAXQUOTAS; j++) {
-		if (sbi->s_qf_names[j])
-			kfree(sbi->s_qf_names[j]);
+		kfree(sbi->s_qf_names[j]);
+		sbi->s_qf_names[j] = NULL;
 	}
 #endif
-	if (sbi != NULL) {
-		kfree(sbi);
-	}
+	kfree(sbi);
 
 	s->s_fs_info = NULL;
 	return errval;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 87ac9dc8b38..72e12079867 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -453,7 +453,7 @@ static struct page *reiserfs_get_page(struct inode *dir, unsigned long n)
 	struct page *page;
 	/* We can deadlock if we try to free dentries,
 	   and an unlink/rmdir has just occured - GFP_NOFS avoids this */
-	mapping->flags = (mapping->flags & ~__GFP_BITS_MASK) | GFP_NOFS;
+	mapping_set_gfp_mask(mapping, GFP_NOFS);
 	page = read_cache_page(mapping, n,
 			       (filler_t *) mapping->a_ops->readpage, NULL);
 	if (!IS_ERR(page)) {
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 6703efa3c43..a47ac9aac8b 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -296,8 +296,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 		}
 	}
 
-	if (value)
-		kfree(value);
+	kfree(value);
 
 	if (!error) {
 		/* Release the old one */
diff --git a/fs/relayfs/Makefile b/fs/relayfs/Makefile
new file mode 100644
index 00000000000..e76e182cdb3
--- /dev/null
+++ b/fs/relayfs/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_RELAYFS_FS) += relayfs.o
+
+relayfs-y := relay.o inode.o buffers.o
+
diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c
new file mode 100644
index 00000000000..84e21ffa5ca
--- /dev/null
+++ b/fs/relayfs/buffers.c
@@ -0,0 +1,189 @@
+/*
+ * RelayFS buffer management code.
+ *
+ * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
+ * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/relayfs_fs.h>
+#include "relay.h"
+#include "buffers.h"
+
+/*
+ * close() vm_op implementation for relayfs file mapping.
+ */
+static void relay_file_mmap_close(struct vm_area_struct *vma)
+{
+	struct rchan_buf *buf = vma->vm_private_data;
+	buf->chan->cb->buf_unmapped(buf, vma->vm_file);
+}
+
+/*
+ * nopage() vm_op implementation for relayfs file mapping.
+ */
+static struct page *relay_buf_nopage(struct vm_area_struct *vma,
+				     unsigned long address,
+				     int *type)
+{
+	struct page *page;
+	struct rchan_buf *buf = vma->vm_private_data;
+	unsigned long offset = address - vma->vm_start;
+
+	if (address > vma->vm_end)
+		return NOPAGE_SIGBUS; /* Disallow mremap */
+	if (!buf)
+		return NOPAGE_OOM;
+
+	page = vmalloc_to_page(buf->start + offset);
+	if (!page)
+		return NOPAGE_OOM;
+	get_page(page);
+
+	if (type)
+		*type = VM_FAULT_MINOR;
+
+	return page;
+}
+
+/*
+ * vm_ops for relay file mappings.
+ */
+static struct vm_operations_struct relay_file_mmap_ops = {
+	.nopage = relay_buf_nopage,
+	.close = relay_file_mmap_close,
+};
+
+/**
+ *	relay_mmap_buf: - mmap channel buffer to process address space
+ *	@buf: relay channel buffer
+ *	@vma: vm_area_struct describing memory to be mapped
+ *
+ *	Returns 0 if ok, negative on error
+ *
+ *	Caller should already have grabbed mmap_sem.
+ */
+int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma)
+{
+	unsigned long length = vma->vm_end - vma->vm_start;
+	struct file *filp = vma->vm_file;
+
+	if (!buf)
+		return -EBADF;
+
+	if (length != (unsigned long)buf->chan->alloc_size)
+		return -EINVAL;
+
+	vma->vm_ops = &relay_file_mmap_ops;
+	vma->vm_private_data = buf;
+	buf->chan->cb->buf_mapped(buf, filp);
+
+	return 0;
+}
+
+/**
+ *	relay_alloc_buf - allocate a channel buffer
+ *	@buf: the buffer struct
+ *	@size: total size of the buffer
+ *
+ *	Returns a pointer to the resulting buffer, NULL if unsuccessful
+ */
+static void *relay_alloc_buf(struct rchan_buf *buf, unsigned long size)
+{
+	void *mem;
+	unsigned int i, j, n_pages;
+
+	size = PAGE_ALIGN(size);
+	n_pages = size >> PAGE_SHIFT;
+
+	buf->page_array = kcalloc(n_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!buf->page_array)
+		return NULL;
+
+	for (i = 0; i < n_pages; i++) {
+		buf->page_array[i] = alloc_page(GFP_KERNEL);
+		if (unlikely(!buf->page_array[i]))
+			goto depopulate;
+	}
+	mem = vmap(buf->page_array, n_pages, VM_MAP, PAGE_KERNEL);
+	if (!mem)
+		goto depopulate;
+
+	memset(mem, 0, size);
+	buf->page_count = n_pages;
+	return mem;
+
+depopulate:
+	for (j = 0; j < i; j++)
+		__free_page(buf->page_array[j]);
+	kfree(buf->page_array);
+	return NULL;
+}
+
+/**
+ *	relay_create_buf - allocate and initialize a channel buffer
+ *	@alloc_size: size of the buffer to allocate
+ *	@n_subbufs: number of sub-buffers in the channel
+ *
+ *	Returns channel buffer if successful, NULL otherwise
+ */
+struct rchan_buf *relay_create_buf(struct rchan *chan)
+{
+	struct rchan_buf *buf = kcalloc(1, sizeof(struct rchan_buf), GFP_KERNEL);
+	if (!buf)
+		return NULL;
+
+	buf->padding = kmalloc(chan->n_subbufs * sizeof(size_t *), GFP_KERNEL);
+	if (!buf->padding)
+		goto free_buf;
+
+	buf->start = relay_alloc_buf(buf, chan->alloc_size);
+	if (!buf->start)
+		goto free_buf;
+
+	buf->chan = chan;
+	kref_get(&buf->chan->kref);
+	return buf;
+
+free_buf:
+	kfree(buf->padding);
+	kfree(buf);
+	return NULL;
+}
+
+/**
+ *	relay_destroy_buf - destroy an rchan_buf struct and associated buffer
+ *	@buf: the buffer struct
+ */
+void relay_destroy_buf(struct rchan_buf *buf)
+{
+	struct rchan *chan = buf->chan;
+	unsigned int i;
+
+	if (likely(buf->start)) {
+		vunmap(buf->start);
+		for (i = 0; i < buf->page_count; i++)
+			__free_page(buf->page_array[i]);
+		kfree(buf->page_array);
+	}
+	kfree(buf->padding);
+	kfree(buf);
+	kref_put(&chan->kref, relay_destroy_channel);
+}
+
+/**
+ *	relay_remove_buf - remove a channel buffer
+ *
+ *	Removes the file from the relayfs fileystem, which also frees the
+ *	rchan_buf_struct and the channel buffer.  Should only be called from
+ *	kref_put().
+ */
+void relay_remove_buf(struct kref *kref)
+{
+	struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
+	relayfs_remove(buf->dentry);
+}
diff --git a/fs/relayfs/buffers.h b/fs/relayfs/buffers.h
new file mode 100644
index 00000000000..37a12493f64
--- /dev/null
+++ b/fs/relayfs/buffers.h
@@ -0,0 +1,12 @@
+#ifndef _BUFFERS_H
+#define _BUFFERS_H
+
+/* This inspired by rtai/shmem */
+#define FIX_SIZE(x) (((x) - 1) & PAGE_MASK) + PAGE_SIZE
+
+extern int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma);
+extern struct rchan_buf *relay_create_buf(struct rchan *chan);
+extern void relay_destroy_buf(struct rchan_buf *buf);
+extern void relay_remove_buf(struct kref *kref);
+
+#endif/* _BUFFERS_H */
diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c
new file mode 100644
index 00000000000..0f7f88d067a
--- /dev/null
+++ b/fs/relayfs/inode.c
@@ -0,0 +1,609 @@
+/*
+ * VFS-related code for RelayFS, a high-speed data relay filesystem.
+ *
+ * Copyright (C) 2003-2005 - Tom Zanussi <zanussi@us.ibm.com>, IBM Corp
+ * Copyright (C) 2003-2005 - Karim Yaghmour <karim@opersys.com>
+ *
+ * Based on ramfs, Copyright (C) 2002 - Linus Torvalds
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/backing-dev.h>
+#include <linux/namei.h>
+#include <linux/poll.h>
+#include <linux/relayfs_fs.h>
+#include "relay.h"
+#include "buffers.h"
+
+#define RELAYFS_MAGIC			0xF0B4A981
+
+static struct vfsmount *		relayfs_mount;
+static int				relayfs_mount_count;
+static kmem_cache_t *			relayfs_inode_cachep;
+
+static struct backing_dev_info		relayfs_backing_dev_info = {
+	.ra_pages	= 0,	/* No readahead */
+	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+};
+
+static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
+				       struct rchan *chan)
+{
+	struct rchan_buf *buf = NULL;
+	struct inode *inode;
+
+	if (S_ISREG(mode)) {
+		BUG_ON(!chan);
+		buf = relay_create_buf(chan);
+		if (!buf)
+			return NULL;
+	}
+
+	inode = new_inode(sb);
+	if (!inode) {
+		relay_destroy_buf(buf);
+		return NULL;
+	}
+
+	inode->i_mode = mode;
+	inode->i_uid = 0;
+	inode->i_gid = 0;
+	inode->i_blksize = PAGE_CACHE_SIZE;
+	inode->i_blocks = 0;
+	inode->i_mapping->backing_dev_info = &relayfs_backing_dev_info;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	switch (mode & S_IFMT) {
+	case S_IFREG:
+		inode->i_fop = &relayfs_file_operations;
+		RELAYFS_I(inode)->buf = buf;
+		break;
+	case S_IFDIR:
+		inode->i_op = &simple_dir_inode_operations;
+		inode->i_fop = &simple_dir_operations;
+
+		/* directory inodes start off with i_nlink == 2 (for "." entry) */
+		inode->i_nlink++;
+		break;
+	default:
+		break;
+	}
+
+	return inode;
+}
+
+/**
+ *	relayfs_create_entry - create a relayfs directory or file
+ *	@name: the name of the file to create
+ *	@parent: parent directory
+ *	@mode: mode
+ *	@chan: relay channel associated with the file
+ *
+ *	Returns the new dentry, NULL on failure
+ *
+ *	Creates a file or directory with the specifed permissions.
+ */
+static struct dentry *relayfs_create_entry(const char *name,
+					   struct dentry *parent,
+					   int mode,
+					   struct rchan *chan)
+{
+	struct dentry *d;
+	struct inode *inode;
+	int error = 0;
+
+	BUG_ON(!name || !(S_ISREG(mode) || S_ISDIR(mode)));
+
+	error = simple_pin_fs("relayfs", &relayfs_mount, &relayfs_mount_count);
+	if (error) {
+		printk(KERN_ERR "Couldn't mount relayfs: errcode %d\n", error);
+		return NULL;
+	}
+
+	if (!parent && relayfs_mount && relayfs_mount->mnt_sb)
+		parent = relayfs_mount->mnt_sb->s_root;
+
+	if (!parent) {
+		simple_release_fs(&relayfs_mount, &relayfs_mount_count);
+		return NULL;
+	}
+
+	parent = dget(parent);
+	down(&parent->d_inode->i_sem);
+	d = lookup_one_len(name, parent, strlen(name));
+	if (IS_ERR(d)) {
+		d = NULL;
+		goto release_mount;
+	}
+
+	if (d->d_inode) {
+		d = NULL;
+		goto release_mount;
+	}
+
+	inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan);
+	if (!inode) {
+		d = NULL;
+		goto release_mount;
+	}
+
+	d_instantiate(d, inode);
+	dget(d);	/* Extra count - pin the dentry in core */
+
+	if (S_ISDIR(mode))
+		parent->d_inode->i_nlink++;
+
+	goto exit;
+
+release_mount:
+	simple_release_fs(&relayfs_mount, &relayfs_mount_count);
+
+exit:
+	up(&parent->d_inode->i_sem);
+	dput(parent);
+	return d;
+}
+
+/**
+ *	relayfs_create_file - create a file in the relay filesystem
+ *	@name: the name of the file to create
+ *	@parent: parent directory
+ *	@mode: mode, if not specied the default perms are used
+ *	@chan: channel associated with the file
+ *
+ *	Returns file dentry if successful, NULL otherwise.
+ *
+ *	The file will be created user r on behalf of current user.
+ */
+struct dentry *relayfs_create_file(const char *name, struct dentry *parent,
+				   int mode, struct rchan *chan)
+{
+	if (!mode)
+		mode = S_IRUSR;
+	mode = (mode & S_IALLUGO) | S_IFREG;
+
+	return relayfs_create_entry(name, parent, mode, chan);
+}
+
+/**
+ *	relayfs_create_dir - create a directory in the relay filesystem
+ *	@name: the name of the directory to create
+ *	@parent: parent directory, NULL if parent should be fs root
+ *
+ *	Returns directory dentry if successful, NULL otherwise.
+ *
+ *	The directory will be created world rwx on behalf of current user.
+ */
+struct dentry *relayfs_create_dir(const char *name, struct dentry *parent)
+{
+	int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+	return relayfs_create_entry(name, parent, mode, NULL);
+}
+
+/**
+ *	relayfs_remove - remove a file or directory in the relay filesystem
+ *	@dentry: file or directory dentry
+ *
+ *	Returns 0 if successful, negative otherwise.
+ */
+int relayfs_remove(struct dentry *dentry)
+{
+	struct dentry *parent;
+	int error = 0;
+
+	if (!dentry)
+		return -EINVAL;
+	parent = dentry->d_parent;
+	if (!parent)
+		return -EINVAL;
+
+	parent = dget(parent);
+	down(&parent->d_inode->i_sem);
+	if (dentry->d_inode) {
+		if (S_ISDIR(dentry->d_inode->i_mode))
+			error = simple_rmdir(parent->d_inode, dentry);
+		else
+			error = simple_unlink(parent->d_inode, dentry);
+		if (!error)
+			d_delete(dentry);
+	}
+	if (!error)
+		dput(dentry);
+	up(&parent->d_inode->i_sem);
+	dput(parent);
+
+	if (!error)
+		simple_release_fs(&relayfs_mount, &relayfs_mount_count);
+
+	return error;
+}
+
+/**
+ *	relayfs_remove_dir - remove a directory in the relay filesystem
+ *	@dentry: directory dentry
+ *
+ *	Returns 0 if successful, negative otherwise.
+ */
+int relayfs_remove_dir(struct dentry *dentry)
+{
+	return relayfs_remove(dentry);
+}
+
+/**
+ *	relayfs_open - open file op for relayfs files
+ *	@inode: the inode
+ *	@filp: the file
+ *
+ *	Increments the channel buffer refcount.
+ */
+static int relayfs_open(struct inode *inode, struct file *filp)
+{
+	struct rchan_buf *buf = RELAYFS_I(inode)->buf;
+	kref_get(&buf->kref);
+
+	return 0;
+}
+
+/**
+ *	relayfs_mmap - mmap file op for relayfs files
+ *	@filp: the file
+ *	@vma: the vma describing what to map
+ *
+ *	Calls upon relay_mmap_buf to map the file into user space.
+ */
+static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	return relay_mmap_buf(RELAYFS_I(inode)->buf, vma);
+}
+
+/**
+ *	relayfs_poll - poll file op for relayfs files
+ *	@filp: the file
+ *	@wait: poll table
+ *
+ *	Poll implemention.
+ */
+static unsigned int relayfs_poll(struct file *filp, poll_table *wait)
+{
+	unsigned int mask = 0;
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct rchan_buf *buf = RELAYFS_I(inode)->buf;
+
+	if (buf->finalized)
+		return POLLERR;
+
+	if (filp->f_mode & FMODE_READ) {
+		poll_wait(filp, &buf->read_wait, wait);
+		if (!relay_buf_empty(buf))
+			mask |= POLLIN | POLLRDNORM;
+	}
+
+	return mask;
+}
+
+/**
+ *	relayfs_release - release file op for relayfs files
+ *	@inode: the inode
+ *	@filp: the file
+ *
+ *	Decrements the channel refcount, as the filesystem is
+ *	no longer using it.
+ */
+static int relayfs_release(struct inode *inode, struct file *filp)
+{
+	struct rchan_buf *buf = RELAYFS_I(inode)->buf;
+	kref_put(&buf->kref, relay_remove_buf);
+
+	return 0;
+}
+
+/**
+ *	relayfs_read_consume - update the consumed count for the buffer
+ */
+static void relayfs_read_consume(struct rchan_buf *buf,
+				 size_t read_pos,
+				 size_t bytes_consumed)
+{
+	size_t subbuf_size = buf->chan->subbuf_size;
+	size_t n_subbufs = buf->chan->n_subbufs;
+	size_t read_subbuf;
+
+	if (buf->bytes_consumed + bytes_consumed > subbuf_size) {
+		relay_subbufs_consumed(buf->chan, buf->cpu, 1);
+		buf->bytes_consumed = 0;
+	}
+
+	buf->bytes_consumed += bytes_consumed;
+	read_subbuf = read_pos / buf->chan->subbuf_size;
+	if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) {
+		if ((read_subbuf == buf->subbufs_produced % n_subbufs) &&
+		    (buf->offset == subbuf_size))
+			return;
+		relay_subbufs_consumed(buf->chan, buf->cpu, 1);
+		buf->bytes_consumed = 0;
+	}
+}
+
+/**
+ *	relayfs_read_avail - boolean, are there unconsumed bytes available?
+ */
+static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos)
+{
+	size_t bytes_produced, bytes_consumed, write_offset;
+	size_t subbuf_size = buf->chan->subbuf_size;
+	size_t n_subbufs = buf->chan->n_subbufs;
+	size_t produced = buf->subbufs_produced % n_subbufs;
+	size_t consumed = buf->subbufs_consumed % n_subbufs;
+
+	write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset;
+
+	if (consumed > produced) {
+		if ((produced > n_subbufs) &&
+		    (produced + n_subbufs - consumed <= n_subbufs))
+			produced += n_subbufs;
+	} else if (consumed == produced) {
+		if (buf->offset > subbuf_size) {
+			produced += n_subbufs;
+			if (buf->subbufs_produced == buf->subbufs_consumed)
+				consumed += n_subbufs;
+		}
+	}
+
+	if (buf->offset > subbuf_size)
+		bytes_produced = (produced - 1) * subbuf_size + write_offset;
+	else
+		bytes_produced = produced * subbuf_size + write_offset;
+	bytes_consumed = consumed * subbuf_size + buf->bytes_consumed;
+
+	if (bytes_produced == bytes_consumed)
+		return 0;
+
+	relayfs_read_consume(buf, read_pos, 0);
+
+	return 1;
+}
+
+/**
+ *	relayfs_read_subbuf_avail - return bytes available in sub-buffer
+ */
+static size_t relayfs_read_subbuf_avail(size_t read_pos,
+					struct rchan_buf *buf)
+{
+	size_t padding, avail = 0;
+	size_t read_subbuf, read_offset, write_subbuf, write_offset;
+	size_t subbuf_size = buf->chan->subbuf_size;
+
+	write_subbuf = (buf->data - buf->start) / subbuf_size;
+	write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset;
+	read_subbuf = read_pos / subbuf_size;
+	read_offset = read_pos % subbuf_size;
+	padding = buf->padding[read_subbuf];
+
+	if (read_subbuf == write_subbuf) {
+		if (read_offset + padding < write_offset)
+			avail = write_offset - (read_offset + padding);
+	} else
+		avail = (subbuf_size - padding) - read_offset;
+
+	return avail;
+}
+
+/**
+ *	relayfs_read_start_pos - find the first available byte to read
+ *
+ *	If the read_pos is in the middle of padding, return the
+ *	position of the first actually available byte, otherwise
+ *	return the original value.
+ */
+static size_t relayfs_read_start_pos(size_t read_pos,
+				     struct rchan_buf *buf)
+{
+	size_t read_subbuf, padding, padding_start, padding_end;
+	size_t subbuf_size = buf->chan->subbuf_size;
+	size_t n_subbufs = buf->chan->n_subbufs;
+
+	read_subbuf = read_pos / subbuf_size;
+	padding = buf->padding[read_subbuf];
+	padding_start = (read_subbuf + 1) * subbuf_size - padding;
+	padding_end = (read_subbuf + 1) * subbuf_size;
+	if (read_pos >= padding_start && read_pos < padding_end) {
+		read_subbuf = (read_subbuf + 1) % n_subbufs;
+		read_pos = read_subbuf * subbuf_size;
+	}
+
+	return read_pos;
+}
+
+/**
+ *	relayfs_read_end_pos - return the new read position
+ */
+static size_t relayfs_read_end_pos(struct rchan_buf *buf,
+				   size_t read_pos,
+				   size_t count)
+{
+	size_t read_subbuf, padding, end_pos;
+	size_t subbuf_size = buf->chan->subbuf_size;
+	size_t n_subbufs = buf->chan->n_subbufs;
+
+	read_subbuf = read_pos / subbuf_size;
+	padding = buf->padding[read_subbuf];
+	if (read_pos % subbuf_size + count + padding == subbuf_size)
+		end_pos = (read_subbuf + 1) * subbuf_size;
+	else
+		end_pos = read_pos + count;
+	if (end_pos >= subbuf_size * n_subbufs)
+		end_pos = 0;
+
+	return end_pos;
+}
+
+/**
+ *	relayfs_read - read file op for relayfs files
+ *	@filp: the file
+ *	@buffer: the userspace buffer
+ *	@count: number of bytes to read
+ *	@ppos: position to read from
+ *
+ *	Reads count bytes or the number of bytes available in the
+ *	current sub-buffer being read, whichever is smaller.
+ */
+static ssize_t relayfs_read(struct file *filp,
+			    char __user *buffer,
+			    size_t count,
+			    loff_t *ppos)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct rchan_buf *buf = RELAYFS_I(inode)->buf;
+	size_t read_start, avail;
+	ssize_t ret = 0;
+	void *from;
+
+	down(&inode->i_sem);
+	if(!relayfs_read_avail(buf, *ppos))
+		goto out;
+
+	read_start = relayfs_read_start_pos(*ppos, buf);
+	avail = relayfs_read_subbuf_avail(read_start, buf);
+	if (!avail)
+		goto out;
+
+	from = buf->start + read_start;
+	ret = count = min(count, avail);
+	if (copy_to_user(buffer, from, count)) {
+		ret = -EFAULT;
+		goto out;
+	}
+	relayfs_read_consume(buf, read_start, count);
+	*ppos = relayfs_read_end_pos(buf, read_start, count);
+out:
+	up(&inode->i_sem);
+	return ret;
+}
+
+/**
+ *	relayfs alloc_inode() implementation
+ */
+static struct inode *relayfs_alloc_inode(struct super_block *sb)
+{
+	struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL);
+	if (!p)
+		return NULL;
+	p->buf = NULL;
+
+	return &p->vfs_inode;
+}
+
+/**
+ *	relayfs destroy_inode() implementation
+ */
+static void relayfs_destroy_inode(struct inode *inode)
+{
+	if (RELAYFS_I(inode)->buf)
+		relay_destroy_buf(RELAYFS_I(inode)->buf);
+
+	kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode));
+}
+
+static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
+{
+	struct relayfs_inode_info *i = p;
+	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR)
+		inode_init_once(&i->vfs_inode);
+}
+
+struct file_operations relayfs_file_operations = {
+	.open		= relayfs_open,
+	.poll		= relayfs_poll,
+	.mmap		= relayfs_mmap,
+	.read		= relayfs_read,
+	.llseek		= no_llseek,
+	.release	= relayfs_release,
+};
+
+static struct super_operations relayfs_ops = {
+	.statfs		= simple_statfs,
+	.drop_inode	= generic_delete_inode,
+	.alloc_inode	= relayfs_alloc_inode,
+	.destroy_inode	= relayfs_destroy_inode,
+};
+
+static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
+{
+	struct inode *inode;
+	struct dentry *root;
+	int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = RELAYFS_MAGIC;
+	sb->s_op = &relayfs_ops;
+	inode = relayfs_get_inode(sb, mode, NULL);
+
+	if (!inode)
+		return -ENOMEM;
+
+	root = d_alloc_root(inode);
+	if (!root) {
+		iput(inode);
+		return -ENOMEM;
+	}
+	sb->s_root = root;
+
+	return 0;
+}
+
+static struct super_block * relayfs_get_sb(struct file_system_type *fs_type,
+					   int flags, const char *dev_name,
+					   void *data)
+{
+	return get_sb_single(fs_type, flags, data, relayfs_fill_super);
+}
+
+static struct file_system_type relayfs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "relayfs",
+	.get_sb		= relayfs_get_sb,
+	.kill_sb	= kill_litter_super,
+};
+
+static int __init init_relayfs_fs(void)
+{
+	int err;
+
+	relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache",
+				sizeof(struct relayfs_inode_info), 0,
+				0, init_once, NULL);
+	if (!relayfs_inode_cachep)
+		return -ENOMEM;
+
+	err = register_filesystem(&relayfs_fs_type);
+	if (err)
+		kmem_cache_destroy(relayfs_inode_cachep);
+
+	return err;
+}
+
+static void __exit exit_relayfs_fs(void)
+{
+	unregister_filesystem(&relayfs_fs_type);
+	kmem_cache_destroy(relayfs_inode_cachep);
+}
+
+module_init(init_relayfs_fs)
+module_exit(exit_relayfs_fs)
+
+EXPORT_SYMBOL_GPL(relayfs_file_operations);
+EXPORT_SYMBOL_GPL(relayfs_create_dir);
+EXPORT_SYMBOL_GPL(relayfs_remove_dir);
+
+MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>");
+MODULE_DESCRIPTION("Relay Filesystem");
+MODULE_LICENSE("GPL");
+
diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c
new file mode 100644
index 00000000000..16446a15c96
--- /dev/null
+++ b/fs/relayfs/relay.c
@@ -0,0 +1,431 @@
+/*
+ * Public API and common code for RelayFS.
+ *
+ * See Documentation/filesystems/relayfs.txt for an overview of relayfs.
+ *
+ * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
+ * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/errno.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/relayfs_fs.h>
+#include "relay.h"
+#include "buffers.h"
+
+/**
+ *	relay_buf_empty - boolean, is the channel buffer empty?
+ *	@buf: channel buffer
+ *
+ *	Returns 1 if the buffer is empty, 0 otherwise.
+ */
+int relay_buf_empty(struct rchan_buf *buf)
+{
+	return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1;
+}
+
+/**
+ *	relay_buf_full - boolean, is the channel buffer full?
+ *	@buf: channel buffer
+ *
+ *	Returns 1 if the buffer is full, 0 otherwise.
+ */
+int relay_buf_full(struct rchan_buf *buf)
+{
+	size_t ready = buf->subbufs_produced - buf->subbufs_consumed;
+	return (ready >= buf->chan->n_subbufs) ? 1 : 0;
+}
+
+/*
+ * High-level relayfs kernel API and associated functions.
+ */
+
+/*
+ * rchan_callback implementations defining default channel behavior.  Used
+ * in place of corresponding NULL values in client callback struct.
+ */
+
+/*
+ * subbuf_start() default callback.  Does nothing.
+ */
+static int subbuf_start_default_callback (struct rchan_buf *buf,
+					  void *subbuf,
+					  void *prev_subbuf,
+					  size_t prev_padding)
+{
+	if (relay_buf_full(buf))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * buf_mapped() default callback.  Does nothing.
+ */
+static void buf_mapped_default_callback(struct rchan_buf *buf,
+					struct file *filp)
+{
+}
+
+/*
+ * buf_unmapped() default callback.  Does nothing.
+ */
+static void buf_unmapped_default_callback(struct rchan_buf *buf,
+					  struct file *filp)
+{
+}
+
+/* relay channel default callbacks */
+static struct rchan_callbacks default_channel_callbacks = {
+	.subbuf_start = subbuf_start_default_callback,
+	.buf_mapped = buf_mapped_default_callback,
+	.buf_unmapped = buf_unmapped_default_callback,
+};
+
+/**
+ *	wakeup_readers - wake up readers waiting on a channel
+ *	@private: the channel buffer
+ *
+ *	This is the work function used to defer reader waking.  The
+ *	reason waking is deferred is that calling directly from write
+ *	causes problems if you're writing from say the scheduler.
+ */
+static void wakeup_readers(void *private)
+{
+	struct rchan_buf *buf = private;
+	wake_up_interruptible(&buf->read_wait);
+}
+
+/**
+ *	__relay_reset - reset a channel buffer
+ *	@buf: the channel buffer
+ *	@init: 1 if this is a first-time initialization
+ *
+ *	See relay_reset for description of effect.
+ */
+static inline void __relay_reset(struct rchan_buf *buf, unsigned int init)
+{
+	size_t i;
+
+	if (init) {
+		init_waitqueue_head(&buf->read_wait);
+		kref_init(&buf->kref);
+		INIT_WORK(&buf->wake_readers, NULL, NULL);
+	} else {
+		cancel_delayed_work(&buf->wake_readers);
+		flush_scheduled_work();
+	}
+
+	buf->subbufs_produced = 0;
+	buf->subbufs_consumed = 0;
+	buf->bytes_consumed = 0;
+	buf->finalized = 0;
+	buf->data = buf->start;
+	buf->offset = 0;
+
+	for (i = 0; i < buf->chan->n_subbufs; i++)
+		buf->padding[i] = 0;
+
+	buf->chan->cb->subbuf_start(buf, buf->data, NULL, 0);
+}
+
+/**
+ *	relay_reset - reset the channel
+ *	@chan: the channel
+ *
+ *	This has the effect of erasing all data from all channel buffers
+ *	and restarting the channel in its initial state.  The buffers
+ *	are not freed, so any mappings are still in effect.
+ *
+ *	NOTE: Care should be taken that the channel isn't actually
+ *	being used by anything when this call is made.
+ */
+void relay_reset(struct rchan *chan)
+{
+	unsigned int i;
+
+	if (!chan)
+		return;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!chan->buf[i])
+			continue;
+		__relay_reset(chan->buf[i], 0);
+	}
+}
+
+/**
+ *	relay_open_buf - create a new channel buffer in relayfs
+ *
+ *	Internal - used by relay_open().
+ */
+static struct rchan_buf *relay_open_buf(struct rchan *chan,
+					const char *filename,
+					struct dentry *parent)
+{
+	struct rchan_buf *buf;
+	struct dentry *dentry;
+
+	/* Create file in fs */
+	dentry = relayfs_create_file(filename, parent, S_IRUSR, chan);
+	if (!dentry)
+		return NULL;
+
+	buf = RELAYFS_I(dentry->d_inode)->buf;
+	buf->dentry = dentry;
+	__relay_reset(buf, 1);
+
+	return buf;
+}
+
+/**
+ *	relay_close_buf - close a channel buffer
+ *	@buf: channel buffer
+ *
+ *	Marks the buffer finalized and restores the default callbacks.
+ *	The channel buffer and channel buffer data structure are then freed
+ *	automatically when the last reference is given up.
+ */
+static inline void relay_close_buf(struct rchan_buf *buf)
+{
+	buf->finalized = 1;
+	buf->chan->cb = &default_channel_callbacks;
+	cancel_delayed_work(&buf->wake_readers);
+	flush_scheduled_work();
+	kref_put(&buf->kref, relay_remove_buf);
+}
+
+static inline void setup_callbacks(struct rchan *chan,
+				   struct rchan_callbacks *cb)
+{
+	if (!cb) {
+		chan->cb = &default_channel_callbacks;
+		return;
+	}
+
+	if (!cb->subbuf_start)
+		cb->subbuf_start = subbuf_start_default_callback;
+	if (!cb->buf_mapped)
+		cb->buf_mapped = buf_mapped_default_callback;
+	if (!cb->buf_unmapped)
+		cb->buf_unmapped = buf_unmapped_default_callback;
+	chan->cb = cb;
+}
+
+/**
+ *	relay_open - create a new relayfs channel
+ *	@base_filename: base name of files to create
+ *	@parent: dentry of parent directory, NULL for root directory
+ *	@subbuf_size: size of sub-buffers
+ *	@n_subbufs: number of sub-buffers
+ *	@cb: client callback functions
+ *
+ *	Returns channel pointer if successful, NULL otherwise.
+ *
+ *	Creates a channel buffer for each cpu using the sizes and
+ *	attributes specified.  The created channel buffer files
+ *	will be named base_filename0...base_filenameN-1.  File
+ *	permissions will be S_IRUSR.
+ */
+struct rchan *relay_open(const char *base_filename,
+			 struct dentry *parent,
+			 size_t subbuf_size,
+			 size_t n_subbufs,
+			 struct rchan_callbacks *cb)
+{
+	unsigned int i;
+	struct rchan *chan;
+	char *tmpname;
+
+	if (!base_filename)
+		return NULL;
+
+	if (!(subbuf_size && n_subbufs))
+		return NULL;
+
+	chan = kcalloc(1, sizeof(struct rchan), GFP_KERNEL);
+	if (!chan)
+		return NULL;
+
+	chan->version = RELAYFS_CHANNEL_VERSION;
+	chan->n_subbufs = n_subbufs;
+	chan->subbuf_size = subbuf_size;
+	chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs);
+	setup_callbacks(chan, cb);
+	kref_init(&chan->kref);
+
+	tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL);
+	if (!tmpname)
+		goto free_chan;
+
+	for_each_online_cpu(i) {
+		sprintf(tmpname, "%s%d", base_filename, i);
+		chan->buf[i] = relay_open_buf(chan, tmpname, parent);
+		chan->buf[i]->cpu = i;
+		if (!chan->buf[i])
+			goto free_bufs;
+	}
+
+	kfree(tmpname);
+	return chan;
+
+free_bufs:
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!chan->buf[i])
+			break;
+		relay_close_buf(chan->buf[i]);
+	}
+	kfree(tmpname);
+
+free_chan:
+	kref_put(&chan->kref, relay_destroy_channel);
+	return NULL;
+}
+
+/**
+ *	relay_switch_subbuf - switch to a new sub-buffer
+ *	@buf: channel buffer
+ *	@length: size of current event
+ *
+ *	Returns either the length passed in or 0 if full.
+
+ *	Performs sub-buffer-switch tasks such as invoking callbacks,
+ *	updating padding counts, waking up readers, etc.
+ */
+size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
+{
+	void *old, *new;
+	size_t old_subbuf, new_subbuf;
+
+	if (unlikely(length > buf->chan->subbuf_size))
+		goto toobig;
+
+	if (buf->offset != buf->chan->subbuf_size + 1) {
+		buf->prev_padding = buf->chan->subbuf_size - buf->offset;
+		old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
+		buf->padding[old_subbuf] = buf->prev_padding;
+		buf->subbufs_produced++;
+		if (waitqueue_active(&buf->read_wait)) {
+			PREPARE_WORK(&buf->wake_readers, wakeup_readers, buf);
+			schedule_delayed_work(&buf->wake_readers, 1);
+		}
+	}
+
+	old = buf->data;
+	new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
+	new = buf->start + new_subbuf * buf->chan->subbuf_size;
+	buf->offset = 0;
+	if (!buf->chan->cb->subbuf_start(buf, new, old, buf->prev_padding)) {
+		buf->offset = buf->chan->subbuf_size + 1;
+		return 0;
+	}
+	buf->data = new;
+	buf->padding[new_subbuf] = 0;
+
+	if (unlikely(length + buf->offset > buf->chan->subbuf_size))
+		goto toobig;
+
+	return length;
+
+toobig:
+	printk(KERN_WARNING "relayfs: event too large (%Zd)\n", length);
+	WARN_ON(1);
+	return 0;
+}
+
+/**
+ *	relay_subbufs_consumed - update the buffer's sub-buffers-consumed count
+ *	@chan: the channel
+ *	@cpu: the cpu associated with the channel buffer to update
+ *	@subbufs_consumed: number of sub-buffers to add to current buf's count
+ *
+ *	Adds to the channel buffer's consumed sub-buffer count.
+ *	subbufs_consumed should be the number of sub-buffers newly consumed,
+ *	not the total consumed.
+ *
+ *	NOTE: kernel clients don't need to call this function if the channel
+ *	mode is 'overwrite'.
+ */
+void relay_subbufs_consumed(struct rchan *chan,
+			    unsigned int cpu,
+			    size_t subbufs_consumed)
+{
+	struct rchan_buf *buf;
+
+	if (!chan)
+		return;
+
+	if (cpu >= NR_CPUS || !chan->buf[cpu])
+		return;
+
+	buf = chan->buf[cpu];
+	buf->subbufs_consumed += subbufs_consumed;
+	if (buf->subbufs_consumed > buf->subbufs_produced)
+		buf->subbufs_consumed = buf->subbufs_produced;
+}
+
+/**
+ *	relay_destroy_channel - free the channel struct
+ *
+ *	Should only be called from kref_put().
+ */
+void relay_destroy_channel(struct kref *kref)
+{
+	struct rchan *chan = container_of(kref, struct rchan, kref);
+	kfree(chan);
+}
+
+/**
+ *	relay_close - close the channel
+ *	@chan: the channel
+ *
+ *	Closes all channel buffers and frees the channel.
+ */
+void relay_close(struct rchan *chan)
+{
+	unsigned int i;
+
+	if (!chan)
+		return;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!chan->buf[i])
+			continue;
+		relay_close_buf(chan->buf[i]);
+	}
+
+	kref_put(&chan->kref, relay_destroy_channel);
+}
+
+/**
+ *	relay_flush - close the channel
+ *	@chan: the channel
+ *
+ *	Flushes all channel buffers i.e. forces buffer switch.
+ */
+void relay_flush(struct rchan *chan)
+{
+	unsigned int i;
+
+	if (!chan)
+		return;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!chan->buf[i])
+			continue;
+		relay_switch_subbuf(chan->buf[i], 0);
+	}
+}
+
+EXPORT_SYMBOL_GPL(relay_open);
+EXPORT_SYMBOL_GPL(relay_close);
+EXPORT_SYMBOL_GPL(relay_flush);
+EXPORT_SYMBOL_GPL(relay_reset);
+EXPORT_SYMBOL_GPL(relay_subbufs_consumed);
+EXPORT_SYMBOL_GPL(relay_switch_subbuf);
+EXPORT_SYMBOL_GPL(relay_buf_full);
diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h
new file mode 100644
index 00000000000..703503fa22b
--- /dev/null
+++ b/fs/relayfs/relay.h
@@ -0,0 +1,12 @@
+#ifndef _RELAY_H
+#define _RELAY_H
+
+struct dentry *relayfs_create_file(const char *name,
+				   struct dentry *parent,
+				   int mode,
+				   struct rchan *chan);
+extern int relayfs_remove(struct dentry *dentry);
+extern int relay_buf_empty(struct rchan_buf *buf);
+extern void relay_destroy_channel(struct kref *kref);
+
+#endif /* _RELAY_H */
diff --git a/fs/select.c b/fs/select.c
index b80e7eb0ac0..f10a10317d5 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -22,6 +22,7 @@
 #include <linux/personality.h> /* for STICKY_TIMEOUTS */
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/rcupdate.h>
 
 #include <asm/uaccess.h>
 
@@ -132,11 +133,13 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
 	unsigned long *open_fds;
 	unsigned long set;
 	int max;
+	struct fdtable *fdt;
 
 	/* handle last in-complete long-word first */
 	set = ~(~0UL << (n & (__NFDBITS-1)));
 	n /= __NFDBITS;
-	open_fds = current->files->open_fds->fds_bits+n;
+	fdt = files_fdtable(current->files);
+	open_fds = fdt->open_fds->fds_bits+n;
 	max = 0;
 	if (set) {
 		set &= BITS(fds, n);
@@ -183,9 +186,9 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
 	int retval, i;
 	long __timeout = *timeout;
 
- 	spin_lock(&current->files->file_lock);
+	rcu_read_lock();
 	retval = max_select_fd(n, fds);
-	spin_unlock(&current->files->file_lock);
+	rcu_read_unlock();
 
 	if (retval < 0)
 		return retval;
@@ -299,6 +302,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
 	char *bits;
 	long timeout;
 	int ret, size, max_fdset;
+	struct fdtable *fdt;
 
 	timeout = MAX_SCHEDULE_TIMEOUT;
 	if (tvp) {
@@ -326,7 +330,10 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
 		goto out_nofds;
 
 	/* max_fdset can increase, so grab it once to avoid race */
-	max_fdset = current->files->max_fdset;
+	rcu_read_lock();
+	fdt = files_fdtable(current->files);
+	max_fdset = fdt->max_fdset;
+	rcu_read_unlock();
 	if (n > max_fdset)
 		n = max_fdset;
 
@@ -464,9 +471,15 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
  	unsigned int i;
 	struct poll_list *head;
  	struct poll_list *walk;
+	struct fdtable *fdt;
+	int max_fdset;
 
 	/* Do a sanity check on nfds ... */
-	if (nfds > current->files->max_fdset && nfds > OPEN_MAX)
+	rcu_read_lock();
+	fdt = files_fdtable(current->files);
+	max_fdset = fdt->max_fdset;
+	rcu_read_unlock();
+	if (nfds > max_fdset && nfds > OPEN_MAX)
 		return -EINVAL;
 
 	if (timeout) {
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 4765aaac9fd..10b994428fe 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -331,6 +331,7 @@ static void
 smb_delete_inode(struct inode *ino)
 {
 	DEBUG1("ino=%ld\n", ino->i_ino);
+	truncate_inode_pages(&ino->i_data, 0);
 	lock_kernel();
 	if (smb_close(ino))
 		PARANOIA("could not close inode %ld\n", ino->i_ino);
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 220babe91ef..38ab558835c 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -2397,8 +2397,7 @@ smb_proc_readdir_long(struct file *filp, void *dirent, filldir_t filldir,
 		if (req->rq_rcls == ERRSRV && req->rq_err == ERRerror) {
 			/* a damn Win95 bug - sometimes it clags if you 
 			   ask it too fast */
-			current->state = TASK_INTERRUPTIBLE;
-			schedule_timeout(HZ/5);
+			schedule_timeout_interruptible(msecs_to_jiffies(200));
 			continue;
                 }
 
diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c
index 93f3cd22a2e..6815b1b12b6 100644
--- a/fs/smbfs/sock.c
+++ b/fs/smbfs/sock.c
@@ -15,12 +15,12 @@
 #include <linux/file.h>
 #include <linux/in.h>
 #include <linux/net.h>
-#include <linux/tcp.h>
 #include <linux/mm.h>
 #include <linux/netdevice.h>
 #include <linux/smp_lock.h>
 #include <linux/workqueue.h>
 #include <net/scm.h>
+#include <net/tcp_states.h>
 #include <net/ip.h>
 
 #include <linux/smb_fs.h>
diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c
index 8b069e06433..0c64bc3a012 100644
--- a/fs/smbfs/symlink.c
+++ b/fs/smbfs/symlink.c
@@ -34,7 +34,7 @@ int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname)
 	return smb_proc_symlink(server_from_dentry(dentry), dentry, oldname);
 }
 
-static int smb_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *smb_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	char *link = __getname();
 	DEBUG1("followlink of %s/%s\n", DENTRY_PATH(dentry));
@@ -52,10 +52,10 @@ static int smb_follow_link(struct dentry *dentry, struct nameidata *nd)
 		}
 	}
 	nd_set_link(nd, link);
-	return 0;
+	return NULL;
 }
 
-static void smb_put_link(struct dentry *dentry, struct nameidata *nd)
+static void smb_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
 {
 	char *s = nd_get_link(nd);
 	if (!IS_ERR(s))
diff --git a/fs/super.c b/fs/super.c
index 6e57ee252e1..f60155ec778 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -513,7 +513,7 @@ static void mark_files_ro(struct super_block *sb)
 	struct file *f;
 
 	file_list_lock();
-	list_for_each_entry(f, &sb->s_files, f_list) {
+	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
 		if (S_ISREG(f->f_dentry->d_inode->i_mode) && file_count(f))
 			f->f_mode &= ~FMODE_WRITE;
 	}
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index d727dc96063..970a33f0329 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -228,6 +228,10 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name)
 	struct sysfs_dirent * sd;
 	struct sysfs_dirent * parent_sd = dir->d_fsdata;
 
+	if (dir->d_inode == NULL)
+		/* no inode means this hasn't been made visible yet */
+		return;
+
 	down(&dir->d_inode->i_sem);
 	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
 		if (!sd->s_element)
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index fae57c83a72..de402fa915f 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -151,17 +151,17 @@ static int sysfs_getlink(struct dentry *dentry, char * path)
 
 }
 
-static int sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	int error = -ENOMEM;
 	unsigned long page = get_zeroed_page(GFP_KERNEL);
 	if (page)
 		error = sysfs_getlink(dentry, (char *) page); 
 	nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
-	return 0;
+	return NULL;
 }
 
-static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd)
+static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
 {
 	char *page = nd_get_link(nd);
 	if (!IS_ERR(page))
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 0530077d9dd..fa33eceb001 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -292,6 +292,7 @@ int sysv_sync_inode(struct inode * inode)
 
 static void sysv_delete_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
 	inode->i_size = 0;
 	sysv_truncate(inode);
 	lock_kernel();
diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c
index ed637db2dcb..b85ce61d635 100644
--- a/fs/sysv/symlink.c
+++ b/fs/sysv/symlink.c
@@ -8,10 +8,10 @@
 #include "sysv.h"
 #include <linux/namei.h>
 
-static int sysv_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *sysv_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	nd_set_link(nd, (char *)SYSV_I(dentry->d_inode)->i_data);
-	return 0;
+	return NULL;
 }
 
 struct inode_operations sysv_fast_symlink_inode_operations = {
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 3d68de39fad..b83890beaaa 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -87,6 +87,8 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
  */
 void udf_delete_inode(struct inode * inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+
 	if (is_bad_inode(inode))
 		goto no_delete;
 
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 997640c99c7..faf1512173e 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -114,8 +114,7 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count
 	ubh_mark_buffer_dirty (USPI_UBH);
 	ubh_mark_buffer_dirty (UCPI_UBH);
 	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ubh_wait_on_buffer (UCPI_UBH);
-		ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi);
+		ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi);
 		ubh_wait_on_buffer (UCPI_UBH);
 	}
 	sb->s_dirt = 1;
@@ -200,8 +199,7 @@ do_more:
 	ubh_mark_buffer_dirty (USPI_UBH);
 	ubh_mark_buffer_dirty (UCPI_UBH);
 	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ubh_wait_on_buffer (UCPI_UBH);
-		ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi);
+		ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi);
 		ubh_wait_on_buffer (UCPI_UBH);
 	}
 
@@ -459,8 +457,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
 	ubh_mark_buffer_dirty (USPI_UBH);
 	ubh_mark_buffer_dirty (UCPI_UBH);
 	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ubh_wait_on_buffer (UCPI_UBH);
-		ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi);
+		ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi);
 		ubh_wait_on_buffer (UCPI_UBH);
 	}
 	sb->s_dirt = 1;
@@ -585,8 +582,7 @@ succed:
 	ubh_mark_buffer_dirty (USPI_UBH);
 	ubh_mark_buffer_dirty (UCPI_UBH);
 	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ubh_wait_on_buffer (UCPI_UBH);
-		ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi);
+		ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi);
 		ubh_wait_on_buffer (UCPI_UBH);
 	}
 	sb->s_dirt = 1;
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 61a6b1542fc..0938945b9cb 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -124,8 +124,7 @@ void ufs_free_inode (struct inode * inode)
 	ubh_mark_buffer_dirty (USPI_UBH);
 	ubh_mark_buffer_dirty (UCPI_UBH);
 	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ubh_wait_on_buffer (UCPI_UBH);
-		ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **) &ucpi);
+		ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **) &ucpi);
 		ubh_wait_on_buffer (UCPI_UBH);
 	}
 	
@@ -249,8 +248,7 @@ cg_found:
 	ubh_mark_buffer_dirty (USPI_UBH);
 	ubh_mark_buffer_dirty (UCPI_UBH);
 	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ubh_wait_on_buffer (UCPI_UBH);
-		ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **) &ucpi);
+		ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **) &ucpi);
 		ubh_wait_on_buffer (UCPI_UBH);
 	}
 	sb->s_dirt = 1;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 718627ca8b5..55f4aa16e3f 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -804,6 +804,7 @@ int ufs_sync_inode (struct inode *inode)
 
 void ufs_delete_inode (struct inode * inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
 	/*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
 	lock_kernel();
 	mark_inode_dirty(inode);
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c
index a0e49149098..337512ed578 100644
--- a/fs/ufs/symlink.c
+++ b/fs/ufs/symlink.c
@@ -29,11 +29,11 @@
 #include <linux/namei.h>
 #include <linux/ufs_fs.h>
 
-static int ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct ufs_inode_info *p = UFS_I(dentry->d_inode);
 	nd_set_link(nd, (char*)p->i_u1.i_symlink);
-	return 0;
+	return NULL;
 }
 
 struct inode_operations ufs_fast_symlink_inode_operations = {
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index e312bf8bad9..61d2e35012a 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -285,8 +285,7 @@ next:;
 		}
 	}
 	if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) {
-		ubh_wait_on_buffer (ind_ubh);
-		ubh_ll_rw_block (WRITE, 1, &ind_ubh);
+		ubh_ll_rw_block (SWRITE, 1, &ind_ubh);
 		ubh_wait_on_buffer (ind_ubh);
 	}
 	ubh_brelse (ind_ubh);
@@ -353,8 +352,7 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p)
 		}
 	}
 	if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) {
-		ubh_wait_on_buffer (dind_bh);
-		ubh_ll_rw_block (WRITE, 1, &dind_bh);
+		ubh_ll_rw_block (SWRITE, 1, &dind_bh);
 		ubh_wait_on_buffer (dind_bh);
 	}
 	ubh_brelse (dind_bh);
@@ -418,8 +416,7 @@ static int ufs_trunc_tindirect (struct inode * inode)
 		}
 	}
 	if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) {
-		ubh_wait_on_buffer (tind_bh);
-		ubh_ll_rw_block (WRITE, 1, &tind_bh);
+		ubh_ll_rw_block (SWRITE, 1, &tind_bh);
 		ubh_wait_on_buffer (tind_bh);
 	}
 	ubh_brelse (tind_bh);
diff --git a/fs/umsdos/notes b/fs/umsdos/notes
deleted file mode 100644
index 3c47d1f4fc4..00000000000
--- a/fs/umsdos/notes
+++ /dev/null
@@ -1,17 +0,0 @@
-This file contain idea and things I don't want to forget
-
-Possible bug in fs/read_write.c
-Function sys_readdir()
-
-	There is a call the verify_area that does not take in account
-	the count parameter. I guess it should read
-
-	error = verify_area(VERIFY_WRITE, dirent, count*sizeof (*dirent));
-
-	instead of
-
-	error = verify_area(VERIFY_WRITE, dirent, sizeof (*dirent));
-
-	Of course, now , count is always 1
-
-
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index 1c6f6b57ef1..ef46939c0c1 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -621,8 +621,7 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name,
 	}
 
 	/* build the entry of long file name */
-	for (cksum = i = 0; i < 11; i++)
-		cksum = (((cksum&1)<<7)|((cksum&0xfe)>>1)) + msdos_name[i];
+	cksum = fat_checksum(msdos_name);
 
 	*nr_slots = usize / 13;
 	for (ps = slots, i = *nr_slots; i > 0; i--, ps++) {
@@ -888,10 +887,10 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
 	struct buffer_head *dotdot_bh;
 	struct msdos_dir_entry *dotdot_de;
-	loff_t dotdot_i_pos;
 	struct inode *old_inode, *new_inode;
 	struct fat_slot_info old_sinfo, sinfo;
 	struct timespec ts;
+	loff_t dotdot_i_pos, new_i_pos;
 	int err, is_dir, update_dotdot, corrupt = 0;
 
 	old_sinfo.bh = sinfo.bh = dotdot_bh = NULL;
@@ -914,31 +913,24 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	ts = CURRENT_TIME_SEC;
 	if (new_inode) {
-		err = vfat_find(new_dir, &new_dentry->d_name, &sinfo);
-		if (err)
-			goto out;
-		if (MSDOS_I(new_inode)->i_pos != sinfo.i_pos) {
-			/* WTF??? Cry and fail. */
-			printk(KERN_WARNING "vfat_rename: fs corrupted\n");
-			goto out;
-		}
-
 		if (is_dir) {
 			err = fat_dir_empty(new_inode);
 			if (err)
 				goto out;
 		}
+		new_i_pos = MSDOS_I(new_inode)->i_pos;
 		fat_detach(new_inode);
 	} else {
 		err = vfat_add_entry(new_dir, &new_dentry->d_name, is_dir, 0,
 				     &ts, &sinfo);
 		if (err)
 			goto out;
+		new_i_pos = sinfo.i_pos;
 	}
 	new_dir->i_version++;
 
 	fat_detach(old_inode);
-	fat_attach(old_inode, sinfo.i_pos);
+	fat_attach(old_inode, new_i_pos);
 	if (IS_DIRSYNC(new_dir)) {
 		err = fat_sync_inode(old_inode);
 		if (err)
@@ -1002,7 +994,7 @@ error_inode:
 	fat_detach(old_inode);
 	fat_attach(old_inode, old_sinfo.i_pos);
 	if (new_inode) {
-		fat_attach(new_inode, sinfo.i_pos);
+		fat_attach(new_inode, new_i_pos);
 		if (corrupt)
 			corrupt |= fat_sync_inode(new_inode);
 	} else {
diff --git a/fs/xattr.c b/fs/xattr.c
index 6acd5c63da9..f6e00c0e114 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -51,20 +51,29 @@ setxattr(struct dentry *d, char __user *name, void __user *value,
 		}
 	}
 
+	down(&d->d_inode->i_sem);
+	error = security_inode_setxattr(d, kname, kvalue, size, flags);
+	if (error)
+		goto out;
 	error = -EOPNOTSUPP;
 	if (d->d_inode->i_op && d->d_inode->i_op->setxattr) {
-		down(&d->d_inode->i_sem);
-		error = security_inode_setxattr(d, kname, kvalue, size, flags);
-		if (error)
-			goto out;
-		error = d->d_inode->i_op->setxattr(d, kname, kvalue, size, flags);
+		error = d->d_inode->i_op->setxattr(d, kname, kvalue,
+						   size, flags);
 		if (!error) {
 			fsnotify_xattr(d);
-			security_inode_post_setxattr(d, kname, kvalue, size, flags);
+			security_inode_post_setxattr(d, kname, kvalue,
+						     size, flags);
 		}
-out:
-		up(&d->d_inode->i_sem);
+	} else if (!strncmp(kname, XATTR_SECURITY_PREFIX,
+			    sizeof XATTR_SECURITY_PREFIX - 1)) {
+		const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1;
+		error = security_inode_setsecurity(d->d_inode, suffix, kvalue,
+						   size, flags);
+		if (!error)
+			fsnotify_xattr(d);
 	}
+out:
+	up(&d->d_inode->i_sem);
 	if (kvalue)
 		kfree(kvalue);
 	return error;
@@ -134,25 +143,34 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
 	if (size) {
 		if (size > XATTR_SIZE_MAX)
 			size = XATTR_SIZE_MAX;
-		kvalue = kmalloc(size, GFP_KERNEL);
+		kvalue = kzalloc(size, GFP_KERNEL);
 		if (!kvalue)
 			return -ENOMEM;
 	}
 
+	error = security_inode_getxattr(d, kname);
+	if (error)
+		goto out;
 	error = -EOPNOTSUPP;
-	if (d->d_inode->i_op && d->d_inode->i_op->getxattr) {
-		error = security_inode_getxattr(d, kname);
-		if (error)
-			goto out;
+	if (d->d_inode->i_op && d->d_inode->i_op->getxattr)
 		error = d->d_inode->i_op->getxattr(d, kname, kvalue, size);
-		if (error > 0) {
-			if (size && copy_to_user(value, kvalue, error))
-				error = -EFAULT;
-		} else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
-			/* The file system tried to returned a value bigger
-			   than XATTR_SIZE_MAX bytes. Not possible. */
-			error = -E2BIG;
-		}
+
+	if (!strncmp(kname, XATTR_SECURITY_PREFIX,
+		     sizeof XATTR_SECURITY_PREFIX - 1)) {
+		const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1;
+		int rv = security_inode_getsecurity(d->d_inode, suffix, kvalue,
+						    size, error);
+		/* Security module active: overwrite error value */
+		if (rv != -EOPNOTSUPP)
+			error = rv;
+	}
+	if (error > 0) {
+		if (size && copy_to_user(value, kvalue, error))
+			error = -EFAULT;
+	} else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
+		/* The file system tried to returned a value bigger
+		   than XATTR_SIZE_MAX bytes. Not possible. */
+		error = -E2BIG;
 	}
 out:
 	if (kvalue)
@@ -221,20 +239,24 @@ listxattr(struct dentry *d, char __user *list, size_t size)
 			return -ENOMEM;
 	}
 
+	error = security_inode_listxattr(d);
+	if (error)
+		goto out;
 	error = -EOPNOTSUPP;
 	if (d->d_inode->i_op && d->d_inode->i_op->listxattr) {
-		error = security_inode_listxattr(d);
-		if (error)
-			goto out;
 		error = d->d_inode->i_op->listxattr(d, klist, size);
-		if (error > 0) {
-			if (size && copy_to_user(list, klist, error))
-				error = -EFAULT;
-		} else if (error == -ERANGE && size >= XATTR_LIST_MAX) {
-			/* The file system tried to returned a list bigger
-			   than XATTR_LIST_MAX bytes. Not possible. */
-			error = -E2BIG;
-		}
+	} else {
+		error = security_inode_listsecurity(d->d_inode, klist, size);
+		if (size && error >= size)
+			error = -ERANGE;
+	}
+	if (error > 0) {
+		if (size && copy_to_user(list, klist, error))
+			error = -EFAULT;
+	} else if (error == -ERANGE && size >= XATTR_LIST_MAX) {
+		/* The file system tried to returned a list bigger
+		   than XATTR_LIST_MAX bytes. Not possible. */
+		error = -E2BIG;
 	}
 out:
 	if (klist)
@@ -307,6 +329,8 @@ removexattr(struct dentry *d, char __user *name)
 		down(&d->d_inode->i_sem);
 		error = d->d_inode->i_op->removexattr(d, kname);
 		up(&d->d_inode->i_sem);
+		if (!error)
+			fsnotify_xattr(d);
 	}
 out:
 	return error;
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index c92306f0fdc..bac27d66151 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -1,5 +1,3 @@
-menu "XFS support"
-
 config XFS_FS
 	tristate "XFS filesystem support"
 	select EXPORTFS if NFSD!=n
@@ -22,27 +20,11 @@ config XFS_FS
 
 config XFS_EXPORT
 	bool
-	default y if XFS_FS && EXPORTFS
-
-config XFS_RT
-	bool "Realtime support (EXPERIMENTAL)"
-	depends on XFS_FS && EXPERIMENTAL
-	help
-	  If you say Y here you will be able to mount and use XFS filesystems
-	  which contain a realtime subvolume. The realtime subvolume is a
-	  separate area of disk space where only file data is stored. The
-	  realtime subvolume is designed to provide very deterministic
-	  data rates suitable for media streaming applications.
-
-	  See the xfs man page in section 5 for a bit more information.
-
-	  This feature is unsupported at this time, is not yet fully
-	  functional, and may cause serious problems.
-
-	  If unsure, say N.
+	depends on XFS_FS && EXPORTFS
+	default y
 
 config XFS_QUOTA
-	bool "Quota support"
+	bool "XFS Quota support"
 	depends on XFS_FS
 	help
 	  If you say Y here, you will be able to set limits for disk usage on
@@ -59,7 +41,7 @@ config XFS_QUOTA
 	  they are completely independent subsystems.
 
 config XFS_SECURITY
-	bool "Security Label support"
+	bool "XFS Security Label support"
 	depends on XFS_FS
 	help
 	  Security labels support alternative access control models
@@ -71,7 +53,7 @@ config XFS_SECURITY
 	  extended attributes for inode security labels, say N.
 
 config XFS_POSIX_ACL
-	bool "POSIX ACL support"
+	bool "XFS POSIX ACL support"
 	depends on XFS_FS
 	help
 	  POSIX Access Control Lists (ACLs) support permissions for users and
@@ -82,4 +64,19 @@ config XFS_POSIX_ACL
 
 	  If you don't know what Access Control Lists are, say N.
 
-endmenu
+config XFS_RT
+	bool "XFS Realtime support (EXPERIMENTAL)"
+	depends on XFS_FS && EXPERIMENTAL
+	help
+	  If you say Y here you will be able to mount and use XFS filesystems
+	  which contain a realtime subvolume. The realtime subvolume is a
+	  separate area of disk space where only file data is stored. The
+	  realtime subvolume is designed to provide very deterministic
+	  data rates suitable for media streaming applications.
+
+	  See the xfs man page in section 5 for a bit more information.
+
+	  This feature is unsupported at this time, is not yet fully
+	  functional, and may cause serious problems.
+
+	  If unsure, say N.
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index d3ff7835463..49e3e7e5e3d 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -1,150 +1 @@
-#
-# Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of version 2 of the GNU General Public License as
-# published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it would be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# Further, this software is distributed without any warranty that it is
-# free of the rightful claim of any third person regarding infringement
-# or the like.  Any license provided herein, whether implied or
-# otherwise, applies only to this software file.  Patent licenses, if
-# any, provided herein do not apply to combinations of this program with
-# other software, or any other product whatsoever.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write the Free Software Foundation, Inc., 59
-# Temple Place - Suite 330, Boston MA 02111-1307, USA.
-#
-# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
-# Mountain View, CA  94043, or:
-#
-# http://www.sgi.com
-#
-# For further information regarding this notice, see:
-#
-# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
-#
-
-EXTRA_CFLAGS +=	 -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char
-
-ifeq ($(CONFIG_XFS_DEBUG),y)
-	EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG
-	EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING
-endif
-ifeq ($(CONFIG_XFS_TRACE),y)
-	EXTRA_CFLAGS += -DXFS_ALLOC_TRACE
-	EXTRA_CFLAGS += -DXFS_ATTR_TRACE
-	EXTRA_CFLAGS += -DXFS_BLI_TRACE
-	EXTRA_CFLAGS += -DXFS_BMAP_TRACE
-	EXTRA_CFLAGS += -DXFS_BMBT_TRACE
-	EXTRA_CFLAGS += -DXFS_DIR_TRACE
-	EXTRA_CFLAGS += -DXFS_DIR2_TRACE
-	EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
-	EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
-	EXTRA_CFLAGS += -DXFS_LOG_TRACE
-	EXTRA_CFLAGS += -DXFS_RW_TRACE
-	EXTRA_CFLAGS += -DPAGEBUF_TRACE
-	EXTRA_CFLAGS += -DXFS_VNODE_TRACE
-endif
-
-obj-$(CONFIG_XFS_FS)		+= xfs.o
-
-xfs-$(CONFIG_XFS_QUOTA)		+= $(addprefix quota/, \
-				   xfs_dquot.o \
-				   xfs_dquot_item.o \
-				   xfs_trans_dquot.o \
-				   xfs_qm_syscalls.o \
-				   xfs_qm_bhv.o \
-				   xfs_qm.o)
-ifeq ($(CONFIG_XFS_QUOTA),y)
-xfs-$(CONFIG_PROC_FS)		+= quota/xfs_qm_stats.o
-endif
-
-xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o
-xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
-xfs-$(CONFIG_PROC_FS)		+= linux-2.6/xfs_stats.o
-xfs-$(CONFIG_SYSCTL)		+= linux-2.6/xfs_sysctl.o
-xfs-$(CONFIG_COMPAT)		+= linux-2.6/xfs_ioctl32.o
-xfs-$(CONFIG_XFS_EXPORT)	+= linux-2.6/xfs_export.o
-
-
-xfs-y				+= xfs_alloc.o \
-				   xfs_alloc_btree.o \
-				   xfs_attr.o \
-				   xfs_attr_leaf.o \
-				   xfs_behavior.o \
-				   xfs_bit.o \
-				   xfs_bmap.o \
-				   xfs_bmap_btree.o \
-				   xfs_btree.o \
-				   xfs_buf_item.o \
-				   xfs_da_btree.o \
-				   xfs_dir.o \
-				   xfs_dir2.o \
-				   xfs_dir2_block.o \
-				   xfs_dir2_data.o \
-				   xfs_dir2_leaf.o \
-				   xfs_dir2_node.o \
-				   xfs_dir2_sf.o \
-				   xfs_dir_leaf.o \
-				   xfs_error.o \
-				   xfs_extfree_item.o \
-				   xfs_fsops.o \
-				   xfs_ialloc.o \
-				   xfs_ialloc_btree.o \
-				   xfs_iget.o \
-				   xfs_inode.o \
-				   xfs_inode_item.o \
-				   xfs_iocore.o \
-				   xfs_iomap.o \
-				   xfs_itable.o \
-				   xfs_dfrag.o \
-				   xfs_log.o \
-				   xfs_log_recover.o \
-				   xfs_macros.o \
-				   xfs_mount.o \
-				   xfs_rename.o \
-				   xfs_trans.o \
-				   xfs_trans_ail.o \
-				   xfs_trans_buf.o \
-				   xfs_trans_extfree.o \
-				   xfs_trans_inode.o \
-				   xfs_trans_item.o \
-				   xfs_utils.o \
-				   xfs_vfsops.o \
-				   xfs_vnodeops.o \
-				   xfs_rw.o \
-				   xfs_dmops.o \
-				   xfs_qmops.o
-
-xfs-$(CONFIG_XFS_TRACE)		+= xfs_dir2_trace.o
-
-# Objects in linux-2.6/
-xfs-y				+= $(addprefix linux-2.6/, \
-				   kmem.o \
-				   xfs_aops.o \
-				   xfs_buf.o \
-				   xfs_file.o \
-				   xfs_fs_subr.o \
-				   xfs_globals.o \
-				   xfs_ioctl.o \
-				   xfs_iops.o \
-				   xfs_lrw.o \
-				   xfs_super.o \
-				   xfs_vfs.o \
-				   xfs_vnode.o)
-
-# Objects in support/
-xfs-y				+= $(addprefix support/, \
-				   debug.o \
-				   move.o \
-				   qsort.o \
-				   uuid.o)
-
-xfs-$(CONFIG_XFS_TRACE)		+= support/ktrace.o
-
+include $(TOPDIR)/fs/xfs/Makefile-linux-$(VERSION).$(PATCHLEVEL)
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
new file mode 100644
index 00000000000..97bd4743b46
--- /dev/null
+++ b/fs/xfs/Makefile-linux-2.6
@@ -0,0 +1,151 @@
+#
+# Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+#
+# http://www.sgi.com
+#
+# For further information regarding this notice, see:
+#
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+EXTRA_CFLAGS +=	 -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char
+
+XFS_LINUX := linux-2.6
+
+ifeq ($(CONFIG_XFS_DEBUG),y)
+	EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG
+	EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING
+endif
+ifeq ($(CONFIG_XFS_TRACE),y)
+	EXTRA_CFLAGS += -DXFS_ALLOC_TRACE
+	EXTRA_CFLAGS += -DXFS_ATTR_TRACE
+	EXTRA_CFLAGS += -DXFS_BLI_TRACE
+	EXTRA_CFLAGS += -DXFS_BMAP_TRACE
+	EXTRA_CFLAGS += -DXFS_BMBT_TRACE
+	EXTRA_CFLAGS += -DXFS_DIR_TRACE
+	EXTRA_CFLAGS += -DXFS_DIR2_TRACE
+	EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
+	EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
+	EXTRA_CFLAGS += -DXFS_LOG_TRACE
+	EXTRA_CFLAGS += -DXFS_RW_TRACE
+	EXTRA_CFLAGS += -DPAGEBUF_TRACE
+	EXTRA_CFLAGS += -DXFS_VNODE_TRACE
+endif
+
+obj-$(CONFIG_XFS_FS)		+= xfs.o
+
+xfs-$(CONFIG_XFS_QUOTA)		+= $(addprefix quota/, \
+				   xfs_dquot.o \
+				   xfs_dquot_item.o \
+				   xfs_trans_dquot.o \
+				   xfs_qm_syscalls.o \
+				   xfs_qm_bhv.o \
+				   xfs_qm.o)
+
+ifeq ($(CONFIG_XFS_QUOTA),y)
+xfs-$(CONFIG_PROC_FS)		+= quota/xfs_qm_stats.o
+endif
+
+xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o
+xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
+xfs-$(CONFIG_PROC_FS)		+= $(XFS_LINUX)/xfs_stats.o
+xfs-$(CONFIG_SYSCTL)		+= $(XFS_LINUX)/xfs_sysctl.o
+xfs-$(CONFIG_COMPAT)		+= $(XFS_LINUX)/xfs_ioctl32.o
+xfs-$(CONFIG_XFS_EXPORT)	+= $(XFS_LINUX)/xfs_export.o
+
+
+xfs-y				+= xfs_alloc.o \
+				   xfs_alloc_btree.o \
+				   xfs_attr.o \
+				   xfs_attr_leaf.o \
+				   xfs_behavior.o \
+				   xfs_bit.o \
+				   xfs_bmap.o \
+				   xfs_bmap_btree.o \
+				   xfs_btree.o \
+				   xfs_buf_item.o \
+				   xfs_da_btree.o \
+				   xfs_dir.o \
+				   xfs_dir2.o \
+				   xfs_dir2_block.o \
+				   xfs_dir2_data.o \
+				   xfs_dir2_leaf.o \
+				   xfs_dir2_node.o \
+				   xfs_dir2_sf.o \
+				   xfs_dir_leaf.o \
+				   xfs_error.o \
+				   xfs_extfree_item.o \
+				   xfs_fsops.o \
+				   xfs_ialloc.o \
+				   xfs_ialloc_btree.o \
+				   xfs_iget.o \
+				   xfs_inode.o \
+				   xfs_inode_item.o \
+				   xfs_iocore.o \
+				   xfs_iomap.o \
+				   xfs_itable.o \
+				   xfs_dfrag.o \
+				   xfs_log.o \
+				   xfs_log_recover.o \
+				   xfs_mount.o \
+				   xfs_rename.o \
+				   xfs_trans.o \
+				   xfs_trans_ail.o \
+				   xfs_trans_buf.o \
+				   xfs_trans_extfree.o \
+				   xfs_trans_inode.o \
+				   xfs_trans_item.o \
+				   xfs_utils.o \
+				   xfs_vfsops.o \
+				   xfs_vnodeops.o \
+				   xfs_rw.o \
+				   xfs_dmops.o \
+				   xfs_qmops.o
+
+xfs-$(CONFIG_XFS_TRACE)		+= xfs_dir2_trace.o
+
+# Objects in linux/
+xfs-y				+= $(addprefix $(XFS_LINUX)/, \
+				   kmem.o \
+				   xfs_aops.o \
+				   xfs_buf.o \
+				   xfs_file.o \
+				   xfs_fs_subr.o \
+				   xfs_globals.o \
+				   xfs_ioctl.o \
+				   xfs_iops.o \
+				   xfs_lrw.o \
+				   xfs_super.o \
+				   xfs_vfs.o \
+				   xfs_vnode.o)
+
+# Objects in support/
+xfs-y				+= $(addprefix support/, \
+				   debug.o \
+				   move.o \
+				   uuid.o)
+
+xfs-$(CONFIG_XFS_TRACE)		+= support/ktrace.o
+
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 364ea8c386b..aba7fcf881a 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -1,54 +1,37 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/swap.h>
 #include <linux/blkdev.h>
-
 #include "time.h"
 #include "kmem.h"
 
 #define MAX_VMALLOCS	6
 #define MAX_SLAB_SIZE	0x20000
 
-
 void *
-kmem_alloc(size_t size, int flags)
+kmem_alloc(size_t size, unsigned int __nocast flags)
 {
 	int	retries = 0;
-	int	lflags = kmem_flags_convert(flags);
+	gfp_t	lflags = kmem_flags_convert(flags);
 	void	*ptr;
 
 	do {
@@ -67,7 +50,7 @@ kmem_alloc(size_t size, int flags)
 }
 
 void *
-kmem_zalloc(size_t size, int flags)
+kmem_zalloc(size_t size, unsigned int __nocast flags)
 {
 	void	*ptr;
 
@@ -89,7 +72,8 @@ kmem_free(void *ptr, size_t size)
 }
 
 void *
-kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags)
+kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
+	     unsigned int __nocast flags)
 {
 	void	*new;
 
@@ -104,10 +88,10 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags)
 }
 
 void *
-kmem_zone_alloc(kmem_zone_t *zone, int flags)
+kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
 {
 	int	retries = 0;
-	int	lflags = kmem_flags_convert(flags);
+	gfp_t	lflags = kmem_flags_convert(flags);
 	void	*ptr;
 
 	do {
@@ -123,7 +107,7 @@ kmem_zone_alloc(kmem_zone_t *zone, int flags)
 }
 
 void *
-kmem_zone_zalloc(kmem_zone_t *zone, int flags)
+kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
 {
 	void	*ptr;
 
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 1397b669b05..8f82c1a20dc 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_SUPPORT_KMEM_H__
 #define __XFS_SUPPORT_KMEM_H__
@@ -39,10 +25,10 @@
 /*
  * memory management routines
  */
-#define KM_SLEEP	0x0001
-#define KM_NOSLEEP	0x0002
-#define KM_NOFS		0x0004
-#define KM_MAYFAIL	0x0008
+#define KM_SLEEP	0x0001u
+#define KM_NOSLEEP	0x0002u
+#define KM_NOFS		0x0004u
+#define KM_MAYFAIL	0x0008u
 
 #define	kmem_zone	kmem_cache_s
 #define kmem_zone_t	kmem_cache_t
@@ -81,9 +67,9 @@ typedef unsigned long xfs_pflags_t;
 	*(NSTATEP) = *(OSTATEP);	\
 } while (0)
 
-static __inline unsigned int kmem_flags_convert(int flags)
+static __inline gfp_t kmem_flags_convert(unsigned int __nocast flags)
 {
-	int	lflags = __GFP_NOWARN;	/* we'll report problems, if need be */
+	gfp_t	lflags = __GFP_NOWARN;	/* we'll report problems, if need be */
 
 #ifdef DEBUG
 	if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) {
@@ -102,7 +88,7 @@ static __inline unsigned int kmem_flags_convert(int flags)
 		if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS))
 			lflags &= ~__GFP_FS;
 	}
-        
+
         return lflags;
 }
 
@@ -125,16 +111,16 @@ kmem_zone_destroy(kmem_zone_t *zone)
 		BUG();
 }
 
-extern void	    *kmem_zone_zalloc(kmem_zone_t *, int);
-extern void	    *kmem_zone_alloc(kmem_zone_t *, int);
+extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
+extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
 
-extern void	    *kmem_alloc(size_t, int);
-extern void	    *kmem_realloc(void *, size_t, size_t, int);
-extern void	    *kmem_zalloc(size_t, int);
-extern void         kmem_free(void *, size_t);
+extern void *kmem_alloc(size_t, unsigned int __nocast);
+extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
+extern void *kmem_zalloc(size_t, unsigned int __nocast);
+extern void  kmem_free(void *, size_t);
 
 typedef struct shrinker *kmem_shaker_t;
-typedef int (*kmem_shake_func_t)(int, unsigned int);
+typedef int (*kmem_shake_func_t)(int, gfp_t);
 
 static __inline kmem_shaker_t
 kmem_shake_register(kmem_shake_func_t sfunc)
@@ -149,7 +135,7 @@ kmem_shake_deregister(kmem_shaker_t shrinker)
 }
 
 static __inline int
-kmem_shake_allow(unsigned int gfp_mask)
+kmem_shake_allow(gfp_t gfp_mask)
 {
 	return (gfp_mask & __GFP_WAIT);
 }
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
index d2c11a098ff..16b44c3c236 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_SUPPORT_MRLOCK_H__
 #define __XFS_SUPPORT_MRLOCK_H__
diff --git a/fs/xfs/linux-2.6/mutex.h b/fs/xfs/linux-2.6/mutex.h
index 0b296bb944c..ce773d89a92 100644
--- a/fs/xfs/linux-2.6/mutex.h
+++ b/fs/xfs/linux-2.6/mutex.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_SUPPORT_MUTEX_H__
 #define __XFS_SUPPORT_MUTEX_H__
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
index 30b67b4e1cb..194a84490bd 100644
--- a/fs/xfs/linux-2.6/sema.h
+++ b/fs/xfs/linux-2.6/sema.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_SUPPORT_SEMA_H__
 #define __XFS_SUPPORT_SEMA_H__
diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h
index bcf60a0b8df..50a6191178f 100644
--- a/fs/xfs/linux-2.6/spin.h
+++ b/fs/xfs/linux-2.6/spin.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_SUPPORT_SPIN_H__
 #define __XFS_SUPPORT_SPIN_H__
@@ -45,6 +31,9 @@
 typedef spinlock_t lock_t;
 
 #define SPLDECL(s)			unsigned long s
+#ifndef DEFINE_SPINLOCK
+#define DEFINE_SPINLOCK(s)		spinlock_t s = SPIN_LOCK_UNLOCKED
+#endif
 
 #define spinlock_init(lock, name)	spin_lock_init(lock)
 #define	spinlock_destroy(lock)
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
index 821d3167e05..9a8ad481b00 100644
--- a/fs/xfs/linux-2.6/sv.h
+++ b/fs/xfs/linux-2.6/sv.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_SUPPORT_SV_H__
 #define __XFS_SUPPORT_SV_H__
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h
index 6c6fd0faa8e..387e695a184 100644
--- a/fs/xfs/linux-2.6/time.h
+++ b/fs/xfs/linux-2.6/time.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_SUPPORT_TIME_H__
 #define __XFS_SUPPORT_TIME_H__
@@ -39,8 +25,7 @@ typedef struct timespec timespec_t;
 
 static inline void delay(long ticks)
 {
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	schedule_timeout(ticks);
+	schedule_timeout_uninterruptible(ticks);
 }
 
 static inline void nanotime(struct timespec *tvp)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a3a4b5aaf5d..c6108971b4e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1,39 +1,26 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_trans.h"
@@ -42,13 +29,13 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
 #include "xfs_iomap.h"
@@ -104,66 +91,114 @@ xfs_page_trace(
 #define xfs_page_trace(tag, inode, page, mask)
 #endif
 
-void
-linvfs_unwritten_done(
-	struct buffer_head	*bh,
-	int			uptodate)
+/*
+ * Schedule IO completion handling on a xfsdatad if this was
+ * the final hold on this ioend.
+ */
+STATIC void
+xfs_finish_ioend(
+	xfs_ioend_t		*ioend)
 {
-	xfs_buf_t		*pb = (xfs_buf_t *)bh->b_private;
+	if (atomic_dec_and_test(&ioend->io_remaining))
+		queue_work(xfsdatad_workqueue, &ioend->io_work);
+}
 
-	ASSERT(buffer_unwritten(bh));
-	bh->b_end_io = NULL;
-	clear_buffer_unwritten(bh);
-	if (!uptodate)
-		pagebuf_ioerror(pb, EIO);
-	if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
-		pagebuf_iodone(pb, 1, 1);
-	}
-	end_buffer_async_write(bh, uptodate);
+STATIC void
+xfs_destroy_ioend(
+	xfs_ioend_t		*ioend)
+{
+	vn_iowake(ioend->io_vnode);
+	mempool_free(ioend, xfs_ioend_pool);
 }
 
 /*
  * Issue transactions to convert a buffer range from unwritten
- * to written extents (buffered IO).
+ * to written extents.
  */
 STATIC void
-linvfs_unwritten_convert(
-	xfs_buf_t	*bp)
+xfs_end_bio_unwritten(
+	void			*data)
 {
-	vnode_t		*vp = XFS_BUF_FSPRIVATE(bp, vnode_t *);
-	int		error;
+	xfs_ioend_t		*ioend = data;
+	vnode_t			*vp = ioend->io_vnode;
+	xfs_off_t		offset = ioend->io_offset;
+	size_t			size = ioend->io_size;
+	struct buffer_head	*bh, *next;
+	int			error;
+
+	if (ioend->io_uptodate)
+		VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
+
+	/* ioend->io_buffer_head is only non-NULL for buffered I/O */
+	for (bh = ioend->io_buffer_head; bh; bh = next) {
+		next = bh->b_private;
 
-	BUG_ON(atomic_read(&bp->pb_hold) < 1);
-	VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp),
-			BMAPI_UNWRITTEN, NULL, NULL, error);
-	XFS_BUF_SET_FSPRIVATE(bp, NULL);
-	XFS_BUF_CLR_IODONE_FUNC(bp);
-	XFS_BUF_UNDATAIO(bp);
-	iput(LINVFS_GET_IP(vp));
-	pagebuf_iodone(bp, 0, 0);
+		bh->b_end_io = NULL;
+		clear_buffer_unwritten(bh);
+		end_buffer_async_write(bh, ioend->io_uptodate);
+	}
+
+	xfs_destroy_ioend(ioend);
 }
 
 /*
- * Issue transactions to convert a buffer range from unwritten
- * to written extents (direct IO).
+ * Allocate and initialise an IO completion structure.
+ * We need to track unwritten extent write completion here initially.
+ * We'll need to extend this for updating the ondisk inode size later
+ * (vs. incore size).
  */
-STATIC void
-linvfs_unwritten_convert_direct(
-	struct kiocb	*iocb,
-	loff_t		offset,
-	ssize_t		size,
-	void		*private)
+STATIC xfs_ioend_t *
+xfs_alloc_ioend(
+	struct inode		*inode)
 {
-	struct inode	*inode = iocb->ki_filp->f_dentry->d_inode;
-	ASSERT(!private || inode == (struct inode *)private);
+	xfs_ioend_t		*ioend;
 
-	/* private indicates an unwritten extent lay beneath this IO */
-	if (private && size > 0) {
-		vnode_t	*vp = LINVFS_GET_VP(inode);
-		int	error;
+	ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
 
-		VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
-	}
+	/*
+	 * Set the count to 1 initially, which will prevent an I/O
+	 * completion callback from happening before we have started
+	 * all the I/O from calling the completion routine too early.
+	 */
+	atomic_set(&ioend->io_remaining, 1);
+	ioend->io_uptodate = 1; /* cleared if any I/O fails */
+	ioend->io_vnode = LINVFS_GET_VP(inode);
+	ioend->io_buffer_head = NULL;
+	atomic_inc(&ioend->io_vnode->v_iocount);
+	ioend->io_offset = 0;
+	ioend->io_size = 0;
+
+	INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+
+	return ioend;
+}
+
+void
+linvfs_unwritten_done(
+	struct buffer_head	*bh,
+	int			uptodate)
+{
+	xfs_ioend_t		*ioend = bh->b_private;
+	static spinlock_t	unwritten_done_lock = SPIN_LOCK_UNLOCKED;
+	unsigned long		flags;
+
+	ASSERT(buffer_unwritten(bh));
+	bh->b_end_io = NULL;
+
+	if (!uptodate)
+		ioend->io_uptodate = 0;
+
+	/*
+	 * Deep magic here.  We reuse b_private in the buffer_heads to build
+	 * a chain for completing the I/O from user context after we've issued
+	 * a transaction to convert the unwritten extent.
+	 */
+	spin_lock_irqsave(&unwritten_done_lock, flags);
+	bh->b_private = ioend->io_buffer_head;
+	ioend->io_buffer_head = bh;
+	spin_unlock_irqrestore(&unwritten_done_lock, flags);
+
+	xfs_finish_ioend(ioend);
 }
 
 STATIC int
@@ -255,7 +290,7 @@ xfs_probe_unwritten_page(
 	struct address_space	*mapping,
 	pgoff_t			index,
 	xfs_iomap_t		*iomapp,
-	xfs_buf_t		*pb,
+	xfs_ioend_t		*ioend,
 	unsigned long		max_offset,
 	unsigned long		*fsbs,
 	unsigned int            bbits)
@@ -283,7 +318,7 @@ xfs_probe_unwritten_page(
 				break;
 			xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
 			set_buffer_unwritten_io(bh);
-			bh->b_private = pb;
+			bh->b_private = ioend;
 			p_offset += bh->b_size;
 			(*fsbs)++;
 		} while ((bh = bh->b_this_page) != head);
@@ -434,34 +469,15 @@ xfs_map_unwritten(
 {
 	struct buffer_head	*bh = curr;
 	xfs_iomap_t		*tmp;
-	xfs_buf_t		*pb;
-	loff_t			offset, size;
+	xfs_ioend_t		*ioend;
+	loff_t			offset;
 	unsigned long		nblocks = 0;
 
 	offset = start_page->index;
 	offset <<= PAGE_CACHE_SHIFT;
 	offset += p_offset;
 
-	/* get an "empty" pagebuf to manage IO completion
-	 * Proper values will be set before returning */
-	pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0);
-	if (!pb)
-		return -EAGAIN;
-
-	/* Take a reference to the inode to prevent it from
-	 * being reclaimed while we have outstanding unwritten
-	 * extent IO on it.
-	 */
-	if ((igrab(inode)) != inode) {
-		pagebuf_free(pb);
-		return -EAGAIN;
-	}
-
-	/* Set the count to 1 initially, this will stop an I/O
-	 * completion callout which happens before we have started
-	 * all the I/O from calling pagebuf_iodone too early.
-	 */
-	atomic_set(&pb->pb_io_remaining, 1);
+	ioend = xfs_alloc_ioend(inode);
 
 	/* First map forwards in the page consecutive buffers
 	 * covering this unwritten extent
@@ -474,12 +490,12 @@ xfs_map_unwritten(
 			break;
 		xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
 		set_buffer_unwritten_io(bh);
-		bh->b_private = pb;
+		bh->b_private = ioend;
 		p_offset += bh->b_size;
 		nblocks++;
 	} while ((bh = bh->b_this_page) != head);
 
-	atomic_add(nblocks, &pb->pb_io_remaining);
+	atomic_add(nblocks, &ioend->io_remaining);
 
 	/* If we reached the end of the page, map forwards in any
 	 * following pages which are also covered by this extent.
@@ -496,13 +512,13 @@ xfs_map_unwritten(
 		tloff = min(tlast, tloff);
 		for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
 			page = xfs_probe_unwritten_page(mapping,
-						tindex, iomapp, pb,
+						tindex, iomapp, ioend,
 						PAGE_CACHE_SIZE, &bs, bbits);
 			if (!page)
 				break;
 			nblocks += bs;
-			atomic_add(bs, &pb->pb_io_remaining);
-			xfs_convert_page(inode, page, iomapp, wbc, pb,
+			atomic_add(bs, &ioend->io_remaining);
+			xfs_convert_page(inode, page, iomapp, wbc, ioend,
 							startio, all_bh);
 			/* stop if converting the next page might add
 			 * enough blocks that the corresponding byte
@@ -514,12 +530,12 @@ xfs_map_unwritten(
 		if (tindex == tlast &&
 		    (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
 			page = xfs_probe_unwritten_page(mapping,
-							tindex, iomapp, pb,
+							tindex, iomapp, ioend,
 							pg_offset, &bs, bbits);
 			if (page) {
 				nblocks += bs;
-				atomic_add(bs, &pb->pb_io_remaining);
-				xfs_convert_page(inode, page, iomapp, wbc, pb,
+				atomic_add(bs, &ioend->io_remaining);
+				xfs_convert_page(inode, page, iomapp, wbc, ioend,
 							startio, all_bh);
 				if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
 					goto enough;
@@ -528,21 +544,9 @@ xfs_map_unwritten(
 	}
 
 enough:
-	size = nblocks;		/* NB: using 64bit number here */
-	size <<= block_bits;	/* convert fsb's to byte range */
-
-	XFS_BUF_DATAIO(pb);
-	XFS_BUF_ASYNC(pb);
-	XFS_BUF_SET_SIZE(pb, size);
-	XFS_BUF_SET_COUNT(pb, size);
-	XFS_BUF_SET_OFFSET(pb, offset);
-	XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
-	XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert);
-
-	if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
-		pagebuf_iodone(pb, 1, 1);
-	}
-
+	ioend->io_size = (xfs_off_t)nblocks << block_bits;
+	ioend->io_offset = offset;
+	xfs_finish_ioend(ioend);
 	return 0;
 }
 
@@ -744,8 +748,9 @@ xfs_page_state_convert(
 	if (page->index >= end_index) {
 		if ((page->index >= end_index + 1) ||
 		    !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
-			err = -EIO;
-			goto error;
+			if (startio)
+				unlock_page(page);
+			return 0;
 		}
 	}
 
@@ -787,7 +792,7 @@ xfs_page_state_convert(
 				continue;
 			if (!iomp) {
 				err = xfs_map_blocks(inode, offset, len, &iomap,
-						BMAPI_READ|BMAPI_IGNSTATE);
+						BMAPI_WRITE|BMAPI_IGNSTATE);
 				if (err) {
 					goto error;
 				}
@@ -931,15 +936,18 @@ __linvfs_get_block(
 {
 	vnode_t			*vp = LINVFS_GET_VP(inode);
 	xfs_iomap_t		iomap;
+	xfs_off_t		offset;
+	ssize_t			size;
 	int			retpbbm = 1;
 	int			error;
-	ssize_t			size;
-	loff_t			offset = (loff_t)iblock << inode->i_blkbits;
 
-	if (blocks)
-		size = blocks << inode->i_blkbits;
-	else
+	if (blocks) {
+		offset = blocks << inode->i_blkbits;	/* 64 bit goodness */
+		size = (ssize_t) min_t(xfs_off_t, offset, LONG_MAX);
+	} else {
 		size = 1 << inode->i_blkbits;
+	}
+	offset = (xfs_off_t)iblock << inode->i_blkbits;
 
 	VOP_BMAP(vp, offset, size,
 		create ? flags : BMAPI_READ, &iomap, &retpbbm, error);
@@ -950,8 +958,8 @@ __linvfs_get_block(
 		return 0;
 
 	if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
-		xfs_daddr_t		bn;
-		loff_t			delta;
+		xfs_daddr_t	bn;
+		xfs_off_t	delta;
 
 		/* For unwritten extents do not report a disk address on
 		 * the read case (treat as if we're reading into a hole).
@@ -983,9 +991,8 @@ __linvfs_get_block(
 	 */
 	if (create &&
 	    ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
-	     (offset >= i_size_read(inode)) || (iomap.iomap_flags & IOMAP_NEW))) {
+	     (offset >= i_size_read(inode)) || (iomap.iomap_flags & IOMAP_NEW)))
 		set_buffer_new(bh_result);
-	}
 
 	if (iomap.iomap_flags & IOMAP_DELAY) {
 		BUG_ON(direct);
@@ -997,9 +1004,11 @@ __linvfs_get_block(
 	}
 
 	if (blocks) {
-		bh_result->b_size = (ssize_t)min(
-			(loff_t)(iomap.iomap_bsize - iomap.iomap_delta),
-			(loff_t)(blocks << inode->i_blkbits));
+		ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0);
+		offset = min_t(xfs_off_t,
+				iomap.iomap_bsize - iomap.iomap_delta,
+				blocks << inode->i_blkbits);
+		bh_result->b_size = (u32) min_t(xfs_off_t, UINT_MAX, offset);
 	}
 
 	return 0;
@@ -1028,6 +1037,44 @@ linvfs_get_blocks_direct(
 					create, 1, BMAPI_WRITE|BMAPI_DIRECT);
 }
 
+STATIC void
+linvfs_end_io_direct(
+	struct kiocb	*iocb,
+	loff_t		offset,
+	ssize_t		size,
+	void		*private)
+{
+	xfs_ioend_t	*ioend = iocb->private;
+
+	/*
+	 * Non-NULL private data means we need to issue a transaction to
+	 * convert a range from unwritten to written extents.  This needs
+	 * to happen from process contect but aio+dio I/O completion
+	 * happens from irq context so we need to defer it to a workqueue.
+	 * This is not nessecary for synchronous direct I/O, but we do
+	 * it anyway to keep the code uniform and simpler.
+	 *
+	 * The core direct I/O code might be changed to always call the
+	 * completion handler in the future, in which case all this can
+	 * go away.
+	 */
+	if (private && size > 0) {
+		ioend->io_offset = offset;
+		ioend->io_size = size;
+		xfs_finish_ioend(ioend);
+	} else {
+		ASSERT(size >= 0);
+		xfs_destroy_ioend(ioend);
+	}
+
+	/*
+	 * blockdev_direct_IO can return an error even afer the I/O
+	 * completion handler was called.  Thus we need to protect
+	 * against double-freeing.
+	 */
+	iocb->private = NULL;
+}
+
 STATIC ssize_t
 linvfs_direct_IO(
 	int			rw,
@@ -1042,16 +1089,23 @@ linvfs_direct_IO(
 	xfs_iomap_t	iomap;
 	int		maps = 1;
 	int		error;
+	ssize_t		ret;
 
 	VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error);
 	if (error)
 		return -error;
 
-	return blockdev_direct_IO_own_locking(rw, iocb, inode,
+	iocb->private = xfs_alloc_ioend(inode);
+
+	ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
 		iomap.iomap_target->pbr_bdev,
 		iov, offset, nr_segs,
 		linvfs_get_blocks_direct,
-		linvfs_unwritten_convert_direct);
+		linvfs_end_io_direct);
+
+	if (unlikely(ret <= 0 && iocb->private))
+		xfs_destroy_ioend(iocb->private);
+	return ret;
 }
 
 
@@ -1202,6 +1256,16 @@ out_unlock:
 	return error;
 }
 
+STATIC int
+linvfs_invalidate_page(
+	struct page		*page,
+	unsigned long		offset)
+{
+	xfs_page_trace(XFS_INVALIDPAGE_ENTER,
+			page->mapping->host, page, offset);
+	return block_invalidatepage(page, offset);
+}
+
 /*
  * Called to move a page into cleanable state - and from there
  * to be released. Possibly the page is already clean. We always
@@ -1224,7 +1288,7 @@ out_unlock:
 STATIC int
 linvfs_release_page(
 	struct page		*page,
-	int			gfp_mask)
+	gfp_t			gfp_mask)
 {
 	struct inode		*inode = page->mapping->host;
 	int			dirty, delalloc, unmapped, unwritten;
@@ -1279,6 +1343,7 @@ struct address_space_operations linvfs_aops = {
 	.writepage		= linvfs_writepage,
 	.sync_page		= block_sync_page,
 	.releasepage		= linvfs_release_page,
+	.invalidatepage		= linvfs_invalidate_page,
 	.prepare_write		= linvfs_prepare_write,
 	.commit_write		= generic_commit_write,
 	.bmap			= linvfs_bmap,
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
new file mode 100644
index 00000000000..4720758a9ad
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_AOPS_H__
+#define __XFS_AOPS_H__
+
+extern struct workqueue_struct *xfsdatad_workqueue;
+extern mempool_t *xfs_ioend_pool;
+
+typedef void (*xfs_ioend_func_t)(void *);
+
+typedef struct xfs_ioend {
+	unsigned int		io_uptodate;	/* I/O status register */
+	atomic_t		io_remaining;	/* hold count */
+	struct vnode		*io_vnode;	/* file being written to */
+	struct buffer_head	*io_buffer_head;/* buffer linked list head */
+	size_t			io_size;	/* size of the extent */
+	xfs_off_t		io_offset;	/* offset in the file */
+	struct work_struct	io_work;	/* xfsdatad work queue */
+} xfs_ioend_t;
+
+#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index df0cba239dd..6fe21d2b884 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1,46 +1,20 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-/*
- *	The xfs_buf.c code provides an abstract buffer cache model on top
- *	of the Linux page cache.  Cached metadata blocks for a file system
- *	are hashed to the inode for the block device.  xfs_buf.c assembles
- *	buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O.
- *
- *      Written by Steve Lord, Jim Mostek, Russell Cattelan
- *		    and Rajagopal Ananthanarayanan ("ananth") at SGI.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include <linux/stddef.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
@@ -54,24 +28,16 @@
 #include <linux/percpu.h>
 #include <linux/blkdev.h>
 #include <linux/hash.h>
-
+#include <linux/kthread.h>
 #include "xfs_linux.h"
 
-/*
- * File wide globals
- */
-
 STATIC kmem_cache_t *pagebuf_zone;
 STATIC kmem_shaker_t pagebuf_shake;
-STATIC int xfsbufd_wakeup(int, unsigned int);
+STATIC int xfsbufd_wakeup(int, gfp_t);
 STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
 
 STATIC struct workqueue_struct *xfslogd_workqueue;
-STATIC struct workqueue_struct *xfsdatad_workqueue;
-
-/*
- * Pagebuf debugging
- */
+struct workqueue_struct *xfsdatad_workqueue;
 
 #ifdef PAGEBUF_TRACE
 void
@@ -111,10 +77,6 @@ ktrace_t *pagebuf_trace_buf;
 # define PB_GET_OWNER(pb)	do { } while (0)
 #endif
 
-/*
- * Pagebuf allocation / freeing.
- */
-
 #define pb_to_gfp(flags) \
 	((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \
 	  ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
@@ -122,7 +84,6 @@ ktrace_t *pagebuf_trace_buf;
 #define pb_to_km(flags) \
 	 (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
 
-
 #define pagebuf_allocate(flags) \
 	kmem_zone_alloc(pagebuf_zone, pb_to_km(flags))
 #define pagebuf_deallocate(pb) \
@@ -180,8 +141,9 @@ set_page_region(
 	size_t		offset,
 	size_t		length)
 {
-	page->private |= page_region_mask(offset, length);
-	if (page->private == ~0UL)
+	set_page_private(page,
+		page_private(page) | page_region_mask(offset, length));
+	if (page_private(page) == ~0UL)
 		SetPageUptodate(page);
 }
 
@@ -193,7 +155,7 @@ test_page_region(
 {
 	unsigned long	mask = page_region_mask(offset, length);
 
-	return (mask && (page->private & mask) == mask);
+	return (mask && (page_private(page) & mask) == mask);
 }
 
 /*
@@ -284,7 +246,7 @@ _pagebuf_initialize(
 	 * most cases but may be reset (e.g. XFS recovery).
 	 */
 	pb->pb_buffer_length = pb->pb_count_desired = range_length;
-	pb->pb_flags = flags | PBF_NONE;
+	pb->pb_flags = flags;
 	pb->pb_bn = XFS_BUF_DADDR_NULL;
 	atomic_set(&pb->pb_pin_count, 0);
 	init_waitqueue_head(&pb->pb_waiters);
@@ -382,7 +344,7 @@ _pagebuf_lookup_pages(
 	size_t			blocksize = bp->pb_target->pbr_bsize;
 	size_t			size = bp->pb_count_desired;
 	size_t			nbytes, offset;
-	int			gfp_mask = pb_to_gfp(flags);
+	gfp_t			gfp_mask = pb_to_gfp(flags);
 	unsigned short		page_count, i;
 	pgoff_t			first;
 	loff_t			end;
@@ -456,14 +418,8 @@ _pagebuf_lookup_pages(
 			unlock_page(bp->pb_pages[i]);
 	}
 
-	if (page_count) {
-		/* if we have any uptodate pages, mark that in the buffer */
-		bp->pb_flags &= ~PBF_NONE;
-
-		/* if some pages aren't uptodate, mark that in the buffer */
-		if (page_count != bp->pb_page_count)
-			bp->pb_flags |= PBF_PARTIAL;
-	}
+	if (page_count == bp->pb_page_count)
+		bp->pb_flags |= PBF_DONE;
 
 	PB_TRACE(bp, "lookup_pages", (long)page_count);
 	return error;
@@ -590,8 +546,10 @@ found:
 		PB_SET_OWNER(pb);
 	}
 
-	if (pb->pb_flags & PBF_STALE)
+	if (pb->pb_flags & PBF_STALE) {
+		ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0);
 		pb->pb_flags &= PBF_MAPPED;
+	}
 	PB_TRACE(pb, "got_lock", 0);
 	XFS_STATS_INC(pb_get_locked);
 	return (pb);
@@ -672,7 +630,7 @@ xfs_buf_read_flags(
 
 	pb = xfs_buf_get_flags(target, ioff, isize, flags);
 	if (pb) {
-		if (PBF_NOT_DONE(pb)) {
+		if (!XFS_BUF_ISDONE(pb)) {
 			PB_TRACE(pb, "read", (unsigned long)flags);
 			XFS_STATS_INC(pb_get_read);
 			pagebuf_iostart(pb, flags);
@@ -700,25 +658,6 @@ xfs_buf_read_flags(
 }
 
 /*
- * Create a skeletal pagebuf (no pages associated with it).
- */
-xfs_buf_t *
-pagebuf_lookup(
-	xfs_buftarg_t		*target,
-	loff_t			ioff,
-	size_t			isize,
-	page_buf_flags_t	flags)
-{
-	xfs_buf_t		*pb;
-
-	pb = pagebuf_allocate(flags);
-	if (pb) {
-		_pagebuf_initialize(pb, target, ioff, isize, flags);
-	}
-	return pb;
-}
-
-/*
  * If we are not low on memory then do the readahead in a deadlock
  * safe manner.
  */
@@ -828,7 +767,7 @@ pagebuf_get_no_daddr(
 	bp = pagebuf_allocate(0);
 	if (unlikely(bp == NULL))
 		goto fail;
-	_pagebuf_initialize(bp, target, 0, len, PBF_FORCEIO);
+	_pagebuf_initialize(bp, target, 0, len, 0);
 
  try_again:
 	data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
@@ -891,44 +830,24 @@ pagebuf_rele(
 
 	PB_TRACE(pb, "rele", pb->pb_relse);
 
-	/*
-	 * pagebuf_lookup buffers are not hashed, not delayed write,
-	 * and don't have their own release routines.  Special case.
-	 */
-	if (unlikely(!hash)) {
-		ASSERT(!pb->pb_relse);
-		if (atomic_dec_and_test(&pb->pb_hold))
-			xfs_buf_free(pb);
-		return;
-	}
-
 	if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) {
-		int		do_free = 1;
-
 		if (pb->pb_relse) {
 			atomic_inc(&pb->pb_hold);
 			spin_unlock(&hash->bh_lock);
 			(*(pb->pb_relse)) (pb);
-			spin_lock(&hash->bh_lock);
-			do_free = 0;
-		}
-
-		if (pb->pb_flags & PBF_DELWRI) {
-			pb->pb_flags |= PBF_ASYNC;
-			atomic_inc(&pb->pb_hold);
-			pagebuf_delwri_queue(pb, 0);
-			do_free = 0;
 		} else if (pb->pb_flags & PBF_FS_MANAGED) {
-			do_free = 0;
-		}
-
-		if (do_free) {
-			list_del_init(&pb->pb_hash_list);
 			spin_unlock(&hash->bh_lock);
-			pagebuf_free(pb);
 		} else {
+			ASSERT(!(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)));
+			list_del_init(&pb->pb_hash_list);
 			spin_unlock(&hash->bh_lock);
+			pagebuf_free(pb);
 		}
+	} else {
+		/*
+		 * Catch reference count leaks
+		 */
+		ASSERT(atomic_read(&pb->pb_hold) >= 0);
 	}
 }
 
@@ -1006,13 +925,24 @@ pagebuf_lock(
  *	pagebuf_unlock
  *
  *	pagebuf_unlock releases the lock on the buffer object created by
- *	pagebuf_lock or pagebuf_cond_lock (not any
- *	pinning of underlying pages created by pagebuf_pin).
+ *	pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages
+ *	created by pagebuf_pin).
+ *
+ *	If the buffer is marked delwri but is not queued, do so before we
+ *	unlock the buffer as we need to set flags correctly. We also need to
+ *	take a reference for the delwri queue because the unlocker is going to
+ *	drop their's and they don't know we just queued it.
  */
 void
 pagebuf_unlock(				/* unlock buffer		*/
 	xfs_buf_t		*pb)	/* buffer to unlock		*/
 {
+	if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) {
+		atomic_inc(&pb->pb_hold);
+		pb->pb_flags |= PBF_ASYNC;
+		pagebuf_delwri_queue(pb, 0);
+	}
+
 	PB_CLEAR_OWNER(pb);
 	up(&pb->pb_sema);
 	PB_TRACE(pb, "unlock", 0);
@@ -1124,21 +1054,18 @@ pagebuf_iodone_work(
 void
 pagebuf_iodone(
 	xfs_buf_t		*pb,
-	int			dataio,
 	int			schedule)
 {
 	pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
-	if (pb->pb_error == 0) {
-		pb->pb_flags &= ~(PBF_PARTIAL | PBF_NONE);
-	}
+	if (pb->pb_error == 0)
+		pb->pb_flags |= PBF_DONE;
 
 	PB_TRACE(pb, "iodone", pb->pb_iodone);
 
 	if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {
 		if (schedule) {
 			INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);
-			queue_work(dataio ? xfsdatad_workqueue :
-				xfslogd_workqueue, &pb->pb_iodone_work);
+			queue_work(xfslogd_workqueue, &pb->pb_iodone_work);
 		} else {
 			pagebuf_iodone_work(pb);
 		}
@@ -1238,7 +1165,7 @@ _pagebuf_iodone(
 {
 	if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
 		pb->pb_locked = 0;
-		pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), schedule);
+		pagebuf_iodone(pb, schedule);
 	}
 }
 
@@ -1249,8 +1176,8 @@ bio_end_io_pagebuf(
 	int			error)
 {
 	xfs_buf_t		*pb = (xfs_buf_t *)bio->bi_private;
-	unsigned int		i, blocksize = pb->pb_target->pbr_bsize;
-	struct bio_vec		*bvec = bio->bi_io_vec;
+	unsigned int		blocksize = pb->pb_target->pbr_bsize;
+	struct bio_vec		*bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 
 	if (bio->bi_size)
 		return 1;
@@ -1258,10 +1185,12 @@ bio_end_io_pagebuf(
 	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
 		pb->pb_error = EIO;
 
-	for (i = 0; i < bio->bi_vcnt; i++, bvec++) {
+	do {
 		struct page	*page = bvec->bv_page;
 
-		if (pb->pb_error) {
+		if (unlikely(pb->pb_error)) {
+			if (pb->pb_flags & PBF_READ)
+				ClearPageUptodate(page);
 			SetPageError(page);
 		} else if (blocksize == PAGE_CACHE_SIZE) {
 			SetPageUptodate(page);
@@ -1270,10 +1199,13 @@ bio_end_io_pagebuf(
 			set_page_region(page, bvec->bv_offset, bvec->bv_len);
 		}
 
+		if (--bvec >= bio->bi_io_vec)
+			prefetchw(&bvec->bv_page->flags);
+
 		if (_pagebuf_iolocked(pb)) {
 			unlock_page(page);
 		}
-	}
+	} while (bvec >= bio->bi_io_vec);
 
 	_pagebuf_iodone(pb, 1);
 	bio_put(bio);
@@ -1302,6 +1234,11 @@ _pagebuf_ioapply(
 		rw = (pb->pb_flags & PBF_READ) ? READ : WRITE;
 	}
 
+	if (pb->pb_flags & PBF_ORDERED) {
+		ASSERT(!(pb->pb_flags & PBF_READ));
+		rw = WRITE_BARRIER;
+	}
+
 	/* Special code path for reading a sub page size pagebuf in --
 	 * we populate up the whole page, and hence the other metadata
 	 * in the same page.  This optimization is only valid when the
@@ -1511,6 +1448,11 @@ again:
 			ASSERT(btp == bp->pb_target);
 			if (!(bp->pb_flags & PBF_FS_MANAGED)) {
 				spin_unlock(&hash->bh_lock);
+				/*
+				 * Catch superblock reference count leaks
+				 * immediately
+				 */
+				BUG_ON(bp->pb_bn == 0);
 				delay(100);
 				goto again;
 			}
@@ -1686,17 +1628,20 @@ pagebuf_delwri_queue(
 	int			unlock)
 {
 	PB_TRACE(pb, "delwri_q", (long)unlock);
-	ASSERT(pb->pb_flags & PBF_DELWRI);
+	ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) ==
+					(PBF_DELWRI|PBF_ASYNC));
 
 	spin_lock(&pbd_delwrite_lock);
 	/* If already in the queue, dequeue and place at tail */
 	if (!list_empty(&pb->pb_list)) {
+		ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
 		if (unlock) {
 			atomic_dec(&pb->pb_hold);
 		}
 		list_del(&pb->pb_list);
 	}
 
+	pb->pb_flags |= _PBF_DELWRI_Q;
 	list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
 	pb->pb_queuetime = jiffies;
 	spin_unlock(&pbd_delwrite_lock);
@@ -1713,10 +1658,11 @@ pagebuf_delwri_dequeue(
 
 	spin_lock(&pbd_delwrite_lock);
 	if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
+		ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
 		list_del_init(&pb->pb_list);
 		dequeued = 1;
 	}
-	pb->pb_flags &= ~PBF_DELWRI;
+	pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
 	spin_unlock(&pbd_delwrite_lock);
 
 	if (dequeued)
@@ -1733,16 +1679,14 @@ pagebuf_runall_queues(
 }
 
 /* Defines for pagebuf daemon */
-STATIC DECLARE_COMPLETION(xfsbufd_done);
 STATIC struct task_struct *xfsbufd_task;
-STATIC int xfsbufd_active;
 STATIC int xfsbufd_force_flush;
 STATIC int xfsbufd_force_sleep;
 
 STATIC int
 xfsbufd_wakeup(
 	int			priority,
-	unsigned int		mask)
+	gfp_t			mask)
 {
 	if (xfsbufd_force_sleep)
 		return 0;
@@ -1761,14 +1705,8 @@ xfsbufd(
 	xfs_buftarg_t		*target;
 	xfs_buf_t		*pb, *n;
 
-	/*  Set up the thread  */
-	daemonize("xfsbufd");
 	current->flags |= PF_MEMALLOC;
 
-	xfsbufd_task = current;
-	xfsbufd_active = 1;
-	barrier();
-
 	INIT_LIST_HEAD(&tmp);
 	do {
 		if (unlikely(freezing(current))) {
@@ -1778,10 +1716,10 @@ xfsbufd(
 			xfsbufd_force_sleep = 0;
 		}
 
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100);
+		schedule_timeout_interruptible(
+			xfs_buf_timer_centisecs * msecs_to_jiffies(10));
 
-		age = (xfs_buf_age_centisecs * HZ) / 100;
+		age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
 		spin_lock(&pbd_delwrite_lock);
 		list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
 			PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
@@ -1795,7 +1733,7 @@ xfsbufd(
 					break;
 				}
 
-				pb->pb_flags &= ~PBF_DELWRI;
+				pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
 				pb->pb_flags |= PBF_WRITE;
 				list_move(&pb->pb_list, &tmp);
 			}
@@ -1816,9 +1754,9 @@ xfsbufd(
 			purge_addresses();
 
 		xfsbufd_force_flush = 0;
-	} while (xfsbufd_active);
+	} while (!kthread_should_stop());
 
-	complete_and_exit(&xfsbufd_done, 0);
+	return 0;
 }
 
 /*
@@ -1845,15 +1783,13 @@ xfs_flush_buftarg(
 		if (pb->pb_target != target)
 			continue;
 
-		ASSERT(pb->pb_flags & PBF_DELWRI);
+		ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));
 		PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
 		if (pagebuf_ispin(pb)) {
 			pincount++;
 			continue;
 		}
 
-		pb->pb_flags &= ~PBF_DELWRI;
-		pb->pb_flags |= PBF_WRITE;
 		list_move(&pb->pb_list, &tmp);
 	}
 	spin_unlock(&pbd_delwrite_lock);
@@ -1862,12 +1798,14 @@ xfs_flush_buftarg(
 	 * Dropped the delayed write list lock, now walk the temporary list
 	 */
 	list_for_each_entry_safe(pb, n, &tmp, pb_list) {
+		pagebuf_lock(pb);
+		pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
+		pb->pb_flags |= PBF_WRITE;
 		if (wait)
 			pb->pb_flags &= ~PBF_ASYNC;
 		else
 			list_del_init(&pb->pb_list);
 
-		pagebuf_lock(pb);
 		pagebuf_iostrategy(pb);
 	}
 
@@ -1888,101 +1826,63 @@ xfs_flush_buftarg(
 	return pincount;
 }
 
-STATIC int
-xfs_buf_daemons_start(void)
+int __init
+pagebuf_init(void)
 {
 	int		error = -ENOMEM;
 
+#ifdef PAGEBUF_TRACE
+	pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
+#endif
+
+	pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
+	if (!pagebuf_zone)
+		goto out_free_trace_buf;
+
 	xfslogd_workqueue = create_workqueue("xfslogd");
 	if (!xfslogd_workqueue)
-		goto out;
+		goto out_free_buf_zone;
 
 	xfsdatad_workqueue = create_workqueue("xfsdatad");
 	if (!xfsdatad_workqueue)
 		goto out_destroy_xfslogd_workqueue;
 
-	error = kernel_thread(xfsbufd, NULL, CLONE_FS|CLONE_FILES);
-	if (error < 0)
+	xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd");
+	if (IS_ERR(xfsbufd_task)) {
+		error = PTR_ERR(xfsbufd_task);
 		goto out_destroy_xfsdatad_workqueue;
+	}
+
+	pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
+	if (!pagebuf_shake)
+		goto out_stop_xfsbufd;
+
 	return 0;
 
+ out_stop_xfsbufd:
+	kthread_stop(xfsbufd_task);
  out_destroy_xfsdatad_workqueue:
 	destroy_workqueue(xfsdatad_workqueue);
  out_destroy_xfslogd_workqueue:
 	destroy_workqueue(xfslogd_workqueue);
- out:
-	return error;
-}
-
-/*
- * Note: do not mark as __exit, it is called from pagebuf_terminate.
- */
-STATIC void
-xfs_buf_daemons_stop(void)
-{
-	xfsbufd_active = 0;
-	barrier();
-	wait_for_completion(&xfsbufd_done);
-
-	destroy_workqueue(xfslogd_workqueue);
-	destroy_workqueue(xfsdatad_workqueue);
-}
-
-/*
- *	Initialization and Termination
- */
-
-int __init
-pagebuf_init(void)
-{
-	int		error = -ENOMEM;
-
-	pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
-	if (!pagebuf_zone)
-		goto out;
-
-#ifdef PAGEBUF_TRACE
-	pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
-#endif
-
-	error = xfs_buf_daemons_start();
-	if (error)
-		goto out_free_buf_zone;
-
-	pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
-	if (!pagebuf_shake) {
-		error = -ENOMEM;
-		goto out_stop_daemons;
-	}
-
-	return 0;
-
- out_stop_daemons:
-	xfs_buf_daemons_stop();
  out_free_buf_zone:
+	kmem_zone_destroy(pagebuf_zone);
+ out_free_trace_buf:
 #ifdef PAGEBUF_TRACE
 	ktrace_free(pagebuf_trace_buf);
 #endif
-	kmem_zone_destroy(pagebuf_zone);
- out:
 	return error;
 }
 
-
-/*
- *	pagebuf_terminate.
- *
- *	Note: do not mark as __exit, this is also called from the __init code.
- */
 void
 pagebuf_terminate(void)
 {
-	xfs_buf_daemons_stop();
-
+	kmem_shake_deregister(pagebuf_shake);
+	kthread_stop(xfsbufd_task);
+	destroy_workqueue(xfsdatad_workqueue);
+	destroy_workqueue(xfslogd_workqueue);
+	kmem_zone_destroy(pagebuf_zone);
 #ifdef PAGEBUF_TRACE
 	ktrace_free(pagebuf_trace_buf);
 #endif
-
-	kmem_zone_destroy(pagebuf_zone);
-	kmem_shake_deregister(pagebuf_shake);
 }
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 3f8f69a66ae..237a35b915d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -1,39 +1,20 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-/*
- * Written by Steve Lord, Jim Mostek, Russell Cattelan at SGI
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #ifndef __XFS_BUF_H__
 #define __XFS_BUF_H__
 
@@ -69,15 +50,12 @@ typedef enum page_buf_flags_e {		/* pb_flags values */
 	PBF_READ = (1 << 0),	/* buffer intended for reading from device */
 	PBF_WRITE = (1 << 1),	/* buffer intended for writing to device   */
 	PBF_MAPPED = (1 << 2),  /* buffer mapped (pb_addr valid)           */
-	PBF_PARTIAL = (1 << 3), /* buffer partially read                   */
 	PBF_ASYNC = (1 << 4),   /* initiator will not wait for completion  */
-	PBF_NONE = (1 << 5),    /* buffer not read at all                  */
+	PBF_DONE = (1 << 5),    /* all pages in the buffer uptodate	   */
 	PBF_DELWRI = (1 << 6),  /* buffer has dirty pages                  */
 	PBF_STALE = (1 << 7),	/* buffer has been staled, do not find it  */
 	PBF_FS_MANAGED = (1 << 8),  /* filesystem controls freeing memory  */
-	PBF_FS_DATAIOD = (1 << 9),  /* schedule IO completion on fs datad  */
-	PBF_FORCEIO = (1 << 10),    /* ignore any cache state		   */
-	PBF_FLUSH = (1 << 11),	    /* flush disk write cache		   */
+ 	PBF_ORDERED = (1 << 11),    /* use ordered writes		   */
 	PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead		   */
 
 	/* flags used only as arguments to access routines */
@@ -89,11 +67,9 @@ typedef enum page_buf_flags_e {		/* pb_flags values */
 	_PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache		   */
 	_PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()		   */
 	_PBF_RUN_QUEUES = (1 << 19),/* run block device task queue	   */
+	_PBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue		   */
 } page_buf_flags_t;
 
-#define PBF_UPDATE (PBF_READ | PBF_WRITE)
-#define PBF_NOT_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) != 0)
-#define PBF_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) == 0)
 
 typedef struct xfs_bufhash {
 	struct list_head	bh_list;
@@ -206,13 +182,6 @@ extern xfs_buf_t *xfs_buf_read_flags(	/* allocate and read a buffer	*/
 #define xfs_buf_read(target, blkno, len, flags) \
 	xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
 
-extern xfs_buf_t *pagebuf_lookup(
-		xfs_buftarg_t *,
-		loff_t,			/* starting offset of range	*/
-		size_t,			/* length of range		*/
-		page_buf_flags_t);	/* PBF_READ, PBF_WRITE,		*/
-					/* PBF_FORCEIO, 		*/
-
 extern xfs_buf_t *pagebuf_get_empty(	/* allocate pagebuf struct with	*/
 					/*  no memory or disk address	*/
 		size_t len,
@@ -264,7 +233,6 @@ extern void pagebuf_unlock(		/* unlock buffer		*/
 
 extern void pagebuf_iodone(		/* mark buffer I/O complete	*/
 		xfs_buf_t *,		/* buffer to mark		*/
-		int,			/* use data/log helper thread.	*/
 		int);			/* run completion locally, or in
 					 * a helper thread.		*/
 
@@ -344,8 +312,6 @@ extern void pagebuf_trace(
 
 
 
-
-
 /* These are just for xfs_syncsub... it sets an internal variable
  * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t
  */
@@ -386,21 +352,21 @@ extern void pagebuf_trace(
 #define XFS_BUF_GETERROR(x)	 pagebuf_geterror(x)
 #define XFS_BUF_ISERROR(x)	 (pagebuf_geterror(x)?1:0)
 
-#define XFS_BUF_DONE(x)		 ((x)->pb_flags &= ~(PBF_PARTIAL|PBF_NONE))
-#define XFS_BUF_UNDONE(x)	 ((x)->pb_flags |= PBF_PARTIAL|PBF_NONE)
-#define XFS_BUF_ISDONE(x)	 (!(PBF_NOT_DONE(x)))
+#define XFS_BUF_DONE(x)		 ((x)->pb_flags |= PBF_DONE)
+#define XFS_BUF_UNDONE(x)	 ((x)->pb_flags &= ~PBF_DONE)
+#define XFS_BUF_ISDONE(x)	 ((x)->pb_flags & PBF_DONE)
 
-#define XFS_BUF_BUSY(x)		 ((x)->pb_flags |= PBF_FORCEIO)
-#define XFS_BUF_UNBUSY(x)	 ((x)->pb_flags &= ~PBF_FORCEIO)
+#define XFS_BUF_BUSY(x)		 do { } while (0)
+#define XFS_BUF_UNBUSY(x)	 do { } while (0)
 #define XFS_BUF_ISBUSY(x)	 (1)
 
 #define XFS_BUF_ASYNC(x)	 ((x)->pb_flags |= PBF_ASYNC)
 #define XFS_BUF_UNASYNC(x)	 ((x)->pb_flags &= ~PBF_ASYNC)
 #define XFS_BUF_ISASYNC(x)	 ((x)->pb_flags & PBF_ASYNC)
 
-#define XFS_BUF_FLUSH(x)	 ((x)->pb_flags |= PBF_FLUSH)
-#define XFS_BUF_UNFLUSH(x)	 ((x)->pb_flags &= ~PBF_FLUSH)
-#define XFS_BUF_ISFLUSH(x)	 ((x)->pb_flags & PBF_FLUSH)
+#define XFS_BUF_ORDERED(x)	 ((x)->pb_flags |= PBF_ORDERED)
+#define XFS_BUF_UNORDERED(x)	 ((x)->pb_flags &= ~PBF_ORDERED)
+#define XFS_BUF_ISORDERED(x)	 ((x)->pb_flags & PBF_ORDERED)
 
 #define XFS_BUF_SHUT(x)		 printk("XFS_BUF_SHUT not implemented yet\n")
 #define XFS_BUF_UNSHUT(x)	 printk("XFS_BUF_UNSHUT not implemented yet\n")
@@ -420,9 +386,6 @@ extern void pagebuf_trace(
 
 #define XFS_BUF_BP_ISMAPPED(bp)	 1
 
-#define XFS_BUF_DATAIO(x)	((x)->pb_flags |= PBF_FS_DATAIOD)
-#define XFS_BUF_UNDATAIO(x)	((x)->pb_flags &= ~PBF_FS_DATAIOD)
-
 #define XFS_BUF_IODONE_FUNC(buf)	(buf)->pb_iodone
 #define XFS_BUF_SET_IODONE_FUNC(buf, func)	\
 			(buf)->pb_iodone = (func)
@@ -452,7 +415,7 @@ extern void pagebuf_trace(
 
 #define XFS_BUF_PTR(bp)		(xfs_caddr_t)((bp)->pb_addr)
 
-extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
+static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
 {
 	if (bp->pb_flags & PBF_MAPPED)
 		return XFS_BUF_PTR(bp) + offset;
@@ -518,7 +481,7 @@ static inline void	xfs_buf_relse(xfs_buf_t *bp)
 	    pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
 
 #define xfs_biodone(pb)		    \
-	    pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), 0)
+	    pagebuf_iodone(pb, 0)
 
 #define xfs_biomove(pb, off, len, data, rw) \
 	    pagebuf_iomove((pb), (off), (len), (data), \
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
index 00c45849d41..4af49102472 100644
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_CRED_H__
 #define __XFS_CRED_H__
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index f372a1a5e16..80eb249f2fa 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -1,35 +1,20 @@
 /*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
 #include "xfs_types.h"
 #include "xfs_dmapi.h"
@@ -41,7 +26,7 @@
 #include "xfs_export.h"
 
 /*
- * XFS encode and decodes the fileid portion of NFS filehandles
+ * XFS encodes and decodes the fileid portion of NFS filehandles
  * itself instead of letting the generic NFS code do it.  This
  * allows filesystems with 64 bit inode numbers to be exported.
  *
@@ -51,7 +36,6 @@
  * remains in that code.
  */
 
-
 STATIC struct dentry *
 linvfs_decode_fh(
 	struct super_block	*sb,
@@ -92,7 +76,7 @@ linvfs_decode_fh(
 		p = xfs_fileid_decode_fid2(p, &pfid, is64);
 		parent = &pfid;
 	}
-	
+
 	fh = (__u32 *)&ifid;
 	return find_exported_dentry(sb, fh, parent, acceptable, context);
 }
@@ -112,9 +96,8 @@ linvfs_encode_fh(
 	int			is64 = 0;
 #if XFS_BIG_INUMS
 	vfs_t			*vfs = LINVFS_GET_VFS(inode->i_sb);
-	xfs_mount_t		*mp = XFS_VFSTOM(vfs);
-	
-	if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT)) {
+
+	if (!(vfs->vfs_flag & VFS_32BITINODES)) {
 		/* filesystem may contain 64bit inode numbers */
 		is64 = XFS_FILEID_TYPE_64FLAG;
 	}
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h
index 60b2abac1c1..e5b0559700a 100644
--- a/fs/xfs/linux-2.6/xfs_export.h
+++ b/fs/xfs/linux-2.6/xfs_export.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_EXPORT_H__
 #define __XFS_EXPORT_H__
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f1ce4323f56..06111d0bbae 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -1,39 +1,26 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_trans.h"
@@ -311,6 +298,31 @@ linvfs_fsync(
 
 #define nextdp(dp)      ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen))
 
+#ifdef CONFIG_XFS_DMAPI
+
+STATIC struct page *
+linvfs_filemap_nopage(
+	struct vm_area_struct	*area,
+	unsigned long		address,
+	int			*type)
+{
+	struct inode	*inode = area->vm_file->f_dentry->d_inode;
+	vnode_t		*vp = LINVFS_GET_VP(inode);
+	xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
+	int		error;
+
+	ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
+
+	error = XFS_SEND_MMAP(mp, area, 0);
+	if (error)
+		return NULL;
+
+	return filemap_nopage(area, address, type);
+}
+
+#endif /* CONFIG_XFS_DMAPI */
+
+
 STATIC int
 linvfs_readdir(
 	struct file	*filp,
@@ -390,14 +402,6 @@ done:
 	return -error;
 }
 
-#ifdef CONFIG_XFS_DMAPI
-STATIC void
-linvfs_mmap_close(
-	struct vm_area_struct	*vma)
-{
-	xfs_dm_mm_put(vma);
-}
-#endif /* CONFIG_XFS_DMAPI */
 
 STATIC int
 linvfs_file_mmap(
@@ -411,16 +415,11 @@ linvfs_file_mmap(
 
 	vma->vm_ops = &linvfs_file_vm_ops;
 
-	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
-		xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
-
-		error = -XFS_SEND_MMAP(mp, vma, 0);
-		if (error)
-			return error;
 #ifdef CONFIG_XFS_DMAPI
+	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
 		vma->vm_ops = &linvfs_dmapi_file_vm_ops;
-#endif
 	}
+#endif /* CONFIG_XFS_DMAPI */
 
 	VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error);
 	if (!error)
@@ -474,6 +473,7 @@ linvfs_ioctl_invis(
 	return error;
 }
 
+#ifdef CONFIG_XFS_DMAPI
 #ifdef HAVE_VMOP_MPROTECT
 STATIC int
 linvfs_mprotect(
@@ -494,6 +494,7 @@ linvfs_mprotect(
 	return error;
 }
 #endif /* HAVE_VMOP_MPROTECT */
+#endif /* CONFIG_XFS_DMAPI */
 
 #ifdef HAVE_FOP_OPEN_EXEC
 /* If the user is attempting to execute a file that is offline then
@@ -528,49 +529,10 @@ open_exec_out:
 }
 #endif /* HAVE_FOP_OPEN_EXEC */
 
-/*
- * Temporary workaround to the AIO direct IO write problem.
- * This code can go and we can revert to do_sync_write once
- * the writepage(s) rework is merged.
- */
-STATIC ssize_t
-linvfs_write(
-	struct file	*filp,
-	const char	__user *buf,
-	size_t		len,
-	loff_t		*ppos)
-{
-	struct kiocb	kiocb;
-	ssize_t		ret;
-
-	init_sync_kiocb(&kiocb, filp);
-	kiocb.ki_pos = *ppos;
-	ret = __linvfs_write(&kiocb, buf, 0, len, kiocb.ki_pos);
-	*ppos = kiocb.ki_pos;
-	return ret;
-}
-STATIC ssize_t
-linvfs_write_invis(
-	struct file	*filp,
-	const char	__user *buf,
-	size_t		len,
-	loff_t		*ppos)
-{
-	struct kiocb	kiocb;
-	ssize_t		ret;
-
-	init_sync_kiocb(&kiocb, filp);
-	kiocb.ki_pos = *ppos;
-	ret = __linvfs_write(&kiocb, buf, IO_INVIS, len, kiocb.ki_pos);
-	*ppos = kiocb.ki_pos;
-	return ret;
-}
-
-
 struct file_operations linvfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
-	.write		= linvfs_write,
+	.write		= do_sync_write,
 	.readv		= linvfs_readv,
 	.writev		= linvfs_writev,
 	.aio_read	= linvfs_aio_read,
@@ -592,7 +554,7 @@ struct file_operations linvfs_file_operations = {
 struct file_operations linvfs_invis_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
-	.write		= linvfs_write_invis,
+	.write		= do_sync_write,
 	.readv		= linvfs_readv_invis,
 	.writev		= linvfs_writev_invis,
 	.aio_read	= linvfs_aio_read_invis,
@@ -626,8 +588,7 @@ static struct vm_operations_struct linvfs_file_vm_ops = {
 
 #ifdef CONFIG_XFS_DMAPI
 static struct vm_operations_struct linvfs_dmapi_file_vm_ops = {
-	.close		= linvfs_mmap_close,
-	.nopage		= filemap_nopage,
+	.nopage		= linvfs_filemap_nopage,
 	.populate	= filemap_populate,
 #ifdef HAVE_VMOP_MPROTECT
 	.mprotect	= linvfs_mprotect,
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 05ebd30ec96..f89340c61bf 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include "xfs.h"
@@ -117,6 +103,8 @@ fs_flush_pages(
 
 	if (VN_CACHED(vp)) {
 		filemap_fdatawrite(ip->i_mapping);
+		if (flags & XFS_B_ASYNC)
+			return 0;
 		filemap_fdatawait(ip->i_mapping);
 	}
 
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h
index 2db9ddbd456..aee9ccdd18f 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.h
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.h
@@ -1,48 +1,29 @@
 /*
- * Copyright (c) 2000, 2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-#ifndef	__XFS_SUBR_H__
-#define __XFS_SUBR_H__
-
-/*
- * Utilities shared among file system implementations.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+#ifndef	__XFS_FS_SUBR_H__
+#define __XFS_FS_SUBR_H__
 
 struct cred;
-
-extern int	fs_noerr(void);
-extern int	fs_nosys(void);
-extern void	fs_noval(void);
-extern void	fs_tosspages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
-extern void	fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
-extern int	fs_flush_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int);
+extern int  fs_noerr(void);
+extern int  fs_nosys(void);
+extern void fs_noval(void);
+extern void fs_tosspages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
+extern void fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
+extern int  fs_flush_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int);
 
 #endif	/* __XFS_FS_SUBR_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index a6da5b4fd24..6e8085f3463 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -1,40 +1,20 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
-/*
- * This file contains globals needed by XFS that were normally defined
- * somewhere else in IRIX.
- */
-
 #include "xfs.h"
 #include "xfs_cred.h"
 #include "xfs_sysctl.h"
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
index e81e2f38a85..e1a22bfcf86 100644
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ b/fs/xfs/linux-2.6/xfs_globals.h
@@ -1,42 +1,23 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_GLOBALS_H__
 #define __XFS_GLOBALS_H__
 
-/*
- * This file declares globals needed by XFS that were normally defined
- * somewhere else in IRIX.
- */
-
 extern uint64_t	xfs_panic_mask;		/* set to cause more panics */
 extern unsigned long xfs_physmem;
 extern struct cred *sys_cred;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 05a447e51cc..b78b5eb9e96 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -1,67 +1,52 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
 #include "xfs_fs.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_bit.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
 #include "xfs_rtalloc.h"
-#include "xfs_error.h"
 #include "xfs_itable.h"
+#include "xfs_error.h"
 #include "xfs_rw.h"
 #include "xfs_acl.h"
 #include "xfs_cap.h"
 #include "xfs_mac.h"
 #include "xfs_attr.h"
+#include "xfs_bmap.h"
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
 #include "xfs_dfrag.h"
@@ -141,13 +126,19 @@ xfs_find_handle(
 		return -XFS_ERROR(EINVAL);
 	}
 
-	/* we need the vnode */
-	vp = LINVFS_GET_VP(inode);
-	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFREG:
+	case S_IFDIR:
+	case S_IFLNK:
+		break;
+	default:
 		iput(inode);
 		return -XFS_ERROR(EBADF);
 	}
 
+	/* we need the vnode */
+	vp = LINVFS_GET_VP(inode);
+
 	/* now we can grab the fsid */
 	memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t));
 	hsize = sizeof(xfs_fsid_t);
@@ -386,7 +377,7 @@ xfs_readlink_by_handle(
 		return -error;
 
 	/* Restrict this handle operation to symlinks only. */
-	if (vp->v_type != VLNK) {
+	if (!S_ISLNK(inode->i_mode)) {
 		VN_RELE(vp);
 		return -XFS_ERROR(EINVAL);
 	}
@@ -982,10 +973,10 @@ xfs_ioc_space(
 	if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
 		return -XFS_ERROR(EPERM);
 
-	if (!(filp->f_flags & FMODE_WRITE))
+	if (!(filp->f_mode & FMODE_WRITE))
 		return -XFS_ERROR(EBADF);
 
-	if (vp->v_type != VREG)
+	if (!VN_ISREG(vp))
 		return -XFS_ERROR(EINVAL);
 
 	if (copy_from_user(&bf, arg, sizeof(bf)))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 0f8f1384eb3..c83ae15bb0e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -1,35 +1,20 @@
 /*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include <linux/config.h>
 #include <linux/compat.h>
 #include <linux/init.h>
@@ -39,7 +24,6 @@
 #include <linux/types.h>
 #include <linux/fs.h>
 #include <asm/uaccess.h>
-
 #include "xfs.h"
 #include "xfs_types.h"
 #include "xfs_fs.h"
@@ -47,8 +31,52 @@
 #include "xfs_vnode.h"
 #include "xfs_dfrag.h"
 
+#define  _NATIVE_IOC(cmd, type) \
+	  _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
+
 #if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
 #define BROKEN_X86_ALIGNMENT
+/* on ia32 l_start is on a 32-bit boundary */
+typedef struct xfs_flock64_32 {
+	__s16		l_type;
+	__s16		l_whence;
+	__s64		l_start	__attribute__((packed));
+			/* len == 0 means until end of file */
+	__s64		l_len __attribute__((packed));
+	__s32		l_sysid;
+	__u32		l_pid;
+	__s32		l_pad[4];	/* reserve area */
+} xfs_flock64_32_t;
+
+#define XFS_IOC_ALLOCSP_32	_IOW ('X', 10, struct xfs_flock64_32)
+#define XFS_IOC_FREESP_32	_IOW ('X', 11, struct xfs_flock64_32)
+#define XFS_IOC_ALLOCSP64_32	_IOW ('X', 36, struct xfs_flock64_32)
+#define XFS_IOC_FREESP64_32	_IOW ('X', 37, struct xfs_flock64_32)
+#define XFS_IOC_RESVSP_32	_IOW ('X', 40, struct xfs_flock64_32)
+#define XFS_IOC_UNRESVSP_32	_IOW ('X', 41, struct xfs_flock64_32)
+#define XFS_IOC_RESVSP64_32	_IOW ('X', 42, struct xfs_flock64_32)
+#define XFS_IOC_UNRESVSP64_32	_IOW ('X', 43, struct xfs_flock64_32)
+
+/* just account for different alignment */
+STATIC unsigned long
+xfs_ioctl32_flock(
+	unsigned long		arg)
+{
+	xfs_flock64_32_t	__user *p32 = (void __user *)arg;
+	xfs_flock64_t		__user *p = compat_alloc_user_space(sizeof(*p));
+
+	if (copy_in_user(&p->l_type,	&p32->l_type,	sizeof(s16)) ||
+	    copy_in_user(&p->l_whence,	&p32->l_whence, sizeof(s16)) ||
+	    copy_in_user(&p->l_start,	&p32->l_start,	sizeof(s64)) ||
+	    copy_in_user(&p->l_len,	&p32->l_len,	sizeof(s64)) ||
+	    copy_in_user(&p->l_sysid,	&p32->l_sysid,	sizeof(s32)) ||
+	    copy_in_user(&p->l_pid,	&p32->l_pid,	sizeof(u32)) ||
+	    copy_in_user(&p->l_pad,	&p32->l_pad,	4*sizeof(u32)))
+		return -EFAULT;
+	
+	return (unsigned long)p;
+}
+
 #else
 
 typedef struct xfs_fsop_bulkreq32 {
@@ -103,7 +131,6 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
 /* not handled
 	case XFS_IOC_FD_TO_HANDLE:
 	case XFS_IOC_PATH_TO_HANDLE:
-	case XFS_IOC_PATH_TO_HANDLE:
 	case XFS_IOC_PATH_TO_FSHANDLE:
 	case XFS_IOC_OPEN_BY_HANDLE:
 	case XFS_IOC_FSSETDM_BY_HANDLE:
@@ -124,8 +151,21 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
 	case XFS_IOC_ERROR_CLEARALL:
 		break;
 
-#ifndef BROKEN_X86_ALIGNMENT
-	/* xfs_flock_t and xfs_bstat_t have wrong u32 vs u64 alignment */
+#ifdef BROKEN_X86_ALIGNMENT
+	/* xfs_flock_t has wrong u32 vs u64 alignment */
+	case XFS_IOC_ALLOCSP_32:
+	case XFS_IOC_FREESP_32:
+	case XFS_IOC_ALLOCSP64_32:
+	case XFS_IOC_FREESP64_32:
+	case XFS_IOC_RESVSP_32:
+	case XFS_IOC_UNRESVSP_32:
+	case XFS_IOC_RESVSP64_32:
+	case XFS_IOC_UNRESVSP64_32:
+		arg = xfs_ioctl32_flock(arg);
+		cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
+		break;
+
+#else /* These are handled fine if no alignment issues */
 	case XFS_IOC_ALLOCSP:
 	case XFS_IOC_FREESP:
 	case XFS_IOC_RESVSP:
@@ -134,6 +174,9 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
 	case XFS_IOC_FREESP64:
 	case XFS_IOC_RESVSP64:
 	case XFS_IOC_UNRESVSP64:
+		break;
+
+	/* xfs_bstat_t still has wrong u32 vs u64 alignment */
 	case XFS_IOC_SWAPEXT:
 		break;
 
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index c874793a1dc..011c273bec5 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -1,34 +1,24 @@
 /*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+#ifndef __XFS_IOCTL32_H__
+#define __XFS_IOCTL32_H__
+
+extern long linvfs_compat_ioctl(struct file *, unsigned, unsigned long);
+extern long linvfs_compat_invis_ioctl(struct file *f, unsigned, unsigned long);
 
-long linvfs_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg);
-long linvfs_compat_invis_ioctl(struct file *f, unsigned cmd, unsigned long arg);
+#endif /* __XFS_IOCTL32_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 407e9935939..14215a7db59 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -1,39 +1,25 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -43,18 +29,17 @@
 #include "xfs_dmapi.h"
 #include "xfs_quota.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
-#include "xfs_bit.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_itable.h"
@@ -69,6 +54,137 @@
 #include <linux/xattr.h>
 #include <linux/namei.h>
 
+/*
+ * Change the requested timestamp in the given inode.
+ * We don't lock across timestamp updates, and we don't log them but
+ * we do record the fact that there is dirty information in core.
+ *
+ * NOTE -- callers MUST combine XFS_ICHGTIME_MOD or XFS_ICHGTIME_CHG
+ *		with XFS_ICHGTIME_ACC to be sure that access time
+ *		update will take.  Calling first with XFS_ICHGTIME_ACC
+ *		and then XFS_ICHGTIME_MOD may fail to modify the access
+ *		timestamp if the filesystem is mounted noacctm.
+ */
+void
+xfs_ichgtime(
+	xfs_inode_t	*ip,
+	int		flags)
+{
+	struct inode	*inode = LINVFS_GET_IP(XFS_ITOV(ip));
+	timespec_t	tv;
+
+	/*
+	 * We're not supposed to change timestamps in readonly-mounted
+	 * filesystems.  Throw it away if anyone asks us.
+	 */
+	if (unlikely(IS_RDONLY(inode)))
+		return;
+
+	/*
+	 * Don't update access timestamps on reads if mounted "noatime".
+	 * Throw it away if anyone asks us.
+	 */
+	if (unlikely(
+	    (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
+	    (flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==
+			XFS_ICHGTIME_ACC))
+		return;
+
+	nanotime(&tv);
+	if (flags & XFS_ICHGTIME_MOD) {
+		inode->i_mtime = tv;
+		ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
+		ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
+	}
+	if (flags & XFS_ICHGTIME_ACC) {
+		inode->i_atime = tv;
+		ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec;
+		ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec;
+	}
+	if (flags & XFS_ICHGTIME_CHG) {
+		inode->i_ctime = tv;
+		ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec;
+		ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec;
+	}
+
+	/*
+	 * We update the i_update_core field _after_ changing
+	 * the timestamps in order to coordinate properly with
+	 * xfs_iflush() so that we don't lose timestamp updates.
+	 * This keeps us from having to hold the inode lock
+	 * while doing this.  We use the SYNCHRONIZE macro to
+	 * ensure that the compiler does not reorder the update
+	 * of i_update_core above the timestamp updates above.
+	 */
+	SYNCHRONIZE();
+	ip->i_update_core = 1;
+	if (!(inode->i_state & I_LOCK))
+		mark_inode_dirty_sync(inode);
+}
+
+/*
+ * Variant on the above which avoids querying the system clock
+ * in situations where we know the Linux inode timestamps have
+ * just been updated (and so we can update our inode cheaply).
+ * We also skip the readonly and noatime checks here, they are
+ * also catered for already.
+ */
+void
+xfs_ichgtime_fast(
+	xfs_inode_t	*ip,
+	struct inode	*inode,
+	int		flags)
+{
+	timespec_t	*tvp;
+
+	/*
+	 * We're not supposed to change timestamps in readonly-mounted
+	 * filesystems.  Throw it away if anyone asks us.
+	 */
+	if (unlikely(IS_RDONLY(inode)))
+		return;
+
+	/*
+	 * Don't update access timestamps on reads if mounted "noatime".
+	 * Throw it away if anyone asks us.
+	 */
+	if (unlikely(
+	    (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
+	    ((flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==
+			XFS_ICHGTIME_ACC)))
+		return;
+
+	if (flags & XFS_ICHGTIME_MOD) {
+		tvp = &inode->i_mtime;
+		ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
+		ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;
+	}
+	if (flags & XFS_ICHGTIME_ACC) {
+		tvp = &inode->i_atime;
+		ip->i_d.di_atime.t_sec = (__int32_t)tvp->tv_sec;
+		ip->i_d.di_atime.t_nsec = (__int32_t)tvp->tv_nsec;
+	}
+	if (flags & XFS_ICHGTIME_CHG) {
+		tvp = &inode->i_ctime;
+		ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec;
+		ip->i_d.di_ctime.t_nsec = (__int32_t)tvp->tv_nsec;
+	}
+
+	/*
+	 * We update the i_update_core field _after_ changing
+	 * the timestamps in order to coordinate properly with
+	 * xfs_iflush() so that we don't lose timestamp updates.
+	 * This keeps us from having to hold the inode lock
+	 * while doing this.  We use the SYNCHRONIZE macro to
+	 * ensure that the compiler does not reorder the update
+	 * of i_update_core above the timestamp updates above.
+	 */
+	SYNCHRONIZE();
+	ip->i_update_core = 1;
+	if (!(inode->i_state & I_LOCK))
+		mark_inode_dirty_sync(inode);
+}
+
 
 /*
  * Pull the link count and size up from the xfs inode to the linux inode
@@ -140,7 +256,6 @@ linvfs_mknod(
 
 	memset(&va, 0, sizeof(va));
 	va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
-	va.va_type = IFTOVT(mode);
 	va.va_mode = mode;
 
 	switch (mode & S_IFMT) {
@@ -308,14 +423,13 @@ linvfs_symlink(
 	cvp = NULL;
 
 	memset(&va, 0, sizeof(va));
-	va.va_type = VLNK;
-	va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO;
+	va.va_mode = S_IFLNK |
+		(irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
 	va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
 
 	error = 0;
 	VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
 	if (!error && cvp) {
-		ASSERT(cvp->v_type == VLNK);
 		ip = LINVFS_GET_IP(cvp);
 		d_instantiate(dentry, ip);
 		validate_fields(dir);
@@ -374,7 +488,7 @@ linvfs_rename(
  * we need to be very careful about how much stack we use.
  * uio is kmalloced for this reason...
  */
-STATIC int
+STATIC void *
 linvfs_follow_link(
 	struct dentry		*dentry,
 	struct nameidata	*nd)
@@ -391,14 +505,14 @@ linvfs_follow_link(
 	link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL);
 	if (!link) {
 		nd_set_link(nd, ERR_PTR(-ENOMEM));
-		return 0;
+		return NULL;
 	}
 
 	uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL);
 	if (!uio) {
 		kfree(link);
 		nd_set_link(nd, ERR_PTR(-ENOMEM));
-		return 0;
+		return NULL;
 	}
 
 	vp = LINVFS_GET_VP(dentry->d_inode);
@@ -422,12 +536,17 @@ linvfs_follow_link(
 	kfree(uio);
 
 	nd_set_link(nd, link);
-	return 0;
+	return NULL;
 }
 
-static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd)
+STATIC void
+linvfs_put_link(
+	struct dentry	*dentry,
+	struct nameidata *nd,
+	void		*p)
 {
-	char *s = nd_get_link(nd);
+	char		*s = nd_get_link(nd);
+
 	if (!IS_ERR(s))
 		kfree(s);
 }
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 6a69a62c36b..ee784b63acb 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_IOPS_H__
 #define __XFS_IOPS_H__
@@ -48,4 +34,8 @@ extern void linvfs_unwritten_done(struct buffer_head *, int);
 extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
                         int, unsigned int, void __user *);
 
+struct xfs_inode;
+extern void xfs_ichgtime(struct xfs_inode *, int);
+extern void xfs_ichgtime_fast(struct xfs_inode *, struct inode *, int);
+
 #endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 42dc5e4662e..44fed10af0d 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_LINUX__
 #define __XFS_LINUX__
@@ -64,7 +50,6 @@
 #include <sema.h>
 #include <time.h>
 
-#include <support/qsort.h>
 #include <support/ktrace.h>
 #include <support/debug.h>
 #include <support/move.h>
@@ -104,6 +89,7 @@
 #include <xfs_stats.h>
 #include <xfs_sysctl.h>
 #include <xfs_iops.h>
+#include <xfs_aops.h>
 #include <xfs_super.h>
 #include <xfs_globals.h>
 #include <xfs_fs_subr.h>
@@ -197,10 +183,6 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
 /* bytes to clicks */
 #define btoc(x)         (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
 
-#ifndef CELL_CAPABLE
-#define FSC_NOTIFY_NAME_CHANGED(vp)
-#endif
-
 #ifndef ENOATTR
 #define ENOATTR		ENODATA		/* Attribute not found */
 #endif
@@ -235,19 +217,7 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
 #define Q_XSETPQLIM	XQM_CMD(10)	/* set projects disk limits */
 #define Q_XGETPQUOTA	XQM_CMD(11)	/* get projects disk limits */
 
-/* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */
-/* we may well need to fine-tune this if it ever becomes an issue.  */
-#define DQUOT_MAX_HEURISTIC	1024	/* NR_DQUOTS */
-#define ndquot			DQUOT_MAX_HEURISTIC
-
-/* IRIX uses the current size of the name cache to guess a good value */
-/* - this isn't the same but is a good enough starting point for now. */
-#define DQUOT_HASH_HEURISTIC	files_stat.nr_files
-
-/* IRIX inodes maintain the project ID also, zero this field on Linux */
-#define DEFAULT_PROJID	0
-#define dfltprid	DEFAULT_PROJID
-
+#define dfltprid	0
 #define MAXPATHLEN	1024
 
 #define MIN(a,b)	(min(a,b))
@@ -255,10 +225,17 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
 #define howmany(x, y)	(((x)+((y)-1))/(y))
 #define roundup(x, y)	((((x)+((y)-1))/(y))*(y))
 
+/*
+ * Various platform dependent calls that don't fit anywhere else
+ */
+#define xfs_sort(a,n,s,fn)	sort(a,n,s,fn,NULL)
 #define xfs_stack_trace()	dump_stack()
-
 #define xfs_itruncate_data(ip, off)	\
 	(-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
+#define xfs_statvfs_fsid(statp, mp)	\
+	({ u64 id = huge_encode_dev((mp)->m_dev);	\
+	   __kernel_fsid_t *fsid = &(statp)->f_fsid;	\
+	(fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); })
 
 
 /* Move the kernel do_div definition off to one side */
@@ -371,6 +348,4 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
 	return(x * y);
 }
 
-#define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL)
-
 #endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index acab58c4804..279e9bc92ab 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -1,44 +1,25 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-/*
- *  fs/xfs/linux/xfs_lrw.c (Linux Read Write stuff)
- *
- */
-
 #include "xfs.h"
-
 #include "xfs_fs.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -48,18 +29,17 @@
 #include "xfs_dmapi.h"
 #include "xfs_quota.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
-#include "xfs_bit.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_itable.h"
@@ -302,7 +282,7 @@ xfs_read(
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	if (likely(!(ioflags & IO_INVIS)))
-		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
+		xfs_ichgtime_fast(ip, inode, XFS_ICHGTIME_ACC);
 
 unlock_isem:
 	if (unlikely(ioflags & IO_ISDIRECT))
@@ -367,7 +347,7 @@ xfs_sendfile(
 		XFS_STATS_ADD(xs_read_bytes, ret);
 
 	if (likely(!(ioflags & IO_INVIS)))
-		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
+		xfs_ichgtime_fast(ip, LINVFS_GET_IP(vp), XFS_ICHGTIME_ACC);
 
 	return ret;
 }
@@ -660,9 +640,6 @@ xfs_write(
 			(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
 				mp->m_rtdev_targp : mp->m_ddev_targp;
 
-		if (ioflags & IO_ISAIO)
-			return XFS_ERROR(-ENOSYS);
-
 		if ((pos & target->pbr_smask) || (count & target->pbr_smask))
 			return XFS_ERROR(-EINVAL);
 
@@ -735,15 +712,10 @@ start:
 		}
 	}
 
-	/*
-	 * On Linux, generic_file_write updates the times even if
-	 * no data is copied in so long as the write had a size.
-	 *
-	 * We must update xfs' times since revalidate will overcopy xfs.
-	 */
-	if (!(ioflags & IO_INVIS)) {
-		xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+	if (likely(!(ioflags & IO_INVIS))) {
 		inode_update_time(inode, 1);
+		xfs_ichgtime_fast(xip, inode,
+				  XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 	}
 
 	/*
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index f197a720e39..38864a88d42 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_LRW_H__
 #define __XFS_LRW_H__
@@ -70,9 +56,10 @@ struct xfs_iomap;
 #define	XFS_SENDFILE_ENTER	21
 #define	XFS_WRITEPAGE_ENTER	22
 #define	XFS_RELEASEPAGE_ENTER	23
-#define	XFS_IOMAP_ALLOC_ENTER	24
-#define	XFS_IOMAP_ALLOC_MAP	25
-#define	XFS_IOMAP_UNWRITTEN	26
+#define	XFS_INVALIDPAGE_ENTER	24
+#define	XFS_IOMAP_ALLOC_ENTER	25
+#define	XFS_IOMAP_ALLOC_MAP	26
+#define	XFS_IOMAP_UNWRITTEN	27
 extern void xfs_rw_enter_trace(int, struct xfs_iocore *,
 				void *, size_t, loff_t, int);
 extern void xfs_inval_cached_trace(struct xfs_iocore *,
@@ -106,9 +93,4 @@ extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
 
 extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
 
-#define XFS_FSB_TO_DB_IO(io,fsb) \
-		(((io)->io_flags & XFS_IOCORE_RT) ? \
-		 XFS_FSB_TO_BB((io)->io_mount, (fsb)) : \
-		 XFS_FSB_TO_DADDR((io)->io_mount, (fsb)))
-
 #endif	/* __XFS_LRW_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index aaf5ddba47f..6c40a74be7c 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -1,35 +1,20 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
 #include <linux/proc_fs.h>
 
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index 3f756a6c3eb..50027c4a561 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_STATS_H__
 #define __XFS_STATS_H__
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index f6dd7de2592..6116b5bf433 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1,60 +1,45 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_clnt.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
 #include "xfs_quota.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
 #include "xfs_bmap.h"
-#include "xfs_bit.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_itable.h"
@@ -70,11 +55,15 @@
 #include <linux/namei.h>
 #include <linux/init.h>
 #include <linux/mount.h>
+#include <linux/mempool.h>
 #include <linux/writeback.h>
+#include <linux/kthread.h>
 
 STATIC struct quotactl_ops linvfs_qops;
 STATIC struct super_operations linvfs_sops;
-STATIC kmem_zone_t *linvfs_inode_zone;
+STATIC kmem_zone_t *xfs_vnode_zone;
+STATIC kmem_zone_t *xfs_ioend_zone;
+mempool_t *xfs_ioend_pool;
 
 STATIC struct xfs_mount_args *
 xfs_args_allocate(
@@ -138,24 +127,25 @@ STATIC __inline__ void
 xfs_set_inodeops(
 	struct inode		*inode)
 {
-	vnode_t			*vp = LINVFS_GET_VP(inode);
-
-	if (vp->v_type == VNON) {
-		vn_mark_bad(vp);
-	} else if (S_ISREG(inode->i_mode)) {
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFREG:
 		inode->i_op = &linvfs_file_inode_operations;
 		inode->i_fop = &linvfs_file_operations;
 		inode->i_mapping->a_ops = &linvfs_aops;
-	} else if (S_ISDIR(inode->i_mode)) {
+		break;
+	case S_IFDIR:
 		inode->i_op = &linvfs_dir_inode_operations;
 		inode->i_fop = &linvfs_dir_operations;
-	} else if (S_ISLNK(inode->i_mode)) {
+		break;
+	case S_IFLNK:
 		inode->i_op = &linvfs_symlink_inode_operations;
 		if (inode->i_blocks)
 			inode->i_mapping->a_ops = &linvfs_aops;
-	} else {
+		break;
+	default:
 		inode->i_op = &linvfs_file_inode_operations;
 		init_special_inode(inode, inode->i_mode, inode->i_rdev);
+		break;
 	}
 }
 
@@ -167,17 +157,24 @@ xfs_revalidate_inode(
 {
 	struct inode		*inode = LINVFS_GET_IP(vp);
 
-	inode->i_mode	= (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type);
+	inode->i_mode	= ip->i_d.di_mode;
 	inode->i_nlink	= ip->i_d.di_nlink;
 	inode->i_uid	= ip->i_d.di_uid;
 	inode->i_gid	= ip->i_d.di_gid;
-	if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		inode->i_rdev =
+			MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
+			      sysv_minor(ip->i_df.if_u2.if_rdev));
+		break;
+	default:
 		inode->i_rdev = 0;
-	} else {
-		xfs_dev_t dev = ip->i_df.if_u2.if_rdev;
-		inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
+		break;
 	}
-	inode->i_blksize = PAGE_CACHE_SIZE;
+
+	inode->i_blksize = xfs_preferred_iosize(mp);
 	inode->i_generation = ip->i_d.di_gen;
 	i_size_write(inode, ip->i_d.di_size);
 	inode->i_blocks =
@@ -231,7 +228,6 @@ xfs_initialize_vnode(
 	 * finish our work.
 	 */
 	if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) {
-		vp->v_type = IFTOVT(ip->i_d.di_mode);
 		xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
 		xfs_set_inodeops(inode);
 	
@@ -267,6 +263,72 @@ xfs_blkdev_put(
 		close_bdev_excl(bdev);
 }
 
+/*
+ * Try to write out the superblock using barriers.
+ */
+STATIC int
+xfs_barrier_test(
+	xfs_mount_t	*mp)
+{
+	xfs_buf_t	*sbp = xfs_getsb(mp, 0);
+	int		error;
+
+	XFS_BUF_UNDONE(sbp);
+	XFS_BUF_UNREAD(sbp);
+	XFS_BUF_UNDELAYWRITE(sbp);
+	XFS_BUF_WRITE(sbp);
+	XFS_BUF_UNASYNC(sbp);
+	XFS_BUF_ORDERED(sbp);
+
+	xfsbdstrat(mp, sbp);
+	error = xfs_iowait(sbp);
+
+	/*
+	 * Clear all the flags we set and possible error state in the
+	 * buffer.  We only did the write to try out whether barriers
+	 * worked and shouldn't leave any traces in the superblock
+	 * buffer.
+	 */
+	XFS_BUF_DONE(sbp);
+	XFS_BUF_ERROR(sbp, 0);
+	XFS_BUF_UNORDERED(sbp);
+
+	xfs_buf_relse(sbp);
+	return error;
+}
+
+void
+xfs_mountfs_check_barriers(xfs_mount_t *mp)
+{
+	int error;
+
+	if (mp->m_logdev_targp != mp->m_ddev_targp) {
+		xfs_fs_cmn_err(CE_NOTE, mp,
+		  "Disabling barriers, not supported with external log device");
+		mp->m_flags &= ~XFS_MOUNT_BARRIER;
+	}
+
+	if (mp->m_ddev_targp->pbr_bdev->bd_disk->queue->ordered ==
+					QUEUE_ORDERED_NONE) {
+		xfs_fs_cmn_err(CE_NOTE, mp,
+		  "Disabling barriers, not supported by the underlying device");
+		mp->m_flags &= ~XFS_MOUNT_BARRIER;
+	}
+
+	error = xfs_barrier_test(mp);
+	if (error) {
+		xfs_fs_cmn_err(CE_NOTE, mp,
+		  "Disabling barriers, trial barrier write failed");
+		mp->m_flags &= ~XFS_MOUNT_BARRIER;
+	}
+}
+
+void
+xfs_blkdev_issue_flush(
+	xfs_buftarg_t		*buftarg)
+{
+	blkdev_issue_flush(buftarg->pbr_bdev, NULL);
+}
 
 STATIC struct inode *
 linvfs_alloc_inode(
@@ -274,8 +336,7 @@ linvfs_alloc_inode(
 {
 	vnode_t			*vp;
 
-	vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone, 
-                kmem_flags_convert(KM_SLEEP));
+	vp = kmem_cache_alloc(xfs_vnode_zone, kmem_flags_convert(KM_SLEEP));
 	if (!vp)
 		return NULL;
 	return LINVFS_GET_IP(vp);
@@ -285,11 +346,11 @@ STATIC void
 linvfs_destroy_inode(
 	struct inode		*inode)
 {
-	kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode));
+	kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode));
 }
 
 STATIC void
-init_once(
+linvfs_inode_init_once(
 	void			*data,
 	kmem_cache_t		*cachep,
 	unsigned long		flags)
@@ -302,21 +363,41 @@ init_once(
 }
 
 STATIC int
-init_inodecache( void )
+linvfs_init_zones(void)
 {
-	linvfs_inode_zone = kmem_cache_create("linvfs_icache",
+	xfs_vnode_zone = kmem_cache_create("xfs_vnode",
 				sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT,
-				init_once, NULL);
-	if (linvfs_inode_zone == NULL)
-		return -ENOMEM;
+				linvfs_inode_init_once, NULL);
+	if (!xfs_vnode_zone)
+		goto out;
+
+	xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
+	if (!xfs_ioend_zone)
+		goto out_destroy_vnode_zone;
+
+	xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE,
+			mempool_alloc_slab, mempool_free_slab,
+			xfs_ioend_zone);
+	if (!xfs_ioend_pool)
+		goto out_free_ioend_zone;
+
 	return 0;
+
+
+ out_free_ioend_zone:
+	kmem_zone_destroy(xfs_ioend_zone);
+ out_destroy_vnode_zone:
+	kmem_zone_destroy(xfs_vnode_zone);
+ out:
+	return -ENOMEM;
 }
 
 STATIC void
-destroy_inodecache( void )
+linvfs_destroy_zones(void)
 {
-	if (kmem_cache_destroy(linvfs_inode_zone))
-		printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__);
+	mempool_destroy(xfs_ioend_pool);
+	kmem_zone_destroy(xfs_vnode_zone);
+	kmem_zone_destroy(xfs_ioend_zone);
 }
 
 /*
@@ -354,17 +435,38 @@ linvfs_clear_inode(
 	struct inode		*inode)
 {
 	vnode_t			*vp = LINVFS_GET_VP(inode);
+	int			error, cache;
 
-	if (vp) {
-		vn_rele(vp);
-		vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
-		/*
-		 * Do all our cleanup, and remove this vnode.
-		 */
-		vn_remove(vp);
+	vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address);
+
+	XFS_STATS_INC(vn_rele);
+	XFS_STATS_INC(vn_remove);
+	XFS_STATS_INC(vn_reclaim);
+	XFS_STATS_DEC(vn_active);
+
+	/*
+	 * This can happen because xfs_iget_core calls xfs_idestroy if we
+	 * find an inode with di_mode == 0 but without IGET_CREATE set.
+	 */
+	if (vp->v_fbhv)
+		VOP_INACTIVE(vp, NULL, cache);
+
+	VN_LOCK(vp);
+	vp->v_flag &= ~VMODIFIED;
+	VN_UNLOCK(vp, 0);
+
+	if (vp->v_fbhv) {
+		VOP_RECLAIM(vp, error);
+		if (error)
+			panic("vn_purge: cannot reclaim");
 	}
-}
 
+	ASSERT(vp->v_fbhv == NULL);
+
+#ifdef XFS_VNODE_TRACE
+	ktrace_free(vp->v_trace);
+#endif
+}
 
 /*
  * Enqueue a work item to be picked up by the vfs xfssyncd thread.
@@ -416,7 +518,7 @@ xfs_flush_inode(
 
 	igrab(inode);
 	xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work);
-	delay(HZ/2);
+	delay(msecs_to_jiffies(500));
 }
 
 /*
@@ -441,7 +543,7 @@ xfs_flush_device(
 
 	igrab(inode);
 	xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);
-	delay(HZ/2);
+	delay(msecs_to_jiffies(500));
 	xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
 }
 
@@ -466,25 +568,15 @@ xfssyncd(
 {
 	long			timeleft;
 	vfs_t			*vfsp = (vfs_t *) arg;
-	struct list_head	tmp;
 	struct vfs_sync_work	*work, *n;
+	LIST_HEAD		(tmp);
 
-	daemonize("xfssyncd");
-
-	vfsp->vfs_sync_work.w_vfs = vfsp;
-	vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
-	vfsp->vfs_sync_task = current;
-	wmb();
-	wake_up(&vfsp->vfs_wait_sync_task);
-
-	INIT_LIST_HEAD(&tmp);
-	timeleft = (xfs_syncd_centisecs * HZ) / 100;
+	timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
 	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		timeleft = schedule_timeout(timeleft);
+		timeleft = schedule_timeout_interruptible(timeleft);
 		/* swsusp */
 		try_to_freeze();
-		if (vfsp->vfs_flag & VFS_UMOUNT)
+		if (kthread_should_stop())
 			break;
 
 		spin_lock(&vfsp->vfs_sync_lock);
@@ -495,7 +587,8 @@ xfssyncd(
 		 */
 		if (!timeleft || list_empty(&vfsp->vfs_sync_list)) {
 			if (!timeleft)
-				timeleft = (xfs_syncd_centisecs * HZ) / 100;
+				timeleft = xfs_syncd_centisecs *
+							msecs_to_jiffies(10);
 			INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list);
 			list_add_tail(&vfsp->vfs_sync_work.w_list,
 					&vfsp->vfs_sync_list);
@@ -513,10 +606,6 @@ xfssyncd(
 		}
 	}
 
-	vfsp->vfs_sync_task = NULL;
-	wmb();
-	wake_up(&vfsp->vfs_wait_sync_task);
-
 	return 0;
 }
 
@@ -524,13 +613,11 @@ STATIC int
 linvfs_start_syncd(
 	vfs_t			*vfsp)
 {
-	int			pid;
-
-	pid = kernel_thread(xfssyncd, (void *) vfsp,
-			CLONE_VM | CLONE_FS | CLONE_FILES);
-	if (pid < 0)
-		return -pid;
-	wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task);
+	vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
+	vfsp->vfs_sync_work.w_vfs = vfsp;
+	vfsp->vfs_sync_task = kthread_run(xfssyncd, vfsp, "xfssyncd");
+	if (IS_ERR(vfsp->vfs_sync_task))
+		return -PTR_ERR(vfsp->vfs_sync_task);
 	return 0;
 }
 
@@ -538,11 +625,7 @@ STATIC void
 linvfs_stop_syncd(
 	vfs_t			*vfsp)
 {
-	vfsp->vfs_flag |= VFS_UMOUNT;
-	wmb();
-
-	wake_up_process(vfsp->vfs_sync_task);
-	wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task);
+	kthread_stop(vfsp->vfs_sync_task);
 }
 
 STATIC void
@@ -669,6 +752,18 @@ linvfs_show_options(
 }
 
 STATIC int
+linvfs_quotasync(
+	struct super_block	*sb,
+	int			type)
+{
+	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
+	int			error;
+
+	VFS_QUOTACTL(vfsp, Q_XQUOTASYNC, 0, (caddr_t)NULL, error);
+	return -error;
+}
+
+STATIC int
 linvfs_getxstate(
 	struct super_block	*sb,
 	struct fs_quota_stat	*fqs)
@@ -836,6 +931,7 @@ STATIC struct super_operations linvfs_sops = {
 };
 
 STATIC struct quotactl_ops linvfs_qops = {
+	.quota_sync		= linvfs_quotasync,
 	.get_xstate		= linvfs_getxstate,
 	.set_xstate		= linvfs_setxstate,
 	.get_xquota		= linvfs_getxquota,
@@ -866,9 +962,9 @@ init_xfs_fs( void )
 
 	ktrace_init(64);
 
-	error = init_inodecache();
+	error = linvfs_init_zones();
 	if (error < 0)
-		goto undo_inodecache;
+		goto undo_zones;
 
 	error = pagebuf_init();
 	if (error < 0)
@@ -889,9 +985,9 @@ undo_register:
 	pagebuf_terminate();
 
 undo_pagebuf:
-	destroy_inodecache();
+	linvfs_destroy_zones();
 
-undo_inodecache:
+undo_zones:
 	return error;
 }
 
@@ -903,7 +999,7 @@ exit_xfs_fs( void )
 	unregister_filesystem(&xfs_fs_type);
 	xfs_cleanup();
 	pagebuf_terminate();
-	destroy_inodecache();
+	linvfs_destroy_zones();
 	ktrace_uninit();
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index ec7e0035c73..df59408dca0 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_SUPER_H__
 #define __XFS_SUPER_H__
@@ -132,6 +118,7 @@ extern void xfs_flush_device(struct xfs_inode *);
 extern int  xfs_blkdev_get(struct xfs_mount *, const char *,
 				struct block_device **);
 extern void xfs_blkdev_put(struct block_device *);
+extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
 
 extern struct export_operations linvfs_export_ops;
 
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index 0dc010356f4..a0256497242 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -1,44 +1,26 @@
 /*
- * Copyright (c) 2001-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_rw.h"
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
 
-
 static struct ctl_table_header *xfs_table_header;
 
-
 #ifdef CONFIG_PROC_FS
 STATIC int
 xfs_stats_clear_proc_handler(
@@ -76,7 +58,7 @@ xfs_stats_clear_proc_handler(
 STATIC ctl_table xfs_table[] = {
 	{XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val,
 	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
-	&sysctl_intvec, NULL, 
+	&sysctl_intvec, NULL,
 	&xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max},
 
 	{XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val,
@@ -86,22 +68,22 @@ STATIC ctl_table xfs_table[] = {
 
 	{XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val,
 	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
-	&sysctl_intvec, NULL, 
+	&sysctl_intvec, NULL,
 	&xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max},
 
 	{XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val,
 	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
-	&sysctl_intvec, NULL, 
+	&sysctl_intvec, NULL,
 	&xfs_params.panic_mask.min, &xfs_params.panic_mask.max},
 
 	{XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val,
 	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
-	&sysctl_intvec, NULL, 
+	&sysctl_intvec, NULL,
 	&xfs_params.error_level.min, &xfs_params.error_level.max},
 
 	{XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val,
 	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
-	&sysctl_intvec, NULL, 
+	&sysctl_intvec, NULL,
 	&xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max},
 
 	{XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val,
@@ -118,7 +100,7 @@ STATIC ctl_table xfs_table[] = {
 	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
 	&sysctl_intvec, NULL,
 	&xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max},
-	
+
 	{XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val,
 	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
 	&sysctl_intvec, NULL,
@@ -136,14 +118,14 @@ STATIC ctl_table xfs_table[] = {
 
 	{XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val,
 	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
-	&sysctl_intvec, NULL, 
+	&sysctl_intvec, NULL,
 	&xfs_params.rotorstep.min, &xfs_params.rotorstep.max},
 
 	/* please keep this the last entry */
 #ifdef CONFIG_PROC_FS
 	{XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val,
 	sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler,
-	&sysctl_intvec, NULL, 
+	&sysctl_intvec, NULL,
 	&xfs_params.stats_clear.min, &xfs_params.stats_clear.max},
 #endif /* CONFIG_PROC_FS */
 
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index a39a95020a5..bc8c11f1372 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -1,35 +1,20 @@
 /*
- * Copyright (c) 2001-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #ifndef __XFS_SYSCTL_H__
 #define __XFS_SYSCTL_H__
 
diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h
index 96f96394417..f8d279d7563 100644
--- a/fs/xfs/linux-2.6/xfs_version.h
+++ b/fs/xfs/linux-2.6/xfs_version.h
@@ -1,34 +1,22 @@
 /*
- * Copyright (c) 2001-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+#ifndef __XFS_VERSION_H__
+#define __XFS_VERSION_H__
 
 /*
  * Dummy file that can contain a timestamp to put into the
@@ -36,9 +24,6 @@
  * running
  */
 
-#ifndef __XFS_VERSION_H__
-#define __XFS_VERSION_H__
-
 #define XFS_VERSION_STRING "SGI XFS"
 
 #endif /* __XFS_VERSION_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c
index 669c6164495..c855d62e534 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.c
+++ b/fs/xfs/linux-2.6/xfs_vfs.c
@@ -1,38 +1,22 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_macros.h"
 #include "xfs_inum.h"
 #include "xfs_log.h"
 #include "xfs_clnt.h"
@@ -251,7 +235,6 @@ vfs_allocate( void )
 	bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
 	INIT_LIST_HEAD(&vfsp->vfs_sync_list);
 	spin_lock_init(&vfsp->vfs_sync_lock);
-	init_waitqueue_head(&vfsp->vfs_wait_sync_task);
 	init_waitqueue_head(&vfsp->vfs_wait_single_sync_task);
 	return vfsp;
 }
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 7ee1f714e9b..57caf9eddee 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_VFS_H__
 #define __XFS_VFS_H__
@@ -65,7 +51,6 @@ typedef struct vfs {
 	spinlock_t		vfs_sync_lock;	/* work item list lock */
 	int 			vfs_sync_seq;	/* sync thread generation no. */
 	wait_queue_head_t	vfs_wait_single_sync_task;
-	wait_queue_head_t	vfs_wait_sync_task;
 } vfs_t;
 
 #define vfs_fbhv		vfs_bh.bh_first	/* 1st on vfs behavior chain */
@@ -96,7 +81,7 @@ typedef enum {
 #define VFS_RDONLY		0x0001	/* read-only vfs */
 #define VFS_GRPID		0x0002	/* group-ID assigned from directory */
 #define VFS_DMI			0x0004	/* filesystem has the DMI enabled */
-#define VFS_UMOUNT		0x0008	/* unmount in progress */
+#define VFS_32BITINODES		0x0008	/* do not use inums above 32 bits */
 #define VFS_END			0x0008	/* max flag */
 
 #define SYNC_ATTR		0x0001	/* sync attributes */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 250cad54e89..e9bbcb4d624 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -1,38 +1,22 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
 
-
 uint64_t vn_generation;		/* vnode generation number */
 DEFINE_SPINLOCK(vnumber_lock);
 
@@ -42,93 +26,32 @@ DEFINE_SPINLOCK(vnumber_lock);
  */
 #define NVSYNC                  37
 #define vptosync(v)             (&vsync[((unsigned long)v) % NVSYNC])
-sv_t vsync[NVSYNC];
-
-/*
- * Translate stat(2) file types to vnode types and vice versa.
- * Aware of numeric order of S_IFMT and vnode type values.
- */
-enum vtype iftovt_tab[] = {
-	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
-	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
-};
-
-u_short vttoif_tab[] = {
-	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK
-};
-
+STATIC wait_queue_head_t vsync[NVSYNC];
 
 void
 vn_init(void)
 {
-	register sv_t *svp;
-	register int i;
+	int i;
 
-	for (svp = vsync, i = 0; i < NVSYNC; i++, svp++)
-		init_sv(svp, SV_DEFAULT, "vsy", i);
+	for (i = 0; i < NVSYNC; i++)
+		init_waitqueue_head(&vsync[i]);
 }
 
-/*
- * Clean a vnode of filesystem-specific data and prepare it for reuse.
- */
-STATIC int
-vn_reclaim(
+void
+vn_iowait(
 	struct vnode	*vp)
 {
-	int		error;
-
-	XFS_STATS_INC(vn_reclaim);
-	vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address);
-
-	/*
-	 * Only make the VOP_RECLAIM call if there are behaviors
-	 * to call.
-	 */
-	if (vp->v_fbhv) {
-		VOP_RECLAIM(vp, error);
-		if (error)
-			return -error;
-	}
-	ASSERT(vp->v_fbhv == NULL);
-
-	VN_LOCK(vp);
-	vp->v_flag &= (VRECLM|VWAIT);
-	VN_UNLOCK(vp, 0);
-
-	vp->v_type = VNON;
-	vp->v_fbhv = NULL;
-
-#ifdef XFS_VNODE_TRACE
-	ktrace_free(vp->v_trace);
-	vp->v_trace = NULL;
-#endif
+	wait_queue_head_t *wq = vptosync(vp);
 
-	return 0;
+	wait_event(*wq, (atomic_read(&vp->v_iocount) == 0));
 }
 
-STATIC void
-vn_wakeup(
-	struct vnode	*vp)
-{
-	VN_LOCK(vp);
-	if (vp->v_flag & VWAIT)
-		sv_broadcast(vptosync(vp));
-	vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED);
-	VN_UNLOCK(vp, 0);
-}
-
-int
-vn_wait(
+void
+vn_iowake(
 	struct vnode	*vp)
 {
-	VN_LOCK(vp);
-	if (vp->v_flag & (VINACT | VRECLM)) {
-		vp->v_flag |= VWAIT;
-		sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
-		return 1;
-	}
-	VN_UNLOCK(vp, 0);
-	return 0;
+	if (atomic_dec_and_test(&vp->v_iocount))
+		wake_up(vptosync(vp));
 }
 
 struct vnode *
@@ -154,6 +77,8 @@ vn_initialize(
 	/* Initialize the first behavior and the behavior chain head. */
 	vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode");
 
+	atomic_set(&vp->v_iocount, 0);
+
 #ifdef	XFS_VNODE_TRACE
 	vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
 #endif	/* XFS_VNODE_TRACE */
@@ -163,30 +88,6 @@ vn_initialize(
 }
 
 /*
- * Get a reference on a vnode.
- */
-vnode_t *
-vn_get(
-	struct vnode	*vp,
-	vmap_t		*vmap)
-{
-	struct inode	*inode;
-
-	XFS_STATS_INC(vn_get);
-	inode = LINVFS_GET_IP(vp);
-	if (inode->i_state & I_FREEING)
-		return NULL;
-
-	inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino);
-	if (!inode)	/* Inode not present */
-		return NULL;
-
-	vn_trace_exit(vp, "vn_get", (inst_t *)__return_address);
-
-	return vp;
-}
-
-/*
  * Revalidate the Linux inode from the vattr.
  * Note: i_size _not_ updated; we must hold the inode
  * semaphore when doing that - callers responsibility.
@@ -198,7 +99,7 @@ vn_revalidate_core(
 {
 	struct inode	*inode = LINVFS_GET_IP(vp);
 
-	inode->i_mode	    = VTTOIF(vap->va_type) | vap->va_mode;
+	inode->i_mode	    = vap->va_mode;
 	inode->i_nlink	    = vap->va_nlink;
 	inode->i_uid	    = vap->va_uid;
 	inode->i_gid	    = vap->va_gid;
@@ -206,6 +107,7 @@ vn_revalidate_core(
 	inode->i_mtime	    = vap->va_mtime;
 	inode->i_ctime	    = vap->va_ctime;
 	inode->i_atime	    = vap->va_atime;
+	inode->i_blksize    = vap->va_blocksize;
 	if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
 		inode->i_flags |= S_IMMUTABLE;
 	else
@@ -247,71 +149,6 @@ vn_revalidate(
 }
 
 /*
- * purge a vnode from the cache
- * At this point the vnode is guaranteed to have no references (vn_count == 0)
- * The caller has to make sure that there are no ways someone could
- * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock).
- */
-void
-vn_purge(
-	struct vnode	*vp,
-	vmap_t		*vmap)
-{
-	vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address);
-
-again:
-	/*
-	 * Check whether vp has already been reclaimed since our caller
-	 * sampled its version while holding a filesystem cache lock that
-	 * its VOP_RECLAIM function acquires.
-	 */
-	VN_LOCK(vp);
-	if (vp->v_number != vmap->v_number) {
-		VN_UNLOCK(vp, 0);
-		return;
-	}
-
-	/*
-	 * If vp is being reclaimed or inactivated, wait until it is inert,
-	 * then proceed.  Can't assume that vnode is actually reclaimed
-	 * just because the reclaimed flag is asserted -- a vn_alloc
-	 * reclaim can fail.
-	 */
-	if (vp->v_flag & (VINACT | VRECLM)) {
-		ASSERT(vn_count(vp) == 0);
-		vp->v_flag |= VWAIT;
-		sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
-		goto again;
-	}
-
-	/*
-	 * Another process could have raced in and gotten this vnode...
-	 */
-	if (vn_count(vp) > 0) {
-		VN_UNLOCK(vp, 0);
-		return;
-	}
-
-	XFS_STATS_DEC(vn_active);
-	vp->v_flag |= VRECLM;
-	VN_UNLOCK(vp, 0);
-
-	/*
-	 * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells
-	 * vp's filesystem to flush and invalidate all cached resources.
-	 * When vn_reclaim returns, vp should have no private data,
-	 * either in a system cache or attached to v_data.
-	 */
-	if (vn_reclaim(vp) != 0)
-		panic("vn_purge: cannot reclaim");
-
-	/*
-	 * Wakeup anyone waiting for vp to be reclaimed.
-	 */
-	vn_wakeup(vp);
-}
-
-/*
  * Add a reference to a referenced vnode.
  */
 struct vnode *
@@ -330,80 +167,6 @@ vn_hold(
 	return vp;
 }
 
-/*
- *  Call VOP_INACTIVE on last reference.
- */
-void
-vn_rele(
-	struct vnode	*vp)
-{
-	int		vcnt;
-	int		cache;
-
-	XFS_STATS_INC(vn_rele);
-
-	VN_LOCK(vp);
-
-	vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address);
-	vcnt = vn_count(vp);
-
-	/*
-	 * Since we always get called from put_inode we know
-	 * that i_count won't be decremented after we
-	 * return.
-	 */
-	if (!vcnt) {
-		/*
-		 * As soon as we turn this on, noone can find us in vn_get
-		 * until we turn off VINACT or VRECLM
-		 */
-		vp->v_flag |= VINACT;
-		VN_UNLOCK(vp, 0);
-
-		/*
-		 * Do not make the VOP_INACTIVE call if there
-		 * are no behaviors attached to the vnode to call.
-		 */
-		if (vp->v_fbhv)
-			VOP_INACTIVE(vp, NULL, cache);
-
-		VN_LOCK(vp);
-		if (vp->v_flag & VWAIT)
-			sv_broadcast(vptosync(vp));
-
-		vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED);
-	}
-
-	VN_UNLOCK(vp, 0);
-
-	vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address);
-}
-
-/*
- * Finish the removal of a vnode.
- */
-void
-vn_remove(
-	struct vnode	*vp)
-{
-	vmap_t		vmap;
-
-	/* Make sure we don't do this to the same vnode twice */
-	if (!(vp->v_fbhv))
-		return;
-
-	XFS_STATS_INC(vn_remove);
-	vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address);
-
-	/*
-	 * After the following purge the vnode
-	 * will no longer exist.
-	 */
-	VMAP(vp, vmap);
-	vn_purge(vp, &vmap);
-}
-
-
 #ifdef	XFS_VNODE_TRACE
 
 #define KTRACE_ENTER(vp, vk, s, line, ra)			\
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index a6e57c647be..f2bbb327c08 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  *
  * Portions Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -65,10 +51,6 @@ struct vattr;
 struct xfs_iomap;
 struct attrlist_cursor_kern;
 
-/*
- * Vnode types.  VNON means no type.
- */
-enum vtype	{ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK };
 
 typedef xfs_ino_t vnumber_t;
 typedef struct dentry vname_t;
@@ -77,15 +59,14 @@ typedef bhv_head_t vn_bhv_head_t;
 /*
  * MP locking protocols:
  *	v_flag, v_vfsp				VN_LOCK/VN_UNLOCK
- *	v_type					read-only or fs-dependent
  */
 typedef struct vnode {
 	__u32		v_flag;			/* vnode flags (see below) */
-	enum vtype	v_type;			/* vnode type */
 	struct vfs	*v_vfsp;		/* ptr to containing VFS */
 	vnumber_t	v_number;		/* in-core vnode number */
 	vn_bhv_head_t	v_bh;			/* behavior head */
 	spinlock_t	v_lock;			/* VN_LOCK/VN_UNLOCK */
+	atomic_t	v_iocount;		/* outstanding I/O count */
 #ifdef XFS_VNODE_TRACE
 	struct ktrace	*v_trace;		/* trace header structure    */
 #endif
@@ -93,6 +74,12 @@ typedef struct vnode {
 	/* inode MUST be last */
 } vnode_t;
 
+#define VN_ISLNK(vp)	S_ISLNK((vp)->v_inode.i_mode)
+#define VN_ISREG(vp)	S_ISREG((vp)->v_inode.i_mode)
+#define VN_ISDIR(vp)	S_ISDIR((vp)->v_inode.i_mode)
+#define VN_ISCHR(vp)	S_ISCHR((vp)->v_inode.i_mode)
+#define VN_ISBLK(vp)	S_ISBLK((vp)->v_inode.i_mode)
+
 #define v_fbhv			v_bh.bh_first	       /* first behavior */
 #define v_fops			v_bh.bh_first->bd_ops  /* first behavior ops */
 
@@ -133,22 +120,8 @@ typedef enum {
 #define LINVFS_GET_IP(vp)	(&(vp)->v_inode)
 
 /*
- * Convert between vnode types and inode formats (since POSIX.1
- * defines mode word of stat structure in terms of inode formats).
- */
-extern enum vtype	iftovt_tab[];
-extern u_short		vttoif_tab[];
-#define IFTOVT(mode)	(iftovt_tab[((mode) & S_IFMT) >> 12])
-#define VTTOIF(indx)	(vttoif_tab[(int)(indx)])
-#define MAKEIMODE(indx, mode)	(int)(VTTOIF(indx) | (mode))
-
-
-/*
  * Vnode flags.
  */
-#define VINACT		       0x1	/* vnode is being inactivated	*/
-#define VRECLM		       0x2	/* vnode is being reclaimed	*/
-#define VWAIT		       0x4	/* waiting for VINACT/VRECLM to end */
 #define VMODIFIED	       0x8	/* XFS inode state possibly differs */
 					/* to the Linux inode state.	*/
 
@@ -229,11 +202,12 @@ typedef void	(*vop_rwunlock_t)(bhv_desc_t *, vrwlock_t);
 typedef int	(*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int,
 				struct xfs_iomap *, int *);
 typedef int	(*vop_reclaim_t)(bhv_desc_t *);
-typedef int	(*vop_attr_get_t)(bhv_desc_t *, char *, char *, int *, int,
-				struct cred *);
-typedef	int	(*vop_attr_set_t)(bhv_desc_t *, char *, char *, int, int,
-				struct cred *);
-typedef	int	(*vop_attr_remove_t)(bhv_desc_t *, char *, int, struct cred *);
+typedef int	(*vop_attr_get_t)(bhv_desc_t *, const char *, char *, int *,
+				int, struct cred *);
+typedef	int	(*vop_attr_set_t)(bhv_desc_t *, const char *, char *, int,
+				int, struct cred *);
+typedef	int	(*vop_attr_remove_t)(bhv_desc_t *, const char *,
+				int, struct cred *);
 typedef	int	(*vop_attr_list_t)(bhv_desc_t *, char *, int, int,
 				struct attrlist_cursor_kern *, struct cred *);
 typedef void	(*vop_link_removed_t)(bhv_desc_t *, vnode_t *, int);
@@ -408,7 +382,6 @@ typedef struct vnodeops {
  */
 typedef struct vattr {
 	int		va_mask;	/* bit-mask of attributes present */
-	enum vtype	va_type;	/* vnode type (for create) */
 	mode_t		va_mode;	/* file access mode and type */
 	xfs_nlink_t	va_nlink;	/* number of references to file */
 	uid_t		va_uid;		/* owner user id */
@@ -498,27 +471,12 @@ typedef struct vattr {
  * Check whether mandatory file locking is enabled.
  */
 #define MANDLOCK(vp, mode)	\
-	((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
+	(VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
 
 extern void	vn_init(void);
-extern int	vn_wait(struct vnode *);
 extern vnode_t	*vn_initialize(struct inode *);
 
 /*
- * Acquiring and invalidating vnodes:
- *
- *	if (vn_get(vp, version, 0))
- *		...;
- *	vn_purge(vp, version);
- *
- * vn_get and vn_purge must be called with vmap_t arguments, sampled
- * while a lock that the vnode's VOP_RECLAIM function acquires is
- * held, to ensure that the vnode sampled with the lock held isn't
- * recycled (VOP_RECLAIMed) or deallocated between the release of the lock
- * and the subsequent vn_get or vn_purge.
- */
-
-/*
  * vnode_map structures _must_ match vn_epoch and vnode structure sizes.
  */
 typedef struct vnode_map {
@@ -531,11 +489,11 @@ typedef struct vnode_map {
 			 (vmap).v_number = (vp)->v_number,	\
 			 (vmap).v_ino	 = (vp)->v_inode.i_ino; }
 
-extern void	vn_purge(struct vnode *, vmap_t *);
-extern vnode_t	*vn_get(struct vnode *, vmap_t *);
 extern int	vn_revalidate(struct vnode *);
 extern void	vn_revalidate_core(struct vnode *, vattr_t *);
-extern void	vn_remove(struct vnode *);
+
+extern void	vn_iowait(struct vnode *vp);
+extern void	vn_iowake(struct vnode *vp);
 
 static inline int vn_count(struct vnode *vp)
 {
@@ -546,7 +504,6 @@ static inline int vn_count(struct vnode *vp)
  * Vnode reference counting functions (and macros for compatibility).
  */
 extern vnode_t	*vn_hold(struct vnode *);
-extern void	vn_rele(struct vnode *);
 
 #if defined(XFS_VNODE_TRACE)
 #define VN_HOLD(vp)		\
@@ -560,6 +517,12 @@ extern void	vn_rele(struct vnode *);
 #define VN_RELE(vp)		(iput(LINVFS_GET_IP(vp)))
 #endif
 
+static inline struct vnode *vn_grab(struct vnode *vp)
+{
+	struct inode *inode = igrab(LINVFS_GET_IP(vp));
+	return inode ? LINVFS_GET_VP(inode) : NULL;
+}
+
 /*
  * Vname handling macros.
  */
@@ -590,13 +553,6 @@ static __inline__ void vn_flagclr(struct vnode *vp, uint flag)
 }
 
 /*
- * Update modify/access/change times on the vnode
- */
-#define VN_MTIMESET(vp, tvp)	(LINVFS_GET_IP(vp)->i_mtime = *(tvp))
-#define VN_ATIMESET(vp, tvp)	(LINVFS_GET_IP(vp)->i_atime = *(tvp))
-#define VN_CTIMESET(vp, tvp)	(LINVFS_GET_IP(vp)->i_ctime = *(tvp))
-
-/*
  * Dealing with bad inodes
  */
 static inline void vn_mark_bad(struct vnode *vp)
@@ -627,6 +583,7 @@ static inline int VN_BAD(struct vnode *vp)
 #define	ATTR_LAZY	0x80	/* set/get attributes lazily */
 #define	ATTR_NONBLOCK	0x100	/* return EAGAIN if operation would block */
 #define ATTR_NOLOCK	0x200	/* Don't grab any conflicting locks */
+#define ATTR_NOSIZETOK	0x400	/* Don't get the SIZE token */
 
 /*
  * Flags to VOP_FSYNC and VOP_RECLAIM.
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 46ce1e3ce1d..00b5043dfa5 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -1,39 +1,25 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -43,18 +29,17 @@
 #include "xfs_dmapi.h"
 #include "xfs_quota.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
 #include "xfs_bmap.h"
-#include "xfs_bit.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_itable.h"
@@ -66,7 +51,6 @@
 #include "xfs_buf_item.h"
 #include "xfs_trans_space.h"
 #include "xfs_trans_priv.h"
-
 #include "xfs_qm.h"
 
 
@@ -112,7 +96,7 @@ xfs_qm_dqinit(
 
 	brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
 	dqp->dq_flags = type;
-	INT_SET(dqp->q_core.d_id, ARCH_CONVERT, id);
+	dqp->q_core.d_id = cpu_to_be32(id);
 	dqp->q_mount = mp;
 
 	/*
@@ -194,10 +178,10 @@ xfs_qm_dqinit_core(
 	/*
 	 * Caller has zero'd the entire dquot 'chunk' already.
 	 */
-	INT_SET(d->dd_diskdq.d_magic, ARCH_CONVERT, XFS_DQUOT_MAGIC);
-	INT_SET(d->dd_diskdq.d_version, ARCH_CONVERT, XFS_DQUOT_VERSION);
-	INT_SET(d->dd_diskdq.d_id, ARCH_CONVERT, id);
-	INT_SET(d->dd_diskdq.d_flags, ARCH_CONVERT, type);
+	d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
+	d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
+	d->dd_diskdq.d_id = cpu_to_be32(id);
+	d->dd_diskdq.d_flags = type;
 }
 
 
@@ -227,19 +211,13 @@ __xfs_dqtrace_entry(
 		     (void *)(__psint_t)dqp->q_nrefs,
 		     (void *)(__psint_t)dqp->dq_flags,
 		     (void *)(__psint_t)dqp->q_res_bcount,
-		     (void *)(__psint_t)INT_GET(dqp->q_core.d_bcount,
-						ARCH_CONVERT),
-		     (void *)(__psint_t)INT_GET(dqp->q_core.d_icount,
-						ARCH_CONVERT),
-		     (void *)(__psint_t)INT_GET(dqp->q_core.d_blk_hardlimit,
-						ARCH_CONVERT),
-		     (void *)(__psint_t)INT_GET(dqp->q_core.d_blk_softlimit,
-						ARCH_CONVERT),
-		     (void *)(__psint_t)INT_GET(dqp->q_core.d_ino_hardlimit,
-						ARCH_CONVERT),
-		     (void *)(__psint_t)INT_GET(dqp->q_core.d_ino_softlimit,
-						ARCH_CONVERT),
-		     (void *)(__psint_t)INT_GET(dqp->q_core.d_id, ARCH_CONVERT),
+		     (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_bcount),
+		     (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_icount),
+		     (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_hardlimit),
+		     (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_softlimit),
+		     (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_hardlimit),
+		     (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_softlimit),
+		     (void *)(__psint_t)be32_to_cpu(dqp->q_core.d_id),
 		     (void *)(__psint_t)current_pid(),
 		     (void *)(__psint_t)ino,
 		     (void *)(__psint_t)retaddr,
@@ -264,17 +242,17 @@ xfs_qm_adjust_dqlimits(
 	ASSERT(d->d_id);
 
 	if (q->qi_bsoftlimit && !d->d_blk_softlimit)
-		INT_SET(d->d_blk_softlimit, ARCH_CONVERT, q->qi_bsoftlimit);
+		d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
 	if (q->qi_bhardlimit && !d->d_blk_hardlimit)
-		INT_SET(d->d_blk_hardlimit, ARCH_CONVERT, q->qi_bhardlimit);
+		d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
 	if (q->qi_isoftlimit && !d->d_ino_softlimit)
-		INT_SET(d->d_ino_softlimit, ARCH_CONVERT, q->qi_isoftlimit);
+		d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
 	if (q->qi_ihardlimit && !d->d_ino_hardlimit)
-		INT_SET(d->d_ino_hardlimit, ARCH_CONVERT, q->qi_ihardlimit);
+		d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
 	if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
-		INT_SET(d->d_rtb_softlimit, ARCH_CONVERT, q->qi_rtbsoftlimit);
+		d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
 	if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
-		INT_SET(d->d_rtb_hardlimit, ARCH_CONVERT, q->qi_rtbhardlimit);
+		d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
 }
 
 /*
@@ -298,81 +276,81 @@ xfs_qm_adjust_dqtimers(
 	ASSERT(d->d_id);
 
 #ifdef QUOTADEBUG
-	if (INT_GET(d->d_blk_hardlimit, ARCH_CONVERT))
-		ASSERT(INT_GET(d->d_blk_softlimit, ARCH_CONVERT) <=
-			INT_GET(d->d_blk_hardlimit, ARCH_CONVERT));
-	if (INT_GET(d->d_ino_hardlimit, ARCH_CONVERT))
-		ASSERT(INT_GET(d->d_ino_softlimit, ARCH_CONVERT) <=
-			INT_GET(d->d_ino_hardlimit, ARCH_CONVERT));
-	if (INT_GET(d->d_rtb_hardlimit, ARCH_CONVERT))
-		ASSERT(INT_GET(d->d_rtb_softlimit, ARCH_CONVERT) <=
-			INT_GET(d->d_rtb_hardlimit, ARCH_CONVERT));
+	if (d->d_blk_hardlimit)
+		ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
+		       be64_to_cpu(d->d_blk_hardlimit));
+	if (d->d_ino_hardlimit)
+		ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
+		       be64_to_cpu(d->d_ino_hardlimit));
+	if (d->d_rtb_hardlimit)
+		ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
+		       be64_to_cpu(d->d_rtb_hardlimit));
 #endif
 	if (!d->d_btimer) {
-		if ((INT_GET(d->d_blk_softlimit, ARCH_CONVERT) &&
-		    (INT_GET(d->d_bcount, ARCH_CONVERT) >=
-				INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) ||
-		    (INT_GET(d->d_blk_hardlimit, ARCH_CONVERT) &&
-		    (INT_GET(d->d_bcount, ARCH_CONVERT) >=
-				INT_GET(d->d_blk_hardlimit, ARCH_CONVERT)))) {
-			INT_SET(d->d_btimer, ARCH_CONVERT,
-				get_seconds() + XFS_QI_BTIMELIMIT(mp));
+		if ((d->d_blk_softlimit &&
+		     (be64_to_cpu(d->d_bcount) >=
+		      be64_to_cpu(d->d_blk_softlimit))) ||
+		    (d->d_blk_hardlimit &&
+		     (be64_to_cpu(d->d_bcount) >=
+		      be64_to_cpu(d->d_blk_hardlimit)))) {
+			d->d_btimer = cpu_to_be32(get_seconds() +
+					XFS_QI_BTIMELIMIT(mp));
 		} else {
 			d->d_bwarns = 0;
 		}
 	} else {
 		if ((!d->d_blk_softlimit ||
-		    (INT_GET(d->d_bcount, ARCH_CONVERT) <
-				INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) &&
+		     (be64_to_cpu(d->d_bcount) <
+		      be64_to_cpu(d->d_blk_softlimit))) &&
 		    (!d->d_blk_hardlimit ||
-		    (INT_GET(d->d_bcount, ARCH_CONVERT) <
-				INT_GET(d->d_blk_hardlimit, ARCH_CONVERT)))) {
+		    (be64_to_cpu(d->d_bcount) <
+		     be64_to_cpu(d->d_blk_hardlimit)))) {
 			d->d_btimer = 0;
 		}
 	}
 
 	if (!d->d_itimer) {
-		if ((INT_GET(d->d_ino_softlimit, ARCH_CONVERT) &&
-		    (INT_GET(d->d_icount, ARCH_CONVERT) >=
-				INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) ||
-		    (INT_GET(d->d_ino_hardlimit, ARCH_CONVERT) &&
-		    (INT_GET(d->d_icount, ARCH_CONVERT) >=
-				INT_GET(d->d_ino_hardlimit, ARCH_CONVERT)))) {
-			INT_SET(d->d_itimer, ARCH_CONVERT,
-				get_seconds() + XFS_QI_ITIMELIMIT(mp));
+		if ((d->d_ino_softlimit &&
+		     (be64_to_cpu(d->d_icount) >=
+		      be64_to_cpu(d->d_ino_softlimit))) ||
+		    (d->d_ino_hardlimit &&
+		     (be64_to_cpu(d->d_icount) >=
+		      be64_to_cpu(d->d_ino_hardlimit)))) {
+			d->d_itimer = cpu_to_be32(get_seconds() +
+					XFS_QI_ITIMELIMIT(mp));
 		} else {
 			d->d_iwarns = 0;
 		}
 	} else {
 		if ((!d->d_ino_softlimit ||
-		    (INT_GET(d->d_icount, ARCH_CONVERT) <
-				INT_GET(d->d_ino_softlimit, ARCH_CONVERT)))  &&
+		     (be64_to_cpu(d->d_icount) <
+		      be64_to_cpu(d->d_ino_softlimit)))  &&
 		    (!d->d_ino_hardlimit ||
-		    (INT_GET(d->d_icount, ARCH_CONVERT) <
-				INT_GET(d->d_ino_hardlimit, ARCH_CONVERT)))) {
+		     (be64_to_cpu(d->d_icount) <
+		      be64_to_cpu(d->d_ino_hardlimit)))) {
 			d->d_itimer = 0;
 		}
 	}
 
 	if (!d->d_rtbtimer) {
-		if ((INT_GET(d->d_rtb_softlimit, ARCH_CONVERT) &&
-		    (INT_GET(d->d_rtbcount, ARCH_CONVERT) >=
-				INT_GET(d->d_rtb_softlimit, ARCH_CONVERT))) ||
-		    (INT_GET(d->d_rtb_hardlimit, ARCH_CONVERT) &&
-		    (INT_GET(d->d_rtbcount, ARCH_CONVERT) >=
-				INT_GET(d->d_rtb_hardlimit, ARCH_CONVERT)))) {
-			INT_SET(d->d_rtbtimer, ARCH_CONVERT,
-				get_seconds() + XFS_QI_RTBTIMELIMIT(mp));
+		if ((d->d_rtb_softlimit &&
+		     (be64_to_cpu(d->d_rtbcount) >=
+		      be64_to_cpu(d->d_rtb_softlimit))) ||
+		    (d->d_rtb_hardlimit &&
+		     (be64_to_cpu(d->d_rtbcount) >=
+		      be64_to_cpu(d->d_rtb_hardlimit)))) {
+			d->d_rtbtimer = cpu_to_be32(get_seconds() +
+					XFS_QI_RTBTIMELIMIT(mp));
 		} else {
 			d->d_rtbwarns = 0;
 		}
 	} else {
 		if ((!d->d_rtb_softlimit ||
-		    (INT_GET(d->d_rtbcount, ARCH_CONVERT) <
-				INT_GET(d->d_rtb_softlimit, ARCH_CONVERT))) &&
+		     (be64_to_cpu(d->d_rtbcount) <
+		      be64_to_cpu(d->d_rtb_softlimit))) &&
 		    (!d->d_rtb_hardlimit ||
-		    (INT_GET(d->d_rtbcount, ARCH_CONVERT) <
-				INT_GET(d->d_rtb_hardlimit, ARCH_CONVERT)))) {
+		     (be64_to_cpu(d->d_rtbcount) <
+		      be64_to_cpu(d->d_rtb_hardlimit)))) {
 			d->d_rtbtimer = 0;
 		}
 	}
@@ -421,7 +399,7 @@ xfs_qm_init_dquot_blk(
  */
 STATIC int
 xfs_qm_dqalloc(
-	xfs_trans_t	*tp,
+	xfs_trans_t	**tpp,
 	xfs_mount_t	*mp,
 	xfs_dquot_t	*dqp,
 	xfs_inode_t	*quotip,
@@ -433,6 +411,7 @@ xfs_qm_dqalloc(
 	xfs_bmbt_irec_t map;
 	int		nmaps, error, committed;
 	xfs_buf_t	*bp;
+	xfs_trans_t	*tp = *tpp;
 
 	ASSERT(tp != NULL);
 	xfs_dqtrace_entry(dqp, "DQALLOC");
@@ -489,13 +468,35 @@ xfs_qm_dqalloc(
 	 * Make a chunk of dquots out of this buffer and log
 	 * the entire thing.
 	 */
-	xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT),
+	xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
 			      dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
 
-	if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) {
+	/*
+	 * xfs_bmap_finish() may commit the current transaction and
+	 * start a second transaction if the freelist is not empty.
+	 *
+	 * Since we still want to modify this buffer, we need to
+	 * ensure that the buffer is not released on commit of
+	 * the first transaction and ensure the buffer is added to the
+	 * second transaction.
+	 *
+	 * If there is only one transaction then don't stop the buffer
+	 * from being released when it commits later on.
+	 */
+
+	xfs_trans_bhold(tp, bp);
+
+	if ((error = xfs_bmap_finish(tpp, &flist, firstblock, &committed))) {
 		goto error1;
 	}
 
+	if (committed) {
+		tp = *tpp;
+		xfs_trans_bjoin(tp, bp);
+	} else {
+		xfs_trans_bhold_release(tp, bp);
+	}
+
 	*O_bpp = bp;
 	return 0;
 
@@ -514,7 +515,7 @@ xfs_qm_dqalloc(
  */
 STATIC int
 xfs_qm_dqtobp(
-	xfs_trans_t		*tp,
+	xfs_trans_t		**tpp,
 	xfs_dquot_t		*dqp,
 	xfs_disk_dquot_t	**O_ddpp,
 	xfs_buf_t		**O_bpp,
@@ -528,9 +529,10 @@ xfs_qm_dqtobp(
 	xfs_disk_dquot_t *ddq;
 	xfs_dqid_t	id;
 	boolean_t	newdquot;
+	xfs_trans_t	*tp = (tpp ? *tpp : NULL);
 
 	mp = dqp->q_mount;
-	id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT);
+	id = be32_to_cpu(dqp->q_core.d_id);
 	nmaps = 1;
 	newdquot = B_FALSE;
 
@@ -539,8 +541,7 @@ xfs_qm_dqtobp(
 	 */
 	if (dqp->q_blkno == (xfs_daddr_t) 0) {
 		/* We use the id as an index */
-		dqp->q_fileoffset = (xfs_fileoff_t) ((uint)id /
-						     XFS_QM_DQPERBLK(mp));
+		dqp->q_fileoffset = (xfs_fileoff_t)id / XFS_QM_DQPERBLK(mp);
 		nmaps = 1;
 		quotip = XFS_DQ_TO_QIP(dqp);
 		xfs_ilock(quotip, XFS_ILOCK_SHARED);
@@ -579,9 +580,10 @@ xfs_qm_dqtobp(
 				return (ENOENT);
 
 			ASSERT(tp);
-			if ((error = xfs_qm_dqalloc(tp, mp, dqp, quotip,
+			if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
 						dqp->q_fileoffset, &bp)))
 				return (error);
+			tp = *tpp;
 			newdquot = B_TRUE;
 		} else {
 			/*
@@ -645,7 +647,7 @@ xfs_qm_dqtobp(
 /* ARGSUSED */
 STATIC int
 xfs_qm_dqread(
-	xfs_trans_t	*tp,
+	xfs_trans_t	**tpp,
 	xfs_dqid_t	id,
 	xfs_dquot_t	*dqp,	/* dquot to get filled in */
 	uint		flags)
@@ -653,28 +655,32 @@ xfs_qm_dqread(
 	xfs_disk_dquot_t *ddqp;
 	xfs_buf_t	 *bp;
 	int		 error;
+	xfs_trans_t	 *tp;
+
+	ASSERT(tpp);
 
 	/*
 	 * get a pointer to the on-disk dquot and the buffer containing it
 	 * dqp already knows its own type (GROUP/USER).
 	 */
 	xfs_dqtrace_entry(dqp, "DQREAD");
-	if ((error = xfs_qm_dqtobp(tp, dqp, &ddqp, &bp, flags))) {
+	if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
 		return (error);
 	}
+	tp = *tpp;
 
 	/* copy everything from disk dquot to the incore dquot */
 	memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
-	ASSERT(INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id);
+	ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
 	xfs_qm_dquot_logitem_init(dqp);
 
 	/*
 	 * Reservation counters are defined as reservation plus current usage
 	 * to avoid having to add everytime.
 	 */
-	dqp->q_res_bcount = INT_GET(ddqp->d_bcount, ARCH_CONVERT);
-	dqp->q_res_icount = INT_GET(ddqp->d_icount, ARCH_CONVERT);
-	dqp->q_res_rtbcount = INT_GET(ddqp->d_rtbcount, ARCH_CONVERT);
+	dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
+	dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
+	dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
 
 	/* Mark the buf so that this will stay incore a little longer */
 	XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
@@ -740,7 +746,7 @@ xfs_qm_idtodq(
 	 * Read it from disk; xfs_dqread() takes care of
 	 * all the necessary initialization of dquot's fields (locks, etc)
 	 */
-	if ((error = xfs_qm_dqread(tp, id, dqp, flags))) {
+	if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
 		/*
 		 * This can happen if quotas got turned off (ESRCH),
 		 * or if the dquot didn't exist on disk and we ask to
@@ -800,7 +806,7 @@ xfs_qm_dqlookup(
 		 * dqlock to look at the id field of the dquot, since the
 		 * id can't be modified without the hashlock anyway.
 		 */
-		if (INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id && dqp->q_mount == mp) {
+		if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
 			xfs_dqtrace_entry(dqp, "DQFOUND BY LOOKUP");
 			/*
 			 * All in core dquots must be on the dqlist of mp
@@ -831,7 +837,7 @@ xfs_qm_dqlookup(
 			 * id couldn't have changed; we had the hashlock all
 			 * along
 			 */
-			ASSERT(INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id);
+			ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
 
 			if (flist_locked) {
 				if (dqp->q_nrefs != 0) {
@@ -1253,7 +1259,7 @@ xfs_qm_dqflush(
 		return (error);
 	}
 
-	if (xfs_qm_dqcheck(&dqp->q_core, INT_GET(ddqp->d_id, ARCH_CONVERT),
+	if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id),
 			   0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
 		xfs_force_shutdown(dqp->q_mount, XFS_CORRUPT_INCORE);
 		return XFS_ERROR(EIO);
@@ -1406,8 +1412,8 @@ xfs_dqlock2(
 {
 	if (d1 && d2) {
 		ASSERT(d1 != d2);
-		if (INT_GET(d1->q_core.d_id, ARCH_CONVERT) >
-		    INT_GET(d2->q_core.d_id, ARCH_CONVERT)) {
+		if (be32_to_cpu(d1->q_core.d_id) >
+		    be32_to_cpu(d2->q_core.d_id)) {
 			xfs_dqlock(d2);
 			xfs_dqlock(d1);
 		} else {
@@ -1529,33 +1535,33 @@ xfs_qm_dqprint(xfs_dquot_t *dqp)
 {
 	cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------");
 	cmn_err(CE_DEBUG, "---- dquotID =  %d",
-		(int)INT_GET(dqp->q_core.d_id, ARCH_CONVERT));
+		(int)be32_to_cpu(dqp->q_core.d_id));
 	cmn_err(CE_DEBUG, "---- type    =  %s", DQFLAGTO_TYPESTR(dqp));
 	cmn_err(CE_DEBUG, "---- fs      =  0x%p", dqp->q_mount);
 	cmn_err(CE_DEBUG, "---- blkno   =  0x%x", (int) dqp->q_blkno);
 	cmn_err(CE_DEBUG, "---- boffset =  0x%x", (int) dqp->q_bufoffset);
 	cmn_err(CE_DEBUG, "---- blkhlimit =  %Lu (0x%x)",
-		INT_GET(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT),
-		(int) INT_GET(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT));
+		be64_to_cpu(dqp->q_core.d_blk_hardlimit),
+		(int)be64_to_cpu(dqp->q_core.d_blk_hardlimit));
 	cmn_err(CE_DEBUG, "---- blkslimit =  %Lu (0x%x)",
-		INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT),
-		(int)INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT));
+		be64_to_cpu(dqp->q_core.d_blk_softlimit),
+		(int)be64_to_cpu(dqp->q_core.d_blk_softlimit));
 	cmn_err(CE_DEBUG, "---- inohlimit =  %Lu (0x%x)",
-		INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT),
-		(int)INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT));
+		be64_to_cpu(dqp->q_core.d_ino_hardlimit),
+		(int)be64_to_cpu(dqp->q_core.d_ino_hardlimit));
 	cmn_err(CE_DEBUG, "---- inoslimit =  %Lu (0x%x)",
-		INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT),
-		(int)INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT));
+		be64_to_cpu(dqp->q_core.d_ino_softlimit),
+		(int)be64_to_cpu(dqp->q_core.d_ino_softlimit));
 	cmn_err(CE_DEBUG, "---- bcount  =  %Lu (0x%x)",
-		INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT),
-		(int)INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT));
+		be64_to_cpu(dqp->q_core.d_bcount),
+		(int)be64_to_cpu(dqp->q_core.d_bcount));
 	cmn_err(CE_DEBUG, "---- icount  =  %Lu (0x%x)",
-		INT_GET(dqp->q_core.d_icount, ARCH_CONVERT),
-		(int)INT_GET(dqp->q_core.d_icount, ARCH_CONVERT));
+		be64_to_cpu(dqp->q_core.d_icount),
+		(int)be64_to_cpu(dqp->q_core.d_icount));
 	cmn_err(CE_DEBUG, "---- btimer  =  %d",
-		(int)INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT));
+		(int)be32_to_cpu(dqp->q_core.d_btimer));
 	cmn_err(CE_DEBUG, "---- itimer  =  %d",
-		(int)INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT));
+		(int)be32_to_cpu(dqp->q_core.d_itimer));
 	cmn_err(CE_DEBUG, "---------------------------");
 }
 #endif
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 39175103c8e..c0c629663a5 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DQUOT_H__
 #define __XFS_DQUOT_H__
@@ -113,20 +99,6 @@ typedef struct xfs_dquot {
 
 #define XFS_DQHOLD(dqp)		((dqp)->q_nrefs++)
 
-/*
- * Quota Accounting/Enforcement flags
- */
-#define XFS_ALL_QUOTA_ACCT	\
-		(XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
-#define XFS_ALL_QUOTA_ENFD	(XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD)
-#define XFS_ALL_QUOTA_CHKD	(XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD)
-
-#define XFS_IS_QUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
-#define XFS_IS_QUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ENFD)
-#define XFS_IS_UQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_UQUOTA_ACCT)
-#define XFS_IS_PQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_PQUOTA_ACCT)
-#define XFS_IS_GQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_GQUOTA_ACCT)
-
 #ifdef DEBUG
 static inline int
 XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index f5271b7b1e8..2f69822344e 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -1,39 +1,25 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -43,18 +29,17 @@
 #include "xfs_dmapi.h"
 #include "xfs_quota.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
-#include "xfs_bit.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_itable.h"
@@ -65,10 +50,8 @@
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_priv.h"
-
 #include "xfs_qm.h"
 
-
 /*
  * returns the number of iovecs needed to log the given dquot item.
  */
@@ -467,7 +450,7 @@ xfs_qm_dquot_logitem_init(
 	lp->qli_item.li_mountp = dqp->q_mount;
 	lp->qli_dquot = dqp;
 	lp->qli_format.qlf_type = XFS_LI_DQUOT;
-	lp->qli_format.qlf_id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT);
+	lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
 	lp->qli_format.qlf_blkno = dqp->q_blkno;
 	lp->qli_format.qlf_len = 1;
 	/*
@@ -509,6 +492,7 @@ xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t	*qf,
 
 	log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format);
 	log_vector->i_len = sizeof(xfs_qoff_logitem_t);
+	XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF);
 	qf->qql_format.qf_size = 1;
 }
 
diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h
index 9c6500dabca..5a632531f84 100644
--- a/fs/xfs/quota/xfs_dquot_item.h
+++ b/fs/xfs/quota/xfs_dquot_item.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DQUOT_ITEM_H__
 #define __XFS_DQUOT_ITEM_H__
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index f665ca8f9e9..1aea42d71a6 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1,39 +1,25 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_clnt.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
@@ -44,21 +30,20 @@
 #include "xfs_dmapi.h"
 #include "xfs_quota.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_bit.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
-#include "xfs_itable.h"
+#include "xfs_bmap.h"
 #include "xfs_rw.h"
 #include "xfs_acl.h"
 #include "xfs_cap.h"
@@ -67,7 +52,6 @@
 #include "xfs_buf_item.h"
 #include "xfs_trans_space.h"
 #include "xfs_utils.h"
-
 #include "xfs_qm.h"
 
 /*
@@ -76,8 +60,9 @@
  * quota functionality, including maintaining the freelist and hash
  * tables of dquots.
  */
-mutex_t xfs_Gqm_lock;
+mutex_t		xfs_Gqm_lock;
 struct xfs_qm	*xfs_Gqm;
+uint		ndquot;
 
 kmem_zone_t	*qm_dqzone;
 kmem_zone_t	*qm_dqtrxzone;
@@ -107,10 +92,10 @@ extern mutex_t	qcheck_lock;
 	for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \
 		cmn_err(CE_DEBUG, "   %d.  \"%d (%s)\"   " \
 				  "bcnt = %d, icnt = %d, refs = %d", \
-			++i, (int) INT_GET(dqp->q_core.d_id, ARCH_CONVERT), \
+			++i, (int) be32_to_cpu(dqp->q_core.d_id), \
 			DQFLAGTO_TYPESTR(dqp),	     \
-			(int) INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT), \
-			(int) INT_GET(dqp->q_core.d_icount, ARCH_CONVERT), \
+			(int) be64_to_cpu(dqp->q_core.d_bcount), \
+			(int) be64_to_cpu(dqp->q_core.d_icount), \
 			(int) dqp->q_nrefs);  } \
 }
 #else
@@ -124,25 +109,25 @@ extern mutex_t	qcheck_lock;
 STATIC struct xfs_qm *
 xfs_Gqm_init(void)
 {
-	xfs_qm_t		*xqm;
-	int			hsize, i;
-
-	xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
-	ASSERT(xqm);
+	xfs_dqhash_t	*udqhash, *gdqhash;
+	xfs_qm_t	*xqm;
+	uint		i, hsize, flags = KM_SLEEP | KM_MAYFAIL;
 
 	/*
 	 * Initialize the dquot hash tables.
 	 */
-	hsize = (DQUOT_HASH_HEURISTIC < XFS_QM_NCSIZE_THRESHOLD) ?
-		XFS_QM_HASHSIZE_LOW : XFS_QM_HASHSIZE_HIGH;
-	xqm->qm_dqhashmask = hsize - 1;
+	hsize = XFS_QM_HASHSIZE_HIGH;
+	while (!(udqhash = kmem_zalloc(hsize * sizeof(xfs_dqhash_t), flags))) {
+		if ((hsize >>= 1) <= XFS_QM_HASHSIZE_LOW)
+			flags = KM_SLEEP;
+	}
+	gdqhash = kmem_zalloc(hsize * sizeof(xfs_dqhash_t), KM_SLEEP);
+	ndquot = hsize << 8;
 
-	xqm->qm_usr_dqhtable = (xfs_dqhash_t *)kmem_zalloc(hsize *
-						      sizeof(xfs_dqhash_t),
-						      KM_SLEEP);
-	xqm->qm_grp_dqhtable = (xfs_dqhash_t *)kmem_zalloc(hsize *
-						      sizeof(xfs_dqhash_t),
-						      KM_SLEEP);
+	xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
+	xqm->qm_dqhashmask = hsize - 1;
+	xqm->qm_usr_dqhtable = udqhash;
+	xqm->qm_grp_dqhtable = gdqhash;
 	ASSERT(xqm->qm_usr_dqhtable != NULL);
 	ASSERT(xqm->qm_grp_dqhtable != NULL);
 
@@ -365,16 +350,6 @@ xfs_qm_mount_quotas(
 	int		error = 0;
 	uint		sbf;
 
-	/*
-	 * If a file system had quotas running earlier, but decided to
-	 * mount without -o uquota/pquota/gquota options, revoke the
-	 * quotachecked license, and bail out.
-	 */
-	if (! XFS_IS_QUOTA_ON(mp) &&
-	    (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT)) {
-		mp->m_qflags = 0;
-		goto write_changes;
-	}
 
 	/*
 	 * If quotas on realtime volumes is not supported, we disable
@@ -388,11 +363,8 @@ xfs_qm_mount_quotas(
 		goto write_changes;
 	}
 
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-	cmn_err(CE_NOTE, "Attempting to turn on disk quotas.");
-#endif
-
 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
 	/*
 	 * Allocate the quotainfo structure inside the mount struct, and
 	 * create quotainode(s), and change/rev superblock if necessary.
@@ -410,19 +382,14 @@ xfs_qm_mount_quotas(
 	 */
 	if (XFS_QM_NEED_QUOTACHECK(mp) &&
 		!(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
-#ifdef DEBUG
-		cmn_err(CE_NOTE, "Doing a quotacheck. Please wait.");
-#endif
 		if ((error = xfs_qm_quotacheck(mp))) {
 			/* Quotacheck has failed and quotas have
 			 * been disabled.
 			 */
 			return XFS_ERROR(error);
 		}
-#ifdef DEBUG
-		cmn_err(CE_NOTE, "Done quotacheck.");
-#endif
 	}
+
  write_changes:
 	/*
 	 * We actually don't have to acquire the SB_LOCK at all.
@@ -761,7 +728,7 @@ xfs_qm_dqattach_one(
 	 */
 	if (udqhint &&
 	    (dqp = udqhint->q_gdquot) &&
-	    (INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id)) {
+	    (be32_to_cpu(dqp->q_core.d_id) == id)) {
 		ASSERT(XFS_DQ_IS_LOCKED(udqhint));
 		xfs_dqlock(dqp);
 		XFS_DQHOLD(dqp);
@@ -1231,42 +1198,24 @@ xfs_qm_init_quotainfo(
 		 * a user or group before he or she can not perform any
 		 * more writing. If it is zero, a default is used.
 		 */
-		qinf->qi_btimelimit =
-				INT_GET(ddqp->d_btimer, ARCH_CONVERT) ?
-				INT_GET(ddqp->d_btimer, ARCH_CONVERT) :
-				XFS_QM_BTIMELIMIT;
-		qinf->qi_itimelimit =
-				INT_GET(ddqp->d_itimer, ARCH_CONVERT) ?
-				INT_GET(ddqp->d_itimer, ARCH_CONVERT) :
-				XFS_QM_ITIMELIMIT;
-		qinf->qi_rtbtimelimit =
-				INT_GET(ddqp->d_rtbtimer, ARCH_CONVERT) ?
-				INT_GET(ddqp->d_rtbtimer, ARCH_CONVERT) :
-				XFS_QM_RTBTIMELIMIT;
-		qinf->qi_bwarnlimit =
-				INT_GET(ddqp->d_bwarns, ARCH_CONVERT) ?
-				INT_GET(ddqp->d_bwarns, ARCH_CONVERT) :
-				XFS_QM_BWARNLIMIT;
-		qinf->qi_iwarnlimit =
-				INT_GET(ddqp->d_iwarns, ARCH_CONVERT) ?
-				INT_GET(ddqp->d_iwarns, ARCH_CONVERT) :
-				XFS_QM_IWARNLIMIT;
-		qinf->qi_rtbwarnlimit =
-				INT_GET(ddqp->d_rtbwarns, ARCH_CONVERT) ?
-				INT_GET(ddqp->d_rtbwarns, ARCH_CONVERT) :
-				XFS_QM_RTBWARNLIMIT;
-		qinf->qi_bhardlimit =
-				INT_GET(ddqp->d_blk_hardlimit, ARCH_CONVERT);
-		qinf->qi_bsoftlimit =
-				INT_GET(ddqp->d_blk_softlimit, ARCH_CONVERT);
-		qinf->qi_ihardlimit =
-				INT_GET(ddqp->d_ino_hardlimit, ARCH_CONVERT);
-		qinf->qi_isoftlimit =
-				INT_GET(ddqp->d_ino_softlimit, ARCH_CONVERT);
-		qinf->qi_rtbhardlimit =
-				INT_GET(ddqp->d_rtb_hardlimit, ARCH_CONVERT);
-		qinf->qi_rtbsoftlimit =
-				INT_GET(ddqp->d_rtb_softlimit, ARCH_CONVERT);
+		qinf->qi_btimelimit = ddqp->d_btimer ?
+			be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
+		qinf->qi_itimelimit = ddqp->d_itimer ?
+			be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
+		qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
+			be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
+		qinf->qi_bwarnlimit = ddqp->d_bwarns ?
+			be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
+		qinf->qi_iwarnlimit = ddqp->d_iwarns ?
+			be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
+		qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
+			be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
+		qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
+		qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
+		qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
+		qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
+		qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
+		qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
  
 		/*
 		 * We sent the XFS_QMOPT_DQSUSER flag to dqget because
@@ -1545,15 +1494,15 @@ xfs_qm_reset_dqcounts(
 		 */
 		(void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
 				      "xfs_quotacheck");
-		INT_SET(ddq->d_bcount, ARCH_CONVERT, 0ULL);
-		INT_SET(ddq->d_icount, ARCH_CONVERT, 0ULL);
-		INT_SET(ddq->d_rtbcount, ARCH_CONVERT, 0ULL);
-		INT_SET(ddq->d_btimer, ARCH_CONVERT, (time_t)0);
-		INT_SET(ddq->d_itimer, ARCH_CONVERT, (time_t)0);
-		INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, (time_t)0);
-		INT_SET(ddq->d_bwarns, ARCH_CONVERT, 0UL);
-		INT_SET(ddq->d_iwarns, ARCH_CONVERT, 0UL);
-		INT_SET(ddq->d_rtbwarns, ARCH_CONVERT, 0UL);
+		ddq->d_bcount = 0;
+		ddq->d_icount = 0;
+		ddq->d_rtbcount = 0;
+		ddq->d_btimer = 0;
+		ddq->d_itimer = 0;
+		ddq->d_rtbtimer = 0;
+		ddq->d_bwarns = 0;
+		ddq->d_iwarns = 0;
+		ddq->d_rtbwarns = 0;
 		ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
 	}
 
@@ -1726,14 +1675,14 @@ xfs_qm_quotacheck_dqadjust(
 	 * Adjust the inode count and the block count to reflect this inode's
 	 * resource usage.
 	 */
-	INT_MOD(dqp->q_core.d_icount, ARCH_CONVERT, +1);
+	be64_add(&dqp->q_core.d_icount, 1);
 	dqp->q_res_icount++;
 	if (nblks) {
-		INT_MOD(dqp->q_core.d_bcount, ARCH_CONVERT, nblks);
+		be64_add(&dqp->q_core.d_bcount, nblks);
 		dqp->q_res_bcount += nblks;
 	}
 	if (rtblks) {
-		INT_MOD(dqp->q_core.d_rtbcount, ARCH_CONVERT, rtblks);
+		be64_add(&dqp->q_core.d_rtbcount, rtblks);
 		dqp->q_res_rtbcount += rtblks;
 	}
 
@@ -2010,7 +1959,7 @@ xfs_qm_quotacheck(
 		ASSERT(mp->m_quotainfo != NULL);
 		ASSERT(xfs_Gqm != NULL);
 		xfs_qm_destroy_quotainfo(mp);
-		xfs_mount_reset_sbqflags(mp);
+		(void)xfs_mount_reset_sbqflags(mp);
 	} else {
 		cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
 	}
@@ -2220,7 +2169,7 @@ xfs_qm_shake_freelist(
 		xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING");
 #ifdef QUOTADEBUG
 		cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
-			dqp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT));
+			dqp, be32_to_cpu(dqp->q_core.d_id));
 #endif
 		ASSERT(dqp->q_nrefs == 0);
 		nextdqp = dqp->dq_flnext;
@@ -2688,7 +2637,7 @@ xfs_qm_vop_chown_reserve(
 			XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
 
 	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
-	    ip->i_d.di_uid != (uid_t)INT_GET(udqp->q_core.d_id, ARCH_CONVERT)) {
+	    ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
 		delblksudq = udqp;
 		/*
 		 * If there are delayed allocation blocks, then we have to
@@ -2701,10 +2650,10 @@ xfs_qm_vop_chown_reserve(
 		}
 	}
 	if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
-		if ((XFS_IS_GQUOTA_ON(ip->i_mount) && ip->i_d.di_gid !=
-				INT_GET(gdqp->q_core.d_id, ARCH_CONVERT)) ||
-		    (XFS_IS_PQUOTA_ON(ip->i_mount) && ip->i_d.di_projid !=
-				INT_GET(gdqp->q_core.d_id, ARCH_CONVERT))) {
+		if ((XFS_IS_GQUOTA_ON(ip->i_mount) &&
+		     ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id)) ||
+		    (XFS_IS_PQUOTA_ON(ip->i_mount) &&
+		     ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id))) {
 			delblksgdq = gdqp;
 			if (delblks) {
 				ASSERT(ip->i_gdquot);
@@ -2794,7 +2743,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
 		xfs_dqunlock(udqp);
 		ASSERT(ip->i_udquot == NULL);
 		ip->i_udquot = udqp;
-		ASSERT(ip->i_d.di_uid == INT_GET(udqp->q_core.d_id, ARCH_CONVERT));
+		ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
 		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
 	}
 	if (gdqp) {
@@ -2803,7 +2752,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
 		xfs_dqunlock(gdqp);
 		ASSERT(ip->i_gdquot == NULL);
 		ip->i_gdquot = gdqp;
-		ASSERT(ip->i_d.di_gid == INT_GET(gdqp->q_core.d_id, ARCH_CONVERT));
+		ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));
 		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
 	}
 }
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index b03eecf3b6c..12da259f2fc 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_QM_H__
 #define __XFS_QM_H__
@@ -40,6 +26,7 @@
 struct xfs_qm;
 struct xfs_inode;
 
+extern uint		ndquot;
 extern mutex_t		xfs_Gqm_lock;
 extern struct xfs_qm	*xfs_Gqm;
 extern kmem_zone_t	*qm_dqzone;
@@ -65,9 +52,8 @@ extern kmem_zone_t	*qm_dqtrxzone;
 /*
  * Dquot hashtable constants/threshold values.
  */
-#define XFS_QM_NCSIZE_THRESHOLD		5000
-#define XFS_QM_HASHSIZE_LOW		32
-#define XFS_QM_HASHSIZE_HIGH		64
+#define XFS_QM_HASHSIZE_LOW		(NBPP / sizeof(xfs_dqhash_t))
+#define XFS_QM_HASHSIZE_HIGH		((NBPP * 4) / sizeof(xfs_dqhash_t))
 
 /*
  * We output a cmn_err when quotachecking a quota file with more than
@@ -184,8 +170,6 @@ typedef struct xfs_dquot_acct {
 #define XFS_QM_HOLD(xqm)	((xqm)->qm_nrefs++)
 #define XFS_QM_RELE(xqm)	((xqm)->qm_nrefs--)
 
-extern void		xfs_mount_reset_sbqflags(xfs_mount_t *);
-
 extern void		xfs_qm_destroy_quotainfo(xfs_mount_t *);
 extern int		xfs_qm_mount_quotas(xfs_mount_t *, int);
 extern void		xfs_qm_mount_quotainit(xfs_mount_t *, uint);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index dc3c37a1e15..d9d2993de43 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -1,70 +1,55 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_clnt.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
 #include "xfs_quota.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
+#include "xfs_btree.h"
 #include "xfs_bmap.h"
-#include "xfs_bit.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
-#include "xfs_itable.h"
 #include "xfs_rw.h"
 #include "xfs_acl.h"
 #include "xfs_cap.h"
 #include "xfs_mac.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
-
 #include "xfs_qm.h"
 
 #define MNTOPT_QUOTA	"quota"		/* disk quotas (user) */
@@ -229,48 +214,6 @@ xfs_qm_syncall(
 	return error;
 }
 
-/*
- * Clear the quotaflags in memory and in the superblock.
- */
-void
-xfs_mount_reset_sbqflags(
-	xfs_mount_t		*mp)
-{
-	xfs_trans_t		*tp;
-	unsigned long		s;
-
-	mp->m_qflags = 0;
-	/*
-	 * It is OK to look at sb_qflags here in mount path,
-	 * without SB_LOCK.
-	 */
-	if (mp->m_sb.sb_qflags == 0)
-		return;
-	s = XFS_SB_LOCK(mp);
-	mp->m_sb.sb_qflags = 0;
-	XFS_SB_UNLOCK(mp, s);
-
-	/*
-	 * if the fs is readonly, let the incore superblock run
-	 * with quotas off but don't flush the update out to disk
-	 */
-	if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
-		return;
-#ifdef QUOTADEBUG
-	xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
-#endif
-	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
-	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
-				      XFS_DEFAULT_LOG_COUNT)) {
-		xfs_trans_cancel(tp, 0);
-		xfs_fs_cmn_err(CE_ALERT, mp,
-			"xfs_mount_reset_sbqflags: Superblock update failed!");
-		return;
-	}
-	xfs_mod_sb(tp, XFS_SB_QFLAGS);
-	xfs_trans_commit(tp, 0, NULL);
-}
-
 STATIC int
 xfs_qm_newmount(
 	xfs_mount_t	*mp,
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 29978e037fe..0570f773355 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -1,69 +1,54 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
 #include "xfs_quota.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
 #include "xfs_bmap.h"
-#include "xfs_bit.h"
+#include "xfs_btree.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
-#include "xfs_itable.h"
 #include "xfs_rw.h"
 #include "xfs_acl.h"
 #include "xfs_cap.h"
 #include "xfs_mac.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
-
 #include "xfs_qm.h"
 
 struct xqmstats xqmstats;
diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h
index 8093c5c284e..a50ffabcf55 100644
--- a/fs/xfs/quota/xfs_qm_stats.h
+++ b/fs/xfs/quota/xfs_qm_stats.h
@@ -1,38 +1,23 @@
 /*
- * Copyright (c) 2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2002 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_QM_STATS_H__
 #define __XFS_QM_STATS_H__
 
-
 #if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
 
 /*
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 68e98962dbe..24690e1af65 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1,62 +1,48 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
 #include "xfs_quota.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
 #include "xfs_bmap.h"
-#include "xfs_bit.h"
+#include "xfs_btree.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
-#include "xfs_itable.h"
 #include "xfs_rw.h"
 #include "xfs_acl.h"
 #include "xfs_cap.h"
@@ -64,7 +50,6 @@
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
-
 #include "xfs_qm.h"
 
 #ifdef DEBUG
@@ -109,10 +94,7 @@ xfs_qm_quotactl(
 	vfsp = bhvtovfs(bdp);
 	mp = XFS_VFSTOM(vfsp);
 
-	if (addr == NULL && cmd != Q_SYNC)
-		return XFS_ERROR(EINVAL);
-	if (id < 0 && cmd != Q_SYNC)
-		return XFS_ERROR(EINVAL);
+	ASSERT(addr != NULL || cmd == Q_XQUOTASYNC);
 
 	/*
 	 * The following commands are valid even when quotaoff.
@@ -122,7 +104,7 @@ xfs_qm_quotactl(
 		/*
 		 * Truncate quota files. quota must be off.
 		 */
-		if (XFS_IS_QUOTA_ON(mp) || addr == NULL)
+		if (XFS_IS_QUOTA_ON(mp))
 			return XFS_ERROR(EINVAL);
 		if (vfsp->vfs_flag & VFS_RDONLY)
 			return XFS_ERROR(EROFS);
@@ -140,8 +122,6 @@ xfs_qm_quotactl(
 		 * QUOTAON - enabling quota enforcement.
 		 * Quota accounting must be turned on at mount time.
 		 */
-		if (addr == NULL)
-			return XFS_ERROR(EINVAL);
 		if (vfsp->vfs_flag & VFS_RDONLY)
 			return XFS_ERROR(EROFS);
 		return (xfs_qm_scall_quotaon(mp,
@@ -152,6 +132,9 @@ xfs_qm_quotactl(
 			return XFS_ERROR(EROFS);
 		break;
 
+	case Q_XQUOTASYNC:
+		return (xfs_sync_inodes(mp, SYNC_DELWRI, 0, NULL));
+
 	default:
 		break;
 	}
@@ -655,13 +638,13 @@ xfs_qm_scall_setqlim(
 	 */
 	hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
 		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
-			INT_GET(ddq->d_blk_hardlimit, ARCH_CONVERT);
+			be64_to_cpu(ddq->d_blk_hardlimit);
 	soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
 		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
-			INT_GET(ddq->d_blk_softlimit, ARCH_CONVERT);
+			be64_to_cpu(ddq->d_blk_softlimit);
 	if (hard == 0 || hard >= soft) {
-		INT_SET(ddq->d_blk_hardlimit, ARCH_CONVERT, hard);
-		INT_SET(ddq->d_blk_softlimit, ARCH_CONVERT, soft);
+		ddq->d_blk_hardlimit = cpu_to_be64(hard);
+		ddq->d_blk_softlimit = cpu_to_be64(soft);
 		if (id == 0) {
 			mp->m_quotainfo->qi_bhardlimit = hard;
 			mp->m_quotainfo->qi_bsoftlimit = soft;
@@ -671,13 +654,13 @@ xfs_qm_scall_setqlim(
 	}
 	hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
 		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
-			INT_GET(ddq->d_rtb_hardlimit, ARCH_CONVERT);
+			be64_to_cpu(ddq->d_rtb_hardlimit);
 	soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
 		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
-			INT_GET(ddq->d_rtb_softlimit, ARCH_CONVERT);
+			be64_to_cpu(ddq->d_rtb_softlimit);
 	if (hard == 0 || hard >= soft) {
-		INT_SET(ddq->d_rtb_hardlimit, ARCH_CONVERT, hard);
-		INT_SET(ddq->d_rtb_softlimit, ARCH_CONVERT, soft);
+		ddq->d_rtb_hardlimit = cpu_to_be64(hard);
+		ddq->d_rtb_softlimit = cpu_to_be64(soft);
 		if (id == 0) {
 			mp->m_quotainfo->qi_rtbhardlimit = hard;
 			mp->m_quotainfo->qi_rtbsoftlimit = soft;
@@ -688,13 +671,13 @@ xfs_qm_scall_setqlim(
 
 	hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
 		(xfs_qcnt_t) newlim->d_ino_hardlimit :
-			INT_GET(ddq->d_ino_hardlimit, ARCH_CONVERT);
+			be64_to_cpu(ddq->d_ino_hardlimit);
 	soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
 		(xfs_qcnt_t) newlim->d_ino_softlimit :
-			INT_GET(ddq->d_ino_softlimit, ARCH_CONVERT);
+			be64_to_cpu(ddq->d_ino_softlimit);
 	if (hard == 0 || hard >= soft) {
-		INT_SET(ddq->d_ino_hardlimit, ARCH_CONVERT, hard);
-		INT_SET(ddq->d_ino_softlimit, ARCH_CONVERT, soft);
+		ddq->d_ino_hardlimit = cpu_to_be64(hard);
+		ddq->d_ino_softlimit = cpu_to_be64(soft);
 		if (id == 0) {
 			mp->m_quotainfo->qi_ihardlimit = hard;
 			mp->m_quotainfo->qi_isoftlimit = soft;
@@ -707,11 +690,11 @@ xfs_qm_scall_setqlim(
 	 * Update warnings counter(s) if requested
 	 */
 	if (newlim->d_fieldmask & FS_DQ_BWARNS)
-		INT_SET(ddq->d_bwarns, ARCH_CONVERT, newlim->d_bwarns);
+		ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns);
 	if (newlim->d_fieldmask & FS_DQ_IWARNS)
-		INT_SET(ddq->d_iwarns, ARCH_CONVERT, newlim->d_iwarns);
+		ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns);
 	if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
-		INT_SET(ddq->d_rtbwarns, ARCH_CONVERT, newlim->d_rtbwarns);
+		ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns);
 
 	if (id == 0) {
 		/*
@@ -723,15 +706,15 @@ xfs_qm_scall_setqlim(
 		 */
 		if (newlim->d_fieldmask & FS_DQ_BTIMER) {
 			mp->m_quotainfo->qi_btimelimit = newlim->d_btimer;
-			INT_SET(ddq->d_btimer, ARCH_CONVERT, newlim->d_btimer);
+			ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
 		}
 		if (newlim->d_fieldmask & FS_DQ_ITIMER) {
 			mp->m_quotainfo->qi_itimelimit = newlim->d_itimer;
-			INT_SET(ddq->d_itimer, ARCH_CONVERT, newlim->d_itimer);
+			ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
 		}
 		if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
 			mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer;
-			INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, newlim->d_rtbtimer);
+			ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
 		}
 		if (newlim->d_fieldmask & FS_DQ_BWARNS)
 			mp->m_quotainfo->qi_bwarnlimit = newlim->d_bwarns;
@@ -902,33 +885,27 @@ xfs_qm_export_dquot(
 {
 	memset(dst, 0, sizeof(*dst));
 	dst->d_version = FS_DQUOT_VERSION;  /* different from src->d_version */
-	dst->d_flags =
-		xfs_qm_export_qtype_flags(INT_GET(src->d_flags, ARCH_CONVERT));
-	dst->d_id = INT_GET(src->d_id, ARCH_CONVERT);
-	dst->d_blk_hardlimit = (__uint64_t)
-		XFS_FSB_TO_BB(mp, INT_GET(src->d_blk_hardlimit, ARCH_CONVERT));
-	dst->d_blk_softlimit = (__uint64_t)
-		XFS_FSB_TO_BB(mp, INT_GET(src->d_blk_softlimit, ARCH_CONVERT));
-	dst->d_ino_hardlimit = (__uint64_t)
-		INT_GET(src->d_ino_hardlimit, ARCH_CONVERT);
-	dst->d_ino_softlimit = (__uint64_t)
-		INT_GET(src->d_ino_softlimit, ARCH_CONVERT);
-	dst->d_bcount = (__uint64_t)
-		XFS_FSB_TO_BB(mp, INT_GET(src->d_bcount, ARCH_CONVERT));
-	dst->d_icount = (__uint64_t) INT_GET(src->d_icount, ARCH_CONVERT);
-	dst->d_btimer = (__uint32_t) INT_GET(src->d_btimer, ARCH_CONVERT);
-	dst->d_itimer = (__uint32_t) INT_GET(src->d_itimer, ARCH_CONVERT);
-	dst->d_iwarns = INT_GET(src->d_iwarns, ARCH_CONVERT);
-	dst->d_bwarns = INT_GET(src->d_bwarns, ARCH_CONVERT);
-
-	dst->d_rtb_hardlimit = (__uint64_t)
-		XFS_FSB_TO_BB(mp, INT_GET(src->d_rtb_hardlimit, ARCH_CONVERT));
-	dst->d_rtb_softlimit = (__uint64_t)
-		XFS_FSB_TO_BB(mp, INT_GET(src->d_rtb_softlimit, ARCH_CONVERT));
-	dst->d_rtbcount = (__uint64_t)
-		XFS_FSB_TO_BB(mp, INT_GET(src->d_rtbcount, ARCH_CONVERT));
-	dst->d_rtbtimer = (__uint32_t) INT_GET(src->d_rtbtimer, ARCH_CONVERT);
-	dst->d_rtbwarns = INT_GET(src->d_rtbwarns, ARCH_CONVERT);
+	dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags);
+	dst->d_id = be32_to_cpu(src->d_id);
+	dst->d_blk_hardlimit =
+		XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit));
+	dst->d_blk_softlimit =
+		XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit));
+	dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit);
+	dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit);
+	dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount));
+	dst->d_icount = be64_to_cpu(src->d_icount);
+	dst->d_btimer = be32_to_cpu(src->d_btimer);
+	dst->d_itimer = be32_to_cpu(src->d_itimer);
+	dst->d_iwarns = be16_to_cpu(src->d_iwarns);
+	dst->d_bwarns = be16_to_cpu(src->d_bwarns);
+	dst->d_rtb_hardlimit =
+		XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit));
+	dst->d_rtb_softlimit =
+		XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit));
+	dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount));
+	dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer);
+	dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns);
 
 	/*
 	 * Internally, we don't reset all the timers when quota enforcement
@@ -1053,7 +1030,6 @@ xfs_qm_dqrele_all_inodes(
 	struct xfs_mount *mp,
 	uint		 flags)
 {
-	vmap_t		vmap;
 	xfs_inode_t	*ip, *topino;
 	uint		ireclaims;
 	vnode_t		*vp;
@@ -1061,8 +1037,8 @@ xfs_qm_dqrele_all_inodes(
 
 	ASSERT(mp->m_quotainfo);
 
-again:
 	XFS_MOUNT_ILOCK(mp);
+again:
 	ip = mp->m_inodes;
 	if (ip == NULL) {
 		XFS_MOUNT_IUNLOCK(mp);
@@ -1090,18 +1066,14 @@ again:
 		}
 		vnode_refd = B_FALSE;
 		if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
-			/*
-			 * Sample vp mapping while holding the mplock, lest
-			 * we come across a non-existent vnode.
-			 */
-			VMAP(vp, vmap);
 			ireclaims = mp->m_ireclaims;
 			topino = mp->m_inodes;
-			XFS_MOUNT_IUNLOCK(mp);
+			vp = vn_grab(vp);
+			if (!vp)
+				goto again;
 
+			XFS_MOUNT_IUNLOCK(mp);
 			/* XXX restart limit ? */
-			if ( ! (vp = vn_get(vp, &vmap)))
-				goto again;
 			xfs_ilock(ip, XFS_ILOCK_EXCL);
 			vnode_refd = B_TRUE;
 		} else {
@@ -1137,7 +1109,6 @@ again:
 		 */
 		if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) {
 			/* XXX use a sentinel */
-			XFS_MOUNT_IUNLOCK(mp);
 			goto again;
 		}
 		ip = ip->i_mnext;
@@ -1228,10 +1199,10 @@ xfs_qm_dqtest_failed(
 	qmtest_nfails++;
 	if (error)
 		cmn_err(CE_DEBUG, "quotacheck failed id=%d, err=%d\nreason: %s",
-		       INT_GET(d->d_id, ARCH_CONVERT), error, reason);
+		       d->d_id, error, reason);
 	else
 		cmn_err(CE_DEBUG, "quotacheck failed id=%d (%s) [%d != %d]",
-		       INT_GET(d->d_id, ARCH_CONVERT), reason, (int)a, (int)b);
+		       d->d_id, reason, (int)a, (int)b);
 	xfs_qm_dqtest_print(d);
 	if (dqp)
 		xfs_qm_dqprint(dqp);
@@ -1243,21 +1214,21 @@ xfs_dqtest_cmp2(
 	xfs_dquot_t	*dqp)
 {
 	int err = 0;
-	if (INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) != d->d_icount) {
+	if (be64_to_cpu(dqp->q_core.d_icount) != d->d_icount) {
 		xfs_qm_dqtest_failed(d, dqp, "icount mismatch",
-			INT_GET(dqp->q_core.d_icount, ARCH_CONVERT),
+			be64_to_cpu(dqp->q_core.d_icount),
 			d->d_icount, 0);
 		err++;
 	}
-	if (INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT) != d->d_bcount) {
+	if (be64_to_cpu(dqp->q_core.d_bcount) != d->d_bcount) {
 		xfs_qm_dqtest_failed(d, dqp, "bcount mismatch",
-			INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT),
+			be64_to_cpu(dqp->q_core.d_bcount),
 			d->d_bcount, 0);
 		err++;
 	}
-	if (INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT) &&
-	    INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT) >=
-	    INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT)) {
+	if (dqp->q_core.d_blk_softlimit &&
+	    be64_to_cpu(dqp->q_core.d_bcount) >=
+	    be64_to_cpu(dqp->q_core.d_blk_softlimit)) {
 		if (!dqp->q_core.d_btimer && dqp->q_core.d_id) {
 			cmn_err(CE_DEBUG,
 				"%d [%s] [0x%p] BLK TIMER NOT STARTED",
@@ -1265,9 +1236,9 @@ xfs_dqtest_cmp2(
 			err++;
 		}
 	}
-	if (INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT) &&
-	    INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >=
-	    INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT)) {
+	if (dqp->q_core.d_ino_softlimit &&
+	    be64_to_cpu(dqp->q_core.d_icount) >=
+	    be64_to_cpu(dqp->q_core.d_ino_softlimit)) {
 		if (!dqp->q_core.d_itimer && dqp->q_core.d_id) {
 			cmn_err(CE_DEBUG,
 				"%d [%s] [0x%p] INO TIMER NOT STARTED",
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index bf413e70ec0..7a9f3beb818 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_QUOTA_PRIV_H__
 #define __XFS_QUOTA_PRIV_H__
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 3b99daf8a64..3290975d31f 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -1,39 +1,25 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -43,21 +29,20 @@
 #include "xfs_dmapi.h"
 #include "xfs_quota.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
+#include "xfs_btree.h"
 #include "xfs_bmap.h"
-#include "xfs_bit.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
-#include "xfs_itable.h"
 #include "xfs_rw.h"
 #include "xfs_acl.h"
 #include "xfs_cap.h"
@@ -65,7 +50,6 @@
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_priv.h"
-
 #include "xfs_qm.h"
 
 STATIC void	xfs_trans_alloc_dqinfo(xfs_trans_t *);
@@ -429,25 +413,25 @@ xfs_trans_apply_dquot_deltas(
 				qtrx->qt_delrtb_delta;
 #ifdef QUOTADEBUG
 			if (totalbdelta < 0)
-				ASSERT(INT_GET(d->d_bcount, ARCH_CONVERT) >=
+				ASSERT(be64_to_cpu(d->d_bcount) >=
 				       (xfs_qcnt_t) -totalbdelta);
 
 			if (totalrtbdelta < 0)
-				ASSERT(INT_GET(d->d_rtbcount, ARCH_CONVERT) >=
+				ASSERT(be64_to_cpu(d->d_rtbcount) >=
 				       (xfs_qcnt_t) -totalrtbdelta);
 
 			if (qtrx->qt_icount_delta < 0)
-				ASSERT(INT_GET(d->d_icount, ARCH_CONVERT) >=
+				ASSERT(be64_to_cpu(d->d_icount) >=
 				       (xfs_qcnt_t) -qtrx->qt_icount_delta);
 #endif
 			if (totalbdelta)
-				INT_MOD(d->d_bcount, ARCH_CONVERT, (xfs_qcnt_t)totalbdelta);
+				be64_add(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
 
 			if (qtrx->qt_icount_delta)
-				INT_MOD(d->d_icount, ARCH_CONVERT, (xfs_qcnt_t)qtrx->qt_icount_delta);
+				be64_add(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta);
 
 			if (totalrtbdelta)
-				INT_MOD(d->d_rtbcount, ARCH_CONVERT, (xfs_qcnt_t)totalrtbdelta);
+				be64_add(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta);
 
 			/*
 			 * Get any default limits in use.
@@ -531,11 +515,11 @@ xfs_trans_apply_dquot_deltas(
 			}
 
 			ASSERT(dqp->q_res_bcount >=
-				INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT));
+				be64_to_cpu(dqp->q_core.d_bcount));
 			ASSERT(dqp->q_res_icount >=
-				INT_GET(dqp->q_core.d_icount, ARCH_CONVERT));
+				be64_to_cpu(dqp->q_core.d_icount));
 			ASSERT(dqp->q_res_rtbcount >=
-				INT_GET(dqp->q_core.d_rtbcount, ARCH_CONVERT));
+				be64_to_cpu(dqp->q_core.d_rtbcount));
 		}
 		/*
 		 * Do the group quotas next
@@ -642,26 +626,26 @@ xfs_trans_dqresv(
 	}
 	ASSERT(XFS_DQ_IS_LOCKED(dqp));
 	if (flags & XFS_TRANS_DQ_RES_BLKS) {
-		hardlimit = INT_GET(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT);
+		hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
 		if (!hardlimit)
 			hardlimit = q->qi_bhardlimit;
-		softlimit = INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT);
+		softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
 		if (!softlimit)
 			softlimit = q->qi_bsoftlimit;
-		timer = INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT);
-		warns = INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT);
+		timer = be32_to_cpu(dqp->q_core.d_btimer);
+		warns = be16_to_cpu(dqp->q_core.d_bwarns);
 		warnlimit = XFS_QI_BWARNLIMIT(dqp->q_mount);
 		resbcountp = &dqp->q_res_bcount;
 	} else {
 		ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
-		hardlimit = INT_GET(dqp->q_core.d_rtb_hardlimit, ARCH_CONVERT);
+		hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
 		if (!hardlimit)
 			hardlimit = q->qi_rtbhardlimit;
-		softlimit = INT_GET(dqp->q_core.d_rtb_softlimit, ARCH_CONVERT);
+		softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
 		if (!softlimit)
 			softlimit = q->qi_rtbsoftlimit;
-		timer = INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT);
-		warns = INT_GET(dqp->q_core.d_rtbwarns, ARCH_CONVERT);
+		timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
+		warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
 		warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount);
 		resbcountp = &dqp->q_res_rtbcount;
 	}
@@ -700,16 +684,14 @@ xfs_trans_dqresv(
 			}
 		}
 		if (ninos > 0) {
-			count = INT_GET(dqp->q_core.d_icount, ARCH_CONVERT);
-			timer = INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT);
-			warns = INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT);
+			count = be64_to_cpu(dqp->q_core.d_icount);
+			timer = be32_to_cpu(dqp->q_core.d_itimer);
+			warns = be16_to_cpu(dqp->q_core.d_iwarns);
 			warnlimit = XFS_QI_IWARNLIMIT(dqp->q_mount);
-			hardlimit = INT_GET(dqp->q_core.d_ino_hardlimit,
-						ARCH_CONVERT);
+			hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
 			if (!hardlimit)
 				hardlimit = q->qi_ihardlimit;
-			softlimit = INT_GET(dqp->q_core.d_ino_softlimit,
-						ARCH_CONVERT);
+			softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
 			if (!softlimit)
 				softlimit = q->qi_isoftlimit;
 			if (hardlimit > 0ULL && count >= hardlimit) {
@@ -756,9 +738,9 @@ xfs_trans_dqresv(
 					    XFS_TRANS_DQ_RES_INOS,
 					    ninos);
 	}
-	ASSERT(dqp->q_res_bcount >= INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT));
-	ASSERT(dqp->q_res_rtbcount >= INT_GET(dqp->q_core.d_rtbcount, ARCH_CONVERT));
-	ASSERT(dqp->q_res_icount >= INT_GET(dqp->q_core.d_icount, ARCH_CONVERT));
+	ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount));
+	ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
+	ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
 
 error_return:
 	if (! (flags & XFS_QMOPT_DQLOCK)) {
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 4ed7b6928cd..bb6dc91ea26 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -1,37 +1,22 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "debug.h"
-
+#include "spin.h"
 #include <asm/page.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index c5b9365a7e2..aff558664c3 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef	__XFS_SUPPORT_DEBUG_H__
 #define	__XFS_SUPPORT_DEBUG_H__
@@ -41,9 +27,10 @@
 #define CE_ALERT        1               /* alert        */
 #define CE_PANIC        0               /* panic        */
 
-extern void icmn_err(int, char *, va_list);
-/* PRINTFLIKE2 */
-extern void cmn_err(int, char *, ...);
+extern void icmn_err(int, char *, va_list)
+	__attribute__ ((format (printf, 2, 0)));
+extern void cmn_err(int, char *, ...)
+	__attribute__ ((format (printf, 2, 3)));
 
 #ifndef STATIC
 # define STATIC static
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 3dae14c8c55..841aa4c15b8 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -1,35 +1,20 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include <xfs.h>
 
 static kmem_zone_t *ktrace_hdr_zone;
@@ -65,7 +50,7 @@ ktrace_uninit(void)
  * number of entries.
  */
 ktrace_t *
-ktrace_alloc(int nentries, int sleep)
+ktrace_alloc(int nentries, unsigned int __nocast sleep)
 {
 	ktrace_t        *ktp;
 	ktrace_entry_t  *ktep;
@@ -170,7 +155,7 @@ ktrace_enter(
 	void            *val14,
 	void            *val15)
 {
-	static lock_t   wrap_lock = SPIN_LOCK_UNLOCKED;
+	static DEFINE_SPINLOCK(wrap_lock);
 	unsigned long	flags;
 	int             index;
 	ktrace_entry_t  *ktep;
diff --git a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h
index 92d1a1a5d04..0d73216287c 100644
--- a/fs/xfs/support/ktrace.h
+++ b/fs/xfs/support/ktrace.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_SUPPORT_KTRACE_H__
 #define __XFS_SUPPORT_KTRACE_H__
@@ -66,7 +52,7 @@ typedef struct ktrace_snap {
 extern void ktrace_init(int zentries);
 extern void ktrace_uninit(void);
 
-extern ktrace_t *ktrace_alloc(int, int);
+extern ktrace_t *ktrace_alloc(int, unsigned int __nocast);
 extern void ktrace_free(ktrace_t *);
 
 extern void ktrace_enter(
diff --git a/fs/xfs/support/move.c b/fs/xfs/support/move.c
index 15b5194f16b..caefa17b80f 100644
--- a/fs/xfs/support/move.c
+++ b/fs/xfs/support/move.c
@@ -1,35 +1,20 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include <xfs.h>
 
 /* Read from kernel buffer at src to user/kernel buffer defined
diff --git a/fs/xfs/support/move.h b/fs/xfs/support/move.h
index 3d406dc1c89..97a2498d2da 100644
--- a/fs/xfs/support/move.h
+++ b/fs/xfs/support/move.h
@@ -1,34 +1,20 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- * 
  * Portions Copyright (c) 1982, 1986, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
diff --git a/fs/xfs/support/qsort.c b/fs/xfs/support/qsort.c
deleted file mode 100644
index 1ec824140cf..00000000000
--- a/fs/xfs/support/qsort.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 1992, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-
-/*
- * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
- */
-#define swapcode(TYPE, parmi, parmj, n) { 		\
-	long i = (n) / sizeof (TYPE); 			\
-	register TYPE *pi = (TYPE *) (parmi); 		\
-	register TYPE *pj = (TYPE *) (parmj); 		\
-	do { 						\
-		register TYPE	t = *pi;		\
-		*pi++ = *pj;				\
-		*pj++ = t;				\
-        } while (--i > 0);				\
-}
-
-#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
-	es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
-
-static __inline void
-swapfunc(char *a, char *b, int n, int swaptype)
-{
-	if (swaptype <= 1) 
-		swapcode(long, a, b, n)
-	else
-		swapcode(char, a, b, n)
-}
-
-#define swap(a, b)					\
-	if (swaptype == 0) {				\
-		long t = *(long *)(a);			\
-		*(long *)(a) = *(long *)(b);		\
-		*(long *)(b) = t;			\
-	} else						\
-		swapfunc(a, b, es, swaptype)
-
-#define vecswap(a, b, n) 	if ((n) > 0) swapfunc(a, b, n, swaptype)
-
-static __inline char *
-med3(char *a, char *b, char *c, int (*cmp)(const void *, const void *))
-{
-	return cmp(a, b) < 0 ?
-	       (cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a ))
-              :(cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c ));
-}
-
-void
-qsort(void *aa, size_t n, size_t es, int (*cmp)(const void *, const void *))
-{
-	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
-	int d, r, swaptype, swap_cnt;
-	register char *a = aa;
-
-loop:	SWAPINIT(a, es);
-	swap_cnt = 0;
-	if (n < 7) {
-		for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es)
-			for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
-			     pl -= es)
-				swap(pl, pl - es);
-		return;
-	}
-	pm = (char *)a + (n / 2) * es;
-	if (n > 7) {
-		pl = (char *)a;
-		pn = (char *)a + (n - 1) * es;
-		if (n > 40) {
-			d = (n / 8) * es;
-			pl = med3(pl, pl + d, pl + 2 * d, cmp);
-			pm = med3(pm - d, pm, pm + d, cmp);
-			pn = med3(pn - 2 * d, pn - d, pn, cmp);
-		}
-		pm = med3(pl, pm, pn, cmp);
-	}
-	swap(a, pm);
-	pa = pb = (char *)a + es;
-
-	pc = pd = (char *)a + (n - 1) * es;
-	for (;;) {
-		while (pb <= pc && (r = cmp(pb, a)) <= 0) {
-			if (r == 0) {
-				swap_cnt = 1;
-				swap(pa, pb);
-				pa += es;
-			}
-			pb += es;
-		}
-		while (pb <= pc && (r = cmp(pc, a)) >= 0) {
-			if (r == 0) {
-				swap_cnt = 1;
-				swap(pc, pd);
-				pd -= es;
-			}
-			pc -= es;
-		}
-		if (pb > pc)
-			break;
-		swap(pb, pc);
-		swap_cnt = 1;
-		pb += es;
-		pc -= es;
-	}
-	if (swap_cnt == 0) {  /* Switch to insertion sort */
-		for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
-			for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0; 
-			     pl -= es)
-				swap(pl, pl - es);
-		return;
-	}
-
-	pn = (char *)a + n * es;
-	r = min(pa - (char *)a, pb - pa);
-	vecswap(a, pb - r, r);
-	r = min((long)(pd - pc), (long)(pn - pd - es));
-	vecswap(pb, pn - r, r);
-	if ((r = pb - pa) > es)
-		qsort(a, r / es, es, cmp);
-	if ((r = pd - pc) > es) { 
-		/* Iterate rather than recurse to save stack space */
-		a = pn - r;
-		n = r / es;
-		goto loop;
-	}
-/*		qsort(pn - r, r / es, es, cmp);*/
-}
diff --git a/fs/xfs/support/qsort.h b/fs/xfs/support/qsort.h
deleted file mode 100644
index 94263106d71..00000000000
--- a/fs/xfs/support/qsort.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-#ifndef QSORT_H
-#define QSORT_H
-
-extern void qsort (void *const pbase,
-		    size_t total_elems,
-		    size_t size,
-		    int (*cmp)(const void *, const void *));
-
-#endif
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 81f40cfcb26..70ce40914c8 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -1,35 +1,20 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include <xfs.h>
 
 static mutex_t	uuid_monitor;
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h
index 5220ea58ba2..b6f5922199b 100644
--- a/fs/xfs/support/uuid.h
+++ b/fs/xfs/support/uuid.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_SUPPORT_UUID_H__
 #define __XFS_SUPPORT_UUID_H__
@@ -36,13 +22,13 @@ typedef struct {
 	unsigned char	__u_bits[16];
 } uuid_t;
 
-void uuid_init(void);
-void uuid_create_nil(uuid_t *uuid);
-int uuid_is_nil(uuid_t *uuid);
-int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
-void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
-__uint64_t uuid_hash64(uuid_t *uuid);
-int uuid_table_insert(uuid_t *uuid);
-void uuid_table_remove(uuid_t *uuid);
+extern void uuid_init(void);
+extern void uuid_create_nil(uuid_t *uuid);
+extern int uuid_is_nil(uuid_t *uuid);
+extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
+extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
+extern __uint64_t uuid_hash64(uuid_t *uuid);
+extern int uuid_table_insert(uuid_t *uuid);
+extern void uuid_table_remove(uuid_t *uuid);
 
 #endif	/* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 7e276dcaf4d..99b50d2bda9 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -1,40 +1,28 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_H__
 #define __XFS_H__
 
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 #include <linux-2.6/xfs_linux.h>
-
-#include <xfs_fs.h> 
-#include <xfs_macros.h>
+#else
+#include <linux-2.4/xfs_linux.h>
+#endif
 
 #endif	/* __XFS_H__ */
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 8d01dce8c53..cc9c91b9e77 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -1,49 +1,37 @@
 /*
- * Copyright (c) 2001-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
 #include "xfs_inum.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_btree.h"
 #include "xfs_acl.h"
 #include "xfs_mac.h"
 #include "xfs_attr.h"
@@ -85,7 +73,7 @@ xfs_acl_vhasacl_default(
 {
 	int		error;
 
-	if (vp->v_type != VDIR)
+	if (!VN_ISDIR(vp))
 		return 0;
 	xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
 	return (error == 0);
@@ -155,7 +143,7 @@ posix_acl_xattr_to_xfs(
 }
 
 /*
- * Comparison function called from qsort().
+ * Comparison function called from xfs_sort().
  * Primary key is ae_tag, secondary key is ae_id.
  */
 STATIC int
@@ -189,8 +177,8 @@ posix_acl_xfs_to_xattr(
 		return -ERANGE;
 
 	/* Need to sort src XFS ACL by <ae_tag,ae_id> */
-	qsort(src->acl_entry, src->acl_cnt, sizeof(src->acl_entry[0]),
-		xfs_acl_entry_compare);
+	xfs_sort(src->acl_entry, src->acl_cnt, sizeof(src->acl_entry[0]),
+		 xfs_acl_entry_compare);
 
 	dest->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
 	dest_entry = &dest->a_entries[0];
@@ -389,7 +377,7 @@ xfs_acl_allow_set(
 
 	if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
 		return EPERM;
-	if (kind == _ACL_TYPE_DEFAULT && vp->v_type != VDIR)
+	if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp))
 		return ENOTDIR;
 	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
 		return EROFS;
@@ -448,6 +436,7 @@ xfs_acl_access(
 	int		seen_userobj = 0;
 
 	matched.ae_tag = 0;	/* Invalid type */
+	matched.ae_perm = 0;
 	md >>= 6;	/* Normalize the bits for comparison */
 
 	for (i = 0; i < fap->acl_cnt; i++) {
@@ -750,7 +739,7 @@ xfs_acl_inherit(
 	 * If the new file is a directory, its default ACL is a copy of
 	 * the containing directory's default ACL.
 	 */
-	if (vp->v_type == VDIR)
+	if (VN_ISDIR(vp))
 		xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
 	if (!error && !basicperms)
 		xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 0363eb46d35..f9315bc960c 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2001-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_ACL_H__
 #define __XFS_ACL_H__
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 96b70f7fba3..a96e2ffce0c 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_AG_H__
 #define	__XFS_AG_H__
@@ -46,18 +32,9 @@ struct xfs_trans;
 #define	XFS_AGI_MAGIC	0x58414749	/* 'XAGI' */
 #define	XFS_AGF_VERSION	1
 #define	XFS_AGI_VERSION	1
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGF_GOOD_VERSION)
-int xfs_agf_good_version(unsigned v);
-#define	XFS_AGF_GOOD_VERSION(v)	xfs_agf_good_version(v)
-#else
-#define XFS_AGF_GOOD_VERSION(v)		((v) == XFS_AGF_VERSION)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGI_GOOD_VERSION)
-int xfs_agi_good_version(unsigned v);
-#define	XFS_AGI_GOOD_VERSION(v)	xfs_agi_good_version(v)
-#else
-#define XFS_AGI_GOOD_VERSION(v)		((v) == XFS_AGI_VERSION)
-#endif
+
+#define	XFS_AGF_GOOD_VERSION(v)	((v) == XFS_AGF_VERSION)
+#define	XFS_AGI_GOOD_VERSION(v)	((v) == XFS_AGI_VERSION)
 
 /*
  * Btree number 0 is bno, 1 is cnt.  This value gives the size of the
@@ -71,27 +48,26 @@ int xfs_agi_good_version(unsigned v);
  * are > 64k, our value cannot be confused for an EFS superblock's.
  */
 
-typedef struct xfs_agf
-{
+typedef struct xfs_agf {
 	/*
 	 * Common allocation group header information
 	 */
-	__uint32_t	agf_magicnum;	/* magic number == XFS_AGF_MAGIC */
-	__uint32_t	agf_versionnum;	/* header version == XFS_AGF_VERSION */
-	xfs_agnumber_t	agf_seqno;	/* sequence # starting from 0 */
-	xfs_agblock_t	agf_length;	/* size in blocks of a.g. */
+	__be32		agf_magicnum;	/* magic number == XFS_AGF_MAGIC */
+	__be32		agf_versionnum;	/* header version == XFS_AGF_VERSION */
+	__be32		agf_seqno;	/* sequence # starting from 0 */
+	__be32		agf_length;	/* size in blocks of a.g. */
 	/*
 	 * Freespace information
 	 */
-	xfs_agblock_t	agf_roots[XFS_BTNUM_AGF];	/* root blocks */
-	__uint32_t	agf_spare0;	/* spare field */
-	__uint32_t	agf_levels[XFS_BTNUM_AGF];	/* btree levels */
-	__uint32_t	agf_spare1;	/* spare field */
-	__uint32_t	agf_flfirst;	/* first freelist block's index */
-	__uint32_t	agf_fllast;	/* last freelist block's index */
-	__uint32_t	agf_flcount;	/* count of blocks in freelist */
-	xfs_extlen_t	agf_freeblks;	/* total free blocks */
-	xfs_extlen_t	agf_longest;	/* longest free space */
+	__be32		agf_roots[XFS_BTNUM_AGF];	/* root blocks */
+	__be32		agf_spare0;	/* spare field */
+	__be32		agf_levels[XFS_BTNUM_AGF];	/* btree levels */
+	__be32		agf_spare1;	/* spare field */
+	__be32		agf_flfirst;	/* first freelist block's index */
+	__be32		agf_fllast;	/* last freelist block's index */
+	__be32		agf_flcount;	/* count of blocks in freelist */
+	__be32		agf_freeblks;	/* total free blocks */
+	__be32		agf_longest;	/* longest free space */
 } xfs_agf_t;
 
 #define	XFS_AGF_MAGICNUM	0x00000001
@@ -110,43 +86,39 @@ typedef struct xfs_agf
 
 /* disk block (xfs_daddr_t) in the AG */
 #define XFS_AGF_DADDR(mp)	((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGF_BLOCK)
-xfs_agblock_t xfs_agf_block(struct xfs_mount *mp);
-#define	XFS_AGF_BLOCK(mp)	xfs_agf_block(mp)
-#else
-#define XFS_AGF_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp))
-#endif
+#define	XFS_AGF_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp))
+#define	XFS_BUF_TO_AGF(bp)	((xfs_agf_t *)XFS_BUF_PTR(bp))
+
 
 /*
  * Size of the unlinked inode hash table in the agi.
  */
 #define	XFS_AGI_UNLINKED_BUCKETS	64
 
-typedef struct xfs_agi
-{
+typedef struct xfs_agi {
 	/*
 	 * Common allocation group header information
 	 */
-	__uint32_t	agi_magicnum;	/* magic number == XFS_AGI_MAGIC */
-	__uint32_t	agi_versionnum;	/* header version == XFS_AGI_VERSION */
-	xfs_agnumber_t	agi_seqno;	/* sequence # starting from 0 */
-	xfs_agblock_t	agi_length;	/* size in blocks of a.g. */
+	__be32		agi_magicnum;	/* magic number == XFS_AGI_MAGIC */
+	__be32		agi_versionnum;	/* header version == XFS_AGI_VERSION */
+	__be32		agi_seqno;	/* sequence # starting from 0 */
+	__be32		agi_length;	/* size in blocks of a.g. */
 	/*
 	 * Inode information
 	 * Inodes are mapped by interpreting the inode number, so no
 	 * mapping data is needed here.
 	 */
-	xfs_agino_t	agi_count;	/* count of allocated inodes */
-	xfs_agblock_t	agi_root;	/* root of inode btree */
-	__uint32_t	agi_level;	/* levels in inode btree */
-	xfs_agino_t	agi_freecount;	/* number of free inodes */
-	xfs_agino_t	agi_newino;	/* new inode just allocated */
-	xfs_agino_t	agi_dirino;	/* last directory inode chunk */
+	__be32		agi_count;	/* count of allocated inodes */
+	__be32		agi_root;	/* root of inode btree */
+	__be32		agi_level;	/* levels in inode btree */
+	__be32		agi_freecount;	/* number of free inodes */
+	__be32		agi_newino;	/* new inode just allocated */
+	__be32		agi_dirino;	/* last directory inode chunk */
 	/*
 	 * Hash table of inodes which have been unlinked but are
 	 * still being referenced.
 	 */
-	xfs_agino_t	agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
+	__be32		agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
 } xfs_agi_t;
 
 #define	XFS_AGI_MAGICNUM	0x00000001
@@ -165,25 +137,17 @@ typedef struct xfs_agi
 
 /* disk block (xfs_daddr_t) in the AG */
 #define XFS_AGI_DADDR(mp)	((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGI_BLOCK)
-xfs_agblock_t xfs_agi_block(struct xfs_mount *mp);
-#define	XFS_AGI_BLOCK(mp)	xfs_agi_block(mp)
-#else
-#define XFS_AGI_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp))
-#endif
+#define	XFS_AGI_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp))
+#define	XFS_BUF_TO_AGI(bp)	((xfs_agi_t *)XFS_BUF_PTR(bp))
 
 /*
  * The third a.g. block contains the a.g. freelist, an array
  * of block pointers to blocks owned by the allocation btree code.
  */
 #define XFS_AGFL_DADDR(mp)	((xfs_daddr_t)(3 << (mp)->m_sectbb_log))
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGFL_BLOCK)
-xfs_agblock_t xfs_agfl_block(struct xfs_mount *mp);
-#define	XFS_AGFL_BLOCK(mp)	xfs_agfl_block(mp)
-#else
-#define XFS_AGFL_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp))
-#endif
+#define	XFS_AGFL_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp))
 #define XFS_AGFL_SIZE(mp)	((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t))
+#define	XFS_BUF_TO_AGFL(bp)	((xfs_agfl_t *)XFS_BUF_PTR(bp))
 
 typedef struct xfs_agfl {
 	xfs_agblock_t	agfl_bno[1];	/* actually XFS_AGFL_SIZE(mp) */
@@ -230,116 +194,38 @@ typedef struct xfs_perag
 	xfs_perag_busy_t *pagb_list;	/* unstable blocks */
 } xfs_perag_t;
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_MAXLEVELS)
-int xfs_ag_maxlevels(struct xfs_mount *mp);
-#define	XFS_AG_MAXLEVELS(mp)		xfs_ag_maxlevels(mp)
-#else
-#define	XFS_AG_MAXLEVELS(mp)	((mp)->m_ag_maxlevels)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST)
-int xfs_min_freelist(xfs_agf_t *a, struct xfs_mount *mp);
-#define	XFS_MIN_FREELIST(a,mp)		xfs_min_freelist(a,mp)
-#else
-#define	XFS_MIN_FREELIST(a,mp)	\
-	XFS_MIN_FREELIST_RAW(	\
-		INT_GET((a)->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT), \
-		INT_GET((a)->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT), mp)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST_PAG)
-int xfs_min_freelist_pag(xfs_perag_t *pag, struct xfs_mount *mp);
-#define	XFS_MIN_FREELIST_PAG(pag,mp)	xfs_min_freelist_pag(pag,mp)
-#else
-#define	XFS_MIN_FREELIST_PAG(pag,mp)	\
-	XFS_MIN_FREELIST_RAW((uint_t)(pag)->pagf_levels[XFS_BTNUM_BNOi], \
-			     (uint_t)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST_RAW)
-int xfs_min_freelist_raw(int bl, int cl, struct xfs_mount *mp);
-#define	XFS_MIN_FREELIST_RAW(bl,cl,mp)	xfs_min_freelist_raw(bl,cl,mp)
-#else
+#define	XFS_AG_MAXLEVELS(mp)		((mp)->m_ag_maxlevels)
 #define	XFS_MIN_FREELIST_RAW(bl,cl,mp)	\
-	(MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + \
-	 MIN(cl + 1, XFS_AG_MAXLEVELS(mp)))
-#endif
+	(MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp)))
+#define	XFS_MIN_FREELIST(a,mp)		\
+	(XFS_MIN_FREELIST_RAW(		\
+		be32_to_cpu((a)->agf_levels[XFS_BTNUM_BNOi]), \
+		be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp))
+#define	XFS_MIN_FREELIST_PAG(pag,mp)	\
+	(XFS_MIN_FREELIST_RAW(		\
+		(uint_t)(pag)->pagf_levels[XFS_BTNUM_BNOi], \
+		(uint_t)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp))
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGB_TO_FSB)
-xfs_fsblock_t xfs_agb_to_fsb(struct xfs_mount *mp, xfs_agnumber_t agno,
-			     xfs_agblock_t agbno);
-#define XFS_AGB_TO_FSB(mp,agno,agbno)	xfs_agb_to_fsb(mp,agno,agbno)
-#else
-#define	XFS_AGB_TO_FSB(mp,agno,agbno) \
+#define XFS_AGB_TO_FSB(mp,agno,agbno)	\
 	(((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_AGNO)
-xfs_agnumber_t xfs_fsb_to_agno(struct xfs_mount *mp, xfs_fsblock_t fsbno);
-#define	XFS_FSB_TO_AGNO(mp,fsbno)	xfs_fsb_to_agno(mp,fsbno)
-#else
-#define	XFS_FSB_TO_AGNO(mp,fsbno) \
+#define	XFS_FSB_TO_AGNO(mp,fsbno)	\
 	((xfs_agnumber_t)((fsbno) >> (mp)->m_sb.sb_agblklog))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_AGBNO)
-xfs_agblock_t xfs_fsb_to_agbno(struct xfs_mount *mp, xfs_fsblock_t fsbno);
-#define	XFS_FSB_TO_AGBNO(mp,fsbno)	xfs_fsb_to_agbno(mp,fsbno)
-#else
-#define	XFS_FSB_TO_AGBNO(mp,fsbno) \
+#define	XFS_FSB_TO_AGBNO(mp,fsbno)	\
 	((xfs_agblock_t)((fsbno) & XFS_MASK32LO((mp)->m_sb.sb_agblklog)))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGB_TO_DADDR)
-xfs_daddr_t xfs_agb_to_daddr(struct xfs_mount *mp, xfs_agnumber_t agno,
-				xfs_agblock_t agbno);
-#define	XFS_AGB_TO_DADDR(mp,agno,agbno)	xfs_agb_to_daddr(mp,agno,agbno)
-#else
-#define	XFS_AGB_TO_DADDR(mp,agno,agbno) \
-	((xfs_daddr_t)(XFS_FSB_TO_BB(mp, \
-		(xfs_fsblock_t)(agno) * (mp)->m_sb.sb_agblocks + (agbno))))
-#endif
-/*
- * XFS_DADDR_TO_AGNO and XFS_DADDR_TO_AGBNO moved to xfs_mount.h
- * to avoid header file ordering change
- */
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_DADDR)
-xfs_daddr_t xfs_ag_daddr(struct xfs_mount *mp, xfs_agnumber_t agno,
-				xfs_daddr_t d);
-#define	XFS_AG_DADDR(mp,agno,d)		xfs_ag_daddr(mp,agno,d)
-#else
-#define	XFS_AG_DADDR(mp,agno,d)	(XFS_AGB_TO_DADDR(mp, agno, 0) + (d))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGF)
-xfs_agf_t *xfs_buf_to_agf(struct xfs_buf *bp);
-#define	XFS_BUF_TO_AGF(bp)		xfs_buf_to_agf(bp)
-#else
-#define	XFS_BUF_TO_AGF(bp)	((xfs_agf_t *)XFS_BUF_PTR(bp))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGI)
-xfs_agi_t *xfs_buf_to_agi(struct xfs_buf *bp);
-#define	XFS_BUF_TO_AGI(bp)		xfs_buf_to_agi(bp)
-#else
-#define	XFS_BUF_TO_AGI(bp)	((xfs_agi_t *)XFS_BUF_PTR(bp))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGFL)
-xfs_agfl_t *xfs_buf_to_agfl(struct xfs_buf *bp);
-#define	XFS_BUF_TO_AGFL(bp)		xfs_buf_to_agfl(bp)
-#else
-#define	XFS_BUF_TO_AGFL(bp)	((xfs_agfl_t *)XFS_BUF_PTR(bp))
-#endif
+#define	XFS_AGB_TO_DADDR(mp,agno,agbno)	\
+	((xfs_daddr_t)XFS_FSB_TO_BB(mp, \
+		(xfs_fsblock_t)(agno) * (mp)->m_sb.sb_agblocks + (agbno)))
+#define	XFS_AG_DADDR(mp,agno,d)		(XFS_AGB_TO_DADDR(mp, agno, 0) + (d))
 
 /*
  * For checking for bad ranges of xfs_daddr_t's, covering multiple
  * allocation groups or a single xfs_daddr_t that's a superblock copy.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_CHECK_DADDR)
-void xfs_ag_check_daddr(struct xfs_mount *mp, xfs_daddr_t d, xfs_extlen_t len);
-#define	XFS_AG_CHECK_DADDR(mp,d,len)	xfs_ag_check_daddr(mp,d,len)
-#else
 #define	XFS_AG_CHECK_DADDR(mp,d,len)	\
 	((len) == 1 ? \
 	    ASSERT((d) == XFS_SB_DADDR || \
 		   XFS_DADDR_TO_AGBNO(mp, d) != XFS_SB_DADDR) : \
 	    ASSERT(XFS_DADDR_TO_AGNO(mp, d) == \
 		   XFS_DADDR_TO_AGNO(mp, (d) + (len) - 1)))
-#endif
 
 #endif	/* __XFS_AG_H__ */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index dcfe1970362..f4328e1e2a7 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1,56 +1,44 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-/*
- * Free space allocation for XFS.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_dir.h"
+#include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_ialloc.h"
 #include "xfs_alloc.h"
-#include "xfs_bit.h"
 #include "xfs_error.h"
 
 
@@ -243,8 +231,8 @@ xfs_alloc_fix_minleft(
 	if (args->minleft == 0)
 		return 1;
 	agf = XFS_BUF_TO_AGF(args->agbp);
-	diff = INT_GET(agf->agf_freeblks, ARCH_CONVERT)
-		+ INT_GET(agf->agf_flcount, ARCH_CONVERT)
+	diff = be32_to_cpu(agf->agf_freeblks)
+		+ be32_to_cpu(agf->agf_flcount)
 		- args->len - args->minleft;
 	if (diff >= 0)
 		return 1;
@@ -319,7 +307,8 @@ xfs_alloc_fixup_trees(
 			bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]);
 			cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]);
 			XFS_WANT_CORRUPTED_RETURN(
-				INT_GET(bnoblock->bb_numrecs, ARCH_CONVERT) == INT_GET(cntblock->bb_numrecs, ARCH_CONVERT));
+				be16_to_cpu(bnoblock->bb_numrecs) ==
+				be16_to_cpu(cntblock->bb_numrecs));
 		}
 	}
 #endif
@@ -505,21 +494,17 @@ xfs_alloc_trace_modagf(
 		(void *)str,
 		(void *)mp,
 		(void *)(__psint_t)flags,
-		(void *)(__psunsigned_t)INT_GET(agf->agf_seqno, ARCH_CONVERT),
-		(void *)(__psunsigned_t)INT_GET(agf->agf_length, ARCH_CONVERT),
-		(void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_BNO],
-						ARCH_CONVERT),
-		(void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_CNT],
-						ARCH_CONVERT),
-		(void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_BNO],
-						ARCH_CONVERT),
-		(void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_CNT],
-						ARCH_CONVERT),
-		(void *)(__psunsigned_t)INT_GET(agf->agf_flfirst, ARCH_CONVERT),
-		(void *)(__psunsigned_t)INT_GET(agf->agf_fllast, ARCH_CONVERT),
-		(void *)(__psunsigned_t)INT_GET(agf->agf_flcount, ARCH_CONVERT),
-		(void *)(__psunsigned_t)INT_GET(agf->agf_freeblks, ARCH_CONVERT),
-		(void *)(__psunsigned_t)INT_GET(agf->agf_longest, ARCH_CONVERT));
+		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_seqno),
+		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_length),
+		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
+		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
+		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
+		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
+		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_flfirst),
+		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_fllast),
+		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_flcount),
+		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_freeblks),
+		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_longest));
 }
 
 STATIC void
@@ -612,12 +597,12 @@ xfs_alloc_ag_vextent(
 		if (!(args->wasfromfl)) {
 
 			agf = XFS_BUF_TO_AGF(args->agbp);
-			INT_MOD(agf->agf_freeblks, ARCH_CONVERT, -(args->len));
+			be32_add(&agf->agf_freeblks, -(args->len));
 			xfs_trans_agblocks_delta(args->tp,
 						 -((long)(args->len)));
 			args->pag->pagf_freeblks -= args->len;
-			ASSERT(INT_GET(agf->agf_freeblks, ARCH_CONVERT)
-				<= INT_GET(agf->agf_length, ARCH_CONVERT));
+			ASSERT(be32_to_cpu(agf->agf_freeblks) <=
+				be32_to_cpu(agf->agf_length));
 			TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
 			xfs_alloc_log_agf(args->tp, args->agbp,
 						XFS_AGF_FREEBLKS);
@@ -723,8 +708,7 @@ xfs_alloc_ag_vextent_exact(
 	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
 		args->agno, XFS_BTNUM_CNT, NULL, 0);
 	ASSERT(args->agbno + args->len <=
-		INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
-			ARCH_CONVERT));
+		be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
 			args->agbno, args->len, XFSA_FIXUP_BNO_OK))) {
 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
@@ -897,8 +881,7 @@ xfs_alloc_ag_vextent_near(
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 		ltend = ltbno + ltlen;
-		ASSERT(ltend <= INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
-				ARCH_CONVERT));
+		ASSERT(ltend <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 		args->len = blen;
 		if (!xfs_alloc_fix_minleft(args)) {
 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -1253,8 +1236,7 @@ xfs_alloc_ag_vextent_near(
 		ltlen, &ltnew);
 	ASSERT(ltnew >= ltbno);
 	ASSERT(ltnew + rlen <= ltend);
-	ASSERT(ltnew + rlen <= INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
-		ARCH_CONVERT));
+	ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 	args->agbno = ltnew;
 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
 			ltnew, rlen, XFSA_FIXUP_BNO_OK)))
@@ -1417,8 +1399,7 @@ xfs_alloc_ag_vextent_size(
 	args->agbno = rbno;
 	XFS_WANT_CORRUPTED_GOTO(
 		args->agbno + args->len <=
-			INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
-			ARCH_CONVERT),
+			be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
 		error0);
 	TRACE_ALLOC("normal", args);
 	return 0;
@@ -1466,8 +1447,8 @@ xfs_alloc_ag_vextent_small(
 	 * freelist.
 	 */
 	else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
-		 (INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_flcount,
-			ARCH_CONVERT) > args->minleft)) {
+		 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
+		  > args->minleft)) {
 		if ((error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno)))
 			goto error0;
 		if (fbno != NULLAGBLOCK) {
@@ -1482,8 +1463,7 @@ xfs_alloc_ag_vextent_small(
 			args->agbno = fbno;
 			XFS_WANT_CORRUPTED_GOTO(
 				args->agbno + args->len <=
-				INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
-					ARCH_CONVERT),
+				be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
 				error0);
 			args->wasfromfl = 1;
 			TRACE_ALLOC("freelist", args);
@@ -1757,12 +1737,12 @@ xfs_free_ag_extent(
 
 		agf = XFS_BUF_TO_AGF(agbp);
 		pag = &mp->m_perag[agno];
-		INT_MOD(agf->agf_freeblks, ARCH_CONVERT, len);
+		be32_add(&agf->agf_freeblks, len);
 		xfs_trans_agblocks_delta(tp, len);
 		pag->pagf_freeblks += len;
 		XFS_WANT_CORRUPTED_GOTO(
-			INT_GET(agf->agf_freeblks, ARCH_CONVERT)
-				<= INT_GET(agf->agf_length, ARCH_CONVERT),
+			be32_to_cpu(agf->agf_freeblks) <=
+			be32_to_cpu(agf->agf_length),
 			error0);
 		TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
 		xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
@@ -1909,18 +1889,18 @@ xfs_alloc_fix_freelist(
 	 */
 	agf = XFS_BUF_TO_AGF(agbp);
 	need = XFS_MIN_FREELIST(agf, mp);
-	delta = need > INT_GET(agf->agf_flcount, ARCH_CONVERT) ?
-		(need - INT_GET(agf->agf_flcount, ARCH_CONVERT)) : 0;
+	delta = need > be32_to_cpu(agf->agf_flcount) ?
+		(need - be32_to_cpu(agf->agf_flcount)) : 0;
 	/*
 	 * If there isn't enough total or single-extent, reject it.
 	 */
-	longest = INT_GET(agf->agf_longest, ARCH_CONVERT);
+	longest = be32_to_cpu(agf->agf_longest);
 	longest = (longest > delta) ? (longest - delta) :
-		(INT_GET(agf->agf_flcount, ARCH_CONVERT) > 0 || longest > 0);
+		(be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
 	if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
 	     (args->minleft &&
-		(int)(INT_GET(agf->agf_freeblks, ARCH_CONVERT) +
-		   INT_GET(agf->agf_flcount, ARCH_CONVERT) - need - args->total) <
+		(int)(be32_to_cpu(agf->agf_freeblks) +
+		   be32_to_cpu(agf->agf_flcount) - need - args->total) <
 	     (int)args->minleft)) {
 		xfs_trans_brelse(tp, agbp);
 		args->agbp = NULL;
@@ -1929,7 +1909,7 @@ xfs_alloc_fix_freelist(
 	/*
 	 * Make the freelist shorter if it's too long.
 	 */
-	while (INT_GET(agf->agf_flcount, ARCH_CONVERT) > need) {
+	while (be32_to_cpu(agf->agf_flcount) > need) {
 		xfs_buf_t	*bp;
 
 		if ((error = xfs_alloc_get_freelist(tp, agbp, &bno)))
@@ -1956,9 +1936,9 @@ xfs_alloc_fix_freelist(
 	/*
 	 * Make the freelist longer if it's too short.
 	 */
-	while (INT_GET(agf->agf_flcount, ARCH_CONVERT) < need) {
+	while (be32_to_cpu(agf->agf_flcount) < need) {
 		targs.agbno = 0;
-		targs.maxlen = need - INT_GET(agf->agf_flcount, ARCH_CONVERT);
+		targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
 		/*
 		 * Allocate as many blocks as possible at once.
 		 */
@@ -2018,19 +1998,19 @@ xfs_alloc_get_freelist(
 	 */
 	mp = tp->t_mountp;
 	if ((error = xfs_alloc_read_agfl(mp, tp,
-			INT_GET(agf->agf_seqno, ARCH_CONVERT), &agflbp)))
+			be32_to_cpu(agf->agf_seqno), &agflbp)))
 		return error;
 	agfl = XFS_BUF_TO_AGFL(agflbp);
 	/*
 	 * Get the block number and update the data structures.
 	 */
-	bno = INT_GET(agfl->agfl_bno[INT_GET(agf->agf_flfirst, ARCH_CONVERT)], ARCH_CONVERT);
-	INT_MOD(agf->agf_flfirst, ARCH_CONVERT, 1);
+	bno = INT_GET(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)], ARCH_CONVERT);
+	be32_add(&agf->agf_flfirst, 1);
 	xfs_trans_brelse(tp, agflbp);
-	if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) == XFS_AGFL_SIZE(mp))
+	if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
 		agf->agf_flfirst = 0;
-	pag = &mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)];
-	INT_MOD(agf->agf_flcount, ARCH_CONVERT, -1);
+	pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
+	be32_add(&agf->agf_flcount, -1);
 	xfs_trans_agflist_delta(tp, -1);
 	pag->pagf_flcount--;
 	TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
@@ -2045,7 +2025,7 @@ xfs_alloc_get_freelist(
 	 * the freeing transaction must be pushed to disk NOW by forcing
 	 * to disk all iclogs up that transaction's LSN.
 	 */
-	xfs_alloc_search_busy(tp, INT_GET(agf->agf_seqno, ARCH_CONVERT), bno, 1);
+	xfs_alloc_search_busy(tp, be32_to_cpu(agf->agf_seqno), bno, 1);
 	return 0;
 }
 
@@ -2123,18 +2103,18 @@ xfs_alloc_put_freelist(
 	mp = tp->t_mountp;
 
 	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
-			INT_GET(agf->agf_seqno, ARCH_CONVERT), &agflbp)))
+			be32_to_cpu(agf->agf_seqno), &agflbp)))
 		return error;
 	agfl = XFS_BUF_TO_AGFL(agflbp);
-	INT_MOD(agf->agf_fllast, ARCH_CONVERT, 1);
-	if (INT_GET(agf->agf_fllast, ARCH_CONVERT) == XFS_AGFL_SIZE(mp))
+	be32_add(&agf->agf_fllast, 1);
+	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
 		agf->agf_fllast = 0;
-	pag = &mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)];
-	INT_MOD(agf->agf_flcount, ARCH_CONVERT, 1);
+	pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
+	be32_add(&agf->agf_flcount, 1);
 	xfs_trans_agflist_delta(tp, 1);
 	pag->pagf_flcount++;
-	ASSERT(INT_GET(agf->agf_flcount, ARCH_CONVERT) <= XFS_AGFL_SIZE(mp));
-	blockp = &agfl->agfl_bno[INT_GET(agf->agf_fllast, ARCH_CONVERT)];
+	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
+	blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
 	INT_SET(*blockp, ARCH_CONVERT, bno);
 	TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
 	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
@@ -2181,14 +2161,12 @@ xfs_alloc_read_agf(
 	 */
 	agf = XFS_BUF_TO_AGF(bp);
 	agf_ok =
-		INT_GET(agf->agf_magicnum, ARCH_CONVERT) == XFS_AGF_MAGIC &&
-		XFS_AGF_GOOD_VERSION(
-			INT_GET(agf->agf_versionnum, ARCH_CONVERT)) &&
-		INT_GET(agf->agf_freeblks, ARCH_CONVERT) <=
-				INT_GET(agf->agf_length, ARCH_CONVERT) &&
-		INT_GET(agf->agf_flfirst, ARCH_CONVERT) < XFS_AGFL_SIZE(mp) &&
-		INT_GET(agf->agf_fllast,  ARCH_CONVERT) < XFS_AGFL_SIZE(mp) &&
-		INT_GET(agf->agf_flcount, ARCH_CONVERT) <= XFS_AGFL_SIZE(mp);
+		be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
+		XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
+		be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
+		be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
+		be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
+		be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
 	if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
 			XFS_RANDOM_ALLOC_READ_AGF))) {
 		XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
@@ -2198,13 +2176,13 @@ xfs_alloc_read_agf(
 	}
 	pag = &mp->m_perag[agno];
 	if (!pag->pagf_init) {
-		pag->pagf_freeblks = INT_GET(agf->agf_freeblks, ARCH_CONVERT);
-		pag->pagf_flcount = INT_GET(agf->agf_flcount, ARCH_CONVERT);
-		pag->pagf_longest = INT_GET(agf->agf_longest, ARCH_CONVERT);
+		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
+		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
+		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
 		pag->pagf_levels[XFS_BTNUM_BNOi] =
-			INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT);
+			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
 		pag->pagf_levels[XFS_BTNUM_CNTi] =
-			INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT);
+			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
 		spinlock_init(&pag->pagb_lock, "xfspagb");
 		pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS *
 					sizeof(xfs_perag_busy_t), KM_SLEEP);
@@ -2212,13 +2190,13 @@ xfs_alloc_read_agf(
 	}
 #ifdef DEBUG
 	else if (!XFS_FORCED_SHUTDOWN(mp)) {
-		ASSERT(pag->pagf_freeblks == INT_GET(agf->agf_freeblks, ARCH_CONVERT));
-		ASSERT(pag->pagf_flcount == INT_GET(agf->agf_flcount, ARCH_CONVERT));
-		ASSERT(pag->pagf_longest == INT_GET(agf->agf_longest, ARCH_CONVERT));
+		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
+		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
+		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
 		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
-		       INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT));
+		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
 		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
-		       INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT));
+		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
 	}
 #endif
 	XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF);
@@ -2467,7 +2445,7 @@ xfs_free_extent(
 #ifdef DEBUG
 	ASSERT(args.agbp != NULL);
 	agf = XFS_BUF_TO_AGF(args.agbp);
-	ASSERT(args.agbno + len <= INT_GET(agf->agf_length, ARCH_CONVERT));
+	ASSERT(args.agbno + len <= be32_to_cpu(agf->agf_length));
 #endif
 	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno,
 		len, 0);
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 72329c86351..3546dea27b7 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_ALLOC_H__
 #define	__XFS_ALLOC_H__
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index e0355a12d94..a1d92da86cc 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -1,53 +1,41 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
-/*
- * Free space allocation for XFS.
- */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_dir.h"
+#include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_bmap_btree.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_ialloc.h"
 #include "xfs_alloc.h"
@@ -129,7 +117,7 @@ xfs_alloc_delrec(
 	/*
 	 * Fail if we're off the end of the block.
 	 */
-	if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+	if (ptr > be16_to_cpu(block->bb_numrecs)) {
 		*stat = 0;
 		return 0;
 	}
@@ -143,18 +131,18 @@ xfs_alloc_delrec(
 		lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
 		lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
 #ifdef DEBUG
-		for (i = ptr; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
-			if ((error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level)))
+		for (i = ptr; i < be16_to_cpu(block->bb_numrecs); i++) {
+			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
 				return error;
 		}
 #endif
-		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		if (ptr < be16_to_cpu(block->bb_numrecs)) {
 			memmove(&lkp[ptr - 1], &lkp[ptr],
-				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lkp)); /* INT_: mem copy */
+				(be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lkp));
 			memmove(&lpp[ptr - 1], &lpp[ptr],
-				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lpp)); /* INT_: mem copy */
-			xfs_alloc_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
-			xfs_alloc_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
+				(be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lpp));
+			xfs_alloc_log_ptrs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
+			xfs_alloc_log_keys(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
 		}
 	}
 	/*
@@ -163,25 +151,25 @@ xfs_alloc_delrec(
 	 */
 	else {
 		lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
-		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		if (ptr < be16_to_cpu(block->bb_numrecs)) {
 			memmove(&lrp[ptr - 1], &lrp[ptr],
-				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lrp));
-			xfs_alloc_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
+				(be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lrp));
+			xfs_alloc_log_recs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
 		}
 		/*
 		 * If it's the first record in the block, we'll need a key
 		 * structure to pass up to the next level (updkey).
 		 */
 		if (ptr == 1) {
-			key.ar_startblock = lrp->ar_startblock; /* INT_: direct copy */
-			key.ar_blockcount = lrp->ar_blockcount; /* INT_: direct copy */
+			key.ar_startblock = lrp->ar_startblock;
+			key.ar_blockcount = lrp->ar_blockcount;
 			lkp = &key;
 		}
 	}
 	/*
 	 * Decrement and log the number of entries in the block.
 	 */
-	INT_MOD(block->bb_numrecs, ARCH_CONVERT, -1);
+	be16_add(&block->bb_numrecs, -1);
 	xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
 	/*
 	 * See if the longest free extent in the allocation group was
@@ -194,24 +182,24 @@ xfs_alloc_delrec(
 
 	if (level == 0 &&
 	    cur->bc_btnum == XFS_BTNUM_CNT &&
-	    INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK &&
-	    ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
-		ASSERT(ptr == INT_GET(block->bb_numrecs, ARCH_CONVERT) + 1);
+	    be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
+	    ptr > be16_to_cpu(block->bb_numrecs)) {
+		ASSERT(ptr == be16_to_cpu(block->bb_numrecs) + 1);
 		/*
 		 * There are still records in the block.  Grab the size
 		 * from the last one.
 		 */
-		if (INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
-			rrp = XFS_ALLOC_REC_ADDR(block, INT_GET(block->bb_numrecs, ARCH_CONVERT), cur);
-			INT_COPY(agf->agf_longest, rrp->ar_blockcount, ARCH_CONVERT);
+		if (be16_to_cpu(block->bb_numrecs)) {
+			rrp = XFS_ALLOC_REC_ADDR(block, be16_to_cpu(block->bb_numrecs), cur);
+			agf->agf_longest = rrp->ar_blockcount;
 		}
 		/*
 		 * No free extents left.
 		 */
 		else
 			agf->agf_longest = 0;
-		mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_longest =
-			INT_GET(agf->agf_longest, ARCH_CONVERT);
+		mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest =
+			be32_to_cpu(agf->agf_longest);
 		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
 			XFS_AGF_LONGEST);
 	}
@@ -225,15 +213,15 @@ xfs_alloc_delrec(
 		 * and it's NOT the leaf level,
 		 * then we can get rid of this level.
 		 */
-		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == 1 && level > 0) {
+		if (be16_to_cpu(block->bb_numrecs) == 1 && level > 0) {
 			/*
 			 * lpp is still set to the first pointer in the block.
 			 * Make it the new root of the btree.
 			 */
-			bno = INT_GET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT);
-			INT_COPY(agf->agf_roots[cur->bc_btnum], *lpp, ARCH_CONVERT);
-			INT_MOD(agf->agf_levels[cur->bc_btnum], ARCH_CONVERT, -1);
-			mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_levels[cur->bc_btnum]--;
+			bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
+			agf->agf_roots[cur->bc_btnum] = *lpp;
+			be32_add(&agf->agf_levels[cur->bc_btnum], -1);
+			mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--;
 			/*
 			 * Put this buffer/block on the ag's freelist.
 			 */
@@ -255,7 +243,7 @@ xfs_alloc_delrec(
 			 * that freed the block.
 			 */
 			xfs_alloc_mark_busy(cur->bc_tp,
-				INT_GET(agf->agf_seqno, ARCH_CONVERT), bno, 1);
+				be32_to_cpu(agf->agf_seqno), bno, 1);
 
 			xfs_trans_agbtree_delta(cur->bc_tp, -1);
 			xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
@@ -281,7 +269,7 @@ xfs_alloc_delrec(
 	 * If the number of records remaining in the block is at least
 	 * the minimum, we're done.
 	 */
-	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
+	if (be16_to_cpu(block->bb_numrecs) >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
 		if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
 			return error;
 		*stat = 1;
@@ -292,8 +280,8 @@ xfs_alloc_delrec(
 	 * tree balanced.  Look at the left and right sibling blocks to
 	 * see if we can re-balance by moving only one record.
 	 */
-	rbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
-	lbno = INT_GET(block->bb_leftsib, ARCH_CONVERT);
+	rbno = be32_to_cpu(block->bb_rightsib);
+	lbno = be32_to_cpu(block->bb_leftsib);
 	bno = NULLAGBLOCK;
 	ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
 	/*
@@ -330,18 +318,18 @@ xfs_alloc_delrec(
 		/*
 		 * Grab the current block number, for future use.
 		 */
-		bno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
+		bno = be32_to_cpu(right->bb_leftsib);
 		/*
 		 * If right block is full enough so that removing one entry
 		 * won't make it too empty, and left-shifting an entry out
 		 * of right to us works, we're done.
 		 */
-		if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >=
+		if (be16_to_cpu(right->bb_numrecs) - 1 >=
 		     XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
 			if ((error = xfs_alloc_lshift(tcur, level, &i)))
 				goto error0;
 			if (i) {
-				ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
+				ASSERT(be16_to_cpu(block->bb_numrecs) >=
 				       XFS_ALLOC_BLOCK_MINRECS(level, cur));
 				xfs_btree_del_cursor(tcur,
 						     XFS_BTREE_NOERROR);
@@ -358,7 +346,7 @@ xfs_alloc_delrec(
 		 * future reference, and fix up the temp cursor to point
 		 * to our block again (last record).
 		 */
-		rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
+		rrecs = be16_to_cpu(right->bb_numrecs);
 		if (lbno != NULLAGBLOCK) {
 			i = xfs_btree_firstrec(tcur, level);
 			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
@@ -394,18 +382,18 @@ xfs_alloc_delrec(
 		/*
 		 * Grab the current block number, for future use.
 		 */
-		bno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
+		bno = be32_to_cpu(left->bb_rightsib);
 		/*
 		 * If left block is full enough so that removing one entry
 		 * won't make it too empty, and right-shifting an entry out
 		 * of left to us works, we're done.
 		 */
-		if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >=
+		if (be16_to_cpu(left->bb_numrecs) - 1 >=
 		     XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
 			if ((error = xfs_alloc_rshift(tcur, level, &i)))
 				goto error0;
 			if (i) {
-				ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
+				ASSERT(be16_to_cpu(block->bb_numrecs) >=
 				       XFS_ALLOC_BLOCK_MINRECS(level, cur));
 				xfs_btree_del_cursor(tcur,
 						     XFS_BTREE_NOERROR);
@@ -419,7 +407,7 @@ xfs_alloc_delrec(
 		 * Otherwise, grab the number of records in right for
 		 * future reference.
 		 */
-		lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
+		lrecs = be16_to_cpu(left->bb_numrecs);
 	}
 	/*
 	 * Delete the temp cursor, we're done with it.
@@ -433,7 +421,7 @@ xfs_alloc_delrec(
 	 * See if we can join with the left neighbor block.
 	 */
 	if (lbno != NULLAGBLOCK &&
-	    lrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+	    lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
 		/*
 		 * Set "right" to be the starting block,
 		 * "left" to be the left neighbor.
@@ -453,7 +441,7 @@ xfs_alloc_delrec(
 	 * If that won't work, see if we can join with the right neighbor block.
 	 */
 	else if (rbno != NULLAGBLOCK &&
-		 rrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
+		 rrecs + be16_to_cpu(block->bb_numrecs) <=
 		  XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
 		/*
 		 * Set "left" to be the starting block,
@@ -488,31 +476,34 @@ xfs_alloc_delrec(
 		/*
 		 * It's a non-leaf.  Move keys and pointers.
 		 */
-		lkp = XFS_ALLOC_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
-		lpp = XFS_ALLOC_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+		lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
+		lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
 		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
 		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
 #ifdef DEBUG
-		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
-			if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)))
+		for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
+			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
 				return error;
 		}
 #endif
-		memcpy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp)); /* INT_: structure copy */
-		memcpy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp)); /* INT_: structure copy */
-		xfs_alloc_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
-				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
-		xfs_alloc_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
-				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		memcpy(lkp, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*lkp));
+		memcpy(lpp, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*lpp));
+		xfs_alloc_log_keys(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
+				   be16_to_cpu(left->bb_numrecs) +
+				   be16_to_cpu(right->bb_numrecs));
+		xfs_alloc_log_ptrs(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
+				   be16_to_cpu(left->bb_numrecs) +
+				   be16_to_cpu(right->bb_numrecs));
 	} else {
 		/*
 		 * It's a leaf.  Move records.
 		 */
-		lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+		lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
 		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
-		memcpy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
-		xfs_alloc_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
-				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		memcpy(lrp, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*lrp));
+		xfs_alloc_log_recs(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
+				   be16_to_cpu(left->bb_numrecs) +
+				   be16_to_cpu(right->bb_numrecs));
 	}
 	/*
 	 * If we joined with the left neighbor, set the buffer in the
@@ -520,7 +511,7 @@ xfs_alloc_delrec(
 	 */
 	if (bp != lbp) {
 		xfs_btree_setbuf(cur, level, lbp);
-		cur->bc_ptrs[level] += INT_GET(left->bb_numrecs, ARCH_CONVERT);
+		cur->bc_ptrs[level] += be16_to_cpu(left->bb_numrecs);
 	}
 	/*
 	 * If we joined with the right neighbor and there's a level above
@@ -532,28 +523,28 @@ xfs_alloc_delrec(
 	/*
 	 * Fix up the number of records in the surviving block.
 	 */
-	INT_MOD(left->bb_numrecs, ARCH_CONVERT, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+	be16_add(&left->bb_numrecs, be16_to_cpu(right->bb_numrecs));
 	/*
 	 * Fix up the right block pointer in the surviving block, and log it.
 	 */
-	left->bb_rightsib = right->bb_rightsib; /* INT_: direct copy */
+	left->bb_rightsib = right->bb_rightsib;
 	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
 	/*
 	 * If there is a right sibling now, make it point to the
 	 * remaining block.
 	 */
-	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+	if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
 		xfs_alloc_block_t	*rrblock;
 		xfs_buf_t		*rrbp;
 
 		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
-				cur->bc_private.a.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0,
+				cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,
 				&rrbp, XFS_ALLOC_BTREE_REF)))
 			return error;
 		rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
 		if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
 			return error;
-		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno);
+		rrblock->bb_leftsib = cpu_to_be32(lbno);
 		xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
 	}
 	/*
@@ -574,10 +565,9 @@ xfs_alloc_delrec(
 	 * busy block is allocated, the iclog is pushed up to the
 	 * LSN that freed the block.
 	 */
-	xfs_alloc_mark_busy(cur->bc_tp,
-		INT_GET(agf->agf_seqno, ARCH_CONVERT), bno, 1);
-
+	xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
 	xfs_trans_agbtree_delta(cur->bc_tp, -1);
+
 	/*
 	 * Adjust the current level's cursor so that we're left referring
 	 * to the right node, after we're done.
@@ -625,7 +615,15 @@ xfs_alloc_insrec(
 	int			ptr;	/* index in btree block for this rec */
 	xfs_alloc_rec_t		*rp;	/* pointer to btree records */
 
-	ASSERT(INT_GET(recp->ar_blockcount, ARCH_CONVERT) > 0);
+	ASSERT(be32_to_cpu(recp->ar_blockcount) > 0);
+
+	/*
+	 * GCC doesn't understand the (arguably complex) control flow in
+	 * this function and complains about uninitialized structure fields
+	 * without this.
+	 */
+	memset(&nrec, 0, sizeof(nrec));
+
 	/*
 	 * If we made it to the root level, allocate a new root block
 	 * and we're done.
@@ -641,8 +639,8 @@ xfs_alloc_insrec(
 	/*
 	 * Make a key out of the record data to be inserted, and save it.
 	 */
-	key.ar_startblock = recp->ar_startblock; /* INT_: direct copy */
-	key.ar_blockcount = recp->ar_blockcount; /* INT_: direct copy */
+	key.ar_startblock = recp->ar_startblock;
+	key.ar_blockcount = recp->ar_blockcount;
 	optr = ptr = cur->bc_ptrs[level];
 	/*
 	 * If we're off the left edge, return failure.
@@ -663,7 +661,7 @@ xfs_alloc_insrec(
 	/*
 	 * Check that the new entry is being inserted in the right place.
 	 */
-	if (ptr <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+	if (ptr <= be16_to_cpu(block->bb_numrecs)) {
 		if (level == 0) {
 			rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
 			xfs_btree_check_rec(cur->bc_btnum, recp, rp);
@@ -679,7 +677,7 @@ xfs_alloc_insrec(
 	 * If the block is full, we can't insert the new entry until we
 	 * make the block un-full.
 	 */
-	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+	if (be16_to_cpu(block->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
 		/*
 		 * First, try shifting an entry to the right neighbor.
 		 */
@@ -716,8 +714,8 @@ xfs_alloc_insrec(
 						return error;
 #endif
 					ptr = cur->bc_ptrs[level];
-					nrec.ar_startblock = nkey.ar_startblock; /* INT_: direct copy */
-					nrec.ar_blockcount = nkey.ar_blockcount; /* INT_: direct copy */
+					nrec.ar_startblock = nkey.ar_startblock;
+					nrec.ar_blockcount = nkey.ar_blockcount;
 				}
 				/*
 				 * Otherwise the insert fails.
@@ -741,15 +739,15 @@ xfs_alloc_insrec(
 		kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
 		pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
 #ifdef DEBUG
-		for (i = INT_GET(block->bb_numrecs, ARCH_CONVERT); i >= ptr; i--) {
-			if ((error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level)))
+		for (i = be16_to_cpu(block->bb_numrecs); i >= ptr; i--) {
+			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
 				return error;
 		}
 #endif
 		memmove(&kp[ptr], &kp[ptr - 1],
-			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp)); /* INT_: copy */
+			(be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*kp));
 		memmove(&pp[ptr], &pp[ptr - 1],
-			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp)); /* INT_: copy */
+			(be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*pp));
 #ifdef DEBUG
 		if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
 			return error;
@@ -758,12 +756,12 @@ xfs_alloc_insrec(
 		 * Now stuff the new data in, bump numrecs and log the new data.
 		 */
 		kp[ptr - 1] = key;
-		INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
-		INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
-		xfs_alloc_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
-		xfs_alloc_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+		pp[ptr - 1] = cpu_to_be32(*bnop);
+		be16_add(&block->bb_numrecs, 1);
+		xfs_alloc_log_keys(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
+		xfs_alloc_log_ptrs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
 #ifdef DEBUG
-		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT))
+		if (ptr < be16_to_cpu(block->bb_numrecs))
 			xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
 				kp + ptr);
 #endif
@@ -773,16 +771,16 @@ xfs_alloc_insrec(
 		 */
 		rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
 		memmove(&rp[ptr], &rp[ptr - 1],
-			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
+			(be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*rp));
 		/*
 		 * Now stuff the new record in, bump numrecs
 		 * and log the new data.
 		 */
 		rp[ptr - 1] = *recp; /* INT_: struct copy */
-		INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
-		xfs_alloc_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+		be16_add(&block->bb_numrecs, 1);
+		xfs_alloc_log_recs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
 #ifdef DEBUG
-		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT))
+		if (ptr < be16_to_cpu(block->bb_numrecs))
 			xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
 				rp + ptr);
 #endif
@@ -804,16 +802,16 @@ xfs_alloc_insrec(
 	agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
 	if (level == 0 &&
 	    cur->bc_btnum == XFS_BTNUM_CNT &&
-	    INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK &&
-	    INT_GET(recp->ar_blockcount, ARCH_CONVERT) > INT_GET(agf->agf_longest, ARCH_CONVERT)) {
+	    be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
+	    be32_to_cpu(recp->ar_blockcount) > be32_to_cpu(agf->agf_longest)) {
 		/*
 		 * If this is a leaf in the by-size btree and there
 		 * is no right sibling block and this block is bigger
 		 * than the previous longest block, update it.
 		 */
-		INT_COPY(agf->agf_longest, recp->ar_blockcount, ARCH_CONVERT);
-		cur->bc_mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_longest
-			= INT_GET(recp->ar_blockcount, ARCH_CONVERT);
+		agf->agf_longest = recp->ar_blockcount;
+		cur->bc_mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest
+			= be32_to_cpu(recp->ar_blockcount);
 		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
 			XFS_AGF_LONGEST);
 	}
@@ -923,8 +921,9 @@ xfs_alloc_log_recs(
 
 		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
 		for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++)
-			ASSERT(INT_GET(p->ar_startblock, ARCH_CONVERT) + INT_GET(p->ar_blockcount, ARCH_CONVERT) <=
-			       INT_GET(agf->agf_length, ARCH_CONVERT));
+			ASSERT(be32_to_cpu(p->ar_startblock) +
+			       be32_to_cpu(p->ar_blockcount) <=
+			       be32_to_cpu(agf->agf_length));
 	}
 #endif
 	first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
@@ -961,8 +960,8 @@ xfs_alloc_lookup(
 		xfs_agf_t	*agf;	/* a.g. freespace header */
 
 		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
-		agno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
-		agbno = INT_GET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT);
+		agno = be32_to_cpu(agf->agf_seqno);
+		agbno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
 	}
 	/*
 	 * Iterate over each level in the btree, starting at the root.
@@ -1029,7 +1028,7 @@ xfs_alloc_lookup(
 			 * Set low and high entry numbers, 1-based.
 			 */
 			low = 1;
-			if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) {
+			if (!(high = be16_to_cpu(block->bb_numrecs))) {
 				/*
 				 * If the block is empty, the tree must
 				 * be an empty leaf.
@@ -1058,14 +1057,14 @@ xfs_alloc_lookup(
 					xfs_alloc_key_t	*kkp;
 
 					kkp = kkbase + keyno - 1;
-					startblock = INT_GET(kkp->ar_startblock, ARCH_CONVERT);
-					blockcount = INT_GET(kkp->ar_blockcount, ARCH_CONVERT);
+					startblock = be32_to_cpu(kkp->ar_startblock);
+					blockcount = be32_to_cpu(kkp->ar_blockcount);
 				} else {
 					xfs_alloc_rec_t	*krp;
 
 					krp = krbase + keyno - 1;
-					startblock = INT_GET(krp->ar_startblock, ARCH_CONVERT);
-					blockcount = INT_GET(krp->ar_blockcount, ARCH_CONVERT);
+					startblock = be32_to_cpu(krp->ar_startblock);
+					blockcount = be32_to_cpu(krp->ar_blockcount);
 				}
 				/*
 				 * Compute difference to get next direction.
@@ -1105,7 +1104,7 @@ xfs_alloc_lookup(
 			 */
 			if (diff > 0 && --keyno < 1)
 				keyno = 1;
-			agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, keyno, cur), ARCH_CONVERT);
+			agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, keyno, cur));
 #ifdef DEBUG
 			if ((error = xfs_btree_check_sptr(cur, agbno, level)))
 				return error;
@@ -1124,8 +1123,8 @@ xfs_alloc_lookup(
 		 * not the last block, we're in the wrong block.
 		 */
 		if (dir == XFS_LOOKUP_GE &&
-		    keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) &&
-		    INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		    keyno > be16_to_cpu(block->bb_numrecs) &&
+		    be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
 			int	i;
 
 			cur->bc_ptrs[0] = keyno;
@@ -1142,7 +1141,7 @@ xfs_alloc_lookup(
 	/*
 	 * Return if we succeeded or not.
 	 */
-	if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT))
+	if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
 		*stat = 0;
 	else
 		*stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
@@ -1185,7 +1184,7 @@ xfs_alloc_lshift(
 	/*
 	 * If we've got no left sibling then we can't shift an entry left.
 	 */
-	if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+	if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
 		*stat = 0;
 		return 0;
 	}
@@ -1201,8 +1200,8 @@ xfs_alloc_lshift(
 	 * Set up the left neighbor as "left".
 	 */
 	if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
-			cur->bc_private.a.agno, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0, &lbp,
-			XFS_ALLOC_BTREE_REF)))
+			cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
+			0, &lbp, XFS_ALLOC_BTREE_REF)))
 		return error;
 	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
 	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
@@ -1210,11 +1209,11 @@ xfs_alloc_lshift(
 	/*
 	 * If it's full, it can't take another entry.
 	 */
-	if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+	if (be16_to_cpu(left->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
 		*stat = 0;
 		return 0;
 	}
-	nrec = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;
+	nrec = be16_to_cpu(left->bb_numrecs) + 1;
 	/*
 	 * If non-leaf, copy a key and a ptr to the left block.
 	 */
@@ -1229,7 +1228,7 @@ xfs_alloc_lshift(
 		lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur);
 		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
 #ifdef DEBUG
-		if ((error = xfs_btree_check_sptr(cur, INT_GET(*rpp, ARCH_CONVERT), level)))
+		if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
 			return error;
 #endif
 		*lpp = *rpp; /* INT_: copy */
@@ -1251,30 +1250,30 @@ xfs_alloc_lshift(
 	/*
 	 * Bump and log left's numrecs, decrement and log right's numrecs.
 	 */
-	INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1);
+	be16_add(&left->bb_numrecs, 1);
 	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
-	INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1);
+	be16_add(&right->bb_numrecs, -1);
 	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
 	/*
 	 * Slide the contents of right down one entry.
 	 */
 	if (level > 0) {
 #ifdef DEBUG
-		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
-			if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
+		for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
+			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
 					level)))
 				return error;
 		}
 #endif
-		memmove(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		memmove(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
-		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
-		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
+		memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
+		xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
+		xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
 	} else {
-		memmove(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
-		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
-		key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
-		key.ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
+		memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
+		xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
+		key.ar_startblock = rrp->ar_startblock;
+		key.ar_blockcount = rrp->ar_blockcount;
 		rkp = &key;
 	}
 	/*
@@ -1339,9 +1338,9 @@ xfs_alloc_newroot(
 		xfs_agnumber_t	seqno;
 
 		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
-		INT_SET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT, nbno);
-		INT_MOD(agf->agf_levels[cur->bc_btnum], ARCH_CONVERT, 1);
-		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+		agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno);
+		be32_add(&agf->agf_levels[cur->bc_btnum], 1);
+		seqno = be32_to_cpu(agf->agf_seqno);
 		mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
 		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
 			XFS_AGF_ROOTS | XFS_AGF_LEVELS);
@@ -1358,12 +1357,12 @@ xfs_alloc_newroot(
 	if ((error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp)))
 		return error;
 #endif
-	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+	if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
 		/*
 		 * Our block is left, pick up the right block.
 		 */
 		lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp));
-		rbno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
+		rbno = be32_to_cpu(left->bb_rightsib);
 		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
 				cur->bc_private.a.agno, rbno, 0, &rbp,
 				XFS_ALLOC_BTREE_REF)))
@@ -1380,7 +1379,7 @@ xfs_alloc_newroot(
 		rbp = lbp;
 		right = left;
 		rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp));
-		lbno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
+		lbno = be32_to_cpu(right->bb_leftsib);
 		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
 				cur->bc_private.a.agno, lbno, 0, &lbp,
 				XFS_ALLOC_BTREE_REF)))
@@ -1394,11 +1393,11 @@ xfs_alloc_newroot(
 	/*
 	 * Fill in the new block's btree header and log it.
 	 */
-	INT_SET(new->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
-	INT_SET(new->bb_level, ARCH_CONVERT, (__uint16_t)cur->bc_nlevels);
-	INT_SET(new->bb_numrecs, ARCH_CONVERT, 2);
-	INT_SET(new->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
-	INT_SET(new->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+	new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
+	new->bb_level = cpu_to_be16(cur->bc_nlevels);
+	new->bb_numrecs = cpu_to_be16(2);
+	new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+	new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
 	xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS);
 	ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
 	/*
@@ -1408,18 +1407,18 @@ xfs_alloc_newroot(
 		xfs_alloc_key_t		*kp;	/* btree key pointer */
 
 		kp = XFS_ALLOC_KEY_ADDR(new, 1, cur);
-		if (INT_GET(left->bb_level, ARCH_CONVERT) > 0) {
+		if (be16_to_cpu(left->bb_level) > 0) {
 			kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); /* INT_: structure copy */
 			kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);/* INT_: structure copy */
 		} else {
 			xfs_alloc_rec_t	*rp;	/* btree record pointer */
 
 			rp = XFS_ALLOC_REC_ADDR(left, 1, cur);
-			kp[0].ar_startblock = rp->ar_startblock; /* INT_: direct copy */
-			kp[0].ar_blockcount = rp->ar_blockcount; /* INT_: direct copy */
+			kp[0].ar_startblock = rp->ar_startblock;
+			kp[0].ar_blockcount = rp->ar_blockcount;
 			rp = XFS_ALLOC_REC_ADDR(right, 1, cur);
-			kp[1].ar_startblock = rp->ar_startblock; /* INT_: direct copy */
-			kp[1].ar_blockcount = rp->ar_blockcount; /* INT_: direct copy */
+			kp[1].ar_startblock = rp->ar_startblock;
+			kp[1].ar_blockcount = rp->ar_blockcount;
 		}
 	}
 	xfs_alloc_log_keys(cur, nbp, 1, 2);
@@ -1430,8 +1429,8 @@ xfs_alloc_newroot(
 		xfs_alloc_ptr_t		*pp;	/* btree address pointer */
 
 		pp = XFS_ALLOC_PTR_ADDR(new, 1, cur);
-		INT_SET(pp[0], ARCH_CONVERT, lbno);
-		INT_SET(pp[1], ARCH_CONVERT, rbno);
+		pp[0] = cpu_to_be32(lbno);
+		pp[1] = cpu_to_be32(rbno);
 	}
 	xfs_alloc_log_ptrs(cur, nbp, 1, 2);
 	/*
@@ -1476,7 +1475,7 @@ xfs_alloc_rshift(
 	/*
 	 * If we've got no right sibling then we can't shift an entry right.
 	 */
-	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+	if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
 		*stat = 0;
 		return 0;
 	}
@@ -1484,7 +1483,7 @@ xfs_alloc_rshift(
 	 * If the cursor entry is the one that would be moved, don't
 	 * do it... it's too complicated.
 	 */
-	if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {
+	if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
 		*stat = 0;
 		return 0;
 	}
@@ -1492,8 +1491,8 @@ xfs_alloc_rshift(
 	 * Set up the right neighbor as "right".
 	 */
 	if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
-			cur->bc_private.a.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rbp,
-			XFS_ALLOC_BTREE_REF)))
+			cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
+			0, &rbp, XFS_ALLOC_BTREE_REF)))
 		return error;
 	right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
 	if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
@@ -1501,7 +1500,7 @@ xfs_alloc_rshift(
 	/*
 	 * If it's full, it can't take another entry.
 	 */
-	if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+	if (be16_to_cpu(right->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
 		*stat = 0;
 		return 0;
 	}
@@ -1514,47 +1513,47 @@ xfs_alloc_rshift(
 		xfs_alloc_ptr_t	*lpp;	/* address pointer for left block */
 		xfs_alloc_ptr_t	*rpp;	/* address pointer for right block */
 
-		lkp = XFS_ALLOC_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
-		lpp = XFS_ALLOC_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
+		lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
 		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
 		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
 #ifdef DEBUG
-		for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
-			if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)))
+		for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
+			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
 				return error;
 		}
 #endif
-		memmove(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		memmove(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
+		memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
 #ifdef DEBUG
-		if ((error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level)))
+		if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
 			return error;
 #endif
 		*rkp = *lkp; /* INT_: copy */
 		*rpp = *lpp; /* INT_: copy */
-		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
-		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
+		xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
 		xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
 	} else {
 		xfs_alloc_rec_t	*lrp;	/* record pointer for left block */
 		xfs_alloc_rec_t	*rrp;	/* record pointer for right block */
 
-		lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
 		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
-		memmove(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
 		*rrp = *lrp;
-		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
-		key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
-		key.ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
+		xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
+		key.ar_startblock = rrp->ar_startblock;
+		key.ar_blockcount = rrp->ar_blockcount;
 		rkp = &key;
 		xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
 	}
 	/*
 	 * Decrement and log left's numrecs, bump and log right's numrecs.
 	 */
-	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
+	be16_add(&left->bb_numrecs, -1);
 	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
-	INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+	be16_add(&right->bb_numrecs, 1);
 	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
 	/*
 	 * Using a temporary cursor, update the parent key values of the
@@ -1627,17 +1626,17 @@ xfs_alloc_split(
 	/*
 	 * Fill in the btree header for the new block.
 	 */
-	INT_SET(right->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
-	right->bb_level = left->bb_level; /* INT_: direct copy */
-	INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));
+	right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
+	right->bb_level = left->bb_level;
+	right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
 	/*
 	 * Make sure that if there's an odd number of entries now, that
 	 * each new block will have the same number of entries.
 	 */
-	if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&
-	    cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
-		INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
-	i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;
+	if ((be16_to_cpu(left->bb_numrecs) & 1) &&
+	    cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
+		be16_add(&right->bb_numrecs, 1);
+	i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
 	/*
 	 * For non-leaf blocks, copy keys and addresses over to the new block.
 	 */
@@ -1652,15 +1651,15 @@ xfs_alloc_split(
 		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
 		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
 #ifdef DEBUG
-		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
-			if ((error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level)))
+		for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
+			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
 				return error;
 		}
 #endif
-		memcpy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp)); /* INT_: copy */
-		memcpy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp)); /* INT_: copy */
-		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
-		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
+		memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
+		xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
+		xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
 		*keyp = *rkp;
 	}
 	/*
@@ -1672,38 +1671,38 @@ xfs_alloc_split(
 
 		lrp = XFS_ALLOC_REC_ADDR(left, i, cur);
 		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
-		memcpy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
-		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
-		keyp->ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
-		keyp->ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
+		memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
+		xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
+		keyp->ar_startblock = rrp->ar_startblock;
+		keyp->ar_blockcount = rrp->ar_blockcount;
 	}
 	/*
 	 * Find the left block number by looking in the buffer.
 	 * Adjust numrecs, sibling pointers.
 	 */
 	lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
-	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
-	right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
-	INT_SET(left->bb_rightsib, ARCH_CONVERT, rbno);
-	INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
+	be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
+	right->bb_rightsib = left->bb_rightsib;
+	left->bb_rightsib = cpu_to_be32(rbno);
+	right->bb_leftsib = cpu_to_be32(lbno);
 	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS);
 	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
 	/*
 	 * If there's a block to the new block's right, make that block
 	 * point back to right instead of to left.
 	 */
-	if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+	if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
 		xfs_alloc_block_t	*rrblock;	/* rr btree block */
 		xfs_buf_t		*rrbp;		/* buffer for rrblock */
 
 		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
-				cur->bc_private.a.agno, INT_GET(right->bb_rightsib, ARCH_CONVERT), 0,
+				cur->bc_private.a.agno, be32_to_cpu(right->bb_rightsib), 0,
 				&rrbp, XFS_ALLOC_BTREE_REF)))
 			return error;
 		rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
 		if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
 			return error;
-		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, rbno);
+		rrblock->bb_leftsib = cpu_to_be32(rbno);
 		xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
 	}
 	/*
@@ -1711,9 +1710,9 @@ xfs_alloc_split(
 	 * If it's just pointing past the last entry in left, then we'll
 	 * insert there, so don't change anything in that case.
 	 */
-	if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {
+	if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
 		xfs_btree_setbuf(cur, level, rbp);
-		cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
+		cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
 	}
 	/*
 	 * If there are more levels, we'll need another cursor which refers to
@@ -1811,7 +1810,7 @@ xfs_alloc_decrement(
 	/*
 	 * If we just went off the left edge of the tree, return failure.
 	 */
-	if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+	if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
 		*stat = 0;
 		return 0;
 	}
@@ -1840,7 +1839,7 @@ xfs_alloc_decrement(
 		xfs_agblock_t	agbno;	/* block number of btree block */
 		xfs_buf_t	*bp;	/* buffer pointer for block */
 
-		agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
 		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
 				cur->bc_private.a.agno, agbno, 0, &bp,
 				XFS_ALLOC_BTREE_REF)))
@@ -1850,7 +1849,7 @@ xfs_alloc_decrement(
 		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
 		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
 			return error;
-		cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+		cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
 	}
 	*stat = 1;
 	return 0;
@@ -1917,7 +1916,7 @@ xfs_alloc_get_rec(
 	/*
 	 * Off the right end or left end, return failure.
 	 */
-	if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT) || ptr <= 0) {
+	if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
 		*stat = 0;
 		return 0;
 	}
@@ -1928,8 +1927,8 @@ xfs_alloc_get_rec(
 		xfs_alloc_rec_t		*rec;	/* record data */
 
 		rec = XFS_ALLOC_REC_ADDR(block, ptr, cur);
-		*bno = INT_GET(rec->ar_startblock, ARCH_CONVERT);
-		*len = INT_GET(rec->ar_blockcount, ARCH_CONVERT);
+		*bno = be32_to_cpu(rec->ar_startblock);
+		*len = be32_to_cpu(rec->ar_blockcount);
 	}
 	*stat = 1;
 	return 0;
@@ -1968,14 +1967,14 @@ xfs_alloc_increment(
 	 * Increment the ptr at this level.  If we're still in the block
 	 * then we're done.
 	 */
-	if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+	if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
 		*stat = 1;
 		return 0;
 	}
 	/*
 	 * If we just went off the right edge of the tree, return failure.
 	 */
-	if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+	if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
 		*stat = 0;
 		return 0;
 	}
@@ -1990,7 +1989,7 @@ xfs_alloc_increment(
 		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
 			return error;
 #endif
-		if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
+		if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
 			break;
 		/*
 		 * Read-ahead the right block, we're going to read it
@@ -2010,7 +2009,7 @@ xfs_alloc_increment(
 	     lev > level; ) {
 		xfs_agblock_t	agbno;	/* block number of btree block */
 
-		agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
 		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
 				cur->bc_private.a.agno, agbno, 0, &bp,
 				XFS_ALLOC_BTREE_REF)))
@@ -2045,8 +2044,8 @@ xfs_alloc_insert(
 
 	level = 0;
 	nbno = NULLAGBLOCK;
-	INT_SET(nrec.ar_startblock, ARCH_CONVERT, cur->bc_rec.a.ar_startblock);
-	INT_SET(nrec.ar_blockcount, ARCH_CONVERT, cur->bc_rec.a.ar_blockcount);
+	nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
+	nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
 	ncur = (xfs_btree_cur_t *)0;
 	pcur = cur;
 	/*
@@ -2167,8 +2166,8 @@ xfs_alloc_update(
 		/*
 		 * Fill in the new contents and log them.
 		 */
-		INT_SET(rp->ar_startblock, ARCH_CONVERT, bno);
-		INT_SET(rp->ar_blockcount, ARCH_CONVERT, len);
+		rp->ar_startblock = cpu_to_be32(bno);
+		rp->ar_blockcount = cpu_to_be32(len);
 		xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr);
 	}
 	/*
@@ -2177,15 +2176,15 @@ xfs_alloc_update(
 	 * extent in the a.g., which we cache in the a.g. freelist header.
 	 */
 	if (cur->bc_btnum == XFS_BTNUM_CNT &&
-	    INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK &&
-	    ptr == INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+	    be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
+	    ptr == be16_to_cpu(block->bb_numrecs)) {
 		xfs_agf_t	*agf;	/* a.g. freespace header */
 		xfs_agnumber_t	seqno;
 
 		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
-		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+		seqno = be32_to_cpu(agf->agf_seqno);
 		cur->bc_mp->m_perag[seqno].pagf_longest = len;
-		INT_SET(agf->agf_longest, ARCH_CONVERT, len);
+		agf->agf_longest = cpu_to_be32(len);
 		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
 			XFS_AGF_LONGEST);
 	}
@@ -2195,8 +2194,8 @@ xfs_alloc_update(
 	if (ptr == 1) {
 		xfs_alloc_key_t	key;	/* key containing [bno, len] */
 
-		INT_SET(key.ar_startblock, ARCH_CONVERT, bno);
-		INT_SET(key.ar_blockcount, ARCH_CONVERT, len);
+		key.ar_startblock = cpu_to_be32(bno);
+		key.ar_blockcount = cpu_to_be32(len);
 		if ((error = xfs_alloc_updkey(cur, &key, 1)))
 			return error;
 	}
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index ed5161a572e..bce81c7a4fd 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_ALLOC_BTREE_H__
 #define	__XFS_ALLOC_BTREE_H__
@@ -52,48 +38,29 @@ struct xfs_mount;
 /*
  * Data record/key structure
  */
-typedef struct xfs_alloc_rec
-{
+typedef struct xfs_alloc_rec {
+	__be32		ar_startblock;	/* starting block number */
+	__be32		ar_blockcount;	/* count of free blocks */
+} xfs_alloc_rec_t, xfs_alloc_key_t;
+
+typedef struct xfs_alloc_rec_incore {
 	xfs_agblock_t	ar_startblock;	/* starting block number */
 	xfs_extlen_t	ar_blockcount;	/* count of free blocks */
-} xfs_alloc_rec_t, xfs_alloc_key_t;
+} xfs_alloc_rec_incore_t;
 
-typedef xfs_agblock_t xfs_alloc_ptr_t;	/* btree pointer type */
-					/* btree block header type */
+/* btree pointer type */
+typedef __be32 xfs_alloc_ptr_t;
+/* btree block header type */
 typedef	struct xfs_btree_sblock xfs_alloc_block_t;
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_ALLOC_BLOCK)
-xfs_alloc_block_t *xfs_buf_to_alloc_block(struct xfs_buf *bp);
-#define	XFS_BUF_TO_ALLOC_BLOCK(bp)	xfs_buf_to_alloc_block(bp)
-#else
-#define	XFS_BUF_TO_ALLOC_BLOCK(bp) ((xfs_alloc_block_t *)(XFS_BUF_PTR(bp)))
-#endif
+#define	XFS_BUF_TO_ALLOC_BLOCK(bp)	((xfs_alloc_block_t *)XFS_BUF_PTR(bp))
 
 /*
  * Real block structures have a size equal to the disk block size.
  */
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_SIZE)
-int xfs_alloc_block_size(int lev, struct xfs_btree_cur *cur);
-#define	XFS_ALLOC_BLOCK_SIZE(lev,cur)	xfs_alloc_block_size(lev,cur)
-#else
 #define	XFS_ALLOC_BLOCK_SIZE(lev,cur)	(1 << (cur)->bc_blocklog)
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_MAXRECS)
-int xfs_alloc_block_maxrecs(int lev, struct xfs_btree_cur *cur);
-#define	XFS_ALLOC_BLOCK_MAXRECS(lev,cur)	xfs_alloc_block_maxrecs(lev,cur)
-#else
-#define	XFS_ALLOC_BLOCK_MAXRECS(lev,cur)	\
-	((cur)->bc_mp->m_alloc_mxr[lev != 0])
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_MINRECS)
-int xfs_alloc_block_minrecs(int lev, struct xfs_btree_cur *cur);
-#define	XFS_ALLOC_BLOCK_MINRECS(lev,cur)	xfs_alloc_block_minrecs(lev,cur)
-#else
-#define	XFS_ALLOC_BLOCK_MINRECS(lev,cur)	\
-	((cur)->bc_mp->m_alloc_mnr[lev != 0])
-#endif
+#define	XFS_ALLOC_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_alloc_mxr[lev != 0])
+#define	XFS_ALLOC_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_alloc_mnr[lev != 0])
 
 /*
  * Minimum and maximum blocksize and sectorsize.
@@ -113,145 +80,80 @@ int xfs_alloc_block_minrecs(int lev, struct xfs_btree_cur *cur);
  * Block numbers in the AG:
  * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BNO_BLOCK)
-xfs_agblock_t xfs_bno_block(struct xfs_mount *mp);
-#define	XFS_BNO_BLOCK(mp)	xfs_bno_block(mp)
-#else
 #define	XFS_BNO_BLOCK(mp)	((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CNT_BLOCK)
-xfs_agblock_t xfs_cnt_block(struct xfs_mount *mp);
-#define	XFS_CNT_BLOCK(mp)	xfs_cnt_block(mp)
-#else
 #define	XFS_CNT_BLOCK(mp)	((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1))
-#endif
 
 /*
  * Record, key, and pointer address macros for btree blocks.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_REC_ADDR)
-xfs_alloc_rec_t *xfs_alloc_rec_addr(xfs_alloc_block_t *bb, int i,
-				    struct xfs_btree_cur *cur);
-#define	XFS_ALLOC_REC_ADDR(bb,i,cur)	xfs_alloc_rec_addr(bb,i,cur)
-#else
 #define	XFS_ALLOC_REC_ADDR(bb,i,cur)	\
-	XFS_BTREE_REC_ADDR(XFS_ALLOC_BLOCK_SIZE(0,cur), xfs_alloc, bb, i, \
-		XFS_ALLOC_BLOCK_MAXRECS(0, cur))
-#endif
+	XFS_BTREE_REC_ADDR(XFS_ALLOC_BLOCK_SIZE(0,cur), xfs_alloc, \
+				bb, i, XFS_ALLOC_BLOCK_MAXRECS(0, cur))
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_KEY_ADDR)
-xfs_alloc_key_t *xfs_alloc_key_addr(xfs_alloc_block_t *bb, int i,
-				    struct xfs_btree_cur *cur);
-#define	XFS_ALLOC_KEY_ADDR(bb,i,cur)	xfs_alloc_key_addr(bb,i,cur)
-#else
 #define	XFS_ALLOC_KEY_ADDR(bb,i,cur)	\
-	XFS_BTREE_KEY_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, bb, i, \
-		XFS_ALLOC_BLOCK_MAXRECS(1, cur))
-#endif
+	XFS_BTREE_KEY_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, \
+				bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_PTR_ADDR)
-xfs_alloc_ptr_t *xfs_alloc_ptr_addr(xfs_alloc_block_t *bb, int i,
-				    struct xfs_btree_cur *cur);
-#define	XFS_ALLOC_PTR_ADDR(bb,i,cur)	xfs_alloc_ptr_addr(bb,i,cur)
-#else
 #define	XFS_ALLOC_PTR_ADDR(bb,i,cur)	\
-	XFS_BTREE_PTR_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, bb, i, \
-		XFS_ALLOC_BLOCK_MAXRECS(1, cur))
-#endif
-
-/*
- * Prototypes for externally visible routines.
- */
+	XFS_BTREE_PTR_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, \
+				bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
 
 /*
  * Decrement cursor by one record at the level.
  * For nonzero levels the leaf-ward information is untouched.
  */
-int					/* error */
-xfs_alloc_decrement(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	int			level,	/* level in btree, 0 is leaf */
-	int			*stat);	/* success/failure */
+extern int xfs_alloc_decrement(struct xfs_btree_cur *cur, int level, int *stat);
 
 /*
  * Delete the record pointed to by cur.
  * The cursor refers to the place where the record was (could be inserted)
  * when the operation returns.
  */
-int					/* error */
-xfs_alloc_delete(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	int			*stat);	/* success/failure */
+extern int xfs_alloc_delete(struct xfs_btree_cur *cur, int *stat);
 
 /*
  * Get the data from the pointed-to record.
  */
-int					/* error */
-xfs_alloc_get_rec(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_agblock_t		*bno,	/* output: starting block of extent */
-	xfs_extlen_t		*len,	/* output: length of extent */
-	int			*stat);	/* output: success/failure */
+extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur,	xfs_agblock_t *bno,
+				xfs_extlen_t *len, int *stat);
 
 /*
  * Increment cursor by one record at the level.
  * For nonzero levels the leaf-ward information is untouched.
  */
-int					/* error */
-xfs_alloc_increment(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	int			level,	/* level in btree, 0 is leaf */
-	int			*stat);	/* success/failure */
+extern int xfs_alloc_increment(struct xfs_btree_cur *cur, int level, int *stat);
 
 /*
  * Insert the current record at the point referenced by cur.
  * The cursor may be inconsistent on return if splits have been done.
  */
-int					/* error */
-xfs_alloc_insert(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	int			*stat);	/* success/failure */
+extern int xfs_alloc_insert(struct xfs_btree_cur *cur, int *stat);
 
 /*
  * Lookup the record equal to [bno, len] in the btree given by cur.
  */
-int					/* error */
-xfs_alloc_lookup_eq(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_agblock_t		bno,	/* starting block of extent */
-	xfs_extlen_t		len,	/* length of extent */
-	int			*stat);	/* success/failure */
+extern int xfs_alloc_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+				xfs_extlen_t len, int *stat);
 
 /*
  * Lookup the first record greater than or equal to [bno, len]
  * in the btree given by cur.
  */
-int					/* error */
-xfs_alloc_lookup_ge(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_agblock_t		bno,	/* starting block of extent */
-	xfs_extlen_t		len,	/* length of extent */
-	int			*stat);	/* success/failure */
+extern int xfs_alloc_lookup_ge(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+				xfs_extlen_t len, int *stat);
 
 /*
  * Lookup the first record less than or equal to [bno, len]
  * in the btree given by cur.
  */
-int					/* error */
-xfs_alloc_lookup_le(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_agblock_t		bno,	/* starting block of extent */
-	xfs_extlen_t		len,	/* length of extent */
-	int			*stat);	/* success/failure */
+extern int xfs_alloc_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+				xfs_extlen_t len, int *stat);
 
 /*
  * Update the record referred to by cur, to the value given by [bno, len].
  * This either works (return 0) or gets an EFSCORRUPTED error.
  */
-int					/* error */
-xfs_alloc_update(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_agblock_t		bno,	/* starting block of extent */
-	xfs_extlen_t		len);	/* length of extent */
+extern int xfs_alloc_update(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+				xfs_extlen_t len);
 
 #endif	/* __XFS_ALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index ae35189b3d7..68e5051d8e2 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_ARCH_H__
 #define __XFS_ARCH_H__
@@ -40,22 +26,28 @@
 
 #include <asm/byteorder.h>
 
-#ifdef __LITTLE_ENDIAN
-# define __BYTE_ORDER	__LITTLE_ENDIAN
-#endif
 #ifdef __BIG_ENDIAN
-# define __BYTE_ORDER	__BIG_ENDIAN
+#define	XFS_NATIVE_HOST	1
+#else
+#undef XFS_NATIVE_HOST
+#endif
+
+#else /* __KERNEL__ */
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define	XFS_NATIVE_HOST	1
+#else
+#undef XFS_NATIVE_HOST
 #endif
 
 #endif	/* __KERNEL__ */
 
 /* do we need conversion? */
-
 #define ARCH_NOCONVERT 1
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-# define ARCH_CONVERT	0
-#else
+#ifdef XFS_NATIVE_HOST
 # define ARCH_CONVERT	ARCH_NOCONVERT
+#else
+# define ARCH_CONVERT	0
 #endif
 
 /* generic swapping macros */
@@ -162,6 +154,21 @@
     } \
 }
 
+static inline void be16_add(__be16 *a, __s16 b)
+{
+	*a = cpu_to_be16(be16_to_cpu(*a) + b);
+}
+
+static inline void be32_add(__be32 *a, __s32 b)
+{
+	*a = cpu_to_be32(be32_to_cpu(*a) + b);
+}
+
+static inline void be64_add(__be64 *a, __s64 b)
+{
+	*a = cpu_to_be64(be64_to_cpu(*a) + b);
+}
+
 /*
  * In directories inode numbers are stored as unaligned arrays of unsigned
  * 8bit integers on disk.
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index a41ad3a5e55..5484eeb460c 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -1,41 +1,26 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -43,27 +28,26 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_inode_item.h"
 #include "xfs_bmap.h"
-#include "xfs_da_btree.h"
 #include "xfs_attr.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_error.h"
-#include "xfs_bit.h"
 #include "xfs_quota.h"
-#include "xfs_rw.h"
 #include "xfs_trans_space.h"
 #include "xfs_acl.h"
+#include "xfs_rw.h"
 
 /*
  * xfs_attr.c
@@ -122,7 +106,7 @@ ktrace_t *xfs_attr_trace_buf;
  *========================================================================*/
 
 int
-xfs_attr_fetch(xfs_inode_t *ip, char *name, int namelen,
+xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
 	       char *value, int *valuelenp, int flags, struct cred *cred)
 {
 	xfs_da_args_t   args;
@@ -177,7 +161,7 @@ xfs_attr_fetch(xfs_inode_t *ip, char *name, int namelen,
 }
 
 int
-xfs_attr_get(bhv_desc_t *bdp, char *name, char *value, int *valuelenp,
+xfs_attr_get(bhv_desc_t *bdp, const char *name, char *value, int *valuelenp,
 	     int flags, struct cred *cred)
 {
 	xfs_inode_t	*ip = XFS_BHVTOI(bdp);
@@ -200,40 +184,18 @@ xfs_attr_get(bhv_desc_t *bdp, char *name, char *value, int *valuelenp,
 	return(error);
 }
 
-/*ARGSUSED*/
-int								/* error */
-xfs_attr_set(bhv_desc_t *bdp, char *name, char *value, int valuelen, int flags,
-		     struct cred *cred)
+STATIC int
+xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
+		 char *value, int valuelen, int flags)
 {
 	xfs_da_args_t	args;
-	xfs_inode_t	*dp;
 	xfs_fsblock_t	firstblock;
 	xfs_bmap_free_t flist;
 	int		error, err2, committed;
 	int		local, size;
 	uint		nblks;
-	xfs_mount_t	*mp;
+	xfs_mount_t	*mp = dp->i_mount;
 	int             rsvd = (flags & ATTR_ROOT) != 0;
-	int             namelen;
-
-	namelen = strlen(name);
-	if (namelen >= MAXNAMELEN)
-		return EFAULT;		/* match IRIX behaviour */
-
-	XFS_STATS_INC(xs_attr_set);
-
-	dp = XFS_BHVTOI(bdp);
-	mp = dp->i_mount;
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return (EIO);
-
-	xfs_ilock(dp, XFS_ILOCK_SHARED);
-	if (!(flags & ATTR_SECURE) &&
-	     (error = xfs_iaccess(dp, S_IWUSR, cred))) {
-		xfs_iunlock(dp, XFS_ILOCK_SHARED);
-		return(XFS_ERROR(error));
-	}
-	xfs_iunlock(dp, XFS_ILOCK_SHARED);
 
 	/*
 	 * Attach the dquots to the inode.
@@ -242,12 +204,18 @@ xfs_attr_set(bhv_desc_t *bdp, char *name, char *value, int valuelen, int flags,
 		return (error);
 
 	/*
+	 * Determine space new attribute will use, and if it would be
+	 * "local" or "remote" (note: local != inline).
+	 */
+	size = xfs_attr_leaf_newentsize(namelen, valuelen,
+					mp->m_sb.sb_blocksize, &local);
+
+	/*
 	 * If the inode doesn't have an attribute fork, add one.
 	 * (inode must not be locked when we call this routine)
 	 */
 	if (XFS_IFORK_Q(dp) == 0) {
-		error = xfs_bmap_add_attrfork(dp, rsvd);
-		if (error)
+		if ((error = xfs_bmap_add_attrfork(dp, size, rsvd)))
 			return(error);
 	}
 
@@ -265,13 +233,9 @@ xfs_attr_set(bhv_desc_t *bdp, char *name, char *value, int valuelen, int flags,
 	args.firstblock = &firstblock;
 	args.flist = &flist;
 	args.whichfork = XFS_ATTR_FORK;
+	args.addname = 1;
 	args.oknoent = 1;
 
-	/* Determine space new attribute will use, and if it will be inline
-	 * or out of line.
-	 */
-	size = xfs_attr_leaf_newentsize(&args, mp->m_sb.sb_blocksize, &local);
-
 	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
 	if (local) {
 		if (size > (mp->m_sb.sb_blocksize >> 1)) {
@@ -343,7 +307,7 @@ xfs_attr_set(bhv_desc_t *bdp, char *name, char *value, int valuelen, int flags,
 		 * Build initial attribute list (if required).
 		 */
 		if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
-			(void)xfs_attr_shortform_create(&args);
+			xfs_attr_shortform_create(&args);
 
 		/*
 		 * Try to add the attr to the attribute list in
@@ -456,32 +420,21 @@ out:
 	return(error);
 }
 
-/*
- * Generic handler routine to remove a name from an attribute list.
- * Transitions attribute list from Btree to shortform as necessary.
- */
-/*ARGSUSED*/
-int								/* error */
-xfs_attr_remove(bhv_desc_t *bdp, char *name, int flags, struct cred *cred)
+int
+xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int flags,
+	     struct cred *cred)
 {
-	xfs_da_args_t       args;
-	xfs_inode_t         *dp;
-	xfs_fsblock_t       firstblock;
-	xfs_bmap_free_t     flist;
-	int                 error;
-	xfs_mount_t         *mp;
-	int                 namelen;
+	xfs_inode_t	*dp;
+	int             namelen, error;
 
-	ASSERT(MAXNAMELEN-1<=0xff); /* length is stored in uint8 */
 	namelen = strlen(name);
-	if (namelen>=MAXNAMELEN)
-		return EFAULT; /* match irix behaviour */
+	if (namelen >= MAXNAMELEN)
+		return EFAULT;		/* match IRIX behaviour */
 
-	XFS_STATS_INC(xs_attr_remove);
+	XFS_STATS_INC(xs_attr_set);
 
 	dp = XFS_BHVTOI(bdp);
-	mp = dp->i_mount;
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
 		return (EIO);
 
 	xfs_ilock(dp, XFS_ILOCK_SHARED);
@@ -489,14 +442,25 @@ xfs_attr_remove(bhv_desc_t *bdp, char *name, int flags, struct cred *cred)
 	     (error = xfs_iaccess(dp, S_IWUSR, cred))) {
 		xfs_iunlock(dp, XFS_ILOCK_SHARED);
 		return(XFS_ERROR(error));
-	} else if (XFS_IFORK_Q(dp) == 0 ||
-		   (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-		    dp->i_d.di_anextents == 0)) {
-		xfs_iunlock(dp, XFS_ILOCK_SHARED);
-		return(XFS_ERROR(ENOATTR));
 	}
 	xfs_iunlock(dp, XFS_ILOCK_SHARED);
 
+	return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags);
+}
+
+/*
+ * Generic handler routine to remove a name from an attribute list.
+ * Transitions attribute list from Btree to shortform as necessary.
+ */
+STATIC int
+xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
+{
+	xfs_da_args_t	args;
+	xfs_fsblock_t	firstblock;
+	xfs_bmap_free_t	flist;
+	int		error;
+	xfs_mount_t	*mp = dp->i_mount;
+
 	/*
 	 * Fill in the arg structure for this request.
 	 */
@@ -544,7 +508,6 @@ xfs_attr_remove(bhv_desc_t *bdp, char *name, int flags, struct cred *cred)
 				      XFS_ATTRRM_LOG_COUNT))) {
 		xfs_trans_cancel(args.trans, 0);
 		return(error);
-
 	}
 
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
@@ -612,6 +575,38 @@ out:
 	return(error);
 }
 
+int
+xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred)
+{
+	xfs_inode_t         *dp;
+	int                 namelen, error;
+
+	namelen = strlen(name);
+	if (namelen >= MAXNAMELEN)
+		return EFAULT;		/* match IRIX behaviour */
+
+	XFS_STATS_INC(xs_attr_remove);
+
+	dp = XFS_BHVTOI(bdp);
+	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+		return (EIO);
+
+	xfs_ilock(dp, XFS_ILOCK_SHARED);
+	if (!(flags & ATTR_SECURE) &&
+	     (error = xfs_iaccess(dp, S_IWUSR, cred))) {
+		xfs_iunlock(dp, XFS_ILOCK_SHARED);
+		return(XFS_ERROR(error));
+	} else if (XFS_IFORK_Q(dp) == 0 ||
+		   (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
+		    dp->i_d.di_anextents == 0)) {
+		xfs_iunlock(dp, XFS_ILOCK_SHARED);
+		return(XFS_ERROR(ENOATTR));
+	}
+	xfs_iunlock(dp, XFS_ILOCK_SHARED);
+
+	return xfs_attr_remove_int(dp, name, namelen, flags);
+}
+
 /*
  * Generate a list of extended attribute names and optionally
  * also value lengths.  Positive return value follows the XFS
@@ -811,7 +806,7 @@ out:
 STATIC int
 xfs_attr_shortform_addname(xfs_da_args_t *args)
 {
-	int newsize, retval;
+	int newsize, forkoff, retval;
 
 	retval = xfs_attr_shortform_lookup(args);
 	if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
@@ -823,16 +818,18 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
 		ASSERT(retval == 0);
 	}
 
+	if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
+	    args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
+		return(XFS_ERROR(ENOSPC));
+
 	newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
 	newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
-	if ((newsize <= XFS_IFORK_ASIZE(args->dp)) &&
-	    (args->namelen < XFS_ATTR_SF_ENTSIZE_MAX) &&
-	    (args->valuelen < XFS_ATTR_SF_ENTSIZE_MAX)) {
-		retval = xfs_attr_shortform_add(args);
-		ASSERT(retval == 0);
-	} else {
+
+	forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
+	if (!forkoff)
 		return(XFS_ERROR(ENOSPC));
-	}
+
+	xfs_attr_shortform_add(args, forkoff);
 	return(0);
 }
 
@@ -852,7 +849,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 {
 	xfs_inode_t *dp;
 	xfs_dabuf_t *bp;
-	int retval, error, committed;
+	int retval, error, committed, forkoff;
 
 	/*
 	 * Read the (only) block in the attribute list in.
@@ -995,9 +992,9 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 		/*
 		 * If the result is small enough, shrink it all into the inode.
 		 */
-		if (xfs_attr_shortform_allfit(bp, dp)) {
+		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
 			XFS_BMAP_INIT(args->flist, args->firstblock);
-			error = xfs_attr_leaf_to_shortform(bp, args);
+			error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (!error) {
 				error = xfs_bmap_finish(&args->trans,
@@ -1049,8 +1046,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
 {
 	xfs_inode_t *dp;
 	xfs_dabuf_t *bp;
-	int committed;
-	int error;
+	int error, committed, forkoff;
 
 	/*
 	 * Remove the attribute.
@@ -1075,9 +1071,9 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
 	/*
 	 * If the result is small enough, shrink it all into the inode.
 	 */
-	if (xfs_attr_shortform_allfit(bp, dp)) {
+	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
 		XFS_BMAP_INIT(args->flist, args->firstblock);
-		error = xfs_attr_leaf_to_shortform(bp, args);
+		error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
 		/* bp is gone due to xfs_da_shrink_inode */
 		if (!error) {
 			error = xfs_bmap_finish(&args->trans, args->flist,
@@ -1448,7 +1444,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 	xfs_da_state_blk_t *blk;
 	xfs_inode_t *dp;
 	xfs_dabuf_t *bp;
-	int retval, error, committed;
+	int retval, error, committed, forkoff;
 
 	/*
 	 * Tie a string around our finger to remind us where we are.
@@ -1569,9 +1565,9 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 				      bp->data)->hdr.info.magic, ARCH_CONVERT)
 						       == XFS_ATTR_LEAF_MAGIC);
 
-		if (xfs_attr_shortform_allfit(bp, dp)) {
+		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
 			XFS_BMAP_INIT(args->flist, args->firstblock);
-			error = xfs_attr_leaf_to_shortform(bp, args);
+			error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (!error) {
 				error = xfs_bmap_finish(&args->trans,
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 45ab1c542ba..b2c7b9fcded 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000, 2002-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_ATTR_H__
 #define	__XFS_ATTR_H__
@@ -172,15 +158,15 @@ struct xfs_da_args;
 /*
  * Overall external interface routines.
  */
-int xfs_attr_get(bhv_desc_t *, char *, char *, int *, int, struct cred *);
-int xfs_attr_set(bhv_desc_t *, char *, char *, int, int, struct cred *);
-int xfs_attr_remove(bhv_desc_t *, char *, int, struct cred *);
+int xfs_attr_get(bhv_desc_t *, const char *, char *, int *, int, struct cred *);
+int xfs_attr_set(bhv_desc_t *, const char *, char *, int, int, struct cred *);
+int xfs_attr_remove(bhv_desc_t *, const char *, int, struct cred *);
 int xfs_attr_list(bhv_desc_t *, char *, int, int,
 			 struct attrlist_cursor_kern *, struct cred *);
 int xfs_attr_inactive(struct xfs_inode *dp);
 
 int xfs_attr_shortform_getvalue(struct xfs_da_args *);
-int xfs_attr_fetch(struct xfs_inode *, char *, int,
+int xfs_attr_fetch(struct xfs_inode *, const char *, int,
 			char *, int *, int, struct cred *);
 
 #endif	/* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 1cdd574c63a..35e557b00db 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -1,46 +1,26 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-/*
- * xfs_attr_leaf.c
- *
- * GROT: figure out how to recover gracefully when bmap returns ENOSPC.
- */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -48,23 +28,22 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
+#include "xfs_inode_item.h"
 #include "xfs_bmap.h"
-#include "xfs_da_btree.h"
 #include "xfs_attr.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_error.h"
-#include "xfs_bit.h"
 
 /*
  * xfs_attr_leaf.c
@@ -118,13 +97,82 @@ STATIC int xfs_attr_put_listent(xfs_attr_list_context_t *context,
 
 
 /*========================================================================
- * External routines when dirsize < XFS_LITINO(mp).
+ * External routines when attribute fork size < XFS_LITINO(mp).
  *========================================================================*/
 
 /*
- * Create the initial contents of a shortform attribute list.
+ * Query whether the requested number of additional bytes of extended
+ * attribute space will be able to fit inline.
+ * Returns zero if not, else the di_forkoff fork offset to be used in the
+ * literal area for attribute data once the new bytes have been added.
+ *
+ * di_forkoff must be 8 byte aligned, hence is stored as a >>3 value;
+ * special case for dev/uuid inodes, they have fixed size data forks.
  */
 int
+xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
+{
+	int offset;
+	int minforkoff;	/* lower limit on valid forkoff locations */
+	int maxforkoff;	/* upper limit on valid forkoff locations */
+	xfs_mount_t *mp = dp->i_mount;
+
+	offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */
+
+	switch (dp->i_d.di_format) {
+	case XFS_DINODE_FMT_DEV:
+		minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
+		return (offset >= minforkoff) ? minforkoff : 0;
+	case XFS_DINODE_FMT_UUID:
+		minforkoff = roundup(sizeof(uuid_t), 8) >> 3;
+		return (offset >= minforkoff) ? minforkoff : 0;
+	}
+
+	if (unlikely(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) {
+		if (bytes <= XFS_IFORK_ASIZE(dp))
+			return mp->m_attroffset >> 3;
+		return 0;
+	}
+
+	/* data fork btree root can have at least this many key/ptr pairs */
+	minforkoff = MAX(dp->i_df.if_bytes, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
+	minforkoff = roundup(minforkoff, 8) >> 3;
+
+	/* attr fork btree root can have at least this many key/ptr pairs */
+	maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
+	maxforkoff = maxforkoff >> 3;	/* rounded down */
+
+	if (offset >= minforkoff && offset < maxforkoff)
+		return offset;
+	if (offset >= maxforkoff)
+		return maxforkoff;
+	return 0;
+}
+
+/*
+ * Switch on the ATTR2 superblock bit (implies also FEATURES2)
+ */
+STATIC void
+xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
+{
+	unsigned long s;
+
+	if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR) &&
+	    !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {
+		s = XFS_SB_LOCK(mp);
+		if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
+			XFS_SB_VERSION_ADDATTR2(&mp->m_sb);
+			XFS_SB_UNLOCK(mp, s);
+			xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
+		} else
+			XFS_SB_UNLOCK(mp, s);
+	}
+}
+
+/*
+ * Create the initial contents of a shortform attribute list.
+ */
+void
 xfs_attr_shortform_create(xfs_da_args_t *args)
 {
 	xfs_attr_sf_hdr_t *hdr;
@@ -148,29 +196,37 @@ xfs_attr_shortform_create(xfs_da_args_t *args)
 	hdr->count = 0;
 	INT_SET(hdr->totsize, ARCH_CONVERT, sizeof(*hdr));
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
-	return(0);
 }
 
 /*
  * Add a name/value pair to the shortform attribute list.
  * Overflow from the inode has already been checked for.
  */
-int
-xfs_attr_shortform_add(xfs_da_args_t *args)
+void
+xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
 {
 	xfs_attr_shortform_t *sf;
 	xfs_attr_sf_entry_t *sfe;
 	int i, offset, size;
+	xfs_mount_t *mp;
 	xfs_inode_t *dp;
 	xfs_ifork_t *ifp;
 
 	dp = args->dp;
+	mp = dp->i_mount;
+	dp->i_d.di_forkoff = forkoff;
+	dp->i_df.if_ext_max =
+		XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
+	dp->i_afp->if_ext_max =
+		XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
+
 	ifp = dp->i_afp;
 	ASSERT(ifp->if_flags & XFS_IFINLINE);
 	sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
 	sfe = &sf->list[0];
 	for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT);
 				sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
+#ifdef DEBUG
 		if (sfe->namelen != args->namelen)
 			continue;
 		if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
@@ -181,7 +237,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args)
 		if (((args->flags & ATTR_ROOT) != 0) !=
 		    ((sfe->flags & XFS_ATTR_ROOT) != 0))
 			continue;
-		return(XFS_ERROR(EEXIST));
+		ASSERT(0);
+#endif
 	}
 
 	offset = (char *)sfe - (char *)sf;
@@ -200,11 +257,11 @@ xfs_attr_shortform_add(xfs_da_args_t *args)
 	INT_MOD(sf->hdr.totsize, ARCH_CONVERT, size);
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
 
-	return(0);
+	xfs_sbversion_add_attr2(mp, args->trans);
 }
 
 /*
- * Remove a name from the shortform attribute list structure.
+ * Remove an attribute from the shortform attribute list structure.
  */
 int
 xfs_attr_shortform_remove(xfs_da_args_t *args)
@@ -212,17 +269,16 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 	xfs_attr_shortform_t *sf;
 	xfs_attr_sf_entry_t *sfe;
 	int base, size=0, end, totsize, i;
+	xfs_mount_t *mp;
 	xfs_inode_t *dp;
 
-	/*
-	 * Remove the attribute.
-	 */
 	dp = args->dp;
+	mp = dp->i_mount;
 	base = sizeof(xfs_attr_sf_hdr_t);
 	sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data;
 	sfe = &sf->list[0];
-	for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT);
-				sfe = XFS_ATTR_SF_NEXTENTRY(sfe),
+	end = INT_GET(sf->hdr.count, ARCH_CONVERT);
+	for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe),
 					base += size, i++) {
 		size = XFS_ATTR_SF_ENTSIZE(sfe);
 		if (sfe->namelen != args->namelen)
@@ -237,19 +293,51 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 			continue;
 		break;
 	}
-	if (i == INT_GET(sf->hdr.count, ARCH_CONVERT))
+	if (i == end)
 		return(XFS_ERROR(ENOATTR));
 
+	/*
+	 * Fix up the attribute fork data, covering the hole
+	 */
 	end = base + size;
 	totsize = INT_GET(sf->hdr.totsize, ARCH_CONVERT);
-	if (end != totsize) {
-		memmove(&((char *)sf)[base], &((char *)sf)[end],
-							totsize - end);
-	}
+	if (end != totsize)
+		memmove(&((char *)sf)[base], &((char *)sf)[end], totsize - end);
 	INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
 	INT_MOD(sf->hdr.totsize, ARCH_CONVERT, -size);
-	xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
-	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
+
+	/*
+	 * Fix up the start offset of the attribute fork
+	 */
+	totsize -= size;
+	if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname) {
+		/*
+		 * Last attribute now removed, revert to original
+		 * inode format making all literal area available
+		 * to the data fork once more.
+		 */
+		xfs_idestroy_fork(dp, XFS_ATTR_FORK);
+		dp->i_d.di_forkoff = 0;
+		dp->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+		ASSERT(dp->i_d.di_anextents == 0);
+		ASSERT(dp->i_afp == NULL);
+		dp->i_df.if_ext_max =
+			XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
+		xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
+	} else {
+		xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
+		dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
+		ASSERT(dp->i_d.di_forkoff);
+		ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname);
+		dp->i_afp->if_ext_max =
+			XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
+		dp->i_df.if_ext_max =
+			XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
+		xfs_trans_log_inode(args->trans, dp,
+					XFS_ILOG_CORE | XFS_ILOG_ADATA);
+	}
+
+	xfs_sbversion_add_attr2(mp, args->trans);
 
 	return(0);
 }
@@ -561,7 +649,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 	/*
 	 * Sort the entries on hash then entno.
 	 */
-	qsort(sbuf, nsbuf, sizeof(*sbuf), xfs_attr_shortform_compare);
+	xfs_sort(sbuf, nsbuf, sizeof(*sbuf), xfs_attr_shortform_compare);
 
 	/*
 	 * Re-find our place IN THE SORTED LIST.
@@ -649,14 +737,16 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
 				+ name_loc->namelen
 				+ INT_GET(name_loc->valuelen, ARCH_CONVERT);
 	}
-	return( bytes < XFS_IFORK_ASIZE(dp) );
+	if (bytes == sizeof(struct xfs_attr_sf_hdr))
+		return(-1);
+	return(xfs_attr_shortform_bytesfit(dp, bytes));
 }
 
 /*
  * Convert a leaf attribute list to shortform attribute list
  */
 int
-xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args)
+xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
 {
 	xfs_attr_leafblock_t *leaf;
 	xfs_attr_leaf_entry_t *entry;
@@ -683,9 +773,25 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	error = xfs_da_shrink_inode(args, 0, bp);
 	if (error)
 		goto out;
-	error = xfs_attr_shortform_create(args);
-	if (error)
+
+	if (forkoff == -1) {
+		/*
+		 * Last attribute was removed, revert to original
+		 * inode format making all literal area available
+		 * to the data fork once more.
+		 */
+		xfs_idestroy_fork(dp, XFS_ATTR_FORK);
+		dp->i_d.di_forkoff = 0;
+		dp->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+		ASSERT(dp->i_d.di_anextents == 0);
+		ASSERT(dp->i_afp == NULL);
+		dp->i_df.if_ext_max =
+			XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
+		xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
 		goto out;
+	}
+
+	xfs_attr_shortform_create(args);
 
 	/*
 	 * Copy the attributes
@@ -713,7 +819,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args)
 		nargs.hashval = INT_GET(entry->hashval, ARCH_CONVERT);
 		nargs.flags = (entry->flags & XFS_ATTR_SECURE) ? ATTR_SECURE :
 			      ((entry->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0);
-		xfs_attr_shortform_add(&nargs);
+		xfs_attr_shortform_add(&nargs, forkoff);
 	}
 	error = 0;
 
@@ -898,7 +1004,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
 	ASSERT((args->index >= 0)
 		&& (args->index <= INT_GET(leaf->hdr.count, ARCH_CONVERT)));
 	hdr = &leaf->hdr;
-	entsize = xfs_attr_leaf_newentsize(args,
+	entsize = xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
 			   args->trans->t_mountp->m_sb.sb_blocksize, NULL);
 
 	/*
@@ -995,13 +1101,14 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 	mp = args->trans->t_mountp;
 	ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
 	ASSERT((INT_GET(map->base, ARCH_CONVERT) & 0x3) == 0);
-	ASSERT(INT_GET(map->size, ARCH_CONVERT)
-				>= xfs_attr_leaf_newentsize(args,
-					     mp->m_sb.sb_blocksize, NULL));
+	ASSERT(INT_GET(map->size, ARCH_CONVERT) >=
+		xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
+					 mp->m_sb.sb_blocksize, NULL));
 	ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
 	ASSERT((INT_GET(map->size, ARCH_CONVERT) & 0x3) == 0);
 	INT_MOD(map->size, ARCH_CONVERT,
-		-xfs_attr_leaf_newentsize(args, mp->m_sb.sb_blocksize, &tmp));
+		-xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
+					  mp->m_sb.sb_blocksize, &tmp));
 	INT_SET(entry->nameidx, ARCH_CONVERT,
 					INT_GET(map->base, ARCH_CONVERT)
 				      + INT_GET(map->size, ARCH_CONVERT));
@@ -1357,8 +1464,10 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
 	half  = (max+1) * sizeof(*entry);
 	half += INT_GET(hdr1->usedbytes, ARCH_CONVERT)
 				+ INT_GET(hdr2->usedbytes, ARCH_CONVERT)
-				+ xfs_attr_leaf_newentsize(state->args,
-						     state->blocksize, NULL);
+				+ xfs_attr_leaf_newentsize(
+						state->args->namelen,
+						state->args->valuelen,
+						state->blocksize, NULL);
 	half /= 2;
 	lastdelta = state->blocksize;
 	entry = &leaf1->entries[0];
@@ -1370,9 +1479,10 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
 		 */
 		if (count == blk1->index) {
 			tmp = totallen + sizeof(*entry) +
-				xfs_attr_leaf_newentsize(state->args,
-							 state->blocksize,
-							 NULL);
+				xfs_attr_leaf_newentsize(
+						state->args->namelen,
+						state->args->valuelen,
+						state->blocksize, NULL);
 			if (XFS_ATTR_ABS(half - tmp) > lastdelta)
 				break;
 			lastdelta = XFS_ATTR_ABS(half - tmp);
@@ -1408,9 +1518,10 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
 	totallen -= count * sizeof(*entry);
 	if (foundit) {
 		totallen -= sizeof(*entry) +
-				xfs_attr_leaf_newentsize(state->args,
-							 state->blocksize,
-							 NULL);
+				xfs_attr_leaf_newentsize(
+						state->args->namelen,
+						state->args->valuelen,
+						state->blocksize, NULL);
 	}
 
 	*countarg = count;
@@ -2253,17 +2364,17 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
  * a "local" or a "remote" attribute.
  */
 int
-xfs_attr_leaf_newentsize(xfs_da_args_t *args, int blocksize, int *local)
+xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local)
 {
 	int size;
 
-	size = XFS_ATTR_LEAF_ENTSIZE_LOCAL(args->namelen, args->valuelen);
+	size = XFS_ATTR_LEAF_ENTSIZE_LOCAL(namelen, valuelen);
 	if (size < XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(blocksize)) {
 		if (local) {
 			*local = 1;
 		}
 	} else {
-		size = XFS_ATTR_LEAF_ENTSIZE_REMOTE(args->namelen);
+		size = XFS_ATTR_LEAF_ENTSIZE_REMOTE(namelen);
 		if (local) {
 			*local = 0;
 		}
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 0a4cfad6df9..f6143ff251a 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000, 2002-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_ATTR_LEAF_H__
 #define	__XFS_ATTR_LEAF_H__
@@ -146,65 +132,58 @@ typedef struct xfs_attr_leaf_name_remote xfs_attr_leaf_name_remote_t;
 /*
  * Cast typed pointers for "local" and "remote" name/value structs.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_NAME_REMOTE)
-xfs_attr_leaf_name_remote_t *
-xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx);
 #define XFS_ATTR_LEAF_NAME_REMOTE(leafp,idx)	\
 	xfs_attr_leaf_name_remote(leafp,idx)
-#else
-#define XFS_ATTR_LEAF_NAME_REMOTE(leafp,idx)	/* remote name struct ptr */ \
-	((xfs_attr_leaf_name_remote_t *)		\
-	 &((char *)(leafp))[ INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT) ])
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_NAME_LOCAL)
-xfs_attr_leaf_name_local_t *
-xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx);
+static inline xfs_attr_leaf_name_remote_t *
+xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
+{
+	return (xfs_attr_leaf_name_remote_t *) &((char *)
+		(leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)];
+}
+
 #define XFS_ATTR_LEAF_NAME_LOCAL(leafp,idx)	\
 	xfs_attr_leaf_name_local(leafp,idx)
-#else
-#define XFS_ATTR_LEAF_NAME_LOCAL(leafp,idx)	/* local name struct ptr */ \
-	((xfs_attr_leaf_name_local_t *)		\
-	 &((char *)(leafp))[ INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT) ])
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_NAME)
-char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx);
+static inline xfs_attr_leaf_name_local_t *
+xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
+{
+	return (xfs_attr_leaf_name_local_t *) &((char *)
+		(leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)];
+}
+
 #define XFS_ATTR_LEAF_NAME(leafp,idx)		xfs_attr_leaf_name(leafp,idx)
-#else
-#define XFS_ATTR_LEAF_NAME(leafp,idx)		/* generic name struct ptr */ \
-	(&((char *)(leafp))[ INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT) ])
-#endif
+static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
+{
+	return (&((char *)
+		(leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)]);
+}
 
 /*
  * Calculate total bytes used (including trailing pad for alignment) for
  * a "local" name/value structure, a "remote" name/value structure, and
  * a pointer which might be either.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_ENTSIZE_REMOTE)
-int xfs_attr_leaf_entsize_remote(int nlen);
 #define XFS_ATTR_LEAF_ENTSIZE_REMOTE(nlen)	\
 	xfs_attr_leaf_entsize_remote(nlen)
-#else
-#define XFS_ATTR_LEAF_ENTSIZE_REMOTE(nlen)	/* space for remote struct */ \
-	(((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \
-	  XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_ENTSIZE_LOCAL)
-int xfs_attr_leaf_entsize_local(int nlen, int vlen);
+static inline int xfs_attr_leaf_entsize_remote(int nlen)
+{
+	return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \
+		XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
+}
+
 #define XFS_ATTR_LEAF_ENTSIZE_LOCAL(nlen,vlen)	\
 	xfs_attr_leaf_entsize_local(nlen,vlen)
-#else
-#define XFS_ATTR_LEAF_ENTSIZE_LOCAL(nlen,vlen)	/* space for local struct */ \
-	(((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) + \
-	  XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX)
-int xfs_attr_leaf_entsize_local_max(int bsize);
+static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen)
+{
+	return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) +
+		XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
+}
+
 #define XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(bsize)	\
 	xfs_attr_leaf_entsize_local_max(bsize)
-#else
-#define XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(bsize)	/* max local struct size */ \
-	(((bsize) >> 1) + ((bsize) >> 2))
-#endif
+static inline int xfs_attr_leaf_entsize_local_max(int bsize)
+{
+	return (((bsize) >> 1) + ((bsize) >> 2));
+}
 
 
 /*========================================================================
@@ -237,23 +216,25 @@ typedef struct xfs_attr_inactive_list {
  *========================================================================*/
 
 /*
- * Internal routines when dirsize < XFS_LITINO(mp).
+ * Internal routines when attribute fork size < XFS_LITINO(mp).
  */
-int	xfs_attr_shortform_create(struct xfs_da_args *args);
-int	xfs_attr_shortform_add(struct xfs_da_args *add);
+void	xfs_attr_shortform_create(struct xfs_da_args *args);
+void	xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
 int	xfs_attr_shortform_lookup(struct xfs_da_args *args);
 int	xfs_attr_shortform_getvalue(struct xfs_da_args *args);
 int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
-int	xfs_attr_shortform_remove(struct xfs_da_args *remove);
+int	xfs_attr_shortform_remove(struct xfs_da_args *args);
 int	xfs_attr_shortform_list(struct xfs_attr_list_context *context);
 int	xfs_attr_shortform_allfit(struct xfs_dabuf *bp, struct xfs_inode *dp);
+int	xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
+
 
 /*
- * Internal routines when dirsize == XFS_LBSIZE(mp).
+ * Internal routines when attribute fork size == XFS_LBSIZE(mp).
  */
 int	xfs_attr_leaf_to_node(struct xfs_da_args *args);
 int	xfs_attr_leaf_to_shortform(struct xfs_dabuf *bp,
-					  struct xfs_da_args *args);
+				   struct xfs_da_args *args, int forkoff);
 int	xfs_attr_leaf_clearflag(struct xfs_da_args *args);
 int	xfs_attr_leaf_setflag(struct xfs_da_args *args);
 int	xfs_attr_leaf_flipflags(xfs_da_args_t *args);
@@ -289,7 +270,7 @@ int	xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);
 xfs_dahash_t	xfs_attr_leaf_lasthash(struct xfs_dabuf *bp, int *count);
 int	xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp,
 				   struct xfs_dabuf *leaf2_bp);
-int	xfs_attr_leaf_newentsize(struct xfs_da_args *args, int blocksize,
+int	xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize,
 					int *local);
 int	xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp);
 
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h
index ef7d2942d30..ffed6ca81a5 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/xfs_attr_sf.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000, 2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_ATTR_SF_H__
 #define	__XFS_ATTR_SF_H__
@@ -71,38 +57,17 @@ typedef struct xfs_attr_sf_sort {
 	char		*name;		/* name value, pointer into buffer */
 } xfs_attr_sf_sort_t;
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_ENTSIZE_BYNAME)
-int xfs_attr_sf_entsize_byname(int nlen, int vlen);
-#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen)	\
-	xfs_attr_sf_entsize_byname(nlen,vlen)
-#else
 #define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen)	/* space name/value uses */ \
-	((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen))
-#endif
+	(((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen)))
 #define XFS_ATTR_SF_ENTSIZE_MAX			/* max space for name&value */ \
 	((1 << (NBBY*(int)sizeof(__uint8_t))) - 1)
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_ENTSIZE)
-int xfs_attr_sf_entsize(xfs_attr_sf_entry_t *sfep);
-#define XFS_ATTR_SF_ENTSIZE(sfep)	xfs_attr_sf_entsize(sfep)
-#else
 #define XFS_ATTR_SF_ENTSIZE(sfep)		/* space an entry uses */ \
 	((int)sizeof(xfs_attr_sf_entry_t)-1 + (sfep)->namelen+(sfep)->valuelen)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_NEXTENTRY)
-xfs_attr_sf_entry_t *xfs_attr_sf_nextentry(xfs_attr_sf_entry_t *sfep);
-#define XFS_ATTR_SF_NEXTENTRY(sfep)	xfs_attr_sf_nextentry(sfep)
-#else
 #define XFS_ATTR_SF_NEXTENTRY(sfep)		/* next entry in struct */ \
-	((xfs_attr_sf_entry_t *) \
-		((char *)(sfep) + XFS_ATTR_SF_ENTSIZE(sfep)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_TOTSIZE)
-int xfs_attr_sf_totsize(struct xfs_inode *dp);
-#define XFS_ATTR_SF_TOTSIZE(dp)		xfs_attr_sf_totsize(dp)
-#else
+	((xfs_attr_sf_entry_t *)((char *)(sfep) + XFS_ATTR_SF_ENTSIZE(sfep)))
 #define XFS_ATTR_SF_TOTSIZE(dp)			/* total space in use */ \
-	(INT_GET(((xfs_attr_shortform_t *)((dp)->i_afp->if_u1.if_data))->hdr.totsize, ARCH_CONVERT))
-#endif
+	(INT_GET(((xfs_attr_shortform_t *)	\
+		((dp)->i_afp->if_u1.if_data))->hdr.totsize, ARCH_CONVERT))
 
 #if defined(XFS_ATTR_TRACE)
 /*
diff --git a/fs/xfs/xfs_behavior.c b/fs/xfs/xfs_behavior.c
index 16088e175ec..9880adae393 100644
--- a/fs/xfs/xfs_behavior.c
+++ b/fs/xfs/xfs_behavior.c
@@ -1,34 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "xfs.h"
 
diff --git a/fs/xfs/xfs_behavior.h b/fs/xfs/xfs_behavior.h
index d5ed5a84392..2cd89bb5ab1 100644
--- a/fs/xfs/xfs_behavior.h
+++ b/fs/xfs/xfs_behavior.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_BEHAVIOR_H__
 #define __XFS_BEHAVIOR_H__
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index 76c9ad3875e..43be6a7e47c 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -1,45 +1,29 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
-/*
- * XFS bit manipulation routines, used in non-realtime code.
- */
-
 #include "xfs.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 
+/*
+ * XFS bit manipulation routines, used in non-realtime code.
+ */
 
 #ifndef HAVE_ARCH_HIGHBIT
 /*
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 1e7f57ddf7a..0bbe5681754 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000, 2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_BIT_H__
 #define	__XFS_BIT_H__
@@ -39,30 +25,26 @@
 /*
  * masks with n high/low bits set, 32-bit values & 64-bit values
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK32HI)
-__uint32_t xfs_mask32hi(int n);
 #define	XFS_MASK32HI(n)		xfs_mask32hi(n)
-#else
-#define	XFS_MASK32HI(n)		((__uint32_t)-1 << (32 - (n)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK64HI)
-__uint64_t xfs_mask64hi(int n);
+static inline __uint32_t xfs_mask32hi(int n)
+{
+	return (__uint32_t)-1 << (32 - (n));
+}
 #define	XFS_MASK64HI(n)		xfs_mask64hi(n)
-#else
-#define	XFS_MASK64HI(n)		((__uint64_t)-1 << (64 - (n)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK32LO)
-__uint32_t xfs_mask32lo(int n);
+static inline __uint64_t xfs_mask64hi(int n)
+{
+	return (__uint64_t)-1 << (64 - (n));
+}
 #define	XFS_MASK32LO(n)		xfs_mask32lo(n)
-#else
-#define	XFS_MASK32LO(n)		(((__uint32_t)1 << (n)) - 1)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK64LO)
-__uint64_t xfs_mask64lo(int n);
+static inline __uint32_t xfs_mask32lo(int n)
+{
+	return ((__uint32_t)1 << (n)) - 1;
+}
 #define	XFS_MASK64LO(n)		xfs_mask64lo(n)
-#else
-#define	XFS_MASK64LO(n)		(((__uint64_t)1 << (n)) - 1)
-#endif
+static inline __uint64_t xfs_mask64lo(int n)
+{
+	return ((__uint64_t)1 << (n)) - 1;
+}
 
 /* Get high bit set out of 32-bit argument, -1 if none set */
 extern int xfs_highbit32(__uint32_t v);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 6f5d283888a..e415a4698e9 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1,68 +1,53 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
-#include "xfs_dmapi.h"
-#include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_ialloc.h"
 #include "xfs_itable.h"
+#include "xfs_inode_item.h"
 #include "xfs_extfree_item.h"
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir_leaf.h"
-#include "xfs_bit.h"
+#include "xfs_attr_leaf.h"
 #include "xfs_rw.h"
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
@@ -438,6 +423,12 @@ xfs_bmap_count_leaves(
 	int			numrecs,
 	int			*count);
 
+STATIC int
+xfs_bmap_disk_count_leaves(
+	xfs_bmbt_rec_t		*frp,
+	int			numrecs,
+	int			*count);
+
 /*
  * Bmap internal routines.
  */
@@ -2772,8 +2763,8 @@ xfs_bmap_btree_to_extents(
 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
 	rblock = ifp->if_broot;
-	ASSERT(INT_GET(rblock->bb_level, ARCH_CONVERT) == 1);
-	ASSERT(INT_GET(rblock->bb_numrecs, ARCH_CONVERT) == 1);
+	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
+	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
 	ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1);
 	mp = ip->i_mount;
 	pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes);
@@ -3216,11 +3207,11 @@ xfs_bmap_extents_to_btree(
 	 * Fill in the root.
 	 */
 	block = ifp->if_broot;
-	INT_SET(block->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
-	INT_SET(block->bb_level, ARCH_CONVERT, 1);
-	INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
-	INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
-	INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
+	block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
+	block->bb_level = cpu_to_be16(1);
+	block->bb_numrecs = cpu_to_be16(1);
+	block->bb_leftsib = cpu_to_be64(NULLDFSBNO);
+	block->bb_rightsib = cpu_to_be64(NULLDFSBNO);
 	/*
 	 * Need a cursor.  Can't allocate until bb_level is filled in.
 	 */
@@ -3273,10 +3264,10 @@ xfs_bmap_extents_to_btree(
 	 * Fill in the child block.
 	 */
 	ablock = XFS_BUF_TO_BMBT_BLOCK(abp);
-	INT_SET(ablock->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
+	ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
 	ablock->bb_level = 0;
-	INT_SET(ablock->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
-	INT_SET(ablock->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
+	ablock->bb_leftsib = cpu_to_be64(NULLDFSBNO);
+	ablock->bb_rightsib = cpu_to_be64(NULLDFSBNO);
 	arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 	for (ep = ifp->if_u1.if_extents, cnt = i = 0; i < nextents; i++, ep++) {
@@ -3286,8 +3277,8 @@ xfs_bmap_extents_to_btree(
 			arp++; cnt++;
 		}
 	}
-	INT_SET(ablock->bb_numrecs, ARCH_CONVERT, cnt);
-	ASSERT(INT_GET(ablock->bb_numrecs, ARCH_CONVERT) == XFS_IFORK_NEXTENTS(ip, whichfork));
+	ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
+	ablock->bb_numrecs = cpu_to_be16(cnt);
 	/*
 	 * Fill in the root key and pointer.
 	 */
@@ -3301,7 +3292,7 @@ xfs_bmap_extents_to_btree(
 	 * the root is at the right level.
 	 */
 	xfs_bmbt_log_block(cur, abp, XFS_BB_ALL_BITS);
-	xfs_bmbt_log_recs(cur, abp, 1, INT_GET(ablock->bb_numrecs, ARCH_CONVERT));
+	xfs_bmbt_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
 	ASSERT(*curp == NULL);
 	*curp = cur;
 	*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork);
@@ -3337,6 +3328,29 @@ xfs_bmap_insert_exlist(
 }
 
 /*
+ * Helper routine to reset inode di_forkoff field when switching
+ * attribute fork from local to extent format - we reset it where
+ * possible to make space available for inline data fork extents.
+ */
+STATIC void
+xfs_bmap_forkoff_reset(
+	xfs_mount_t	*mp,
+	xfs_inode_t	*ip,
+	int		whichfork)
+{
+	if (whichfork == XFS_ATTR_FORK &&
+	    (ip->i_d.di_format != XFS_DINODE_FMT_DEV) &&
+	    (ip->i_d.di_format != XFS_DINODE_FMT_UUID) &&
+	    ((mp->m_attroffset >> 3) > ip->i_d.di_forkoff)) {
+		ip->i_d.di_forkoff = mp->m_attroffset >> 3;
+		ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) /
+					(uint)sizeof(xfs_bmbt_rec_t);
+		ip->i_afp->if_ext_max = XFS_IFORK_ASIZE(ip) /
+					(uint)sizeof(xfs_bmbt_rec_t);
+	}
+}
+
+/*
  * Convert a local file to an extents file.
  * This code is out of bounds for data forks of regular files,
  * since the file data needs to get logged so things will stay consistent.
@@ -3403,6 +3417,7 @@ xfs_bmap_local_to_extents(
 		memcpy((char *)XFS_BUF_PTR(bp), ifp->if_u1.if_data,
 			ifp->if_bytes);
 		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
+		xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
 		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
 		xfs_iext_realloc(ip, 1, whichfork);
 		ep = ifp->if_u1.if_extents;
@@ -3413,8 +3428,10 @@ xfs_bmap_local_to_extents(
 		XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip,
 			XFS_TRANS_DQ_BCOUNT, 1L);
 		flags |= XFS_ILOG_FEXT(whichfork);
-	} else
+	} else {
 		ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
+		xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork);
+	}
 	ifp->if_flags &= ~XFS_IFINLINE;
 	ifp->if_flags |= XFS_IFEXTENTS;
 	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
@@ -3796,22 +3813,24 @@ xfs_bunmap_trace(
 int						/* error code */
 xfs_bmap_add_attrfork(
 	xfs_inode_t		*ip,		/* incore inode pointer */
-	int			rsvd)		/* OK to allocated reserved blocks in trans */
+	int			size,		/* space new attribute needs */
+	int			rsvd)		/* xact may use reserved blks */
 {
-	int			blks;		/* space reservation */
-	int			committed;	/* xaction was committed */
-	int			error;		/* error return value */
 	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
 	xfs_bmap_free_t		flist;		/* freed extent list */
-	int			logflags;	/* logging flags */
 	xfs_mount_t		*mp;		/* mount structure */
-	unsigned long		s;		/* spinlock spl value */
 	xfs_trans_t		*tp;		/* transaction pointer */
+	unsigned long		s;		/* spinlock spl value */
+	int			blks;		/* space reservation */
+	int			version = 1;	/* superblock attr version */
+	int			committed;	/* xaction was committed */
+	int			logflags;	/* logging flags */
+	int			error;		/* error return value */
 
+	ASSERT(XFS_IFORK_Q(ip) == 0);
 	ASSERT(ip->i_df.if_ext_max ==
 	       XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
-	if (XFS_IFORK_Q(ip))
-		return 0;
+
 	mp = ip->i_mount;
 	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
 	tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
@@ -3853,7 +3872,11 @@ xfs_bmap_add_attrfork(
 	case XFS_DINODE_FMT_LOCAL:
 	case XFS_DINODE_FMT_EXTENTS:
 	case XFS_DINODE_FMT_BTREE:
-		ip->i_d.di_forkoff = mp->m_attroffset >> 3;
+		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
+		if (!ip->i_d.di_forkoff)
+			ip->i_d.di_forkoff = mp->m_attroffset >> 3;
+		else if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR))
+			version = 2;
 		break;
 	default:
 		ASSERT(0);
@@ -3890,12 +3913,22 @@ xfs_bmap_add_attrfork(
 		xfs_trans_log_inode(tp, ip, logflags);
 	if (error)
 		goto error2;
-	if (!XFS_SB_VERSION_HASATTR(&mp->m_sb)) {
+	if (!XFS_SB_VERSION_HASATTR(&mp->m_sb) ||
+	   (!XFS_SB_VERSION_HASATTR2(&mp->m_sb) && version == 2)) {
+		__int64_t sbfields = 0;
+
 		s = XFS_SB_LOCK(mp);
 		if (!XFS_SB_VERSION_HASATTR(&mp->m_sb)) {
 			XFS_SB_VERSION_ADDATTR(&mp->m_sb);
+			sbfields |= XFS_SB_VERSIONNUM;
+		}
+		if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb) && version == 2) {
+			XFS_SB_VERSION_ADDATTR2(&mp->m_sb);
+			sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
+		}
+		if (sbfields) {
 			XFS_SB_UNLOCK(mp, s);
-			xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
+			xfs_mod_sb(tp, sbfields);
 		} else
 			XFS_SB_UNLOCK(mp, s);
 	}
@@ -3988,13 +4021,19 @@ xfs_bmap_compute_maxlevels(
 	 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
 	 * (a signed 16-bit number, xfs_aextnum_t).
 	 */
-	maxleafents = (whichfork == XFS_DATA_FORK) ? MAXEXTNUM : MAXAEXTNUM;
+	if (whichfork == XFS_DATA_FORK) {
+		maxleafents = MAXEXTNUM;
+		sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ?
+			mp->m_attroffset : XFS_BMDR_SPACE_CALC(MINDBTPTRS);
+	} else {
+		maxleafents = MAXAEXTNUM;
+		sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ?
+			mp->m_sb.sb_inodesize - mp->m_attroffset :
+			XFS_BMDR_SPACE_CALC(MINABTPTRS);
+	}
+	maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
 	minleafrecs = mp->m_bmap_dmnr[0];
 	minnoderecs = mp->m_bmap_dmnr[1];
-	sz = (whichfork == XFS_DATA_FORK) ?
-		mp->m_attroffset :
-		mp->m_sb.sb_inodesize - mp->m_attroffset;
-	maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
 	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
 	for (level = 1; maxblocks > 1; level++) {
 		if (maxblocks <= maxrootrecs)
@@ -4332,8 +4371,8 @@ xfs_bmap_read_extents(
 	/*
 	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
 	 */
-	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
-	level = INT_GET(block->bb_level, ARCH_CONVERT);
+	level = be16_to_cpu(block->bb_level);
+	ASSERT(level > 0);
 	pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
 	ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO);
 	ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount);
@@ -4376,7 +4415,7 @@ xfs_bmap_read_extents(
 		xfs_extnum_t	num_recs;
 
 
-		num_recs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+		num_recs = be16_to_cpu(block->bb_numrecs);
 		if (unlikely(i + num_recs > room)) {
 			ASSERT(i + num_recs <= room);
 			xfs_fs_cmn_err(CE_WARN, ip->i_mount,
@@ -4393,7 +4432,7 @@ xfs_bmap_read_extents(
 		/*
 		 * Read-ahead the next leaf block, if any.
 		 */
-		nextbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
+		nextbno = be64_to_cpu(block->bb_rightsib);
 		if (nextbno != NULLFSBLOCK)
 			xfs_btree_reada_bufl(mp, nextbno, 1);
 		/*
@@ -4650,7 +4689,7 @@ xfs_bmapi(
 	}
 	if (wr && *firstblock == NULLFSBLOCK) {
 		if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
-			minleft = INT_GET(ifp->if_broot->bb_level, ARCH_CONVERT) + 1;
+			minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
 		else
 			minleft = 1;
 	} else
@@ -4754,10 +4793,20 @@ xfs_bmapi(
 					error = xfs_mod_incore_sb(mp,
 							XFS_SBS_FDBLOCKS,
 							-(alen), rsvd);
-				if (!error)
+				if (!error) {
 					error = xfs_mod_incore_sb(mp,
 							XFS_SBS_FDBLOCKS,
 							-(indlen), rsvd);
+					if (error && rt) {
+						xfs_mod_incore_sb(ip->i_mount,
+							XFS_SBS_FREXTENTS,
+							extsz, rsvd);
+					} else if (error) {
+						xfs_mod_incore_sb(ip->i_mount,
+							XFS_SBS_FDBLOCKS,
+							alen, rsvd);
+					}
+				}
 
 				if (error) {
 					if (XFS_IS_QUOTA_ON(ip->i_mount))
@@ -5682,12 +5731,13 @@ xfs_getbmap(
 			out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff);
 			out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
 			ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
-			if (prealloced &&
-			    map[i].br_startblock == HOLESTARTBLOCK &&
-			    out.bmv_offset + out.bmv_length == bmvend) {
-				/*
-				 * came to hole at end of file
-				 */
+                        if (map[i].br_startblock == HOLESTARTBLOCK &&
+                           ((prealloced && out.bmv_offset + out.bmv_length == bmvend) ||
+                             whichfork == XFS_ATTR_FORK )) {
+                                /*
+                                 * came to hole at end of file or the end of
+                                   attribute fork
+                                 */
 				goto unlock_and_return;
 			} else {
 				out.bmv_block =
@@ -5917,10 +5967,10 @@ xfs_check_block(
 	xfs_bmbt_ptr_t		*pp, *thispa;	/* pointer to block address */
 	xfs_bmbt_key_t		*prevp, *keyp;
 
-	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
+	ASSERT(be16_to_cpu(block->bb_level) > 0);
 
 	prevp = NULL;
-	for( i = 1; i <= INT_GET(block->bb_numrecs, ARCH_CONVERT);i++) {
+	for( i = 1; i <= be16_to_cpu(block->bb_numrecs); i++) {
 		dmxr = mp->m_bmap_dmxr[0];
 
 		if (root) {
@@ -5945,7 +5995,7 @@ xfs_check_block(
 			pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
 				xfs_bmbt, block, i, dmxr);
 		}
-		for (j = i+1; j <= INT_GET(block->bb_numrecs, ARCH_CONVERT); j++) {
+		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
 			if (root) {
 				thispa = XFS_BMAP_BROOT_PTR_ADDR(block, j, sz);
 			} else {
@@ -5998,8 +6048,8 @@ xfs_bmap_check_leaf_extents(
 	/*
 	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
 	 */
-	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
-	level = INT_GET(block->bb_level, ARCH_CONVERT);
+	level = be16_to_cpu(block->bb_level);
+	ASSERT(level > 0);
 	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
 	pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
 	ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO);
@@ -6059,13 +6109,13 @@ xfs_bmap_check_leaf_extents(
 		xfs_extnum_t	num_recs;
 
 
-		num_recs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+		num_recs = be16_to_cpu(block->bb_numrecs);
 
 		/*
 		 * Read-ahead the next leaf block, if any.
 		 */
 
-		nextbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
+		nextbno = be64_to_cpu(block->bb_rightsib);
 
 		/*
 		 * Check all the extents to make sure they are OK.
@@ -6121,7 +6171,7 @@ error0:
 		xfs_trans_brelse(NULL, bp);
 error_norelse:
 	cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents",
-		i, __FUNCTION__);
+		__FUNCTION__, i);
 	panic("%s: CORRUPTED BTREE OR SOMETHING", __FUNCTION__);
 	return;
 }
@@ -6162,8 +6212,8 @@ xfs_bmap_count_blocks(
 	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
 	 */
 	block = ifp->if_broot;
-	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
-	level = INT_GET(block->bb_level, ARCH_CONVERT);
+	level = be16_to_cpu(block->bb_level);
+	ASSERT(level > 0);
 	pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
 	ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO);
 	ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount);
@@ -6208,14 +6258,14 @@ xfs_bmap_count_tree(
 
 	if (--level) {
 		/* Not at node above leafs, count this level of nodes */
-		nextbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
+		nextbno = be64_to_cpu(block->bb_rightsib);
 		while (nextbno != NULLFSBLOCK) {
 			if ((error = xfs_btree_read_bufl(mp, tp, nextbno,
 				0, &nbp, XFS_BMAP_BTREE_REF)))
 				return error;
 			*count += 1;
 			nextblock = XFS_BUF_TO_BMBT_BLOCK(nbp);
-			nextbno = INT_GET(nextblock->bb_rightsib, ARCH_CONVERT);
+			nextbno = be64_to_cpu(nextblock->bb_rightsib);
 			xfs_trans_brelse(tp, nbp);
 		}
 
@@ -6234,11 +6284,11 @@ xfs_bmap_count_tree(
 	} else {
 		/* count all level 1 nodes and their leaves */
 		for (;;) {
-			nextbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
-			numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+			nextbno = be64_to_cpu(block->bb_rightsib);
+			numrecs = be16_to_cpu(block->bb_numrecs);
 			frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize,
 				xfs_bmbt, block, 1, mp->m_bmap_dmxr[0]);
-			if (unlikely(xfs_bmap_count_leaves(frp, numrecs, count) < 0)) {
+			if (unlikely(xfs_bmap_disk_count_leaves(frp, numrecs, count) < 0)) {
 				xfs_trans_brelse(tp, bp);
 				XFS_ERROR_REPORT("xfs_bmap_count_tree(2)",
 						 XFS_ERRLEVEL_LOW, mp);
@@ -6270,6 +6320,22 @@ xfs_bmap_count_leaves(
 	int		b;
 
 	for ( b = 1; b <= numrecs; b++, frp++)
+		*count += xfs_bmbt_get_blockcount(frp);
+	return 0;
+}
+
+/*
+ * Count leaf blocks given a pointer to an extent list originally in btree format.
+ */
+int
+xfs_bmap_disk_count_leaves(
+	xfs_bmbt_rec_t		*frp,
+	int			numrecs,
+	int			*count)
+{
+	int		b;
+
+	for ( b = 1; b <= numrecs; b++, frp++)
 		*count += xfs_bmbt_disk_get_blockcount(frp);
 	return 0;
 }
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index e6d22ec9b2e..2e0717a0130 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_BMAP_H__
 #define	__XFS_BMAP_H__
@@ -77,12 +63,11 @@ typedef	struct xfs_bmap_free
 					/* combine contig. space */
 #define	XFS_BMAPI_CONTIG	0x400	/* must allocate only one extent */
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAPI_AFLAG)
-int xfs_bmapi_aflag(int w);
 #define	XFS_BMAPI_AFLAG(w)	xfs_bmapi_aflag(w)
-#else
-#define	XFS_BMAPI_AFLAG(w)	((w) == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0)
-#endif
+static inline int xfs_bmapi_aflag(int w)
+{
+	return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0);
+}
 
 /*
  * Special values for xfs_bmbt_irec_t br_startblock field.
@@ -90,14 +75,12 @@ int xfs_bmapi_aflag(int w);
 #define	DELAYSTARTBLOCK		((xfs_fsblock_t)-1LL)
 #define	HOLESTARTBLOCK		((xfs_fsblock_t)-2LL)
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_INIT)
-void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp);
 #define	XFS_BMAP_INIT(flp,fbp)	xfs_bmap_init(flp,fbp)
-#else
-#define	XFS_BMAP_INIT(flp,fbp)	\
+static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
+{
 	((flp)->xbf_first = NULL, (flp)->xbf_count = 0, \
-	 (flp)->xbf_low = 0, *(fbp) = NULLFSBLOCK)
-#endif
+		(flp)->xbf_low = 0, *(fbp) = NULLFSBLOCK);
+}
 
 /*
  * Argument structure for xfs_bmap_alloc.
@@ -156,7 +139,8 @@ xfs_bmap_trace_exlist(
 int					/* error code */
 xfs_bmap_add_attrfork(
 	struct xfs_inode	*ip,	/* incore inode pointer */
-	int					rsvd);	/* flag for reserved block allocation */
+	int			size,	/* space needed for new attribute */
+	int			rsvd);	/* flag for reserved block allocation */
 
 /*
  * Add the extent to the list of extents to be free at transaction end.
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 09c413576ba..3f1383d160e 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -1,41 +1,26 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -43,20 +28,19 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
+#include "xfs_inode_item.h"
 #include "xfs_alloc.h"
-#include "xfs_bit.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
 #include "xfs_bmap.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
@@ -382,7 +366,7 @@ xfs_bmbt_delrec(
 		return 0;
 	}
 	block = xfs_bmbt_get_block(cur, level, &bp);
-	numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+	numrecs = be16_to_cpu(block->bb_numrecs);
 #ifdef DEBUG
 	if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
 		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
@@ -427,7 +411,7 @@ xfs_bmbt_delrec(
 		}
 	}
 	numrecs--;
-	INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
+	block->bb_numrecs = cpu_to_be16(numrecs);
 	xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
 	/*
 	 * We're at the root level.
@@ -463,8 +447,8 @@ xfs_bmbt_delrec(
 		*stat = 1;
 		return 0;
 	}
-	rbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
-	lbno = INT_GET(block->bb_leftsib, ARCH_CONVERT);
+	rbno = be64_to_cpu(block->bb_rightsib);
+	lbno = be64_to_cpu(block->bb_leftsib);
 	/*
 	 * One child of root, need to get a chance to copy its contents
 	 * into the root and delete it. Can't go up to next level,
@@ -508,15 +492,15 @@ xfs_bmbt_delrec(
 			goto error0;
 		}
 #endif
-		bno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
-		if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >=
+		bno = be64_to_cpu(right->bb_leftsib);
+		if (be16_to_cpu(right->bb_numrecs) - 1 >=
 		    XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
 			if ((error = xfs_bmbt_lshift(tcur, level, &i))) {
 				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 				goto error0;
 			}
 			if (i) {
-				ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
+				ASSERT(be16_to_cpu(block->bb_numrecs) >=
 				       XFS_BMAP_BLOCK_IMINRECS(level, tcur));
 				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
 				tcur = NULL;
@@ -533,7 +517,7 @@ xfs_bmbt_delrec(
 				return 0;
 			}
 		}
-		rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
+		rrecs = be16_to_cpu(right->bb_numrecs);
 		if (lbno != NULLFSBLOCK) {
 			i = xfs_btree_firstrec(tcur, level);
 			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
@@ -564,15 +548,15 @@ xfs_bmbt_delrec(
 			goto error0;
 		}
 #endif
-		bno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
-		if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >=
+		bno = be64_to_cpu(left->bb_rightsib);
+		if (be16_to_cpu(left->bb_numrecs) - 1 >=
 		    XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
 			if ((error = xfs_bmbt_rshift(tcur, level, &i))) {
 				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 				goto error0;
 			}
 			if (i) {
-				ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
+				ASSERT(be16_to_cpu(block->bb_numrecs) >=
 				       XFS_BMAP_BLOCK_IMINRECS(level, tcur));
 				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
 				tcur = NULL;
@@ -583,14 +567,14 @@ xfs_bmbt_delrec(
 				return 0;
 			}
 		}
-		lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
+		lrecs = be16_to_cpu(left->bb_numrecs);
 	}
 	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
 	tcur = NULL;
 	mp = cur->bc_mp;
 	ASSERT(bno != NULLFSBLOCK);
 	if (lbno != NULLFSBLOCK &&
-	    lrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
+	    lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
 		rbno = bno;
 		right = block;
 		rbp = bp;
@@ -605,7 +589,7 @@ xfs_bmbt_delrec(
 			goto error0;
 		}
 	} else if (rbno != NULLFSBLOCK &&
-		   rrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
+		   rrecs + be16_to_cpu(block->bb_numrecs) <=
 		   XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
 		lbno = bno;
 		left = block;
@@ -620,7 +604,7 @@ xfs_bmbt_delrec(
 			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 			goto error0;
 		}
-		lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
+		lrecs = be16_to_cpu(left->bb_numrecs);
 	} else {
 		if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
 			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
@@ -630,8 +614,8 @@ xfs_bmbt_delrec(
 		*stat = 1;
 		return 0;
 	}
-	numlrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
-	numrrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
+	numlrecs = be16_to_cpu(left->bb_numrecs);
+	numrrecs = be16_to_cpu(right->bb_numrecs);
 	if (level > 0) {
 		lkp = XFS_BMAP_KEY_IADDR(left, numlrecs + 1, cur);
 		lpp = XFS_BMAP_PTR_IADDR(left, numlrecs + 1, cur);
@@ -655,12 +639,12 @@ xfs_bmbt_delrec(
 		memcpy(lrp, rrp, numrrecs * sizeof(*lrp));
 		xfs_bmbt_log_recs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
 	}
-	INT_MOD(left->bb_numrecs, ARCH_CONVERT, numrrecs);
-	left->bb_rightsib = right->bb_rightsib; /* INT_: direct copy */
+	be16_add(&left->bb_numrecs, numrrecs);
+	left->bb_rightsib = right->bb_rightsib;
 	xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS);
-	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
+	if (be64_to_cpu(left->bb_rightsib) != NULLDFSBNO) {
 		if ((error = xfs_btree_read_bufl(mp, cur->bc_tp,
-				INT_GET(left->bb_rightsib, ARCH_CONVERT),
+				be64_to_cpu(left->bb_rightsib),
 				0, &rrbp, XFS_BMAP_BTREE_REF))) {
 			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 			goto error0;
@@ -670,7 +654,7 @@ xfs_bmbt_delrec(
 			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 			goto error0;
 		}
-		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno);
+		rrblock->bb_leftsib = cpu_to_be64(lbno);
 		xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
 	}
 	xfs_bmap_add_free(XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(rbp)), 1,
@@ -727,7 +711,7 @@ xfs_bmbt_get_rec(
 	if ((error = xfs_btree_check_lblock(cur, block, 0, bp)))
 		return error;
 #endif
-	if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT) || ptr <= 0) {
+	if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
 		*stat = 0;
 		return 0;
 	}
@@ -788,7 +772,7 @@ xfs_bmbt_insrec(
 	}
 	XFS_STATS_INC(xs_bmbt_insrec);
 	block = xfs_bmbt_get_block(cur, level, &bp);
-	numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+	numrecs = be16_to_cpu(block->bb_numrecs);
 #ifdef DEBUG
 	if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
 		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
@@ -870,7 +854,7 @@ xfs_bmbt_insrec(
 			}
 		}
 	}
-	numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+	numrecs = be16_to_cpu(block->bb_numrecs);
 	if (level > 0) {
 		kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
 		pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
@@ -897,7 +881,7 @@ xfs_bmbt_insrec(
 		kp[ptr - 1] = key;
 		INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
 		numrecs++;
-		INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
+		block->bb_numrecs = cpu_to_be16(numrecs);
 		xfs_bmbt_log_keys(cur, bp, ptr, numrecs);
 		xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs);
 	} else {
@@ -906,7 +890,7 @@ xfs_bmbt_insrec(
 			(numrecs - ptr + 1) * sizeof(*rp));
 		rp[ptr - 1] = *recp;
 		numrecs++;
-		INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
+		block->bb_numrecs = cpu_to_be16(numrecs);
 		xfs_bmbt_log_recs(cur, bp, ptr, numrecs);
 	}
 	xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
@@ -971,7 +955,7 @@ xfs_bmbt_killroot(
 	/*
 	 * Give up if the root has multiple children.
 	 */
-	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) != 1) {
+	if (be16_to_cpu(block->bb_numrecs) != 1) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 		return 0;
 	}
@@ -982,37 +966,37 @@ xfs_bmbt_killroot(
 	 */
 	cbp = cur->bc_bufs[level - 1];
 	cblock = XFS_BUF_TO_BMBT_BLOCK(cbp);
-	if (INT_GET(cblock->bb_numrecs, ARCH_CONVERT) > XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
+	if (be16_to_cpu(cblock->bb_numrecs) > XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 		return 0;
 	}
-	ASSERT(INT_GET(cblock->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO);
-	ASSERT(INT_GET(cblock->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO);
+	ASSERT(be64_to_cpu(cblock->bb_leftsib) == NULLDFSBNO);
+	ASSERT(be64_to_cpu(cblock->bb_rightsib) == NULLDFSBNO);
 	ip = cur->bc_private.b.ip;
 	ifp = XFS_IFORK_PTR(ip, cur->bc_private.b.whichfork);
 	ASSERT(XFS_BMAP_BLOCK_IMAXRECS(level, cur) ==
 	       XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes));
-	i = (int)(INT_GET(cblock->bb_numrecs, ARCH_CONVERT) - XFS_BMAP_BLOCK_IMAXRECS(level, cur));
+	i = (int)(be16_to_cpu(cblock->bb_numrecs) - XFS_BMAP_BLOCK_IMAXRECS(level, cur));
 	if (i) {
 		xfs_iroot_realloc(ip, i, cur->bc_private.b.whichfork);
 		block = ifp->if_broot;
 	}
-	INT_MOD(block->bb_numrecs, ARCH_CONVERT, i);
-	ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) == INT_GET(cblock->bb_numrecs, ARCH_CONVERT));
+	be16_add(&block->bb_numrecs, i);
+	ASSERT(block->bb_numrecs == cblock->bb_numrecs);
 	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
 	ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
-	memcpy(kp, ckp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
+	memcpy(kp, ckp, be16_to_cpu(block->bb_numrecs) * sizeof(*kp));
 	pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
 	cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
 #ifdef DEBUG
-	for (i = 0; i < INT_GET(cblock->bb_numrecs, ARCH_CONVERT); i++) {
+	for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
 		if ((error = xfs_btree_check_lptr(cur, INT_GET(cpp[i], ARCH_CONVERT), level - 1))) {
 			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 			return error;
 		}
 	}
 #endif
-	memcpy(pp, cpp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
+	memcpy(pp, cpp, be16_to_cpu(block->bb_numrecs) * sizeof(*pp));
 	xfs_bmap_add_free(XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(cbp)), 1,
 			cur->bc_private.b.flist, cur->bc_mp);
 	ip->i_d.di_nblocks--;
@@ -1020,7 +1004,7 @@ xfs_bmbt_killroot(
 			XFS_TRANS_DQ_BCOUNT, -1L);
 	xfs_trans_binval(cur->bc_tp, cbp);
 	cur->bc_bufs[level - 1] = NULL;
-	INT_MOD(block->bb_level, ARCH_CONVERT, -1);
+	be16_add(&block->bb_level, -1);
 	xfs_trans_log_inode(cur->bc_tp, ip,
 		XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
 	cur->bc_nlevels--;
@@ -1176,7 +1160,7 @@ xfs_bmbt_lookup(
 			else
 				krbase = XFS_BMAP_REC_IADDR(block, 1, cur);
 			low = 1;
-			if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) {
+			if (!(high = be16_to_cpu(block->bb_numrecs))) {
 				ASSERT(level == 0);
 				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
 				XFS_BMBT_TRACE_CURSOR(cur, EXIT);
@@ -1223,8 +1207,8 @@ xfs_bmbt_lookup(
 		 * If ge search and we went off the end of the block, but it's
 		 * not the last block, we're in the wrong block.
 		 */
-		if (dir == XFS_LOOKUP_GE && keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) &&
-		    INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
+		if (dir == XFS_LOOKUP_GE && keyno > be16_to_cpu(block->bb_numrecs) &&
+		    be64_to_cpu(block->bb_rightsib) != NULLDFSBNO) {
 			cur->bc_ptrs[0] = keyno;
 			if ((error = xfs_bmbt_increment(cur, 0, &i))) {
 				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
@@ -1239,7 +1223,7 @@ xfs_bmbt_lookup(
 	else if (dir == XFS_LOOKUP_LE && diff > 0)
 		keyno--;
 	cur->bc_ptrs[0] = keyno;
-	if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+	if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs)) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 		*stat = 0;
 	} else {
@@ -1296,7 +1280,7 @@ xfs_bmbt_lshift(
 		return error;
 	}
 #endif
-	if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO) {
+	if (be64_to_cpu(right->bb_leftsib) == NULLDFSBNO) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 		*stat = 0;
 		return 0;
@@ -1307,7 +1291,7 @@ xfs_bmbt_lshift(
 		return 0;
 	}
 	mp = cur->bc_mp;
-	if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0,
+	if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(right->bb_leftsib), 0,
 			&lbp, XFS_BMAP_BTREE_REF))) {
 		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 		return error;
@@ -1317,12 +1301,12 @@ xfs_bmbt_lshift(
 		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 		return error;
 	}
-	if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
+	if (be16_to_cpu(left->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 		*stat = 0;
 		return 0;
 	}
-	lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;
+	lrecs = be16_to_cpu(left->bb_numrecs) + 1;
 	if (level > 0) {
 		lkp = XFS_BMAP_KEY_IADDR(left, lrecs, cur);
 		rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
@@ -1344,7 +1328,7 @@ xfs_bmbt_lshift(
 		*lrp = *rrp;
 		xfs_bmbt_log_recs(cur, lbp, lrecs, lrecs);
 	}
-	INT_SET(left->bb_numrecs, ARCH_CONVERT, lrecs);
+	left->bb_numrecs = cpu_to_be16(lrecs);
 	xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
 #ifdef DEBUG
 	if (level > 0)
@@ -1352,8 +1336,8 @@ xfs_bmbt_lshift(
 	else
 		xfs_btree_check_rec(XFS_BTNUM_BMAP, lrp - 1, lrp);
 #endif
-	rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1;
-	INT_SET(right->bb_numrecs, ARCH_CONVERT, rrecs);
+	rrecs = be16_to_cpu(right->bb_numrecs) - 1;
+	right->bb_numrecs = cpu_to_be16(rrecs);
 	xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
 	if (level > 0) {
 #ifdef DEBUG
@@ -1430,18 +1414,18 @@ xfs_bmbt_rshift(
 		return error;
 	}
 #endif
-	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO) {
+	if (be64_to_cpu(left->bb_rightsib) == NULLDFSBNO) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 		*stat = 0;
 		return 0;
 	}
-	if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {
+	if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 		*stat = 0;
 		return 0;
 	}
 	mp = cur->bc_mp;
-	if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0,
+	if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(left->bb_rightsib), 0,
 			&rbp, XFS_BMAP_BTREE_REF))) {
 		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 		return error;
@@ -1451,26 +1435,26 @@ xfs_bmbt_rshift(
 		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 		return error;
 	}
-	if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
+	if (be16_to_cpu(right->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 		*stat = 0;
 		return 0;
 	}
 	if (level > 0) {
-		lkp = XFS_BMAP_KEY_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
-		lpp = XFS_BMAP_PTR_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		lkp = XFS_BMAP_KEY_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
+		lpp = XFS_BMAP_PTR_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
 		rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
 		rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
 #ifdef DEBUG
-		for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
+		for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
 			if ((error = xfs_btree_check_lptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))) {
 				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 				return error;
 			}
 		}
 #endif
-		memmove(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		memmove(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
+		memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
 #ifdef DEBUG
 		if ((error = xfs_btree_check_lptr(cur, INT_GET(*lpp, ARCH_CONVERT), level))) {
 			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
@@ -1479,21 +1463,21 @@ xfs_bmbt_rshift(
 #endif
 		*rkp = *lkp;
 		*rpp = *lpp; /* INT_: direct copy */
-		xfs_bmbt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
-		xfs_bmbt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
+		xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
 	} else {
-		lrp = XFS_BMAP_REC_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		lrp = XFS_BMAP_REC_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
 		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
-		memmove(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
 		*rrp = *lrp;
-		xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
 		INT_SET(key.br_startoff, ARCH_CONVERT,
 			xfs_bmbt_disk_get_startoff(rrp));
 		rkp = &key;
 	}
-	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
+	be16_add(&left->bb_numrecs, -1);
 	xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
-	INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+	be16_add(&right->bb_numrecs, 1);
 #ifdef DEBUG
 	if (level > 0)
 		xfs_btree_check_key(XFS_BTNUM_BMAP, rkp, rkp + 1);
@@ -1624,47 +1608,47 @@ xfs_bmbt_split(
 		return error;
 	}
 #endif
-	INT_SET(right->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
-	right->bb_level = left->bb_level; /* INT_: direct copy */
-	INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));
-	if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&
-	    cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
-		INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
-	i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;
+	right->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
+	right->bb_level = left->bb_level;
+	right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
+	if ((be16_to_cpu(left->bb_numrecs) & 1) &&
+	    cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
+		be16_add(&right->bb_numrecs, 1);
+	i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
 	if (level > 0) {
 		lkp = XFS_BMAP_KEY_IADDR(left, i, cur);
 		lpp = XFS_BMAP_PTR_IADDR(left, i, cur);
 		rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
 		rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
 #ifdef DEBUG
-		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+		for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
 			if ((error = xfs_btree_check_lptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level))) {
 				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 				return error;
 			}
 		}
 #endif
-		memcpy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		memcpy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
-		xfs_bmbt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
-		xfs_bmbt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
+		memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
+		xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
+		xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
 		keyp->br_startoff = INT_GET(rkp->br_startoff, ARCH_CONVERT);
 	} else {
 		lrp = XFS_BMAP_REC_IADDR(left, i, cur);
 		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
-		memcpy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
-		xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
+		xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
 		keyp->br_startoff = xfs_bmbt_disk_get_startoff(rrp);
 	}
-	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
-	right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
-	INT_SET(left->bb_rightsib, ARCH_CONVERT, args.fsbno);
-	INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
+	be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
+	right->bb_rightsib = left->bb_rightsib;
+	left->bb_rightsib = cpu_to_be64(args.fsbno);
+	right->bb_leftsib = cpu_to_be64(lbno);
 	xfs_bmbt_log_block(cur, rbp, XFS_BB_ALL_BITS);
 	xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
-	if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
+	if (be64_to_cpu(right->bb_rightsib) != NULLDFSBNO) {
 		if ((error = xfs_btree_read_bufl(args.mp, args.tp,
-				INT_GET(right->bb_rightsib, ARCH_CONVERT), 0, &rrbp,
+				be64_to_cpu(right->bb_rightsib), 0, &rrbp,
 				XFS_BMAP_BTREE_REF))) {
 			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 			return error;
@@ -1674,12 +1658,12 @@ xfs_bmbt_split(
 			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 			return error;
 		}
-		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, args.fsbno);
+		rrblock->bb_leftsib = cpu_to_be64(args.fsbno);
 		xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
 	}
-	if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {
+	if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
 		xfs_btree_setbuf(cur, level, rbp);
-		cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
+		cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
 	}
 	if (level + 1 < cur->bc_nlevels) {
 		if ((error = xfs_btree_dup_cursor(cur, curp))) {
@@ -1751,18 +1735,18 @@ xfs_bmdr_to_bmbt(
 	xfs_bmbt_key_t		*tkp;
 	xfs_bmbt_ptr_t		*tpp;
 
-	INT_SET(rblock->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
-	rblock->bb_level = dblock->bb_level;	/* both in on-disk format */
-	ASSERT(INT_GET(rblock->bb_level, ARCH_CONVERT) > 0);
-	rblock->bb_numrecs = dblock->bb_numrecs;/* both in on-disk format */
-	INT_SET(rblock->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
-	INT_SET(rblock->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
+	rblock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
+	rblock->bb_level = dblock->bb_level;
+	ASSERT(be16_to_cpu(rblock->bb_level) > 0);
+	rblock->bb_numrecs = dblock->bb_numrecs;
+	rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO);
+	rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO);
 	dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
 	fkp = XFS_BTREE_KEY_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
 	tkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
 	fpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
 	tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
-	dmxr = INT_GET(dblock->bb_numrecs, ARCH_CONVERT);
+	dmxr = be16_to_cpu(dblock->bb_numrecs);
 	memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
 	memcpy(tpp, fpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
 }
@@ -1805,7 +1789,7 @@ xfs_bmbt_decrement(
 		return error;
 	}
 #endif
-	if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO) {
+	if (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 		*stat = 0;
 		return 0;
@@ -1837,7 +1821,7 @@ xfs_bmbt_decrement(
 			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 			return error;
 		}
-		cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+		cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
 	}
 	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 	*stat = 1;
@@ -2017,7 +2001,7 @@ xfs_bmbt_get_state(
 				ext_flag);
 }
 
-#if __BYTE_ORDER != __BIG_ENDIAN
+#ifndef XFS_NATIVE_HOST
 /* Endian flipping versions of the bmbt extraction functions */
 void
 xfs_bmbt_disk_get_all(
@@ -2087,7 +2071,7 @@ xfs_bmbt_disk_get_state(
 	return xfs_extent_state(xfs_bmbt_disk_get_blockcount(r),
 				ext_flag);
 }
-#endif
+#endif /* XFS_NATIVE_HOST */
 
 
 /*
@@ -2123,12 +2107,12 @@ xfs_bmbt_increment(
 		return error;
 	}
 #endif
-	if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+	if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 		*stat = 1;
 		return 0;
 	}
-	if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO) {
+	if (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 		*stat = 0;
 		return 0;
@@ -2141,7 +2125,7 @@ xfs_bmbt_increment(
 			return error;
 		}
 #endif
-		if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
+		if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
 			break;
 		if (lev < cur->bc_nlevels - 1)
 			xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
@@ -2403,23 +2387,23 @@ xfs_bmbt_newroot(
 	bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0);
 	cblock = XFS_BUF_TO_BMBT_BLOCK(bp);
 	*cblock = *block;
-	INT_MOD(block->bb_level, ARCH_CONVERT, +1);
-	INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
+	be16_add(&block->bb_level, 1);
+	block->bb_numrecs = cpu_to_be16(1);
 	cur->bc_nlevels++;
 	cur->bc_ptrs[level + 1] = 1;
 	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
 	ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
-	memcpy(ckp, kp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
+	memcpy(ckp, kp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*kp));
 	cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
 #ifdef DEBUG
-	for (i = 0; i < INT_GET(cblock->bb_numrecs, ARCH_CONVERT); i++) {
+	for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
 		if ((error = xfs_btree_check_lptr(cur, INT_GET(pp[i], ARCH_CONVERT), level))) {
 			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 			return error;
 		}
 	}
 #endif
-	memcpy(cpp, pp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
+	memcpy(cpp, pp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*pp));
 #ifdef DEBUG
 	if ((error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)args.fsbno,
 			level))) {
@@ -2428,7 +2412,7 @@ xfs_bmbt_newroot(
 	}
 #endif
 	INT_SET(*pp, ARCH_CONVERT, args.fsbno);
-	xfs_iroot_realloc(cur->bc_private.b.ip, 1 - INT_GET(cblock->bb_numrecs, ARCH_CONVERT),
+	xfs_iroot_realloc(cur->bc_private.b.ip, 1 - be16_to_cpu(cblock->bb_numrecs),
 		cur->bc_private.b.whichfork);
 	xfs_btree_setbuf(cur, level, bp);
 	/*
@@ -2436,8 +2420,8 @@ xfs_bmbt_newroot(
 	 * the root is at the right level.
 	 */
 	xfs_bmbt_log_block(cur, bp, XFS_BB_ALL_BITS);
-	xfs_bmbt_log_keys(cur, bp, 1, INT_GET(cblock->bb_numrecs, ARCH_CONVERT));
-	xfs_bmbt_log_ptrs(cur, bp, 1, INT_GET(cblock->bb_numrecs, ARCH_CONVERT));
+	xfs_bmbt_log_keys(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs));
+	xfs_bmbt_log_ptrs(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs));
 	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 	*logflags |=
 		XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
@@ -2531,7 +2515,7 @@ xfs_bmbt_set_allf(
 #endif	/* XFS_BIG_BLKNOS */
 }
 
-#if __BYTE_ORDER != __BIG_ENDIAN
+#ifndef XFS_NATIVE_HOST
 /*
  * Set all the fields in a bmap extent record from the uncompressed form.
  */
@@ -2617,7 +2601,7 @@ xfs_bmbt_disk_set_allf(
 	}
 #endif	/* XFS_BIG_BLKNOS */
 }
-#endif
+#endif /* XFS_NATIVE_HOST */
 
 /*
  * Set the blockcount field in a bmap extent record.
@@ -2705,18 +2689,18 @@ xfs_bmbt_to_bmdr(
 	xfs_bmbt_key_t		*tkp;
 	xfs_bmbt_ptr_t		*tpp;
 
-	ASSERT(INT_GET(rblock->bb_magic, ARCH_CONVERT) == XFS_BMAP_MAGIC);
-	ASSERT(INT_GET(rblock->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO);
-	ASSERT(INT_GET(rblock->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO);
-	ASSERT(INT_GET(rblock->bb_level, ARCH_CONVERT) > 0);
-	dblock->bb_level = rblock->bb_level;	/* both in on-disk format */
-	dblock->bb_numrecs = rblock->bb_numrecs;/* both in on-disk format */
+	ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC);
+	ASSERT(be64_to_cpu(rblock->bb_leftsib) == NULLDFSBNO);
+	ASSERT(be64_to_cpu(rblock->bb_rightsib) == NULLDFSBNO);
+	ASSERT(be16_to_cpu(rblock->bb_level) > 0);
+	dblock->bb_level = rblock->bb_level;
+	dblock->bb_numrecs = rblock->bb_numrecs;
 	dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
 	fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
 	tkp = XFS_BTREE_KEY_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
 	fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
 	tpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
-	dmxr = INT_GET(dblock->bb_numrecs, ARCH_CONVERT);
+	dmxr = be16_to_cpu(dblock->bb_numrecs);
 	memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
 	memcpy(tpp, fpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
 }
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 0a40cf126c2..e095a2d344a 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000,2002-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2002-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_BMAP_BTREE_H__
 #define __XFS_BMAP_BTREE_H__
@@ -42,10 +28,9 @@ struct xfs_inode;
 /*
  * Bmap root header, on-disk form only.
  */
-typedef struct xfs_bmdr_block
-{
-	__uint16_t	bb_level;	/* 0 is a leaf */
-	__uint16_t	bb_numrecs;	/* current # of data records */
+typedef struct xfs_bmdr_block {
+	__be16		bb_level;	/* 0 is a leaf */
+	__be16		bb_numrecs;	/* current # of data records */
 } xfs_bmdr_block_t;
 
 /*
@@ -62,7 +47,7 @@ typedef struct xfs_bmdr_block
  *  l1:0-20 are blockcount.
  */
 
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#ifndef XFS_NATIVE_HOST
 
 #define BMBT_TOTAL_BITLEN	128	/* 128 bits, 16 bytes */
 #define BMBT_EXNTFLAG_BITOFF	0
@@ -87,7 +72,7 @@ typedef struct xfs_bmdr_block
 #define BMBT_BLOCKCOUNT_BITOFF	64 /* Start of second 64 bit container */
 #define BMBT_BLOCKCOUNT_BITLEN	21
 
-#endif
+#endif /* XFS_NATIVE_HOST */
 
 
 #define BMBT_USE_64	1
@@ -114,31 +99,31 @@ typedef xfs_bmbt_rec_64_t xfs_bmbt_rec_t, xfs_bmdr_rec_t;
 	(((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
 #define DSTARTBLOCKMASK		\
 	(((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_ISNULLSTARTBLOCK)
-int isnullstartblock(xfs_fsblock_t x);
+
 #define ISNULLSTARTBLOCK(x)	isnullstartblock(x)
-#else
-#define ISNULLSTARTBLOCK(x)	(((x) & STARTBLOCKMASK) == STARTBLOCKMASK)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_ISNULLDSTARTBLOCK)
-int isnulldstartblock(xfs_dfsbno_t x);
+static inline int isnullstartblock(xfs_fsblock_t x)
+{
+	return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK;
+}
+
 #define ISNULLDSTARTBLOCK(x)	isnulldstartblock(x)
-#else
-#define ISNULLDSTARTBLOCK(x)	(((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_NULLSTARTBLOCK)
-xfs_fsblock_t nullstartblock(int k);
+static inline int isnulldstartblock(xfs_dfsbno_t x)
+{
+	return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK;
+}
+
 #define NULLSTARTBLOCK(k)	nullstartblock(k)
-#else
-#define NULLSTARTBLOCK(k)	\
-	((ASSERT(k < (1 << STARTBLOCKVALBITS))), (STARTBLOCKMASK | (k)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_STARTBLOCKVAL)
-xfs_filblks_t startblockval(xfs_fsblock_t x);
+static inline xfs_fsblock_t nullstartblock(int k)
+{
+	ASSERT(k < (1 << STARTBLOCKVALBITS));
+	return STARTBLOCKMASK | (k);
+}
+
 #define STARTBLOCKVAL(x)	startblockval(x)
-#else
-#define STARTBLOCKVAL(x)	((xfs_filblks_t)((x) & ~STARTBLOCKMASK))
-#endif
+static inline xfs_filblks_t startblockval(xfs_fsblock_t x)
+{
+	return (xfs_filblks_t)((x) & ~STARTBLOCKMASK);
+}
 
 /*
  * Possible extent formats.
@@ -159,14 +144,9 @@ typedef enum {
 /*
  * Extent state and extent format macros.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_EXTFMT_INODE )
-xfs_exntfmt_t xfs_extfmt_inode(struct xfs_inode *ip);
-#define XFS_EXTFMT_INODE(x)	xfs_extfmt_inode(x)
-#else
-#define XFS_EXTFMT_INODE(x) \
-  (XFS_SB_VERSION_HASEXTFLGBIT(&((x)->i_mount->m_sb)) ? \
-	XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE)
-#endif
+#define XFS_EXTFMT_INODE(x)	\
+	(XFS_SB_VERSION_HASEXTFLGBIT(&((x)->i_mount->m_sb)) ? \
+		XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE)
 #define ISUNWRITTEN(x)	((x)->br_state == XFS_EXT_UNWRITTEN)
 
 /*
@@ -192,248 +172,110 @@ typedef xfs_dfsbno_t xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;	/* btree pointer type */
 					/* btree block header type */
 typedef struct xfs_btree_lblock xfs_bmbt_block_t;
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_BMBT_BLOCK)
-xfs_bmbt_block_t *xfs_buf_to_bmbt_block(struct xfs_buf *bp);
-#define XFS_BUF_TO_BMBT_BLOCK(bp)		xfs_buf_to_bmbt_block(bp)
-#else
-#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)(XFS_BUF_PTR(bp)))
-#endif
+#define XFS_BUF_TO_BMBT_BLOCK(bp)	((xfs_bmbt_block_t *)XFS_BUF_PTR(bp))
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_RBLOCK_DSIZE)
-int xfs_bmap_rblock_dsize(int lev, struct xfs_btree_cur *cur);
-#define XFS_BMAP_RBLOCK_DSIZE(lev,cur)		xfs_bmap_rblock_dsize(lev,cur)
-#else
-#define XFS_BMAP_RBLOCK_DSIZE(lev,cur) ((cur)->bc_private.b.forksize)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_RBLOCK_ISIZE)
-int xfs_bmap_rblock_isize(int lev, struct xfs_btree_cur *cur);
-#define XFS_BMAP_RBLOCK_ISIZE(lev,cur)		xfs_bmap_rblock_isize(lev,cur)
-#else
-#define XFS_BMAP_RBLOCK_ISIZE(lev,cur) \
+#define XFS_BMAP_IBLOCK_SIZE(lev,cur)	(1 << (cur)->bc_blocklog)
+#define XFS_BMAP_RBLOCK_DSIZE(lev,cur)	((cur)->bc_private.b.forksize)
+#define XFS_BMAP_RBLOCK_ISIZE(lev,cur)	\
 	((int)XFS_IFORK_PTR((cur)->bc_private.b.ip, \
-			    (cur)->bc_private.b.whichfork)->if_broot_bytes)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_IBLOCK_SIZE)
-int xfs_bmap_iblock_size(int lev, struct xfs_btree_cur *cur);
-#define XFS_BMAP_IBLOCK_SIZE(lev,cur)		xfs_bmap_iblock_size(lev,cur)
-#else
-#define XFS_BMAP_IBLOCK_SIZE(lev,cur) (1 << (cur)->bc_blocklog)
-#endif
+		    (cur)->bc_private.b.whichfork)->if_broot_bytes)
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_DSIZE)
-int xfs_bmap_block_dsize(int lev, struct xfs_btree_cur *cur);
-#define XFS_BMAP_BLOCK_DSIZE(lev,cur)		xfs_bmap_block_dsize(lev,cur)
-#else
-#define XFS_BMAP_BLOCK_DSIZE(lev,cur) \
-	((lev) == (cur)->bc_nlevels - 1 ? \
-		XFS_BMAP_RBLOCK_DSIZE(lev,cur) : \
-		XFS_BMAP_IBLOCK_SIZE(lev,cur))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_ISIZE)
-int xfs_bmap_block_isize(int lev, struct xfs_btree_cur *cur);
-#define XFS_BMAP_BLOCK_ISIZE(lev,cur)		xfs_bmap_block_isize(lev,cur)
-#else
-#define XFS_BMAP_BLOCK_ISIZE(lev,cur) \
-	((lev) == (cur)->bc_nlevels - 1 ? \
-		XFS_BMAP_RBLOCK_ISIZE(lev,cur) : \
-		XFS_BMAP_IBLOCK_SIZE(lev,cur))
-#endif
+#define XFS_BMAP_BLOCK_DSIZE(lev,cur)	\
+	(((lev) == (cur)->bc_nlevels - 1 ? \
+		XFS_BMAP_RBLOCK_DSIZE(lev,cur) : XFS_BMAP_IBLOCK_SIZE(lev,cur)))
+#define XFS_BMAP_BLOCK_ISIZE(lev,cur)	\
+	(((lev) == (cur)->bc_nlevels - 1 ? \
+		XFS_BMAP_RBLOCK_ISIZE(lev,cur) : XFS_BMAP_IBLOCK_SIZE(lev,cur)))
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_DMAXRECS)
-int xfs_bmap_block_dmaxrecs(int lev, struct xfs_btree_cur *cur);
-#define XFS_BMAP_BLOCK_DMAXRECS(lev,cur)	xfs_bmap_block_dmaxrecs(lev,cur)
-#else
 #define XFS_BMAP_BLOCK_DMAXRECS(lev,cur) \
-	((lev) == (cur)->bc_nlevels - 1 ? \
+	(((lev) == (cur)->bc_nlevels - 1 ? \
 		XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \
 			xfs_bmdr, (lev) == 0) : \
-		((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_IMAXRECS)
-int xfs_bmap_block_imaxrecs(int lev, struct xfs_btree_cur *cur);
-#define XFS_BMAP_BLOCK_IMAXRECS(lev,cur)	xfs_bmap_block_imaxrecs(lev,cur)
-#else
+		((cur)->bc_mp->m_bmap_dmxr[(lev) != 0])))
 #define XFS_BMAP_BLOCK_IMAXRECS(lev,cur) \
-	((lev) == (cur)->bc_nlevels - 1 ? \
-		XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur), \
-			xfs_bmbt, (lev) == 0) : \
-		((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))
-#endif
+	(((lev) == (cur)->bc_nlevels - 1 ? \
+			XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\
+				xfs_bmbt, (lev) == 0) : \
+			((cur)->bc_mp->m_bmap_dmxr[(lev) != 0])))
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_DMINRECS)
-int xfs_bmap_block_dminrecs(int lev, struct xfs_btree_cur *cur);
-#define XFS_BMAP_BLOCK_DMINRECS(lev,cur)	xfs_bmap_block_dminrecs(lev,cur)
-#else
 #define XFS_BMAP_BLOCK_DMINRECS(lev,cur) \
-	((lev) == (cur)->bc_nlevels - 1 ? \
-		XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \
-			xfs_bmdr, (lev) == 0) : \
-		((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_IMINRECS)
-int xfs_bmap_block_iminrecs(int lev, struct xfs_btree_cur *cur);
-#define XFS_BMAP_BLOCK_IMINRECS(lev,cur)	xfs_bmap_block_iminrecs(lev,cur)
-#else
+	(((lev) == (cur)->bc_nlevels - 1 ? \
+			XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur),\
+				xfs_bmdr, (lev) == 0) : \
+			((cur)->bc_mp->m_bmap_dmnr[(lev) != 0])))
 #define XFS_BMAP_BLOCK_IMINRECS(lev,cur) \
-	((lev) == (cur)->bc_nlevels - 1 ? \
-		XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur), \
-			xfs_bmbt, (lev) == 0) : \
-		((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_REC_DADDR)
-xfs_bmbt_rec_t *
-xfs_bmap_rec_daddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
-#define XFS_BMAP_REC_DADDR(bb,i,cur)		xfs_bmap_rec_daddr(bb,i,cur)
-#else
-#define XFS_BMAP_REC_DADDR(bb,i,cur) \
-	XFS_BTREE_REC_ADDR(XFS_BMAP_BLOCK_DSIZE(		\
-		INT_GET((bb)->bb_level, ARCH_CONVERT), cur),	\
-		xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS(	\
-			INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_REC_IADDR)
-xfs_bmbt_rec_t *
-xfs_bmap_rec_iaddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
-#define XFS_BMAP_REC_IADDR(bb,i,cur)		xfs_bmap_rec_iaddr(bb,i,cur)
-#else
-#define XFS_BMAP_REC_IADDR(bb,i,cur) \
-	XFS_BTREE_REC_ADDR(XFS_BMAP_BLOCK_ISIZE(		\
-		INT_GET((bb)->bb_level, ARCH_CONVERT), cur),	\
-		xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS(	\
-			INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_KEY_DADDR)
-xfs_bmbt_key_t *
-xfs_bmap_key_daddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
-#define XFS_BMAP_KEY_DADDR(bb,i,cur)		xfs_bmap_key_daddr(bb,i,cur)
-#else
-#define XFS_BMAP_KEY_DADDR(bb,i,cur) \
-	XFS_BTREE_KEY_ADDR(XFS_BMAP_BLOCK_DSIZE(		\
-		INT_GET((bb)->bb_level, ARCH_CONVERT), cur),	\
-		xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS(	\
-			INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_KEY_IADDR)
-xfs_bmbt_key_t *
-xfs_bmap_key_iaddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
-#define XFS_BMAP_KEY_IADDR(bb,i,cur)		xfs_bmap_key_iaddr(bb,i,cur)
-#else
-#define XFS_BMAP_KEY_IADDR(bb,i,cur) \
-	XFS_BTREE_KEY_ADDR(XFS_BMAP_BLOCK_ISIZE(		\
-		INT_GET((bb)->bb_level, ARCH_CONVERT), cur),	\
-		xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS(	\
-			INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_PTR_DADDR)
-xfs_bmbt_ptr_t *
-xfs_bmap_ptr_daddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
-#define XFS_BMAP_PTR_DADDR(bb,i,cur)		xfs_bmap_ptr_daddr(bb,i,cur)
-#else
-#define XFS_BMAP_PTR_DADDR(bb,i,cur) \
-	XFS_BTREE_PTR_ADDR(XFS_BMAP_BLOCK_DSIZE(		\
-		INT_GET((bb)->bb_level, ARCH_CONVERT), cur),	\
-		xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS(	\
-			INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_PTR_IADDR)
-xfs_bmbt_ptr_t *
-xfs_bmap_ptr_iaddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
-#define XFS_BMAP_PTR_IADDR(bb,i,cur)		xfs_bmap_ptr_iaddr(bb,i,cur)
-#else
-#define XFS_BMAP_PTR_IADDR(bb,i,cur) \
-	XFS_BTREE_PTR_ADDR(XFS_BMAP_BLOCK_ISIZE(		\
-		INT_GET((bb)->bb_level, ARCH_CONVERT), cur),	\
-		xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS(	\
-			INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
-#endif
+	(((lev) == (cur)->bc_nlevels - 1 ? \
+			XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\
+				xfs_bmbt, (lev) == 0) : \
+			((cur)->bc_mp->m_bmap_dmnr[(lev) != 0])))
+
+#define XFS_BMAP_REC_DADDR(bb,i,cur)	\
+	(XFS_BTREE_REC_ADDR(XFS_BMAP_BLOCK_DSIZE(			\
+			be16_to_cpu((bb)->bb_level), cur),		\
+			xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS(	\
+				be16_to_cpu((bb)->bb_level), cur)))
+#define XFS_BMAP_REC_IADDR(bb,i,cur)	\
+	(XFS_BTREE_REC_ADDR(XFS_BMAP_BLOCK_ISIZE(			\
+			be16_to_cpu((bb)->bb_level), cur),		\
+			xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS(	\
+				be16_to_cpu((bb)->bb_level), cur)))
+
+#define XFS_BMAP_KEY_DADDR(bb,i,cur)	\
+	(XFS_BTREE_KEY_ADDR(XFS_BMAP_BLOCK_DSIZE(			\
+			be16_to_cpu((bb)->bb_level), cur),		\
+			xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS(	\
+				be16_to_cpu((bb)->bb_level), cur)))
+#define XFS_BMAP_KEY_IADDR(bb,i,cur)	\
+	(XFS_BTREE_KEY_ADDR(XFS_BMAP_BLOCK_ISIZE(			\
+			be16_to_cpu((bb)->bb_level), cur),		\
+			xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS(	\
+				be16_to_cpu((bb)->bb_level), cur)))
+
+#define XFS_BMAP_PTR_DADDR(bb,i,cur)	\
+	(XFS_BTREE_PTR_ADDR(XFS_BMAP_BLOCK_DSIZE(			\
+			be16_to_cpu((bb)->bb_level), cur),		\
+			xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS(	\
+				be16_to_cpu((bb)->bb_level), cur)))
+#define XFS_BMAP_PTR_IADDR(bb,i,cur)	\
+	(XFS_BTREE_PTR_ADDR(XFS_BMAP_BLOCK_ISIZE(			\
+			be16_to_cpu((bb)->bb_level), cur),		\
+			xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS(	\
+				be16_to_cpu((bb)->bb_level), cur)))
 
 /*
  * These are to be used when we know the size of the block and
  * we don't have a cursor.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_REC_ADDR)
-xfs_bmbt_rec_t *xfs_bmap_broot_rec_addr(xfs_bmbt_block_t *bb, int i, int sz);
-#define XFS_BMAP_BROOT_REC_ADDR(bb,i,sz)	xfs_bmap_broot_rec_addr(bb,i,sz)
-#else
 #define XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) \
-	XFS_BTREE_REC_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_KEY_ADDR)
-xfs_bmbt_key_t *xfs_bmap_broot_key_addr(xfs_bmbt_block_t *bb, int i, int sz);
-#define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz)	xfs_bmap_broot_key_addr(bb,i,sz)
-#else
+	(XFS_BTREE_REC_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)))
 #define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \
-	XFS_BTREE_KEY_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_PTR_ADDR)
-xfs_bmbt_ptr_t *xfs_bmap_broot_ptr_addr(xfs_bmbt_block_t *bb, int i, int sz);
-#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz)	xfs_bmap_broot_ptr_addr(bb,i,sz)
-#else
+	(XFS_BTREE_KEY_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)))
 #define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) \
-	XFS_BTREE_PTR_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz))
-#endif
+	(XFS_BTREE_PTR_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)))
+
+#define XFS_BMAP_BROOT_NUMRECS(bb)	be16_to_cpu((bb)->bb_numrecs)
+#define XFS_BMAP_BROOT_MAXRECS(sz)	XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0)
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_NUMRECS)
-int xfs_bmap_broot_numrecs(xfs_bmdr_block_t *bb);
-#define XFS_BMAP_BROOT_NUMRECS(bb)		xfs_bmap_broot_numrecs(bb)
-#else
-#define XFS_BMAP_BROOT_NUMRECS(bb) (INT_GET((bb)->bb_numrecs, ARCH_CONVERT))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_MAXRECS)
-int xfs_bmap_broot_maxrecs(int sz);
-#define XFS_BMAP_BROOT_MAXRECS(sz)		xfs_bmap_broot_maxrecs(sz)
-#else
-#define XFS_BMAP_BROOT_MAXRECS(sz) XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_SPACE_CALC)
-int xfs_bmap_broot_space_calc(int nrecs);
-#define XFS_BMAP_BROOT_SPACE_CALC(nrecs)	xfs_bmap_broot_space_calc(nrecs)
-#else
 #define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \
-	((int)(sizeof(xfs_bmbt_block_t) + \
-	       ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_SPACE)
-int xfs_bmap_broot_space(xfs_bmdr_block_t *bb);
-#define XFS_BMAP_BROOT_SPACE(bb)		xfs_bmap_broot_space(bb)
-#else
+	(int)(sizeof(xfs_bmbt_block_t) + \
+	       ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
+
 #define XFS_BMAP_BROOT_SPACE(bb) \
-	XFS_BMAP_BROOT_SPACE_CALC(INT_GET((bb)->bb_numrecs, ARCH_CONVERT))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMDR_SPACE_CALC)
-int xfs_bmdr_space_calc(int nrecs);
-#define XFS_BMDR_SPACE_CALC(nrecs)		xfs_bmdr_space_calc(nrecs)
-#else
-#define XFS_BMDR_SPACE_CALC(nrecs)	\
-	((int)(sizeof(xfs_bmdr_block_t) + \
-	       ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))))
-#endif
+	(XFS_BMAP_BROOT_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs)))
+#define XFS_BMDR_SPACE_CALC(nrecs) \
+	(int)(sizeof(xfs_bmdr_block_t) + \
+	       ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
 
 /*
  * Maximum number of bmap btree levels.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BM_MAXLEVELS)
-int xfs_bm_maxlevels(struct xfs_mount *mp, int w);
-#define XFS_BM_MAXLEVELS(mp,w)			xfs_bm_maxlevels(mp,w)
-#else
-#define XFS_BM_MAXLEVELS(mp,w)		((mp)->m_bm_maxlevels[w])
-#endif
+#define XFS_BM_MAXLEVELS(mp,w)		((mp)->m_bm_maxlevels[(w)])
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_SANITY_CHECK)
-int xfs_bmap_sanity_check(struct xfs_mount *mp, xfs_bmbt_block_t *bb,
-	int level);
-#define XFS_BMAP_SANITY_CHECK(mp,bb,level)	\
-	xfs_bmap_sanity_check(mp,bb,level)
-#else
-#define XFS_BMAP_SANITY_CHECK(mp,bb,level)	\
-	(INT_GET((bb)->bb_magic, ARCH_CONVERT) == XFS_BMAP_MAGIC && \
-	 INT_GET((bb)->bb_level, ARCH_CONVERT) == level && \
-	 INT_GET((bb)->bb_numrecs, ARCH_CONVERT) > 0 && \
-	 INT_GET((bb)->bb_numrecs, ARCH_CONVERT) <= (mp)->m_bmap_dmxr[(level) != 0])
-#endif
+#define XFS_BMAP_SANITY_CHECK(mp,bb,level) \
+	(be32_to_cpu((bb)->bb_magic) == XFS_BMAP_MAGIC && \
+	 be16_to_cpu((bb)->bb_level) == level && \
+	 be16_to_cpu((bb)->bb_numrecs) > 0 && \
+	 be16_to_cpu((bb)->bb_numrecs) <= (mp)->m_bmap_dmxr[(level) != 0])
 
 
 #ifdef __KERNEL__
@@ -459,234 +301,84 @@ extern ktrace_t	*xfs_bmbt_trace_buf;
 /*
  * Prototypes for xfs_bmap.c to call.
  */
-
-void
-xfs_bmdr_to_bmbt(
-	xfs_bmdr_block_t *,
-	int,
-	xfs_bmbt_block_t *,
-	int);
-
-int
-xfs_bmbt_decrement(
-	struct xfs_btree_cur *,
-	int,
-	int *);
-
-int
-xfs_bmbt_delete(
-	struct xfs_btree_cur *,
-	int *);
-
-void
-xfs_bmbt_get_all(
-	xfs_bmbt_rec_t	*r,
-	xfs_bmbt_irec_t *s);
-
-xfs_bmbt_block_t *
-xfs_bmbt_get_block(
-	struct xfs_btree_cur	*cur,
-	int			level,
-	struct xfs_buf		**bpp);
-
-xfs_filblks_t
-xfs_bmbt_get_blockcount(
-	xfs_bmbt_rec_t	*r);
-
-xfs_fsblock_t
-xfs_bmbt_get_startblock(
-	xfs_bmbt_rec_t	*r);
-
-xfs_fileoff_t
-xfs_bmbt_get_startoff(
-	xfs_bmbt_rec_t	*r);
-
-xfs_exntst_t
-xfs_bmbt_get_state(
-	xfs_bmbt_rec_t	*r);
-
-#if __BYTE_ORDER != __BIG_ENDIAN
-void
-xfs_bmbt_disk_get_all(
-	xfs_bmbt_rec_t	*r,
-	xfs_bmbt_irec_t *s);
-
-xfs_exntst_t
-xfs_bmbt_disk_get_state(
-	xfs_bmbt_rec_t	*r);
-
-xfs_filblks_t
-xfs_bmbt_disk_get_blockcount(
-	xfs_bmbt_rec_t	*r);
-
-xfs_fsblock_t
-xfs_bmbt_disk_get_startblock(
-	xfs_bmbt_rec_t	*r);
-
-xfs_fileoff_t
-xfs_bmbt_disk_get_startoff(
-	xfs_bmbt_rec_t	*r);
-
-#else
-#define xfs_bmbt_disk_get_all(r, s) \
-	xfs_bmbt_get_all(r, s)
-#define xfs_bmbt_disk_get_state(r) \
-	xfs_bmbt_get_state(r)
-#define xfs_bmbt_disk_get_blockcount(r) \
-	xfs_bmbt_get_blockcount(r)
-#define xfs_bmbt_disk_get_startblock(r) \
-	xfs_bmbt_get_blockcount(r)
-#define xfs_bmbt_disk_get_startoff(r) \
-	xfs_bmbt_get_startoff(r)
-#endif
-
-int
-xfs_bmbt_increment(
-	struct xfs_btree_cur *,
-	int,
-	int *);
-
-int
-xfs_bmbt_insert(
-	struct xfs_btree_cur *,
-	int *);
-
-void
-xfs_bmbt_log_block(
-	struct xfs_btree_cur *,
-	struct xfs_buf *,
-	int);
-
-void
-xfs_bmbt_log_recs(
-	struct xfs_btree_cur *,
-	struct xfs_buf *,
-	int,
-	int);
-
-int
-xfs_bmbt_lookup_eq(
-	struct xfs_btree_cur *,
-	xfs_fileoff_t,
-	xfs_fsblock_t,
-	xfs_filblks_t,
-	int *);
-
-int
-xfs_bmbt_lookup_ge(
-	struct xfs_btree_cur *,
-	xfs_fileoff_t,
-	xfs_fsblock_t,
-	xfs_filblks_t,
-	int *);
+extern void xfs_bmdr_to_bmbt(xfs_bmdr_block_t *, int, xfs_bmbt_block_t *, int);
+extern int xfs_bmbt_decrement(struct xfs_btree_cur *, int, int *);
+extern int xfs_bmbt_delete(struct xfs_btree_cur *, int *);
+extern void xfs_bmbt_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
+extern xfs_bmbt_block_t *xfs_bmbt_get_block(struct xfs_btree_cur *cur,
+						int, struct xfs_buf **bpp);
+extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_t *r);
+extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_t *r);
+extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_t *r);
+extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_t *r);
+
+#ifndef XFS_NATIVE_HOST
+extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
+extern xfs_exntst_t xfs_bmbt_disk_get_state(xfs_bmbt_rec_t *r);
+extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
+extern xfs_fsblock_t xfs_bmbt_disk_get_startblock(xfs_bmbt_rec_t *r);
+extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
+#else
+#define xfs_bmbt_disk_get_all(r, s)	xfs_bmbt_get_all(r, s)
+#define xfs_bmbt_disk_get_state(r)	xfs_bmbt_get_state(r)
+#define xfs_bmbt_disk_get_blockcount(r)	xfs_bmbt_get_blockcount(r)
+#define xfs_bmbt_disk_get_startblock(r)	xfs_bmbt_get_blockcount(r)
+#define xfs_bmbt_disk_get_startoff(r)	xfs_bmbt_get_startoff(r)
+#endif /* XFS_NATIVE_HOST */
+
+extern int xfs_bmbt_increment(struct xfs_btree_cur *, int, int *);
+extern int xfs_bmbt_insert(struct xfs_btree_cur *, int *);
+extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
+extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int,
+				int);
+extern int xfs_bmbt_lookup_eq(struct xfs_btree_cur *, xfs_fileoff_t,
+				xfs_fsblock_t, xfs_filblks_t, int *);
+extern int xfs_bmbt_lookup_ge(struct xfs_btree_cur *, xfs_fileoff_t,
+				xfs_fsblock_t, xfs_filblks_t, int *);
 
 /*
  * Give the bmap btree a new root block.  Copy the old broot contents
  * down into a real block and make the broot point to it.
  */
-int						/* error */
-xfs_bmbt_newroot(
-	struct xfs_btree_cur	*cur,		/* btree cursor */
-	int			*logflags,	/* logging flags for inode */
-	int			*stat);		/* return status - 0 fail */
-
-void
-xfs_bmbt_set_all(
-	xfs_bmbt_rec_t	*r,
-	xfs_bmbt_irec_t *s);
-
-void
-xfs_bmbt_set_allf(
-	xfs_bmbt_rec_t	*r,
-	xfs_fileoff_t	o,
-	xfs_fsblock_t	b,
-	xfs_filblks_t	c,
-	xfs_exntst_t	v);
-
-void
-xfs_bmbt_set_blockcount(
-	xfs_bmbt_rec_t	*r,
-	xfs_filblks_t	v);
-
-void
-xfs_bmbt_set_startblock(
-	xfs_bmbt_rec_t	*r,
-	xfs_fsblock_t	v);
-
-void
-xfs_bmbt_set_startoff(
-	xfs_bmbt_rec_t	*r,
-	xfs_fileoff_t	v);
-
-void
-xfs_bmbt_set_state(
-	xfs_bmbt_rec_t	*r,
-	xfs_exntst_t	v);
-
-#if __BYTE_ORDER != __BIG_ENDIAN
-void
-xfs_bmbt_disk_set_all(
-	xfs_bmbt_rec_t	*r,
-	xfs_bmbt_irec_t *s);
-
-void
-xfs_bmbt_disk_set_allf(
-	xfs_bmbt_rec_t	*r,
-	xfs_fileoff_t	o,
-	xfs_fsblock_t	b,
-	xfs_filblks_t	c,
-	xfs_exntst_t	v);
+extern int xfs_bmbt_newroot(struct xfs_btree_cur *cur, int *lflags, int *stat);
+
+extern void xfs_bmbt_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
+extern void xfs_bmbt_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o,
+			xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
+extern void xfs_bmbt_set_blockcount(xfs_bmbt_rec_t *r, xfs_filblks_t v);
+extern void xfs_bmbt_set_startblock(xfs_bmbt_rec_t *r, xfs_fsblock_t v);
+extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_t *r, xfs_fileoff_t v);
+extern void xfs_bmbt_set_state(xfs_bmbt_rec_t *r, xfs_exntst_t v);
+
+#ifndef XFS_NATIVE_HOST
+extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
+extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o,
+			xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
 #else
-#define xfs_bmbt_disk_set_all(r, s) \
-	xfs_bmbt_set_all(r, s)
-#define xfs_bmbt_disk_set_allf(r, o, b, c, v) \
-	xfs_bmbt_set_allf(r, o, b, c, v)
-#endif
+#define xfs_bmbt_disk_set_all(r, s)		xfs_bmbt_set_all(r, s)
+#define xfs_bmbt_disk_set_allf(r, o, b, c, v)	xfs_bmbt_set_allf(r, o, b, c, v)
+#endif /* XFS_NATIVE_HOST */
 
-void
-xfs_bmbt_to_bmdr(
-	xfs_bmbt_block_t *,
-	int,
-	xfs_bmdr_block_t *,
-	int);
-
-int
-xfs_bmbt_update(
-	struct xfs_btree_cur *,
-	xfs_fileoff_t,
-	xfs_fsblock_t,
-	xfs_filblks_t,
-	xfs_exntst_t);
+extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int);
+extern int xfs_bmbt_update(struct xfs_btree_cur *, xfs_fileoff_t,
+				xfs_fsblock_t, xfs_filblks_t, xfs_exntst_t);
 
 #ifdef DEBUG
 /*
  * Get the data from the pointed-to record.
  */
-int
-xfs_bmbt_get_rec(
-	struct xfs_btree_cur *,
-	xfs_fileoff_t *,
-	xfs_fsblock_t *,
-	xfs_filblks_t *,
-	xfs_exntst_t *,
-	int *);
+extern int xfs_bmbt_get_rec(struct xfs_btree_cur *, xfs_fileoff_t *,
+				xfs_fsblock_t *, xfs_filblks_t *,
+				xfs_exntst_t *, int *);
 #endif
 
-
 /*
  * Search an extent list for the extent which includes block
  * bno.
  */
-xfs_bmbt_rec_t *
-xfs_bmap_do_search_extents(
-	xfs_bmbt_rec_t *,
-	xfs_extnum_t,
-	xfs_extnum_t,
-	xfs_fileoff_t,
-	int *,
-	xfs_extnum_t *,
-	xfs_bmbt_irec_t *,
-	xfs_bmbt_irec_t *);
+xfs_bmbt_rec_t *xfs_bmap_do_search_extents(xfs_bmbt_rec_t *,
+			xfs_extnum_t, xfs_extnum_t, xfs_fileoff_t, int *,
+			xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
 
 #endif	/* __KERNEL__ */
 
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 0cc63d657a1..52d5d095fc3 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -1,45 +1,26 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
-/*
- * This file contains common code for the space manager's btree implementations.
- */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -47,17 +28,16 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
-#include "xfs_bit.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
 #include "xfs_error.h"
 
 /*
@@ -110,11 +90,14 @@ xfs_btree_maxrecs(
 	switch (cur->bc_btnum) {
 	case XFS_BTNUM_BNO:
 	case XFS_BTNUM_CNT:
-		return (int)XFS_ALLOC_BLOCK_MAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur);
+		return (int)XFS_ALLOC_BLOCK_MAXRECS(
+				be16_to_cpu(block->bb_h.bb_level), cur);
 	case XFS_BTNUM_BMAP:
-		return (int)XFS_BMAP_BLOCK_IMAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur);
+		return (int)XFS_BMAP_BLOCK_IMAXRECS(
+				be16_to_cpu(block->bb_h.bb_level), cur);
 	case XFS_BTNUM_INO:
-		return (int)XFS_INOBT_BLOCK_MAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur);
+		return (int)XFS_INOBT_BLOCK_MAXRECS(
+				be16_to_cpu(block->bb_h.bb_level), cur);
 	default:
 		ASSERT(0);
 		return 0;
@@ -160,7 +143,7 @@ xfs_btree_check_key(
 
 		k1 = ak1;
 		k2 = ak2;
-		ASSERT(INT_GET(k1->ar_startblock, ARCH_CONVERT) < INT_GET(k2->ar_startblock, ARCH_CONVERT));
+		ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
 		break;
 	    }
 	case XFS_BTNUM_CNT: {
@@ -169,9 +152,9 @@ xfs_btree_check_key(
 
 		k1 = ak1;
 		k2 = ak2;
-		ASSERT(INT_GET(k1->ar_blockcount, ARCH_CONVERT) < INT_GET(k2->ar_blockcount, ARCH_CONVERT) ||
-		       (INT_GET(k1->ar_blockcount, ARCH_CONVERT) == INT_GET(k2->ar_blockcount, ARCH_CONVERT) &&
-			INT_GET(k1->ar_startblock, ARCH_CONVERT) < INT_GET(k2->ar_startblock, ARCH_CONVERT)));
+		ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
+		       (k1->ar_blockcount == k2->ar_blockcount &&
+			be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
 		break;
 	    }
 	case XFS_BTNUM_BMAP: {
@@ -214,16 +197,16 @@ xfs_btree_check_lblock(
 
 	mp = cur->bc_mp;
 	lblock_ok =
-		INT_GET(block->bb_magic, ARCH_CONVERT) == xfs_magics[cur->bc_btnum] &&
-		INT_GET(block->bb_level, ARCH_CONVERT) == level &&
-		INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
+		be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
+		be16_to_cpu(block->bb_level) == level &&
+		be16_to_cpu(block->bb_numrecs) <=
 			xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
 		block->bb_leftsib &&
-		(INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO ||
-		 XFS_FSB_SANITY_CHECK(mp, INT_GET(block->bb_leftsib, ARCH_CONVERT))) &&
+		(be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
+		 XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
 		block->bb_rightsib &&
-		(INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO ||
-		 XFS_FSB_SANITY_CHECK(mp, INT_GET(block->bb_rightsib, ARCH_CONVERT)));
+		(be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
+		 XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
 	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK,
 			XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
 		if (bp)
@@ -271,8 +254,9 @@ xfs_btree_check_rec(
 
 		r1 = ar1;
 		r2 = ar2;
-		ASSERT(INT_GET(r1->ar_startblock, ARCH_CONVERT) + INT_GET(r1->ar_blockcount, ARCH_CONVERT) <=
-		       INT_GET(r2->ar_startblock, ARCH_CONVERT));
+		ASSERT(be32_to_cpu(r1->ar_startblock) +
+		       be32_to_cpu(r1->ar_blockcount) <=
+		       be32_to_cpu(r2->ar_startblock));
 		break;
 	    }
 	case XFS_BTNUM_CNT: {
@@ -281,9 +265,9 @@ xfs_btree_check_rec(
 
 		r1 = ar1;
 		r2 = ar2;
-		ASSERT(INT_GET(r1->ar_blockcount, ARCH_CONVERT) < INT_GET(r2->ar_blockcount, ARCH_CONVERT) ||
-		       (INT_GET(r1->ar_blockcount, ARCH_CONVERT) == INT_GET(r2->ar_blockcount, ARCH_CONVERT) &&
-			INT_GET(r1->ar_startblock, ARCH_CONVERT) < INT_GET(r2->ar_startblock, ARCH_CONVERT)));
+		ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
+		       (r1->ar_blockcount == r2->ar_blockcount &&
+			be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
 		break;
 	    }
 	case XFS_BTNUM_BMAP: {
@@ -331,17 +315,17 @@ xfs_btree_check_sblock(
 
 	agbp = cur->bc_private.a.agbp;
 	agf = XFS_BUF_TO_AGF(agbp);
-	agflen = INT_GET(agf->agf_length, ARCH_CONVERT);
+	agflen = be32_to_cpu(agf->agf_length);
 	sblock_ok =
-		INT_GET(block->bb_magic, ARCH_CONVERT) == xfs_magics[cur->bc_btnum] &&
-		INT_GET(block->bb_level, ARCH_CONVERT) == level &&
-		INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
+		be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
+		be16_to_cpu(block->bb_level) == level &&
+		be16_to_cpu(block->bb_numrecs) <=
 			xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
-		(INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK ||
-		 INT_GET(block->bb_leftsib, ARCH_CONVERT) < agflen) &&
+		(be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
+		 be32_to_cpu(block->bb_leftsib) < agflen) &&
 		block->bb_leftsib &&
-		(INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK ||
-		 INT_GET(block->bb_rightsib, ARCH_CONVERT) < agflen) &&
+		(be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
+		 be32_to_cpu(block->bb_rightsib) < agflen) &&
 		block->bb_rightsib;
 	if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
 			XFS_ERRTAG_BTREE_CHECK_SBLOCK,
@@ -372,7 +356,7 @@ xfs_btree_check_sptr(
 	XFS_WANT_CORRUPTED_RETURN(
 		level > 0 &&
 		ptr != NULLAGBLOCK && ptr != 0 &&
-		ptr < INT_GET(agf->agf_length, ARCH_CONVERT));
+		ptr < be32_to_cpu(agf->agf_length));
 	return 0;
 }
 
@@ -611,15 +595,15 @@ xfs_btree_init_cursor(
 	case XFS_BTNUM_BNO:
 	case XFS_BTNUM_CNT:
 		agf = XFS_BUF_TO_AGF(agbp);
-		nlevels = INT_GET(agf->agf_levels[btnum], ARCH_CONVERT);
+		nlevels = be32_to_cpu(agf->agf_levels[btnum]);
 		break;
 	case XFS_BTNUM_BMAP:
 		ifp = XFS_IFORK_PTR(ip, whichfork);
-		nlevels = INT_GET(ifp->if_broot->bb_level, ARCH_CONVERT) + 1;
+		nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
 		break;
 	case XFS_BTNUM_INO:
 		agi = XFS_BUF_TO_AGI(agbp);
-		nlevels = INT_GET(agi->agi_level, ARCH_CONVERT);
+		nlevels = be32_to_cpu(agi->agi_level);
 		break;
 	default:
 		ASSERT(0);
@@ -683,9 +667,9 @@ xfs_btree_islastblock(
 	block = xfs_btree_get_block(cur, level, &bp);
 	xfs_btree_check_block(cur, block, level, bp);
 	if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
-		return INT_GET(block->bb_u.l.bb_rightsib, ARCH_CONVERT) == NULLDFSBNO;
+		return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
 	else
-		return INT_GET(block->bb_u.s.bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK;
+		return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
 }
 
 /*
@@ -713,7 +697,7 @@ xfs_btree_lastrec(
 	/*
 	 * Set the ptr value to numrecs, that's the last record/key.
 	 */
-	cur->bc_ptrs[level] = INT_GET(block->bb_h.bb_numrecs, ARCH_CONVERT);
+	cur->bc_ptrs[level] = be16_to_cpu(block->bb_h.bb_numrecs);
 	return 1;
 }
 
@@ -883,38 +867,38 @@ xfs_btree_readahead_core(
 	case XFS_BTNUM_BNO:
 	case XFS_BTNUM_CNT:
 		a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]);
-		if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(a->bb_leftsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(a->bb_leftsib) != NULLAGBLOCK) {
 			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
-				INT_GET(a->bb_leftsib, ARCH_CONVERT), 1);
+				be32_to_cpu(a->bb_leftsib), 1);
 			rval++;
 		}
-		if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(a->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(a->bb_rightsib) != NULLAGBLOCK) {
 			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
-				INT_GET(a->bb_rightsib, ARCH_CONVERT), 1);
+				be32_to_cpu(a->bb_rightsib), 1);
 			rval++;
 		}
 		break;
 	case XFS_BTNUM_BMAP:
 		b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]);
-		if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(b->bb_leftsib, ARCH_CONVERT) != NULLDFSBNO) {
-			xfs_btree_reada_bufl(cur->bc_mp, INT_GET(b->bb_leftsib, ARCH_CONVERT), 1);
+		if ((lr & XFS_BTCUR_LEFTRA) && be64_to_cpu(b->bb_leftsib) != NULLDFSBNO) {
+			xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_leftsib), 1);
 			rval++;
 		}
-		if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(b->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
-			xfs_btree_reada_bufl(cur->bc_mp, INT_GET(b->bb_rightsib, ARCH_CONVERT), 1);
+		if ((lr & XFS_BTCUR_RIGHTRA) && be64_to_cpu(b->bb_rightsib) != NULLDFSBNO) {
+			xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_rightsib), 1);
 			rval++;
 		}
 		break;
 	case XFS_BTNUM_INO:
 		i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
-		if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(i->bb_leftsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) {
 			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
-				INT_GET(i->bb_leftsib, ARCH_CONVERT), 1);
+				be32_to_cpu(i->bb_leftsib), 1);
 			rval++;
 		}
-		if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(i->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) {
 			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
-				INT_GET(i->bb_rightsib, ARCH_CONVERT), 1);
+				be32_to_cpu(i->bb_rightsib), 1);
 			rval++;
 		}
 		break;
@@ -946,14 +930,14 @@ xfs_btree_setbuf(
 		return;
 	b = XFS_BUF_TO_BLOCK(bp);
 	if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) {
-		if (INT_GET(b->bb_u.l.bb_leftsib, ARCH_CONVERT) == NULLDFSBNO)
+		if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
 			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
-		if (INT_GET(b->bb_u.l.bb_rightsib, ARCH_CONVERT) == NULLDFSBNO)
+		if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
 			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
 	} else {
-		if (INT_GET(b->bb_u.s.bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK)
+		if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
 			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
-		if (INT_GET(b->bb_u.s.bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK)
+		if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
 			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
 	}
 }
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 09b4e1532a3..44f1bd98064 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_BTREE_H__
 #define	__XFS_BTREE_H__
@@ -53,25 +39,23 @@ struct xfs_trans;
 /*
  * Short form header: space allocation btrees.
  */
-typedef struct xfs_btree_sblock
-{
-	__uint32_t	bb_magic;	/* magic number for block type */
-	__uint16_t	bb_level;	/* 0 is a leaf */
-	__uint16_t	bb_numrecs;	/* current # of data records */
-	xfs_agblock_t	bb_leftsib;	/* left sibling block or NULLAGBLOCK */
-	xfs_agblock_t	bb_rightsib;	/* right sibling block or NULLAGBLOCK */
+typedef struct xfs_btree_sblock {
+	__be32		bb_magic;	/* magic number for block type */
+	__be16		bb_level;	/* 0 is a leaf */
+	__be16		bb_numrecs;	/* current # of data records */
+	__be32		bb_leftsib;	/* left sibling block or NULLAGBLOCK */
+	__be32		bb_rightsib;	/* right sibling block or NULLAGBLOCK */
 } xfs_btree_sblock_t;
 
 /*
  * Long form header: bmap btrees.
  */
-typedef struct xfs_btree_lblock
-{
-	__uint32_t	bb_magic;	/* magic number for block type */
-	__uint16_t	bb_level;	/* 0 is a leaf */
-	__uint16_t	bb_numrecs;	/* current # of data records */
-	xfs_dfsbno_t	bb_leftsib;	/* left sibling block or NULLDFSBNO */
-	xfs_dfsbno_t	bb_rightsib;	/* right sibling block or NULLDFSBNO */
+typedef struct xfs_btree_lblock {
+	__be32		bb_magic;	/* magic number for block type */
+	__be16		bb_level;	/* 0 is a leaf */
+	__be16		bb_numrecs;	/* current # of data records */
+	__be64		bb_leftsib;	/* left sibling block or NULLDFSBNO */
+	__be64		bb_rightsib;	/* right sibling block or NULLDFSBNO */
 } xfs_btree_lblock_t;
 
 /*
@@ -79,24 +63,23 @@ typedef struct xfs_btree_lblock
  */
 typedef struct xfs_btree_hdr
 {
-	__uint32_t	bb_magic;	/* magic number for block type */
-	__uint16_t	bb_level;	/* 0 is a leaf */
-	__uint16_t	bb_numrecs;	/* current # of data records */
+	__be32		bb_magic;	/* magic number for block type */
+	__be16		bb_level;	/* 0 is a leaf */
+	__be16		bb_numrecs;	/* current # of data records */
 } xfs_btree_hdr_t;
 
-typedef struct xfs_btree_block
-{
+typedef struct xfs_btree_block {
 	xfs_btree_hdr_t	bb_h;		/* header */
-	union		{
-		struct	{
-			xfs_agblock_t	bb_leftsib;
-			xfs_agblock_t	bb_rightsib;
-		}	s;		/* short form pointers */
+	union {
+		struct {
+			__be32		bb_leftsib;
+			__be32		bb_rightsib;
+		} s;			/* short form pointers */
 		struct	{
-			xfs_dfsbno_t	bb_leftsib;
-			xfs_dfsbno_t	bb_rightsib;
-		}	l;		/* long form pointers */
-	}		bb_u;		/* rest */
+			__be64		bb_leftsib;
+			__be64		bb_rightsib;
+		} l;			/* long form pointers */
+	} bb_u;				/* rest */
 } xfs_btree_block_t;
 
 /*
@@ -113,12 +96,7 @@ typedef struct xfs_btree_block
 /*
  * Boolean to select which form of xfs_btree_block_t.bb_u to use.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BTREE_LONG_PTRS)
-int xfs_btree_long_ptrs(xfs_btnum_t btnum);
-#define	XFS_BTREE_LONG_PTRS(btnum)	((btnum) == XFS_BTNUM_BMAP)
-#else
 #define	XFS_BTREE_LONG_PTRS(btnum)	((btnum) == XFS_BTNUM_BMAP)
-#endif
 
 /*
  * Magic numbers for btree blocks.
@@ -165,7 +143,7 @@ typedef struct xfs_btree_cur
 	struct xfs_trans	*bc_tp;	/* transaction we're in, if any */
 	struct xfs_mount	*bc_mp;	/* file system mount struct */
 	union {
-		xfs_alloc_rec_t		a;
+		xfs_alloc_rec_incore_t	a;
 		xfs_bmbt_irec_t		b;
 		xfs_inobt_rec_t		i;
 	}		bc_rec;		/* current insert/search record value */
@@ -205,24 +183,10 @@ typedef struct xfs_btree_cur
 /*
  * Convert from buffer to btree block header.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_BLOCK)
-xfs_btree_block_t *xfs_buf_to_block(struct xfs_buf *bp);
-#define	XFS_BUF_TO_BLOCK(bp)	xfs_buf_to_block(bp)
-#else
-#define	XFS_BUF_TO_BLOCK(bp)	((xfs_btree_block_t *)(XFS_BUF_PTR(bp)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_LBLOCK)
-xfs_btree_lblock_t *xfs_buf_to_lblock(struct xfs_buf *bp);
-#define	XFS_BUF_TO_LBLOCK(bp)	xfs_buf_to_lblock(bp)
-#else
-#define	XFS_BUF_TO_LBLOCK(bp)	((xfs_btree_lblock_t *)(XFS_BUF_PTR(bp)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_SBLOCK)
-xfs_btree_sblock_t *xfs_buf_to_sblock(struct xfs_buf *bp);
-#define	XFS_BUF_TO_SBLOCK(bp)	xfs_buf_to_sblock(bp)
-#else
-#define	XFS_BUF_TO_SBLOCK(bp)	((xfs_btree_sblock_t *)(XFS_BUF_PTR(bp)))
-#endif
+#define	XFS_BUF_TO_BLOCK(bp)	((xfs_btree_block_t *)XFS_BUF_PTR(bp))
+#define	XFS_BUF_TO_LBLOCK(bp)	((xfs_btree_lblock_t *)XFS_BUF_PTR(bp))
+#define	XFS_BUF_TO_SBLOCK(bp)	((xfs_btree_sblock_t *)XFS_BUF_PTR(bp))
+
 
 #ifdef __KERNEL__
 
@@ -477,106 +441,33 @@ xfs_btree_setbuf(
 /*
  * Min and max functions for extlen, agblock, fileoff, and filblks types.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_EXTLEN_MIN)
-xfs_extlen_t xfs_extlen_min(xfs_extlen_t a, xfs_extlen_t b);
-#define	XFS_EXTLEN_MIN(a,b)	xfs_extlen_min(a,b)
-#else
 #define	XFS_EXTLEN_MIN(a,b)	\
 	((xfs_extlen_t)(a) < (xfs_extlen_t)(b) ? \
-	 (xfs_extlen_t)(a) : (xfs_extlen_t)(b))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_EXTLEN_MAX)
-xfs_extlen_t xfs_extlen_max(xfs_extlen_t a, xfs_extlen_t b);
-#define	XFS_EXTLEN_MAX(a,b)	xfs_extlen_max(a,b)
-#else
+		(xfs_extlen_t)(a) : (xfs_extlen_t)(b))
 #define	XFS_EXTLEN_MAX(a,b)	\
 	((xfs_extlen_t)(a) > (xfs_extlen_t)(b) ? \
-	 (xfs_extlen_t)(a) : (xfs_extlen_t)(b))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGBLOCK_MIN)
-xfs_agblock_t xfs_agblock_min(xfs_agblock_t a, xfs_agblock_t b);
-#define	XFS_AGBLOCK_MIN(a,b)	xfs_agblock_min(a,b)
-#else
+		(xfs_extlen_t)(a) : (xfs_extlen_t)(b))
 #define	XFS_AGBLOCK_MIN(a,b)	\
 	((xfs_agblock_t)(a) < (xfs_agblock_t)(b) ? \
-	 (xfs_agblock_t)(a) : (xfs_agblock_t)(b))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGBLOCK_MAX)
-xfs_agblock_t xfs_agblock_max(xfs_agblock_t a, xfs_agblock_t b);
-#define	XFS_AGBLOCK_MAX(a,b)	xfs_agblock_max(a,b)
-#else
+		(xfs_agblock_t)(a) : (xfs_agblock_t)(b))
 #define	XFS_AGBLOCK_MAX(a,b)	\
 	((xfs_agblock_t)(a) > (xfs_agblock_t)(b) ? \
-	 (xfs_agblock_t)(a) : (xfs_agblock_t)(b))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILEOFF_MIN)
-xfs_fileoff_t xfs_fileoff_min(xfs_fileoff_t a, xfs_fileoff_t b);
-#define	XFS_FILEOFF_MIN(a,b)	xfs_fileoff_min(a,b)
-#else
+		(xfs_agblock_t)(a) : (xfs_agblock_t)(b))
 #define	XFS_FILEOFF_MIN(a,b)	\
 	((xfs_fileoff_t)(a) < (xfs_fileoff_t)(b) ? \
-	 (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILEOFF_MAX)
-xfs_fileoff_t xfs_fileoff_max(xfs_fileoff_t a, xfs_fileoff_t b);
-#define	XFS_FILEOFF_MAX(a,b)	xfs_fileoff_max(a,b)
-#else
+		(xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
 #define	XFS_FILEOFF_MAX(a,b)	\
 	((xfs_fileoff_t)(a) > (xfs_fileoff_t)(b) ? \
-	 (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILBLKS_MIN)
-xfs_filblks_t xfs_filblks_min(xfs_filblks_t a, xfs_filblks_t b);
-#define	XFS_FILBLKS_MIN(a,b)	xfs_filblks_min(a,b)
-#else
+		(xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
 #define	XFS_FILBLKS_MIN(a,b)	\
 	((xfs_filblks_t)(a) < (xfs_filblks_t)(b) ? \
-	 (xfs_filblks_t)(a) : (xfs_filblks_t)(b))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILBLKS_MAX)
-xfs_filblks_t xfs_filblks_max(xfs_filblks_t a, xfs_filblks_t b);
-#define	XFS_FILBLKS_MAX(a,b)	xfs_filblks_max(a,b)
-#else
+		(xfs_filblks_t)(a) : (xfs_filblks_t)(b))
 #define	XFS_FILBLKS_MAX(a,b)	\
 	((xfs_filblks_t)(a) > (xfs_filblks_t)(b) ? \
-	 (xfs_filblks_t)(a) : (xfs_filblks_t)(b))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_SANITY_CHECK)
-int xfs_fsb_sanity_check(struct xfs_mount *mp, xfs_fsblock_t fsb);
-#define	XFS_FSB_SANITY_CHECK(mp,fsb)	xfs_fsb_sanity_check(mp,fsb)
-#else
+		(xfs_filblks_t)(a) : (xfs_filblks_t)(b))
+
 #define	XFS_FSB_SANITY_CHECK(mp,fsb)	\
 	(XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
-	 XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
-#endif
-
-/*
- * Macros to set EFSCORRUPTED & return/branch.
- */
-#define	XFS_WANT_CORRUPTED_GOTO(x,l)	\
-	{ \
-		int fs_is_ok = (x); \
-		ASSERT(fs_is_ok); \
-		if (unlikely(!fs_is_ok)) { \
-			XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \
-					 XFS_ERRLEVEL_LOW, NULL); \
-			error = XFS_ERROR(EFSCORRUPTED); \
-			goto l; \
-		} \
-	}
-
-#define	XFS_WANT_CORRUPTED_RETURN(x)	\
-	{ \
-		int fs_is_ok = (x); \
-		ASSERT(fs_is_ok); \
-		if (unlikely(!fs_is_ok)) { \
-			XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \
-					 XFS_ERRLEVEL_LOW, NULL); \
-			return XFS_ERROR(EFSCORRUPTED); \
-		} \
-	}
+		XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
 
 #endif	/* __XFS_BTREE_H__ */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 30b8285ad47..07e2324152b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1,57 +1,33 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-/*
- * This file contains the implementation of the xfs_buf_log_item.
- * It contains the item operations used to manipulate the buf log
- * items as well as utility routines used by the buffer specific
- * transaction routines.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
-#include "xfs_buf_item.h"
 #include "xfs_sb.h"
 #include "xfs_dir.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
+#include "xfs_buf_item.h"
 #include "xfs_trans_priv.h"
-#include "xfs_rw.h"
-#include "xfs_bit.h"
 #include "xfs_error.h"
 
 
@@ -274,6 +250,7 @@ xfs_buf_item_format(
 		       ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
 	vecp->i_addr = (xfs_caddr_t)&bip->bli_format;
 	vecp->i_len = base_size;
+	XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT);
 	vecp++;
 	nvecs = 1;
 
@@ -320,12 +297,14 @@ xfs_buf_item_format(
 			buffer_offset = first_bit * XFS_BLI_CHUNK;
 			vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
 			vecp->i_len = nbits * XFS_BLI_CHUNK;
+			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
 			nvecs++;
 			break;
 		} else if (next_bit != last_bit + 1) {
 			buffer_offset = first_bit * XFS_BLI_CHUNK;
 			vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
 			vecp->i_len = nbits * XFS_BLI_CHUNK;
+			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
 			nvecs++;
 			vecp++;
 			first_bit = next_bit;
@@ -337,6 +316,7 @@ xfs_buf_item_format(
 			buffer_offset = first_bit * XFS_BLI_CHUNK;
 			vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
 			vecp->i_len = nbits * XFS_BLI_CHUNK;
+			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
 /* You would think we need to bump the nvecs here too, but we do not
  * this number is used by recovery, and it gets confused by the boundary
  * split here
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 01aed5f2d57..07c708c2b52 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef	__XFS_BUF_ITEM_H__
 #define	__XFS_BUF_ITEM_H__
diff --git a/fs/xfs/xfs_cap.h b/fs/xfs/xfs_cap.h
index 2deac730375..433ec537f9b 100644
--- a/fs/xfs/xfs_cap.h
+++ b/fs/xfs/xfs_cap.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_CAP_H__
 #define __XFS_CAP_H__
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index b3215ffe0be..328a528b926 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_CLNT_H__
 #define __XFS_CLNT_H__
@@ -55,6 +41,7 @@
  */
 struct xfs_mount_args {
 	int	flags;		/* flags -> see XFSMNT_... macros below */
+	int	flags2;		/* flags -> see XFSMNT2_... macros below */
 	int	logbufs;	/* Number of log buffers, -1 to default */
 	int	logbufsize;	/* Size of log buffers, -1 to default */
 	char	fsname[MAXNAMELEN+1];	/* data device name */
@@ -68,9 +55,9 @@ struct xfs_mount_args {
 };
 
 /*
- * XFS mount option flags
+ * XFS mount option flags -- args->flags1
  */
-#define	XFSMNT_CHKLOG		0x00000001	/* check log */
+#define	XFSMNT_COMPAT_ATTR	0x00000001	/* do not use ATTR2 format */
 #define	XFSMNT_WSYNC		0x00000002	/* safe mode nfs mount
 						 * compatible */
 #define	XFSMNT_INO64		0x00000004	/* move inode numbers up
@@ -91,7 +78,7 @@ struct xfs_mount_args {
 #define XFSMNT_SHARED		0x00001000	/* shared XFS mount */
 #define XFSMNT_IOSIZE		0x00002000	/* optimize for I/O size */
 #define XFSMNT_OSYNCISOSYNC	0x00004000	/* o_sync is REALLY o_sync */
-						/* (osyncisdsync is now default) */
+						/* (osyncisdsync is default) */
 #define XFSMNT_32BITINODES	0x00200000	/* restrict inodes to 32
 						 * bits of address space */
 #define XFSMNT_GQUOTA		0x00400000	/* group quota accounting */
@@ -99,12 +86,19 @@ struct xfs_mount_args {
 						 * enforcement */
 #define XFSMNT_NOUUID		0x01000000	/* Ignore fs uuid */
 #define XFSMNT_DMAPI		0x02000000	/* enable dmapi/xdsm */
-#define XFSMNT_NOLOGFLUSH	0x04000000	/* Don't flush for log blocks */
+#define XFSMNT_BARRIER		0x04000000	/* use write barriers */
 #define XFSMNT_IDELETE		0x08000000	/* inode cluster delete */
 #define XFSMNT_SWALLOC		0x10000000	/* turn on stripe width
 						 * allocation */
 #define XFSMNT_IHASHSIZE	0x20000000	/* inode hash table size */
 #define XFSMNT_DIRSYNC		0x40000000	/* sync creat,link,unlink,rename
 						 * symlink,mkdir,rmdir,mknod */
+#define XFSMNT_FLAGS2		0x80000000	/* more flags set in flags2 */
+
+/*
+ * XFS mount option flags -- args->flags2
+ */
+#define XFSMNT2_COMPAT_IOSIZE	0x00000001	/* don't report large preferred
+						 * I/O size in stat(2) */
 
 #endif	/* __XFS_CLNT_H__ */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 8b792ddf216..473671fa5c1 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1,41 +1,26 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -43,19 +28,19 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
 #include "xfs_bmap.h"
-#include "xfs_da_btree.h"
 #include "xfs_attr.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_dir_leaf.h"
@@ -64,7 +49,6 @@
 #include "xfs_dir2_block.h"
 #include "xfs_dir2_node.h"
 #include "xfs_error.h"
-#include "xfs_bit.h"
 
 /*
  * xfs_da_btree.c
@@ -190,9 +174,6 @@ xfs_da_split(xfs_da_state_t *state)
 		 */
 		switch (oldblk->magic) {
 		case XFS_ATTR_LEAF_MAGIC:
-#ifndef __KERNEL__
-			return(ENOTTY);
-#else
 			error = xfs_attr_leaf_split(state, oldblk, newblk);
 			if ((error != 0) && (error != ENOSPC)) {
 				return(error);	/* GROT: attr is inconsistent */
@@ -218,7 +199,6 @@ xfs_da_split(xfs_da_state_t *state)
 				return(error);	/* GROT: attr inconsistent */
 			addblk = newblk;
 			break;
-#endif
 		case XFS_DIR_LEAF_MAGIC:
 			ASSERT(XFS_DIR_IS_V1(state->mp));
 			error = xfs_dir_leaf_split(state, oldblk, newblk);
@@ -449,7 +429,8 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	/*
 	 * With V2 the extra block is data or freespace.
 	 */
-	useextra = state->extravalid && XFS_DIR_IS_V1(state->mp);
+	useextra = state->extravalid && (XFS_DIR_IS_V1(state->mp) ||
+			state->args->whichfork == XFS_ATTR_FORK);
 	newcount = 1 + useextra;
 	/*
 	 * Do we have to split the node?
@@ -706,18 +687,12 @@ xfs_da_join(xfs_da_state_t *state)
 		 */
 		switch (drop_blk->magic) {
 		case XFS_ATTR_LEAF_MAGIC:
-#ifndef __KERNEL__
-			error = ENOTTY;
-#else
 			error = xfs_attr_leaf_toosmall(state, &action);
-#endif
 			if (error)
 				return(error);
 			if (action == 0)
 				return(0);
-#ifdef __KERNEL__
 			xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
-#endif
 			break;
 		case XFS_DIR_LEAF_MAGIC:
 			ASSERT(XFS_DIR_IS_V1(state->mp));
@@ -973,13 +948,11 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
 	level = path->active-1;
 	blk = &path->blk[ level ];
 	switch (blk->magic) {
-#ifdef __KERNEL__
 	case XFS_ATTR_LEAF_MAGIC:
 		lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
 		if (count == 0)
 			return;
 		break;
-#endif
 	case XFS_DIR_LEAF_MAGIC:
 		ASSERT(XFS_DIR_IS_V1(state->mp));
 		lasthash = xfs_dir_leaf_lasthash(blk->bp, &count);
@@ -1220,12 +1193,10 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 				blkno = INT_GET(btree->before, ARCH_CONVERT);
 			}
 		}
-#ifdef __KERNEL__
 		else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC) {
 			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
 			break;
 		}
-#endif
 		else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
 			blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL);
 			break;
@@ -1252,13 +1223,11 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 			retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
 							&blk->index, state);
 		}
-#ifdef __KERNEL__
 		else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
 			retval = xfs_attr_leaf_lookup_int(blk->bp, args);
 			blk->index = args->index;
 			args->blkno = blk->blkno;
 		}
-#endif
 		if (((retval == ENOENT) || (retval == ENOATTR)) &&
 		    (blk->hashval == args->hashval)) {
 			error = xfs_da_path_shift(state, &state->path, 1, 1,
@@ -1268,12 +1237,10 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 			if (retval == 0) {
 				continue;
 			}
-#ifdef __KERNEL__
 			else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
 				/* path_shift() gives ENOENT */
 				retval = XFS_ERROR(ENOATTR);
 			}
-#endif
 		}
 		break;
 	}
@@ -1312,11 +1279,9 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
 	ASSERT(old_blk->magic == new_blk->magic);
 
 	switch (old_blk->magic) {
-#ifdef __KERNEL__
 	case XFS_ATTR_LEAF_MAGIC:
 		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
 		break;
-#endif
 	case XFS_DIR_LEAF_MAGIC:
 		ASSERT(XFS_DIR_IS_V1(state->mp));
 		before = xfs_dir_leaf_order(old_blk->bp, new_blk->bp);
@@ -1587,12 +1552,10 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 			ASSERT(level == path->active-1);
 			blk->index = 0;
 			switch(blk->magic) {
-#ifdef __KERNEL__
 			case XFS_ATTR_LEAF_MAGIC:
 				blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
 								      NULL);
 				break;
-#endif
 			case XFS_DIR_LEAF_MAGIC:
 				ASSERT(XFS_DIR_IS_V1(state->mp));
 				blk->hashval = xfs_dir_leaf_lasthash(blk->bp,
@@ -1626,19 +1589,10 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
  * This is implemented with some source-level loop unrolling.
  */
 xfs_dahash_t
-xfs_da_hashname(uchar_t *name, int namelen)
+xfs_da_hashname(const uchar_t *name, int namelen)
 {
 	xfs_dahash_t hash;
 
-#ifdef SLOWVERSION
-	/*
-	 * This is the old one-byte-at-a-time version.
-	 */
-	for (hash = 0; namelen > 0; namelen--)
-		hash = *name++ ^ rol32(hash, 7);
-
-	return(hash);
-#else
 	/*
 	 * Do four characters at a time as long as we can.
 	 */
@@ -1657,12 +1611,9 @@ xfs_da_hashname(uchar_t *name, int namelen)
 		return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
 	case 1:
 		return (name[0] << 0) ^ rol32(hash, 7 * 1);
-	case 0:
+	default: /* case 0: */
 		return hash;
 	}
-	/* NOTREACHED */
-#endif
-	return 0; /* keep gcc happy */
 }
 
 /*
@@ -2200,20 +2151,16 @@ xfs_da_do_buf(
 			error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
 			break;
 		case 1:
-#ifndef __KERNEL__
 		case 2:
-#endif
 			bp = NULL;
 			error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
 				mappedbno, nmapped, 0, &bp);
 			break;
-#ifdef __KERNEL__
 		case 3:
 			xfs_baread(mp->m_ddev_targp, mappedbno, nmapped);
 			error = 0;
 			bp = NULL;
 			break;
-#endif
 		}
 		if (error) {
 			if (bp)
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 3a9b9e809c6..41352113721 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000, 2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DA_BTREE_H__
 #define	__XFS_DA_BTREE_H__
@@ -92,72 +78,24 @@ typedef struct xfs_da_node_entry xfs_da_node_entry_t;
 
 #define XFS_DA_MAXHASH	((xfs_dahash_t)-1) /* largest valid hash value */
 
-/*
- * Macros used by directory code to interface to the filesystem.
- */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LBSIZE)
-int xfs_lbsize(struct xfs_mount *mp);
-#define	XFS_LBSIZE(mp)			xfs_lbsize(mp)
-#else
-#define	XFS_LBSIZE(mp)	((mp)->m_sb.sb_blocksize)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LBLOG)
-int xfs_lblog(struct xfs_mount *mp);
-#define	XFS_LBLOG(mp)			xfs_lblog(mp)
-#else
-#define	XFS_LBLOG(mp)	((mp)->m_sb.sb_blocklog)
-#endif
+#define	XFS_LBSIZE(mp)	(mp)->m_sb.sb_blocksize
+#define	XFS_LBLOG(mp)	(mp)->m_sb.sb_blocklog
 
-/*
- * Macros used by directory code to interface to the kernel
- */
-
-/*
- * Macros used to manipulate directory off_t's
- */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_MAKE_BNOENTRY)
-__uint32_t xfs_da_make_bnoentry(struct xfs_mount *mp, xfs_dablk_t bno,
-				int entry);
 #define	XFS_DA_MAKE_BNOENTRY(mp,bno,entry)	\
-	xfs_da_make_bnoentry(mp,bno,entry)
-#else
-#define	XFS_DA_MAKE_BNOENTRY(mp,bno,entry) \
 	(((bno) << (mp)->m_dircook_elog) | (entry))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_MAKE_COOKIE)
-xfs_off_t xfs_da_make_cookie(struct xfs_mount *mp, xfs_dablk_t bno, int entry,
-				xfs_dahash_t hash);
 #define	XFS_DA_MAKE_COOKIE(mp,bno,entry,hash)	\
-	xfs_da_make_cookie(mp,bno,entry,hash)
-#else
-#define	XFS_DA_MAKE_COOKIE(mp,bno,entry,hash) \
 	(((xfs_off_t)XFS_DA_MAKE_BNOENTRY(mp, bno, entry) << 32) | (hash))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_COOKIE_HASH)
-xfs_dahash_t xfs_da_cookie_hash(struct xfs_mount *mp, xfs_off_t cookie);
-#define	XFS_DA_COOKIE_HASH(mp,cookie)		xfs_da_cookie_hash(mp,cookie)
-#else
-#define	XFS_DA_COOKIE_HASH(mp,cookie)	((xfs_dahash_t)(cookie))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_COOKIE_BNO)
-xfs_dablk_t xfs_da_cookie_bno(struct xfs_mount *mp, xfs_off_t cookie);
-#define	XFS_DA_COOKIE_BNO(mp,cookie)		xfs_da_cookie_bno(mp,cookie)
-#else
-#define	XFS_DA_COOKIE_BNO(mp,cookie) \
-	(((xfs_off_t)(cookie) >> 31) == -1LL ? \
+#define	XFS_DA_COOKIE_HASH(mp,cookie)		((xfs_dahash_t)cookie)
+#define	XFS_DA_COOKIE_BNO(mp,cookie)		\
+	((((xfs_off_t)(cookie) >> 31) == -1LL ? \
 		(xfs_dablk_t)0 : \
-		(xfs_dablk_t)((xfs_off_t)(cookie) >> ((mp)->m_dircook_elog + 32)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_COOKIE_ENTRY)
-int xfs_da_cookie_entry(struct xfs_mount *mp, xfs_off_t cookie);
-#define	XFS_DA_COOKIE_ENTRY(mp,cookie)		xfs_da_cookie_entry(mp,cookie)
-#else
-#define	XFS_DA_COOKIE_ENTRY(mp,cookie) \
-	(((xfs_off_t)(cookie) >> 31) == -1LL ? \
+		(xfs_dablk_t)((xfs_off_t)(cookie) >> \
+				((mp)->m_dircook_elog + 32))))
+#define	XFS_DA_COOKIE_ENTRY(mp,cookie)		\
+	((((xfs_off_t)(cookie) >> 31) == -1LL ?	\
 		(xfs_dablk_t)0 : \
 		(xfs_dablk_t)(((xfs_off_t)(cookie) >> 32) & \
-			      ((1 << (mp)->m_dircook_elog) - 1)))
-#endif
+				((1 << (mp)->m_dircook_elog) - 1))))
 
 
 /*========================================================================
@@ -168,7 +106,7 @@ int xfs_da_cookie_entry(struct xfs_mount *mp, xfs_off_t cookie);
  * Structure to ease passing around component names.
  */
 typedef struct xfs_da_args {
-	uchar_t		*name;		/* string (maybe not NULL terminated) */
+	const uchar_t	*name;		/* string (maybe not NULL terminated) */
 	int		namelen;	/* length of string (maybe no NULL) */
 	uchar_t		*value;		/* set of bytes (maybe contain NULLs) */
 	int		valuelen;	/* length of value */
@@ -314,7 +252,7 @@ xfs_daddr_t	xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp,
 int	xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
 					  xfs_dabuf_t *dead_buf);
 
-uint xfs_da_hashname(uchar_t *name_string, int name_length);
+uint xfs_da_hashname(const uchar_t *name_string, int name_length);
 uint xfs_da_log2_roundup(uint i);
 xfs_da_state_t *xfs_da_state_alloc(void);
 void xfs_da_state_free(xfs_da_state_t *state);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 681be5c93af..070259a4254 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -1,58 +1,44 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_ag.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
+#include "xfs_inode_item.h"
 #include "xfs_bmap.h"
+#include "xfs_btree.h"
 #include "xfs_ialloc.h"
 #include "xfs_itable.h"
 #include "xfs_dfrag.h"
@@ -65,9 +51,9 @@
  */
 int
 xfs_swapext(
-	xfs_swapext_t	__user *sxp)
+	xfs_swapext_t	__user *sxu)
 {
-	xfs_swapext_t	sx;
+	xfs_swapext_t	*sxp;
 	xfs_inode_t     *ip=NULL, *tip=NULL, *ips[2];
 	xfs_trans_t     *tp;
 	xfs_mount_t     *mp;
@@ -76,20 +62,29 @@ xfs_swapext(
 	vnode_t		*vp, *tvp;
 	bhv_desc_t      *bdp, *tbdp;
 	vn_bhv_head_t   *bhp, *tbhp;
-	uint		lock_flags=0;
+	static uint	lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
 	int		ilf_fields, tilf_fields;
 	int		error = 0;
-	xfs_ifork_t	tempif, *ifp, *tifp;
+	xfs_ifork_t	*tempifp, *ifp, *tifp;
 	__uint64_t	tmp;
 	int		aforkblks = 0;
 	int		taforkblks = 0;
-	int		locked = 0;
+	char		locked = 0;
 
-	if (copy_from_user(&sx, sxp, sizeof(sx)))
-		return XFS_ERROR(EFAULT);
+	sxp = kmem_alloc(sizeof(xfs_swapext_t), KM_MAYFAIL);
+	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
+	if (!sxp || !tempifp) {
+		error = XFS_ERROR(ENOMEM);
+		goto error0;
+	}
+
+	if (copy_from_user(sxp, sxu, sizeof(xfs_swapext_t))) {
+		error = XFS_ERROR(EFAULT);
+		goto error0;
+	}
 
 	/* Pull information for the target fd */
-	if (((fp = fget((int)sx.sx_fdtarget)) == NULL) ||
+	if (((fp = fget((int)sxp->sx_fdtarget)) == NULL) ||
 	    ((vp = LINVFS_GET_VP(fp->f_dentry->d_inode)) == NULL))  {
 		error = XFS_ERROR(EINVAL);
 		goto error0;
@@ -104,7 +99,7 @@ xfs_swapext(
 		ip = XFS_BHVTOI(bdp);
 	}
 
-	if (((tfp = fget((int)sx.sx_fdtmp)) == NULL) ||
+	if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) ||
 	    ((tvp = LINVFS_GET_VP(tfp->f_dentry->d_inode)) == NULL)) {
 		error = XFS_ERROR(EINVAL);
 		goto error0;
@@ -131,7 +126,7 @@ xfs_swapext(
 
 	mp = ip->i_mount;
 
-	sbp = &sx.sx_stat;
+	sbp = &sxp->sx_stat;
 
 	if (XFS_FORCED_SHUTDOWN(mp)) {
 		error =  XFS_ERROR(EIO);
@@ -148,7 +143,7 @@ xfs_swapext(
 		ips[0] = tip;
 		ips[1] = ip;
 	}
-	lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
+
 	xfs_lock_inodes(ips, 2, 0, lock_flags);
 
 	/* Check permissions */
@@ -192,9 +187,9 @@ xfs_swapext(
 	}
 
 	/* Verify all data are being swapped */
-	if (sx.sx_offset != 0 ||
-	    sx.sx_length != ip->i_d.di_size ||
-	    sx.sx_length != tip->i_d.di_size) {
+	if (sxp->sx_offset != 0 ||
+	    sxp->sx_length != ip->i_d.di_size ||
+	    sxp->sx_length != tip->i_d.di_size) {
 		error = XFS_ERROR(EFAULT);
 		goto error0;
 	}
@@ -255,7 +250,8 @@ xfs_swapext(
 		xfs_iunlock(ip,  XFS_IOLOCK_EXCL);
 		xfs_iunlock(tip, XFS_IOLOCK_EXCL);
 		xfs_trans_cancel(tp, 0);
-		return error;
+		locked = 0;
+		goto error0;
 	}
 	xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);
 
@@ -266,10 +262,8 @@ xfs_swapext(
 	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
 		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks);
 		if (error) {
-			xfs_iunlock(ip,  lock_flags);
-			xfs_iunlock(tip, lock_flags);
 			xfs_trans_cancel(tp, 0);
-			return error;
+			goto error0;
 		}
 	}
 	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
@@ -277,10 +271,8 @@ xfs_swapext(
 		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
 			&taforkblks);
 		if (error) {
-			xfs_iunlock(ip,  lock_flags);
-			xfs_iunlock(tip, lock_flags);
 			xfs_trans_cancel(tp, 0);
-			return error;
+			goto error0;
 		}
 	}
 
@@ -289,9 +281,9 @@ xfs_swapext(
 	 */
 	ifp = &ip->i_df;
 	tifp = &tip->i_df;
-	tempif = *ifp;	/* struct copy */
-	*ifp = *tifp;	/* struct copy */
-	*tifp = tempif;	/* struct copy */
+	*tempifp = *ifp;	/* struct copy */
+	*ifp = *tifp;		/* struct copy */
+	*tifp = *tempifp;	/* struct copy */
 
 	/*
 	 * Fix the on-disk inode values
@@ -369,11 +361,7 @@ xfs_swapext(
 	}
 
 	error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT, NULL);
-
-	fput(fp);
-	fput(tfp);
-
-	return error;
+	locked = 0;
 
  error0:
 	if (locked) {
@@ -381,8 +369,15 @@ xfs_swapext(
 		xfs_iunlock(tip, lock_flags);
 	}
 
-	if (fp != NULL) fput(fp);
-	if (tfp != NULL) fput(tfp);
+	if (fp != NULL)
+		fput(fp);
+	if (tfp != NULL)
+		fput(tfp);
+
+	if (sxp != NULL)
+		kmem_free(sxp, sizeof(xfs_swapext_t));
+	if (tempifp != NULL)
+		kmem_free(tempifp, sizeof(xfs_ifork_t));
 
 	return error;
 }
diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h
index 904860594b8..f678559abc4 100644
--- a/fs/xfs/xfs_dfrag.h
+++ b/fs/xfs/xfs_dfrag.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DFRAG_H__
 #define	__XFS_DFRAG_H__
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index f5c932b064e..c5a0e537ff1 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000, 2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DINODE_H__
 #define	__XFS_DINODE_H__
@@ -37,13 +23,8 @@ struct xfs_mount;
 
 #define	XFS_DINODE_VERSION_1	1
 #define	XFS_DINODE_VERSION_2	2
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DINODE_GOOD_VERSION)
-int xfs_dinode_good_version(int v);
-#define XFS_DINODE_GOOD_VERSION(v)	xfs_dinode_good_version(v)
-#else
-#define XFS_DINODE_GOOD_VERSION(v)	(((v) == XFS_DINODE_VERSION_1) || \
-					 ((v) == XFS_DINODE_VERSION_2))
-#endif
+#define XFS_DINODE_GOOD_VERSION(v)	\
+	(((v) == XFS_DINODE_VERSION_1 || (v) == XFS_DINODE_VERSION_2))
 #define	XFS_DINODE_MAGIC	0x494e	/* 'IN' */
 
 /*
@@ -184,75 +165,30 @@ typedef enum xfs_dinode_fmt
 /*
  * Inode size for given fs.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LITINO)
-int xfs_litino(struct xfs_mount *mp);
-#define	XFS_LITINO(mp)		xfs_litino(mp)
-#else
 #define	XFS_LITINO(mp)	((mp)->m_litino)
-#endif
 #define	XFS_BROOT_SIZE_ADJ	\
 	(sizeof(xfs_bmbt_block_t) - sizeof(xfs_bmdr_block_t))
 
 /*
- * Fork identifiers.  Here so utilities can use them without including
- * xfs_inode.h.
- */
-#define	XFS_DATA_FORK	0
-#define	XFS_ATTR_FORK	1
-
-/*
  * Inode data & attribute fork sizes, per inode.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_Q)
-int xfs_cfork_q_disk(xfs_dinode_core_t *dcp);
-int xfs_cfork_q(xfs_dinode_core_t *dcp);
-#define	XFS_CFORK_Q_DISK(dcp)               xfs_cfork_q_disk(dcp)
-#define	XFS_CFORK_Q(dcp)                    xfs_cfork_q(dcp)
-#else
-#define	XFS_CFORK_Q_DISK(dcp)		    ((dcp)->di_forkoff != 0)
 #define XFS_CFORK_Q(dcp)                    ((dcp)->di_forkoff != 0)
+#define	XFS_CFORK_Q_DISK(dcp)		    ((dcp)->di_forkoff != 0)
 
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_BOFF)
-int xfs_cfork_boff_disk(xfs_dinode_core_t *dcp);
-int xfs_cfork_boff(xfs_dinode_core_t *dcp);
-#define	XFS_CFORK_BOFF_DISK(dcp)	    xfs_cfork_boff_disk(dcp)
-#define	XFS_CFORK_BOFF(dcp)	            xfs_cfork_boff(dcp)
-#else
-#define	XFS_CFORK_BOFF_DISK(dcp)	    ((int)(INT_GET((dcp)->di_forkoff, ARCH_CONVERT) << 3))
 #define XFS_CFORK_BOFF(dcp)                 ((int)((dcp)->di_forkoff << 3))
+#define	XFS_CFORK_BOFF_DISK(dcp) \
+	((int)(INT_GET((dcp)->di_forkoff, ARCH_CONVERT) << 3))
 
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_DSIZE)
-int xfs_cfork_dsize_disk(xfs_dinode_core_t *dcp, struct xfs_mount *mp);
-int xfs_cfork_dsize(xfs_dinode_core_t *dcp, struct xfs_mount *mp);
-#define	XFS_CFORK_DSIZE_DISK(dcp,mp)        xfs_cfork_dsize_disk(dcp,mp)
-#define	XFS_CFORK_DSIZE(dcp,mp)             xfs_cfork_dsize(dcp,mp)
-#else
 #define	XFS_CFORK_DSIZE_DISK(dcp,mp) \
 	(XFS_CFORK_Q_DISK(dcp) ? XFS_CFORK_BOFF_DISK(dcp) : XFS_LITINO(mp))
 #define XFS_CFORK_DSIZE(dcp,mp) \
 	(XFS_CFORK_Q(dcp) ? XFS_CFORK_BOFF(dcp) : XFS_LITINO(mp))
 
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_ASIZE)
-int xfs_cfork_asize_disk(xfs_dinode_core_t *dcp, struct xfs_mount *mp);
-int xfs_cfork_asize(xfs_dinode_core_t *dcp, struct xfs_mount *mp);
-#define	XFS_CFORK_ASIZE_DISK(dcp,mp)        xfs_cfork_asize_disk(dcp,mp)
-#define	XFS_CFORK_ASIZE(dcp,mp)             xfs_cfork_asize(dcp,mp)
-#else
 #define	XFS_CFORK_ASIZE_DISK(dcp,mp) \
 	(XFS_CFORK_Q_DISK(dcp) ? XFS_LITINO(mp) - XFS_CFORK_BOFF_DISK(dcp) : 0)
 #define XFS_CFORK_ASIZE(dcp,mp) \
 	(XFS_CFORK_Q(dcp) ? XFS_LITINO(mp) - XFS_CFORK_BOFF(dcp) : 0)
 
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_SIZE)
-int xfs_cfork_size_disk(xfs_dinode_core_t *dcp, struct xfs_mount *mp, int w);
-int xfs_cfork_size(xfs_dinode_core_t *dcp, struct xfs_mount *mp, int w);
-#define	XFS_CFORK_SIZE_DISK(dcp,mp,w)       xfs_cfork_size_disk(dcp,mp,w)
-#define	XFS_CFORK_SIZE(dcp,mp,w)            xfs_cfork_size(dcp,mp,w)
-#else
 #define	XFS_CFORK_SIZE_DISK(dcp,mp,w) \
 	((w) == XFS_DATA_FORK ? \
 		XFS_CFORK_DSIZE_DISK(dcp, mp) : \
@@ -261,93 +197,26 @@ int xfs_cfork_size(xfs_dinode_core_t *dcp, struct xfs_mount *mp, int w);
 	((w) == XFS_DATA_FORK ? \
 		XFS_CFORK_DSIZE(dcp, mp) : XFS_CFORK_ASIZE(dcp, mp))
 
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_DSIZE)
-int xfs_dfork_dsize(xfs_dinode_t *dip, struct xfs_mount *mp);
-#define	XFS_DFORK_DSIZE(dip,mp)             xfs_dfork_dsize(dip,mp)
-#else
-#define XFS_DFORK_DSIZE(dip,mp)             XFS_CFORK_DSIZE_DISK(&(dip)->di_core, mp)
-
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_ASIZE)
-int xfs_dfork_asize(xfs_dinode_t *dip, struct xfs_mount *mp);
-#define	XFS_DFORK_ASIZE(dip,mp)             xfs_dfork_asize(dip,mp)
-#else
-#define XFS_DFORK_ASIZE(dip,mp)             XFS_CFORK_ASIZE_DISK(&(dip)->di_core, mp)
+#define XFS_DFORK_DSIZE(dip,mp) \
+	XFS_CFORK_DSIZE_DISK(&(dip)->di_core, mp)
+#define XFS_DFORK_ASIZE(dip,mp) \
+	XFS_CFORK_ASIZE_DISK(&(dip)->di_core, mp)
+#define	XFS_DFORK_SIZE(dip,mp,w) \
+	XFS_CFORK_SIZE_DISK(&(dip)->di_core, mp, w)
 
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_SIZE)
-int xfs_dfork_size(xfs_dinode_t *dip, struct xfs_mount *mp, int w);
-#define	XFS_DFORK_SIZE(dip,mp,w)            xfs_dfork_size(dip,mp,w)
-#else
-#define	XFS_DFORK_SIZE(dip,mp,w)	    XFS_CFORK_SIZE_DISK(&(dip)->di_core, mp, w)
-
-#endif
-
-/*
- * Macros for accessing per-fork disk inode information.
- */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_Q)
-int xfs_dfork_q(xfs_dinode_t *dip);
-#define	XFS_DFORK_Q(dip)	            xfs_dfork_q(dip)
-#else
 #define	XFS_DFORK_Q(dip)                    XFS_CFORK_Q_DISK(&(dip)->di_core)
-
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_BOFF)
-int xfs_dfork_boff(xfs_dinode_t *dip);
-#define	XFS_DFORK_BOFF(dip)		    xfs_dfork_boff(dip)
-#else
 #define	XFS_DFORK_BOFF(dip)		    XFS_CFORK_BOFF_DISK(&(dip)->di_core)
-
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_DPTR)
-char *xfs_dfork_dptr(xfs_dinode_t *dip);
-#define	XFS_DFORK_DPTR(dip)	            xfs_dfork_dptr(dip)
-#else
 #define	XFS_DFORK_DPTR(dip)		    ((dip)->di_u.di_c)
-
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_APTR)
-char *xfs_dfork_aptr(xfs_dinode_t *dip);
-#define	XFS_DFORK_APTR(dip)                 xfs_dfork_aptr(dip)
-#else
-#define	XFS_DFORK_APTR(dip)		    ((dip)->di_u.di_c + XFS_DFORK_BOFF(dip))
-
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_PTR)
-char *xfs_dfork_ptr(xfs_dinode_t *dip, int w);
-#define	XFS_DFORK_PTR(dip,w)                xfs_dfork_ptr(dip,w)
-#else
+#define	XFS_DFORK_APTR(dip)	\
+	((dip)->di_u.di_c + XFS_DFORK_BOFF(dip))
 #define	XFS_DFORK_PTR(dip,w)	\
 	((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR(dip) : XFS_DFORK_APTR(dip))
-
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_FORMAT)
-int xfs_cfork_format(xfs_dinode_core_t *dcp, int w);
-#define	XFS_CFORK_FORMAT(dcp,w)             xfs_cfork_format(dcp,w)
-#else
-#define	XFS_CFORK_FORMAT(dcp,w) \
+#define	XFS_CFORK_FORMAT(dcp,w)	\
 	((w) == XFS_DATA_FORK ? (dcp)->di_format : (dcp)->di_aformat)
-
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_FMT_SET)
-void xfs_cfork_fmt_set(xfs_dinode_core_t *dcp, int w, int n);
-#define	XFS_CFORK_FMT_SET(dcp,w,n)           xfs_cfork_fmt_set(dcp,w,n)
-#else
 #define	XFS_CFORK_FMT_SET(dcp,w,n) \
 	((w) == XFS_DATA_FORK ? \
-		((dcp)->di_format = (n)) : \
-		((dcp)->di_aformat = (n)))
+		((dcp)->di_format = (n)) : ((dcp)->di_aformat = (n)))
 
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_NEXTENTS)
-int xfs_cfork_nextents_disk(xfs_dinode_core_t *dcp, int w);
-int xfs_cfork_nextents(xfs_dinode_core_t *dcp, int w);
-#define	XFS_CFORK_NEXTENTS_DISK(dcp,w)       xfs_cfork_nextents_disk(dcp,w)
-#define	XFS_CFORK_NEXTENTS(dcp,w)            xfs_cfork_nextents(dcp,w)
-#else
 #define	XFS_CFORK_NEXTENTS_DISK(dcp,w) \
 	((w) == XFS_DATA_FORK ? \
 	 	INT_GET((dcp)->di_nextents, ARCH_CONVERT) : \
@@ -355,31 +224,13 @@ int xfs_cfork_nextents(xfs_dinode_core_t *dcp, int w);
 #define XFS_CFORK_NEXTENTS(dcp,w) \
 	((w) == XFS_DATA_FORK ? (dcp)->di_nextents : (dcp)->di_anextents)
 
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_NEXT_SET)
-void xfs_cfork_next_set(xfs_dinode_core_t *dcp, int w, int n);
-#define	XFS_CFORK_NEXT_SET(dcp,w,n)	        xfs_cfork_next_set(dcp,w,n)
-#else
 #define	XFS_CFORK_NEXT_SET(dcp,w,n) \
 	((w) == XFS_DATA_FORK ? \
-		((dcp)->di_nextents = (n)) : \
-		((dcp)->di_anextents = (n)))
-
-#endif
+		((dcp)->di_nextents = (n)) : ((dcp)->di_anextents = (n)))
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_NEXTENTS)
-int xfs_dfork_nextents(xfs_dinode_t *dip, int w);
-#define	XFS_DFORK_NEXTENTS(dip,w) xfs_dfork_nextents(dip,w)
-#else
 #define	XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w)
-#endif
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_DINODE)
-xfs_dinode_t *xfs_buf_to_dinode(struct xfs_buf *bp);
-#define	XFS_BUF_TO_DINODE(bp)	xfs_buf_to_dinode(bp)
-#else
-#define	XFS_BUF_TO_DINODE(bp)	((xfs_dinode_t *)(XFS_BUF_PTR(bp)))
-#endif
+#define	XFS_BUF_TO_DINODE(bp)	((xfs_dinode_t *)XFS_BUF_PTR(bp))
 
 /*
  * Values for di_flags
diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c
index ba30bc7682f..3dd30391f55 100644
--- a/fs/xfs/xfs_dir.c
+++ b/fs/xfs/xfs_dir.c
@@ -1,59 +1,43 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir_leaf.h"
 #include "xfs_error.h"
 
@@ -192,11 +176,23 @@ xfs_dir_mount(xfs_mount_t *mp)
 	uint shortcount, leafcount, count;
 
 	mp->m_dirversion = 1;
-	shortcount = (mp->m_attroffset - (uint)sizeof(xfs_dir_sf_hdr_t)) /
-		     (uint)sizeof(xfs_dir_sf_entry_t);
-	leafcount = (XFS_LBSIZE(mp) - (uint)sizeof(xfs_dir_leaf_hdr_t)) /
-		    ((uint)sizeof(xfs_dir_leaf_entry_t) +
-		     (uint)sizeof(xfs_dir_leaf_name_t));
+	if (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) {
+		shortcount = (mp->m_attroffset -
+				(uint)sizeof(xfs_dir_sf_hdr_t)) /
+				 (uint)sizeof(xfs_dir_sf_entry_t);
+		leafcount = (XFS_LBSIZE(mp) -
+				(uint)sizeof(xfs_dir_leaf_hdr_t)) /
+				 ((uint)sizeof(xfs_dir_leaf_entry_t) +
+				  (uint)sizeof(xfs_dir_leaf_name_t));
+	} else {
+		shortcount = (XFS_BMDR_SPACE_CALC(MINABTPTRS) -
+			      (uint)sizeof(xfs_dir_sf_hdr_t)) /
+			       (uint)sizeof(xfs_dir_sf_entry_t);
+		leafcount = (XFS_LBSIZE(mp) -
+			    (uint)sizeof(xfs_dir_leaf_hdr_t)) /
+			     ((uint)sizeof(xfs_dir_leaf_entry_t) +
+			      (uint)sizeof(xfs_dir_leaf_name_t));
+	}
 	count = shortcount > leafcount ? shortcount : leafcount;
 	mp->m_dircook_elog = xfs_da_log2_roundup(count + 1);
 	ASSERT(mp->m_dircook_elog <= mp->m_sb.sb_blocklog);
diff --git a/fs/xfs/xfs_dir.h b/fs/xfs/xfs_dir.h
index 4dbc9f54cca..488defe86ba 100644
--- a/fs/xfs/xfs_dir.h
+++ b/fs/xfs/xfs_dir.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DIR_H__
 #define	__XFS_DIR_H__
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 49fc0a3695a..022c8398ab6 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -1,46 +1,26 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
-/*
- * XFS v2 directory implmentation.
- * Top-level and utility routines.
- */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -48,16 +28,16 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_attr_sf.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
+#include "xfs_inode_item.h"
 #include "xfs_bmap.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir_leaf.h"
 #include "xfs_dir2_data.h"
 #include "xfs_dir2_leaf.h"
@@ -65,7 +45,6 @@
 #include "xfs_dir2_node.h"
 #include "xfs_dir2_trace.h"
 #include "xfs_error.h"
-#include "xfs_bit.h"
 
 /*
  * Declarations for interface routines.
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 8f4fc7f23bc..7e24ffeda9e 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DIR2_H__
 #define	__XFS_DIR2_H__
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index bc4c40fcd47..31bc99faa70 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -1,61 +1,39 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
-/*
- * xfs_dir2_block.c
- * XFS V2 directory implementation, single-block form.
- * See xfs_dir2_block.h for the format.
- */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
-#include "xfs_da_btree.h"
+#include "xfs_inode_item.h"
 #include "xfs_dir_leaf.h"
 #include "xfs_dir2_data.h"
 #include "xfs_dir2_leaf.h"
@@ -1234,7 +1212,7 @@ xfs_dir2_sf_to_block(
 	/*
 	 * Sort the leaf entries by hash value.
 	 */
-	qsort(blp, INT_GET(btp->count, ARCH_CONVERT), sizeof(*blp), xfs_dir2_block_sort);
+	xfs_sort(blp, INT_GET(btp->count, ARCH_CONVERT), sizeof(*blp), xfs_dir2_block_sort);
 	/*
 	 * Log the leaf entry area and tail.
 	 * Already logged the header in data_init, ignore needlog.
diff --git a/fs/xfs/xfs_dir2_block.h b/fs/xfs/xfs_dir2_block.h
index 5a578b84e24..a2e5cb98a83 100644
--- a/fs/xfs/xfs_dir2_block.h
+++ b/fs/xfs/xfs_dir2_block.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DIR2_BLOCK_H__
 #define	__XFS_DIR2_BLOCK_H__
@@ -74,53 +60,37 @@ typedef struct xfs_dir2_block {
 /*
  * Pointer to the leaf header embedded in a data block (1-block format)
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BLOCK_TAIL_P)
-xfs_dir2_block_tail_t *
-xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block);
 #define	XFS_DIR2_BLOCK_TAIL_P(mp,block)	xfs_dir2_block_tail_p(mp,block)
-#else
-#define	XFS_DIR2_BLOCK_TAIL_P(mp,block)	\
-	(((xfs_dir2_block_tail_t *)((char *)(block) + (mp)->m_dirblksize)) - 1)
-#endif
+static inline xfs_dir2_block_tail_t *
+xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
+{
+	return (((xfs_dir2_block_tail_t *)
+		((char *)(block) + (mp)->m_dirblksize)) - 1);
+}
 
 /*
  * Pointer to the leaf entries embedded in a data block (1-block format)
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BLOCK_LEAF_P)
-struct xfs_dir2_leaf_entry *xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp);
-#define	XFS_DIR2_BLOCK_LEAF_P(btp) \
-	xfs_dir2_block_leaf_p(btp)
-#else
-#define	XFS_DIR2_BLOCK_LEAF_P(btp)	\
-	(((struct xfs_dir2_leaf_entry *)(btp)) - INT_GET((btp)->count, ARCH_CONVERT))
-#endif
+#define	XFS_DIR2_BLOCK_LEAF_P(btp)	xfs_dir2_block_leaf_p(btp)
+static inline struct xfs_dir2_leaf_entry *
+xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp)
+{
+	return (((struct xfs_dir2_leaf_entry *)
+		(btp)) - INT_GET((btp)->count, ARCH_CONVERT));
+}
 
 /*
  * Function declarations.
  */
-
-extern int
-	xfs_dir2_block_addname(struct xfs_da_args *args);
-
-extern int
-	xfs_dir2_block_getdents(struct xfs_trans *tp, struct xfs_inode *dp,
-				struct uio *uio, int *eofp, struct xfs_dirent *dbp,
-				xfs_dir2_put_t put);
-
-extern int
-	xfs_dir2_block_lookup(struct xfs_da_args *args);
-
-extern int
-	xfs_dir2_block_removename(struct xfs_da_args *args);
-
-extern int
-	xfs_dir2_block_replace(struct xfs_da_args *args);
-
-extern int
-	xfs_dir2_leaf_to_block(struct xfs_da_args *args, struct xfs_dabuf *lbp,
-			       struct xfs_dabuf *dbp);
-
-extern int
-	xfs_dir2_sf_to_block(struct xfs_da_args *args);
+extern int xfs_dir2_block_addname(struct xfs_da_args *args);
+extern int xfs_dir2_block_getdents(struct xfs_trans *tp, struct xfs_inode *dp,
+				   struct uio *uio, int *eofp,
+				   struct xfs_dirent *dbp, xfs_dir2_put_t put);
+extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_block_removename(struct xfs_da_args *args);
+extern int xfs_dir2_block_replace(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
+				  struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
+extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
 
 #endif	/* __XFS_DIR2_BLOCK_H__ */
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index a0aa0e44ff9..5b7c47e2f14 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -1,60 +1,38 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-/*
- * xfs_dir2_data.c
- * Core data block handling routines for XFS V2 directories.
- * See xfs_dir2_data.h for data structures.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir_leaf.h"
 #include "xfs_dir2_data.h"
 #include "xfs_dir2_leaf.h"
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index 476cac920bf..5e3a7f9ec73 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DIR2_DATA_H__
 #define	__XFS_DIR2_DATA_H__
@@ -137,88 +123,65 @@ typedef struct xfs_dir2_data {
 /*
  * Size of a data entry.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_ENTSIZE)
-int xfs_dir2_data_entsize(int n);
 #define XFS_DIR2_DATA_ENTSIZE(n)	xfs_dir2_data_entsize(n)
-#else
-#define	XFS_DIR2_DATA_ENTSIZE(n)	\
-	((int)(roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
-		 (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN)))
-#endif
+static inline int xfs_dir2_data_entsize(int n)
+{
+	return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
+		 (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN);
+}
 
 /*
  * Pointer to an entry's tag word.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_ENTRY_TAG_P)
-xfs_dir2_data_off_t *xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep);
 #define	XFS_DIR2_DATA_ENTRY_TAG_P(dep)	xfs_dir2_data_entry_tag_p(dep)
-#else
-#define	XFS_DIR2_DATA_ENTRY_TAG_P(dep)	\
-	((xfs_dir2_data_off_t *)\
-	 ((char *)(dep) + XFS_DIR2_DATA_ENTSIZE((dep)->namelen) - \
-	  (uint)sizeof(xfs_dir2_data_off_t)))
-#endif
+static inline xfs_dir2_data_off_t *
+xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
+{
+	return (xfs_dir2_data_off_t *) \
+		 ((char *)(dep) + XFS_DIR2_DATA_ENTSIZE((dep)->namelen) - \
+		  (uint)sizeof(xfs_dir2_data_off_t));
+}
 
 /*
  * Pointer to a freespace's tag word.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_UNUSED_TAG_P)
-xfs_dir2_data_off_t *xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup);
 #define	XFS_DIR2_DATA_UNUSED_TAG_P(dup) \
 	xfs_dir2_data_unused_tag_p(dup)
-#else
-#define	XFS_DIR2_DATA_UNUSED_TAG_P(dup)	\
-	((xfs_dir2_data_off_t *)\
-	 ((char *)(dup) + INT_GET((dup)->length, ARCH_CONVERT) \
-			- (uint)sizeof(xfs_dir2_data_off_t)))
-#endif
+static inline xfs_dir2_data_off_t *
+xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
+{
+	return (xfs_dir2_data_off_t *) \
+		 ((char *)(dup) + INT_GET((dup)->length, ARCH_CONVERT) \
+				- (uint)sizeof(xfs_dir2_data_off_t));
+}
 
 /*
  * Function declarations.
  */
-
 #ifdef DEBUG
-extern void
-	xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
+extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
 #else
 #define	xfs_dir2_data_check(dp,bp)
 #endif
-
-extern xfs_dir2_data_free_t *
-	xfs_dir2_data_freefind(xfs_dir2_data_t *d,
-			       xfs_dir2_data_unused_t *dup);
-
-extern xfs_dir2_data_free_t *
-	xfs_dir2_data_freeinsert(xfs_dir2_data_t *d,
-				 xfs_dir2_data_unused_t *dup, int *loghead);
-
-extern void
-	xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d,
-			       int *loghead, char *aendp);
-
-extern int
-	xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
-			   struct xfs_dabuf **bpp);
-
-extern void
-	xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
+extern xfs_dir2_data_free_t *xfs_dir2_data_freefind(xfs_dir2_data_t *d,
+				xfs_dir2_data_unused_t *dup);
+extern xfs_dir2_data_free_t *xfs_dir2_data_freeinsert(xfs_dir2_data_t *d,
+				xfs_dir2_data_unused_t *dup, int *loghead);
+extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d,
+				int *loghead, char *aendp);
+extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
+				struct xfs_dabuf **bpp);
+extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
 				xfs_dir2_data_entry_t *dep);
-
-extern void
-	xfs_dir2_data_log_header(struct xfs_trans *tp, struct xfs_dabuf *bp);
-
-extern void
-	xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
-				 xfs_dir2_data_unused_t *dup);
-
-extern void
-	xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
+extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
+				struct xfs_dabuf *bp);
+extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
+				xfs_dir2_data_unused_t *dup);
+extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
 				xfs_dir2_data_aoff_t offset,
 				xfs_dir2_data_aoff_t len, int *needlogp,
 				int *needscanp);
-
-extern void
-	xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
+extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
 			       xfs_dir2_data_unused_t *dup,
 			       xfs_dir2_data_aoff_t offset,
 			       xfs_dir2_data_aoff_t len, int *needlogp,
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 056f5283904..d342b6b5523 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1,49 +1,26 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-/*
- * xfs_dir2_leaf.c
- * XFS directory version 2 implementation - single leaf form
- * see xfs_dir2_leaf.h for data structures.
- * These directories have multiple XFS_DIR2_DATA blocks and one
- * XFS_DIR2_LEAF1 block containing the hash table and freespace map.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -51,6 +28,7 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
@@ -58,14 +36,12 @@
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir2_data.h"
 #include "xfs_dir2_leaf.h"
 #include "xfs_dir2_block.h"
 #include "xfs_dir2_node.h"
 #include "xfs_dir2_trace.h"
 #include "xfs_error.h"
-#include "xfs_bit.h"
 
 /*
  * Local function declarations.
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
index 3303cd6f4c0..1393993d61e 100644
--- a/fs/xfs/xfs_dir2_leaf.h
+++ b/fs/xfs/xfs_dir2_leaf.h
@@ -1,41 +1,23 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DIR2_LEAF_H__
 #define	__XFS_DIR2_LEAF_H__
 
-/*
- * Directory version 2, leaf block structures.
- */
-
 struct uio;
 struct xfs_dabuf;
 struct xfs_da_args;
@@ -44,10 +26,6 @@ struct xfs_mount;
 struct xfs_trans;
 
 /*
- * Constants.
- */
-
-/*
  * Offset of the leaf/node space.  First block in this space
  * is the btree root.
  */
@@ -57,10 +35,6 @@ struct xfs_trans;
 	XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_LEAF_OFFSET)
 
 /*
- * Types.
- */
-
-/*
  * Offset in data space of a data entry.
  */
 typedef	__uint32_t	xfs_dir2_dataptr_t;
@@ -68,10 +42,6 @@ typedef	__uint32_t	xfs_dir2_dataptr_t;
 #define	XFS_DIR2_NULL_DATAPTR	((xfs_dir2_dataptr_t)0)
 
 /*
- * Structures.
- */
-
-/*
  * Leaf block header.
  */
 typedef struct xfs_dir2_leaf_hdr {
@@ -109,245 +79,193 @@ typedef struct xfs_dir2_leaf {
 } xfs_dir2_leaf_t;
 
 /*
- * Macros.
- * The DB blocks are logical directory block numbers, not filesystem blocks.
+ * DB blocks here are logical directory block numbers, not filesystem blocks.
  */
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_MAX_LEAF_ENTS)
-int
-xfs_dir2_max_leaf_ents(struct xfs_mount *mp);
-#define	XFS_DIR2_MAX_LEAF_ENTS(mp)	\
-	xfs_dir2_max_leaf_ents(mp)
-#else
-#define	XFS_DIR2_MAX_LEAF_ENTS(mp)	\
-	((int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) / \
-	       (uint)sizeof(xfs_dir2_leaf_entry_t)))
-#endif
+#define	XFS_DIR2_MAX_LEAF_ENTS(mp)	xfs_dir2_max_leaf_ents(mp)
+static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
+{
+	return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) /
+	       (uint)sizeof(xfs_dir2_leaf_entry_t));
+}
 
 /*
  * Get address of the bestcount field in the single-leaf block.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_LEAF_TAIL_P)
-xfs_dir2_leaf_tail_t *
-xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp);
-#define	XFS_DIR2_LEAF_TAIL_P(mp,lp)	\
-	xfs_dir2_leaf_tail_p(mp, lp)
-#else
-#define	XFS_DIR2_LEAF_TAIL_P(mp,lp)	\
-	((xfs_dir2_leaf_tail_t *)\
-	 ((char *)(lp) + (mp)->m_dirblksize - \
-	  (uint)sizeof(xfs_dir2_leaf_tail_t)))
-#endif
+#define	XFS_DIR2_LEAF_TAIL_P(mp,lp)	xfs_dir2_leaf_tail_p(mp, lp)
+static inline xfs_dir2_leaf_tail_t *
+xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
+{
+	return (xfs_dir2_leaf_tail_t *)
+		((char *)(lp) + (mp)->m_dirblksize - 
+		  (uint)sizeof(xfs_dir2_leaf_tail_t));
+}
 
 /*
  * Get address of the bests array in the single-leaf block.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_LEAF_BESTS_P)
-xfs_dir2_data_off_t *
-xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp);
 #define	XFS_DIR2_LEAF_BESTS_P(ltp)	xfs_dir2_leaf_bests_p(ltp)
-#else
-#define	XFS_DIR2_LEAF_BESTS_P(ltp)	\
-	((xfs_dir2_data_off_t *)(ltp) - INT_GET((ltp)->bestcount, ARCH_CONVERT))
-#endif
+static inline xfs_dir2_data_off_t *
+xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
+{
+	return (xfs_dir2_data_off_t *)
+		(ltp) - INT_GET((ltp)->bestcount, ARCH_CONVERT);
+}
 
 /*
  * Convert dataptr to byte in file space
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATAPTR_TO_BYTE)
-xfs_dir2_off_t
-xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp);
 #define	XFS_DIR2_DATAPTR_TO_BYTE(mp,dp)	xfs_dir2_dataptr_to_byte(mp, dp)
-#else
-#define	XFS_DIR2_DATAPTR_TO_BYTE(mp,dp)	\
-	((xfs_dir2_off_t)(dp) << XFS_DIR2_DATA_ALIGN_LOG)
-#endif
+static inline xfs_dir2_off_t
+xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
+{
+	return (xfs_dir2_off_t)(dp) << XFS_DIR2_DATA_ALIGN_LOG;
+}
 
 /*
  * Convert byte in file space to dataptr.  It had better be aligned.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_DATAPTR)
-xfs_dir2_dataptr_t
-xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by);
 #define	XFS_DIR2_BYTE_TO_DATAPTR(mp,by)	xfs_dir2_byte_to_dataptr(mp,by)
-#else
-#define	XFS_DIR2_BYTE_TO_DATAPTR(mp,by)	\
-	((xfs_dir2_dataptr_t)((by) >> XFS_DIR2_DATA_ALIGN_LOG))
-#endif
+static inline xfs_dir2_dataptr_t
+xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+	return (xfs_dir2_dataptr_t)((by) >> XFS_DIR2_DATA_ALIGN_LOG);
+}
+
+/*
+ * Convert byte in space to (DB) block
+ */
+#define	XFS_DIR2_BYTE_TO_DB(mp,by)	xfs_dir2_byte_to_db(mp, by)
+static inline xfs_dir2_db_t
+xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+	return (xfs_dir2_db_t)((by) >> \
+		 ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog));
+}
 
 /*
  * Convert dataptr to a block number
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATAPTR_TO_DB)
-xfs_dir2_db_t
-xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp);
 #define	XFS_DIR2_DATAPTR_TO_DB(mp,dp)	xfs_dir2_dataptr_to_db(mp, dp)
-#else
-#define	XFS_DIR2_DATAPTR_TO_DB(mp,dp)	\
-	XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp))
-#endif
+static inline xfs_dir2_db_t
+xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
+{
+	return XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp));
+}
+
+/*
+ * Convert byte in space to offset in a block
+ */
+#define	XFS_DIR2_BYTE_TO_OFF(mp,by)	xfs_dir2_byte_to_off(mp, by)
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+	return (xfs_dir2_data_aoff_t)((by) & \
+		((1 << ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) - 1));
+}
 
 /*
  * Convert dataptr to a byte offset in a block
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATAPTR_TO_OFF)
-xfs_dir2_data_aoff_t
-xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp);
 #define	XFS_DIR2_DATAPTR_TO_OFF(mp,dp)	xfs_dir2_dataptr_to_off(mp, dp)
-#else
-#define	XFS_DIR2_DATAPTR_TO_OFF(mp,dp)	\
-	XFS_DIR2_BYTE_TO_OFF(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp))
-#endif
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
+{
+	return XFS_DIR2_BYTE_TO_OFF(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp));
+}
 
 /*
  * Convert block and offset to byte in space
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_OFF_TO_BYTE)
-xfs_dir2_off_t
-xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
-			xfs_dir2_data_aoff_t o);
 #define	XFS_DIR2_DB_OFF_TO_BYTE(mp,db,o)	\
 	xfs_dir2_db_off_to_byte(mp, db, o)
-#else
-#define	XFS_DIR2_DB_OFF_TO_BYTE(mp,db,o)	\
-	(((xfs_dir2_off_t)(db) << \
-	 ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) + (o))
-#endif
+static inline xfs_dir2_off_t
+xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
+			xfs_dir2_data_aoff_t o)
+{
+	return ((xfs_dir2_off_t)(db) << \
+		((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) + (o);
+}
 
 /*
- * Convert byte in space to (DB) block
+ * Convert block (DB) to block (dablk)
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_DB)
-xfs_dir2_db_t xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by);
-#define	XFS_DIR2_BYTE_TO_DB(mp,by)	xfs_dir2_byte_to_db(mp, by)
-#else
-#define	XFS_DIR2_BYTE_TO_DB(mp,by)	\
-	((xfs_dir2_db_t)((by) >> \
-			 ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)))
-#endif
+#define	XFS_DIR2_DB_TO_DA(mp,db)	xfs_dir2_db_to_da(mp, db)
+static inline xfs_dablk_t
+xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
+{
+	return (xfs_dablk_t)((db) << (mp)->m_sb.sb_dirblklog);
+}
 
 /*
  * Convert byte in space to (DA) block
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_DA)
-xfs_dablk_t xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by);
 #define	XFS_DIR2_BYTE_TO_DA(mp,by)	xfs_dir2_byte_to_da(mp, by)
-#else
-#define	XFS_DIR2_BYTE_TO_DA(mp,by)	\
-	XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, by))
-#endif
-
-/*
- * Convert byte in space to offset in a block
- */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_OFF)
-xfs_dir2_data_aoff_t
-xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by);
-#define	XFS_DIR2_BYTE_TO_OFF(mp,by)	xfs_dir2_byte_to_off(mp, by)
-#else
-#define	XFS_DIR2_BYTE_TO_OFF(mp,by)	\
-	((xfs_dir2_data_aoff_t)((by) & \
-				((1 << ((mp)->m_sb.sb_blocklog + \
-					(mp)->m_sb.sb_dirblklog)) - 1)))
-#endif
+static inline xfs_dablk_t
+xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+	return XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, by));
+}
 
 /*
  * Convert block and offset to dataptr
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_OFF_TO_DATAPTR)
-xfs_dir2_dataptr_t
-xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
-			   xfs_dir2_data_aoff_t o);
 #define	XFS_DIR2_DB_OFF_TO_DATAPTR(mp,db,o)	\
 	xfs_dir2_db_off_to_dataptr(mp, db, o)
-#else
-#define	XFS_DIR2_DB_OFF_TO_DATAPTR(mp,db,o)	\
-	XFS_DIR2_BYTE_TO_DATAPTR(mp, XFS_DIR2_DB_OFF_TO_BYTE(mp, db, o))
-#endif
-
-/*
- * Convert block (DB) to block (dablk)
- */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_TO_DA)
-xfs_dablk_t xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db);
-#define	XFS_DIR2_DB_TO_DA(mp,db)	xfs_dir2_db_to_da(mp, db)
-#else
-#define	XFS_DIR2_DB_TO_DA(mp,db)	\
-	((xfs_dablk_t)((db) << (mp)->m_sb.sb_dirblklog))
-#endif
+static inline xfs_dir2_dataptr_t
+xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
+			   xfs_dir2_data_aoff_t o)
+{
+	return XFS_DIR2_BYTE_TO_DATAPTR(mp, XFS_DIR2_DB_OFF_TO_BYTE(mp, db, o));
+}
 
 /*
  * Convert block (dablk) to block (DB)
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DA_TO_DB)
-xfs_dir2_db_t xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da);
 #define	XFS_DIR2_DA_TO_DB(mp,da)	xfs_dir2_da_to_db(mp, da)
-#else
-#define	XFS_DIR2_DA_TO_DB(mp,da)	\
-	((xfs_dir2_db_t)((da) >> (mp)->m_sb.sb_dirblklog))
-#endif
+static inline xfs_dir2_db_t
+xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
+{
+	return (xfs_dir2_db_t)((da) >> (mp)->m_sb.sb_dirblklog);
+}
 
 /*
  * Convert block (dablk) to byte offset in space
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DA_TO_BYTE)
-xfs_dir2_off_t xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da);
 #define XFS_DIR2_DA_TO_BYTE(mp,da)	xfs_dir2_da_to_byte(mp, da)
-#else
-#define	XFS_DIR2_DA_TO_BYTE(mp,da)	\
-	XFS_DIR2_DB_OFF_TO_BYTE(mp, XFS_DIR2_DA_TO_DB(mp, da), 0)
-#endif
+static inline xfs_dir2_off_t
+xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
+{
+	return XFS_DIR2_DB_OFF_TO_BYTE(mp, XFS_DIR2_DA_TO_DB(mp, da), 0);
+}
 
 /*
  * Function declarations.
  */
-
-extern int
-	xfs_dir2_block_to_leaf(struct xfs_da_args *args, struct xfs_dabuf *dbp);
-
-extern int
-	xfs_dir2_leaf_addname(struct xfs_da_args *args);
-
-extern void
-	xfs_dir2_leaf_compact(struct xfs_da_args *args, struct xfs_dabuf *bp);
-
-extern void
-	xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
-				 int *lowstalep, int *highstalep, int *lowlogp,
-				 int *highlogp);
-
-extern int
-	xfs_dir2_leaf_getdents(struct xfs_trans *tp, struct xfs_inode *dp,
-			       struct uio *uio, int *eofp, struct xfs_dirent *dbp,
-			       xfs_dir2_put_t put);
-
-extern int
-	xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
-			   struct xfs_dabuf **bpp, int magic);
-
-extern void
-	xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
-			       int first, int last);
-
-extern void
-	xfs_dir2_leaf_log_header(struct xfs_trans *tp, struct xfs_dabuf *bp);
-
-extern int
-	xfs_dir2_leaf_lookup(struct xfs_da_args *args);
-
-extern int
-	xfs_dir2_leaf_removename(struct xfs_da_args *args);
-
-extern int
-	xfs_dir2_leaf_replace(struct xfs_da_args *args);
-
-extern int
-	xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
-				  struct xfs_dabuf *lbp);
-extern int
-	xfs_dir2_leaf_trim_data(struct xfs_da_args *args, struct xfs_dabuf *lbp,				xfs_dir2_db_t db);
-
-extern int
-	xfs_dir2_node_to_leaf(struct xfs_da_state *state);
+extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
+				  struct xfs_dabuf *dbp);
+extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
+extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
+				  struct xfs_dabuf *bp);
+extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
+				     int *lowstalep, int *highstalep,
+				     int *lowlogp, int *highlogp);
+extern int xfs_dir2_leaf_getdents(struct xfs_trans *tp, struct xfs_inode *dp,
+				  struct uio *uio, int *eofp,
+				  struct xfs_dirent *dbp, xfs_dir2_put_t put);
+extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
+			      struct xfs_dabuf **bpp, int magic);
+extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
+				   int first, int last);
+extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
+				     struct xfs_dabuf *bp);
+extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
+				     struct xfs_dabuf *lbp);
+extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
+				   struct xfs_dabuf *lbp, xfs_dir2_db_t db);
+extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
 
 #endif	/* __XFS_DIR2_LEAF_H__ */
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index a7615d86bfb..641f8633d25 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -1,61 +1,39 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-/*
- * xfs_dir2_node.c
- * XFS directory implementation, version 2, node form files
- * See data structures in xfs_dir2_node.h and xfs_da_btree.h.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir2_data.h"
 #include "xfs_dir2_leaf.h"
 #include "xfs_dir2_block.h"
diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h
index 96db420c7c5..0ab8fbd5951 100644
--- a/fs/xfs/xfs_dir2_node.h
+++ b/fs/xfs/xfs_dir2_node.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DIR2_NODE_H__
 #define	__XFS_DIR2_NODE_H__
@@ -45,10 +31,6 @@ struct xfs_inode;
 struct xfs_trans;
 
 /*
- * Constants.
- */
-
-/*
  * Offset of the freespace index.
  */
 #define	XFS_DIR2_FREE_SPACE	2
@@ -58,9 +40,6 @@ struct xfs_trans;
 
 #define	XFS_DIR2_FREE_MAGIC	0x58443246	/* XD2F */
 
-/*
- * Structures.
- */
 typedef	struct xfs_dir2_free_hdr {
 	__uint32_t		magic;		/* XFS_DIR2_FREE_MAGIC */
 	__int32_t		firstdb;	/* db of first entry */
@@ -73,87 +52,53 @@ typedef struct xfs_dir2_free {
 	xfs_dir2_data_off_t	bests[1];	/* best free counts */
 						/* unused entries are -1 */
 } xfs_dir2_free_t;
+
 #define	XFS_DIR2_MAX_FREE_BESTS(mp)	\
 	(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_free_hdr_t)) / \
 	 (uint)sizeof(xfs_dir2_data_off_t))
 
 /*
- * Macros.
- */
-
-/*
  * Convert data space db to the corresponding free db.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_TO_FDB)
-xfs_dir2_db_t
-xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db);
 #define	XFS_DIR2_DB_TO_FDB(mp,db)	xfs_dir2_db_to_fdb(mp, db)
-#else
-#define	XFS_DIR2_DB_TO_FDB(mp,db)	\
-	(XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp))
-#endif
+static inline xfs_dir2_db_t
+xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
+{
+	return (XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp));
+}
 
 /*
  * Convert data space db to the corresponding index in a free db.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_TO_FDINDEX)
-int
-xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db);
 #define	XFS_DIR2_DB_TO_FDINDEX(mp,db)	xfs_dir2_db_to_fdindex(mp, db)
-#else
-#define	XFS_DIR2_DB_TO_FDINDEX(mp,db)	((db) % XFS_DIR2_MAX_FREE_BESTS(mp))
-#endif
-
-/*
- * Functions.
- */
-
-extern void
-	xfs_dir2_free_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
-				int first, int last);
-
-extern int
-	xfs_dir2_leaf_to_node(struct xfs_da_args *args, struct xfs_dabuf *lbp);
-
-extern xfs_dahash_t
-	xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
-
-extern int
-	xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
-				  struct xfs_da_args *args, int *indexp,
-				  struct xfs_da_state *state);
-
-extern int
-	xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
-			     struct xfs_dabuf *leaf2_bp);
-
-extern int
-	xfs_dir2_leafn_split(struct xfs_da_state *state,
-			     struct xfs_da_state_blk *oldblk,
-			     struct xfs_da_state_blk *newblk);
-
-extern int
-	xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
-
-extern void
-	xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
-				 struct xfs_da_state_blk *drop_blk,
-				 struct xfs_da_state_blk *save_blk);
-
-extern int
-	xfs_dir2_node_addname(struct xfs_da_args *args);
-
-extern int
-	xfs_dir2_node_lookup(struct xfs_da_args *args);
-
-extern int
-	xfs_dir2_node_removename(struct xfs_da_args *args);
-
-extern int
-	xfs_dir2_node_replace(struct xfs_da_args *args);
-
-extern int
-	xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
-				int *rvalp);
+static inline int
+xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
+{
+	return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp));
+}
+
+extern void xfs_dir2_free_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
+				    int first, int last);
+extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
+				 struct xfs_dabuf *lbp);
+extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
+extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
+				     struct xfs_da_args *args, int *indexp,
+				     struct xfs_da_state *state);
+extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
+				struct xfs_dabuf *leaf2_bp);
+extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
+				struct xfs_da_state_blk *oldblk,
+				struct xfs_da_state_blk *newblk);
+extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
+extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
+				     struct xfs_da_state_blk *drop_blk,
+				     struct xfs_da_state_blk *save_blk);
+extern int xfs_dir2_node_addname(struct xfs_da_args *args);
+extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_node_removename(struct xfs_da_args *args);
+extern int xfs_dir2_node_replace(struct xfs_da_args *args);
+extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
+				   int *rvalp);
 
 #endif	/* __XFS_DIR2_NODE_H__ */
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 6bbc6167441..ec8e7476c8b 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -1,60 +1,39 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
-/*
- * xfs_dir2_sf.c
- * Shortform directory implementation for v2 directories.
- */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
-#include "xfs_da_btree.h"
+#include "xfs_inode_item.h"
 #include "xfs_dir_leaf.h"
 #include "xfs_error.h"
 #include "xfs_dir2_data.h"
@@ -107,7 +86,7 @@ xfs_dir2_block_sfsize(
 	int			isdotdot;	/* entry is ".." */
 	xfs_mount_t		*mp;		/* mount structure pointer */
 	int			namelen;	/* total name bytes */
-	xfs_ino_t		parent;		/* parent inode number */
+	xfs_ino_t		parent = 0;	/* parent inode number */
 	int			size=0;		/* total computed size */
 
 	mp = dp->i_mount;
@@ -298,11 +277,11 @@ xfs_dir2_sf_addname(
 	int			incr_isize;	/* total change in size */
 	int			new_isize;	/* di_size after adding name */
 	int			objchange;	/* changing to 8-byte inodes */
-	xfs_dir2_data_aoff_t	offset;		/* offset for new entry */
+	xfs_dir2_data_aoff_t	offset = 0;	/* offset for new entry */
 	int			old_isize;	/* di_size before adding name */
 	int			pick;		/* which algorithm to use */
 	xfs_dir2_sf_t		*sfp;		/* shortform structure */
-	xfs_dir2_sf_entry_t	*sfep;		/* shortform entry */
+	xfs_dir2_sf_entry_t	*sfep = NULL;	/* shortform entry */
 
 	xfs_dir2_trace_args("sf_addname", args);
 	ASSERT(xfs_dir2_sf_lookup(args) == ENOENT);
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index bac6f5a2a31..42f015b7001 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DIR2_SF_H__
 #define	__XFS_DIR2_SF_H__
@@ -104,140 +90,106 @@ typedef struct xfs_dir2_sf {
 	xfs_dir2_sf_entry_t	list[1];	/* shortform entries */
 } xfs_dir2_sf_t;
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_HDR_SIZE)
-int xfs_dir2_sf_hdr_size(int i8count);
 #define	XFS_DIR2_SF_HDR_SIZE(i8count)	xfs_dir2_sf_hdr_size(i8count)
-#else
-#define	XFS_DIR2_SF_HDR_SIZE(i8count)	\
-	((uint)sizeof(xfs_dir2_sf_hdr_t) - \
-	 ((i8count) == 0) * \
-	 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_INUMBERP)
-xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep);
+static inline int xfs_dir2_sf_hdr_size(int i8count)
+{
+	return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \
+		((i8count) == 0) * \
+		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
+}
+
 #define	XFS_DIR2_SF_INUMBERP(sfep)	xfs_dir2_sf_inumberp(sfep)
-#else
-#define	XFS_DIR2_SF_INUMBERP(sfep)	\
-	((xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen])
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_GET_INUMBER)
-xfs_intino_t xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from);
-#define	XFS_DIR2_SF_GET_INUMBER(sfp, from)	\
-	xfs_dir2_sf_get_inumber(sfp, from)
+static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep)
+{
+	return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen];
+}
 
-#else
-#define	XFS_DIR2_SF_GET_INUMBER(sfp, from)	\
-	((sfp)->hdr.i8count == 0 ? \
+#define	XFS_DIR2_SF_GET_INUMBER(sfp, from) \
+	xfs_dir2_sf_get_inumber(sfp, from)
+static inline xfs_intino_t
+xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
+{
+	return ((sfp)->hdr.i8count == 0 ? \
 		(xfs_intino_t)XFS_GET_DIR_INO4((from)->i4) : \
-		(xfs_intino_t)XFS_GET_DIR_INO8((from)->i8))
-#endif
+		(xfs_intino_t)XFS_GET_DIR_INO8((from)->i8));
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_PUT_INUMBER)
-void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
-				     xfs_dir2_inou_t *to);
-#define	XFS_DIR2_SF_PUT_INUMBER(sfp,from,to)	\
+#define	XFS_DIR2_SF_PUT_INUMBER(sfp,from,to) \
 	xfs_dir2_sf_put_inumber(sfp,from,to)
-#else
-#define	XFS_DIR2_SF_PUT_INUMBER(sfp,from,to)	\
-	if ((sfp)->hdr.i8count == 0) { \
-		XFS_PUT_DIR_INO4(*(from), (to)->i4); \
-	} else { \
-		XFS_PUT_DIR_INO8(*(from), (to)->i8); \
-	}
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_GET_OFFSET)
-xfs_dir2_data_aoff_t xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep);
+static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
+						xfs_dir2_inou_t *to)
+{
+	if ((sfp)->hdr.i8count == 0)
+		XFS_PUT_DIR_INO4(*(from), (to)->i4);
+	else
+		XFS_PUT_DIR_INO8(*(from), (to)->i8);
+}
+
 #define	XFS_DIR2_SF_GET_OFFSET(sfep)	\
 	xfs_dir2_sf_get_offset(sfep)
-#else
-#define	XFS_DIR2_SF_GET_OFFSET(sfep)	\
-	INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i)
-#endif
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
+{
+	return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_PUT_OFFSET)
-void xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep,
-				    xfs_dir2_data_aoff_t off);
 #define	XFS_DIR2_SF_PUT_OFFSET(sfep,off) \
 	xfs_dir2_sf_put_offset(sfep,off)
-#else
-#define	XFS_DIR2_SF_PUT_OFFSET(sfep,off)	\
-	INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i,off)
-#endif
+static inline void
+xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
+{
+	INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_ENTSIZE_BYNAME)
-int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len);
 #define XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,len)	\
 	xfs_dir2_sf_entsize_byname(sfp,len)
-#else
-#define XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,len)	/* space a name uses */ \
-	((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \
-	 ((sfp)->hdr.i8count == 0) * \
-	 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_ENTSIZE_BYENTRY)
-int xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep);
+static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
+{
+	return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \
+		((sfp)->hdr.i8count == 0) * \
+		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
+}
+
 #define XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)	\
 	xfs_dir2_sf_entsize_byentry(sfp,sfep)
-#else
-#define XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)	/* space an entry uses */ \
-	((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (sfep)->namelen - \
-	 ((sfp)->hdr.i8count == 0) * \
-	 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_FIRSTENTRY)
-xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp);
+static inline int
+xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
+{
+	return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (sfep)->namelen - \
+		((sfp)->hdr.i8count == 0) * \
+		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
+}
+
 #define XFS_DIR2_SF_FIRSTENTRY(sfp)	xfs_dir2_sf_firstentry(sfp)
-#else
-#define XFS_DIR2_SF_FIRSTENTRY(sfp)	/* first entry in struct */ \
-	((xfs_dir2_sf_entry_t *) \
-	 ((char *)(sfp) + XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_NEXTENTRY)
-xfs_dir2_sf_entry_t *xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp,
-					   xfs_dir2_sf_entry_t *sfep);
-#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep)		xfs_dir2_sf_nextentry(sfp,sfep)
-#else
-#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep)		/* next entry in struct */ \
-	((xfs_dir2_sf_entry_t *) \
-		((char *)(sfep) + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)))
-#endif
+static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp)
+{
+	return ((xfs_dir2_sf_entry_t *) \
+		((char *)(sfp) + XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)));
+}
+
+#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep)	xfs_dir2_sf_nextentry(sfp,sfep)
+static inline xfs_dir2_sf_entry_t *
+xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
+{
+	return ((xfs_dir2_sf_entry_t *) \
+		((char *)(sfep) + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)));
+}
 
 /*
  * Functions.
  */
-
-extern int
-	xfs_dir2_block_sfsize(struct xfs_inode *dp,
-			      struct xfs_dir2_block *block,
-			      xfs_dir2_sf_hdr_t *sfhp);
-
-extern int
-	xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
-			     int size, xfs_dir2_sf_hdr_t *sfhp);
-
-extern int
-	xfs_dir2_sf_addname(struct xfs_da_args *args);
-
-extern int
-	xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
-
-extern int
-	xfs_dir2_sf_getdents(struct xfs_inode *dp, struct uio *uio, int *eofp,
-			     struct xfs_dirent *dbp, xfs_dir2_put_t put);
-
-extern int
-	xfs_dir2_sf_lookup(struct xfs_da_args *args);
-
-extern int
-	xfs_dir2_sf_removename(struct xfs_da_args *args);
-
-extern int
-	xfs_dir2_sf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
+				 struct xfs_dir2_block *block,
+				 xfs_dir2_sf_hdr_t *sfhp);
+extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
+				int size, xfs_dir2_sf_hdr_t *sfhp);
+extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
+extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
+extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, struct uio *uio,
+				int *eofp, struct xfs_dirent *dbp,
+				xfs_dir2_put_t put);
+extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
+extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
 
 #endif	/* __XFS_DIR2_SF_H__ */
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
index 9d641739314..c626943b411 100644
--- a/fs/xfs/xfs_dir2_trace.c
+++ b/fs/xfs/xfs_dir2_trace.c
@@ -1,52 +1,33 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-/*
- * xfs_dir2_trace.c
- * Tracing for xfs v2 directories.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "xfs.h"
-
+#include "xfs_fs.h"
 #include "xfs_types.h"
 #include "xfs_inum.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir2_trace.h"
 
 #ifdef XFS_DIR2_TRACE
diff --git a/fs/xfs/xfs_dir2_trace.h b/fs/xfs/xfs_dir2_trace.h
index 0a178bffa80..ca3c754f482 100644
--- a/fs/xfs/xfs_dir2_trace.h
+++ b/fs/xfs/xfs_dir2_trace.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DIR2_TRACE_H__
 #define __XFS_DIR2_TRACE_H__
diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
index c2ea6171fb0..950df31efc4 100644
--- a/fs/xfs/xfs_dir_leaf.c
+++ b/fs/xfs/xfs_dir_leaf.c
@@ -1,66 +1,44 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
-/*
- * xfs_dir_leaf.c
- *
- * GROT: figure out how to recover gracefully when bmap returns ENOSPC.
- */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
 #include "xfs_bmap.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir_leaf.h"
 #include "xfs_error.h"
 
@@ -508,7 +486,7 @@ xfs_dir_shortform_getdents(xfs_inode_t *dp, uio_t *uio, int *eofp,
 	/*
 	 * Sort the entries on hash then entno.
 	 */
-	qsort(sbuf, nsbuf, sizeof(*sbuf), xfs_dir_shortform_compare);
+	xfs_sort(sbuf, nsbuf, sizeof(*sbuf), xfs_dir_shortform_compare);
 	/*
 	 * Stuff in last entry.
 	 */
@@ -650,7 +628,7 @@ xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs)
 	xfs_dir_leaf_name_t *namest;
 	xfs_da_args_t args;
 	xfs_inode_t *dp;
-	xfs_ino_t parent;
+	xfs_ino_t parent = 0;
 	char *tmpbuffer;
 	int retval, i;
 	xfs_dabuf_t *bp;
diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h
index dd423ce1bc8..ab6b09eef9a 100644
--- a/fs/xfs/xfs_dir_leaf.h
+++ b/fs/xfs/xfs_dir_leaf.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DIR_LEAF_H__
 #define	__XFS_DIR_LEAF_H__
@@ -127,13 +113,13 @@ typedef union {
 	 * Watch the order here (endian-ness dependent).
 	 */
 	struct {
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#ifndef XFS_NATIVE_HOST
 		xfs_dahash_t	h;	/* hash value */
 		__uint32_t	be;	/* block and entry */
-#else	/* __BYTE_ORDER == __BIG_ENDIAN */
+#else
 		__uint32_t	be;	/* block and entry */
 		xfs_dahash_t	h;	/* hash value */
-#endif	/* __BYTE_ORDER == __BIG_ENDIAN */
+#endif /* XFS_NATIVE_HOST */
 	} s;
 } xfs_dircook_t;
 
@@ -152,30 +138,26 @@ typedef struct xfs_dir_put_args
 	struct uio	*uio;		/* uio control structure */
 } xfs_dir_put_args_t;
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_LEAF_ENTSIZE_BYNAME)
-int xfs_dir_leaf_entsize_byname(int len);
 #define XFS_DIR_LEAF_ENTSIZE_BYNAME(len)	xfs_dir_leaf_entsize_byname(len)
-#else
-#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len)	/* space a name will use */ \
-	((uint)sizeof(xfs_dir_leaf_name_t)-1 + len)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_LEAF_ENTSIZE_BYENTRY)
-int xfs_dir_leaf_entsize_byentry(xfs_dir_leaf_entry_t *entry);
+static inline int xfs_dir_leaf_entsize_byname(int len)
+{
+	return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len;
+}
+
 #define XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)	\
 	xfs_dir_leaf_entsize_byentry(entry)
-#else
-#define XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)	/* space an entry will use */ \
-	((uint)sizeof(xfs_dir_leaf_name_t)-1 + (entry)->namelen)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_LEAF_NAMESTRUCT)
-xfs_dir_leaf_name_t *
-xfs_dir_leaf_namestruct(xfs_dir_leafblock_t *leafp, int offset);
+static inline int xfs_dir_leaf_entsize_byentry(xfs_dir_leaf_entry_t *entry)
+{
+	return (uint)sizeof(xfs_dir_leaf_name_t)-1 + (entry)->namelen;
+}
+
 #define XFS_DIR_LEAF_NAMESTRUCT(leafp,offset)	\
 	xfs_dir_leaf_namestruct(leafp,offset)
-#else
-#define XFS_DIR_LEAF_NAMESTRUCT(leafp,offset)	/* point to name struct */ \
-	((xfs_dir_leaf_name_t *)&((char *)(leafp))[offset])
-#endif
+static inline xfs_dir_leaf_name_t *
+xfs_dir_leaf_namestruct(xfs_dir_leafblock_t *leafp, int offset)
+{
+	return (xfs_dir_leaf_name_t *)&((char *)(leafp))[offset];
+}
 
 /*========================================================================
  * Function prototypes for the kernel.
@@ -190,7 +172,7 @@ int xfs_dir_shortform_lookup(struct xfs_da_args *args);
 int xfs_dir_shortform_to_leaf(struct xfs_da_args *args);
 int xfs_dir_shortform_removename(struct xfs_da_args *args);
 int xfs_dir_shortform_getdents(struct xfs_inode *dp, struct uio *uio, int *eofp,
-				      struct xfs_dirent *dbp, xfs_dir_put_t put);
+			       struct xfs_dirent *dbp, xfs_dir_put_t put);
 int xfs_dir_shortform_replace(struct xfs_da_args *args);
 
 /*
@@ -237,7 +219,6 @@ int	xfs_dir_put_dirent64_direct(xfs_dir_put_args_t *pa);
 int	xfs_dir_put_dirent64_uio(xfs_dir_put_args_t *pa);
 int	xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
 
-
 /*
  * Global data.
  */
diff --git a/fs/xfs/xfs_dir_sf.h b/fs/xfs/xfs_dir_sf.h
index a61bcfc2a87..fe44c6f4d56 100644
--- a/fs/xfs/xfs_dir_sf.h
+++ b/fs/xfs/xfs_dir_sf.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DIR_SF_H__
 #define	__XFS_DIR_SF_H__
@@ -76,49 +62,44 @@ typedef struct xfs_dir_sf_sort {
 	char		*name;		/* name value, pointer into buffer */
 } xfs_dir_sf_sort_t;
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_GET_DIRINO)
-void xfs_dir_sf_get_dirino(xfs_dir_ino_t *from, xfs_ino_t *to);
-#define	XFS_DIR_SF_GET_DIRINO(from,to)		    xfs_dir_sf_get_dirino(from, to)
-#else
-#define	XFS_DIR_SF_GET_DIRINO(from,to)		    (*(to) = XFS_GET_DIR_INO8(*from))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_PUT_DIRINO)
-void xfs_dir_sf_put_dirino(xfs_ino_t *from, xfs_dir_ino_t *to);
-#define	XFS_DIR_SF_PUT_DIRINO(from,to)    xfs_dir_sf_put_dirino(from, to)
-#else
-#define	XFS_DIR_SF_PUT_DIRINO(from,to)    XFS_PUT_DIR_INO8(*(from), *(to))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_ENTSIZE_BYNAME)
-int xfs_dir_sf_entsize_byname(int len);
-#define XFS_DIR_SF_ENTSIZE_BYNAME(len)		xfs_dir_sf_entsize_byname(len)
-#else
-#define XFS_DIR_SF_ENTSIZE_BYNAME(len)		/* space a name uses */ \
-	((uint)sizeof(xfs_dir_sf_entry_t)-1 + (len))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_ENTSIZE_BYENTRY)
-int xfs_dir_sf_entsize_byentry(xfs_dir_sf_entry_t *sfep);
+#define	XFS_DIR_SF_GET_DIRINO(from,to)	xfs_dir_sf_get_dirino(from, to)
+static inline void xfs_dir_sf_get_dirino(xfs_dir_ino_t *from, xfs_ino_t *to)
+{
+	*(to) = XFS_GET_DIR_INO8(*from);
+}
+
+#define	XFS_DIR_SF_PUT_DIRINO(from,to)	xfs_dir_sf_put_dirino(from, to)
+static inline void xfs_dir_sf_put_dirino(xfs_ino_t *from, xfs_dir_ino_t *to)
+{
+	XFS_PUT_DIR_INO8(*(from), *(to));
+}
+
+#define XFS_DIR_SF_ENTSIZE_BYNAME(len)	xfs_dir_sf_entsize_byname(len)
+static inline int xfs_dir_sf_entsize_byname(int len)
+{
+	return (uint)sizeof(xfs_dir_sf_entry_t)-1 + (len);
+}
+
 #define XFS_DIR_SF_ENTSIZE_BYENTRY(sfep)	xfs_dir_sf_entsize_byentry(sfep)
-#else
-#define XFS_DIR_SF_ENTSIZE_BYENTRY(sfep)	/* space an entry uses */ \
-	((uint)sizeof(xfs_dir_sf_entry_t)-1 + (sfep)->namelen)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_NEXTENTRY)
-xfs_dir_sf_entry_t *xfs_dir_sf_nextentry(xfs_dir_sf_entry_t *sfep);
+static inline int xfs_dir_sf_entsize_byentry(xfs_dir_sf_entry_t *sfep)
+{
+	return (uint)sizeof(xfs_dir_sf_entry_t)-1 + (sfep)->namelen;
+}
+
 #define XFS_DIR_SF_NEXTENTRY(sfep)		xfs_dir_sf_nextentry(sfep)
-#else
-#define XFS_DIR_SF_NEXTENTRY(sfep)		/* next entry in struct */ \
-	((xfs_dir_sf_entry_t *) \
-		((char *)(sfep) + XFS_DIR_SF_ENTSIZE_BYENTRY(sfep)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_ALLFIT)
-int xfs_dir_sf_allfit(int count, int totallen);
+static inline xfs_dir_sf_entry_t *xfs_dir_sf_nextentry(xfs_dir_sf_entry_t *sfep)
+{
+	return (xfs_dir_sf_entry_t *) \
+		((char *)(sfep) + XFS_DIR_SF_ENTSIZE_BYENTRY(sfep));
+}
+
 #define XFS_DIR_SF_ALLFIT(count,totallen)	\
 	xfs_dir_sf_allfit(count,totallen)
-#else
-#define XFS_DIR_SF_ALLFIT(count,totallen)	/* will all entries fit? */ \
-	((uint)sizeof(xfs_dir_sf_hdr_t) + \
-	       ((uint)sizeof(xfs_dir_sf_entry_t)-1)*(count) + (totallen))
-#endif
+static inline int xfs_dir_sf_allfit(int count, int totallen)
+{
+	return ((uint)sizeof(xfs_dir_sf_hdr_t) + \
+	       ((uint)sizeof(xfs_dir_sf_entry_t)-1)*(count) + (totallen));
+}
 
 #if defined(XFS_DIR_TRACE)
 
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index 55c17adaaa3..5a5c7a63e80 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_DMAPI_H__
 #define __XFS_DMAPI_H__
diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
index cec54ba800e..629795b3b3d 100644
--- a/fs/xfs/xfs_dmops.c
+++ b/fs/xfs/xfs_dmops.c
@@ -1,40 +1,25 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index dcd3fdd5c1f..d7b6b5d1670 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -1,51 +1,35 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
-#include "xfs_sb.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
+#include "xfs_sb.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_utils.h"
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 52ee2b90b5e..06d8a8426c1 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -1,39 +1,25 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef	__XFS_ERROR_H__
 #define	__XFS_ERROR_H__
 
 #define prdev(fmt,targ,args...) \
-	printk("XFS: device %s- " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
+	printk("XFS: device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
 
 #define XFS_ERECOVER	1	/* Failure to recover log */
 #define XFS_ELOGSTAT	2	/* Failure to stat log in user space */
@@ -54,24 +40,10 @@ extern int	xfs_error_trap(int);
 
 struct xfs_mount;
 
-extern void
-xfs_error_report(
-	char		*tag,
-	int		level,
-	struct xfs_mount *mp,
-	char		*fname,
-	int		linenum,
-	inst_t		*ra);
-
-extern void
-xfs_corruption_error(
-	char		*tag,
-	int		level,
-	struct xfs_mount *mp,
-	void		*p,
-	char		*fname,
-	int		linenum,
-	inst_t		*ra);
+extern void xfs_error_report(char *tag, int level, struct xfs_mount *mp,
+				char *fname, int linenum, inst_t *ra);
+extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp,
+				void *p, char *fname, int linenum, inst_t *ra);
 
 #define	XFS_ERROR_REPORT(e, lvl, mp)	\
 	xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
@@ -84,6 +56,32 @@ xfs_corruption_error(
 #define XFS_ERRLEVEL_HIGH	5
 
 /*
+ * Macros to set EFSCORRUPTED & return/branch.
+ */
+#define	XFS_WANT_CORRUPTED_GOTO(x,l)	\
+	{ \
+		int fs_is_ok = (x); \
+		ASSERT(fs_is_ok); \
+		if (unlikely(!fs_is_ok)) { \
+			XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \
+					 XFS_ERRLEVEL_LOW, NULL); \
+			error = XFS_ERROR(EFSCORRUPTED); \
+			goto l; \
+		} \
+	}
+
+#define	XFS_WANT_CORRUPTED_RETURN(x)	\
+	{ \
+		int fs_is_ok = (x); \
+		ASSERT(fs_is_ok); \
+		if (unlikely(!fs_is_ok)) { \
+			XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \
+					 XFS_ERRLEVEL_LOW, NULL); \
+			return XFS_ERROR(EFSCORRUPTED); \
+		} \
+	}
+
+/*
  * error injection tags - the labels can be anything you want
  * but each tag should have its own unique number
  */
@@ -139,8 +137,8 @@ xfs_corruption_error(
 #define	XFS_RANDOM_BMAPIFORMAT				XFS_RANDOM_DEFAULT
 
 #if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
-extern int	xfs_error_test(int, int *, char *, int, char *, unsigned long);
-void xfs_error_test_init(void);
+extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
+extern void xfs_error_test_init(void);
 
 #define	XFS_NUM_INJECT_ERROR				10
 
@@ -156,12 +154,10 @@ void xfs_error_test_init(void);
 			(rf)))
 #endif /* __ANSI_CPP__ */
 
-int		xfs_errortag_add(int error_tag, xfs_mount_t *mp);
-int		xfs_errortag_clear(int error_tag, xfs_mount_t *mp);
-
-int		xfs_errortag_clearall(xfs_mount_t *mp);
-int		xfs_errortag_clearall_umount(int64_t fsid, char *fsname,
-						int loud);
+extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp);
+extern int xfs_errortag_clear(int error_tag, xfs_mount_t *mp);
+extern int xfs_errortag_clearall(xfs_mount_t *mp);
+extern int xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud);
 #else
 #define XFS_TEST_ERROR(expr, mp, tag, rf)	(expr)
 #define xfs_errortag_add(tag, mp)		(ENOSYS)
@@ -185,9 +181,9 @@ int		xfs_errortag_clearall_umount(int64_t fsid, char *fsname,
 
 struct xfs_mount;
 /* PRINTFLIKE4 */
-void		xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
+extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
 			    char *fmt, ...);
 /* PRINTFLIKE3 */
-void		xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
+extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
 
 #endif	/* __XFS_ERROR_H__ */
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index db7cbd1bc85..f19282ec854 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -1,46 +1,25 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-/*
- * This file contains the implementation of the xfs_efi_log_item
- * and xfs_efd_log_item items.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_sb.h"
@@ -107,6 +86,7 @@ xfs_efi_item_format(xfs_efi_log_item_t	*efip,
 
 	log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format);
 	log_vector->i_len = size;
+	XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFI_FORMAT);
 	ASSERT(size >= sizeof(xfs_efi_log_format_t));
 }
 
@@ -426,6 +406,7 @@ xfs_efd_item_format(xfs_efd_log_item_t	*efdp,
 
 	log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format);
 	log_vector->i_len = size;
+	XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFD_FORMAT);
 	ASSERT(size >= sizeof(xfs_efd_log_format_t));
 }
 
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index d433bac9f59..5bf681708fe 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef	__XFS_EXTFREE_ITEM_H__
 #define	__XFS_EXTFREE_ITEM_H__
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 095af0a5cff..ba096f80f48 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -1,34 +1,19 @@
 /*
- * Copyright (c) 1995-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 1995-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307,
- * USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_FS_H__
 #define __XFS_FS_H__
@@ -251,6 +236,7 @@ typedef struct xfs_fsop_resblks {
 #define XFS_FSOP_GEOM_FLAGS_DIRV2	0x0080	/* directory version 2	*/
 #define XFS_FSOP_GEOM_FLAGS_LOGV2	0x0100	/* log format version 2	*/
 #define XFS_FSOP_GEOM_FLAGS_SECTOR	0x0200	/* sector sizes >1BB	*/
+#define XFS_FSOP_GEOM_FLAGS_ATTR2	0x0400	/* inline attributes rework */
 
 
 /*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index ca535d61319..7ceabd0e2d9 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -1,66 +1,51 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
+#include "xfs_bit.h"
 #include "xfs_inum.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
+#include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_ag.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
 #include "xfs_btree.h"
 #include "xfs_error.h"
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
 #include "xfs_fsops.h"
 #include "xfs_itable.h"
-#include "xfs_rw.h"
-#include "xfs_refcache.h"
 #include "xfs_trans_space.h"
 #include "xfs_rtalloc.h"
-#include "xfs_dir2.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
+#include "xfs_rw.h"
 
 /*
  * File system operations
@@ -110,7 +95,9 @@ xfs_fs_geometry(
 			(XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
 			(XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
-				XFS_FSOP_GEOM_FLAGS_SECTOR : 0);
+				XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
+			(XFS_SB_VERSION_HASATTR2(&mp->m_sb) ?
+				XFS_FSOP_GEOM_FLAGS_ATTR2 : 0);
 		geo->logsectsize = XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
 				mp->m_sb.sb_logsectsize : BBSIZE;
 		geo->rtsectsize = mp->m_sb.sb_blocksize;
@@ -184,7 +171,7 @@ xfs_growfs_data_private(
 		memset(&mp->m_perag[oagcount], 0,
 			(nagcount - oagcount) * sizeof(xfs_perag_t));
 		mp->m_flags |= XFS_MOUNT_32BITINODES;
-		nagimax = xfs_initialize_perag(mp, nagcount);
+		nagimax = xfs_initialize_perag(XFS_MTOVFS(mp), mp, nagcount);
 		up_write(&mp->m_peraglock);
 	}
 	tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
@@ -204,28 +191,26 @@ xfs_growfs_data_private(
 				  XFS_FSS_TO_BB(mp, 1), 0);
 		agf = XFS_BUF_TO_AGF(bp);
 		memset(agf, 0, mp->m_sb.sb_sectsize);
-		INT_SET(agf->agf_magicnum, ARCH_CONVERT, XFS_AGF_MAGIC);
-		INT_SET(agf->agf_versionnum, ARCH_CONVERT, XFS_AGF_VERSION);
-		INT_SET(agf->agf_seqno, ARCH_CONVERT, agno);
+		agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
+		agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
+		agf->agf_seqno = cpu_to_be32(agno);
 		if (agno == nagcount - 1)
 			agsize =
 				nb -
 				(agno * (xfs_rfsblock_t)mp->m_sb.sb_agblocks);
 		else
 			agsize = mp->m_sb.sb_agblocks;
-		INT_SET(agf->agf_length, ARCH_CONVERT, agsize);
-		INT_SET(agf->agf_roots[XFS_BTNUM_BNOi], ARCH_CONVERT,
-			XFS_BNO_BLOCK(mp));
-		INT_SET(agf->agf_roots[XFS_BTNUM_CNTi], ARCH_CONVERT,
-			XFS_CNT_BLOCK(mp));
-		INT_SET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT, 1);
-		INT_SET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT, 1);
+		agf->agf_length = cpu_to_be32(agsize);
+		agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp));
+		agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
+		agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
+		agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
 		agf->agf_flfirst = 0;
-		INT_SET(agf->agf_fllast, ARCH_CONVERT, XFS_AGFL_SIZE(mp) - 1);
+		agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
 		agf->agf_flcount = 0;
 		tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
-		INT_SET(agf->agf_freeblks, ARCH_CONVERT, tmpsize);
-		INT_SET(agf->agf_longest, ARCH_CONVERT, tmpsize);
+		agf->agf_freeblks = cpu_to_be32(tmpsize);
+		agf->agf_longest = cpu_to_be32(tmpsize);
 		error = xfs_bwrite(mp, bp);
 		if (error) {
 			goto error0;
@@ -238,19 +223,18 @@ xfs_growfs_data_private(
 				  XFS_FSS_TO_BB(mp, 1), 0);
 		agi = XFS_BUF_TO_AGI(bp);
 		memset(agi, 0, mp->m_sb.sb_sectsize);
-		INT_SET(agi->agi_magicnum, ARCH_CONVERT, XFS_AGI_MAGIC);
-		INT_SET(agi->agi_versionnum, ARCH_CONVERT, XFS_AGI_VERSION);
-		INT_SET(agi->agi_seqno, ARCH_CONVERT, agno);
-		INT_SET(agi->agi_length, ARCH_CONVERT, agsize);
+		agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
+		agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
+		agi->agi_seqno = cpu_to_be32(agno);
+		agi->agi_length = cpu_to_be32(agsize);
 		agi->agi_count = 0;
-		INT_SET(agi->agi_root, ARCH_CONVERT, XFS_IBT_BLOCK(mp));
-		INT_SET(agi->agi_level, ARCH_CONVERT, 1);
+		agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp));
+		agi->agi_level = cpu_to_be32(1);
 		agi->agi_freecount = 0;
-		INT_SET(agi->agi_newino, ARCH_CONVERT, NULLAGINO);
-		INT_SET(agi->agi_dirino, ARCH_CONVERT, NULLAGINO);
+		agi->agi_newino = cpu_to_be32(NULLAGINO);
+		agi->agi_dirino = cpu_to_be32(NULLAGINO);
 		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
-			INT_SET(agi->agi_unlinked[bucket], ARCH_CONVERT,
-				NULLAGINO);
+			agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
 		error = xfs_bwrite(mp, bp);
 		if (error) {
 			goto error0;
@@ -263,17 +247,16 @@ xfs_growfs_data_private(
 			BTOBB(mp->m_sb.sb_blocksize), 0);
 		block = XFS_BUF_TO_SBLOCK(bp);
 		memset(block, 0, mp->m_sb.sb_blocksize);
-		INT_SET(block->bb_magic, ARCH_CONVERT, XFS_ABTB_MAGIC);
+		block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC);
 		block->bb_level = 0;
-		INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
-		INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
-		INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+		block->bb_numrecs = cpu_to_be16(1);
+		block->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+		block->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
 		arec = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc,
 			block, 1, mp->m_alloc_mxr[0]);
-		INT_SET(arec->ar_startblock, ARCH_CONVERT,
-			XFS_PREALLOC_BLOCKS(mp));
-		INT_SET(arec->ar_blockcount, ARCH_CONVERT,
-			agsize - INT_GET(arec->ar_startblock, ARCH_CONVERT));
+		arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
+		arec->ar_blockcount = cpu_to_be32(
+			agsize - be32_to_cpu(arec->ar_startblock));
 		error = xfs_bwrite(mp, bp);
 		if (error) {
 			goto error0;
@@ -286,18 +269,17 @@ xfs_growfs_data_private(
 			BTOBB(mp->m_sb.sb_blocksize), 0);
 		block = XFS_BUF_TO_SBLOCK(bp);
 		memset(block, 0, mp->m_sb.sb_blocksize);
-		INT_SET(block->bb_magic, ARCH_CONVERT, XFS_ABTC_MAGIC);
+		block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC);
 		block->bb_level = 0;
-		INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
-		INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
-		INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+		block->bb_numrecs = cpu_to_be16(1);
+		block->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+		block->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
 		arec = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc,
 			block, 1, mp->m_alloc_mxr[0]);
-		INT_SET(arec->ar_startblock, ARCH_CONVERT,
-			XFS_PREALLOC_BLOCKS(mp));
-		INT_SET(arec->ar_blockcount, ARCH_CONVERT,
-			agsize - INT_GET(arec->ar_startblock, ARCH_CONVERT));
-		nfree += INT_GET(arec->ar_blockcount, ARCH_CONVERT);
+		arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
+		arec->ar_blockcount = cpu_to_be32(
+			agsize - be32_to_cpu(arec->ar_startblock));
+		nfree += be32_to_cpu(arec->ar_blockcount);
 		error = xfs_bwrite(mp, bp);
 		if (error) {
 			goto error0;
@@ -310,11 +292,11 @@ xfs_growfs_data_private(
 			BTOBB(mp->m_sb.sb_blocksize), 0);
 		block = XFS_BUF_TO_SBLOCK(bp);
 		memset(block, 0, mp->m_sb.sb_blocksize);
-		INT_SET(block->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC);
+		block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC);
 		block->bb_level = 0;
 		block->bb_numrecs = 0;
-		INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
-		INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+		block->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+		block->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
 		error = xfs_bwrite(mp, bp);
 		if (error) {
 			goto error0;
@@ -334,10 +316,9 @@ xfs_growfs_data_private(
 		}
 		ASSERT(bp);
 		agi = XFS_BUF_TO_AGI(bp);
-		INT_MOD(agi->agi_length, ARCH_CONVERT, new);
+		be32_add(&agi->agi_length, new);
 		ASSERT(nagcount == oagcount ||
-		       INT_GET(agi->agi_length, ARCH_CONVERT) ==
-				mp->m_sb.sb_agblocks);
+		       be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks);
 		xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH);
 		/*
 		 * Change agf length.
@@ -348,14 +329,14 @@ xfs_growfs_data_private(
 		}
 		ASSERT(bp);
 		agf = XFS_BUF_TO_AGF(bp);
-		INT_MOD(agf->agf_length, ARCH_CONVERT, new);
-		ASSERT(INT_GET(agf->agf_length, ARCH_CONVERT) ==
-				INT_GET(agi->agi_length, ARCH_CONVERT));
+		be32_add(&agf->agf_length, new);
+		ASSERT(be32_to_cpu(agf->agf_length) ==
+		       be32_to_cpu(agi->agi_length));
 		/*
 		 * Free the new space.
 		 */
 		error = xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, agno,
-			INT_GET(agf->agf_length, ARCH_CONVERT) - new), new);
+			be32_to_cpu(agf->agf_length) - new), new);
 		if (error) {
 			goto error0;
 		}
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index b61486173a6..f32713f14f9 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -1,67 +1,29 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_FSOPS_H__
 #define	__XFS_FSOPS_H__
 
-int
-xfs_fs_geometry(
-	xfs_mount_t		*mp,
-	xfs_fsop_geom_t		*geo,
-	int			new_version);
-
-int
-xfs_growfs_data(
-	xfs_mount_t		*mp,
-	xfs_growfs_data_t	*in);
-
-int
-xfs_growfs_log(
-	xfs_mount_t		*mp,
-	xfs_growfs_log_t	*in);
-
-int
-xfs_fs_counts(
-	xfs_mount_t		*mp,
-	xfs_fsop_counts_t	*cnt);
-
-int
-xfs_reserve_blocks(
-	xfs_mount_t		*mp,
-	__uint64_t		*inval,
-	xfs_fsop_resblks_t	*outval);
-
-int
-xfs_fs_goingdown(
-	xfs_mount_t		*mp,
-	__uint32_t		inflags);
+extern int xfs_fs_geometry(xfs_mount_t *mp, xfs_fsop_geom_t *geo, int nversion);
+extern int xfs_growfs_data(xfs_mount_t *mp, xfs_growfs_data_t *in);
+extern int xfs_growfs_log(xfs_mount_t *mp, xfs_growfs_log_t *in);
+extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
+extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
+				xfs_fsop_resblks_t *outval);
+extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
 
 #endif	/* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index ce5fee9eaec..8f3fae1aa98 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1,41 +1,26 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -43,18 +28,17 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
 #include "xfs_alloc.h"
-#include "xfs_bit.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_bmap.h"
@@ -194,8 +178,8 @@ xfs_ialloc_ag_alloc(
 	 * Ideally they should be spaced out through the a.g.
 	 * For now, just allocate blocks up front.
 	 */
-	args.agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
-	args.fsbno = XFS_AGB_TO_FSB(args.mp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
+	args.agbno = be32_to_cpu(agi->agi_root);
+	args.fsbno = XFS_AGB_TO_FSB(args.mp, be32_to_cpu(agi->agi_seqno),
 				    args.agbno);
 	/*
 	 * Allocate a fixed-size extent of inodes.
@@ -217,9 +201,9 @@ xfs_ialloc_ag_alloc(
 	 */
 	if (isaligned && args.fsbno == NULLFSBLOCK) {
 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
-		args.agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
+		args.agbno = be32_to_cpu(agi->agi_root);
 		args.fsbno = XFS_AGB_TO_FSB(args.mp,
-				INT_GET(agi->agi_seqno, ARCH_CONVERT), args.agbno);
+				be32_to_cpu(agi->agi_seqno), args.agbno);
 		if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
 			args.mp->m_sb.sb_inoalignmt >=
 			XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
@@ -274,7 +258,7 @@ xfs_ialloc_ag_alloc(
 		/*
 		 * Get the block.
 		 */
-		d = XFS_AGB_TO_DADDR(args.mp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
+		d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno),
 				     args.agbno + (j * blks_per_cluster));
 		fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
 					 args.mp->m_bsize * blks_per_cluster,
@@ -294,17 +278,17 @@ xfs_ialloc_ag_alloc(
 		}
 		xfs_trans_inode_alloc_buf(tp, fbuf);
 	}
-	INT_MOD(agi->agi_count, ARCH_CONVERT, newlen);
-	INT_MOD(agi->agi_freecount, ARCH_CONVERT, newlen);
+	be32_add(&agi->agi_count, newlen);
+	be32_add(&agi->agi_freecount, newlen);
 	down_read(&args.mp->m_peraglock);
-	args.mp->m_perag[INT_GET(agi->agi_seqno, ARCH_CONVERT)].pagi_freecount += newlen;
+	args.mp->m_perag[be32_to_cpu(agi->agi_seqno)].pagi_freecount += newlen;
 	up_read(&args.mp->m_peraglock);
-	INT_SET(agi->agi_newino, ARCH_CONVERT, newino);
+	agi->agi_newino = cpu_to_be32(newino);
 	/*
 	 * Insert records describing the new inode chunk into the btree.
 	 */
 	cur = xfs_btree_init_cursor(args.mp, tp, agbp,
-			INT_GET(agi->agi_seqno, ARCH_CONVERT),
+			be32_to_cpu(agi->agi_seqno),
 			XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
 	for (thisino = newino;
 	     thisino < newino + newlen;
@@ -544,7 +528,7 @@ xfs_dialloc(
 			return 0;
 		}
 		agi = XFS_BUF_TO_AGI(agbp);
-		ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
 	} else {
 		/*
 		 * Continue where we left off before.  In this case, we
@@ -552,12 +536,12 @@ xfs_dialloc(
 		 */
 		agbp = *IO_agbp;
 		agi = XFS_BUF_TO_AGI(agbp);
-		ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
-		ASSERT(INT_GET(agi->agi_freecount, ARCH_CONVERT) > 0);
+		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+		ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
 	}
 	mp = tp->t_mountp;
 	agcount = mp->m_sb.sb_agcount;
-	agno = INT_GET(agi->agi_seqno, ARCH_CONVERT);
+	agno = be32_to_cpu(agi->agi_seqno);
 	tagno = agno;
 	pagno = XFS_INO_TO_AGNO(mp, parent);
 	pagino = XFS_INO_TO_AGINO(mp, parent);
@@ -605,7 +589,7 @@ xfs_dialloc(
 				 * can commit the current transaction and call
 				 * us again where we left off.
 				 */
-				ASSERT(INT_GET(agi->agi_freecount, ARCH_CONVERT) > 0);
+				ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
 				*alloc_done = B_TRUE;
 				*IO_agbp = agbp;
 				*inop = NULLFSINO;
@@ -636,7 +620,7 @@ nextag:
 		if (error)
 			goto nextag;
 		agi = XFS_BUF_TO_AGI(agbp);
-		ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
 	}
 	/*
 	 * Here with an allocation group that has a free inode.
@@ -645,14 +629,14 @@ nextag:
 	 */
 	agno = tagno;
 	*IO_agbp = NULL;
-	cur = xfs_btree_init_cursor(mp, tp, agbp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
+	cur = xfs_btree_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno),
 				    XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
 	/*
 	 * If pagino is 0 (this is the root inode allocation) use newino.
 	 * This must work because we've just allocated some.
 	 */
 	if (!pagino)
-		pagino = INT_GET(agi->agi_newino, ARCH_CONVERT);
+		pagino = be32_to_cpu(agi->agi_newino);
 #ifdef DEBUG
 	if (cur->bc_nlevels == 1) {
 		int	freecount = 0;
@@ -670,7 +654,7 @@ nextag:
 				goto error0;
 		} while (i == 1);
 
-		ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
+		ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
 		       XFS_FORCED_SHUTDOWN(mp));
 	}
 #endif
@@ -829,9 +813,9 @@ nextag:
 	 * In a different a.g. from the parent.
 	 * See if the most recently allocated block has any free.
 	 */
-	else if (INT_GET(agi->agi_newino, ARCH_CONVERT) != NULLAGINO) {
+	else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
 		if ((error = xfs_inobt_lookup_eq(cur,
-				INT_GET(agi->agi_newino, ARCH_CONVERT), 0, 0, &i)))
+				be32_to_cpu(agi->agi_newino), 0, 0, &i)))
 			goto error0;
 		if (i == 1 &&
 		    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
@@ -878,7 +862,7 @@ nextag:
 	if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
 			rec.ir_free)))
 		goto error0;
-	INT_MOD(agi->agi_freecount, ARCH_CONVERT, -1);
+	be32_add(&agi->agi_freecount, -1);
 	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
 	down_read(&mp->m_peraglock);
 	mp->m_perag[tagno].pagi_freecount--;
@@ -898,7 +882,7 @@ nextag:
 			if ((error = xfs_inobt_increment(cur, 0, &i)))
 				goto error0;
 		} while (i == 1);
-		ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
+		ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
 		       XFS_FORCED_SHUTDOWN(mp));
 	}
 #endif
@@ -957,8 +941,11 @@ xfs_difree(
 	agino = XFS_INO_TO_AGINO(mp, inode);
 	if (inode != XFS_AGINO_TO_INO(mp, agno, agino))  {
 		cmn_err(CE_WARN,
-			"xfs_difree: inode != XFS_AGINO_TO_INO() (%d != %d) on %s.  Returning EINVAL.",
-			inode, XFS_AGINO_TO_INO(mp, agno, agino), mp->m_fsname);
+			"xfs_difree: inode != XFS_AGINO_TO_INO() "
+			"(%llu != %llu) on %s.  Returning EINVAL.",
+			(unsigned long long)inode,
+			(unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino),
+			mp->m_fsname);
 		ASSERT(0);
 		return XFS_ERROR(EINVAL);
 	}
@@ -983,8 +970,8 @@ xfs_difree(
 		return error;
 	}
 	agi = XFS_BUF_TO_AGI(agbp);
-	ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
-	ASSERT(agbno < INT_GET(agi->agi_length, ARCH_CONVERT));
+	ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+	ASSERT(agbno < be32_to_cpu(agi->agi_length));
 	/*
 	 * Initialize the cursor.
 	 */
@@ -1006,7 +993,7 @@ xfs_difree(
 					goto error0;
 			}
 		} while (i == 1);
-		ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
+		ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
 		       XFS_FORCED_SHUTDOWN(mp));
 	}
 #endif
@@ -1055,8 +1042,8 @@ xfs_difree(
 		 * to be freed when the transaction is committed.
 		 */
 		ilen = XFS_IALLOC_INODES(mp);
-		INT_MOD(agi->agi_count, ARCH_CONVERT, -ilen);
-		INT_MOD(agi->agi_freecount, ARCH_CONVERT, -(ilen - 1));
+		be32_add(&agi->agi_count, -ilen);
+		be32_add(&agi->agi_freecount, -(ilen - 1));
 		xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
 		down_read(&mp->m_peraglock);
 		mp->m_perag[agno].pagi_freecount -= ilen - 1;
@@ -1085,7 +1072,7 @@ xfs_difree(
 		/* 
 		 * Change the inode free counts and log the ag/sb changes.
 		 */
-		INT_MOD(agi->agi_freecount, ARCH_CONVERT, 1);
+		be32_add(&agi->agi_freecount, 1);
 		xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
 		down_read(&mp->m_peraglock);
 		mp->m_perag[agno].pagi_freecount++;
@@ -1111,7 +1098,7 @@ xfs_difree(
 					goto error0;
 			}
 		} while (i == 1);
-		ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
+		ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
 		       XFS_FORCED_SHUTDOWN(mp));
 	}
 #endif
@@ -1320,7 +1307,7 @@ xfs_ialloc_log_agi(
 	xfs_agi_t		*agi;	/* allocation group header */
 
 	agi = XFS_BUF_TO_AGI(bp);
-	ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+	ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
 #endif
 	/*
 	 * Compute byte offsets for the first and last fields.
@@ -1362,9 +1349,8 @@ xfs_ialloc_read_agi(
 	 */
 	agi = XFS_BUF_TO_AGI(bp);
 	agi_ok =
-		INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC &&
-		XFS_AGI_GOOD_VERSION(
-			INT_GET(agi->agi_versionnum, ARCH_CONVERT));
+		be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
+		XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
 	if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
 			XFS_RANDOM_IALLOC_READ_AGI))) {
 		XFS_CORRUPTION_ERROR("xfs_ialloc_read_agi", XFS_ERRLEVEL_LOW,
@@ -1374,16 +1360,15 @@ xfs_ialloc_read_agi(
 	}
 	pag = &mp->m_perag[agno];
 	if (!pag->pagi_init) {
-		pag->pagi_freecount = INT_GET(agi->agi_freecount, ARCH_CONVERT);
+		pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
 		pag->pagi_init = 1;
 	} else {
 		/*
 		 * It's possible for these to be out of sync if
 		 * we are in the middle of a forced shutdown.
 		 */
-		ASSERT(pag->pagi_freecount ==
-				INT_GET(agi->agi_freecount, ARCH_CONVERT)
-			|| XFS_FORCED_SHUTDOWN(mp));
+		ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
+			XFS_FORCED_SHUTDOWN(mp));
 	}
 
 #ifdef DEBUG
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index db6d0015cec..7f5debe1acb 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_IALLOC_H__
 #define	__XFS_IALLOC_H__
@@ -40,18 +26,8 @@ struct xfs_trans;
 /*
  * Allocation parameters for inode allocation.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IALLOC_INODES)
-int xfs_ialloc_inodes(struct xfs_mount *mp);
-#define	XFS_IALLOC_INODES(mp)	xfs_ialloc_inodes(mp)
-#else
-#define	XFS_IALLOC_INODES(mp)	((mp)->m_ialloc_inos)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IALLOC_BLOCKS)
-xfs_extlen_t xfs_ialloc_blocks(struct xfs_mount *mp);
-#define	XFS_IALLOC_BLOCKS(mp)	xfs_ialloc_blocks(mp)
-#else
-#define	XFS_IALLOC_BLOCKS(mp)	((mp)->m_ialloc_blks)
-#endif
+#define	XFS_IALLOC_INODES(mp)	(mp)->m_ialloc_inos
+#define	XFS_IALLOC_BLOCKS(mp)	(mp)->m_ialloc_blks
 
 /*
  * For small block file systems, move inodes in clusters of this size.
@@ -67,31 +43,25 @@ xfs_extlen_t xfs_ialloc_blocks(struct xfs_mount *mp);
 /*
  * Make an inode pointer out of the buffer/offset.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MAKE_IPTR)
-struct xfs_dinode *xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o);
 #define	XFS_MAKE_IPTR(mp,b,o)		xfs_make_iptr(mp,b,o)
-#else
-#define	XFS_MAKE_IPTR(mp,b,o) \
-	((xfs_dinode_t *)(xfs_buf_offset(b, (o) << (mp)->m_sb.sb_inodelog)))
-#endif
+static inline struct xfs_dinode *
+xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)
+{
+	return (xfs_dinode_t *)
+		(xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog));
+}
 
 /*
  * Find a free (set) bit in the inode bitmask.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IALLOC_FIND_FREE)
-int xfs_ialloc_find_free(xfs_inofree_t *fp);
 #define	XFS_IALLOC_FIND_FREE(fp)	xfs_ialloc_find_free(fp)
-#else
-#define	XFS_IALLOC_FIND_FREE(fp)	xfs_lowbit64(*(fp))
-#endif
+static inline int xfs_ialloc_find_free(xfs_inofree_t *fp)
+{
+	return xfs_lowbit64(*fp);
+}
 
 
 #ifdef __KERNEL__
-
-/*
- * Prototypes for visible xfs_ialloc.c routines.
- */
-
 /*
  * Allocate an inode on disk.
  * Mode is used to tell whether the new inode will need space, and whether
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 2d4daecec99..60c65683462 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -1,63 +1,46 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_dir.h"
+#include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_ialloc.h"
 #include "xfs_alloc.h"
 #include "xfs_error.h"
 
-/*
- * Inode allocation management for XFS.
- */
-
-/*
- * Prototypes for internal functions.
- */
-
 STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int);
 STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
 STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
@@ -70,10 +53,6 @@ STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
 STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int);
 
 /*
- * Internal functions.
- */
-
-/*
  * Single level of the xfs_inobt_delete record deletion routine.
  * Delete record pointed to by cur/level.
  * Remove the record from its block then rebalance the tree.
@@ -139,7 +118,7 @@ xfs_inobt_delrec(
 	 * Fail if we're off the end of the block.
 	 */
 
-	numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+	numrecs = be16_to_cpu(block->bb_numrecs);
 	if (ptr > numrecs) {
 		*stat = 0;
 		return 0;
@@ -154,7 +133,7 @@ xfs_inobt_delrec(
 		pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
 #ifdef DEBUG
 		for (i = ptr; i < numrecs; i++) {
-			if ((error = xfs_btree_check_sptr(cur, INT_GET(pp[i], ARCH_CONVERT), level)))
+			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i]), level)))
 				return error;
 		}
 #endif
@@ -191,7 +170,7 @@ xfs_inobt_delrec(
 	 * Decrement and log the number of entries in the block.
 	 */
 	numrecs--;
-	INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
+	block->bb_numrecs = cpu_to_be16(numrecs);
 	xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
 	/*
 	 * Is this the root level?  If so, we're almost done.
@@ -210,9 +189,9 @@ xfs_inobt_delrec(
 			 * pp is still set to the first pointer in the block.
 			 * Make it the new root of the btree.
 			 */
-			bno = INT_GET(agi->agi_root, ARCH_CONVERT);
+			bno = be32_to_cpu(agi->agi_root);
 			agi->agi_root = *pp;
-			INT_MOD(agi->agi_level, ARCH_CONVERT, -1);
+			be32_add(&agi->agi_level, -1);
 			/*
 			 * Free the block.
 			 */
@@ -255,8 +234,8 @@ xfs_inobt_delrec(
 	 * tree balanced.  Look at the left and right sibling blocks to
 	 * see if we can re-balance by moving only one record.
 	 */
-	rbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
-	lbno = INT_GET(block->bb_leftsib, ARCH_CONVERT);
+	rbno = be32_to_cpu(block->bb_rightsib);
+	lbno = be32_to_cpu(block->bb_leftsib);
 	bno = NULLAGBLOCK;
 	ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
 	/*
@@ -293,18 +272,18 @@ xfs_inobt_delrec(
 		/*
 		 * Grab the current block number, for future use.
 		 */
-		bno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
+		bno = be32_to_cpu(right->bb_leftsib);
 		/*
 		 * If right block is full enough so that removing one entry
 		 * won't make it too empty, and left-shifting an entry out
 		 * of right to us works, we're done.
 		 */
-		if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >=
+		if (be16_to_cpu(right->bb_numrecs) - 1 >=
 		     XFS_INOBT_BLOCK_MINRECS(level, cur)) {
 			if ((error = xfs_inobt_lshift(tcur, level, &i)))
 				goto error0;
 			if (i) {
-				ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
+				ASSERT(be16_to_cpu(block->bb_numrecs) >=
 				       XFS_INOBT_BLOCK_MINRECS(level, cur));
 				xfs_btree_del_cursor(tcur,
 						     XFS_BTREE_NOERROR);
@@ -321,7 +300,7 @@ xfs_inobt_delrec(
 		 * future reference, and fix up the temp cursor to point
 		 * to our block again (last record).
 		 */
-		rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
+		rrecs = be16_to_cpu(right->bb_numrecs);
 		if (lbno != NULLAGBLOCK) {
 			xfs_btree_firstrec(tcur, level);
 			if ((error = xfs_inobt_decrement(tcur, level, &i)))
@@ -353,18 +332,18 @@ xfs_inobt_delrec(
 		/*
 		 * Grab the current block number, for future use.
 		 */
-		bno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
+		bno = be32_to_cpu(left->bb_rightsib);
 		/*
 		 * If left block is full enough so that removing one entry
 		 * won't make it too empty, and right-shifting an entry out
 		 * of left to us works, we're done.
 		 */
-		if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >=
+		if (be16_to_cpu(left->bb_numrecs) - 1 >=
 		     XFS_INOBT_BLOCK_MINRECS(level, cur)) {
 			if ((error = xfs_inobt_rshift(tcur, level, &i)))
 				goto error0;
 			if (i) {
-				ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
+				ASSERT(be16_to_cpu(block->bb_numrecs) >=
 				       XFS_INOBT_BLOCK_MINRECS(level, cur));
 				xfs_btree_del_cursor(tcur,
 						     XFS_BTREE_NOERROR);
@@ -378,7 +357,7 @@ xfs_inobt_delrec(
 		 * Otherwise, grab the number of records in right for
 		 * future reference.
 		 */
-		lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
+		lrecs = be16_to_cpu(left->bb_numrecs);
 	}
 	/*
 	 * Delete the temp cursor, we're done with it.
@@ -399,14 +378,14 @@ xfs_inobt_delrec(
 		 */
 		rbno = bno;
 		right = block;
-		rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
+		rrecs = be16_to_cpu(right->bb_numrecs);
 		rbp = bp;
 		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
 				cur->bc_private.i.agno, lbno, 0, &lbp,
 				XFS_INO_BTREE_REF)))
 			return error;
 		left = XFS_BUF_TO_INOBT_BLOCK(lbp);
-		lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
+		lrecs = be16_to_cpu(left->bb_numrecs);
 		if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
 			return error;
 	}
@@ -421,14 +400,14 @@ xfs_inobt_delrec(
 		 */
 		lbno = bno;
 		left = block;
-		lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
+		lrecs = be16_to_cpu(left->bb_numrecs);
 		lbp = bp;
 		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
 				cur->bc_private.i.agno, rbno, 0, &rbp,
 				XFS_INO_BTREE_REF)))
 			return error;
 		right = XFS_BUF_TO_INOBT_BLOCK(rbp);
-		rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
+		rrecs = be16_to_cpu(right->bb_numrecs);
 		if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
 			return error;
 	}
@@ -456,7 +435,7 @@ xfs_inobt_delrec(
 		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
 #ifdef DEBUG
 		for (i = 0; i < rrecs; i++) {
-			if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)))
+			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
 				return error;
 		}
 #endif
@@ -492,7 +471,7 @@ xfs_inobt_delrec(
 	 * Fix up the number of records in the surviving block.
 	 */
 	lrecs += rrecs;
-	INT_SET(left->bb_numrecs, ARCH_CONVERT, lrecs);
+	left->bb_numrecs = cpu_to_be16(lrecs);
 	/*
 	 * Fix up the right block pointer in the surviving block, and log it.
 	 */
@@ -502,18 +481,18 @@ xfs_inobt_delrec(
 	 * If there is a right sibling now, make it point to the
 	 * remaining block.
 	 */
-	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+	if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
 		xfs_inobt_block_t	*rrblock;
 		xfs_buf_t		*rrbp;
 
 		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
-				cur->bc_private.i.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0,
+				cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), 0,
 				&rrbp, XFS_INO_BTREE_REF)))
 			return error;
 		rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
 		if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
 			return error;
-		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno);
+		rrblock->bb_leftsib = cpu_to_be32(lbno);
 		xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
 	}
 	/*
@@ -572,6 +551,13 @@ xfs_inobt_insrec(
 	xfs_inobt_rec_t		*rp=NULL;	/* pointer to btree records */
 
 	/*
+	 * GCC doesn't understand the (arguably complex) control flow in
+	 * this function and complains about uninitialized structure fields
+	 * without this.
+	 */
+	memset(&nrec, 0, sizeof(nrec));
+
+	/*
 	 * If we made it to the root level, allocate a new root block
 	 * and we're done.
 	 */
@@ -598,7 +584,7 @@ xfs_inobt_insrec(
 	 */
 	bp = cur->bc_bufs[level];
 	block = XFS_BUF_TO_INOBT_BLOCK(bp);
-	numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+	numrecs = be16_to_cpu(block->bb_numrecs);
 #ifdef DEBUG
 	if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
 		return error;
@@ -672,7 +658,7 @@ xfs_inobt_insrec(
 	 * At this point we know there's room for our new entry in the block
 	 * we're pointing at.
 	 */
-	numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+	numrecs = be16_to_cpu(block->bb_numrecs);
 	if (level > 0) {
 		/*
 		 * It's a non-leaf entry.  Make a hole for the new data
@@ -682,7 +668,7 @@ xfs_inobt_insrec(
 		pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
 #ifdef DEBUG
 		for (i = numrecs; i >= ptr; i--) {
-			if ((error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level)))
+			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
 				return error;
 		}
 #endif
@@ -698,9 +684,9 @@ xfs_inobt_insrec(
 			return error;
 #endif
 		kp[ptr - 1] = key; /* INT_: struct copy */
-		INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
+		pp[ptr - 1] = cpu_to_be32(*bnop);
 		numrecs++;
-		INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
+		block->bb_numrecs = cpu_to_be16(numrecs);
 		xfs_inobt_log_keys(cur, bp, ptr, numrecs);
 		xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
 	} else {
@@ -716,7 +702,7 @@ xfs_inobt_insrec(
 		 */
 		rp[ptr - 1] = *recp; /* INT_: struct copy */
 		numrecs++;
-		INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
+		block->bb_numrecs = cpu_to_be16(numrecs);
 		xfs_inobt_log_recs(cur, bp, ptr, numrecs);
 	}
 	/*
@@ -871,8 +857,8 @@ xfs_inobt_lookup(
 		xfs_agi_t	*agi;	/* a.g. inode header */
 
 		agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
-		agno = INT_GET(agi->agi_seqno, ARCH_CONVERT);
-		agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
+		agno = be32_to_cpu(agi->agi_seqno);
+		agbno = be32_to_cpu(agi->agi_root);
 	}
 	/*
 	 * Iterate over each level in the btree, starting at the root.
@@ -939,7 +925,7 @@ xfs_inobt_lookup(
 			 * Set low and high entry numbers, 1-based.
 			 */
 			low = 1;
-			if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) {
+			if (!(high = be16_to_cpu(block->bb_numrecs))) {
 				/*
 				 * If the block is empty, the tree must
 				 * be an empty leaf.
@@ -1006,7 +992,7 @@ xfs_inobt_lookup(
 			 */
 			if (diff > 0 && --keyno < 1)
 				keyno = 1;
-			agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, keyno, cur), ARCH_CONVERT);
+			agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, keyno, cur));
 #ifdef DEBUG
 			if ((error = xfs_btree_check_sptr(cur, agbno, level)))
 				return error;
@@ -1025,8 +1011,8 @@ xfs_inobt_lookup(
 		 * not the last block, we're in the wrong block.
 		 */
 		if (dir == XFS_LOOKUP_GE &&
-		    keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) &&
-		    INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		    keyno > be16_to_cpu(block->bb_numrecs) &&
+		    be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
 			int	i;
 
 			cur->bc_ptrs[0] = keyno;
@@ -1043,7 +1029,7 @@ xfs_inobt_lookup(
 	/*
 	 * Return if we succeeded or not.
 	 */
-	if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT))
+	if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
 		*stat = 0;
 	else
 		*stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
@@ -1089,7 +1075,7 @@ xfs_inobt_lshift(
 	/*
 	 * If we've got no left sibling then we can't shift an entry left.
 	 */
-	if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+	if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
 		*stat = 0;
 		return 0;
 	}
@@ -1105,8 +1091,8 @@ xfs_inobt_lshift(
 	 * Set up the left neighbor as "left".
 	 */
 	if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
-			cur->bc_private.i.agno, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0, &lbp,
-			XFS_INO_BTREE_REF)))
+			cur->bc_private.i.agno, be32_to_cpu(right->bb_leftsib),
+			0, &lbp, XFS_INO_BTREE_REF)))
 		return error;
 	left = XFS_BUF_TO_INOBT_BLOCK(lbp);
 	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
@@ -1114,11 +1100,11 @@ xfs_inobt_lshift(
 	/*
 	 * If it's full, it can't take another entry.
 	 */
-	if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
+	if (be16_to_cpu(left->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
 		*stat = 0;
 		return 0;
 	}
-	nrec = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;
+	nrec = be16_to_cpu(left->bb_numrecs) + 1;
 	/*
 	 * If non-leaf, copy a key and a ptr to the left block.
 	 */
@@ -1130,7 +1116,7 @@ xfs_inobt_lshift(
 		lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
 		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
 #ifdef DEBUG
-		if ((error = xfs_btree_check_sptr(cur, INT_GET(*rpp, ARCH_CONVERT), level)))
+		if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
 			return error;
 #endif
 		*lpp = *rpp; /* INT_: no-change copy */
@@ -1148,7 +1134,7 @@ xfs_inobt_lshift(
 	/*
 	 * Bump and log left's numrecs, decrement and log right's numrecs.
 	 */
-	INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1);
+	be16_add(&left->bb_numrecs, 1);
 	xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
 #ifdef DEBUG
 	if (level > 0)
@@ -1156,26 +1142,26 @@ xfs_inobt_lshift(
 	else
 		xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
 #endif
-	INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1);
+	be16_add(&right->bb_numrecs, -1);
 	xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
 	/*
 	 * Slide the contents of right down one entry.
 	 */
 	if (level > 0) {
 #ifdef DEBUG
-		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
-			if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
+		for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
+			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
 					level)))
 				return error;
 		}
 #endif
-		memmove(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		memmove(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
-		xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
-		xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
+		memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
+		xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
+		xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
 	} else {
-		memmove(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
-		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
+		xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
 		key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
 		rkp = &key;
 	}
@@ -1227,7 +1213,7 @@ xfs_inobt_newroot(
 	args.tp = cur->bc_tp;
 	args.mp = cur->bc_mp;
 	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno,
-		INT_GET(agi->agi_root, ARCH_CONVERT));
+		be32_to_cpu(agi->agi_root));
 	args.mod = args.minleft = args.alignment = args.total = args.wasdel =
 		args.isfl = args.userdata = args.minalignslop = 0;
 	args.minlen = args.maxlen = args.prod = 1;
@@ -1247,8 +1233,8 @@ xfs_inobt_newroot(
 	/*
 	 * Set the root data in the a.g. inode structure.
 	 */
-	INT_SET(agi->agi_root, ARCH_CONVERT, args.agbno);
-	INT_MOD(agi->agi_level, ARCH_CONVERT, 1);
+	agi->agi_root = cpu_to_be32(args.agbno);
+	be32_add(&agi->agi_level, 1);
 	xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp,
 		XFS_AGI_ROOT | XFS_AGI_LEVEL);
 	/*
@@ -1263,14 +1249,14 @@ xfs_inobt_newroot(
 	if ((error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp)))
 		return error;
 #endif
-	if (INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+	if (be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
 		/*
 		 * Our block is left, pick up the right block.
 		 */
 		lbp = bp;
 		lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
 		left = block;
-		rbno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
+		rbno = be32_to_cpu(left->bb_rightsib);
 		if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
 				rbno, 0, &rbp, XFS_INO_BTREE_REF)))
 			return error;
@@ -1287,7 +1273,7 @@ xfs_inobt_newroot(
 		rbp = bp;
 		rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
 		right = block;
-		lbno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
+		lbno = be32_to_cpu(right->bb_leftsib);
 		if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
 				lbno, 0, &lbp, XFS_INO_BTREE_REF)))
 			return error;
@@ -1301,18 +1287,18 @@ xfs_inobt_newroot(
 	/*
 	 * Fill in the new block's btree header and log it.
 	 */
-	INT_SET(new->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
-	INT_SET(new->bb_level, ARCH_CONVERT, (__uint16_t)cur->bc_nlevels);
-	INT_SET(new->bb_numrecs, ARCH_CONVERT, 2);
-	INT_SET(new->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
-	INT_SET(new->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+	new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
+	new->bb_level = cpu_to_be16(cur->bc_nlevels);
+	new->bb_numrecs = cpu_to_be16(2);
+	new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+	new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
 	xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
 	ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
 	/*
 	 * Fill in the key data in the new root.
 	 */
 	kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
-	if (INT_GET(left->bb_level, ARCH_CONVERT) > 0) {
+	if (be16_to_cpu(left->bb_level) > 0) {
 		kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); /* INT_: struct copy */
 		kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); /* INT_: struct copy */
 	} else {
@@ -1326,8 +1312,8 @@ xfs_inobt_newroot(
 	 * Fill in the pointer data in the new root.
 	 */
 	pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
-	INT_SET(pp[0], ARCH_CONVERT, lbno);
-	INT_SET(pp[1], ARCH_CONVERT, rbno);
+	pp[0] = cpu_to_be32(lbno);
+	pp[1] = cpu_to_be32(rbno);
 	xfs_inobt_log_ptrs(cur, nbp, 1, 2);
 	/*
 	 * Fix up the cursor.
@@ -1376,7 +1362,7 @@ xfs_inobt_rshift(
 	/*
 	 * If we've got no right sibling then we can't shift an entry right.
 	 */
-	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+	if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
 		*stat = 0;
 		return 0;
 	}
@@ -1384,7 +1370,7 @@ xfs_inobt_rshift(
 	 * If the cursor entry is the one that would be moved, don't
 	 * do it... it's too complicated.
 	 */
-	if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {
+	if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
 		*stat = 0;
 		return 0;
 	}
@@ -1392,8 +1378,8 @@ xfs_inobt_rshift(
 	 * Set up the right neighbor as "right".
 	 */
 	if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
-			cur->bc_private.i.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rbp,
-			XFS_INO_BTREE_REF)))
+			cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib),
+			0, &rbp, XFS_INO_BTREE_REF)))
 		return error;
 	right = XFS_BUF_TO_INOBT_BLOCK(rbp);
 	if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
@@ -1401,7 +1387,7 @@ xfs_inobt_rshift(
 	/*
 	 * If it's full, it can't take another entry.
 	 */
-	if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
+	if (be16_to_cpu(right->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
 		*stat = 0;
 		return 0;
 	}
@@ -1410,41 +1396,41 @@ xfs_inobt_rshift(
 	 * copy the last left block entry to the hole.
 	 */
 	if (level > 0) {
-		lkp = XFS_INOBT_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
-		lpp = XFS_INOBT_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		lkp = XFS_INOBT_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
+		lpp = XFS_INOBT_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
 		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
 		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
 #ifdef DEBUG
-		for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
-			if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)))
+		for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
+			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
 				return error;
 		}
 #endif
-		memmove(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		memmove(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
+		memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
 #ifdef DEBUG
-		if ((error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level)))
+		if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
 			return error;
 #endif
 		*rkp = *lkp; /* INT_: no change copy */
 		*rpp = *lpp; /* INT_: no change copy */
-		xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
-		xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
+		xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
 	} else {
-		lrp = XFS_INOBT_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		lrp = XFS_INOBT_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
 		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
-		memmove(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
 		*rrp = *lrp;
-		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
 		key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
 		rkp = &key;
 	}
 	/*
 	 * Decrement and log left's numrecs, bump and log right's numrecs.
 	 */
-	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
+	be16_add(&left->bb_numrecs, -1);
 	xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
-	INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+	be16_add(&right->bb_numrecs, 1);
 #ifdef DEBUG
 	if (level > 0)
 		xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
@@ -1536,17 +1522,17 @@ xfs_inobt_split(
 	/*
 	 * Fill in the btree header for the new block.
 	 */
-	INT_SET(right->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
-	right->bb_level = left->bb_level; /* INT_: direct copy */
-	INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));
+	right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
+	right->bb_level = left->bb_level;
+	right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
 	/*
 	 * Make sure that if there's an odd number of entries now, that
 	 * each new block will have the same number of entries.
 	 */
-	if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&
-	    cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
-		INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
-	i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;
+	if ((be16_to_cpu(left->bb_numrecs) & 1) &&
+	    cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
+		be16_add(&right->bb_numrecs, 1);
+	i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
 	/*
 	 * For non-leaf blocks, copy keys and addresses over to the new block.
 	 */
@@ -1556,15 +1542,15 @@ xfs_inobt_split(
 		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
 		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
 #ifdef DEBUG
-		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
-			if ((error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level)))
+		for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
+			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
 				return error;
 		}
 #endif
-		memcpy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
-		memcpy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
-		xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
-		xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
+		memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
+		xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
+		xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
 		*keyp = *rkp;
 	}
 	/*
@@ -1573,36 +1559,36 @@ xfs_inobt_split(
 	else {
 		lrp = XFS_INOBT_REC_ADDR(left, i, cur);
 		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
-		memcpy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
-		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
+		xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
 		keyp->ir_startino = rrp->ir_startino; /* INT_: direct copy */
 	}
 	/*
 	 * Find the left block number by looking in the buffer.
 	 * Adjust numrecs, sibling pointers.
 	 */
-	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
-	right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
-	INT_SET(left->bb_rightsib, ARCH_CONVERT, args.agbno);
-	INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
+	be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
+	right->bb_rightsib = left->bb_rightsib;
+	left->bb_rightsib = cpu_to_be32(args.agbno);
+	right->bb_leftsib = cpu_to_be32(lbno);
 	xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
 	xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
 	/*
 	 * If there's a block to the new block's right, make that block
 	 * point back to right instead of to left.
 	 */
-	if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+	if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
 		xfs_inobt_block_t	*rrblock;	/* rr btree block */
 		xfs_buf_t		*rrbp;		/* buffer for rrblock */
 
 		if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
-				INT_GET(right->bb_rightsib, ARCH_CONVERT), 0, &rrbp,
+				be32_to_cpu(right->bb_rightsib), 0, &rrbp,
 				XFS_INO_BTREE_REF)))
 			return error;
 		rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
 		if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
 			return error;
-		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, args.agbno);
+		rrblock->bb_leftsib = cpu_to_be32(args.agbno);
 		xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
 	}
 	/*
@@ -1610,9 +1596,9 @@ xfs_inobt_split(
 	 * If it's just pointing past the last entry in left, then we'll
 	 * insert there, so don't change anything in that case.
 	 */
-	if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {
+	if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
 		xfs_btree_setbuf(cur, level, rbp);
-		cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
+		cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
 	}
 	/*
 	 * If there are more levels, we'll need another cursor which refers
@@ -1710,7 +1696,7 @@ xfs_inobt_decrement(
 	/*
 	 * If we just went off the left edge of the tree, return failure.
 	 */
-	if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+	if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
 		*stat = 0;
 		return 0;
 	}
@@ -1739,7 +1725,7 @@ xfs_inobt_decrement(
 		xfs_agblock_t	agbno;	/* block number of btree block */
 		xfs_buf_t	*bp;	/* buffer containing btree block */
 
-		agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
 		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
 				cur->bc_private.i.agno, agbno, 0, &bp,
 				XFS_INO_BTREE_REF)))
@@ -1749,7 +1735,7 @@ xfs_inobt_decrement(
 		block = XFS_BUF_TO_INOBT_BLOCK(bp);
 		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
 			return error;
-		cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+		cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
 	}
 	*stat = 1;
 	return 0;
@@ -1821,7 +1807,7 @@ xfs_inobt_get_rec(
 	/*
 	 * Off the right end or left end, return failure.
 	 */
-	if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT) || ptr <= 0) {
+	if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
 		*stat = 0;
 		return 0;
 	}
@@ -1869,14 +1855,14 @@ xfs_inobt_increment(
 	 * Increment the ptr at this level.  If we're still in the block
 	 * then we're done.
 	 */
-	if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+	if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
 		*stat = 1;
 		return 0;
 	}
 	/*
 	 * If we just went off the right edge of the tree, return failure.
 	 */
-	if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+	if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
 		*stat = 0;
 		return 0;
 	}
@@ -1891,7 +1877,7 @@ xfs_inobt_increment(
 		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
 			return error;
 #endif
-		if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
+		if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
 			break;
 		/*
 		 * Read-ahead the right block, we're going to read it
@@ -1911,7 +1897,7 @@ xfs_inobt_increment(
 	     lev > level; ) {
 		xfs_agblock_t	agbno;	/* block number of btree block */
 
-		agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
 		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
 				cur->bc_private.i.agno, agbno, 0, &bp,
 				XFS_INO_BTREE_REF)))
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index 44be188674a..ae3904cb1ee 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_IALLOC_BTREE_H__
 #define	__XFS_IALLOC_BTREE_H__
@@ -51,14 +37,12 @@ typedef	__uint64_t	xfs_inofree_t;
 #define	XFS_INODES_PER_CHUNK_LOG	(XFS_NBBYLOG + 3)
 #define	XFS_INOBT_ALL_FREE	((xfs_inofree_t)-1)
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_MASKN)
-xfs_inofree_t xfs_inobt_maskn(int i, int n);
 #define	XFS_INOBT_MASKN(i,n)		xfs_inobt_maskn(i,n)
-#else
-#define	XFS_INOBT_MASKN(i,n)	\
-	((((n) >= XFS_INODES_PER_CHUNK ? \
-		(xfs_inofree_t)0 : ((xfs_inofree_t)1 << (n))) - 1) << (i))
-#endif
+static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
+{
+	return (((n) >= XFS_INODES_PER_CHUNK ? \
+		(xfs_inofree_t)0 : ((xfs_inofree_t)1 << (n))) - 1) << (i);
+}
 
 /*
  * Data record structure
@@ -78,241 +62,116 @@ typedef struct xfs_inobt_key
 	xfs_agino_t	ir_startino;	/* starting inode number */
 } xfs_inobt_key_t;
 
-typedef xfs_agblock_t xfs_inobt_ptr_t;	/* btree pointer type */
-					/* btree block header type */
+/* btree pointer type */
+typedef __be32 xfs_inobt_ptr_t;
+
+/* btree block header type */
 typedef	struct xfs_btree_sblock xfs_inobt_block_t;
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_INOBT_BLOCK)
-xfs_inobt_block_t *xfs_buf_to_inobt_block(struct xfs_buf *bp);
-#define	XFS_BUF_TO_INOBT_BLOCK(bp)	xfs_buf_to_inobt_block(bp)
-#else
-#define	XFS_BUF_TO_INOBT_BLOCK(bp) ((xfs_inobt_block_t *)(XFS_BUF_PTR(bp)))
-#endif
+#define	XFS_BUF_TO_INOBT_BLOCK(bp)	((xfs_inobt_block_t *)XFS_BUF_PTR(bp))
 
 /*
  * Bit manipulations for ir_free.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_MASK)
-xfs_inofree_t xfs_inobt_mask(int i);
-#define	XFS_INOBT_MASK(i)		xfs_inobt_mask(i)
-#else
 #define	XFS_INOBT_MASK(i)		((xfs_inofree_t)1 << (i))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_IS_FREE)
-int xfs_inobt_is_free(xfs_inobt_rec_t *rp, int i);
-#define	XFS_INOBT_IS_FREE(rp,i)		xfs_inobt_is_free(rp,i)
-#define	XFS_INOBT_IS_FREE_DISK(rp,i)	xfs_inobt_is_free_disk(rp,i)
-#else
-#define	XFS_INOBT_IS_FREE(rp,i)	\
-	(((rp)->ir_free & XFS_INOBT_MASK(i)) != 0)
-#define XFS_INOBT_IS_FREE_DISK(rp,i) \
-	((INT_GET((rp)->ir_free, ARCH_CONVERT) & XFS_INOBT_MASK(i)) != 0)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_SET_FREE)
-void xfs_inobt_set_free(xfs_inobt_rec_t *rp, int i);
-#define	XFS_INOBT_SET_FREE(rp,i)	xfs_inobt_set_free(rp,i)
-#else
+#define	XFS_INOBT_IS_FREE(rp,i)		\
+		(((rp)->ir_free & XFS_INOBT_MASK(i)) != 0)
+#define	XFS_INOBT_IS_FREE_DISK(rp,i)	\
+		((INT_GET((rp)->ir_free,ARCH_CONVERT) & XFS_INOBT_MASK(i)) != 0)
 #define	XFS_INOBT_SET_FREE(rp,i)	((rp)->ir_free |= XFS_INOBT_MASK(i))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_CLR_FREE)
-void xfs_inobt_clr_free(xfs_inobt_rec_t *rp, int i);
-#define	XFS_INOBT_CLR_FREE(rp,i)	xfs_inobt_clr_free(rp,i)
-#else
 #define	XFS_INOBT_CLR_FREE(rp,i)	((rp)->ir_free &= ~XFS_INOBT_MASK(i))
-#endif
 
 /*
  * Real block structures have a size equal to the disk block size.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_BLOCK_SIZE)
-int xfs_inobt_block_size(int lev, struct xfs_btree_cur *cur);
-#define	XFS_INOBT_BLOCK_SIZE(lev,cur)	xfs_inobt_block_size(lev,cur)
-#else
 #define	XFS_INOBT_BLOCK_SIZE(lev,cur)	(1 << (cur)->bc_blocklog)
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_BLOCK_MAXRECS)
-int xfs_inobt_block_maxrecs(int lev, struct xfs_btree_cur *cur);
-#define	XFS_INOBT_BLOCK_MAXRECS(lev,cur)	xfs_inobt_block_maxrecs(lev,cur)
-#else
-#define	XFS_INOBT_BLOCK_MAXRECS(lev,cur)	\
-	((cur)->bc_mp->m_inobt_mxr[lev != 0])
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_BLOCK_MINRECS)
-int xfs_inobt_block_minrecs(int lev, struct xfs_btree_cur *cur);
-#define	XFS_INOBT_BLOCK_MINRECS(lev,cur)	xfs_inobt_block_minrecs(lev,cur)
-#else
-#define	XFS_INOBT_BLOCK_MINRECS(lev,cur)	\
-	((cur)->bc_mp->m_inobt_mnr[lev != 0])
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_IS_LAST_REC)
-int xfs_inobt_is_last_rec(struct xfs_btree_cur *cur);
-#define	XFS_INOBT_IS_LAST_REC(cur)	xfs_inobt_is_last_rec(cur)
-#else
+#define	XFS_INOBT_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_inobt_mxr[lev != 0])
+#define	XFS_INOBT_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_inobt_mnr[lev != 0])
 #define	XFS_INOBT_IS_LAST_REC(cur)	\
-	((cur)->bc_ptrs[0] == \
-		INT_GET(XFS_BUF_TO_INOBT_BLOCK((cur)->bc_bufs[0])->bb_numrecs, ARCH_CONVERT))
-#endif
+	((cur)->bc_ptrs[0] == be16_to_cpu(XFS_BUF_TO_INOBT_BLOCK((cur)->bc_bufs[0])->bb_numrecs))
 
 /*
  * Maximum number of inode btree levels.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IN_MAXLEVELS)
-int xfs_in_maxlevels(struct xfs_mount *mp);
-#define	XFS_IN_MAXLEVELS(mp)		xfs_in_maxlevels(mp)
-#else
 #define	XFS_IN_MAXLEVELS(mp)		((mp)->m_in_maxlevels)
-#endif
 
 /*
  * block numbers in the AG.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IBT_BLOCK)
-xfs_agblock_t xfs_ibt_block(struct xfs_mount *mp);
-#define	XFS_IBT_BLOCK(mp)		xfs_ibt_block(mp)
-#else
-#define	XFS_IBT_BLOCK(mp)	((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_PREALLOC_BLOCKS)
-xfs_agblock_t xfs_prealloc_blocks(struct xfs_mount *mp);
-#define	XFS_PREALLOC_BLOCKS(mp)		xfs_prealloc_blocks(mp)
-#else
-#define	XFS_PREALLOC_BLOCKS(mp)	((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
-#endif
+#define	XFS_IBT_BLOCK(mp)		((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
+#define	XFS_PREALLOC_BLOCKS(mp)		((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
 
 /*
  * Record, key, and pointer address macros for btree blocks.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_REC_ADDR)
-xfs_inobt_rec_t *
-xfs_inobt_rec_addr(xfs_inobt_block_t *bb, int i, struct xfs_btree_cur *cur);
-#define	XFS_INOBT_REC_ADDR(bb,i,cur)	xfs_inobt_rec_addr(bb,i,cur)
-#else
-#define	XFS_INOBT_REC_ADDR(bb,i,cur)	\
-	XFS_BTREE_REC_ADDR(XFS_INOBT_BLOCK_SIZE(0,cur), xfs_inobt, bb, i, \
-		XFS_INOBT_BLOCK_MAXRECS(0, cur))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_KEY_ADDR)
-xfs_inobt_key_t *
-xfs_inobt_key_addr(xfs_inobt_block_t *bb, int i, struct xfs_btree_cur *cur);
-#define	XFS_INOBT_KEY_ADDR(bb,i,cur)	xfs_inobt_key_addr(bb,i,cur)
-#else
-#define	XFS_INOBT_KEY_ADDR(bb,i,cur)	\
-	XFS_BTREE_KEY_ADDR(XFS_INOBT_BLOCK_SIZE(1,cur), xfs_inobt, bb, i, \
-		XFS_INOBT_BLOCK_MAXRECS(1, cur))
-#endif
+#define XFS_INOBT_REC_ADDR(bb,i,cur) \
+	(XFS_BTREE_REC_ADDR(XFS_INOBT_BLOCK_SIZE(0,cur), xfs_inobt, bb, \
+				i, XFS_INOBT_BLOCK_MAXRECS(0, cur)))
+#define	XFS_INOBT_KEY_ADDR(bb,i,cur) \
+	(XFS_BTREE_KEY_ADDR(XFS_INOBT_BLOCK_SIZE(1,cur), xfs_inobt, bb, \
+				i, XFS_INOBT_BLOCK_MAXRECS(1, cur)))
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_PTR_ADDR)
-xfs_inobt_ptr_t *
-xfs_inobt_ptr_addr(xfs_inobt_block_t *bb, int i, struct xfs_btree_cur *cur);
-#define	XFS_INOBT_PTR_ADDR(bb,i,cur)	xfs_inobt_ptr_addr(bb,i,cur)
-#else
-#define	XFS_INOBT_PTR_ADDR(bb,i,cur)	\
-	XFS_BTREE_PTR_ADDR(XFS_INOBT_BLOCK_SIZE(1,cur), xfs_inobt, bb, i, \
-		XFS_INOBT_BLOCK_MAXRECS(1, cur))
-#endif
-
-/*
- * Prototypes for externally visible routines.
- */
+#define	XFS_INOBT_PTR_ADDR(bb,i,cur) \
+	(XFS_BTREE_PTR_ADDR(XFS_INOBT_BLOCK_SIZE(1,cur), xfs_inobt, bb, \
+				i, XFS_INOBT_BLOCK_MAXRECS(1, cur)))
 
 /*
  * Decrement cursor by one record at the level.
  * For nonzero levels the leaf-ward information is untouched.
  */
-int					/* error */
-xfs_inobt_decrement(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	int			level,	/* level in btree, 0 is leaf */
-	int			*stat); /* success/failure */
+extern int xfs_inobt_decrement(struct xfs_btree_cur *cur, int level, int *stat);
 
 /*
  * Delete the record pointed to by cur.
  * The cursor refers to the place where the record was (could be inserted)
  * when the operation returns.
  */
-int					/* error */
-xfs_inobt_delete(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	int			*stat);	/* success/failure */
+extern int xfs_inobt_delete(struct xfs_btree_cur *cur, int *stat);
 
 /*
  * Get the data from the pointed-to record.
  */
-int					/* error */
-xfs_inobt_get_rec(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_agino_t		*ino,	/* output: starting inode of chunk */
-	__int32_t		*fcnt,	/* output: number of free inodes */
-	xfs_inofree_t		*free,	/* output: free inode mask */
-	int			*stat);	/* output: success/failure */
+extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino,
+			     __int32_t *fcnt, xfs_inofree_t *free, int *stat);
 
 /*
  * Increment cursor by one record at the level.
  * For nonzero levels the leaf-ward information is untouched.
  */
-int					/* error */
-xfs_inobt_increment(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	int			level,	/* level in btree, 0 is leaf */
-	int			*stat);	/* success/failure */
+extern int xfs_inobt_increment(struct xfs_btree_cur *cur, int level, int *stat);
 
 /*
  * Insert the current record at the point referenced by cur.
  * The cursor may be inconsistent on return if splits have been done.
  */
-int					/* error */
-xfs_inobt_insert(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	int			*stat);	/* success/failure */
+extern int xfs_inobt_insert(struct xfs_btree_cur *cur, int *stat);
 
 /*
  * Lookup the record equal to ino in the btree given by cur.
  */
-int					/* error */
-xfs_inobt_lookup_eq(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_agino_t		ino,	/* starting inode of chunk */
-	__int32_t		fcnt,	/* free inode count */
-	xfs_inofree_t		free,	/* free inode mask */
-	int			*stat);	/* success/failure */
+extern int xfs_inobt_lookup_eq(struct xfs_btree_cur *cur, xfs_agino_t ino,
+				__int32_t fcnt, xfs_inofree_t free, int *stat);
 
 /*
  * Lookup the first record greater than or equal to ino
  * in the btree given by cur.
  */
-int					/* error */
-xfs_inobt_lookup_ge(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_agino_t		ino,	/* starting inode of chunk */
-	__int32_t		fcnt,	/* free inode count */
-	xfs_inofree_t		free,	/* free inode mask */
-	int			*stat);	/* success/failure */
+extern int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino,
+				__int32_t fcnt,	xfs_inofree_t free, int *stat);
 
 /*
  * Lookup the first record less than or equal to ino
  * in the btree given by cur.
  */
-int					/* error */
-xfs_inobt_lookup_le(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_agino_t		ino,	/* starting inode of chunk */
-	__int32_t		fcnt,	/* free inode count */
-	xfs_inofree_t		free,	/* free inode mask */
-	int			*stat);	/* success/failure */
+extern int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino,
+				__int32_t fcnt,	xfs_inofree_t free, int *stat);
 
 /*
  * Update the record referred to by cur, to the value given
  * by [ino, fcnt, free].
  * This either works (return 0) or gets an EFSCORRUPTED error.
  */
-int					/* error */
-xfs_inobt_update(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_agino_t		ino,	/* starting inode of chunk */
-	__int32_t		fcnt,	/* free inode count */
-	xfs_inofree_t		free);	/* free inode mask */
+extern int xfs_inobt_update(struct xfs_btree_cur *cur, xfs_agino_t ino,
+				__int32_t fcnt, xfs_inofree_t free);
 
 #endif	/* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index d3da00045f2..fc19eedbd11 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -1,41 +1,26 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -43,19 +28,18 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
 #include "xfs_quota.h"
 #include "xfs_utils.h"
-#include "xfs_bit.h"
 
 /*
  * Initialize the inode hash table for the newly mounted file system.
@@ -505,17 +489,15 @@ xfs_iget(
 	vnode_t		*vp = NULL;
 	int		error;
 
-retry:
 	XFS_STATS_INC(xs_ig_attempts);
 
+retry:
 	if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) {
 		bhv_desc_t	*bdp;
 		xfs_inode_t	*ip;
-		int		newnode;
 
 		vp = LINVFS_GET_VP(inode);
 		if (inode->i_state & I_NEW) {
-inode_allocate:
 			vn_initialize(inode);
 			error = xfs_iget_core(vp, mp, tp, ino, flags,
 					lock_flags, ipp, bno);
@@ -526,32 +508,25 @@ inode_allocate:
 				iput(inode);
 			}
 		} else {
-			/* These are true if the inode is in inactive or
-			 * reclaim. The linux inode is about to go away,
-			 * wait for that path to finish, and try again.
+			/*
+			 * If the inode is not fully constructed due to
+			 * filehandle mistmatches wait for the inode to go
+			 * away and try again.
+			 *
+			 * iget_locked will call __wait_on_freeing_inode
+			 * to wait for the inode to go away.
 			 */
-			if (vp->v_flag & (VINACT | VRECLM)) {
-				vn_wait(vp);
+			if (is_bad_inode(inode) ||
+			    ((bdp = vn_bhv_lookup(VN_BHV_HEAD(vp),
+						  &xfs_vnodeops)) == NULL)) {
 				iput(inode);
+				delay(1);
 				goto retry;
 			}
 
-			if (is_bad_inode(inode)) {
-				iput(inode);
-				return EIO;
-			}
-
-			bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
-			if (bdp == NULL) {
-				XFS_STATS_INC(xs_ig_dup);
-				goto inode_allocate;
-			}
 			ip = XFS_BHVTOI(bdp);
 			if (lock_flags != 0)
 				xfs_ilock(ip, lock_flags);
-			newnode = (ip->i_d.di_mode == 0);
-			if (newnode)
-				xfs_iocore_inode_reinit(ip);
 			XFS_STATS_INC(xs_ig_found);
 			*ipp = ip;
 			error = 0;
diff --git a/fs/xfs/xfs_imap.h b/fs/xfs/xfs_imap.h
index e385064a066..d3645000398 100644
--- a/fs/xfs/xfs_imap.h
+++ b/fs/xfs/xfs_imap.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_IMAP_H__
 #define	__XFS_IMAP_H__
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 34bdf590968..df0d4572d70 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1,40 +1,27 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_imap.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
 #include "xfs_sb.h"
@@ -43,24 +30,22 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_imap.h"
-#include "xfs_alloc.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
-#include "xfs_bmap.h"
 #include "xfs_buf_item.h"
+#include "xfs_inode_item.h"
+#include "xfs_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_bmap.h"
 #include "xfs_rw.h"
 #include "xfs_error.h"
-#include "xfs_bit.h"
 #include "xfs_utils.h"
 #include "xfs_dir2_trace.h"
 #include "xfs_quota.h"
@@ -194,9 +179,10 @@ xfs_inotobp(
 	if ((imap.im_blkno + imap.im_len) >
 	    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
 		cmn_err(CE_WARN,
-	"xfs_inotobp: inode number (%d + %d) maps to a block outside the bounds "
+	"xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds "
 	"of the file system %s.  Returning EINVAL.",
-			imap.im_blkno, imap.im_len,mp->m_fsname);
+			(unsigned long long)imap.im_blkno,
+			imap.im_len, mp->m_fsname);
 		return XFS_ERROR(EINVAL);
 	}
 
@@ -1128,7 +1114,6 @@ xfs_ialloc(
 	ASSERT(ip != NULL);
 
 	vp = XFS_ITOV(ip);
-	vp->v_type = IFTOVT(mode);
 	ip->i_d.di_mode = (__uint16_t)mode;
 	ip->i_d.di_onlink = 0;
 	ip->i_d.di_nlink = nlink;
@@ -1250,7 +1235,7 @@ xfs_ialloc(
 	 */
 	xfs_trans_log_inode(tp, ip, flags);
 
-	/* now that we have a v_type we can set Linux inode ops (& unlock) */
+	/* now that we have an i_mode  we can set Linux inode ops (& unlock) */
 	VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1);
 
 	*ipp = ip;
@@ -1879,8 +1864,8 @@ xfs_iunlink(
 	 */
 	agi = XFS_BUF_TO_AGI(agibp);
 	agi_ok =
-		INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC &&
-		XFS_AGI_GOOD_VERSION(INT_GET(agi->agi_versionnum, ARCH_CONVERT));
+		be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
+		XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
 	if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK,
 			XFS_RANDOM_IUNLINK))) {
 		XFS_CORRUPTION_ERROR("xfs_iunlink", XFS_ERRLEVEL_LOW, mp, agi);
@@ -1895,9 +1880,9 @@ xfs_iunlink(
 	ASSERT(agino != 0);
 	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
 	ASSERT(agi->agi_unlinked[bucket_index]);
-	ASSERT(INT_GET(agi->agi_unlinked[bucket_index], ARCH_CONVERT) != agino);
+	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
 
-	if (INT_GET(agi->agi_unlinked[bucket_index], ARCH_CONVERT) != NULLAGINO) {
+	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) {
 		/*
 		 * There is already another inode in the bucket we need
 		 * to add ourselves to.  Add us at the front of the list.
@@ -1924,7 +1909,7 @@ xfs_iunlink(
 	 * Point the bucket head pointer at the inode being inserted.
 	 */
 	ASSERT(agino != 0);
-	INT_SET(agi->agi_unlinked[bucket_index], ARCH_CONVERT, agino);
+	agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
 	offset = offsetof(xfs_agi_t, agi_unlinked) +
 		(sizeof(xfs_agino_t) * bucket_index);
 	xfs_trans_log_buf(tp, agibp, offset,
@@ -1982,8 +1967,8 @@ xfs_iunlink_remove(
 	 */
 	agi = XFS_BUF_TO_AGI(agibp);
 	agi_ok =
-		INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC &&
-		XFS_AGI_GOOD_VERSION(INT_GET(agi->agi_versionnum, ARCH_CONVERT));
+		be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
+		XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
 	if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK_REMOVE,
 			XFS_RANDOM_IUNLINK_REMOVE))) {
 		XFS_CORRUPTION_ERROR("xfs_iunlink_remove", XFS_ERRLEVEL_LOW,
@@ -2001,10 +1986,10 @@ xfs_iunlink_remove(
 	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
 	ASSERT(agino != 0);
 	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
-	ASSERT(INT_GET(agi->agi_unlinked[bucket_index], ARCH_CONVERT) != NULLAGINO);
+	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO);
 	ASSERT(agi->agi_unlinked[bucket_index]);
 
-	if (INT_GET(agi->agi_unlinked[bucket_index], ARCH_CONVERT) == agino) {
+	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
 		/*
 		 * We're at the head of the list.  Get the inode's
 		 * on-disk buffer to see if there is anyone after us
@@ -2038,7 +2023,7 @@ xfs_iunlink_remove(
 		 */
 		ASSERT(next_agino != 0);
 		ASSERT(next_agino != agino);
-		INT_SET(agi->agi_unlinked[bucket_index], ARCH_CONVERT, next_agino);
+		agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
 		offset = offsetof(xfs_agi_t, agi_unlinked) +
 			(sizeof(xfs_agino_t) * bucket_index);
 		xfs_trans_log_buf(tp, agibp, offset,
@@ -2047,7 +2032,7 @@ xfs_iunlink_remove(
 		/*
 		 * We need to search the list for the inode being freed.
 		 */
-		next_agino = INT_GET(agi->agi_unlinked[bucket_index], ARCH_CONVERT);
+		next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
 		last_ibp = NULL;
 		while (next_agino != agino) {
 			/*
@@ -3688,73 +3673,6 @@ xfs_iroundup(
 	return( 0 );
 }
 
-/*
- * Change the requested timestamp in the given inode.
- * We don't lock across timestamp updates, and we don't log them but
- * we do record the fact that there is dirty information in core.
- *
- * NOTE -- callers MUST combine XFS_ICHGTIME_MOD or XFS_ICHGTIME_CHG
- *		with XFS_ICHGTIME_ACC to be sure that access time
- *		update will take.  Calling first with XFS_ICHGTIME_ACC
- *		and then XFS_ICHGTIME_MOD may fail to modify the access
- *		timestamp if the filesystem is mounted noacctm.
- */
-void
-xfs_ichgtime(xfs_inode_t *ip,
-	     int flags)
-{
-	timespec_t	tv;
-	vnode_t		*vp = XFS_ITOV(ip);
-	struct inode	*inode = LINVFS_GET_IP(vp);
-
-	/*
-	 * We're not supposed to change timestamps in readonly-mounted
-	 * filesystems.  Throw it away if anyone asks us.
-	 */
-	if (unlikely(vp->v_vfsp->vfs_flag & VFS_RDONLY))
-		return;
-
-	/*
-	 * Don't update access timestamps on reads if mounted "noatime"
-	 * Throw it away if anyone asks us.
-	 */
-	if ((ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
-	    ((flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG))
-			== XFS_ICHGTIME_ACC))
-		return;
-
-	nanotime(&tv);
-	if (flags & XFS_ICHGTIME_MOD) {
-		VN_MTIMESET(vp, &tv);
-		ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
-		ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
-	}
-	if (flags & XFS_ICHGTIME_ACC) {
-		VN_ATIMESET(vp, &tv);
-		ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec;
-		ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec;
-	}
-	if (flags & XFS_ICHGTIME_CHG) {
-		VN_CTIMESET(vp, &tv);
-		ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec;
-		ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec;
-	}
-
-	/*
-	 * We update the i_update_core field _after_ changing
-	 * the timestamps in order to coordinate properly with
-	 * xfs_iflush() so that we don't lose timestamp updates.
-	 * This keeps us from having to hold the inode lock
-	 * while doing this.  We use the SYNCHRONIZE macro to
-	 * ensure that the compiler does not reorder the update
-	 * of i_update_core above the timestamp updates above.
-	 */
-	SYNCHRONIZE();
-	ip->i_update_core = 1;
-	if (!(inode->i_state & I_LOCK))
-		mark_inode_dirty_sync(inode);
-}
-
 #ifdef XFS_ILOCK_TRACE
 ktrace_t	*xfs_ilock_trace_buf;
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 54d9e54c7c9..124d30e6143 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -1,38 +1,30 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef	__XFS_INODE_H__
 #define	__XFS_INODE_H__
 
 /*
+ * Fork identifiers.
+ */
+#define	XFS_DATA_FORK	0
+#define	XFS_ATTR_FORK	1
+
+/*
  * File incore extent information, present for each of data & attr forks.
  */
 #define	XFS_INLINE_EXTS	2
@@ -107,24 +99,6 @@ extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *);
 #define	xfs_ilock_trace(i,n,f,ra)
 #endif
 
-/*
- * This structure is used to communicate which extents of a file
- * were holes when a write started from xfs_write_file() to
- * xfs_strat_read().  This is necessary so that we can know which
- * blocks need to be zeroed when they are read in in xfs_strat_read()
- * if they weren\'t allocated when the buffer given to xfs_strat_read()
- * was mapped.
- *
- * We keep a list of these attached to the inode.  The list is
- * protected by the inode lock and the fact that the io lock is
- * held exclusively by writers.
- */
-typedef struct xfs_gap {
-	struct xfs_gap	*xg_next;
-	xfs_fileoff_t	xg_offset_fsb;
-	xfs_extlen_t	xg_count_fsb;
-} xfs_gap_t;
-
 typedef struct dm_attrs_s {
 	__uint32_t	da_dmevmask;	/* DMIG event mask */
 	__uint16_t	da_dmstate;	/* DMIG state info */
@@ -311,60 +285,16 @@ typedef struct xfs_inode {
 /*
  * Fork handling.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_PTR)
-xfs_ifork_t *xfs_ifork_ptr(xfs_inode_t *ip, int w);
-#define	XFS_IFORK_PTR(ip,w)		xfs_ifork_ptr(ip,w)
-#else
-#define	XFS_IFORK_PTR(ip,w)   ((w) == XFS_DATA_FORK ? &(ip)->i_df : (ip)->i_afp)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_Q)
-int xfs_ifork_q(xfs_inode_t *ip);
-#define	XFS_IFORK_Q(ip)			xfs_ifork_q(ip)
-#else
+#define	XFS_IFORK_PTR(ip,w)		\
+	((w) == XFS_DATA_FORK ? &(ip)->i_df : (ip)->i_afp)
 #define	XFS_IFORK_Q(ip)			XFS_CFORK_Q(&(ip)->i_d)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_DSIZE)
-int xfs_ifork_dsize(xfs_inode_t *ip);
-#define	XFS_IFORK_DSIZE(ip)		xfs_ifork_dsize(ip)
-#else
 #define	XFS_IFORK_DSIZE(ip)		XFS_CFORK_DSIZE(&ip->i_d, ip->i_mount)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_ASIZE)
-int xfs_ifork_asize(xfs_inode_t *ip);
-#define	XFS_IFORK_ASIZE(ip)		xfs_ifork_asize(ip)
-#else
 #define	XFS_IFORK_ASIZE(ip)		XFS_CFORK_ASIZE(&ip->i_d, ip->i_mount)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_SIZE)
-int xfs_ifork_size(xfs_inode_t *ip, int w);
-#define	XFS_IFORK_SIZE(ip,w)		xfs_ifork_size(ip,w)
-#else
 #define	XFS_IFORK_SIZE(ip,w)		XFS_CFORK_SIZE(&ip->i_d, ip->i_mount, w)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_FORMAT)
-int xfs_ifork_format(xfs_inode_t *ip, int w);
-#define	XFS_IFORK_FORMAT(ip,w)		xfs_ifork_format(ip,w)
-#else
 #define	XFS_IFORK_FORMAT(ip,w)		XFS_CFORK_FORMAT(&ip->i_d, w)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_FMT_SET)
-void xfs_ifork_fmt_set(xfs_inode_t *ip, int w, int n);
-#define	XFS_IFORK_FMT_SET(ip,w,n)	xfs_ifork_fmt_set(ip,w,n)
-#else
 #define	XFS_IFORK_FMT_SET(ip,w,n)	XFS_CFORK_FMT_SET(&ip->i_d, w, n)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_NEXTENTS)
-int xfs_ifork_nextents(xfs_inode_t *ip, int w);
-#define	XFS_IFORK_NEXTENTS(ip,w)	xfs_ifork_nextents(ip,w)
-#else
 #define	XFS_IFORK_NEXTENTS(ip,w)	XFS_CFORK_NEXTENTS(&ip->i_d, w)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_NEXT_SET)
-void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n);
-#define	XFS_IFORK_NEXT_SET(ip,w,n)	xfs_ifork_next_set(ip,w,n)
-#else
 #define	XFS_IFORK_NEXT_SET(ip,w,n)	XFS_CFORK_NEXT_SET(&ip->i_d, w, n)
-#endif
 
 
 #ifdef __KERNEL__
@@ -388,6 +318,7 @@ void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n);
 #define	XFS_ILOCK_EXCL		0x004
 #define	XFS_ILOCK_SHARED	0x008
 #define	XFS_IUNLOCK_NONOTIFY	0x010
+/*	XFS_IOLOCK_NESTED	0x020 */
 #define XFS_EXTENT_TOKEN_RD	0x040
 #define XFS_SIZE_TOKEN_RD	0x080
 #define XFS_EXTSIZE_RD		(XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD)
@@ -395,7 +326,7 @@ void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n);
 #define XFS_EXTENT_TOKEN_WR	(XFS_EXTENT_TOKEN_RD | XFS_WILLLEND)
 #define XFS_SIZE_TOKEN_WR       (XFS_SIZE_TOKEN_RD | XFS_WILLLEND)
 #define XFS_EXTSIZE_WR		(XFS_EXTSIZE_RD | XFS_WILLLEND)
-
+/*	XFS_SIZE_TOKEN_WANT	0x200 */
 
 #define XFS_LOCK_MASK	\
 	(XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL | \
@@ -417,28 +348,11 @@ void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n);
 #define	XFS_ITRUNC_DEFINITE	0x1
 #define	XFS_ITRUNC_MAYBE	0x2
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ITOV)
-struct vnode *xfs_itov(xfs_inode_t *ip);
-#define	XFS_ITOV(ip)		xfs_itov(ip)
-#else
 #define	XFS_ITOV(ip)		BHV_TO_VNODE(XFS_ITOBHV(ip))
-#endif
 #define	XFS_ITOV_NULL(ip)	BHV_TO_VNODE_NULL(XFS_ITOBHV(ip))
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ITOBHV)
-struct bhv_desc *xfs_itobhv(xfs_inode_t *ip);
-#define	XFS_ITOBHV(ip)		xfs_itobhv(ip)
-#else
 #define	XFS_ITOBHV(ip)		((struct bhv_desc *)(&((ip)->i_bhv_desc)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BHVTOI)
-xfs_inode_t *xfs_bhvtoi(struct bhv_desc *bhvp);
-#define	XFS_BHVTOI(bhvp)	xfs_bhvtoi(bhvp)
-#else
-#define	XFS_BHVTOI(bhvp)	\
-	((xfs_inode_t *)((char *)(bhvp) - \
-			 (char *)&(((xfs_inode_t *)0)->i_bhv_desc)))
-#endif
-
+#define	XFS_BHVTOI(bhvp)	((xfs_inode_t *)((char *)(bhvp) - \
+				(char *)&(((xfs_inode_t *)0)->i_bhv_desc)))
 #define BHV_IS_XFS(bdp)		(BHV_OPS(bdp) == &xfs_vnodeops)
 
 /*
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 0eed30f5cb1..7f3363c621e 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -1,66 +1,46 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-/*
- * This file contains the implementation of the xfs_inode_log_item.
- * It contains the item operations used to manipulate the inode log
- * items as well as utility routines used by the inode specific
- * transaction routines.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_trans_priv.h"
-#include "xfs_ag.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
 #include "xfs_rw.h"
 
 
@@ -248,6 +228,7 @@ xfs_inode_item_format(
 
 	vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
 	vecp->i_len  = sizeof(xfs_inode_log_format_t);
+	XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT);
 	vecp++;
 	nvecs	     = 1;
 
@@ -292,6 +273,7 @@ xfs_inode_item_format(
 
 	vecp->i_addr = (xfs_caddr_t)&ip->i_d;
 	vecp->i_len  = sizeof(xfs_dinode_core_t);
+	XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
 	vecp++;
 	nvecs++;
 	iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
@@ -339,7 +321,7 @@ xfs_inode_item_format(
 			nrecs = ip->i_df.if_bytes /
 				(uint)sizeof(xfs_bmbt_rec_t);
 			ASSERT(nrecs > 0);
-#if __BYTE_ORDER == __BIG_ENDIAN
+#ifdef XFS_NATIVE_HOST
 			if (nrecs == ip->i_d.di_nextents) {
 				/*
 				 * There are no delayed allocation
@@ -349,6 +331,7 @@ xfs_inode_item_format(
 				vecp->i_addr =
 					(char *)(ip->i_df.if_u1.if_extents);
 				vecp->i_len = ip->i_df.if_bytes;
+				XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
 			} else
 #endif
 			{
@@ -367,6 +350,7 @@ xfs_inode_item_format(
 				vecp->i_addr = (xfs_caddr_t)ext_buffer;
 				vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
 						XFS_DATA_FORK);
+				XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
 			}
 			ASSERT(vecp->i_len <= ip->i_df.if_bytes);
 			iip->ili_format.ilf_dsize = vecp->i_len;
@@ -384,6 +368,7 @@ xfs_inode_item_format(
 			ASSERT(ip->i_df.if_broot != NULL);
 			vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot;
 			vecp->i_len = ip->i_df.if_broot_bytes;
+			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT);
 			vecp++;
 			nvecs++;
 			iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
@@ -409,6 +394,7 @@ xfs_inode_item_format(
 			ASSERT((ip->i_df.if_real_bytes == 0) ||
 			       (ip->i_df.if_real_bytes == data_bytes));
 			vecp->i_len = (int)data_bytes;
+			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL);
 			vecp++;
 			nvecs++;
 			iip->ili_format.ilf_dsize = (unsigned)data_bytes;
@@ -467,7 +453,7 @@ xfs_inode_item_format(
 #endif
 			ASSERT(nrecs > 0);
 			ASSERT(nrecs == ip->i_d.di_anextents);
-#if __BYTE_ORDER == __BIG_ENDIAN
+#ifdef XFS_NATIVE_HOST
 			/*
 			 * There are not delayed allocation extents
 			 * for attributes, so just point at the array.
@@ -486,6 +472,7 @@ xfs_inode_item_format(
 			vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
 					XFS_ATTR_FORK);
 #endif
+			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT);
 			iip->ili_format.ilf_asize = vecp->i_len;
 			vecp++;
 			nvecs++;
@@ -500,6 +487,7 @@ xfs_inode_item_format(
 			ASSERT(ip->i_afp->if_broot != NULL);
 			vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot;
 			vecp->i_len = ip->i_afp->if_broot_bytes;
+			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT);
 			vecp++;
 			nvecs++;
 			iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
@@ -523,6 +511,7 @@ xfs_inode_item_format(
 			ASSERT((ip->i_afp->if_real_bytes == 0) ||
 			       (ip->i_afp->if_real_bytes == data_bytes));
 			vecp->i_len = (int)data_bytes;
+			XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL);
 			vecp++;
 			nvecs++;
 			iip->ili_format.ilf_asize = (unsigned)data_bytes;
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index d8775e0d629..c5dbf93b666 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef	__XFS_INODE_ITEM_H__
 #define	__XFS_INODE_ITEM_H__
@@ -159,38 +145,33 @@ typedef struct xfs_inode_log_item {
 } xfs_inode_log_item_t;
 
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ILOG_FDATA)
-int xfs_ilog_fdata(int w);
 #define	XFS_ILOG_FDATA(w)	xfs_ilog_fdata(w)
-#else
-#define	XFS_ILOG_FDATA(w)	\
-	((w) == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA)
-#endif
+static inline int xfs_ilog_fdata(int w)
+{
+	return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA);
+}
 
 #endif	/* __KERNEL__ */
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ILOG_FBROOT)
-int xfs_ilog_fbroot(int w);
 #define	XFS_ILOG_FBROOT(w)	xfs_ilog_fbroot(w)
-#else
-#define	XFS_ILOG_FBROOT(w)	\
-	((w) == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ILOG_FEXT)
-int xfs_ilog_fext(int w);
+static inline int xfs_ilog_fbroot(int w)
+{
+	return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
+}
+
 #define	XFS_ILOG_FEXT(w)	xfs_ilog_fext(w)
-#else
-#define	XFS_ILOG_FEXT(w)	\
-	((w) == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT)
-#endif
+static inline int xfs_ilog_fext(int w)
+{
+	return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
+}
 
 #ifdef __KERNEL__
 
-void	xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
-void	xfs_inode_item_destroy(struct xfs_inode *);
-void	xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *);
-void	xfs_istale_done(struct xfs_buf *, xfs_inode_log_item_t *);
-void	xfs_iflush_abort(struct xfs_inode *);
+extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
+extern void xfs_inode_item_destroy(struct xfs_inode *);
+extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *);
+extern void xfs_istale_done(struct xfs_buf *, xfs_inode_log_item_t *);
+extern void xfs_iflush_abort(struct xfs_inode *);
 
 #endif	/* __KERNEL__ */
 
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h
index a3af2d5a6eb..7a28191cb0d 100644
--- a/fs/xfs/xfs_inum.h
+++ b/fs/xfs/xfs_inum.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_INUM_H__
 #define	__XFS_INUM_H__
@@ -58,109 +44,31 @@ typedef	__uint32_t	xfs_intino_t;
 
 struct xfs_mount;
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_MASK)
-__uint32_t xfs_ino_mask(int k);
-#define	XFS_INO_MASK(k)			xfs_ino_mask(k)
-#else
-#define	XFS_INO_MASK(k)	((__uint32_t)((1ULL << (k)) - 1))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_OFFSET_BITS)
-int xfs_ino_offset_bits(struct xfs_mount *mp);
-#define	XFS_INO_OFFSET_BITS(mp)		xfs_ino_offset_bits(mp)
-#else
-#define	XFS_INO_OFFSET_BITS(mp)	((mp)->m_sb.sb_inopblog)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_AGBNO_BITS)
-int xfs_ino_agbno_bits(struct xfs_mount *mp);
-#define	XFS_INO_AGBNO_BITS(mp)		xfs_ino_agbno_bits(mp)
-#else
-#define	XFS_INO_AGBNO_BITS(mp)	((mp)->m_sb.sb_agblklog)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_AGINO_BITS)
-int xfs_ino_agino_bits(struct xfs_mount *mp);
-#define	XFS_INO_AGINO_BITS(mp)		xfs_ino_agino_bits(mp)
-#else
-#define	XFS_INO_AGINO_BITS(mp)		((mp)->m_agino_log)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_AGNO_BITS)
-int xfs_ino_agno_bits(struct xfs_mount *mp);
-#define	XFS_INO_AGNO_BITS(mp)		xfs_ino_agno_bits(mp)
-#else
-#define	XFS_INO_AGNO_BITS(mp)	((mp)->m_agno_log)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_BITS)
-int xfs_ino_bits(struct xfs_mount *mp);
-#define	XFS_INO_BITS(mp)		xfs_ino_bits(mp)
-#else
-#define	XFS_INO_BITS(mp)	(XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_AGNO)
-xfs_agnumber_t xfs_ino_to_agno(struct xfs_mount *mp, xfs_ino_t i);
-#define	XFS_INO_TO_AGNO(mp,i)		xfs_ino_to_agno(mp,i)
-#else
-#define	XFS_INO_TO_AGNO(mp,i)	\
+#define	XFS_INO_MASK(k)			(__uint32_t)((1ULL << (k)) - 1)
+#define	XFS_INO_OFFSET_BITS(mp)		(mp)->m_sb.sb_inopblog
+#define	XFS_INO_AGBNO_BITS(mp)		(mp)->m_sb.sb_agblklog
+#define	XFS_INO_AGINO_BITS(mp)		(mp)->m_agino_log
+#define	XFS_INO_AGNO_BITS(mp)		(mp)->m_agno_log
+#define	XFS_INO_BITS(mp)		\
+	XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp)
+#define	XFS_INO_TO_AGNO(mp,i)		\
 	((xfs_agnumber_t)((i) >> XFS_INO_AGINO_BITS(mp)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_AGINO)
-xfs_agino_t xfs_ino_to_agino(struct xfs_mount *mp, xfs_ino_t i);
-#define	XFS_INO_TO_AGINO(mp,i)		xfs_ino_to_agino(mp,i)
-#else
-#define	XFS_INO_TO_AGINO(mp,i)	\
+#define	XFS_INO_TO_AGINO(mp,i)		\
 	((xfs_agino_t)(i) & XFS_INO_MASK(XFS_INO_AGINO_BITS(mp)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_AGBNO)
-xfs_agblock_t xfs_ino_to_agbno(struct xfs_mount *mp, xfs_ino_t i);
-#define	XFS_INO_TO_AGBNO(mp,i)		xfs_ino_to_agbno(mp,i)
-#else
-#define	XFS_INO_TO_AGBNO(mp,i)	\
+#define	XFS_INO_TO_AGBNO(mp,i)		\
 	(((xfs_agblock_t)(i) >> XFS_INO_OFFSET_BITS(mp)) & \
-	 XFS_INO_MASK(XFS_INO_AGBNO_BITS(mp)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_OFFSET)
-int xfs_ino_to_offset(struct xfs_mount *mp, xfs_ino_t i);
-#define	XFS_INO_TO_OFFSET(mp,i)		xfs_ino_to_offset(mp,i)
-#else
-#define	XFS_INO_TO_OFFSET(mp,i)	\
+		XFS_INO_MASK(XFS_INO_AGBNO_BITS(mp)))
+#define	XFS_INO_TO_OFFSET(mp,i)		\
 	((int)(i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_FSB)
-xfs_fsblock_t xfs_ino_to_fsb(struct xfs_mount *mp, xfs_ino_t i);
-#define	XFS_INO_TO_FSB(mp,i)		xfs_ino_to_fsb(mp,i)
-#else
-#define	XFS_INO_TO_FSB(mp,i)	\
+#define	XFS_INO_TO_FSB(mp,i)		\
 	XFS_AGB_TO_FSB(mp, XFS_INO_TO_AGNO(mp,i), XFS_INO_TO_AGBNO(mp,i))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGINO_TO_INO)
-xfs_ino_t
-xfs_agino_to_ino(struct xfs_mount *mp, xfs_agnumber_t a, xfs_agino_t i);
-#define	XFS_AGINO_TO_INO(mp,a,i)	xfs_agino_to_ino(mp,a,i)
-#else
 #define	XFS_AGINO_TO_INO(mp,a,i)	\
 	(((xfs_ino_t)(a) << XFS_INO_AGINO_BITS(mp)) | (i))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGINO_TO_AGBNO)
-xfs_agblock_t xfs_agino_to_agbno(struct xfs_mount *mp, xfs_agino_t i);
-#define	XFS_AGINO_TO_AGBNO(mp,i)	xfs_agino_to_agbno(mp,i)
-#else
 #define	XFS_AGINO_TO_AGBNO(mp,i)	((i) >> XFS_INO_OFFSET_BITS(mp))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGINO_TO_OFFSET)
-int xfs_agino_to_offset(struct xfs_mount *mp, xfs_agino_t i);
-#define	XFS_AGINO_TO_OFFSET(mp,i)	xfs_agino_to_offset(mp,i)
-#else
 #define	XFS_AGINO_TO_OFFSET(mp,i)	\
 	((i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp)))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_OFFBNO_TO_AGINO)
-xfs_agino_t xfs_offbno_to_agino(struct xfs_mount *mp, xfs_agblock_t b, int o);
-#define	XFS_OFFBNO_TO_AGINO(mp,b,o)	xfs_offbno_to_agino(mp,b,o)
-#else
 #define	XFS_OFFBNO_TO_AGINO(mp,b,o)	\
 	((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o)))
-#endif
 
 #if XFS_BIG_INUMS
 #define	XFS_MAXINUMBER		((xfs_ino_t)((1ULL << 56) - 1ULL))
diff --git a/fs/xfs/xfs_iocore.c b/fs/xfs/xfs_iocore.c
index 414ec496845..a07815661a8 100644
--- a/fs/xfs/xfs_iocore.c
+++ b/fs/xfs/xfs_iocore.c
@@ -1,40 +1,26 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -42,22 +28,21 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_itable.h"
-#include "xfs_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_itable.h"
+#include "xfs_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
 #include "xfs_bmap.h"
 #include "xfs_error.h"
-#include "xfs_bit.h"
 #include "xfs_rw.h"
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2edd6769e5d..45a77a3a6c0 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1,40 +1,25 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
 #include "xfs_fs.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -44,16 +29,16 @@
 #include "xfs_dmapi.h"
 #include "xfs_quota.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_ialloc.h"
+#include "xfs_btree.h"
 #include "xfs_bmap.h"
 #include "xfs_bit.h"
 #include "xfs_rtalloc.h"
@@ -226,13 +211,12 @@ xfs_iomap(
 		xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count);
 		lockmode = XFS_LCK_MAP_SHARED(mp, io);
 		bmapi_flags = XFS_BMAPI_ENTIRE;
-		if (flags & BMAPI_IGNSTATE)
-			bmapi_flags |= XFS_BMAPI_IGSTATE;
 		break;
 	case BMAPI_WRITE:
 		xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count);
 		lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
-		bmapi_flags = 0;
+		if (flags & BMAPI_IGNSTATE)
+			bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
 		XFS_ILOCK(mp, io, lockmode);
 		break;
 	case BMAPI_ALLOCATE:
@@ -380,20 +364,18 @@ xfs_iomap_write_direct(
 	xfs_fileoff_t	offset_fsb;
 	xfs_fileoff_t	last_fsb;
 	xfs_filblks_t	count_fsb;
-	xfs_fsize_t	isize;
 	xfs_fsblock_t	firstfsb;
-	int		nimaps, maps;
+	int		nimaps;
 	int		error;
 	int		bmapi_flag;
 	int		quota_flag;
 	int		rt;
 	xfs_trans_t	*tp;
-	xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp;
+	xfs_bmbt_irec_t imap;
 	xfs_bmap_free_t free_list;
-	int		aeof;
-	xfs_filblks_t	datablocks, qblocks, resblks;
+	xfs_filblks_t	qblocks, resblks;
 	int		committed;
-	int		numrtextents;
+	int		resrtextents;
 
 	/*
 	 * Make sure that the dquots are there. This doesn't hold
@@ -403,15 +385,6 @@ xfs_iomap_write_direct(
 	if (error)
 		return XFS_ERROR(error);
 
-	maps = min(XFS_WRITE_IMAPS, *nmaps);
-	nimaps = maps;
-
-	isize = ip->i_d.di_size;
-	aeof = (offset + count) > isize;
-
-	if (io->io_new_size > isize)
-		isize = io->io_new_size;
-
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
 	count_fsb = last_fsb - offset_fsb;
@@ -434,14 +407,14 @@ xfs_iomap_write_direct(
 
 		if (!(extsz = ip->i_d.di_extsize))
 			extsz = mp->m_sb.sb_rextsize;
-		numrtextents = qblocks = (count_fsb + extsz - 1);
-		do_div(numrtextents, mp->m_sb.sb_rextsize);
+		resrtextents = qblocks = (count_fsb + extsz - 1);
+		do_div(resrtextents, mp->m_sb.sb_rextsize);
+		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
 		quota_flag = XFS_QMOPT_RES_RTBLKS;
-		datablocks = 0;
 	} else {
-		datablocks = qblocks = count_fsb;
+		resrtextents = 0;
+		resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb);
 		quota_flag = XFS_QMOPT_RES_REGBLKS;
-		numrtextents = 0;
 	}
 
 	/*
@@ -449,9 +422,8 @@ xfs_iomap_write_direct(
 	 */
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-	resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
 	error = xfs_trans_reserve(tp, resblks,
-			XFS_WRITE_LOG_RES(mp), numrtextents,
+			XFS_WRITE_LOG_RES(mp), resrtextents,
 			XFS_TRANS_PERM_LOG_RES,
 			XFS_WRITE_LOG_COUNT);
 
@@ -481,9 +453,8 @@ xfs_iomap_write_direct(
 	 */
 	XFS_BMAP_INIT(&free_list, &firstfsb);
 	nimaps = 1;
-	imapp = &imap[0];
 	error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
-		bmapi_flag, &firstfsb, 0, imapp, &nimaps, &free_list);
+		bmapi_flag, &firstfsb, 0, &imap, &nimaps, &free_list);
 	if (error)
 		goto error0;
 
@@ -505,7 +476,7 @@ xfs_iomap_write_direct(
 		goto error_out;
 	}
 
-	*ret_imap = imap[0];
+	*ret_imap = imap;
 	*nmaps = 1;
 	if ( !(io->io_flags & XFS_IOCORE_RT)  && !ret_imap->br_startblock) {
                 cmn_err(CE_PANIC,"Access to block zero:  fs <%s> inode: %lld "
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 4daaa521210..fcd6d63bb68 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2003-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_IOMAP_H__
 #define __XFS_IOMAP_H__
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 8fbc8d37818..f63646ead81 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -1,59 +1,45 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_ag.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_ialloc.h"
 #include "xfs_itable.h"
 #include "xfs_error.h"
+#include "xfs_btree.h"
 
 #ifndef HAVE_USERACC
 #define useracc(ubuffer, size, flags, foo) (0)
@@ -462,7 +448,7 @@ xfs_bulkstat(
 			while (error) {
 				agino += XFS_INODES_PER_CHUNK;
 				if (XFS_AGINO_TO_AGBNO(mp, agino) >=
-						INT_GET(agi->agi_length, ARCH_CONVERT))
+						be32_to_cpu(agi->agi_length))
 					break;
 				error = xfs_inobt_lookup_ge(cur, agino, 0, 0,
 							    &tmp);
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 2be9d1805ab..047d834ed21 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -1,33 +1,18 @@
 /*
  * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_ITABLE_H__
 #define	__XFS_ITABLE_H__
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 1cd2ac16387..29af51275ca 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1,58 +1,47 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-/*
- * High level interface routines for log manager
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
-#include "xfs_ag.h"
-#include "xfs_sb.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
+#include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_error.h"
 #include "xfs_log_priv.h"
 #include "xfs_buf_item.h"
+#include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
 #include "xfs_log_recover.h"
-#include "xfs_bit.h"
-#include "xfs_rw.h"
 #include "xfs_trans_priv.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_rw.h"
 
 
 #define xlog_write_adv_cnt(ptr, len, off, bytes) \
@@ -93,8 +82,11 @@ STATIC int  xlog_state_release_iclog(xlog_t		*log,
 STATIC void xlog_state_switch_iclogs(xlog_t		*log,
 				     xlog_in_core_t *iclog,
 				     int		eventual_size);
-STATIC int  xlog_state_sync(xlog_t *log, xfs_lsn_t lsn, uint flags);
-STATIC int  xlog_state_sync_all(xlog_t *log, uint flags);
+STATIC int  xlog_state_sync(xlog_t			*log,
+			    xfs_lsn_t 			lsn,
+			    uint			flags,
+			    int				*log_flushed);
+STATIC int  xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
 STATIC void xlog_state_want_sync(xlog_t	*log, xlog_in_core_t *iclog);
 
 /* local functions to manipulate grant head */
@@ -119,8 +111,7 @@ STATIC xlog_ticket_t	*xlog_ticket_get(xlog_t *log,
 					 uint	flags);
 STATIC void		xlog_ticket_put(xlog_t *log, xlog_ticket_t *ticket);
 
-/* local debug functions */
-#if defined(DEBUG) && !defined(XLOG_NOLOG)
+#if defined(DEBUG)
 STATIC void	xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr);
 STATIC void	xlog_verify_grant_head(xlog_t *log, int equals);
 STATIC void	xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
@@ -136,34 +127,19 @@ STATIC void	xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
 
 STATIC int	xlog_iclogs_empty(xlog_t *log);
 
-#ifdef DEBUG
-int xlog_do_error = 0;
-int xlog_req_num  = 0;
-int xlog_error_mod = 33;
-#endif
-
-#define XLOG_FORCED_SHUTDOWN(log)	(log->l_flags & XLOG_IO_ERROR)
-
-/*
- * 0 => disable log manager
- * 1 => enable log manager
- * 2 => enable log manager and log debugging
- */
-#if defined(XLOG_NOLOG) || defined(DEBUG)
-int   xlog_debug = 1;
-xfs_buftarg_t *xlog_target;
-#endif
-
 #if defined(XFS_LOG_TRACE)
-
 void
 xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
 {
-	if (! log->l_grant_trace) {
-		log->l_grant_trace = ktrace_alloc(1024, KM_NOSLEEP);
-		if (! log->l_grant_trace)
+	unsigned long cnts;
+
+	if (!log->l_grant_trace) {
+		log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP);
+		if (!log->l_grant_trace)
 			return;
 	}
+	/* ticket counts are 1 byte each */
+	cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
 
 	ktrace_enter(log->l_grant_trace,
 		     (void *)tic,
@@ -178,40 +154,25 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
 		     (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)),
 		     (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)),
 		     (void *)string,
-		     (void *)((unsigned long)13),
-		     (void *)((unsigned long)14),
-		     (void *)((unsigned long)15),
-		     (void *)((unsigned long)16));
+		     (void *)((unsigned long)tic->t_trans_type),
+		     (void *)cnts,
+		     (void *)((unsigned long)tic->t_curr_res),
+		     (void *)((unsigned long)tic->t_unit_res));
 }
 
 void
 xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
 {
-	pid_t pid;
-
-	pid = current_pid();
-
 	if (!iclog->ic_trace)
 		iclog->ic_trace = ktrace_alloc(256, KM_SLEEP);
 	ktrace_enter(iclog->ic_trace,
 		     (void *)((unsigned long)state),
-		     (void *)((unsigned long)pid),
-		     (void *)0,
-		     (void *)0,
-		     (void *)0,
-		     (void *)0,
-		     (void *)0,
-		     (void *)0,
-		     (void *)0,
-		     (void *)0,
-		     (void *)0,
-		     (void *)0,
-		     (void *)0,
-		     (void *)0,
-		     (void *)0,
-		     (void *)0);
+		     (void *)((unsigned long)current_pid()),
+		     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
+		     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
+		     (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
+		     (void *)NULL, (void *)NULL);
 }
-
 #else
 #define	xlog_trace_loggrant(log,tic,string)
 #define	xlog_trace_iclog(iclog,state)
@@ -248,11 +209,6 @@ xfs_log_done(xfs_mount_t	*mp,
 	xlog_ticket_t	*ticket = (xfs_log_ticket_t) xtic;
 	xfs_lsn_t	lsn	= 0;
 
-#if defined(DEBUG) || defined(XLOG_NOLOG)
-	if (!xlog_debug && xlog_target == log->l_targ)
-		return 0;
-#endif
-
 	if (XLOG_FORCED_SHUTDOWN(log) ||
 	    /*
 	     * If nothing was ever written, don't write out commit record.
@@ -274,9 +230,11 @@ xfs_log_done(xfs_mount_t	*mp,
 		 * Release ticket if not permanent reservation or a specifc
 		 * request has been made to release a permanent reservation.
 		 */
+		xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
 		xlog_ungrant_log_space(log, ticket);
 		xlog_state_put_ticket(log, ticket);
 	} else {
+		xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
 		xlog_regrant_reserve_log_space(log, ticket);
 	}
 
@@ -306,33 +264,28 @@ xfs_log_done(xfs_mount_t	*mp,
  * semaphore.
  */
 int
-xfs_log_force(xfs_mount_t *mp,
-	      xfs_lsn_t	  lsn,
-	      uint	  flags)
+_xfs_log_force(
+	xfs_mount_t	*mp,
+	xfs_lsn_t	lsn,
+	uint		flags,
+	int		*log_flushed)
 {
-	int	rval;
-	xlog_t *log = mp->m_log;
+	xlog_t		*log = mp->m_log;
+	int		dummy;
 
-#if defined(DEBUG) || defined(XLOG_NOLOG)
-	if (!xlog_debug && xlog_target == log->l_targ)
-		return 0;
-#endif
+	if (!log_flushed)
+		log_flushed = &dummy;
 
 	ASSERT(flags & XFS_LOG_FORCE);
 
 	XFS_STATS_INC(xs_log_force);
 
-	if ((log->l_flags & XLOG_IO_ERROR) == 0) {
-		if (lsn == 0)
-			rval = xlog_state_sync_all(log, flags);
-		else
-			rval = xlog_state_sync(log, lsn, flags);
-	} else {
-		rval = XFS_ERROR(EIO);
-	}
-
-	return rval;
-
+	if (log->l_flags & XLOG_IO_ERROR)
+		return XFS_ERROR(EIO);
+	if (lsn == 0)
+		return xlog_state_sync_all(log, flags, log_flushed);
+	else
+		return xlog_state_sync(log, lsn, flags, log_flushed);
 }	/* xfs_log_force */
 
 /*
@@ -350,10 +303,6 @@ xfs_log_notify(xfs_mount_t	  *mp,		/* mount of partition */
 	xlog_in_core_t	  *iclog = (xlog_in_core_t *)iclog_hndl;
 	int	abortflg, spl;
 
-#if defined(DEBUG) || defined(XLOG_NOLOG)
-	if (!xlog_debug && xlog_target == log->l_targ)
-		return 0;
-#endif
 	cb->cb_next = NULL;
 	spl = LOG_LOCK(log);
 	abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
@@ -399,17 +348,13 @@ xfs_log_reserve(xfs_mount_t	 *mp,
 		int		 cnt,
 		xfs_log_ticket_t *ticket,
 		__uint8_t	 client,
-		uint		 flags)
+		uint		 flags,
+		uint		 t_type)
 {
 	xlog_t		*log = mp->m_log;
 	xlog_ticket_t	*internal_ticket;
-	int		retval;
+	int		retval = 0;
 
-#if defined(DEBUG) || defined(XLOG_NOLOG)
-	if (!xlog_debug && xlog_target == log->l_targ)
-		return 0;
-#endif
-	retval = 0;
 	ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
 	ASSERT((flags & XFS_LOG_NOSLEEP) == 0);
 
@@ -421,13 +366,19 @@ xfs_log_reserve(xfs_mount_t	 *mp,
 	if (*ticket != NULL) {
 		ASSERT(flags & XFS_LOG_PERM_RESERV);
 		internal_ticket = (xlog_ticket_t *)*ticket;
+		xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)");
 		xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
 		retval = xlog_regrant_write_log_space(log, internal_ticket);
 	} else {
 		/* may sleep if need to allocate more tickets */
 		internal_ticket = xlog_ticket_get(log, unit_bytes, cnt,
 						  client, flags);
+		internal_ticket->t_trans_type = t_type;
 		*ticket = internal_ticket;
+		xlog_trace_loggrant(log, internal_ticket, 
+			(internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ?
+			"xfs_log_reserve: create new ticket (permanent trans)" :
+			"xfs_log_reserve: create new ticket");
 		xlog_grant_push_ail(mp,
 				    (internal_ticket->t_unit_res *
 				     internal_ticket->t_cnt));
@@ -465,12 +416,6 @@ xfs_log_mount(xfs_mount_t	*mp,
 
 	mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
 
-#if defined(DEBUG) || defined(XLOG_NOLOG)
-	if (!xlog_debug) {
-		cmn_err(CE_NOTE, "log dev: %s", XFS_BUFTARG_NAME(log_target));
-		return 0;
-	}
-#endif
 	/*
 	 * skip log recovery on a norecovery mount.  pretend it all
 	 * just worked.
@@ -574,11 +519,6 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 	    __uint32_t pad2; /* may as well make it 64 bits */
 	} magic = { XLOG_UNMOUNT_TYPE, 0, 0 };
 
-#if defined(DEBUG) || defined(XLOG_NOLOG)
-	if (!xlog_debug && xlog_target == log->l_targ)
-		return 0;
-#endif
-
 	/*
 	 * Don't write out unmount record on read-only mounts.
 	 * Or, if we are doing a forced umount (typically because of IO errors).
@@ -601,8 +541,9 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 	if (! (XLOG_FORCED_SHUTDOWN(log))) {
 		reg[0].i_addr = (void*)&magic;
 		reg[0].i_len  = sizeof(magic);
+		XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT);
 
-		error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0);
+		error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0, 0);
 		if (!error) {
 			/* remove inited flag */
 			((xlog_ticket_t *)tic)->t_flags = 0;
@@ -704,12 +645,6 @@ xfs_log_write(xfs_mount_t *	mp,
 	int	error;
 	xlog_t *log = mp->m_log;
 
-#if defined(DEBUG) || defined(XLOG_NOLOG)
-	if (!xlog_debug && xlog_target == log->l_targ) {
-		*start_lsn = 0;
-		return 0;
-	}
-#endif
 	if (XLOG_FORCED_SHUTDOWN(log))
 		return XFS_ERROR(EIO);
 
@@ -729,11 +664,6 @@ xfs_log_move_tail(xfs_mount_t	*mp,
 	int		need_bytes, free_bytes, cycle, bytes;
 	SPLDECL(s);
 
-#if defined(DEBUG) || defined(XLOG_NOLOG)
-	if (!xlog_debug && xlog_target == log->l_targ)
-		return;
-#endif
-	/* XXXsup tmp */
 	if (XLOG_FORCED_SHUTDOWN(log))
 		return;
 	ASSERT(!XFS_FORCED_SHUTDOWN(mp));
@@ -1020,51 +950,22 @@ xlog_get_iclog_buffer_size(xfs_mount_t	*mp,
 	int size;
 	int xhdrs;
 
-#if defined(DEBUG) || defined(XLOG_NOLOG)
-	/*
-	 * When logbufs == 0, someone has disabled the log from the FSTAB
-	 * file.  This is not a documented feature.  We need to set xlog_debug
-	 * to zero (this deactivates the log) and set xlog_target to the
-	 * appropriate device.  Only one filesystem may be affected as such
-	 * since this is just a performance hack to test what we might be able
-	 * to get if the log were not present.
-	 */
-	if (mp->m_logbufs == 0) {
-		xlog_debug = 0;
-		xlog_target = log->l_targ;
-		log->l_iclog_bufs = XLOG_MIN_ICLOGS;
-	} else
-#endif
-	{
-		/*
-		 * This is the normal path.  If m_logbufs == -1, then the
-		 * admin has chosen to use the system defaults for logbuffers.
-		 */
-		if (mp->m_logbufs == -1) { 
-			if (xfs_physmem <= btoc(128*1024*1024)) { 
-				log->l_iclog_bufs = XLOG_MIN_ICLOGS; 
-			} else if (xfs_physmem <= btoc(400*1024*1024)) { 
-				log->l_iclog_bufs = XLOG_MED_ICLOGS; 
-			} else {
-				/* 256K with 32K bufs */
-				log->l_iclog_bufs = XLOG_MAX_ICLOGS;
-			}
-		} else
-			log->l_iclog_bufs = mp->m_logbufs;
-
-#if defined(DEBUG) || defined(XLOG_NOLOG)
-		/* We are reactivating a filesystem after it was inactive */
-		if (log->l_targ == xlog_target) {
-			xlog_target = NULL;
-			xlog_debug = 1;
+	if (mp->m_logbufs <= 0) {
+		if (xfs_physmem <= btoc(128*1024*1024)) {
+			log->l_iclog_bufs = XLOG_MIN_ICLOGS;
+		} else if (xfs_physmem <= btoc(400*1024*1024)) {
+			log->l_iclog_bufs = XLOG_MED_ICLOGS;
+		} else {	/* 256K with 32K bufs */
+			log->l_iclog_bufs = XLOG_MAX_ICLOGS;
 		}
-#endif
+	} else {
+		log->l_iclog_bufs = mp->m_logbufs;
 	}
 
 	/*
 	 * Buffer size passed in from mount system call.
 	 */
-	if (mp->m_logbsize != -1) {
+	if (mp->m_logbsize > 0) {
 		size = log->l_iclog_size = mp->m_logbsize;
 		log->l_iclog_size_log = 0;
 		while (size != 1) {
@@ -1087,7 +988,7 @@ xlog_get_iclog_buffer_size(xfs_mount_t	*mp,
 			log->l_iclog_hsize = BBSIZE;
 			log->l_iclog_heads = 1;
 		}
-		return;
+		goto done;
 	}
 
 	/*
@@ -1114,7 +1015,7 @@ xlog_get_iclog_buffer_size(xfs_mount_t	*mp,
 	if (mp->m_sb.sb_blocksize >= 16*1024) {
 		log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
 		log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;
-		if (mp->m_logbufs == -1) {
+		if (mp->m_logbufs <= 0) {
 			switch (mp->m_sb.sb_blocksize) {
 			    case 16*1024:			/* 16 KB */
 				log->l_iclog_bufs = 3;
@@ -1131,6 +1032,12 @@ xlog_get_iclog_buffer_size(xfs_mount_t	*mp,
 			}
 		}
 	}
+
+done:	/* are we being asked to make the sizes selected above visible? */
+	if (mp->m_logbufs == 0)
+		mp->m_logbufs = log->l_iclog_bufs;
+	if (mp->m_logbsize == 0)
+		mp->m_logbsize = log->l_iclog_size;
 }	/* xlog_get_iclog_buffer_size */
 
 
@@ -1272,6 +1179,7 @@ xlog_commit_record(xfs_mount_t  *mp,
 
 	reg[0].i_addr = NULL;
 	reg[0].i_len = 0;
+	XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT);
 
 	ASSERT_ALWAYS(iclog);
 	if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
@@ -1452,14 +1360,13 @@ xlog_sync(xlog_t		*log,
 	XFS_BUF_BUSY(bp);
 	XFS_BUF_ASYNC(bp);
 	/*
-	 * Do a disk write cache flush for the log block.
-	 * This is a bit of a sledgehammer, it would be better
-	 * to use a tag barrier here that just prevents reordering.
+	 * Do an ordered write for the log block.
+	 *
 	 * It may not be needed to flush the first split block in the log wrap
 	 * case, but do it anyways to be safe -AK
 	 */
-	if (!(log->l_mp->m_flags & XFS_MOUNT_NOLOGFLUSH))
-		XFS_BUF_FLUSH(bp);
+	if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
+		XFS_BUF_ORDERED(bp);
 
 	ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
 	ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
@@ -1490,8 +1397,8 @@ xlog_sync(xlog_t		*log,
 		XFS_BUF_SET_FSPRIVATE(bp, iclog);
 		XFS_BUF_BUSY(bp);
 		XFS_BUF_ASYNC(bp);
-		if (!(log->l_mp->m_flags & XFS_MOUNT_NOLOGFLUSH))
-			XFS_BUF_FLUSH(bp);
+		if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
+			XFS_BUF_ORDERED(bp);
 		dptr = XFS_BUF_PTR(bp);
 		/*
 		 * Bump the cycle numbers at the start of each block
@@ -1605,6 +1512,117 @@ xlog_state_finish_copy(xlog_t		*log,
 
 
 /*
+ * print out info relating to regions written which consume
+ * the reservation
+ */
+#if defined(XFS_LOG_RES_DEBUG)
+STATIC void
+xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
+{
+	uint i;
+	uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t);
+
+	/* match with XLOG_REG_TYPE_* in xfs_log.h */
+	static char *res_type_str[XLOG_REG_TYPE_MAX] = {
+	    "bformat",
+	    "bchunk",
+	    "efi_format",
+	    "efd_format",
+	    "iformat",
+	    "icore",
+	    "iext",
+	    "ibroot",
+	    "ilocal",
+	    "iattr_ext",
+	    "iattr_broot",
+	    "iattr_local",
+	    "qformat",
+	    "dquot",
+	    "quotaoff",
+	    "LR header",
+	    "unmount",
+	    "commit",
+	    "trans header"
+	};
+	static char *trans_type_str[XFS_TRANS_TYPE_MAX] = {
+	    "SETATTR_NOT_SIZE",
+	    "SETATTR_SIZE",
+	    "INACTIVE",
+	    "CREATE",
+	    "CREATE_TRUNC",
+	    "TRUNCATE_FILE",
+	    "REMOVE",
+	    "LINK",
+	    "RENAME",
+	    "MKDIR",
+	    "RMDIR",
+	    "SYMLINK",
+	    "SET_DMATTRS",
+	    "GROWFS",
+	    "STRAT_WRITE",
+	    "DIOSTRAT",
+	    "WRITE_SYNC",
+	    "WRITEID",
+	    "ADDAFORK",
+	    "ATTRINVAL",
+	    "ATRUNCATE",
+	    "ATTR_SET",
+	    "ATTR_RM",
+	    "ATTR_FLAG",
+	    "CLEAR_AGI_BUCKET",
+	    "QM_SBCHANGE",
+	    "DUMMY1",
+	    "DUMMY2",
+	    "QM_QUOTAOFF",
+	    "QM_DQALLOC",
+	    "QM_SETQLIM",
+	    "QM_DQCLUSTER",
+	    "QM_QINOCREATE",
+	    "QM_QUOTAOFF_END",
+	    "SB_UNIT",
+	    "FSYNC_TS",
+	    "GROWFSRT_ALLOC",
+	    "GROWFSRT_ZERO",
+	    "GROWFSRT_FREE",
+	    "SWAPEXT"
+	};
+
+	xfs_fs_cmn_err(CE_WARN, mp,
+			"xfs_log_write: reservation summary:\n"
+			"  trans type  = %s (%u)\n"
+			"  unit res    = %d bytes\n"
+			"  current res = %d bytes\n"
+			"  total reg   = %u bytes (o/flow = %u bytes)\n"
+			"  ophdrs      = %u (ophdr space = %u bytes)\n"
+			"  ophdr + reg = %u bytes\n"
+			"  num regions = %u\n",
+			((ticket->t_trans_type <= 0 ||
+			  ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
+			  "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]),
+			ticket->t_trans_type,
+			ticket->t_unit_res,
+			ticket->t_curr_res,
+			ticket->t_res_arr_sum, ticket->t_res_o_flow,
+			ticket->t_res_num_ophdrs, ophdr_spc,
+			ticket->t_res_arr_sum + 
+			  ticket->t_res_o_flow + ophdr_spc,
+			ticket->t_res_num);
+
+	for (i = 0; i < ticket->t_res_num; i++) {
+	   	uint r_type = ticket->t_res_arr[i].r_type; 
+		cmn_err(CE_WARN,
+			    "region[%u]: %s - %u bytes\n",
+			    i, 
+			    ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
+			    "bad-rtype" : res_type_str[r_type-1]),
+			    ticket->t_res_arr[i].r_len);
+	}
+}
+#else
+#define xlog_print_tic_res(mp, ticket)
+#endif
+
+/*
  * Write some region out to in-core log
  *
  * This will be called when writing externally provided regions or when
@@ -1677,16 +1695,21 @@ xlog_write(xfs_mount_t *	mp,
      * xlog_op_header_t and may need to be double word aligned.
      */
     len = 0;
-    if (ticket->t_flags & XLOG_TIC_INITED)     /* acct for start rec of xact */
+    if (ticket->t_flags & XLOG_TIC_INITED) {    /* acct for start rec of xact */
 	len += sizeof(xlog_op_header_t);
+	XLOG_TIC_ADD_OPHDR(ticket);
+    }
 
     for (index = 0; index < nentries; index++) {
 	len += sizeof(xlog_op_header_t);	    /* each region gets >= 1 */
+	XLOG_TIC_ADD_OPHDR(ticket);
 	len += reg[index].i_len;
+	XLOG_TIC_ADD_REGION(ticket, reg[index].i_len, reg[index].i_type);
     }
     contwr = *start_lsn = 0;
 
     if (ticket->t_curr_res < len) {
+	xlog_print_tic_res(mp, ticket);
 #ifdef DEBUG
 	xlog_panic(
 		"xfs_log_write: reservation ran out. Need to up reservation");
@@ -1790,6 +1813,7 @@ xlog_write(xfs_mount_t *	mp,
 		len += sizeof(xlog_op_header_t); /* from splitting of region */
 		/* account for new log op header */
 		ticket->t_curr_res -= sizeof(xlog_op_header_t);
+		XLOG_TIC_ADD_OPHDR(ticket);
 	    }
 	    xlog_verify_dest_ptr(log, ptr);
 
@@ -2282,6 +2306,9 @@ restart:
 	 */
 	if (log_offset == 0) {
 		ticket->t_curr_res -= log->l_iclog_hsize;
+		XLOG_TIC_ADD_REGION(ticket,
+				    log->l_iclog_hsize,
+				    XLOG_REG_TYPE_LRHEADER);
 		INT_SET(head->h_cycle, ARCH_CONVERT, log->l_curr_cycle);
 		ASSIGN_LSN(head->h_lsn, log);
 		ASSERT(log->l_curr_block >= 0);
@@ -2468,6 +2495,7 @@ xlog_regrant_write_log_space(xlog_t	   *log,
 #endif
 
 	tic->t_curr_res = tic->t_unit_res;
+	XLOG_TIC_RESET_RES(tic);
 
 	if (tic->t_cnt > 0)
 		return (0);
@@ -2608,6 +2636,7 @@ xlog_regrant_reserve_log_space(xlog_t	     *log,
 	XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
 	XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
 	ticket->t_curr_res = ticket->t_unit_res;
+	XLOG_TIC_RESET_RES(ticket);
 	xlog_trace_loggrant(log, ticket,
 			    "xlog_regrant_reserve_log_space: sub current res");
 	xlog_verify_grant_head(log, 1);
@@ -2624,6 +2653,7 @@ xlog_regrant_reserve_log_space(xlog_t	     *log,
 	xlog_verify_grant_head(log, 0);
 	GRANT_UNLOCK(log, s);
 	ticket->t_curr_res = ticket->t_unit_res;
+	XLOG_TIC_RESET_RES(ticket);
 }	/* xlog_regrant_reserve_log_space */
 
 
@@ -2813,7 +2843,7 @@ xlog_state_switch_iclogs(xlog_t		*log,
  *		not in the active nor dirty state.
  */
 STATIC int
-xlog_state_sync_all(xlog_t *log, uint flags)
+xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
 {
 	xlog_in_core_t	*iclog;
 	xfs_lsn_t	lsn;
@@ -2862,6 +2892,7 @@ xlog_state_sync_all(xlog_t *log, uint flags)
 
 				if (xlog_state_release_iclog(log, iclog))
 					return XFS_ERROR(EIO);
+				*log_flushed = 1;
 				s = LOG_LOCK(log);
 				if (INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) == lsn &&
 				    iclog->ic_state != XLOG_STATE_DIRTY)
@@ -2905,6 +2936,7 @@ maybe_sleep:
 		 */
 		if (iclog->ic_state & XLOG_STATE_IOERROR)
 			return XFS_ERROR(EIO);
+		*log_flushed = 1;
 
 	} else {
 
@@ -2930,7 +2962,8 @@ no_sleep:
 int
 xlog_state_sync(xlog_t	  *log,
 		xfs_lsn_t lsn,
-		uint	  flags)
+		uint	  flags,
+		int	  *log_flushed)
 {
     xlog_in_core_t	*iclog;
     int			already_slept = 0;
@@ -2982,6 +3015,7 @@ try_again:
 			XFS_STATS_INC(xs_log_force_sleep);
 			sv_wait(&iclog->ic_prev->ic_writesema, PSWP,
 				&log->l_icloglock, s);
+			*log_flushed = 1;
 			already_slept = 1;
 			goto try_again;
 		} else {
@@ -2990,6 +3024,7 @@ try_again:
 			LOG_UNLOCK(log, s);
 			if (xlog_state_release_iclog(log, iclog))
 				return XFS_ERROR(EIO);
+			*log_flushed = 1;
 			s = LOG_LOCK(log);
 		}
 	}
@@ -3014,6 +3049,7 @@ try_again:
 		 */
 		if (iclog->ic_state & XLOG_STATE_IOERROR)
 			return XFS_ERROR(EIO);
+		*log_flushed = 1;
 	} else {		/* just return */
 		LOG_UNLOCK(log, s);
 	}
@@ -3179,29 +3215,57 @@ xlog_ticket_get(xlog_t		*log,
 	 * and their unit amount is the total amount of space required.
 	 *
 	 * The following lines of code account for non-transaction data
-	 * which occupy space in the on-disk log. 
+	 * which occupy space in the on-disk log.
+	 *
+	 * Normal form of a transaction is:
+	 * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph>
+	 * and then there are LR hdrs, split-recs and roundoff at end of syncs.
+	 *
+	 * We need to account for all the leadup data and trailer data
+	 * around the transaction data.
+	 * And then we need to account for the worst case in terms of using
+	 * more space.
+	 * The worst case will happen if:
+	 * - the placement of the transaction happens to be such that the
+	 *   roundoff is at its maximum
+	 * - the transaction data is synced before the commit record is synced
+	 *   i.e. <transaction-data><roundoff> | <commit-rec><roundoff>
+	 *   Therefore the commit record is in its own Log Record.
+	 *   This can happen as the commit record is called with its
+	 *   own region to xlog_write().
+	 *   This then means that in the worst case, roundoff can happen for
+	 *   the commit-rec as well.
+	 *   The commit-rec is smaller than padding in this scenario and so it is
+	 *   not added separately.
 	 */
 
+	/* for trans header */
+	unit_bytes += sizeof(xlog_op_header_t);
+	unit_bytes += sizeof(xfs_trans_header_t);
+
 	/* for start-rec */
-	unit_bytes += sizeof(xlog_op_header_t); 
+	unit_bytes += sizeof(xlog_op_header_t);
+
+	/* for LR headers */
+	num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log);
+	unit_bytes += log->l_iclog_hsize * num_headers;
+
+	/* for commit-rec LR header - note: padding will subsume the ophdr */
+	unit_bytes += log->l_iclog_hsize;
 
-	/* for padding */
+	/* for split-recs - ophdrs added when data split over LRs */
+	unit_bytes += sizeof(xlog_op_header_t) * num_headers;
+
+	/* for roundoff padding for transaction data and one for commit record */
 	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) &&
-		log->l_mp->m_sb.sb_logsunit > 1) {
+	    log->l_mp->m_sb.sb_logsunit > 1) {
 		/* log su roundoff */
-		unit_bytes += log->l_mp->m_sb.sb_logsunit;  
+		unit_bytes += 2*log->l_mp->m_sb.sb_logsunit;
 	} else {
 		/* BB roundoff */
-		unit_bytes += BBSIZE;
+		unit_bytes += 2*BBSIZE;
         }
 
-	/* for commit-rec */
-	unit_bytes += sizeof(xlog_op_header_t);
- 
-	/* for LR headers */
-	num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log);
-	unit_bytes += log->l_iclog_hsize * num_headers;
-
 	tic->t_unit_res		= unit_bytes;
 	tic->t_curr_res		= unit_bytes;
 	tic->t_cnt		= cnt;
@@ -3209,10 +3273,13 @@ xlog_ticket_get(xlog_t		*log,
 	tic->t_tid		= (xlog_tid_t)((__psint_t)tic & 0xffffffff);
 	tic->t_clientid		= client;
 	tic->t_flags		= XLOG_TIC_INITED;
+	tic->t_trans_type	= 0;
 	if (xflags & XFS_LOG_PERM_RESERV)
 		tic->t_flags |= XLOG_TIC_PERM_RESERV;
 	sv_init(&(tic->t_sema), SV_DEFAULT, "logtick");
 
+	XLOG_TIC_RESET_RES(tic);
+
 	return tic;
 }	/* xlog_ticket_get */
 
@@ -3223,7 +3290,7 @@ xlog_ticket_get(xlog_t		*log,
  *
  ******************************************************************************
  */
-#if defined(DEBUG) && !defined(XLOG_NOLOG)
+#if defined(DEBUG)
 /*
  * Make sure that the destination ptr is within the valid data region of
  * one of the iclogs.  This uses backup pointers stored in a different
@@ -3364,7 +3431,9 @@ xlog_verify_iclog(xlog_t	 *log,
 			}
 		}
 		if (clientid != XFS_TRANSACTION && clientid != XFS_LOG)
-			cmn_err(CE_WARN, "xlog_verify_iclog: invalid clientid %d op 0x%p offset 0x%x", clientid, ophead, field_offset);
+			cmn_err(CE_WARN, "xlog_verify_iclog: "
+				"invalid clientid %d op 0x%p offset 0x%lx",
+				clientid, ophead, (unsigned long)field_offset);
 
 		/* check length */
 		field_offset = (__psint_t)
@@ -3385,7 +3454,7 @@ xlog_verify_iclog(xlog_t	 *log,
 		ptr += sizeof(xlog_op_header_t) + op_len;
 	}
 }	/* xlog_verify_iclog */
-#endif /* DEBUG && !XLOG_NOLOG */
+#endif
 
 /*
  * Mark all iclogs IOERROR. LOG_LOCK is held by the caller.
@@ -3435,6 +3504,7 @@ xfs_log_force_umount(
 	xlog_ticket_t	*tic;
 	xlog_t		*log;
 	int		retval;
+	int		dummy;
 	SPLDECL(s);
 	SPLDECL(s2);
 
@@ -3513,7 +3583,7 @@ xfs_log_force_umount(
 		 * Force the incore logs to disk before shutting the
 		 * log down completely.
 		 */
-		xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC);
+		xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy);
 		s2 = LOG_LOCK(log);
 		retval = xlog_state_ioerror(log);
 		LOG_UNLOCK(log, s2);
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 0db122ddda3..158829ca56f 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef	__XFS_LOG_H__
 #define __XFS_LOG_H__
@@ -114,9 +100,44 @@ xfs_lsn_t	_lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 #define XFS_VOLUME		0x2
 #define XFS_LOG			0xaa
 
+
+/* Region types for iovec's i_type */
+#if defined(XFS_LOG_RES_DEBUG)
+#define XLOG_REG_TYPE_BFORMAT		1
+#define XLOG_REG_TYPE_BCHUNK		2
+#define XLOG_REG_TYPE_EFI_FORMAT	3
+#define XLOG_REG_TYPE_EFD_FORMAT	4
+#define XLOG_REG_TYPE_IFORMAT		5
+#define XLOG_REG_TYPE_ICORE		6
+#define XLOG_REG_TYPE_IEXT		7
+#define XLOG_REG_TYPE_IBROOT		8
+#define XLOG_REG_TYPE_ILOCAL		9
+#define XLOG_REG_TYPE_IATTR_EXT		10
+#define XLOG_REG_TYPE_IATTR_BROOT	11
+#define XLOG_REG_TYPE_IATTR_LOCAL	12
+#define XLOG_REG_TYPE_QFORMAT		13
+#define XLOG_REG_TYPE_DQUOT		14
+#define XLOG_REG_TYPE_QUOTAOFF		15
+#define XLOG_REG_TYPE_LRHEADER		16
+#define XLOG_REG_TYPE_UNMOUNT		17
+#define XLOG_REG_TYPE_COMMIT		18
+#define XLOG_REG_TYPE_TRANSHDR		19
+#define XLOG_REG_TYPE_MAX		19
+#endif
+
+#if defined(XFS_LOG_RES_DEBUG)
+#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
+#else
+#define XLOG_VEC_SET_TYPE(vecp, t)
+#endif
+
+
 typedef struct xfs_log_iovec {
 	xfs_caddr_t		i_addr;		/* beginning address of region */
 	int		i_len;		/* length in bytes of region */
+#if defined(XFS_LOG_RES_DEBUG)
+ 	uint		i_type;		/* type of region */
+#endif
 } xfs_log_iovec_t;
 
 typedef void* xfs_log_ticket_t;
@@ -139,9 +160,12 @@ xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
 		       xfs_log_ticket_t ticket,
 		       void		**iclog,
 		       uint		flags);
-int	  xfs_log_force(struct xfs_mount *mp,
-			xfs_lsn_t	 lsn,
-			uint		 flags);
+int	  _xfs_log_force(struct xfs_mount *mp,
+			 xfs_lsn_t	lsn,
+			 uint		flags,
+			 int		*log_forced);
+#define xfs_log_force(mp, lsn, flags) \
+	_xfs_log_force(mp, lsn, flags, NULL);
 int	  xfs_log_mount(struct xfs_mount	*mp,
 			struct xfs_buftarg	*log_target,
 			xfs_daddr_t		start_block,
@@ -159,7 +183,8 @@ int	  xfs_log_reserve(struct xfs_mount *mp,
 			  int		   count,
 			  xfs_log_ticket_t *ticket,
 			  __uint8_t	   clientid,
-			  uint		   flags);
+			  uint		   flags,
+			  uint		   t_type);
 int	  xfs_log_write(struct xfs_mount *mp,
 			xfs_log_iovec_t  region[],
 			int		 nentries,
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 1a1d452f15f..8f285149681 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef	__XFS_LOG_PRIV_H__
 #define __XFS_LOG_PRIV_H__
@@ -35,6 +21,7 @@
 struct xfs_buf;
 struct ktrace;
 struct log;
+struct xlog_ticket;
 struct xfs_buf_cancel;
 struct xfs_mount;
 
@@ -112,7 +99,7 @@ struct xfs_mount;
  * this has endian issues, of course.
  */
 
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#ifndef XFS_NATIVE_HOST
 #define GET_CLIENT_ID(i,arch) \
     ((i) & 0xff)
 #else
@@ -120,77 +107,6 @@ struct xfs_mount;
     ((i) >> 24)
 #endif
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XLOG_GRANT_SUB_SPACE)
-void xlog_grant_sub_space(struct log *log, int bytes, int type);
-#define XLOG_GRANT_SUB_SPACE(log,bytes,type)	\
-	xlog_grant_sub_space(log,bytes,type)
-#else
-#define XLOG_GRANT_SUB_SPACE(log,bytes,type)				\
-    {									\
-	if (type == 'w') {						\
-		(log)->l_grant_write_bytes -= (bytes);			\
-		if ((log)->l_grant_write_bytes < 0) {			\
-			(log)->l_grant_write_bytes += (log)->l_logsize;	\
-			(log)->l_grant_write_cycle--;			\
-		}							\
-	} else {							\
-		(log)->l_grant_reserve_bytes -= (bytes);		\
-		if ((log)->l_grant_reserve_bytes < 0) {			\
-			(log)->l_grant_reserve_bytes += (log)->l_logsize;\
-			(log)->l_grant_reserve_cycle--;			\
-		}							\
-	 }								\
-    }
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XLOG_GRANT_ADD_SPACE)
-void xlog_grant_add_space(struct log *log, int bytes, int type);
-#define XLOG_GRANT_ADD_SPACE(log,bytes,type)	\
-	xlog_grant_add_space(log,bytes,type)
-#else
-#define XLOG_GRANT_ADD_SPACE(log,bytes,type)				\
-    {									\
-	if (type == 'w') {						\
-		(log)->l_grant_write_bytes += (bytes);			\
-		if ((log)->l_grant_write_bytes > (log)->l_logsize) {	\
-			(log)->l_grant_write_bytes -= (log)->l_logsize;	\
-			(log)->l_grant_write_cycle++;			\
-		}							\
-	} else {							\
-		(log)->l_grant_reserve_bytes += (bytes);		\
-		if ((log)->l_grant_reserve_bytes > (log)->l_logsize) {	\
-			(log)->l_grant_reserve_bytes -= (log)->l_logsize;\
-			(log)->l_grant_reserve_cycle++;			\
-		}							\
-	 }								\
-    }
-#endif
-#define XLOG_INS_TICKETQ(q,tic)				\
-    {							\
-	if (q) {					\
-		(tic)->t_next	    = (q);		\
-		(tic)->t_prev	    = (q)->t_prev;	\
-		(q)->t_prev->t_next = (tic);		\
-		(q)->t_prev	    = (tic);		\
-	} else {					\
-		(tic)->t_prev = (tic)->t_next = (tic);	\
-		(q) = (tic);				\
-	}						\
-	(tic)->t_flags |= XLOG_TIC_IN_Q;		\
-    }
-#define XLOG_DEL_TICKETQ(q,tic)				\
-    {							\
-	if ((tic) == (tic)->t_next) {			\
-		(q) = NULL;				\
-	} else {					\
-		(q) = (tic)->t_next;			\
-		(tic)->t_next->t_prev = (tic)->t_prev;	\
-		(tic)->t_prev->t_next = (tic)->t_next;	\
-	}						\
-	(tic)->t_next = (tic)->t_prev = NULL;		\
-	(tic)->t_flags &= ~XLOG_TIC_IN_Q;		\
-    }
-
-
 #define GRANT_LOCK(log)		mutex_spinlock(&(log)->l_grant_lock)
 #define GRANT_UNLOCK(log, s)	mutex_spinunlock(&(log)->l_grant_lock, s)
 #define LOG_LOCK(log)		mutex_spinlock(&(log)->l_icloglock)
@@ -335,18 +251,66 @@ typedef __uint32_t xlog_tid_t;
 
 #define XLOG_COVER_OPS		5
 
+
+/* Ticket reservation region accounting */ 
+#if defined(XFS_LOG_RES_DEBUG)
+#define XLOG_TIC_LEN_MAX	15
+#define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \
+				(t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0)
+#define XLOG_TIC_ADD_OPHDR(t) ((t)->t_res_num_ophdrs++)
+#define XLOG_TIC_ADD_REGION(t, len, type)				\
+	do {								\
+		if ((t)->t_res_num == XLOG_TIC_LEN_MAX) { 		\
+			/* add to overflow and start again */		\
+			(t)->t_res_o_flow += (t)->t_res_arr_sum;	\
+			(t)->t_res_num = 0;				\
+			(t)->t_res_arr_sum = 0;				\
+		}							\
+		(t)->t_res_arr[(t)->t_res_num].r_len = (len);		\
+		(t)->t_res_arr[(t)->t_res_num].r_type = (type);		\
+		(t)->t_res_arr_sum += (len);				\
+		(t)->t_res_num++;					\
+	} while (0)
+
+/*
+ * Reservation region
+ * As would be stored in xfs_log_iovec but without the i_addr which
+ * we don't care about.
+ */
+typedef struct xlog_res {
+	uint	r_len;
+	uint	r_type;
+} xlog_res_t;
+#else
+#define XLOG_TIC_RESET_RES(t)
+#define XLOG_TIC_ADD_OPHDR(t)
+#define XLOG_TIC_ADD_REGION(t, len, type)
+#endif
+
+
 typedef struct xlog_ticket {
-	sv_t		   t_sema;	 /* sleep on this semaphore	 :20 */
-	struct xlog_ticket *t_next;	 /*			         : 4 */
-	struct xlog_ticket *t_prev;	 /*				 : 4 */
-	xlog_tid_t	   t_tid;	 /* transaction identifier	 : 4 */
-	int		   t_curr_res;	 /* current reservation in bytes : 4 */
-	int		   t_unit_res;	 /* unit reservation in bytes    : 4 */
-	__uint8_t	   t_ocnt;	 /* original count		 : 1 */
-	__uint8_t	   t_cnt;	 /* current count		 : 1 */
-	__uint8_t	   t_clientid;	 /* who does this belong to;	 : 1 */
-	__uint8_t	   t_flags;	 /* properties of reservation	 : 1 */
+	sv_t		   t_sema;	 /* sleep on this semaphore      : 20 */
+ 	struct xlog_ticket *t_next;	 /*			         :4|8 */
+	struct xlog_ticket *t_prev;	 /*				 :4|8 */
+	xlog_tid_t	   t_tid;	 /* transaction identifier	 : 4  */
+	int		   t_curr_res;	 /* current reservation in bytes : 4  */
+	int		   t_unit_res;	 /* unit reservation in bytes    : 4  */
+	char		   t_ocnt;	 /* original count		 : 1  */
+	char		   t_cnt;	 /* current count		 : 1  */
+	char		   t_clientid;	 /* who does this belong to;	 : 1  */
+	char		   t_flags;	 /* properties of reservation	 : 1  */
+	uint		   t_trans_type; /* transaction type             : 4  */
+
+#if defined (XFS_LOG_RES_DEBUG)
+        /* reservation array fields */
+	uint		   t_res_num;                    /* num in array : 4 */
+	xlog_res_t	   t_res_arr[XLOG_TIC_LEN_MAX];  /* array of res : X */ 
+	uint		   t_res_num_ophdrs;		 /* num op hdrs  : 4 */
+	uint		   t_res_arr_sum;		 /* array sum    : 4 */
+	uint		   t_res_o_flow;		 /* sum overflow : 4 */
+#endif
 } xlog_ticket_t;
+
 #endif
 
 
@@ -366,14 +330,10 @@ typedef struct xlog_op_header {
 #define XLOG_FMT_IRIX_BE  3
 
 /* our fmt */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-#define XLOG_FMT XLOG_FMT_LINUX_LE
-#else
-#if __BYTE_ORDER == __BIG_ENDIAN
+#ifdef XFS_NATIVE_HOST
 #define XLOG_FMT XLOG_FMT_LINUX_BE
 #else
-#error unknown byte order
-#endif
+#define XLOG_FMT XLOG_FMT_LINUX_LE
 #endif
 
 typedef struct xlog_rec_header {
@@ -532,6 +492,77 @@ typedef struct log {
 						 * alignment mask */
 } xlog_t;
 
+#define XLOG_FORCED_SHUTDOWN(log)	((log)->l_flags & XLOG_IO_ERROR)
+
+#define XLOG_GRANT_SUB_SPACE(log,bytes,type)	\
+	xlog_grant_sub_space(log,bytes,type)
+static inline void xlog_grant_sub_space(struct log *log, int bytes, int type)
+{
+	if (type == 'w') {						\
+		(log)->l_grant_write_bytes -= (bytes);			\
+		if ((log)->l_grant_write_bytes < 0) {			\
+			(log)->l_grant_write_bytes += (log)->l_logsize;	\
+			(log)->l_grant_write_cycle--;			\
+		}							\
+	} else {							\
+		(log)->l_grant_reserve_bytes -= (bytes);		\
+		if ((log)->l_grant_reserve_bytes < 0) {			\
+			(log)->l_grant_reserve_bytes += (log)->l_logsize;\
+			(log)->l_grant_reserve_cycle--;			\
+		}							\
+	 }								\
+}
+
+#define XLOG_GRANT_ADD_SPACE(log,bytes,type)	\
+	xlog_grant_add_space(log,bytes,type)
+static inline void
+xlog_grant_add_space(struct log *log, int bytes, int type)
+{
+	if (type == 'w') {						\
+		(log)->l_grant_write_bytes += (bytes);			\
+		if ((log)->l_grant_write_bytes > (log)->l_logsize) {	\
+			(log)->l_grant_write_bytes -= (log)->l_logsize;	\
+			(log)->l_grant_write_cycle++;			\
+		}							\
+	} else {							\
+		(log)->l_grant_reserve_bytes += (bytes);		\
+		if ((log)->l_grant_reserve_bytes > (log)->l_logsize) {	\
+			(log)->l_grant_reserve_bytes -= (log)->l_logsize;\
+			(log)->l_grant_reserve_cycle++;			\
+		}							\
+	 }								\
+}
+
+#define XLOG_INS_TICKETQ(q, tic)	xlog_ins_ticketq(q, tic)
+static inline void
+xlog_ins_ticketq(struct xlog_ticket *q, struct xlog_ticket *tic)
+{							\
+	if (q) {					\
+		(tic)->t_next	    = (q);		\
+		(tic)->t_prev	    = (q)->t_prev;	\
+		(q)->t_prev->t_next = (tic);		\
+		(q)->t_prev	    = (tic);		\
+	} else {					\
+		(tic)->t_prev = (tic)->t_next = (tic);	\
+		(q) = (tic);				\
+	}						\
+	(tic)->t_flags |= XLOG_TIC_IN_Q;		\
+}
+
+#define XLOG_DEL_TICKETQ(q, tic)	xlog_del_ticketq(q, tic)
+static inline void
+xlog_del_ticketq(struct xlog_ticket *q, struct xlog_ticket *tic)
+{							\
+	if ((tic) == (tic)->t_next) {			\
+		(q) = NULL;				\
+	} else {					\
+		(q) = (tic)->t_next;			\
+		(tic)->t_next->t_prev = (tic)->t_prev;	\
+		(tic)->t_prev->t_next = (tic)->t_next;	\
+	}						\
+	(tic)->t_next = (tic)->t_prev = NULL;		\
+	(tic)->t_flags &= ~XLOG_TIC_IN_Q;		\
+}
 
 /* common routines */
 extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0aac28ddb81..8ab7df76806 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1,66 +1,51 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_sb.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_error.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_attr_sf.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_imap.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
-#include "xfs_ialloc_btree.h"
+#include "xfs_inode_item.h"
+#include "xfs_imap.h"
+#include "xfs_alloc.h"
 #include "xfs_ialloc.h"
 #include "xfs_log_priv.h"
 #include "xfs_buf_item.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_log_recover.h"
 #include "xfs_extfree_item.h"
 #include "xfs_trans_priv.h"
-#include "xfs_bit.h"
 #include "xfs_quota.h"
 #include "xfs_rw.h"
 
@@ -1387,7 +1372,7 @@ xlog_recover_add_to_cont_trans(
 	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
 	old_len = item->ri_buf[item->ri_cnt-1].i_len;
 
-	ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0);
+	ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u);
 	memcpy(&ptr[old_len], dp, len); /* d, s, l */
 	item->ri_buf[item->ri_cnt-1].i_len += len;
 	item->ri_buf[item->ri_cnt-1].i_addr = ptr;
@@ -2013,79 +1998,74 @@ xfs_qm_dqcheck(
 	 * This is all fine; things are still consistent, and we haven't lost
 	 * any quota information. Just don't complain about bad dquot blks.
 	 */
-	if (INT_GET(ddq->d_magic, ARCH_CONVERT) != XFS_DQUOT_MAGIC) {
+	if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) {
 		if (flags & XFS_QMOPT_DOWARN)
 			cmn_err(CE_ALERT,
 			"%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
-			str, id,
-			INT_GET(ddq->d_magic, ARCH_CONVERT), XFS_DQUOT_MAGIC);
+			str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
 		errs++;
 	}
-	if (INT_GET(ddq->d_version, ARCH_CONVERT) != XFS_DQUOT_VERSION) {
+	if (ddq->d_version != XFS_DQUOT_VERSION) {
 		if (flags & XFS_QMOPT_DOWARN)
 			cmn_err(CE_ALERT,
 			"%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
-			str, id,
-			INT_GET(ddq->d_magic, ARCH_CONVERT), XFS_DQUOT_VERSION);
+			str, id, ddq->d_version, XFS_DQUOT_VERSION);
 		errs++;
 	}
 
-	if (INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_USER &&
-	    INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_PROJ &&
-	    INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_GROUP) {
+	if (ddq->d_flags != XFS_DQ_USER &&
+	    ddq->d_flags != XFS_DQ_PROJ &&
+	    ddq->d_flags != XFS_DQ_GROUP) {
 		if (flags & XFS_QMOPT_DOWARN)
 			cmn_err(CE_ALERT,
 			"%s : XFS dquot ID 0x%x, unknown flags 0x%x",
-			str, id, INT_GET(ddq->d_flags, ARCH_CONVERT));
+			str, id, ddq->d_flags);
 		errs++;
 	}
 
-	if (id != -1 && id != INT_GET(ddq->d_id, ARCH_CONVERT)) {
+	if (id != -1 && id != be32_to_cpu(ddq->d_id)) {
 		if (flags & XFS_QMOPT_DOWARN)
 			cmn_err(CE_ALERT,
 			"%s : ondisk-dquot 0x%p, ID mismatch: "
 			"0x%x expected, found id 0x%x",
-			str, ddq, id, INT_GET(ddq->d_id, ARCH_CONVERT));
+			str, ddq, id, be32_to_cpu(ddq->d_id));
 		errs++;
 	}
 
 	if (!errs && ddq->d_id) {
-		if (INT_GET(ddq->d_blk_softlimit, ARCH_CONVERT) &&
-		    INT_GET(ddq->d_bcount, ARCH_CONVERT) >=
-				INT_GET(ddq->d_blk_softlimit, ARCH_CONVERT)) {
+		if (ddq->d_blk_softlimit &&
+		    be64_to_cpu(ddq->d_bcount) >=
+				be64_to_cpu(ddq->d_blk_softlimit)) {
 			if (!ddq->d_btimer) {
 				if (flags & XFS_QMOPT_DOWARN)
 					cmn_err(CE_ALERT,
 					"%s : Dquot ID 0x%x (0x%p) "
 					"BLK TIMER NOT STARTED",
-					str, (int)
-					INT_GET(ddq->d_id, ARCH_CONVERT), ddq);
+					str, (int)be32_to_cpu(ddq->d_id), ddq);
 				errs++;
 			}
 		}
-		if (INT_GET(ddq->d_ino_softlimit, ARCH_CONVERT) &&
-		    INT_GET(ddq->d_icount, ARCH_CONVERT) >=
-				INT_GET(ddq->d_ino_softlimit, ARCH_CONVERT)) {
+		if (ddq->d_ino_softlimit &&
+		    be64_to_cpu(ddq->d_icount) >=
+				be64_to_cpu(ddq->d_ino_softlimit)) {
 			if (!ddq->d_itimer) {
 				if (flags & XFS_QMOPT_DOWARN)
 					cmn_err(CE_ALERT,
 					"%s : Dquot ID 0x%x (0x%p) "
 					"INODE TIMER NOT STARTED",
-					str, (int)
-					INT_GET(ddq->d_id, ARCH_CONVERT), ddq);
+					str, (int)be32_to_cpu(ddq->d_id), ddq);
 				errs++;
 			}
 		}
-		if (INT_GET(ddq->d_rtb_softlimit, ARCH_CONVERT) &&
-		    INT_GET(ddq->d_rtbcount, ARCH_CONVERT) >=
-				INT_GET(ddq->d_rtb_softlimit, ARCH_CONVERT)) {
+		if (ddq->d_rtb_softlimit &&
+		    be64_to_cpu(ddq->d_rtbcount) >=
+				be64_to_cpu(ddq->d_rtb_softlimit)) {
 			if (!ddq->d_rtbtimer) {
 				if (flags & XFS_QMOPT_DOWARN)
 					cmn_err(CE_ALERT,
 					"%s : Dquot ID 0x%x (0x%p) "
 					"RTBLK TIMER NOT STARTED",
-					str, (int)
-					INT_GET(ddq->d_id, ARCH_CONVERT), ddq);
+					str, (int)be32_to_cpu(ddq->d_id), ddq);
 				errs++;
 			}
 		}
@@ -2103,10 +2083,11 @@ xfs_qm_dqcheck(
 	ASSERT(id != -1);
 	ASSERT(flags & XFS_QMOPT_DQREPAIR);
 	memset(d, 0, sizeof(xfs_dqblk_t));
-	INT_SET(d->dd_diskdq.d_magic, ARCH_CONVERT, XFS_DQUOT_MAGIC);
-	INT_SET(d->dd_diskdq.d_version, ARCH_CONVERT, XFS_DQUOT_VERSION);
-	INT_SET(d->dd_diskdq.d_id, ARCH_CONVERT, id);
-	INT_SET(d->dd_diskdq.d_flags, ARCH_CONVERT, type);
+
+	d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
+	d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
+	d->dd_diskdq.d_flags = type;
+	d->dd_diskdq.d_id = cpu_to_be32(id);
 
 	return errs;
 }
@@ -2226,8 +2207,9 @@ xlog_recover_do_buffer_trans(
 		break;
 	default:
 		xfs_fs_cmn_err(CE_ALERT, log->l_mp,
-			"xfs_log_recover: unknown buffer type 0x%x, dev %s",
-			buf_f->blf_type, XFS_BUFTARG_NAME(log->l_targ));
+			"xfs_log_recover: unknown buffer type 0x%x, logdev %s",
+			buf_f->blf_type, log->l_mp->m_logname ?
+			log->l_mp->m_logname : "internal");
 		XFS_ERROR_REPORT("xlog_recover_do_buffer_trans",
 				 XFS_ERRLEVEL_LOW, log->l_mp);
 		return XFS_ERROR(EFSCORRUPTED);
@@ -3178,13 +3160,12 @@ xlog_recover_clear_agi_bucket(
 	}
 
 	agi = XFS_BUF_TO_AGI(agibp);
-	if (INT_GET(agi->agi_magicnum, ARCH_CONVERT) != XFS_AGI_MAGIC) {
+	if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) {
 		xfs_trans_cancel(tp, XFS_TRANS_ABORT);
 		return;
 	}
-	ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
 
-	INT_SET(agi->agi_unlinked[bucket], ARCH_CONVERT, NULLAGINO);
+	agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
 	offset = offsetof(xfs_agi_t, agi_unlinked) +
 		 (sizeof(xfs_agino_t) * bucket);
 	xfs_trans_log_buf(tp, agibp, offset,
@@ -3243,12 +3224,11 @@ xlog_recover_process_iunlinks(
 				XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)));
 		}
 		agi = XFS_BUF_TO_AGI(agibp);
-		ASSERT(XFS_AGI_MAGIC ==
-			INT_GET(agi->agi_magicnum, ARCH_CONVERT));
+		ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agi->agi_magicnum));
 
 		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
 
-			agino = INT_GET(agi->agi_unlinked[bucket], ARCH_CONVERT);
+			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
 			while (agino != NULLAGINO) {
 
 				/*
@@ -3336,8 +3316,8 @@ xlog_recover_process_iunlinks(
 							XFS_AGI_DADDR(mp)));
 				}
 				agi = XFS_BUF_TO_AGI(agibp);
-				ASSERT(XFS_AGI_MAGIC == INT_GET(
-					agi->agi_magicnum, ARCH_CONVERT));
+				ASSERT(XFS_AGI_MAGIC == be32_to_cpu(
+					agi->agi_magicnum));
 			}
 		}
 
@@ -3938,8 +3918,9 @@ xlog_recover(
 		}
 
 		cmn_err(CE_NOTE,
-			"Starting XFS recovery on filesystem: %s (dev: %s)",
-			log->l_mp->m_fsname, XFS_BUFTARG_NAME(log->l_targ));
+			"Starting XFS recovery on filesystem: %s (logdev: %s)",
+			log->l_mp->m_fsname, log->l_mp->m_logname ?
+			log->l_mp->m_logname : "internal");
 
 		error = xlog_do_recover(log, head_blk, tail_blk);
 		log->l_flags |= XLOG_RECOVERY_NEEDED;
@@ -3987,8 +3968,9 @@ xlog_recover_finish(
 		xlog_recover_check_summary(log);
 
 		cmn_err(CE_NOTE,
-			"Ending XFS recovery on filesystem: %s (dev: %s)",
-			log->l_mp->m_fsname, XFS_BUFTARG_NAME(log->l_targ));
+			"Ending XFS recovery on filesystem: %s (logdev: %s)",
+			log->l_mp->m_fsname, log->l_mp->m_logname ?
+			log->l_mp->m_logname : "internal");
 		log->l_flags &= ~XLOG_RECOVERY_NEEDED;
 	} else {
 		cmn_err(CE_DEBUG,
@@ -4038,14 +4020,12 @@ xlog_recover_check_summary(
 						mp, agfbp, agfdaddr);
 		}
 		agfp = XFS_BUF_TO_AGF(agfbp);
-		ASSERT(XFS_AGF_MAGIC ==
-			INT_GET(agfp->agf_magicnum, ARCH_CONVERT));
-		ASSERT(XFS_AGF_GOOD_VERSION(
-			INT_GET(agfp->agf_versionnum, ARCH_CONVERT)));
-		ASSERT(INT_GET(agfp->agf_seqno, ARCH_CONVERT) == agno);
-
-		freeblks += INT_GET(agfp->agf_freeblks, ARCH_CONVERT) +
-			    INT_GET(agfp->agf_flcount, ARCH_CONVERT);
+		ASSERT(XFS_AGF_MAGIC == be32_to_cpu(agfp->agf_magicnum));
+		ASSERT(XFS_AGF_GOOD_VERSION(be32_to_cpu(agfp->agf_versionnum)));
+		ASSERT(be32_to_cpu(agfp->agf_seqno) == agno);
+
+		freeblks += be32_to_cpu(agfp->agf_freeblks) +
+			    be32_to_cpu(agfp->agf_flcount);
 		xfs_buf_relse(agfbp);
 
 		agidaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp));
@@ -4056,14 +4036,12 @@ xlog_recover_check_summary(
 					  mp, agibp, agidaddr);
 		}
 		agip = XFS_BUF_TO_AGI(agibp);
-		ASSERT(XFS_AGI_MAGIC ==
-			INT_GET(agip->agi_magicnum, ARCH_CONVERT));
-		ASSERT(XFS_AGI_GOOD_VERSION(
-			INT_GET(agip->agi_versionnum, ARCH_CONVERT)));
-		ASSERT(INT_GET(agip->agi_seqno, ARCH_CONVERT) == agno);
-
-		itotal += INT_GET(agip->agi_count, ARCH_CONVERT);
-		ifree += INT_GET(agip->agi_freecount, ARCH_CONVERT);
+		ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agip->agi_magicnum));
+		ASSERT(XFS_AGI_GOOD_VERSION(be32_to_cpu(agip->agi_versionnum)));
+		ASSERT(be32_to_cpu(agip->agi_seqno) == agno);
+
+		itotal += be32_to_cpu(agip->agi_count);
+		ifree += be32_to_cpu(agip->agi_freecount);
 		xfs_buf_relse(agibp);
 	}
 
diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/xfs_log_recover.h
index 42158b442b5..b2254555530 100644
--- a/fs/xfs/xfs_log_recover.h
+++ b/fs/xfs/xfs_log_recover.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef	__XFS_LOG_RECOVER_H__
 #define __XFS_LOG_RECOVER_H__
diff --git a/fs/xfs/xfs_mac.h b/fs/xfs/xfs_mac.h
index 8d59aaffeb8..18e0e98e03d 100644
--- a/fs/xfs/xfs_mac.h
+++ b/fs/xfs/xfs_mac.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2001-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_MAC_H__
 #define __XFS_MAC_H__
diff --git a/fs/xfs/xfs_macros.c b/fs/xfs/xfs_macros.c
deleted file mode 100644
index 698c2cd6285..00000000000
--- a/fs/xfs/xfs_macros.c
+++ /dev/null
@@ -1,2141 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-#define	XFS_MACRO_C
-
-#include "xfs.h"
-#include "xfs_macros.h"
-#include "xfs_types.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
-#include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_btree.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_ialloc.h"
-#include "xfs_inode_item.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rw.h"
-#include "xfs_log_priv.h"
-#include "xfs_da_btree.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_dir_leaf.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
-#include "xfs_dir2_node.h"
-#include "xfs_bit.h"
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_ISNULLDSTARTBLOCK)
-int
-isnulldstartblock(xfs_dfsbno_t x)
-{
-	return ISNULLDSTARTBLOCK(x);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_ISNULLSTARTBLOCK)
-int
-isnullstartblock(xfs_fsblock_t x)
-{
-	return ISNULLSTARTBLOCK(x);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_NULLSTARTBLOCK)
-xfs_fsblock_t
-nullstartblock(int k)
-{
-	return NULLSTARTBLOCK(k);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_STARTBLOCKVAL)
-xfs_filblks_t
-startblockval(xfs_fsblock_t x)
-{
-	return STARTBLOCKVAL(x);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AG_CHECK_DADDR)
-void
-xfs_ag_check_daddr(xfs_mount_t *mp, xfs_daddr_t d, xfs_extlen_t len)
-{
-	XFS_AG_CHECK_DADDR(mp, d, len);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AG_DADDR)
-xfs_daddr_t
-xfs_ag_daddr(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_daddr_t d)
-{
-	return XFS_AG_DADDR(mp, agno, d);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AG_MAXLEVELS)
-int
-xfs_ag_maxlevels(xfs_mount_t *mp)
-{
-	return XFS_AG_MAXLEVELS(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AGB_TO_DADDR)
-xfs_daddr_t
-xfs_agb_to_daddr(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agblock_t agbno)
-{
-	return XFS_AGB_TO_DADDR(mp, agno, agbno);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AGB_TO_FSB)
-xfs_fsblock_t
-xfs_agb_to_fsb(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agblock_t agbno)
-{
-	return XFS_AGB_TO_FSB(mp, agno, agbno);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AGBLOCK_MAX)
-xfs_agblock_t
-xfs_agblock_max(xfs_agblock_t a, xfs_agblock_t b)
-{
-	return XFS_AGBLOCK_MAX(a, b);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AGBLOCK_MIN)
-xfs_agblock_t
-xfs_agblock_min(xfs_agblock_t a, xfs_agblock_t b)
-{
-	return XFS_AGBLOCK_MIN(a, b);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AGF_BLOCK)
-xfs_agblock_t
-xfs_agf_block(xfs_mount_t *mp)
-{
-	return XFS_AGF_BLOCK(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AGF_GOOD_VERSION)
-int
-xfs_agf_good_version(unsigned v)
-{
-	return XFS_AGF_GOOD_VERSION(v);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AGFL_BLOCK)
-xfs_agblock_t
-xfs_agfl_block(xfs_mount_t *mp)
-{
-	return XFS_AGFL_BLOCK(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AGI_BLOCK)
-xfs_agblock_t
-xfs_agi_block(xfs_mount_t *mp)
-{
-	return XFS_AGI_BLOCK(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AGI_GOOD_VERSION)
-int
-xfs_agi_good_version(unsigned v)
-{
-	return XFS_AGI_GOOD_VERSION(v);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AGINO_TO_AGBNO)
-xfs_agblock_t
-xfs_agino_to_agbno(xfs_mount_t *mp, xfs_agino_t i)
-{
-	return XFS_AGINO_TO_AGBNO(mp, i);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AGINO_TO_INO)
-xfs_ino_t
-xfs_agino_to_ino(xfs_mount_t *mp, xfs_agnumber_t a, xfs_agino_t i)
-{
-	return XFS_AGINO_TO_INO(mp, a, i);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_AGINO_TO_OFFSET)
-int
-xfs_agino_to_offset(xfs_mount_t *mp, xfs_agino_t i)
-{
-	return XFS_AGINO_TO_OFFSET(mp, i);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ALLOC_BLOCK_MAXRECS)
-int
-xfs_alloc_block_maxrecs(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_ALLOC_BLOCK_MAXRECS(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ALLOC_BLOCK_MINRECS)
-int
-xfs_alloc_block_minrecs(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_ALLOC_BLOCK_MINRECS(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ALLOC_BLOCK_SIZE)
-/*ARGSUSED1*/
-int
-xfs_alloc_block_size(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_ALLOC_BLOCK_SIZE(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ALLOC_KEY_ADDR)
-/*ARGSUSED3*/
-xfs_alloc_key_t *
-xfs_alloc_key_addr(xfs_alloc_block_t *bb, int i, xfs_btree_cur_t *cur)
-{
-	return XFS_ALLOC_KEY_ADDR(bb, i, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ALLOC_PTR_ADDR)
-xfs_alloc_ptr_t *
-xfs_alloc_ptr_addr(xfs_alloc_block_t *bb, int i, xfs_btree_cur_t *cur)
-{
-	return XFS_ALLOC_PTR_ADDR(bb, i, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ALLOC_REC_ADDR)
-/*ARGSUSED3*/
-xfs_alloc_rec_t *
-xfs_alloc_rec_addr(xfs_alloc_block_t *bb, int i, xfs_btree_cur_t *cur)
-{
-	return XFS_ALLOC_REC_ADDR(bb, i, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ATTR_LEAF_ENTSIZE_LOCAL)
-int
-xfs_attr_leaf_entsize_local(int nlen, int vlen)
-{
-	return XFS_ATTR_LEAF_ENTSIZE_LOCAL(nlen, vlen);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX)
-int
-xfs_attr_leaf_entsize_local_max(int bsize)
-{
-	return XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(bsize);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ATTR_LEAF_ENTSIZE_REMOTE)
-int
-xfs_attr_leaf_entsize_remote(int nlen)
-{
-	return XFS_ATTR_LEAF_ENTSIZE_REMOTE(nlen);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ATTR_LEAF_NAME)
-char *
-xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
-{
-	return XFS_ATTR_LEAF_NAME(leafp, idx);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ATTR_LEAF_NAME_LOCAL)
-xfs_attr_leaf_name_local_t *
-xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
-{
-	return XFS_ATTR_LEAF_NAME_LOCAL(leafp, idx);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ATTR_LEAF_NAME_REMOTE)
-xfs_attr_leaf_name_remote_t *
-xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
-{
-	return XFS_ATTR_LEAF_NAME_REMOTE(leafp, idx);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ATTR_SF_ENTSIZE)
-int
-xfs_attr_sf_entsize(xfs_attr_sf_entry_t *sfep)
-{
-	return XFS_ATTR_SF_ENTSIZE(sfep);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ATTR_SF_ENTSIZE_BYNAME)
-int
-xfs_attr_sf_entsize_byname(int nlen, int vlen)
-{
-	return XFS_ATTR_SF_ENTSIZE_BYNAME(nlen, vlen);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ATTR_SF_NEXTENTRY)
-xfs_attr_sf_entry_t *
-xfs_attr_sf_nextentry(xfs_attr_sf_entry_t *sfep)
-{
-	return XFS_ATTR_SF_NEXTENTRY(sfep);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ATTR_SF_TOTSIZE)
-int
-xfs_attr_sf_totsize(xfs_inode_t *dp)
-{
-	return XFS_ATTR_SF_TOTSIZE(dp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BHVTOI)
-xfs_inode_t *
-xfs_bhvtoi(bhv_desc_t *bhvp)
-{
-	return XFS_BHVTOI(bhvp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BHVTOM)
-xfs_mount_t *
-xfs_bhvtom(bhv_desc_t *bdp)
-{
-	return XFS_BHVTOM(bdp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_VFSTOM)
-xfs_mount_t *
-xfs_vfstom(vfs_t *vfs)
-{
-	return XFS_VFSTOM(vfs);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BM_MAXLEVELS)
-int
-xfs_bm_maxlevels(xfs_mount_t *mp, int w)
-{
-	return XFS_BM_MAXLEVELS(mp, w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_BLOCK_DMAXRECS)
-int
-xfs_bmap_block_dmaxrecs(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_BLOCK_DMAXRECS(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_BLOCK_DMINRECS)
-int
-xfs_bmap_block_dminrecs(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_BLOCK_DMINRECS(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_BLOCK_DSIZE)
-int
-xfs_bmap_block_dsize(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_BLOCK_DSIZE(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_BLOCK_IMAXRECS)
-int
-xfs_bmap_block_imaxrecs(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_BLOCK_IMAXRECS(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_BLOCK_IMINRECS)
-int
-xfs_bmap_block_iminrecs(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_BLOCK_IMINRECS(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_BLOCK_ISIZE)
-int
-xfs_bmap_block_isize(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_BLOCK_ISIZE(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_BROOT_KEY_ADDR)
-/*ARGSUSED3*/
-xfs_bmbt_key_t *
-xfs_bmap_broot_key_addr(xfs_bmbt_block_t *bb, int i, int sz)
-{
-	return XFS_BMAP_BROOT_KEY_ADDR(bb, i, sz);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_BROOT_MAXRECS)
-int
-xfs_bmap_broot_maxrecs(int sz)
-{
-	return XFS_BMAP_BROOT_MAXRECS(sz);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_BROOT_NUMRECS)
-int
-xfs_bmap_broot_numrecs(xfs_bmdr_block_t *bb)
-{
-	return XFS_BMAP_BROOT_NUMRECS(bb);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_BROOT_PTR_ADDR)
-xfs_bmbt_ptr_t *
-xfs_bmap_broot_ptr_addr(xfs_bmbt_block_t *bb, int i, int sz)
-{
-	return XFS_BMAP_BROOT_PTR_ADDR(bb, i, sz);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_BROOT_REC_ADDR)
-/*ARGSUSED3*/
-xfs_bmbt_rec_t *
-xfs_bmap_broot_rec_addr(xfs_bmbt_block_t *bb, int i, int sz)
-{
-	return XFS_BMAP_BROOT_REC_ADDR(bb, i, sz);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_BROOT_SPACE)
-int
-xfs_bmap_broot_space(xfs_bmdr_block_t *bb)
-{
-	return XFS_BMAP_BROOT_SPACE(bb);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_BROOT_SPACE_CALC)
-int
-xfs_bmap_broot_space_calc(int nrecs)
-{
-	return XFS_BMAP_BROOT_SPACE_CALC(nrecs);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_IBLOCK_SIZE)
-/*ARGSUSED1*/
-int
-xfs_bmap_iblock_size(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_IBLOCK_SIZE(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_INIT)
-void
-xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
-{
-	XFS_BMAP_INIT(flp, fbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_KEY_DADDR)
-/*ARGSUSED3*/
-xfs_bmbt_key_t *
-xfs_bmap_key_daddr(xfs_bmbt_block_t *bb, int i, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_KEY_DADDR(bb, i, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_KEY_IADDR)
-/*ARGSUSED3*/
-xfs_bmbt_key_t *
-xfs_bmap_key_iaddr(xfs_bmbt_block_t *bb, int i, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_KEY_IADDR(bb, i, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_PTR_DADDR)
-xfs_bmbt_ptr_t *
-xfs_bmap_ptr_daddr(xfs_bmbt_block_t *bb, int i, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_PTR_DADDR(bb, i, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_PTR_IADDR)
-xfs_bmbt_ptr_t *
-xfs_bmap_ptr_iaddr(xfs_bmbt_block_t *bb, int i, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_PTR_IADDR(bb, i, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_RBLOCK_DSIZE)
-/*ARGSUSED1*/
-int
-xfs_bmap_rblock_dsize(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_RBLOCK_DSIZE(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_RBLOCK_ISIZE)
-/*ARGSUSED1*/
-int
-xfs_bmap_rblock_isize(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_RBLOCK_ISIZE(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_REC_DADDR)
-/*ARGSUSED3*/
-xfs_bmbt_rec_t *
-xfs_bmap_rec_daddr(xfs_bmbt_block_t *bb, int i, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_REC_DADDR(bb, i, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_REC_IADDR)
-/*ARGSUSED3*/
-xfs_bmbt_rec_t *
-xfs_bmap_rec_iaddr(xfs_bmbt_block_t *bb, int i, xfs_btree_cur_t *cur)
-{
-	return XFS_BMAP_REC_IADDR(bb, i, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAP_SANITY_CHECK)
-int
-xfs_bmap_sanity_check(xfs_mount_t *mp, xfs_bmbt_block_t *bb, int level)
-{
-	return XFS_BMAP_SANITY_CHECK(mp, bb, level);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMAPI_AFLAG)
-int
-xfs_bmapi_aflag(int w)
-{
-	return XFS_BMAPI_AFLAG(w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BMDR_SPACE_CALC)
-int
-xfs_bmdr_space_calc(int nrecs)
-{
-	return XFS_BMDR_SPACE_CALC(nrecs);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BNO_BLOCK)
-xfs_agblock_t
-xfs_bno_block(xfs_mount_t *mp)
-{
-	return XFS_BNO_BLOCK(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BTREE_LONG_PTRS)
-int
-xfs_btree_long_ptrs(xfs_btnum_t btnum)
-{
-	return XFS_BTREE_LONG_PTRS(btnum);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BUF_TO_AGF)
-xfs_agf_t *
-xfs_buf_to_agf(xfs_buf_t *bp)
-{
-	return XFS_BUF_TO_AGF(bp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BUF_TO_AGFL)
-xfs_agfl_t *
-xfs_buf_to_agfl(xfs_buf_t *bp)
-{
-	return XFS_BUF_TO_AGFL(bp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BUF_TO_AGI)
-xfs_agi_t *
-xfs_buf_to_agi(xfs_buf_t *bp)
-{
-	return XFS_BUF_TO_AGI(bp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BUF_TO_ALLOC_BLOCK)
-xfs_alloc_block_t *
-xfs_buf_to_alloc_block(xfs_buf_t *bp)
-{
-	return XFS_BUF_TO_ALLOC_BLOCK(bp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BUF_TO_BLOCK)
-xfs_btree_block_t *
-xfs_buf_to_block(xfs_buf_t *bp)
-{
-	return XFS_BUF_TO_BLOCK(bp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BUF_TO_BMBT_BLOCK)
-xfs_bmbt_block_t *
-xfs_buf_to_bmbt_block(xfs_buf_t *bp)
-{
-	return XFS_BUF_TO_BMBT_BLOCK(bp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BUF_TO_DINODE)
-xfs_dinode_t *
-xfs_buf_to_dinode(xfs_buf_t *bp)
-{
-	return XFS_BUF_TO_DINODE(bp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BUF_TO_INOBT_BLOCK)
-xfs_inobt_block_t *
-xfs_buf_to_inobt_block(xfs_buf_t *bp)
-{
-	return XFS_BUF_TO_INOBT_BLOCK(bp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BUF_TO_LBLOCK)
-xfs_btree_lblock_t *
-xfs_buf_to_lblock(xfs_buf_t *bp)
-{
-	return XFS_BUF_TO_LBLOCK(bp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BUF_TO_SBLOCK)
-xfs_btree_sblock_t *
-xfs_buf_to_sblock(xfs_buf_t *bp)
-{
-	return XFS_BUF_TO_SBLOCK(bp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_BUF_TO_SBP)
-xfs_sb_t *
-xfs_buf_to_sbp(xfs_buf_t *bp)
-{
-	return XFS_BUF_TO_SBP(bp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_CFORK_ASIZE)
-int
-xfs_cfork_asize_disk(xfs_dinode_core_t *dcp, xfs_mount_t *mp)
-{
-	return XFS_CFORK_ASIZE_DISK(dcp, mp);
-}
-int
-xfs_cfork_asize(xfs_dinode_core_t *dcp, xfs_mount_t *mp)
-{
-	return XFS_CFORK_ASIZE(dcp, mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_CFORK_BOFF)
-int
-xfs_cfork_boff_disk(xfs_dinode_core_t *dcp)
-{
-	return XFS_CFORK_BOFF_DISK(dcp);
-}
-int
-xfs_cfork_boff(xfs_dinode_core_t *dcp)
-{
-	return XFS_CFORK_BOFF(dcp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_CFORK_DSIZE)
-int
-xfs_cfork_dsize_disk(xfs_dinode_core_t *dcp, xfs_mount_t *mp)
-{
-	return XFS_CFORK_DSIZE_DISK(dcp, mp);
-}
-int
-xfs_cfork_dsize(xfs_dinode_core_t *dcp, xfs_mount_t *mp)
-{
-	return XFS_CFORK_DSIZE(dcp, mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_CFORK_FMT_SET)
-void
-xfs_cfork_fmt_set(xfs_dinode_core_t *dcp, int w, int n)
-{
-	XFS_CFORK_FMT_SET(dcp, w, n);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_CFORK_FORMAT)
-int
-xfs_cfork_format(xfs_dinode_core_t *dcp, int w)
-{
-	return XFS_CFORK_FORMAT(dcp, w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_CFORK_NEXT_SET)
-void
-xfs_cfork_next_set(xfs_dinode_core_t *dcp, int w, int n)
-{
-	XFS_CFORK_NEXT_SET(dcp, w, n);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_CFORK_NEXTENTS)
-int
-xfs_cfork_nextents_disk(xfs_dinode_core_t *dcp, int w)
-{
-	return XFS_CFORK_NEXTENTS_DISK(dcp, w);
-}
-int
-xfs_cfork_nextents(xfs_dinode_core_t *dcp, int w)
-{
-	return XFS_CFORK_NEXTENTS(dcp, w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_CFORK_Q)
-int
-xfs_cfork_q_disk(xfs_dinode_core_t *dcp)
-{
-	return XFS_CFORK_Q_DISK(dcp);
-}
-int
-xfs_cfork_q(xfs_dinode_core_t *dcp)
-{
-	return XFS_CFORK_Q(dcp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_CFORK_SIZE)
-int
-xfs_cfork_size_disk(xfs_dinode_core_t *dcp, xfs_mount_t *mp, int w)
-{
-	return XFS_CFORK_SIZE_DISK(dcp, mp, w);
-}
-int
-xfs_cfork_size(xfs_dinode_core_t *dcp, xfs_mount_t *mp, int w)
-{
-	return XFS_CFORK_SIZE(dcp, mp, w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_CNT_BLOCK)
-xfs_agblock_t
-xfs_cnt_block(xfs_mount_t *mp)
-{
-	return XFS_CNT_BLOCK(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DA_COOKIE_BNO)
-xfs_dablk_t
-xfs_da_cookie_bno(xfs_mount_t *mp, xfs_off_t cookie)
-{
-	return XFS_DA_COOKIE_BNO(mp, cookie);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DA_COOKIE_ENTRY)
-int
-xfs_da_cookie_entry(xfs_mount_t *mp, xfs_off_t cookie)
-{
-	return XFS_DA_COOKIE_ENTRY(mp, cookie);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DA_COOKIE_HASH)
-/*ARGSUSED1*/
-xfs_dahash_t
-xfs_da_cookie_hash(xfs_mount_t *mp, xfs_off_t cookie)
-{
-	return XFS_DA_COOKIE_HASH(mp, cookie);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DA_MAKE_BNOENTRY)
-__uint32_t
-xfs_da_make_bnoentry(xfs_mount_t *mp, xfs_dablk_t bno, int entry)
-{
-	return XFS_DA_MAKE_BNOENTRY(mp, bno, entry);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DA_MAKE_COOKIE)
-xfs_off_t
-xfs_da_make_cookie(xfs_mount_t *mp, xfs_dablk_t bno, int entry,
-		   xfs_dahash_t hash)
-{
-	return XFS_DA_MAKE_COOKIE(mp, bno, entry, hash);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DADDR_TO_AGBNO)
-xfs_agblock_t
-xfs_daddr_to_agbno(xfs_mount_t *mp, xfs_daddr_t d)
-{
-	return XFS_DADDR_TO_AGBNO(mp, d);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DADDR_TO_AGNO)
-xfs_agnumber_t
-xfs_daddr_to_agno(xfs_mount_t *mp, xfs_daddr_t d)
-{
-	return XFS_DADDR_TO_AGNO(mp, d);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DADDR_TO_FSB)
-xfs_fsblock_t
-xfs_daddr_to_fsb(xfs_mount_t *mp, xfs_daddr_t d)
-{
-	return XFS_DADDR_TO_FSB(mp, d);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DFORK_APTR)
-char *
-xfs_dfork_aptr(xfs_dinode_t *dip)
-{
-	return XFS_DFORK_APTR(dip);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DFORK_ASIZE)
-int
-xfs_dfork_asize(xfs_dinode_t *dip, xfs_mount_t *mp)
-{
-	return XFS_DFORK_ASIZE(dip, mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DFORK_BOFF)
-int
-xfs_dfork_boff(xfs_dinode_t *dip)
-{
-	return XFS_DFORK_BOFF(dip);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DFORK_DPTR)
-char *
-xfs_dfork_dptr(xfs_dinode_t *dip)
-{
-	return XFS_DFORK_DPTR(dip);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DFORK_DSIZE)
-int
-xfs_dfork_dsize(xfs_dinode_t *dip, xfs_mount_t *mp)
-{
-	return XFS_DFORK_DSIZE(dip, mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DFORK_NEXTENTS)
-int
-xfs_dfork_nextents(xfs_dinode_t *dip, int w)
-{
-	return XFS_DFORK_NEXTENTS(dip, w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DFORK_PTR)
-char *
-xfs_dfork_ptr(xfs_dinode_t *dip, int w)
-{
-	return XFS_DFORK_PTR(dip, w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DFORK_Q)
-int
-xfs_dfork_q(xfs_dinode_t *dip)
-{
-	return XFS_DFORK_Q(dip);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DFORK_SIZE)
-int
-xfs_dfork_size(xfs_dinode_t *dip, xfs_mount_t *mp, int w)
-{
-	return XFS_DFORK_SIZE(dip, mp, w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DINODE_GOOD_VERSION)
-int
-xfs_dinode_good_version(int v)
-{
-	return XFS_DINODE_GOOD_VERSION(v);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR_LEAF_ENTSIZE_BYENTRY)
-int
-xfs_dir_leaf_entsize_byentry(xfs_dir_leaf_entry_t *entry)
-{
-	return XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR_LEAF_ENTSIZE_BYNAME)
-int
-xfs_dir_leaf_entsize_byname(int len)
-{
-	return XFS_DIR_LEAF_ENTSIZE_BYNAME(len);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR_LEAF_NAMESTRUCT)
-xfs_dir_leaf_name_t *
-xfs_dir_leaf_namestruct(xfs_dir_leafblock_t *leafp, int offset)
-{
-	return XFS_DIR_LEAF_NAMESTRUCT(leafp, offset);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR_SF_ALLFIT)
-int
-xfs_dir_sf_allfit(int count, int totallen)
-{
-	return XFS_DIR_SF_ALLFIT(count, totallen);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR_SF_ENTSIZE_BYENTRY)
-int
-xfs_dir_sf_entsize_byentry(xfs_dir_sf_entry_t *sfep)
-{
-	return XFS_DIR_SF_ENTSIZE_BYENTRY(sfep);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR_SF_ENTSIZE_BYNAME)
-int
-xfs_dir_sf_entsize_byname(int len)
-{
-	return XFS_DIR_SF_ENTSIZE_BYNAME(len);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR_SF_GET_DIRINO)
-void
-xfs_dir_sf_get_dirino(xfs_dir_ino_t *from, xfs_ino_t *to)
-{
-	XFS_DIR_SF_GET_DIRINO(from, to);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR_SF_NEXTENTRY)
-xfs_dir_sf_entry_t *
-xfs_dir_sf_nextentry(xfs_dir_sf_entry_t *sfep)
-{
-	return XFS_DIR_SF_NEXTENTRY(sfep);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR_SF_PUT_DIRINO)
-void
-xfs_dir_sf_put_dirino(xfs_ino_t *from, xfs_dir_ino_t *to)
-{
-	XFS_DIR_SF_PUT_DIRINO(from, to);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_BLOCK_LEAF_P)
-xfs_dir2_leaf_entry_t *
-xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp)
-{
-	return XFS_DIR2_BLOCK_LEAF_P(btp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_BLOCK_TAIL_P)
-xfs_dir2_block_tail_t *
-xfs_dir2_block_tail_p(xfs_mount_t *mp, xfs_dir2_block_t *block)
-{
-	return XFS_DIR2_BLOCK_TAIL_P(mp, block);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_BYTE_TO_DA)
-xfs_dablk_t
-xfs_dir2_byte_to_da(xfs_mount_t *mp, xfs_dir2_off_t by)
-{
-	return XFS_DIR2_BYTE_TO_DA(mp, by);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_BYTE_TO_DATAPTR)
-/* ARGSUSED */
-xfs_dir2_dataptr_t
-xfs_dir2_byte_to_dataptr(xfs_mount_t *mp, xfs_dir2_off_t by)
-{
-	return XFS_DIR2_BYTE_TO_DATAPTR(mp, by);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_BYTE_TO_DB)
-xfs_dir2_db_t
-xfs_dir2_byte_to_db(xfs_mount_t *mp, xfs_dir2_off_t by)
-{
-	return XFS_DIR2_BYTE_TO_DB(mp, by);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_BYTE_TO_OFF)
-xfs_dir2_data_aoff_t
-xfs_dir2_byte_to_off(xfs_mount_t *mp, xfs_dir2_off_t by)
-{
-	return XFS_DIR2_BYTE_TO_OFF(mp, by);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_DA_TO_BYTE)
-xfs_dir2_off_t
-xfs_dir2_da_to_byte(xfs_mount_t *mp, xfs_dablk_t da)
-{
-	return XFS_DIR2_DA_TO_BYTE(mp, da);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_DA_TO_DB)
-xfs_dir2_db_t
-xfs_dir2_da_to_db(xfs_mount_t *mp, xfs_dablk_t da)
-{
-	return XFS_DIR2_DA_TO_DB(mp, da);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_DATA_ENTRY_TAG_P)
-xfs_dir2_data_off_t *
-xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
-{
-	return XFS_DIR2_DATA_ENTRY_TAG_P(dep);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_DATA_ENTSIZE)
-int
-xfs_dir2_data_entsize(int n)
-{
-	return XFS_DIR2_DATA_ENTSIZE(n);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_DATA_UNUSED_TAG_P)
-xfs_dir2_data_off_t *
-xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
-{
-	return XFS_DIR2_DATA_UNUSED_TAG_P(dup);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_DATAPTR_TO_BYTE)
-/* ARGSUSED */
-xfs_dir2_off_t
-xfs_dir2_dataptr_to_byte(xfs_mount_t *mp, xfs_dir2_dataptr_t dp)
-{
-	return XFS_DIR2_DATAPTR_TO_BYTE(mp, dp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_DATAPTR_TO_DB)
-xfs_dir2_db_t
-xfs_dir2_dataptr_to_db(xfs_mount_t *mp, xfs_dir2_dataptr_t dp)
-{
-	return XFS_DIR2_DATAPTR_TO_DB(mp, dp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_DATAPTR_TO_OFF)
-xfs_dir2_data_aoff_t
-xfs_dir2_dataptr_to_off(xfs_mount_t *mp, xfs_dir2_dataptr_t dp)
-{
-	return XFS_DIR2_DATAPTR_TO_OFF(mp, dp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_DB_OFF_TO_BYTE)
-xfs_dir2_off_t
-xfs_dir2_db_off_to_byte(xfs_mount_t *mp, xfs_dir2_db_t db,
-			xfs_dir2_data_aoff_t o)
-{
-	return XFS_DIR2_DB_OFF_TO_BYTE(mp, db, o);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_DB_OFF_TO_DATAPTR)
-xfs_dir2_dataptr_t
-xfs_dir2_db_off_to_dataptr(xfs_mount_t *mp, xfs_dir2_db_t db,
-			   xfs_dir2_data_aoff_t o)
-{
-	return XFS_DIR2_DB_OFF_TO_DATAPTR(mp, db, o);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_DB_TO_DA)
-xfs_dablk_t
-xfs_dir2_db_to_da(xfs_mount_t *mp, xfs_dir2_db_t db)
-{
-	return XFS_DIR2_DB_TO_DA(mp, db);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_DB_TO_FDB)
-xfs_dir2_db_t
-xfs_dir2_db_to_fdb(xfs_mount_t *mp, xfs_dir2_db_t db)
-{
-	return XFS_DIR2_DB_TO_FDB(mp, db);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_DB_TO_FDINDEX)
-int
-xfs_dir2_db_to_fdindex(xfs_mount_t *mp, xfs_dir2_db_t db)
-{
-	return XFS_DIR2_DB_TO_FDINDEX(mp, db);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_LEAF_BESTS_P)
-xfs_dir2_data_off_t *
-xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
-{
-	return XFS_DIR2_LEAF_BESTS_P(ltp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_LEAF_TAIL_P)
-xfs_dir2_leaf_tail_t *
-xfs_dir2_leaf_tail_p(xfs_mount_t *mp, xfs_dir2_leaf_t *lp)
-{
-	return XFS_DIR2_LEAF_TAIL_P(mp, lp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_MAX_LEAF_ENTS)
-int
-xfs_dir2_max_leaf_ents(xfs_mount_t *mp)
-{
-	return XFS_DIR2_MAX_LEAF_ENTS(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_SF_ENTSIZE_BYENTRY)
-int
-xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
-{
-	return XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_SF_FIRSTENTRY)
-xfs_dir2_sf_entry_t *
-xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp)
-{
-	return XFS_DIR2_SF_FIRSTENTRY(sfp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_SF_ENTSIZE_BYNAME)
-int
-xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
-{
-	return XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, len);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_SF_GET_INUMBER)
-xfs_intino_t
-xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
-{
-	return XFS_DIR2_SF_GET_INUMBER(sfp, from);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_SF_GET_OFFSET)
-xfs_dir2_data_aoff_t
-xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
-{
-	return XFS_DIR2_SF_GET_OFFSET(sfep);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_SF_HDR_SIZE)
-int
-xfs_dir2_sf_hdr_size(int i8count)
-{
-	return XFS_DIR2_SF_HDR_SIZE(i8count);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_SF_INUMBERP)
-xfs_dir2_inou_t *
-xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep)
-{
-	return XFS_DIR2_SF_INUMBERP(sfep);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_SF_NEXTENTRY)
-xfs_dir2_sf_entry_t *
-xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
-{
-	return XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_SF_PUT_INUMBER)
-void
-xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from, xfs_dir2_inou_t *to)
-{
-	XFS_DIR2_SF_PUT_INUMBER(sfp, from, to);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_DIR2_SF_PUT_OFFSET)
-void
-xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
-{
-	XFS_DIR2_SF_PUT_OFFSET(sfep, off);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_EXTFMT_INODE )
-xfs_exntfmt_t
-xfs_extfmt_inode(struct xfs_inode *ip)
-{
-	return XFS_EXTFMT_INODE(ip);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_EXTLEN_MAX)
-xfs_extlen_t
-xfs_extlen_max(xfs_extlen_t a, xfs_extlen_t b)
-{
-	return XFS_EXTLEN_MAX(a, b);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_EXTLEN_MIN)
-xfs_extlen_t
-xfs_extlen_min(xfs_extlen_t a, xfs_extlen_t b)
-{
-	return XFS_EXTLEN_MIN(a, b);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_FILBLKS_MAX)
-xfs_filblks_t
-xfs_filblks_max(xfs_filblks_t a, xfs_filblks_t b)
-{
-	return XFS_FILBLKS_MAX(a, b);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_FILBLKS_MIN)
-xfs_filblks_t
-xfs_filblks_min(xfs_filblks_t a, xfs_filblks_t b)
-{
-	return XFS_FILBLKS_MIN(a, b);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_FILEOFF_MAX)
-xfs_fileoff_t
-xfs_fileoff_max(xfs_fileoff_t a, xfs_fileoff_t b)
-{
-	return XFS_FILEOFF_MAX(a, b);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_FILEOFF_MIN)
-xfs_fileoff_t
-xfs_fileoff_min(xfs_fileoff_t a, xfs_fileoff_t b)
-{
-	return XFS_FILEOFF_MIN(a, b);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_FSB_SANITY_CHECK)
-int
-xfs_fsb_sanity_check(xfs_mount_t *mp, xfs_fsblock_t fsbno)
-{
-	return XFS_FSB_SANITY_CHECK(mp, fsbno);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_FSB_TO_AGBNO)
-xfs_agblock_t
-xfs_fsb_to_agbno(xfs_mount_t *mp, xfs_fsblock_t fsbno)
-{
-	return XFS_FSB_TO_AGBNO(mp, fsbno);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_FSB_TO_AGNO)
-xfs_agnumber_t
-xfs_fsb_to_agno(xfs_mount_t *mp, xfs_fsblock_t fsbno)
-{
-	return XFS_FSB_TO_AGNO(mp, fsbno);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_FSB_TO_DADDR)
-xfs_daddr_t
-xfs_fsb_to_daddr(xfs_mount_t *mp, xfs_fsblock_t fsbno)
-{
-	return XFS_FSB_TO_DADDR(mp, fsbno);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_FSB_TO_DB)
-xfs_daddr_t
-xfs_fsb_to_db(xfs_inode_t *ip, xfs_fsblock_t fsb)
-{
-	return XFS_FSB_TO_DB(ip, fsb);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_HDR_BLOCK)
-xfs_agblock_t
-xfs_hdr_block(xfs_mount_t *mp, xfs_daddr_t d)
-{
-	return XFS_HDR_BLOCK(mp, d);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_IALLOC_BLOCKS)
-xfs_extlen_t
-xfs_ialloc_blocks(xfs_mount_t *mp)
-{
-	return XFS_IALLOC_BLOCKS(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_IALLOC_FIND_FREE)
-int
-xfs_ialloc_find_free(xfs_inofree_t *fp)
-{
-	return XFS_IALLOC_FIND_FREE(fp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_IALLOC_INODES)
-int
-xfs_ialloc_inodes(xfs_mount_t *mp)
-{
-	return XFS_IALLOC_INODES(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_IBT_BLOCK)
-xfs_agblock_t
-xfs_ibt_block(xfs_mount_t *mp)
-{
-	return XFS_IBT_BLOCK(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_IFORK_ASIZE)
-int
-xfs_ifork_asize(xfs_inode_t *ip)
-{
-	return XFS_IFORK_ASIZE(ip);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_IFORK_DSIZE)
-int
-xfs_ifork_dsize(xfs_inode_t *ip)
-{
-	return XFS_IFORK_DSIZE(ip);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_IFORK_FMT_SET)
-void
-xfs_ifork_fmt_set(xfs_inode_t *ip, int w, int n)
-{
-	XFS_IFORK_FMT_SET(ip, w, n);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_IFORK_FORMAT)
-int
-xfs_ifork_format(xfs_inode_t *ip, int w)
-{
-	return XFS_IFORK_FORMAT(ip, w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_IFORK_NEXT_SET)
-void
-xfs_ifork_next_set(xfs_inode_t *ip, int w, int n)
-{
-	XFS_IFORK_NEXT_SET(ip, w, n);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_IFORK_NEXTENTS)
-int
-xfs_ifork_nextents(xfs_inode_t *ip, int w)
-{
-	return XFS_IFORK_NEXTENTS(ip, w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_IFORK_PTR)
-xfs_ifork_t *
-xfs_ifork_ptr(xfs_inode_t *ip, int w)
-{
-	return XFS_IFORK_PTR(ip, w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_IFORK_Q)
-int
-xfs_ifork_q(xfs_inode_t *ip)
-{
-	return XFS_IFORK_Q(ip);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_IFORK_SIZE)
-int
-xfs_ifork_size(xfs_inode_t *ip, int w)
-{
-	return XFS_IFORK_SIZE(ip, w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ILOG_FBROOT)
-int
-xfs_ilog_fbroot(int w)
-{
-	return XFS_ILOG_FBROOT(w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ILOG_FDATA)
-int
-xfs_ilog_fdata(int w)
-{
-	return XFS_ILOG_FDATA(w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ILOG_FEXT)
-int
-xfs_ilog_fext(int w)
-{
-	return XFS_ILOG_FEXT(w);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_IN_MAXLEVELS)
-int
-xfs_in_maxlevels(xfs_mount_t *mp)
-{
-	return XFS_IN_MAXLEVELS(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INO_AGBNO_BITS)
-int
-xfs_ino_agbno_bits(xfs_mount_t *mp)
-{
-	return XFS_INO_AGBNO_BITS(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INO_AGINO_BITS)
-int
-xfs_ino_agino_bits(xfs_mount_t *mp)
-{
-	return XFS_INO_AGINO_BITS(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INO_AGNO_BITS)
-int
-xfs_ino_agno_bits(xfs_mount_t *mp)
-{
-	return XFS_INO_AGNO_BITS(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INO_BITS)
-int
-xfs_ino_bits(xfs_mount_t *mp)
-{
-	return XFS_INO_BITS(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INO_MASK)
-__uint32_t
-xfs_ino_mask(int k)
-{
-	return XFS_INO_MASK(k);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INO_OFFSET_BITS)
-int
-xfs_ino_offset_bits(xfs_mount_t *mp)
-{
-	return XFS_INO_OFFSET_BITS(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INO_TO_AGBNO)
-xfs_agblock_t
-xfs_ino_to_agbno(xfs_mount_t *mp, xfs_ino_t i)
-{
-	return XFS_INO_TO_AGBNO(mp, i);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INO_TO_AGINO)
-xfs_agino_t
-xfs_ino_to_agino(xfs_mount_t *mp, xfs_ino_t i)
-{
-	return XFS_INO_TO_AGINO(mp, i);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INO_TO_AGNO)
-xfs_agnumber_t
-xfs_ino_to_agno(xfs_mount_t *mp, xfs_ino_t i)
-{
-	return XFS_INO_TO_AGNO(mp, i);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INO_TO_FSB)
-xfs_fsblock_t
-xfs_ino_to_fsb(xfs_mount_t *mp, xfs_ino_t i)
-{
-	return XFS_INO_TO_FSB(mp, i);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INO_TO_OFFSET)
-int
-xfs_ino_to_offset(xfs_mount_t *mp, xfs_ino_t i)
-{
-	return XFS_INO_TO_OFFSET(mp, i);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_BLOCK_MAXRECS)
-int
-xfs_inobt_block_maxrecs(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_INOBT_BLOCK_MAXRECS(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_BLOCK_MINRECS)
-int
-xfs_inobt_block_minrecs(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_INOBT_BLOCK_MINRECS(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_BLOCK_SIZE)
-/*ARGSUSED1*/
-int
-xfs_inobt_block_size(int lev, xfs_btree_cur_t *cur)
-{
-	return XFS_INOBT_BLOCK_SIZE(lev, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_CLR_FREE)
-void
-xfs_inobt_clr_free(xfs_inobt_rec_t *rp, int i)
-{
-	XFS_INOBT_CLR_FREE(rp, i);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_IS_FREE)
-int
-xfs_inobt_is_free(xfs_inobt_rec_t *rp, int i)
-{
-	return XFS_INOBT_IS_FREE(rp, i);
-}
-int
-xfs_inobt_is_free_disk(xfs_inobt_rec_t *rp, int i)
-{
-	return XFS_INOBT_IS_FREE_DISK(rp, i);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_IS_LAST_REC)
-int
-xfs_inobt_is_last_rec(xfs_btree_cur_t *cur)
-{
-	return XFS_INOBT_IS_LAST_REC(cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_KEY_ADDR)
-/*ARGSUSED3*/
-xfs_inobt_key_t *
-xfs_inobt_key_addr(xfs_inobt_block_t *bb, int i, xfs_btree_cur_t *cur)
-{
-	return XFS_INOBT_KEY_ADDR(bb, i, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_MASK)
-xfs_inofree_t
-xfs_inobt_mask(int i)
-{
-	return XFS_INOBT_MASK(i);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_MASKN)
-xfs_inofree_t
-xfs_inobt_maskn(int i, int n)
-{
-	return XFS_INOBT_MASKN(i, n);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_PTR_ADDR)
-xfs_inobt_ptr_t *
-xfs_inobt_ptr_addr(xfs_inobt_block_t *bb, int i, xfs_btree_cur_t *cur)
-{
-	return XFS_INOBT_PTR_ADDR(bb, i, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_REC_ADDR)
-/*ARGSUSED3*/
-xfs_inobt_rec_t *
-xfs_inobt_rec_addr(xfs_inobt_block_t *bb, int i, xfs_btree_cur_t *cur)
-{
-	return XFS_INOBT_REC_ADDR(bb, i, cur);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_SET_FREE)
-void
-xfs_inobt_set_free(xfs_inobt_rec_t *rp, int i)
-{
-	XFS_INOBT_SET_FREE(rp, i);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ITOBHV)
-bhv_desc_t *
-xfs_itobhv(xfs_inode_t *ip)
-{
-	return XFS_ITOBHV(ip);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_ITOV)
-vnode_t *
-xfs_itov(xfs_inode_t *ip)
-{
-	return XFS_ITOV(ip);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_LBLOG)
-int
-xfs_lblog(xfs_mount_t *mp)
-{
-	return XFS_LBLOG(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_LBSIZE)
-int
-xfs_lbsize(xfs_mount_t *mp)
-{
-	return XFS_LBSIZE(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_LIC_ALL_FREE)
-void
-xfs_lic_all_free(xfs_log_item_chunk_t *cp)
-{
-	XFS_LIC_ALL_FREE(cp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_LIC_ARE_ALL_FREE)
-int
-xfs_lic_are_all_free(xfs_log_item_chunk_t *cp)
-{
-	return XFS_LIC_ARE_ALL_FREE(cp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_LIC_CLAIM)
-void
-xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot)
-{
-	XFS_LIC_CLAIM(cp, slot);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_LIC_DESC_TO_CHUNK)
-xfs_log_item_chunk_t *
-xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
-{
-	return XFS_LIC_DESC_TO_CHUNK(dp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_LIC_DESC_TO_SLOT)
-int
-xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
-{
-	return XFS_LIC_DESC_TO_SLOT(dp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_LIC_INIT)
-void
-xfs_lic_init(xfs_log_item_chunk_t *cp)
-{
-	XFS_LIC_INIT(cp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_LIC_INIT_SLOT)
-void
-xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot)
-{
-	XFS_LIC_INIT_SLOT(cp, slot);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_LIC_ISFREE)
-int
-xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot)
-{
-	return XFS_LIC_ISFREE(cp, slot);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_LIC_RELSE)
-void
-xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot)
-{
-	XFS_LIC_RELSE(cp, slot);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_LIC_SLOT)
-xfs_log_item_desc_t *
-xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot)
-{
-	return XFS_LIC_SLOT(cp, slot);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_LIC_VACANCY)
-int
-xfs_lic_vacancy(xfs_log_item_chunk_t *cp)
-{
-	return XFS_LIC_VACANCY(cp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_LITINO)
-int
-xfs_litino(xfs_mount_t *mp)
-{
-	return XFS_LITINO(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_MAKE_IPTR)
-xfs_dinode_t *
-xfs_make_iptr(xfs_mount_t *mp, xfs_buf_t *b, int o)
-{
-	return XFS_MAKE_IPTR(mp, b, o);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_MASK32HI)
-__uint32_t
-xfs_mask32hi(int n)
-{
-	return XFS_MASK32HI(n);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_MASK32LO)
-__uint32_t
-xfs_mask32lo(int n)
-{
-	return XFS_MASK32LO(n);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_MASK64HI)
-__uint64_t
-xfs_mask64hi(int n)
-{
-	return XFS_MASK64HI(n);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_MASK64LO)
-__uint64_t
-xfs_mask64lo(int n)
-{
-	return XFS_MASK64LO(n);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_MIN_FREELIST)
-int
-xfs_min_freelist(xfs_agf_t *a, xfs_mount_t *mp)
-{
-	return XFS_MIN_FREELIST(a, mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_MIN_FREELIST_PAG)
-int
-xfs_min_freelist_pag(xfs_perag_t *pag, xfs_mount_t *mp)
-{
-	return XFS_MIN_FREELIST_PAG(pag, mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_MIN_FREELIST_RAW)
-int
-xfs_min_freelist_raw(uint bl, uint cl, xfs_mount_t *mp)
-{
-	return XFS_MIN_FREELIST_RAW(bl, cl, mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_MTOVFS)
-vfs_t *
-xfs_mtovfs(xfs_mount_t *mp)
-{
-	return XFS_MTOVFS(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_OFFBNO_TO_AGINO)
-xfs_agino_t
-xfs_offbno_to_agino(xfs_mount_t *mp, xfs_agblock_t b, int o)
-{
-	return XFS_OFFBNO_TO_AGINO(mp, b, o);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_PREALLOC_BLOCKS)
-xfs_agblock_t
-xfs_prealloc_blocks(xfs_mount_t *mp)
-{
-	return XFS_PREALLOC_BLOCKS(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_BLOCK)
-xfs_agblock_t
-xfs_sb_block(xfs_mount_t *mp)
-{
-	return XFS_SB_BLOCK(mp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_GOOD_VERSION)
-int
-xfs_sb_good_version(xfs_sb_t *sbp)
-{
-	return XFS_SB_GOOD_VERSION(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_ADDATTR)
-void
-xfs_sb_version_addattr(xfs_sb_t *sbp)
-{
-	XFS_SB_VERSION_ADDATTR(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_ADDDALIGN)
-void
-xfs_sb_version_adddalign(xfs_sb_t *sbp)
-{
-	XFS_SB_VERSION_ADDDALIGN(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_ADDNLINK)
-void
-xfs_sb_version_addnlink(xfs_sb_t *sbp)
-{
-	XFS_SB_VERSION_ADDNLINK(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_ADDQUOTA)
-void
-xfs_sb_version_addquota(xfs_sb_t *sbp)
-{
-	XFS_SB_VERSION_ADDQUOTA(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_ADDSHARED)
-void
-xfs_sb_version_addshared(xfs_sb_t *sbp)
-{
-	XFS_SB_VERSION_ADDSHARED(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_HASALIGN)
-int
-xfs_sb_version_hasalign(xfs_sb_t *sbp)
-{
-	return XFS_SB_VERSION_HASALIGN(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_HASATTR)
-int
-xfs_sb_version_hasattr(xfs_sb_t *sbp)
-{
-	return XFS_SB_VERSION_HASATTR(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_HASDALIGN)
-int
-xfs_sb_version_hasdalign(xfs_sb_t *sbp)
-{
-	return XFS_SB_VERSION_HASDALIGN(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_HASDIRV2)
-int
-xfs_sb_version_hasdirv2(xfs_sb_t *sbp)
-{
-	return XFS_SB_VERSION_HASDIRV2(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_HASEXTFLGBIT)
-int
-xfs_sb_version_hasextflgbit(xfs_sb_t *sbp)
-{
-	return XFS_SB_VERSION_HASEXTFLGBIT(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_HASNLINK)
-int
-xfs_sb_version_hasnlink(xfs_sb_t *sbp)
-{
-	return XFS_SB_VERSION_HASNLINK(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_HASQUOTA)
-int
-xfs_sb_version_hasquota(xfs_sb_t *sbp)
-{
-	return XFS_SB_VERSION_HASQUOTA(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_HASSHARED)
-int
-xfs_sb_version_hasshared(xfs_sb_t *sbp)
-{
-	return XFS_SB_VERSION_HASSHARED(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_NUM)
-int
-xfs_sb_version_num(xfs_sb_t *sbp)
-{
-	return XFS_SB_VERSION_NUM(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_SUBALIGN)
-void
-xfs_sb_version_subalign(xfs_sb_t *sbp)
-{
-	XFS_SB_VERSION_SUBALIGN(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_SUBSHARED)
-void
-xfs_sb_version_subshared(xfs_sb_t *sbp)
-{
-	XFS_SB_VERSION_SUBSHARED(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_HASLOGV2)
-int
-xfs_sb_version_haslogv2(xfs_sb_t *sbp)
-{
-	return XFS_SB_VERSION_HASLOGV2(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_HASSECTOR)
-int
-xfs_sb_version_hassector(xfs_sb_t *sbp)
-{
-	return XFS_SB_VERSION_HASSECTOR(sbp);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_TONEW)
-unsigned
-xfs_sb_version_tonew(unsigned v)
-{
-	return XFS_SB_VERSION_TONEW(v);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_TOOLD)
-unsigned
-xfs_sb_version_toold(unsigned v)
-{
-	return XFS_SB_VERSION_TOOLD(v);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XLOG_GRANT_ADD_SPACE)
-void
-xlog_grant_add_space(xlog_t *log, int bytes, int type)
-{
-	XLOG_GRANT_ADD_SPACE(log, bytes, type);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XLOG_GRANT_SUB_SPACE)
-void
-xlog_grant_sub_space(xlog_t *log, int bytes, int type)
-{
-	XLOG_GRANT_SUB_SPACE(log, bytes, type);
-}
-#endif
-
-#if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_SB_VERSION_HASMOREBITS)
-int
-xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
-{
-	return XFS_SB_VERSION_HASMOREBITS(sbp);
-}
-#endif
-
diff --git a/fs/xfs/xfs_macros.h b/fs/xfs/xfs_macros.h
deleted file mode 100644
index 0a9307514a4..00000000000
--- a/fs/xfs/xfs_macros.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-#ifndef __XFS_MACROS_H__
-#define	__XFS_MACROS_H__
-
-/*
- * Set for debug kernels and simulation
- * These replacements save space.
- * Used in xfs_macros.c.
- */
-#define	XFS_WANT_SPACE_C	\
-	(!defined(_STANDALONE) && defined(DEBUG))
-
-/*
- * Set for debug simulation and kernel builds, but not for standalone.
- * These replacements do not save space.
- * Used in xfs_macros.c.
- */
-#define	XFS_WANT_FUNCS_C	\
-	(!defined(_STANDALONE) && defined(DEBUG))
-
-/*
- * Corresponding names used in .h files.
- */
-#define	XFS_WANT_SPACE	(XFS_WANT_SPACE_C && !defined(XFS_MACRO_C))
-#define	XFS_WANT_FUNCS	(XFS_WANT_FUNCS_C && !defined(XFS_MACRO_C))
-
-/*
- * These are the macros that get turned into functions to save space.
- */
-#define	XFSSO_NULLSTARTBLOCK 1
-#define	XFSSO_XFS_AGB_TO_DADDR 1
-#define XFSSO_XFS_AGB_TO_FSB 1
-#define	XFSSO_XFS_AGINO_TO_INO 1
-#define	XFSSO_XFS_ALLOC_BLOCK_MINRECS 1
-#define	XFSSO_XFS_ATTR_SF_NEXTENTRY 1
-#define	XFSSO_XFS_BMAP_BLOCK_DMAXRECS 1
-#define	XFSSO_XFS_BMAP_BLOCK_IMAXRECS 1
-#define	XFSSO_XFS_BMAP_BLOCK_IMINRECS 1
-#define	XFSSO_XFS_BMAP_INIT 1
-#define	XFSSO_XFS_BMAP_PTR_IADDR 1
-#define	XFSSO_XFS_BMAP_SANITY_CHECK 1
-#define	XFSSO_XFS_BMAPI_AFLAG 1
-#define	XFSSO_XFS_CFORK_SIZE 1
-#define	XFSSO_XFS_DA_COOKIE_BNO 1
-#define	XFSSO_XFS_DA_COOKIE_ENTRY 1
-#define	XFSSO_XFS_DADDR_TO_AGBNO 1
-#define	XFSSO_XFS_DADDR_TO_FSB 1
-#define	XFSSO_XFS_DFORK_PTR 1
-#define	XFSSO_XFS_DIR_SF_GET_DIRINO 1
-#define	XFSSO_XFS_DIR_SF_NEXTENTRY 1
-#define	XFSSO_XFS_DIR_SF_PUT_DIRINO 1
-#define	XFSSO_XFS_FILBLKS_MIN 1
-#define	XFSSO_XFS_FSB_SANITY_CHECK 1
-#define	XFSSO_XFS_FSB_TO_DADDR 1
-#define	XFSSO_XFS_FSB_TO_DB 1
-#define	XFSSO_XFS_IALLOC_INODES 1
-#define	XFSSO_XFS_IFORK_ASIZE 1
-#define	XFSSO_XFS_IFORK_DSIZE 1
-#define	XFSSO_XFS_IFORK_FORMAT 1
-#define	XFSSO_XFS_IFORK_NEXT_SET 1
-#define	XFSSO_XFS_IFORK_NEXTENTS 1
-#define	XFSSO_XFS_IFORK_PTR 1
-#define	XFSSO_XFS_ILOG_FBROOT 1
-#define	XFSSO_XFS_ILOG_FEXT 1
-#define	XFSSO_XFS_INO_MASK 1
-#define	XFSSO_XFS_INO_TO_FSB 1
-#define	XFSSO_XFS_INODE_CLEAR_READ_AHEAD 1
-#define	XFSSO_XFS_MIN_FREELIST 1
-#define XFSSO_XFS_SB_GOOD_VERSION 1
-#define XFSSO_XFS_SB_VERSION_HASNLINK 1
-#define	XFSSO_XLOG_GRANT_ADD_SPACE 1
-#define	XFSSO_XLOG_GRANT_SUB_SPACE 1
-
-#endif	/* __XFS_MACROS_H__ */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 82e1646e624..541d5dd474b 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1,40 +1,26 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -42,21 +28,20 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
 #include "xfs_alloc.h"
 #include "xfs_rtalloc.h"
 #include "xfs_bmap.h"
 #include "xfs_error.h"
-#include "xfs_bit.h"
 #include "xfs_rw.h"
 #include "xfs_quota.h"
 #include "xfs_fsops.h"
@@ -180,6 +165,10 @@ xfs_mount_free(
 
 	if (mp->m_fsname != NULL)
 		kmem_free(mp->m_fsname, mp->m_fsname_len);
+	if (mp->m_rtname != NULL)
+		kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
+	if (mp->m_logname != NULL)
+		kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);
 
 	if (remove_bhv) {
 		struct vfs	*vfsp = XFS_MTOVFS(mp);
@@ -318,7 +307,7 @@ xfs_mount_validate_sb(
 		"XFS: Attempted to mount file system with blocksize %d bytes",
 			sbp->sb_blocksize);
 		cmn_err(CE_WARN,
-		"XFS: Only page-sized (%d) or less blocksizes currently work.",
+		"XFS: Only page-sized (%ld) or less blocksizes currently work.",
 			PAGE_SIZE);
 		return XFS_ERROR(ENOSYS);
 	}
@@ -327,7 +316,10 @@ xfs_mount_validate_sb(
 }
 
 xfs_agnumber_t
-xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount)
+xfs_initialize_perag(
+	struct vfs	*vfs,
+	xfs_mount_t	*mp,
+	xfs_agnumber_t	agcount)
 {
 	xfs_agnumber_t	index, max_metadata;
 	xfs_perag_t	*pag;
@@ -343,7 +335,7 @@ xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount)
 	/* Clear the mount flag if no inode can overflow 32 bits
 	 * on this filesystem, or if specifically requested..
 	 */
-	if ((mp->m_flags & XFS_MOUNT_32BITINOOPT) && ino > max_inum) {
+	if ((vfs->vfs_flag & VFS_32BITINODES) && ino > max_inum) {
 		mp->m_flags |= XFS_MOUNT_32BITINODES;
 	} else {
 		mp->m_flags &= ~XFS_MOUNT_32BITINODES;
@@ -360,7 +352,7 @@ xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount)
 			icount = sbp->sb_dblocks * sbp->sb_imax_pct;
 			do_div(icount, 100);
 			icount += sbp->sb_agblocks - 1;
-			do_div(icount, mp->m_ialloc_blks);
+			do_div(icount, sbp->sb_agblocks);
 			max_metadata = icount;
 		} else {
 			max_metadata = agcount;
@@ -584,12 +576,13 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
 	ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
 	switch (sbp->sb_inodesize) {
 	case 256:
-		mp->m_attroffset = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(2);
+		mp->m_attroffset = XFS_LITINO(mp) -
+				   XFS_BMDR_SPACE_CALC(MINABTPTRS);
 		break;
 	case 512:
 	case 1024:
 	case 2048:
-		mp->m_attroffset = XFS_BMDR_SPACE_CALC(12);
+		mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
 		break;
 	default:
 		ASSERT(0);
@@ -954,7 +947,7 @@ xfs_mountfs(
 	mp->m_perag =
 		kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP);
 
-	mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
+	mp->m_maxagi = xfs_initialize_perag(vfsp, mp, sbp->sb_agcount);
 
 	/*
 	 * log's mount-time initialization. Perform 1st part recovery if needed
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 5affba38a57..08b2e0a5d80 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -1,38 +1,23 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_MOUNT_H__
 #define	__XFS_MOUNT_H__
 
-
 typedef struct xfs_trans_reservations {
 	uint	tr_write;	/* extent alloc trans */
 	uint	tr_itruncate;	/* truncate trans */
@@ -57,7 +42,6 @@ typedef struct xfs_trans_reservations {
 	uint	tr_growrtfree;	/* grow realtime freeing */
 } xfs_trans_reservations_t;
 
-
 #ifndef __KERNEL__
 /*
  * Moved here from xfs_ag.h to avoid reordering header files
@@ -80,6 +64,9 @@ struct xfs_iocore;
 struct xfs_bmbt_irec;
 struct xfs_bmap_free;
 
+extern struct vfsops xfs_vfsops;
+extern struct vnodeops xfs_vnodeops;
+
 #define	AIL_LOCK_T		lock_t
 #define	AIL_LOCKINIT(x,y)	spinlock_init(x,y)
 #define	AIL_LOCK_DESTROY(x)	spinlock_destroy(x)
@@ -292,6 +279,8 @@ typedef struct xfs_mount {
 	struct xfs_buf		*m_sb_bp;	/* buffer for superblock */
 	char			*m_fsname;	/* filesystem name */
 	int			m_fsname_len;	/* strlen of fs name */
+	char			*m_rtname;	/* realtime device name */
+	char			*m_logname;	/* external log device name */
 	int			m_bsize;	/* fs logical block size */
 	xfs_agnumber_t		m_agfrotor;	/* last ag where space found */
 	xfs_agnumber_t		m_agirotor;	/* last ag dir inode alloced */
@@ -344,7 +333,7 @@ typedef struct xfs_mount {
 	sema_t			m_growlock;	/* growfs mutex */
 	int			m_fixedfsid[2];	/* unchanged for life of FS */
 	uint			m_dmevmask;	/* DMI events for this FS */
-	uint			m_flags;	/* global mount flags */
+	__uint64_t		m_flags;	/* global mount flags */
 	uint			m_attroffset;	/* inode attribute offset */
 	uint			m_dir_node_ents; /* #entries in a dir danode */
 	uint			m_attr_node_ents; /* #entries in attr danode */
@@ -389,38 +378,41 @@ typedef struct xfs_mount {
 /*
  * Flags for m_flags.
  */
-#define	XFS_MOUNT_WSYNC		0x00000001	/* for nfs - all metadata ops
+#define	XFS_MOUNT_WSYNC		(1ULL << 0)	/* for nfs - all metadata ops
 						   must be synchronous except
 						   for space allocations */
-#define	XFS_MOUNT_INO64		0x00000002
-			     /* 0x00000004	-- currently unused */
-			     /* 0x00000008	-- currently unused */
-#define XFS_MOUNT_FS_SHUTDOWN	0x00000010	/* atomic stop of all filesystem
+#define	XFS_MOUNT_INO64		(1ULL << 1)
+			     /* (1ULL << 2)	-- currently unused */
+			     /* (1ULL << 3)	-- currently unused */
+#define XFS_MOUNT_FS_SHUTDOWN	(1ULL << 4)	/* atomic stop of all filesystem
 						   operations, typically for
 						   disk errors in metadata */
-#define XFS_MOUNT_NOATIME	0x00000020	/* don't modify inode access
+#define XFS_MOUNT_NOATIME	(1ULL << 5)	/* don't modify inode access
 						   times on reads */
-#define XFS_MOUNT_RETERR	0x00000040      /* return alignment errors to
+#define XFS_MOUNT_RETERR	(1ULL << 6)     /* return alignment errors to
 						   user */
-#define XFS_MOUNT_NOALIGN	0x00000080	/* turn off stripe alignment
+#define XFS_MOUNT_NOALIGN	(1ULL << 7)	/* turn off stripe alignment
 						   allocations */
-			     /* 0x00000100	-- currently unused */
-			     /*	0x00000200	-- currently unused */
-#define XFS_MOUNT_NORECOVERY	0x00000400	/* no recovery - dirty fs */
-#define XFS_MOUNT_SHARED	0x00000800	/* shared mount */
-#define XFS_MOUNT_DFLT_IOSIZE	0x00001000	/* set default i/o size */
-#define XFS_MOUNT_OSYNCISOSYNC	0x00002000	/* o_sync is REALLY o_sync */
+#define XFS_MOUNT_COMPAT_ATTR	(1ULL << 8)	/* do not use attr2 format */
+			     /*	(1ULL << 9)	-- currently unused */
+#define XFS_MOUNT_NORECOVERY	(1ULL << 10)	/* no recovery - dirty fs */
+#define XFS_MOUNT_SHARED	(1ULL << 11)	/* shared mount */
+#define XFS_MOUNT_DFLT_IOSIZE	(1ULL << 12)	/* set default i/o size */
+#define XFS_MOUNT_OSYNCISOSYNC	(1ULL << 13)	/* o_sync is REALLY o_sync */
 						/* osyncisdsync is now default*/
-#define XFS_MOUNT_32BITINODES	0x00004000	/* do not create inodes above
+#define XFS_MOUNT_32BITINODES	(1ULL << 14)	/* do not create inodes above
 						 * 32 bits in size */
-#define XFS_MOUNT_32BITINOOPT	0x00008000	/* saved mount option state */
-#define XFS_MOUNT_NOUUID	0x00010000	/* ignore uuid during mount */
-#define XFS_MOUNT_NOLOGFLUSH	0x00020000
-#define XFS_MOUNT_IDELETE	0x00040000	/* delete empty inode clusters*/
-#define XFS_MOUNT_SWALLOC	0x00080000	/* turn on stripe width
+			     /* (1ULL << 15)	-- currently unused */
+#define XFS_MOUNT_NOUUID	(1ULL << 16)	/* ignore uuid during mount */
+#define XFS_MOUNT_BARRIER	(1ULL << 17)
+#define XFS_MOUNT_IDELETE	(1ULL << 18)	/* delete empty inode clusters*/
+#define XFS_MOUNT_SWALLOC	(1ULL << 19)	/* turn on stripe width
 						 * allocation */
-#define XFS_MOUNT_IHASHSIZE	0x00100000	/* inode hash table size */
-#define XFS_MOUNT_DIRSYNC	0x00200000	/* synchronous directory ops */
+#define XFS_MOUNT_IHASHSIZE	(1ULL << 20)	/* inode hash table size */
+#define XFS_MOUNT_DIRSYNC	(1ULL << 21)	/* synchronous directory ops */
+#define XFS_MOUNT_COMPAT_IOSIZE	(1ULL << 22)	/* don't report large preferred
+						 * I/O size in stat() */
+
 
 /*
  * Default minimum read and write sizes.
@@ -442,6 +434,30 @@ typedef struct xfs_mount {
 #define	XFS_WSYNC_READIO_LOG	15	/* 32K */
 #define	XFS_WSYNC_WRITEIO_LOG	14	/* 16K */
 
+/*
+ * Allow large block sizes to be reported to userspace programs if the
+ * "largeio" mount option is used. 
+ *
+ * If compatibility mode is specified, simply return the basic unit of caching
+ * so that we don't get inefficient read/modify/write I/O from user apps.
+ * Otherwise....
+ *
+ * If the underlying volume is a stripe, then return the stripe width in bytes
+ * as the recommended I/O size. It is not a stripe and we've set a default
+ * buffered I/O size, return that, otherwise return the compat default.
+ */
+static inline unsigned long
+xfs_preferred_iosize(xfs_mount_t *mp)
+{
+	if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)
+		return PAGE_CACHE_SIZE;
+	return (mp->m_swidth ?
+		(mp->m_swidth << mp->m_sb.sb_blocklog) :
+		((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ?
+			(1 << (int)MAX(mp->m_readio_log, mp->m_writeio_log)) :
+			PAGE_CACHE_SIZE));
+}
+
 #define XFS_MAXIOFFSET(mp)	((mp)->m_maxioffset)
 
 #define XFS_FORCED_SHUTDOWN(mp)	((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
@@ -474,57 +490,41 @@ typedef struct xfs_mount {
 /*
  * Macros for getting from mount to vfs and back.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MTOVFS)
-struct vfs *xfs_mtovfs(xfs_mount_t *mp);
 #define	XFS_MTOVFS(mp)		xfs_mtovfs(mp)
-#else
-#define	XFS_MTOVFS(mp)		(bhvtovfs(&(mp)->m_bhv))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BHVTOM)
-xfs_mount_t *xfs_bhvtom(bhv_desc_t *bdp);
-#define	XFS_BHVTOM(bdp)	xfs_bhvtom(bdp)
-#else
-#define XFS_BHVTOM(bdp)		((xfs_mount_t *)BHV_PDATA(bdp))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_VFSTOM)
-xfs_mount_t *xfs_vfstom(vfs_t *vfs);
-#define XFS_VFSTOM(vfs) xfs_vfstom(vfs)
-#else
-#define XFS_VFSTOM(vfs)		\
-	(XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfs), &xfs_vfsops)))
-#endif
+static inline struct vfs *xfs_mtovfs(xfs_mount_t *mp)
+{
+	return bhvtovfs(&mp->m_bhv);
+}
 
+#define	XFS_BHVTOM(bdp)	xfs_bhvtom(bdp)
+static inline xfs_mount_t *xfs_bhvtom(bhv_desc_t *bdp)
+{
+	return (xfs_mount_t *)BHV_PDATA(bdp);
+}
 
-/*
- * Moved here from xfs_ag.h to avoid reordering header files
- */
+#define XFS_VFSTOM(vfs) xfs_vfstom(vfs)
+static inline xfs_mount_t *xfs_vfstom(vfs_t *vfs)
+{
+	return XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfs), &xfs_vfsops));
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DADDR_TO_AGNO)
-xfs_agnumber_t xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d);
 #define XFS_DADDR_TO_AGNO(mp,d)         xfs_daddr_to_agno(mp,d)
-#else
-
-static inline xfs_agnumber_t XFS_DADDR_TO_AGNO(xfs_mount_t *mp, xfs_daddr_t d)
+static inline xfs_agnumber_t
+xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d)
 {
-	d = XFS_BB_TO_FSBT(mp, d);
-	do_div(d, mp->m_sb.sb_agblocks);
-	return (xfs_agnumber_t) d;
+	xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d);
+	do_div(ld, mp->m_sb.sb_agblocks);
+	return (xfs_agnumber_t) ld;
 }
 
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DADDR_TO_AGBNO)
-xfs_agblock_t xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d);
 #define XFS_DADDR_TO_AGBNO(mp,d)        xfs_daddr_to_agbno(mp,d)
-#else
-
-static inline xfs_agblock_t XFS_DADDR_TO_AGBNO(xfs_mount_t *mp, xfs_daddr_t d)
+static inline xfs_agblock_t
+xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
 {
-	d = XFS_BB_TO_FSBT(mp, d);
-	return (xfs_agblock_t) do_div(d, mp->m_sb.sb_agblocks);
+	xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d);
+	return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
 }
 
-#endif
-
 /*
  * This structure is for use by the xfs_mod_incore_sb_batch() routine.
  */
@@ -542,6 +542,7 @@ extern xfs_mount_t *xfs_mount_init(void);
 extern void	xfs_mod_sb(xfs_trans_t *, __int64_t);
 extern void	xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
 extern int	xfs_mountfs(struct vfs *, xfs_mount_t *mp, int);
+extern void	xfs_mountfs_check_barriers(xfs_mount_t *mp);
 
 extern int	xfs_unmountfs(xfs_mount_t *, struct cred *);
 extern void	xfs_unmountfs_close(xfs_mount_t *, struct cred *);
@@ -555,12 +556,11 @@ extern int	xfs_readsb(xfs_mount_t *mp);
 extern void	xfs_freesb(xfs_mount_t *);
 extern void	xfs_do_force_shutdown(bhv_desc_t *, int, char *, int);
 extern int	xfs_syncsub(xfs_mount_t *, int, int, int *);
-extern xfs_agnumber_t	xfs_initialize_perag(xfs_mount_t *, xfs_agnumber_t);
+extern int	xfs_sync_inodes(xfs_mount_t *, int, int, int *);
+extern xfs_agnumber_t	xfs_initialize_perag(struct vfs *, xfs_mount_t *,
+						xfs_agnumber_t);
 extern void	xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t);
 
-extern struct vfsops xfs_vfsops;
-extern struct vnodeops xfs_vnodeops;
-
 extern struct xfs_dmops xfs_dmcore_stub;
 extern struct xfs_qmops xfs_qmcore_stub;
 extern struct xfs_ioops xfs_iocore_xfs;
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index 4f40c92863d..1408a32eef8 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -1,40 +1,25 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.	 Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -42,7 +27,8 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-
+#include "xfs_quota.h"
+#include "xfs_error.h"
 
 STATIC struct xfs_dquot *
 xfs_dqvopchown_default(
@@ -54,8 +40,79 @@ xfs_dqvopchown_default(
 	return NULL;
 }
 
+/*
+ * Clear the quotaflags in memory and in the superblock.
+ */
+int
+xfs_mount_reset_sbqflags(xfs_mount_t *mp)
+{
+	int			error;
+	xfs_trans_t		*tp;
+	unsigned long		s;
+
+	mp->m_qflags = 0;
+	/*
+	 * It is OK to look at sb_qflags here in mount path,
+	 * without SB_LOCK.
+	 */
+	if (mp->m_sb.sb_qflags == 0)
+		return 0;
+	s = XFS_SB_LOCK(mp);
+	mp->m_sb.sb_qflags = 0;
+	XFS_SB_UNLOCK(mp, s);
+
+	/*
+	 * if the fs is readonly, let the incore superblock run
+	 * with quotas off but don't flush the update out to disk
+	 */
+	if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
+		return 0;
+#ifdef QUOTADEBUG
+	xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
+#endif
+	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+	if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+				      XFS_DEFAULT_LOG_COUNT))) {
+		xfs_trans_cancel(tp, 0);
+		xfs_fs_cmn_err(CE_ALERT, mp,
+			"xfs_mount_reset_sbqflags: Superblock update failed!");
+		return error;
+	}
+	xfs_mod_sb(tp, XFS_SB_QFLAGS);
+	error = xfs_trans_commit(tp, 0, NULL);
+	return error;
+}
+
+STATIC int
+xfs_noquota_init(
+	xfs_mount_t	*mp,
+	uint		*needquotamount,
+	uint		*quotaflags)
+{
+	int		error = 0;
+
+	*quotaflags = 0;
+	*needquotamount = B_FALSE;
+
+	ASSERT(!XFS_IS_QUOTA_ON(mp));
+
+	/*
+	 * If a file system had quotas running earlier, but decided to
+	 * mount without -o uquota/pquota/gquota options, revoke the
+	 * quotachecked license.
+	 */
+	if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
+		cmn_err(CE_NOTE,
+                        "XFS resetting qflags for filesystem %s",
+                        mp->m_fsname);
+
+		error = xfs_mount_reset_sbqflags(mp);
+	}
+	return error;
+}
+
 xfs_qmops_t	xfs_qmcore_stub = {
-	.xfs_qminit		= (xfs_qminit_t) fs_noerr,
+	.xfs_qminit		= (xfs_qminit_t) xfs_noquota_init,
 	.xfs_qmdone		= (xfs_qmdone_t) fs_noerr,
 	.xfs_qmmount		= (xfs_qmmount_t) fs_noerr,
 	.xfs_qmunmount		= (xfs_qmunmount_t) fs_noerr,
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 7134576ae7f..82a08baf437 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_QUOTA_H__
 #define __XFS_QUOTA_H__
@@ -42,7 +28,7 @@
  * uid_t and gid_t are hard-coded to 32 bits in the inode.
  * Hence, an 'id' in a dquot is 32 bits..
  */
-typedef __int32_t	xfs_dqid_t;
+typedef __uint32_t	xfs_dqid_t;
 
 /*
  * Eventhough users may not have quota limits occupying all 64-bits,
@@ -59,28 +45,28 @@ typedef __uint16_t	xfs_qwarncnt_t;
  * to construct the on disk structure.
  */
 typedef struct	xfs_disk_dquot {
-/*16*/	u_int16_t	d_magic;	/* dquot magic = XFS_DQUOT_MAGIC */
-/*8 */	u_int8_t	d_version;	/* dquot version */
-/*8 */	u_int8_t	d_flags;	/* XFS_DQ_USER/PROJ/GROUP */
-/*32*/	xfs_dqid_t	d_id;		/* user,project,group id */
-/*64*/	xfs_qcnt_t	d_blk_hardlimit;/* absolute limit on disk blks */
-/*64*/	xfs_qcnt_t	d_blk_softlimit;/* preferred limit on disk blks */
-/*64*/	xfs_qcnt_t	d_ino_hardlimit;/* maximum # allocated inodes */
-/*64*/	xfs_qcnt_t	d_ino_softlimit;/* preferred inode limit */
-/*64*/	xfs_qcnt_t	d_bcount;	/* disk blocks owned by the user */
-/*64*/	xfs_qcnt_t	d_icount;	/* inodes owned by the user */
-/*32*/	__int32_t	d_itimer;	/* zero if within inode limits if not,
+	__be16		d_magic;	/* dquot magic = XFS_DQUOT_MAGIC */
+	__u8		d_version;	/* dquot version */
+	__u8		d_flags;	/* XFS_DQ_USER/PROJ/GROUP */
+	__be32		d_id;		/* user,project,group id */
+	__be64		d_blk_hardlimit;/* absolute limit on disk blks */
+	__be64		d_blk_softlimit;/* preferred limit on disk blks */
+	__be64		d_ino_hardlimit;/* maximum # allocated inodes */
+	__be64		d_ino_softlimit;/* preferred inode limit */
+	__be64		d_bcount;	/* disk blocks owned by the user */
+	__be64		d_icount;	/* inodes owned by the user */
+	__be32		d_itimer;	/* zero if within inode limits if not,
 					   this is when we refuse service */
-/*32*/	__int32_t	d_btimer;	/* similar to above; for disk blocks */
-/*16*/	xfs_qwarncnt_t	d_iwarns;	/* warnings issued wrt num inodes */
-/*16*/	xfs_qwarncnt_t	d_bwarns;	/* warnings issued wrt disk blocks */
-/*32*/	__int32_t	d_pad0;		/* 64 bit align */
-/*64*/	xfs_qcnt_t	d_rtb_hardlimit;/* absolute limit on realtime blks */
-/*64*/	xfs_qcnt_t	d_rtb_softlimit;/* preferred limit on RT disk blks */
-/*64*/	xfs_qcnt_t	d_rtbcount;	/* realtime blocks owned */
-/*32*/	__int32_t	d_rtbtimer;	/* similar to above; for RT disk blocks */
-/*16*/	xfs_qwarncnt_t	d_rtbwarns;	/* warnings issued wrt RT disk blocks */
-/*16*/	__uint16_t	d_pad;
+	__be32		d_btimer;	/* similar to above; for disk blocks */
+	__be16		d_iwarns;	/* warnings issued wrt num inodes */
+	__be16		d_bwarns;	/* warnings issued wrt disk blocks */
+	__be32		d_pad0;		/* 64 bit align */
+	__be64		d_rtb_hardlimit;/* absolute limit on realtime blks */
+	__be64		d_rtb_softlimit;/* preferred limit on RT disk blks */
+	__be64		d_rtbcount;	/* realtime blocks owned */
+	__be32		d_rtbtimer;	/* similar to above; for RT disk blocks */
+	__be16		d_rtbwarns;	/* warnings issued wrt RT disk blocks */
+	__be16		d_pad;
 } xfs_disk_dquot_t;
 
 /*
@@ -160,6 +146,20 @@ typedef struct xfs_qoff_logformat {
 #define XFS_GQUOTA_ACCT	0x0040  /* group quota accounting ON */
 
 /*
+ * Quota Accounting/Enforcement flags
+ */
+#define XFS_ALL_QUOTA_ACCT	\
+		(XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
+#define XFS_ALL_QUOTA_ENFD	(XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD)
+#define XFS_ALL_QUOTA_CHKD	(XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD)
+
+#define XFS_IS_QUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
+#define XFS_IS_QUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ENFD)
+#define XFS_IS_UQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_UQUOTA_ACCT)
+#define XFS_IS_PQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_PQUOTA_ACCT)
+#define XFS_IS_GQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_GQUOTA_ACCT)
+
+/*
  * Incore only flags for quotaoff - these bits get cleared when quota(s)
  * are in the process of getting turned off. These flags are in m_qflags but
  * never in sb_qflags.
@@ -362,6 +362,7 @@ typedef struct xfs_dqtrxops {
 				f | XFS_QMOPT_RES_REGBLKS)
 
 extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *);
+extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
 
 extern struct bhv_vfsops xfs_qmops;
 
diff --git a/fs/xfs/xfs_refcache.h b/fs/xfs/xfs_refcache.h
index cd8ddfd35d6..2dec79edb51 100644
--- a/fs/xfs/xfs_refcache.h
+++ b/fs/xfs/xfs_refcache.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_REFCACHE_H__
 #define __XFS_REFCACHE_H__
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 23b48ac1cb7..4d4e8f4e768 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -1,60 +1,45 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
+#include "xfs_inode_item.h"
 #include "xfs_bmap.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_refcache.h"
 #include "xfs_utils.h"
 #include "xfs_trans_space.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir_leaf.h"
 
 
@@ -620,8 +605,6 @@ xfs_rename(
 		IRELE(target_ip);
 	}
 
-	FSC_NOTIFY_NAME_CHANGED(XFS_ITOV(src_ip));
-
 	IRELE(src_ip);
 
 	/* Fall through to std_return with error = 0 or errno from
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 2c37822d101..06fc061c50f 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1,44 +1,26 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
-/*
- * Free realtime space allocation for XFS.
- */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -46,19 +28,18 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
-#include "xfs_bit.h"
 #include "xfs_rtalloc.h"
 #include "xfs_fsops.h"
 #include "xfs_error.h"
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index e2710264c05..0e0b4d2ec20 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_RTALLOC_H__
 #define	__XFS_RTALLOC_H__
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index d3ff7aef33b..c4b20872f07 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -1,40 +1,26 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -42,20 +28,20 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
 #include "xfs_itable.h"
 #include "xfs_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
 #include "xfs_attr.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode_item.h"
-#include "xfs_inode.h"
 #include "xfs_bmap.h"
 #include "xfs_acl.h"
 #include "xfs_mac.h"
@@ -264,7 +250,7 @@ xfs_ioerror_alert(
 {
 	cmn_err(CE_ALERT,
  "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx"
- "       (\"%s\") error %d buf count %u",
+ "       (\"%s\") error %d buf count %zd",
 		(!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
 		XFS_BUFTARG_NAME(bp->pb_target),
 		(__uint64_t)blkno,
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index c8b10bf8f53..de85eefb796 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef	__XFS_RW_H__
 #define	__XFS_RW_H__
@@ -68,87 +54,44 @@ struct xfs_mount;
  * file is a real time file or not, because the bmap code
  * does.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_DB)
-xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
 #define	XFS_FSB_TO_DB(ip,fsb)	xfs_fsb_to_db(ip,fsb)
-#else
-#define	XFS_FSB_TO_DB(ip,fsb) \
-		(((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) ? \
+static inline xfs_daddr_t
+xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
+{
+	return (((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) ? \
 		 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
-		 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)))
-#endif
-
-#define XFS_FSB_TO_DB_IO(io,fsb) \
-		(((io)->io_flags & XFS_IOCORE_RT) ? \
+		 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
+}
+#define XFS_FSB_TO_DB_IO(io,fsb) xfs_fsb_to_db_io(io,fsb)
+static inline xfs_daddr_t
+xfs_fsb_to_db_io(struct xfs_iocore *io, xfs_fsblock_t fsb)
+{
+	return (((io)->io_flags & XFS_IOCORE_RT) ? \
 		 XFS_FSB_TO_BB((io)->io_mount, (fsb)) : \
-		 XFS_FSB_TO_DADDR((io)->io_mount, (fsb)))
+		 XFS_FSB_TO_DADDR((io)->io_mount, (fsb)));
+}
 
 /*
  * Prototypes for functions in xfs_rw.c.
  */
-
-int
-xfs_write_clear_setuid(
-	struct xfs_inode	*ip);
-
-int
-xfs_bwrite(
-	struct xfs_mount	*mp,
-	struct xfs_buf		*bp);
-
-int
-xfs_bioerror(
-	struct xfs_buf		*b);
-
-int
-xfs_bioerror_relse(
-	struct xfs_buf		*b);
-
-int
-xfs_read_buf(
-	struct xfs_mount	*mp,
-	xfs_buftarg_t		*target,
-	xfs_daddr_t		blkno,
-	int			len,
-	uint			flags,
-	struct xfs_buf		**bpp);
-
-void
-xfs_ioerror_alert(
-	char			*func,
-	struct xfs_mount	*mp,
-	xfs_buf_t		*bp,
-	xfs_daddr_t		blkno);
-
+extern int xfs_write_clear_setuid(struct xfs_inode *ip);
+extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
+extern int xfs_bioerror(struct xfs_buf *bp);
+extern int xfs_bioerror_relse(struct xfs_buf *bp);
+extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp,
+			xfs_daddr_t blkno, int len, uint flags,
+			struct xfs_buf **bpp);
+extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
+				xfs_buf_t *bp, xfs_daddr_t blkno);
 
 /*
  * Prototypes for functions in xfs_vnodeops.c.
  */
-
-int
-xfs_rwlock(
-	bhv_desc_t		*bdp,
-	vrwlock_t		write_lock);
-
-void
-xfs_rwunlock(
-	bhv_desc_t		*bdp,
-	vrwlock_t		write_lock);
-
-int
-xfs_change_file_space(
-	bhv_desc_t		*bdp,
-	int			cmd,
-	xfs_flock64_t		*bf,
-	xfs_off_t		offset,
-	cred_t			*credp,
-	int			flags);
-
-int
-xfs_set_dmattrs(
-	bhv_desc_t		*bdp,
-	u_int			evmask,
-	u_int16_t		state,
-	cred_t			*credp);
+extern int xfs_rwlock(bhv_desc_t *bdp, vrwlock_t write_lock);
+extern void xfs_rwunlock(bhv_desc_t *bdp, vrwlock_t write_lock);
+extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf,
+				 xfs_off_t offset, cred_t *credp, int flags);
+extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state,
+			   cred_t *credp);
 
 #endif /* __XFS_RW_H__ */
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index ad090a834ce..4a17d335f89 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_SB_H__
 #define	__XFS_SB_H__
@@ -72,7 +58,8 @@ struct xfs_mount;
 	 XFS_SB_VERSION_DALIGNBIT | \
 	 XFS_SB_VERSION_SHAREDBIT | \
 	 XFS_SB_VERSION_LOGV2BIT | \
-	 XFS_SB_VERSION_SECTORBIT)
+	 XFS_SB_VERSION_SECTORBIT | \
+	 XFS_SB_VERSION_MOREBITSBIT)
 #define	XFS_SB_VERSION_OKSASHBITS	\
 	(XFS_SB_VERSION_NUMBITS | \
 	 XFS_SB_VERSION_REALFBITS | \
@@ -103,12 +90,15 @@ struct xfs_mount;
  */
 #define XFS_SB_VERSION2_REALFBITS	0x00ffffff	/* Mask: features */
 #define XFS_SB_VERSION2_RESERVED1BIT	0x00000001
+#define XFS_SB_VERSION2_RESERVED2BIT	0x00000002
+#define XFS_SB_VERSION2_RESERVED4BIT	0x00000004
+#define XFS_SB_VERSION2_ATTR2BIT	0x00000008	/* Inline attr rework */
 #define XFS_SB_VERSION2_SASHFBITS	0xff000000	/* Mask: features that
 							   require changing
 							   PROM and SASH */
 
 #define	XFS_SB_VERSION2_OKREALFBITS	\
-	(0)
+	(XFS_SB_VERSION2_ATTR2BIT)
 #define	XFS_SB_VERSION2_OKSASHFBITS	\
 	(0)
 #define XFS_SB_VERSION2_OKREALBITS	\
@@ -118,8 +108,7 @@ struct xfs_mount;
 /*
  * mkfs macro to set up sb_features2 word
  */
-#define	XFS_SB_VERSION2_MKFS(xyz)	\
-	((xyz) ? 0 : 0)
+#define	XFS_SB_VERSION2_MKFS(resvd1, sbcntr)	0
 
 typedef struct xfs_sb
 {
@@ -176,7 +165,7 @@ typedef struct xfs_sb
 	__uint8_t	sb_logsectlog;	/* log2 of the log sector size */
 	__uint16_t	sb_logsectsize;	/* sector size for the log, bytes */
 	__uint32_t	sb_logsunit;	/* stripe unit size for the log */
-	__uint32_t	sb_features2;	/* additonal feature bits */
+	__uint32_t	sb_features2;	/* additional feature bits */
 } xfs_sb_t;
 
 /*
@@ -216,12 +205,15 @@ typedef enum {
 #define XFS_SB_SHARED_VN	XFS_SB_MVAL(SHARED_VN)
 #define XFS_SB_UNIT		XFS_SB_MVAL(UNIT)
 #define XFS_SB_WIDTH		XFS_SB_MVAL(WIDTH)
+#define XFS_SB_FEATURES2	XFS_SB_MVAL(FEATURES2)
 #define	XFS_SB_NUM_BITS		((int)XFS_SBS_FIELDCOUNT)
 #define	XFS_SB_ALL_BITS		((1LL << XFS_SB_NUM_BITS) - 1)
 #define	XFS_SB_MOD_BITS		\
 	(XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
 	 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
-	 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH)
+	 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
+	 XFS_SB_FEATURES2)
+
 
 /*
  * Misc. Flags - warning - these will be cleared by xfs_repair unless
@@ -235,42 +227,33 @@ typedef enum {
  */
 #define XFS_SB_MAX_SHARED_VN	0
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_NUM)
-int xfs_sb_version_num(xfs_sb_t *sbp);
-#define	XFS_SB_VERSION_NUM(sbp)	xfs_sb_version_num(sbp)
-#else
 #define	XFS_SB_VERSION_NUM(sbp)	((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
-#endif
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_GOOD_VERSION)
-int xfs_sb_good_version(xfs_sb_t *sbp);
 #define	XFS_SB_GOOD_VERSION(sbp)	xfs_sb_good_version(sbp)
-#else
-#define	XFS_SB_GOOD_VERSION_INT(sbp)	\
-	((((sbp)->sb_versionnum >= XFS_SB_VERSION_1) && \
-	  ((sbp)->sb_versionnum <= XFS_SB_VERSION_3)) || \
-	   ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
-	    !(((sbp)->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || \
-	      (((sbp)->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && \
-	       ((sbp)->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS)))
-
 #ifdef __KERNEL__
-#define	XFS_SB_GOOD_VERSION(sbp)	\
-	(XFS_SB_GOOD_VERSION_INT(sbp) && \
-	  (sbp)->sb_shared_vn <= XFS_SB_MAX_SHARED_VN) ))
+static inline int xfs_sb_good_version(xfs_sb_t *sbp)
+{
+	return (((sbp->sb_versionnum >= XFS_SB_VERSION_1) && \
+		  (sbp->sb_versionnum <= XFS_SB_VERSION_3)) || \
+		   ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+		    !((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || \
+		      ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && \
+		       (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) && \
+	  	    (sbp->sb_shared_vn <= XFS_SB_MAX_SHARED_VN)));
+}
 #else
-/*
- * extra 2 paren's here (( to unconfuse paren-matching editors
- * like vi because XFS_SB_GOOD_VERSION_INT is a partial expression
- * and the two XFS_SB_GOOD_VERSION's each 2 more close paren's to
- * complete the expression.
- */
-#define XFS_SB_GOOD_VERSION(sbp)	\
-	(XFS_SB_GOOD_VERSION_INT(sbp) && \
-	  (!((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) || \
-	   (sbp)->sb_shared_vn <= XFS_SB_MAX_SHARED_VN)) ))
+static inline int xfs_sb_good_version(xfs_sb_t *sbp)
+{
+	return (((sbp->sb_versionnum >= XFS_SB_VERSION_1) && \
+		  (sbp->sb_versionnum <= XFS_SB_VERSION_3)) || \
+		   ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+		    !((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || \
+		      ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && \
+		       (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) && \
+		  (!(sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) || \
+		   (sbp->sb_shared_vn <= XFS_SB_MAX_SHARED_VN))));
+}
 #endif /* __KERNEL__ */
-#endif
 
 #define	XFS_SB_GOOD_SASH_VERSION(sbp)	\
 	((((sbp)->sb_versionnum >= XFS_SB_VERSION_1) && \
@@ -278,275 +261,218 @@ int xfs_sb_good_version(xfs_sb_t *sbp);
 	 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 	  !((sbp)->sb_versionnum & ~XFS_SB_VERSION_OKSASHBITS)))
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_TONEW)
-unsigned xfs_sb_version_tonew(unsigned v);
 #define	XFS_SB_VERSION_TONEW(v)	xfs_sb_version_tonew(v)
-#else
-#define	XFS_SB_VERSION_TONEW(v)	\
-	((((v) == XFS_SB_VERSION_1) ? \
+static inline unsigned xfs_sb_version_tonew(unsigned v)
+{
+	return ((((v) == XFS_SB_VERSION_1) ? \
 		0 : \
 		(((v) == XFS_SB_VERSION_2) ? \
 			XFS_SB_VERSION_ATTRBIT : \
 			(XFS_SB_VERSION_ATTRBIT | XFS_SB_VERSION_NLINKBIT))) | \
-	 XFS_SB_VERSION_4)
-#endif
+		XFS_SB_VERSION_4);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_TOOLD)
-unsigned xfs_sb_version_toold(unsigned v);
 #define	XFS_SB_VERSION_TOOLD(v)	xfs_sb_version_toold(v)
-#else
-#define	XFS_SB_VERSION_TOOLD(v)	\
-	(((v) & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) ? \
+static inline unsigned xfs_sb_version_toold(unsigned v)
+{
+	return (((v) & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) ? \
 		0 : \
 		(((v) & XFS_SB_VERSION_NLINKBIT) ? \
 			XFS_SB_VERSION_3 : \
 			(((v) & XFS_SB_VERSION_ATTRBIT) ?  \
 				XFS_SB_VERSION_2 : \
-				XFS_SB_VERSION_1)))
-#endif
+				XFS_SB_VERSION_1)));
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASATTR)
-int xfs_sb_version_hasattr(xfs_sb_t *sbp);
 #define	XFS_SB_VERSION_HASATTR(sbp)	xfs_sb_version_hasattr(sbp)
-#else
-#define	XFS_SB_VERSION_HASATTR(sbp)	\
-	(((sbp)->sb_versionnum == XFS_SB_VERSION_2) || \
-	 ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \
-	 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
-	  ((sbp)->sb_versionnum & XFS_SB_VERSION_ATTRBIT)))
-#endif
+static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp)
+{
+	return ((sbp)->sb_versionnum == XFS_SB_VERSION_2) || \
+		 ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \
+		 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+		  ((sbp)->sb_versionnum & XFS_SB_VERSION_ATTRBIT));
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDATTR)
-void xfs_sb_version_addattr(xfs_sb_t *sbp);
 #define	XFS_SB_VERSION_ADDATTR(sbp)	xfs_sb_version_addattr(sbp)
-#else
-#define	XFS_SB_VERSION_ADDATTR(sbp)	\
-	((sbp)->sb_versionnum = \
-	 (((sbp)->sb_versionnum == XFS_SB_VERSION_1) ? \
+static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
+{
+	(sbp)->sb_versionnum = (((sbp)->sb_versionnum == XFS_SB_VERSION_1) ? \
 		XFS_SB_VERSION_2 : \
 		((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) ? \
 			((sbp)->sb_versionnum | XFS_SB_VERSION_ATTRBIT) : \
-			(XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT))))
-#endif
+			(XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT)));
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASNLINK)
-int xfs_sb_version_hasnlink(xfs_sb_t *sbp);
 #define	XFS_SB_VERSION_HASNLINK(sbp)	xfs_sb_version_hasnlink(sbp)
-#else
-#define	XFS_SB_VERSION_HASNLINK(sbp)	\
-	(((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \
-	 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
-	  ((sbp)->sb_versionnum & XFS_SB_VERSION_NLINKBIT)))
-#endif
+static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp)
+{
+	return ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \
+		 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+		  ((sbp)->sb_versionnum & XFS_SB_VERSION_NLINKBIT));
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDNLINK)
-void xfs_sb_version_addnlink(xfs_sb_t *sbp);
 #define	XFS_SB_VERSION_ADDNLINK(sbp)	xfs_sb_version_addnlink(sbp)
-#else
-#define	XFS_SB_VERSION_ADDNLINK(sbp)	\
-	((sbp)->sb_versionnum = \
-	 ((sbp)->sb_versionnum <= XFS_SB_VERSION_2 ? \
+static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp)
+{
+	(sbp)->sb_versionnum = ((sbp)->sb_versionnum <= XFS_SB_VERSION_2 ? \
 		XFS_SB_VERSION_3 : \
-		((sbp)->sb_versionnum | XFS_SB_VERSION_NLINKBIT)))
-#endif
+		((sbp)->sb_versionnum | XFS_SB_VERSION_NLINKBIT));
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASQUOTA)
-int xfs_sb_version_hasquota(xfs_sb_t *sbp);
 #define	XFS_SB_VERSION_HASQUOTA(sbp)	xfs_sb_version_hasquota(sbp)
-#else
-#define	XFS_SB_VERSION_HASQUOTA(sbp)	\
-	((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
-	 ((sbp)->sb_versionnum & XFS_SB_VERSION_QUOTABIT))
-#endif
+static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp)
+{
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+		((sbp)->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDQUOTA)
-void xfs_sb_version_addquota(xfs_sb_t *sbp);
 #define	XFS_SB_VERSION_ADDQUOTA(sbp)	xfs_sb_version_addquota(sbp)
-#else
-#define	XFS_SB_VERSION_ADDQUOTA(sbp)	\
-	((sbp)->sb_versionnum = \
-	 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 ? \
-		((sbp)->sb_versionnum | XFS_SB_VERSION_QUOTABIT) : \
-		(XFS_SB_VERSION_TONEW((sbp)->sb_versionnum) | \
-		 XFS_SB_VERSION_QUOTABIT)))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASALIGN)
-int xfs_sb_version_hasalign(xfs_sb_t *sbp);
+static inline void xfs_sb_version_addquota(xfs_sb_t *sbp)
+{
+	(sbp)->sb_versionnum = \
+		 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 ? \
+			((sbp)->sb_versionnum | XFS_SB_VERSION_QUOTABIT) : \
+			(XFS_SB_VERSION_TONEW((sbp)->sb_versionnum) | \
+			 XFS_SB_VERSION_QUOTABIT));
+}
+
 #define	XFS_SB_VERSION_HASALIGN(sbp)	xfs_sb_version_hasalign(sbp)
-#else
-#define	XFS_SB_VERSION_HASALIGN(sbp)	\
-	((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
-	 ((sbp)->sb_versionnum & XFS_SB_VERSION_ALIGNBIT))
-#endif
+static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp)
+{
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+		((sbp)->sb_versionnum & XFS_SB_VERSION_ALIGNBIT);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_SUBALIGN)
-void xfs_sb_version_subalign(xfs_sb_t *sbp);
 #define	XFS_SB_VERSION_SUBALIGN(sbp)	xfs_sb_version_subalign(sbp)
-#else
-#define	XFS_SB_VERSION_SUBALIGN(sbp)	\
-	((sbp)->sb_versionnum = \
-	 XFS_SB_VERSION_TOOLD((sbp)->sb_versionnum & ~XFS_SB_VERSION_ALIGNBIT))
-#endif
+static inline void xfs_sb_version_subalign(xfs_sb_t *sbp)
+{
+	(sbp)->sb_versionnum = \
+	 XFS_SB_VERSION_TOOLD((sbp)->sb_versionnum & ~XFS_SB_VERSION_ALIGNBIT);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASDALIGN)
-int xfs_sb_version_hasdalign(xfs_sb_t *sbp);
 #define XFS_SB_VERSION_HASDALIGN(sbp)	xfs_sb_version_hasdalign(sbp)
-#else
-#define XFS_SB_VERSION_HASDALIGN(sbp)	\
-	((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
-	 ((sbp)->sb_versionnum & XFS_SB_VERSION_DALIGNBIT))
-#endif
+static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp)
+{
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+		((sbp)->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDDALIGN)
-int xfs_sb_version_adddalign(xfs_sb_t *sbp);
 #define XFS_SB_VERSION_ADDDALIGN(sbp)	xfs_sb_version_adddalign(sbp)
-#else
-#define XFS_SB_VERSION_ADDDALIGN(sbp)	\
-	((sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum | XFS_SB_VERSION_DALIGNBIT))
-#endif
+static inline int xfs_sb_version_adddalign(xfs_sb_t *sbp)
+{
+	return (sbp)->sb_versionnum = \
+		((sbp)->sb_versionnum | XFS_SB_VERSION_DALIGNBIT);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASSHARED)
-int xfs_sb_version_hasshared(xfs_sb_t *sbp);
 #define XFS_SB_VERSION_HASSHARED(sbp)	xfs_sb_version_hasshared(sbp)
-#else
-#define XFS_SB_VERSION_HASSHARED(sbp)	\
-	((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
-	 ((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT))
-#endif
+static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp)
+{
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+		((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDSHARED)
-int xfs_sb_version_addshared(xfs_sb_t *sbp);
 #define XFS_SB_VERSION_ADDSHARED(sbp)	xfs_sb_version_addshared(sbp)
-#else
-#define XFS_SB_VERSION_ADDSHARED(sbp)	\
-	((sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum | XFS_SB_VERSION_SHAREDBIT))
-#endif
+static inline int xfs_sb_version_addshared(xfs_sb_t *sbp)
+{
+	return (sbp)->sb_versionnum = \
+		((sbp)->sb_versionnum | XFS_SB_VERSION_SHAREDBIT);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_SUBSHARED)
-int xfs_sb_version_subshared(xfs_sb_t *sbp);
 #define XFS_SB_VERSION_SUBSHARED(sbp)	xfs_sb_version_subshared(sbp)
-#else
-#define XFS_SB_VERSION_SUBSHARED(sbp)	\
-	((sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum & ~XFS_SB_VERSION_SHAREDBIT))
-#endif
+static inline int xfs_sb_version_subshared(xfs_sb_t *sbp)
+{
+	return (sbp)->sb_versionnum = \
+		((sbp)->sb_versionnum & ~XFS_SB_VERSION_SHAREDBIT);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASDIRV2)
-int xfs_sb_version_hasdirv2(xfs_sb_t *sbp);
 #define XFS_SB_VERSION_HASDIRV2(sbp)	xfs_sb_version_hasdirv2(sbp)
-#else
-#define XFS_SB_VERSION_HASDIRV2(sbp)	\
-	((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
-	 ((sbp)->sb_versionnum & XFS_SB_VERSION_DIRV2BIT))
-#endif
+static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp)
+{
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+		((sbp)->sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASLOGV2)
-int xfs_sb_version_haslogv2(xfs_sb_t *sbp);
 #define XFS_SB_VERSION_HASLOGV2(sbp)   xfs_sb_version_haslogv2(sbp)
-#else
-#define XFS_SB_VERSION_HASLOGV2(sbp)   \
-	((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
-	((sbp)->sb_versionnum & XFS_SB_VERSION_LOGV2BIT))
-#endif
+static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp)
+{
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+		((sbp)->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASEXTFLGBIT)
-int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp);
 #define XFS_SB_VERSION_HASEXTFLGBIT(sbp)	xfs_sb_version_hasextflgbit(sbp)
-#else
-#define XFS_SB_VERSION_HASEXTFLGBIT(sbp)	\
-	((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
-	 ((sbp)->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT))
-#endif
+static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp)
+{
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+		((sbp)->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDEXTFLGBIT)
-int xfs_sb_version_addextflgbit(xfs_sb_t *sbp);
 #define XFS_SB_VERSION_ADDEXTFLGBIT(sbp)	xfs_sb_version_addextflgbit(sbp)
-#else
-#define XFS_SB_VERSION_ADDEXTFLGBIT(sbp)	\
-	((sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum | XFS_SB_VERSION_EXTFLGBIT))
-#endif
+static inline int xfs_sb_version_addextflgbit(xfs_sb_t *sbp)
+{
+	return (sbp)->sb_versionnum = \
+		((sbp)->sb_versionnum | XFS_SB_VERSION_EXTFLGBIT);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_SUBEXTFLGBIT)
-int xfs_sb_version_subextflgbit(xfs_sb_t *sbp);
 #define XFS_SB_VERSION_SUBEXTFLGBIT(sbp)	xfs_sb_version_subextflgbit(sbp)
-#else
-#define XFS_SB_VERSION_SUBEXTFLGBIT(sbp)	\
-	((sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum & ~XFS_SB_VERSION_EXTFLGBIT))
-#endif
+static inline int xfs_sb_version_subextflgbit(xfs_sb_t *sbp)
+{
+	return (sbp)->sb_versionnum = \
+		((sbp)->sb_versionnum & ~XFS_SB_VERSION_EXTFLGBIT);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASSECTOR)
-int xfs_sb_version_hassector(xfs_sb_t *sbp);
 #define XFS_SB_VERSION_HASSECTOR(sbp)   xfs_sb_version_hassector(sbp)
-#else
-#define XFS_SB_VERSION_HASSECTOR(sbp)   \
-	((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
-	((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT))
-#endif
+static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
+{
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+		((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
+}
 
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASMOREBITSBIT)
-int xfs_sb_version_hasmorebits(xfs_sb_t *sbp);
 #define XFS_SB_VERSION_HASMOREBITS(sbp)	xfs_sb_version_hasmorebits(sbp)
-#else
-#define XFS_SB_VERSION_HASMOREBITS(sbp)	\
-	((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
-	 ((sbp)->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT))
-#endif
+static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
+{
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+		((sbp)->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT);
+}
 
 /*
  * sb_features2 bit version macros.
  *
- * For example, for a bit defined as XFS_SB_VERSION2_YBIT, has a macro:
+ * For example, for a bit defined as XFS_SB_VERSION2_FUNBIT, has a macro:
  *
- * SB_VERSION_HASYBIT(xfs_sb_t *sbp)
+ * SB_VERSION_HASFUNBIT(xfs_sb_t *sbp)
  *	((XFS_SB_VERSION_HASMOREBITS(sbp) &&
- *	 ((sbp)->sb_versionnum & XFS_SB_VERSION2_YBIT)
+ *	 ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT)
  */
 
+#define XFS_SB_VERSION_HASATTR2(sbp)	xfs_sb_version_hasattr2(sbp)
+static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
+{
+	return (XFS_SB_VERSION_HASMOREBITS(sbp)) &&	\
+		((sbp)->sb_features2 & XFS_SB_VERSION2_ATTR2BIT);
+}
+
+#define XFS_SB_VERSION_ADDATTR2(sbp)	xfs_sb_version_addattr2(sbp)
+static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
+{
+	((sbp)->sb_versionnum =	\
+		((sbp)->sb_versionnum | XFS_SB_VERSION_MOREBITSBIT),	\
+	((sbp)->sb_features2 =	\
+		((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT)));
+}
+
 /*
  * end of superblock version macros
  */
 
-#define XFS_SB_DADDR	((xfs_daddr_t)0)	/* daddr in filesystem/ag */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_BLOCK)
-xfs_agblock_t xfs_sb_block(struct xfs_mount *mp);
-#define	XFS_SB_BLOCK(mp)	xfs_sb_block(mp)
-#else
+#define XFS_SB_DADDR		((xfs_daddr_t)0) /* daddr in filesystem/ag */
 #define	XFS_SB_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_SB_DADDR)
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_HDR_BLOCK)
-xfs_agblock_t xfs_hdr_block(struct xfs_mount *mp, xfs_daddr_t d);
-#define	XFS_HDR_BLOCK(mp,d)	xfs_hdr_block(mp,d)
-#else
-#define	XFS_HDR_BLOCK(mp,d)	((xfs_agblock_t)(XFS_BB_TO_FSBT(mp,d)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DADDR_TO_FSB)
-xfs_fsblock_t xfs_daddr_to_fsb(struct xfs_mount *mp, xfs_daddr_t d);
-#define	XFS_DADDR_TO_FSB(mp,d)		xfs_daddr_to_fsb(mp,d)
-#else
-#define	XFS_DADDR_TO_FSB(mp,d) \
-	XFS_AGB_TO_FSB(mp, XFS_DADDR_TO_AGNO(mp,d), XFS_DADDR_TO_AGBNO(mp,d))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_DADDR)
-xfs_daddr_t xfs_fsb_to_daddr(struct xfs_mount *mp, xfs_fsblock_t fsbno);
-#define	XFS_FSB_TO_DADDR(mp,fsbno)	xfs_fsb_to_daddr(mp,fsbno)
-#else
-#define	XFS_FSB_TO_DADDR(mp,fsbno) \
-	XFS_AGB_TO_DADDR(mp, XFS_FSB_TO_AGNO(mp,fsbno), \
-			 XFS_FSB_TO_AGBNO(mp,fsbno))
-#endif
-
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_SBP)
-xfs_sb_t *xfs_buf_to_sbp(struct xfs_buf *bp);
-#define XFS_BUF_TO_SBP(bp)	xfs_buf_to_sbp(bp)
-#else
 #define XFS_BUF_TO_SBP(bp)	((xfs_sb_t *)XFS_BUF_PTR(bp))
-#endif
+
+#define	XFS_HDR_BLOCK(mp,d)	((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d))
+#define	XFS_DADDR_TO_FSB(mp,d)	XFS_AGB_TO_FSB(mp, \
+			XFS_DADDR_TO_AGNO(mp,d), XFS_DADDR_TO_AGBNO(mp,d))
+#define	XFS_FSB_TO_DADDR(mp,fsbno)	XFS_AGB_TO_DADDR(mp, \
+			XFS_FSB_TO_AGNO(mp,fsbno), XFS_FSB_TO_AGBNO(mp,fsbno))
 
 /*
  * File system sector to basic block conversions.
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 06dfca531f7..279e043d732 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1,40 +1,26 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -43,21 +29,21 @@
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_error.h"
-#include "xfs_trans_priv.h"
-#include "xfs_alloc_btree.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_alloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_alloc.h"
 #include "xfs_bmap.h"
-#include "xfs_da_btree.h"
 #include "xfs_quota.h"
+#include "xfs_trans_priv.h"
 #include "xfs_trans_space.h"
 
 
@@ -190,12 +176,8 @@ xfs_trans_dup(
 	XFS_LBC_INIT(&(ntp->t_busy));
 
 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
-
-#if defined(XLOG_NOLOG) || defined(DEBUG)
-	ASSERT(!xlog_debug || tp->t_ticket != NULL);
-#else
 	ASSERT(tp->t_ticket != NULL);
-#endif
+
 	ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE);
 	ntp->t_ticket = tp->t_ticket;
 	ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
@@ -276,7 +258,7 @@ xfs_trans_reserve(
 
 		error = xfs_log_reserve(tp->t_mountp, logspace, logcount,
 					&tp->t_ticket,
-					XFS_TRANSACTION, log_flags);
+					XFS_TRANSACTION, log_flags, tp->t_type);
 		if (error) {
 			goto undo_blocks;
 		}
@@ -661,10 +643,11 @@ xfs_trans_unreserve_and_mod_sb(
  */
  /*ARGSUSED*/
 int
-xfs_trans_commit(
+_xfs_trans_commit(
 	xfs_trans_t	*tp,
 	uint		flags,
-	xfs_lsn_t	*commit_lsn_p)
+	xfs_lsn_t	*commit_lsn_p,
+	int		*log_flushed)
 {
 	xfs_log_iovec_t		*log_vector;
 	int			nvec;
@@ -676,9 +659,6 @@ xfs_trans_commit(
 	int			sync;
 #define	XFS_TRANS_LOGVEC_COUNT	16
 	xfs_log_iovec_t		log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
-#if defined(XLOG_NOLOG) || defined(DEBUG)
-	static xfs_lsn_t	trans_lsn = 1;
-#endif
 	void			*commit_iclog;
 	int			shutdown;
 
@@ -729,11 +709,7 @@ shut_us_down:
 			*commit_lsn_p = commit_lsn;
 		return (shutdown);
 	}
-#if defined(XLOG_NOLOG) || defined(DEBUG)
-	ASSERT(!xlog_debug || tp->t_ticket != NULL);
-#else
 	ASSERT(tp->t_ticket != NULL);
-#endif
 
 	/*
 	 * If we need to update the superblock, then do it now.
@@ -750,14 +726,10 @@ shut_us_down:
 	 * by using a vector from the stack when it fits.
 	 */
 	nvec = xfs_trans_count_vecs(tp);
-
 	if (nvec == 0) {
 		xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
 		goto shut_us_down;
-	}
-
-
-	if (nvec <= XFS_TRANS_LOGVEC_COUNT) {
+	} else if (nvec <= XFS_TRANS_LOGVEC_COUNT) {
 		log_vector = log_vector_fast;
 	} else {
 		log_vector = (xfs_log_iovec_t *)kmem_alloc(nvec *
@@ -771,30 +743,14 @@ shut_us_down:
 	 */
 	xfs_trans_fill_vecs(tp, log_vector);
 
-	/*
-	 * Ignore errors here. xfs_log_done would do the right thing.
-	 * We need to put the ticket, etc. away.
-	 */
-	error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket,
-			     &(tp->t_lsn));
+	error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn));
 
-#if defined(XLOG_NOLOG) || defined(DEBUG)
-	if (xlog_debug) {
-		commit_lsn = xfs_log_done(mp, tp->t_ticket,
-					  &commit_iclog, log_flags);
-	} else {
-		commit_lsn = 0;
-		tp->t_lsn = trans_lsn++;
-	}
-#else
 	/*
-	 * This is the regular case.  At this point (after the call finishes),
-	 * the transaction is committed incore and could go out to disk at
-	 * any time.  However, all the items associated with the transaction
-	 * are still locked and pinned in memory.
+	 * The transaction is committed incore here, and can go out to disk
+	 * at any time after this call.  However, all the items associated
+	 * with the transaction are still locked and pinned in memory.
 	 */
 	commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags);
-#endif
 
 	tp->t_commit_lsn = commit_lsn;
 	if (nvec > XFS_TRANS_LOGVEC_COUNT) {
@@ -893,9 +849,11 @@ shut_us_down:
 	 * log out now and wait for it.
 	 */
 	if (sync) {
-		if (!error)
-			error = xfs_log_force(mp, commit_lsn,
-				      XFS_LOG_FORCE | XFS_LOG_SYNC);
+		if (!error) {
+			error = _xfs_log_force(mp, commit_lsn,
+				      XFS_LOG_FORCE | XFS_LOG_SYNC,
+				      log_flushed);
+		}
 		XFS_STATS_INC(xs_trans_sync);
 	} else {
 		XFS_STATS_INC(xs_trans_async);
@@ -1032,6 +990,7 @@ xfs_trans_fill_vecs(
 	tp->t_header.th_num_items = nitems;
 	log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
 	log_vector->i_len = sizeof(xfs_trans_header_t);
+	XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_TRANSHDR);
 }
 
 
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index ec541d66fa2..a889963fdd1 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef	__XFS_TRANS_H__
 #define	__XFS_TRANS_H__
@@ -112,6 +98,7 @@ typedef struct xfs_trans_header {
 #define	XFS_TRANS_GROWFSRT_ZERO		38
 #define	XFS_TRANS_GROWFSRT_FREE		39
 #define	XFS_TRANS_SWAPEXT		40
+#define	XFS_TRANS_TYPE_MAX		40
 /* new transaction types need to be reflected in xfs_logprint(8) */
 
 
@@ -134,19 +121,6 @@ typedef struct xfs_ail_entry {
 	struct xfs_log_item	*ail_back;	/* AIL back pointer */
 } xfs_ail_entry_t;
 
-/*
- * This structure is passed as a parameter to xfs_trans_push_ail()
- * and is used to track the what LSN the waiting processes are
- * waiting to become unused.
- */
-typedef struct xfs_ail_ticket {
-	xfs_lsn_t		at_lsn;		/* lsn waitin for */
-	struct xfs_ail_ticket	*at_forw;	/* wait list ptr */
-	struct xfs_ail_ticket	*at_back;	/* wait list ptr */
-	sv_t			at_sema;	/* wait sema */
-} xfs_ail_ticket_t;
-
-
 typedef struct xfs_log_item {
 	xfs_ail_entry_t			li_ail;		/* AIL pointers */
 	xfs_lsn_t			li_lsn;		/* last on-disk lsn */
@@ -246,68 +220,67 @@ typedef struct xfs_log_item_chunk {
  * lic_unused to the right value (0 matches all free).  The
  * lic_descs.lid_index values are set up as each desc is allocated.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_INIT)
-void xfs_lic_init(xfs_log_item_chunk_t *cp);
 #define	XFS_LIC_INIT(cp)	xfs_lic_init(cp)
-#else
-#define	XFS_LIC_INIT(cp)	((cp)->lic_free = XFS_LIC_FREEMASK)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_INIT_SLOT)
-void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot);
+static inline void xfs_lic_init(xfs_log_item_chunk_t *cp)
+{
+	cp->lic_free = XFS_LIC_FREEMASK;
+}
+
 #define	XFS_LIC_INIT_SLOT(cp,slot)	xfs_lic_init_slot(cp, slot)
-#else
-#define	XFS_LIC_INIT_SLOT(cp,slot)	\
-	((cp)->lic_descs[slot].lid_index = (unsigned char)(slot))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_VACANCY)
-int xfs_lic_vacancy(xfs_log_item_chunk_t *cp);
+static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot)
+{
+	cp->lic_descs[slot].lid_index = (unsigned char)(slot);
+}
+
 #define	XFS_LIC_VACANCY(cp)		xfs_lic_vacancy(cp)
-#else
-#define	XFS_LIC_VACANCY(cp)		(((cp)->lic_free) & XFS_LIC_FREEMASK)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_ALL_FREE)
-void xfs_lic_all_free(xfs_log_item_chunk_t *cp);
+static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp)
+{
+	return cp->lic_free & XFS_LIC_FREEMASK;
+}
+
 #define	XFS_LIC_ALL_FREE(cp)		xfs_lic_all_free(cp)
-#else
-#define	XFS_LIC_ALL_FREE(cp)		((cp)->lic_free = XFS_LIC_FREEMASK)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_ARE_ALL_FREE)
-int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp);
+static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp)
+{
+	cp->lic_free = XFS_LIC_FREEMASK;
+}
+
 #define	XFS_LIC_ARE_ALL_FREE(cp)	xfs_lic_are_all_free(cp)
-#else
-#define	XFS_LIC_ARE_ALL_FREE(cp)	(((cp)->lic_free & XFS_LIC_FREEMASK) ==\
-					XFS_LIC_FREEMASK)
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_ISFREE)
-int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot);
+static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp)
+{
+	return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK);
+}
+
 #define	XFS_LIC_ISFREE(cp,slot)	xfs_lic_isfree(cp,slot)
-#else
-#define	XFS_LIC_ISFREE(cp,slot)	((cp)->lic_free & (1 << (slot)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_CLAIM)
-void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot);
+static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot)
+{
+	return (cp->lic_free & (1 << slot));
+}
+
 #define	XFS_LIC_CLAIM(cp,slot)		xfs_lic_claim(cp,slot)
-#else
-#define	XFS_LIC_CLAIM(cp,slot)		((cp)->lic_free &= ~(1 << (slot)))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_RELSE)
-void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot);
+static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot)
+{
+	cp->lic_free &= ~(1 << slot);
+}
+
 #define	XFS_LIC_RELSE(cp,slot)		xfs_lic_relse(cp,slot)
-#else
-#define	XFS_LIC_RELSE(cp,slot)		((cp)->lic_free |= 1 << (slot))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_SLOT)
-xfs_log_item_desc_t *xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot);
+static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot)
+{
+	cp->lic_free |= 1 << slot;
+}
+
 #define	XFS_LIC_SLOT(cp,slot)		xfs_lic_slot(cp,slot)
-#else
-#define	XFS_LIC_SLOT(cp,slot)		(&((cp)->lic_descs[slot]))
-#endif
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_DESC_TO_SLOT)
-int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp);
+static inline xfs_log_item_desc_t *
+xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot)
+{
+	return &(cp->lic_descs[slot]);
+}
+
 #define	XFS_LIC_DESC_TO_SLOT(dp)	xfs_lic_desc_to_slot(dp)
-#else
-#define	XFS_LIC_DESC_TO_SLOT(dp)	((uint)((dp)->lid_index))
-#endif
+static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
+{
+	return (uint)dp->lid_index;
+}
+
 /*
  * Calculate the address of a chunk given a descriptor pointer:
  * dp - dp->lid_index give the address of the start of the lic_descs array.
@@ -315,15 +288,14 @@ int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp);
  * All of this yields the address of the chunk, which is
  * cast to a chunk pointer.
  */
-#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_DESC_TO_CHUNK)
-xfs_log_item_chunk_t *xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp);
 #define	XFS_LIC_DESC_TO_CHUNK(dp)	xfs_lic_desc_to_chunk(dp)
-#else
-#define	XFS_LIC_DESC_TO_CHUNK(dp)	((xfs_log_item_chunk_t*) \
-					(((xfs_caddr_t)((dp) - (dp)->lid_index)) -\
-					(xfs_caddr_t)(((xfs_log_item_chunk_t*) \
-					0)->lic_descs)))
-#endif
+static inline xfs_log_item_chunk_t *
+xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
+{
+	return (xfs_log_item_chunk_t*) \
+		(((xfs_caddr_t)((dp) - (dp)->lid_index)) - \
+		(xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs));
+}
 
 #ifdef __KERNEL__
 /*
@@ -340,7 +312,7 @@ typedef struct xfs_log_busy_slot {
 #define XFS_LBC_NUM_SLOTS	31
 typedef struct xfs_log_busy_chunk {
 	struct xfs_log_busy_chunk	*lbc_next;
-	uint				lbc_free;	/* bitmask of free slots */
+	uint				lbc_free;	/* free slots bitmask */
 	ushort				lbc_unused;	/* first unused */
 	xfs_log_busy_slot_t		lbc_busy[XFS_LBC_NUM_SLOTS];
 } xfs_log_busy_chunk_t;
@@ -998,6 +970,7 @@ struct xfs_buf	*xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
 void		xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
+void		xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
@@ -1023,7 +996,12 @@ void		xfs_trans_log_efd_extent(xfs_trans_t *,
 					 struct xfs_efd_log_item *,
 					 xfs_fsblock_t,
 					 xfs_extlen_t);
-int		xfs_trans_commit(xfs_trans_t *, uint flags, xfs_lsn_t *);
+int		_xfs_trans_commit(xfs_trans_t *,
+				  uint flags,
+				  xfs_lsn_t *,
+				  int *);
+#define xfs_trans_commit(tp, flags, lsn) \
+	_xfs_trans_commit(tp, flags, lsn, NULL)
 void		xfs_trans_cancel(xfs_trans_t *, int);
 void		xfs_trans_ail_init(struct xfs_mount *);
 xfs_lsn_t	xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 7bc5eab4c2c..19ab24af1c1 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -1,40 +1,25 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_dir.h"
@@ -379,8 +364,8 @@ xfs_trans_delete_ail(
 		else {
 			xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
 				"xfs_trans_delete_ail: attempting to delete a log item that is not in the AIL");
-			xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
 			AIL_UNLOCK(mp, s);
+			xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
 		}
 	}
 }
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 144da7a8546..c74c31ebc81 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -1,47 +1,42 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
-#include "xfs_buf_item.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_dir.h"
+#include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_buf_item.h"
 #include "xfs_trans_priv.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
@@ -714,6 +709,29 @@ xfs_trans_bhold(xfs_trans_t	*tp,
 }
 
 /*
+ * Cancel the previous buffer hold request made on this buffer
+ * for this transaction.
+ */
+void
+xfs_trans_bhold_release(xfs_trans_t	*tp,
+			xfs_buf_t	*bp)
+{
+	xfs_buf_log_item_t	*bip;
+
+	ASSERT(XFS_BUF_ISBUSY(bp));
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
+	ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
+	ASSERT(atomic_read(&bip->bli_refcount) > 0);
+	ASSERT(bip->bli_flags & XFS_BLI_HOLD);
+	bip->bli_flags &= ~XFS_BLI_HOLD;
+	xfs_buf_item_trace("BHOLD RELEASE", bip);
+}
+
+/*
  * This is called to mark bytes first through last inclusive of the given
  * buffer as needing to be logged when the transaction is committed.
  * The buffer must already be associated with the given transaction.
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index 93259a15f98..7d7d627f25d 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -1,40 +1,25 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_dir.h"
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 7e7631ca497..e341409172d 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -1,40 +1,26 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -42,18 +28,18 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_trans_priv.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_trans_priv.h"
+#include "xfs_inode_item.h"
 
 #ifdef XFS_TRANS_DEBUG
 STATIC void
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 1b8a756d80e..486147ef0e3 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -1,40 +1,25 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 
 STATIC int	xfs_trans_unlock_chunk(xfs_log_item_chunk_t *,
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index d4dae7d06af..13edab8a9e9 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000, 2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_TRANS_PRIV_H__
 #define	__XFS_TRANS_PRIV_H__
diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h
index e91d173f4ed..7fe3792b18d 100644
--- a/fs/xfs/xfs_trans_space.h
+++ b/fs/xfs/xfs_trans_space.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_TRANS_SPACE_H__
 #define __XFS_TRANS_SPACE_H__
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 16f5371ce10..104f64a9879 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_TYPES_H__
 #define	__XFS_TYPES_H__
@@ -154,6 +140,12 @@ typedef __uint8_t	xfs_arch_t;	/* architecture of an xfs fs */
 #define	MAXAEXTNUM	((xfs_aextnum_t)0x7fff)		/* signed short */
 
 /*
+ * Min numbers of data/attr fork btree root pointers.
+ */
+#define MINDBTPTRS	3
+#define MINABTPTRS	2
+
+/*
  * MAXNAMELEN is the length (including the terminating null) of
  * the longest permissible file (component) name.
  */
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 11351f08d43..fefe1d60377 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -1,53 +1,40 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
+#include "xfs_inode_item.h"
 #include "xfs_bmap.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index 01d98b4b7af..472661a3b6d 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -1,33 +1,19 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef __XFS_UTILS_H__
 #define __XFS_UTILS_H__
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 42bcc021520..7bdbd991ab1 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -1,74 +1,58 @@
 /*
- * XFS filesystem operations.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
+#include "xfs_ag.h"
 #include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_alloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
-#include "xfs_ag.h"
+#include "xfs_inode_item.h"
+#include "xfs_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_quota.h"
 #include "xfs_error.h"
 #include "xfs_bmap.h"
-#include "xfs_da_btree.h"
 #include "xfs_rw.h"
 #include "xfs_refcache.h"
 #include "xfs_buf_item.h"
-#include "xfs_extfree_item.h"
-#include "xfs_quota.h"
+#include "xfs_log_priv.h"
 #include "xfs_dir2_trace.h"
+#include "xfs_extfree_item.h"
 #include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_clnt.h"
-#include "xfs_log_priv.h"
 
 STATIC int xfs_sync(bhv_desc_t *, int, cred_t *);
 
@@ -230,9 +214,7 @@ xfs_start_flags(
 	}
 
 	if (ap->logbufs != -1 &&
-#if defined(DEBUG) || defined(XLOG_NOLOG)
 	    ap->logbufs != 0 &&
-#endif
 	    (ap->logbufs < XLOG_MIN_ICLOGS ||
 	     ap->logbufs > XLOG_MAX_ICLOGS)) {
 		cmn_err(CE_WARN,
@@ -242,6 +224,7 @@ xfs_start_flags(
 	}
 	mp->m_logbufs = ap->logbufs;
 	if (ap->logbufsize != -1 &&
+	    ap->logbufsize !=  0 &&
 	    ap->logbufsize != 16 * 1024 &&
 	    ap->logbufsize != 32 * 1024 &&
 	    ap->logbufsize != 64 * 1024 &&
@@ -257,6 +240,14 @@ xfs_start_flags(
 	mp->m_fsname_len = strlen(ap->fsname) + 1;
 	mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
 	strcpy(mp->m_fsname, ap->fsname);
+	if (ap->rtname[0]) {
+		mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
+		strcpy(mp->m_rtname, ap->rtname);
+	}
+	if (ap->logname[0]) {
+		mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
+		strcpy(mp->m_logname, ap->logname);
+	}
 
 	if (ap->flags & XFSMNT_WSYNC)
 		mp->m_flags |= XFS_MOUNT_WSYNC;
@@ -268,21 +259,16 @@ xfs_start_flags(
 #endif
 	if (ap->flags & XFSMNT_NOATIME)
 		mp->m_flags |= XFS_MOUNT_NOATIME;
-
 	if (ap->flags & XFSMNT_RETERR)
 		mp->m_flags |= XFS_MOUNT_RETERR;
-
 	if (ap->flags & XFSMNT_NOALIGN)
 		mp->m_flags |= XFS_MOUNT_NOALIGN;
-
 	if (ap->flags & XFSMNT_SWALLOC)
 		mp->m_flags |= XFS_MOUNT_SWALLOC;
-
 	if (ap->flags & XFSMNT_OSYNCISOSYNC)
 		mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
-
 	if (ap->flags & XFSMNT_32BITINODES)
-		mp->m_flags |= (XFS_MOUNT_32BITINODES | XFS_MOUNT_32BITINOOPT);
+		mp->m_flags |= XFS_MOUNT_32BITINODES;
 
 	if (ap->flags & XFSMNT_IOSIZE) {
 		if (ap->iosizelog > XFS_MAX_IO_LOG ||
@@ -300,12 +286,15 @@ xfs_start_flags(
 
 	if (ap->flags & XFSMNT_IHASHSIZE)
 		mp->m_flags |= XFS_MOUNT_IHASHSIZE;
-
 	if (ap->flags & XFSMNT_IDELETE)
 		mp->m_flags |= XFS_MOUNT_IDELETE;
-
 	if (ap->flags & XFSMNT_DIRSYNC)
 		mp->m_flags |= XFS_MOUNT_DIRSYNC;
+	if (ap->flags & XFSMNT_COMPAT_ATTR)
+		mp->m_flags |= XFS_MOUNT_COMPAT_ATTR;
+
+	if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
+		mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
 
 	/*
 	 * no recovery flag requires a read-only mount
@@ -321,8 +310,8 @@ xfs_start_flags(
 
 	if (ap->flags & XFSMNT_NOUUID)
 		mp->m_flags |= XFS_MOUNT_NOUUID;
-	if (ap->flags & XFSMNT_NOLOGFLUSH)
-		mp->m_flags |= XFS_MOUNT_NOLOGFLUSH;
+	if (ap->flags & XFSMNT_BARRIER)
+		mp->m_flags |= XFS_MOUNT_BARRIER;
 
 	return 0;
 }
@@ -393,6 +382,10 @@ xfs_finish_flags(
 			return XFS_ERROR(EINVAL);
 	}
 
+	if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
+		mp->m_flags &= ~XFS_MOUNT_COMPAT_ATTR;
+	}
+
 	return 0;
 }
 
@@ -512,8 +505,14 @@ xfs_mount(
 		goto error2;
 
 	error = XFS_IOINIT(vfsp, args, flags);
-	if (!error)
-		return 0;
+	if (error)
+		goto error2;
+
+	if ((args->flags & XFSMNT_BARRIER) &&
+	    !(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY))
+		xfs_mountfs_check_barriers(mp);
+	return 0;
+
 error2:
 	if (mp->m_sb_bp)
 		xfs_freesb(mp);
@@ -656,19 +655,24 @@ xfs_mntupdate(
 	else
 		mp->m_flags &= ~XFS_MOUNT_NOATIME;
 
-	if (!(vfsp->vfs_flag & VFS_RDONLY)) {
-		VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error);
+	if ((vfsp->vfs_flag & VFS_RDONLY) &&
+	    !(*flags & MS_RDONLY)) {
+		vfsp->vfs_flag &= ~VFS_RDONLY;
+
+		if (args->flags & XFSMNT_BARRIER)
+			xfs_mountfs_check_barriers(mp);
 	}
 
-	if (*flags & MS_RDONLY) {
+	if (!(vfsp->vfs_flag & VFS_RDONLY) &&
+	    (*flags & MS_RDONLY)) {
+		VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error);
+
 		xfs_quiesce_fs(mp);
 
 		/* Ok now write out an unmount record */
 		xfs_log_unmount_write(mp);
 		xfs_unmountfs_writesb(mp);
 		vfsp->vfs_flag |= VFS_RDONLY;
-	} else {
-		vfsp->vfs_flag &= ~VFS_RDONLY;
 	}
 
 	return 0;
@@ -795,7 +799,6 @@ xfs_statvfs(
 	xfs_mount_t	*mp;
 	xfs_sb_t	*sbp;
 	unsigned long	s;
-	u64 id;
 
 	mp = XFS_BHVTOM(bdp);
 	sbp = &(mp->m_sb);
@@ -823,9 +826,7 @@ xfs_statvfs(
 	statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
 	XFS_SB_UNLOCK(mp, s);
 
-	id = huge_encode_dev(mp->m_dev);
-	statp->f_fsid.val[0] = (u32)id;
-	statp->f_fsid.val[1] = (u32)(id >> 32);
+	xfs_statvfs_fsid(statp, mp);
 	statp->f_namelen = MAXNAMELEN - 1;
 
 	return 0;
@@ -895,7 +896,7 @@ xfs_sync(
  * only available by calling this routine.
  *
  */
-STATIC int
+int
 xfs_sync_inodes(
 	xfs_mount_t	*mp,
 	int		flags,
@@ -906,7 +907,6 @@ xfs_sync_inodes(
 	xfs_inode_t	*ip_next;
 	xfs_buf_t	*bp;
 	vnode_t		*vp = NULL;
-	vmap_t		vmap;
 	int		error;
 	int		last_error;
 	uint64_t	fflag;
@@ -980,7 +980,7 @@ xfs_sync_inodes(
 	ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP);
 
 	fflag = XFS_B_ASYNC;		/* default is don't wait */
-	if (flags & SYNC_BDFLUSH)
+	if (flags & (SYNC_BDFLUSH | SYNC_DELWRI))
 		fflag = XFS_B_DELWRI;
 	if (flags & SYNC_WAIT)
 		fflag = 0;		/* synchronous overrides all */
@@ -1101,48 +1101,21 @@ xfs_sync_inodes(
 		 * lock in xfs_ireclaim() after the inode is pulled from
 		 * the mount list will sleep until we release it here.
 		 * This keeps the vnode from being freed while we reference
-		 * it.  It is also cheaper and simpler than actually doing
-		 * a vn_get() for every inode we touch here.
+		 * it.
 		 */
 		if (xfs_ilock_nowait(ip, lock_flags) == 0) {
-
 			if ((flags & SYNC_BDFLUSH) || (vp == NULL)) {
 				ip = ip->i_mnext;
 				continue;
 			}
 
-			/*
-			 * We need to unlock the inode list lock in order
-			 * to lock the inode. Insert a marker record into
-			 * the inode list to remember our position, dropping
-			 * the lock is now done inside the IPOINTER_INSERT
-			 * macro.
-			 *
-			 * We also use the inode list lock to protect us
-			 * in taking a snapshot of the vnode version number
-			 * for use in calling vn_get().
-			 */
-			VMAP(vp, vmap);
-			IPOINTER_INSERT(ip, mp);
-
-			vp = vn_get(vp, &vmap);
+			vp = vn_grab(vp);
 			if (vp == NULL) {
-				/*
-				 * The vnode was reclaimed once we let go
-				 * of the inode list lock.  Skip to the
-				 * next list entry. Remove the marker.
-				 */
-
-				XFS_MOUNT_ILOCK(mp);
-
-				mount_locked = B_TRUE;
-				vnode_refed  = B_FALSE;
-
-				IPOINTER_REMOVE(ip, mp);
-
+				ip = ip->i_mnext;
 				continue;
 			}
 
+			IPOINTER_INSERT(ip, mp);
 			xfs_ilock(ip, lock_flags);
 
 			ASSERT(vp == XFS_ITOV(ip));
@@ -1533,7 +1506,10 @@ xfs_syncsub(
 	 * eventually kicked out of the cache.
 	 */
 	if (flags & SYNC_REFCACHE) {
-		xfs_refcache_purge_some(mp);
+		if (flags & SYNC_WAIT)
+			xfs_refcache_purge_mp(mp);
+		else
+			xfs_refcache_purge_some(mp);
 	}
 
 	/*
@@ -1649,14 +1625,24 @@ xfs_vget(
 #define MNTOPT_SWIDTH	"swidth"	/* data volume stripe width */
 #define MNTOPT_NOUUID	"nouuid"	/* ignore filesystem UUID */
 #define MNTOPT_MTPT	"mtpt"		/* filesystem mount point */
+#define MNTOPT_GRPID	"grpid"		/* group-ID from parent directory */
+#define MNTOPT_NOGRPID	"nogrpid"	/* group-ID from current process */
+#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
+#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
 #define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
 #define MNTOPT_IHASHSIZE    "ihashsize"    /* size of inode hash table */
 #define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
-#define MNTOPT_NOLOGFLUSH   "nologflush"   /* don't hard flush on log writes */
+#define MNTOPT_BARRIER	"barrier"	/* use writer barriers for log write and
+					 * unwritten extent conversion */
 #define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
 #define MNTOPT_64BITINODE   "inode64"	/* inodes can be allocated anywhere */
 #define MNTOPT_IKEEP	"ikeep"		/* do not free empty inode clusters */
 #define MNTOPT_NOIKEEP	"noikeep"	/* free empty inode clusters */
+#define MNTOPT_LARGEIO	   "largeio"	/* report large I/O sizes in stat() */
+#define MNTOPT_NOLARGEIO   "nolargeio"	/* do not report large I/O sizes
+					 * in stat(). */
+#define MNTOPT_ATTR2	"attr2"		/* do use attr2 attribute format */
+#define MNTOPT_NOATTR2	"noattr2"	/* do not use attr2 attribute format */
 
 STATIC unsigned long
 suffix_strtoul(const char *cp, char **endp, unsigned int base)
@@ -1693,12 +1679,15 @@ xfs_parseargs(
 	int			dsunit, dswidth, vol_dsunit, vol_dswidth;
 	int			iosize;
 
+	args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+	args->flags |= XFSMNT_COMPAT_ATTR;
+
 #if 0	/* XXX: off by default, until some remaining issues ironed out */
 	args->flags |= XFSMNT_IDELETE; /* default to on */
 #endif
 
 	if (!options)
-		return 0;
+		goto done;
 
 	iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
 
@@ -1769,6 +1758,12 @@ xfs_parseargs(
 			}
 			args->flags |= XFSMNT_IHASHSIZE;
 			args->ihashsize = simple_strtoul(value, &eov, 10);
+		} else if (!strcmp(this_char, MNTOPT_GRPID) ||
+			   !strcmp(this_char, MNTOPT_BSDGROUPS)) {
+			vfsp->vfs_flag |= VFS_GRPID;
+		} else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
+			   !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
+			vfsp->vfs_flag &= ~VFS_GRPID;
 		} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
 			args->flags |= XFSMNT_WSYNC;
 		} else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
@@ -1809,12 +1804,20 @@ xfs_parseargs(
 #endif
 		} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
 			args->flags |= XFSMNT_NOUUID;
-		} else if (!strcmp(this_char, MNTOPT_NOLOGFLUSH)) {
-			args->flags |= XFSMNT_NOLOGFLUSH;
+		} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
+			args->flags |= XFSMNT_BARRIER;
 		} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
 			args->flags &= ~XFSMNT_IDELETE;
 		} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
 			args->flags |= XFSMNT_IDELETE;
+		} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
+			args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE;
+		} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
+			args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+		} else if (!strcmp(this_char, MNTOPT_ATTR2)) {
+			args->flags &= ~XFSMNT_COMPAT_ATTR;
+		} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
+			args->flags |= XFSMNT_COMPAT_ATTR;
 		} else if (!strcmp(this_char, "osyncisdsync")) {
 			/* no-op, this is now the default */
 printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
@@ -1864,6 +1867,11 @@ printk("XFS: irixsgid is now a sysctl(2) variable, option is deprecated.\n");
 		args->sunit = args->swidth = 0;
 	}
 
+done:
+	if (args->flags & XFSMNT_32BITINODES)
+		vfsp->vfs_flag |= VFS_32BITINODES;
+	if (args->flags2)
+		args->flags |= XFSMNT_FLAGS2;
 	return 0;
 }
 
@@ -1884,12 +1892,13 @@ xfs_showargs(
 		{ XFS_MOUNT_NOUUID,		"," MNTOPT_NOUUID },
 		{ XFS_MOUNT_NORECOVERY,		"," MNTOPT_NORECOVERY },
 		{ XFS_MOUNT_OSYNCISOSYNC,	"," MNTOPT_OSYNCISOSYNC },
-		{ XFS_MOUNT_NOLOGFLUSH,		"," MNTOPT_NOLOGFLUSH },
+		{ XFS_MOUNT_BARRIER,		"," MNTOPT_BARRIER },
 		{ XFS_MOUNT_IDELETE,		"," MNTOPT_NOIKEEP },
 		{ 0, NULL }
 	};
 	struct proc_xfs_info	*xfs_infop;
 	struct xfs_mount	*mp = XFS_BHVTOM(bhv);
+	struct vfs		*vfsp = XFS_MTOVFS(mp);
 
 	for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) {
 		if (mp->m_flags & xfs_infop->flag)
@@ -1900,21 +1909,20 @@ xfs_showargs(
 		seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", mp->m_ihsize);
 
 	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
-		seq_printf(m, "," MNTOPT_ALLOCSIZE "=%d", 1<<mp->m_writeio_log);
+		seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
+				(int)(1 << mp->m_writeio_log) >> 10);
 
 	if (mp->m_logbufs > 0)
 		seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
 
 	if (mp->m_logbsize > 0)
-		seq_printf(m, "," MNTOPT_LOGBSIZE "=%d", mp->m_logbsize);
+		seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
 
-	if (mp->m_ddev_targp != mp->m_logdev_targp)
-		seq_printf(m, "," MNTOPT_LOGDEV "=%s",
-				XFS_BUFTARG_NAME(mp->m_logdev_targp));
+	if (mp->m_logname)
+		seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
 
-	if (mp->m_rtdev_targp && mp->m_ddev_targp != mp->m_rtdev_targp)
-		seq_printf(m, "," MNTOPT_RTDEV "=%s",
-				XFS_BUFTARG_NAME(mp->m_rtdev_targp));
+	if (mp->m_rtname)
+		seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
 
 	if (mp->m_dalign > 0)
 		seq_printf(m, "," MNTOPT_SUNIT "=%d",
@@ -1924,9 +1932,18 @@ xfs_showargs(
 		seq_printf(m, "," MNTOPT_SWIDTH "=%d",
 				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
 
-	if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT))
+	if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR))
+		seq_printf(m, "," MNTOPT_ATTR2);
+
+	if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE))
+		seq_printf(m, "," MNTOPT_LARGEIO);
+
+	if (!(vfsp->vfs_flag & VFS_32BITINODES))
 		seq_printf(m, "," MNTOPT_64BITINODE);
-	
+
+	if (vfsp->vfs_flag & VFS_GRPID)
+		seq_printf(m, "," MNTOPT_GRPID);
+
 	return 0;
 }
 
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 1377c868f3f..7c1f7453146 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1,40 +1,26 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -42,33 +28,32 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
+#include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_itable.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_alloc.h"
-#include "xfs_attr_sf.h"
 #include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
-#include "xfs_inode_item.h"
 #include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_dir_leaf.h"
+#include "xfs_itable.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_alloc.h"
 #include "xfs_bmap.h"
-#include "xfs_da_btree.h"
 #include "xfs_attr.h"
 #include "xfs_rw.h"
-#include "xfs_refcache.h"
 #include "xfs_error.h"
-#include "xfs_bit.h"
-#include "xfs_rtalloc.h"
 #include "xfs_quota.h"
 #include "xfs_utils.h"
+#include "xfs_rtalloc.h"
+#include "xfs_refcache.h"
 #include "xfs_trans_space.h"
-#include "xfs_dir_leaf.h"
-#include "xfs_mac.h"
 #include "xfs_log_priv.h"
+#include "xfs_mac.h"
 
 
 /*
@@ -104,7 +89,7 @@ xfs_open(
 	 * If it's a directory with any blocks, read-ahead block 0
 	 * as we're almost certain to have the next operation be a read there.
 	 */
-	if (vp->v_type == VDIR && ip->i_d.di_nextents > 0) {
+	if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) {
 		mode = xfs_ilock_map_shared(ip);
 		if (ip->i_d.di_nextents > 0)
 			(void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
@@ -163,55 +148,25 @@ xfs_getattr(
 	/*
 	 * Copy from in-core inode.
 	 */
-	vap->va_type = vp->v_type;
-	vap->va_mode = ip->i_d.di_mode & MODEMASK;
+	vap->va_mode = ip->i_d.di_mode;
 	vap->va_uid = ip->i_d.di_uid;
 	vap->va_gid = ip->i_d.di_gid;
 	vap->va_projid = ip->i_d.di_projid;
 
 	/*
 	 * Check vnode type block/char vs. everything else.
-	 * Do it with bitmask because that's faster than looking
-	 * for multiple values individually.
 	 */
-	if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
+	switch (ip->i_d.di_mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		vap->va_rdev = ip->i_df.if_u2.if_rdev;
+		vap->va_blocksize = BLKDEV_IOSIZE;
+		break;
+	default:
 		vap->va_rdev = 0;
 
 		if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
-
-#if 0
-			/* Large block sizes confuse various
-			 * user space programs, so letting the
-			 * stripe size through is not a good
-			 * idea for now.
-			 */
-			vap->va_blocksize = mp->m_swidth ?
-				/*
-				 * If the underlying volume is a stripe, then
-				 * return the stripe width in bytes as the
-				 * recommended I/O size.
-				 */
-				(mp->m_swidth << mp->m_sb.sb_blocklog) :
-				/*
-				 * Return the largest of the preferred buffer
-				 * sizes since doing small I/Os into larger
-				 * buffers causes buffers to be decommissioned.
-				 * The value returned is in bytes.
-				 */
-				(1 << (int)MAX(mp->m_readio_log,
-					       mp->m_writeio_log));
-
-#else
-			vap->va_blocksize =
-				/*
-				 * Return the largest of the preferred buffer
-				 * sizes since doing small I/Os into larger
-				 * buffers causes buffers to be decommissioned.
-				 * The value returned is in bytes.
-				 */
-				1 << (int)MAX(mp->m_readio_log,
-					       mp->m_writeio_log);
-#endif
+			vap->va_blocksize = xfs_preferred_iosize(mp);
 		} else {
 
 			/*
@@ -224,9 +179,7 @@ xfs_getattr(
 				(ip->i_d.di_extsize << mp->m_sb.sb_blocklog) :
 				(mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
 		}
-	} else {
-		vap->va_rdev = ip->i_df.if_u2.if_rdev;
-		vap->va_blocksize = BLKDEV_IOSIZE;
+		break;
 	}
 
 	vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec;
@@ -468,7 +421,7 @@ xfs_setattr(
 				m |= S_ISGID;
 #if 0
 			/* Linux allows this, Irix doesn't. */
-			if ((vap->va_mode & S_ISVTX) && vp->v_type != VDIR)
+			if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp))
 				m |= S_ISVTX;
 #endif
 			if (m && !capable(CAP_FSETID))
@@ -546,10 +499,10 @@ xfs_setattr(
 			goto error_return;
 		}
 
-		if (vp->v_type == VDIR) {
+		if (VN_ISDIR(vp)) {
 			code = XFS_ERROR(EISDIR);
 			goto error_return;
-		} else if (vp->v_type != VREG) {
+		} else if (!VN_ISREG(vp)) {
 			code = XFS_ERROR(EINVAL);
 			goto error_return;
 		}
@@ -580,8 +533,7 @@ xfs_setattr(
 		/*
 		 * Can't change extent size if any extents are allocated.
 		 */
-		if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
-		    (mask & XFS_AT_EXTSIZE) &&
+		if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) &&
 		    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
 		     vap->va_extsize) ) {
 			code = XFS_ERROR(EINVAL);	/* EFBIG? */
@@ -609,7 +561,8 @@ xfs_setattr(
 		/*
 		 * Can't change realtime flag if any extents are allocated.
 		 */
-		if (ip->i_d.di_nextents && (mask & XFS_AT_XFLAGS) &&
+		if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
+		    (mask & XFS_AT_XFLAGS) &&
 		    (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) !=
 		    (vap->va_xflags & XFS_XFLAG_REALTIME)) {
 			code = XFS_ERROR(EINVAL);	/* EFBIG? */
@@ -673,8 +626,10 @@ xfs_setattr(
 	 */
 	if (mask & XFS_AT_SIZE) {
 		code = 0;
-		if (vap->va_size > ip->i_d.di_size)
+		if ((vap->va_size > ip->i_d.di_size) && 
+		    (flags & ATTR_NOSIZETOK) == 0) {
 			code = xfs_igrow_start(ip, vap->va_size, credp);
+		}
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		if (!code)
 			code = xfs_itruncate_data(ip, vap->va_size);
@@ -1117,6 +1072,7 @@ xfs_fsync(
 	xfs_inode_t	*ip;
 	xfs_trans_t	*tp;
 	int		error;
+	int		log_flushed = 0, changed = 1;
 
 	vn_trace_entry(BHV_TO_VNODE(bdp),
 			__FUNCTION__, (inst_t *)__return_address);
@@ -1170,10 +1126,18 @@ xfs_fsync(
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
 		if (xfs_ipincount(ip)) {
-			xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
+			_xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
 				      XFS_LOG_FORCE |
 				      ((flag & FSYNC_WAIT)
-				       ? XFS_LOG_SYNC : 0));
+				       ? XFS_LOG_SYNC : 0),
+				      &log_flushed);
+		} else {
+			/*
+			 * If the inode is not pinned and nothing
+			 * has changed we don't need to flush the
+			 * cache.
+			 */
+			changed = 0;
 		}
 		error = 0;
 	} else	{
@@ -1209,10 +1173,27 @@ xfs_fsync(
 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 		if (flag & FSYNC_WAIT)
 			xfs_trans_set_sync(tp);
-		error = xfs_trans_commit(tp, 0, NULL);
+		error = _xfs_trans_commit(tp, 0, NULL, &log_flushed);
 
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
+
+	if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) {
+		/*
+		 * If the log write didn't issue an ordered tag we need
+		 * to flush the disk cache for the data device now.
+		 */
+		if (!log_flushed)
+			xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
+
+		/*
+		 * If this inode is on the RT dev we need to flush that
+		 * cache aswell.
+		 */
+		if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)
+			xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
+	}
+
 	return error;
 }
 
@@ -1567,7 +1548,7 @@ xfs_release(
 	vp = BHV_TO_VNODE(bdp);
 	ip = XFS_BHVTOI(bdp);
 
-	if ((vp->v_type != VREG) || (ip->i_d.di_mode == 0)) {
+	if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) {
 		return 0;
 	}
 
@@ -1895,7 +1876,7 @@ xfs_create(
 	dp = XFS_BHVTOI(dir_bdp);
 	mp = dp->i_mount;
 
-	dm_di_mode = vap->va_mode|VTTOIF(vap->va_type);
+	dm_di_mode = vap->va_mode;
 	namelen = VNAMELEN(dentry);
 
 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
@@ -1973,8 +1954,7 @@ xfs_create(
 	    (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen)))
 		goto error_return;
 	rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0;
-	error = xfs_dir_ialloc(&tp, dp,
-			MAKEIMODE(vap->va_type,vap->va_mode), 1,
+	error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1,
 			rdev, credp, prid, resblks > 0,
 			&ip, &committed);
 	if (error) {
@@ -2620,7 +2600,7 @@ xfs_link(
 	vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address);
 
 	target_namelen = VNAMELEN(dentry);
-	if (src_vp->v_type == VDIR)
+	if (VN_ISDIR(src_vp))
 		return XFS_ERROR(EPERM);
 
 	src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops);
@@ -2805,7 +2785,7 @@ xfs_mkdir(
 
 	tp = NULL;
 	dp_joined_to_trans = B_FALSE;
-	dm_di_mode = vap->va_mode|VTTOIF(vap->va_type);
+	dm_di_mode = vap->va_mode;
 
 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
@@ -2879,8 +2859,7 @@ xfs_mkdir(
 	/*
 	 * create the directory inode.
 	 */
-	error = xfs_dir_ialloc(&tp, dp,
-			MAKEIMODE(vap->va_type,vap->va_mode), 2,
+	error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2,
 			0, credp, prid, resblks > 0,
 		&cdp, NULL);
 	if (error) {
@@ -3650,7 +3629,7 @@ xfs_rwlock(
 	vnode_t		*vp;
 
 	vp = BHV_TO_VNODE(bdp);
-	if (vp->v_type == VDIR)
+	if (VN_ISDIR(vp))
 		return 1;
 	ip = XFS_BHVTOI(bdp);
 	if (locktype == VRWLOCK_WRITE) {
@@ -3681,7 +3660,7 @@ xfs_rwunlock(
 	vnode_t		*vp;
 
 	vp = BHV_TO_VNODE(bdp);
-	if (vp->v_type == VDIR)
+	if (VN_ISDIR(vp))
 		return;
 	ip = XFS_BHVTOI(bdp);
 	if (locktype == VRWLOCK_WRITE) {
@@ -3847,51 +3826,10 @@ xfs_reclaim(
 		return 0;
 	}
 
-	if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
-		if (ip->i_d.di_size > 0) {
-			/*
-			 * Flush and invalidate any data left around that is
-			 * a part of this file.
-			 *
-			 * Get the inode's i/o lock so that buffers are pushed
-			 * out while holding the proper lock.  We can't hold
-			 * the inode lock here since flushing out buffers may
-			 * cause us to try to get the lock in xfs_strategy().
-			 *
-			 * We don't have to call remapf() here, because there
-			 * cannot be any mapped file references to this vnode
-			 * since it is being reclaimed.
-			 */
-			xfs_ilock(ip, XFS_IOLOCK_EXCL);
+	vn_iowait(vp);
 
-			/*
-			 * If we hit an IO error, we need to make sure that the
-			 * buffer and page caches of file data for
-			 * the file are tossed away. We don't want to use
-			 * VOP_FLUSHINVAL_PAGES here because we don't want dirty
-			 * pages to stay attached to the vnode, but be
-			 * marked P_BAD. pdflush/vnode_pagebad
-			 * hates that.
-			 */
-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_NONE);
-			} else {
-				VOP_TOSS_PAGES(vp, 0, -1, FI_NONE);
-			}
-
-			ASSERT(VN_CACHED(vp) == 0);
-			ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) ||
-			       ip->i_delayed_blks == 0);
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-		} else if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-			/*
-			 * di_size field may not be quite accurate if we're
-			 * shutting down.
-			 */
-			VOP_TOSS_PAGES(vp, 0, -1, FI_NONE);
-			ASSERT(VN_CACHED(vp) == 0);
-		}
-	}
+	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+	ASSERT(VN_CACHED(vp) == 0);
 
 	/* If we have nothing to flush with this inode then complete the
 	 * teardown now, otherwise break the link between the xfs inode
@@ -4567,7 +4505,7 @@ xfs_change_file_space(
 	/*
 	 * must be a regular file and have write permission
 	 */
-	if (vp->v_type != VREG)
+	if (!VN_ISREG(vp))
 		return XFS_ERROR(EINVAL);
 
 	xfs_ilock(ip, XFS_ILOCK_SHARED);