From e9720acd728a46cb40daa52c99a979f7c4ff195c Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 7 Mar 2008 11:08:40 -0800
Subject: [NET]: Make /proc/net a symlink on /proc/self/net (v3)

Current /proc/net is done with so called "shadows", but current
implementation is broken and has little chances to get fixed.

The problem is that dentries subtree of /proc/net directory has
fancy revalidation rules to make processes living in different
net namespaces see different entries in /proc/net subtree, but
currently, tasks see in the /proc/net subdir the contents of any
other namespace, depending on who opened the file first.

The proposed fix is to turn /proc/net into a symlink, which points
to /proc/self/net, which in turn shows what previously was in
/proc/net - the network-related info, from the net namespace the
appropriate task lives in.

# ls -l /proc/net
lrwxrwxrwx  1 root root 8 Mar  5 15:17 /proc/net -> self/net

In other words - this behaves like /proc/mounts, but unlike
"mounts", "net" is not a file, but a directory.

Changes from v2:
* Fixed discrepancy of /proc/net nlink count and selinux labeling
  screwup pointed out by Stephen.

  To get the correct nlink count the ->getattr callback for /proc/net
  is overridden to read one from the net->proc_net entry.

  To make selinux still work the net->proc_net entry is initialized
  properly, i.e. with the "net" name and the proc_net parent.

Selinux fixes are
Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>

Changes from v1:
* Fixed a task_struct leak in get_proc_task_net, pointed out by Paul.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/proc/base.c     |   1 +
 fs/proc/generic.c  |  26 +++++++-----
 fs/proc/internal.h |   7 ++++
 fs/proc/proc_net.c | 117 ++++++++++++++++++++++++++++++++++++++++-------------
 4 files changed, 114 insertions(+), 37 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 96ee899d650..cc43cf0c1fa 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2274,6 +2274,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	DIR("task",       S_IRUGO|S_IXUGO, task),
 	DIR("fd",         S_IRUSR|S_IXUSR, fd),
 	DIR("fdinfo",     S_IRUSR|S_IXUSR, fdinfo),
+	DIR("net",        S_IRUGO|S_IXUSR, net),
 	REG("environ",    S_IRUSR, environ),
 	INF("auxv",       S_IRUSR, pid_auxv),
 	ONE("status",     S_IRUGO, pid_status),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 68971e66cd4..a36ad3c75cf 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -377,15 +377,14 @@ static struct dentry_operations proc_dentry_operations =
  * Don't create negative dentries here, return -ENOENT by hand
  * instead.
  */
-struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
+		struct dentry *dentry)
 {
 	struct inode *inode = NULL;
-	struct proc_dir_entry * de;
 	int error = -ENOENT;
 
 	lock_kernel();
 	spin_lock(&proc_subdir_lock);
-	de = PDE(dir);
 	if (de) {
 		for (de = de->subdir; de ; de = de->next) {
 			if (de->namelen != dentry->d_name.len)
@@ -393,8 +392,6 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
 			if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
 				unsigned int ino;
 
-				if (de->shadow_proc)
-					de = de->shadow_proc(current, de);
 				ino = de->low_ino;
 				de_get(de);
 				spin_unlock(&proc_subdir_lock);
@@ -417,6 +414,12 @@ out_unlock:
 	return ERR_PTR(error);
 }
 
+struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
+		struct nameidata *nd)
+{
+	return proc_lookup_de(PDE(dir), dir, dentry);
+}
+
 /*
  * This returns non-zero if at EOF, so that the /proc
  * root directory can use this and check if it should
@@ -426,10 +429,9 @@ out_unlock:
  * value of the readdir() call, as long as it's non-negative
  * for success..
  */
-int proc_readdir(struct file * filp,
-	void * dirent, filldir_t filldir)
+int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
+		filldir_t filldir)
 {
-	struct proc_dir_entry * de;
 	unsigned int ino;
 	int i;
 	struct inode *inode = filp->f_path.dentry->d_inode;
@@ -438,7 +440,6 @@ int proc_readdir(struct file * filp,
 	lock_kernel();
 
 	ino = inode->i_ino;
-	de = PDE(inode);
 	if (!de) {
 		ret = -EINVAL;
 		goto out;
@@ -499,6 +500,13 @@ out:	unlock_kernel();
 	return ret;	
 }
 
+int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+
+	return proc_readdir_de(PDE(inode), filp, dirent, filldir);
+}
+
 /*
  * These are the generic /proc directory operations. They
  * use the in-memory "struct proc_dir_entry" tree to parse
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 1c81c8f1aee..bc72f5c8c47 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -64,6 +64,8 @@ extern const struct file_operations proc_numa_maps_operations;
 extern const struct file_operations proc_smaps_operations;
 extern const struct file_operations proc_clear_refs_operations;
 extern const struct file_operations proc_pagemap_operations;
+extern const struct file_operations proc_net_operations;
+extern const struct inode_operations proc_net_inode_operations;
 
 void free_proc_entry(struct proc_dir_entry *de);
 
@@ -83,3 +85,8 @@ static inline int proc_fd(struct inode *inode)
 {
 	return PROC_I(inode)->fd;
 }
+
+struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
+		struct dentry *dentry);
+int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
+		filldir_t filldir);
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 14e9b5aaf86..4caa5f774fb 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -63,6 +63,82 @@ int seq_release_net(struct inode *ino, struct file *f)
 }
 EXPORT_SYMBOL_GPL(seq_release_net);
 
+static struct net *get_proc_task_net(struct inode *dir)
+{
+	struct task_struct *task;
+	struct nsproxy *ns;
+	struct net *net = NULL;
+
+	rcu_read_lock();
+	task = pid_task(proc_pid(dir), PIDTYPE_PID);
+	if (task != NULL) {
+		ns = task_nsproxy(task);
+		if (ns != NULL)
+			net = get_net(ns->net_ns);
+	}
+	rcu_read_unlock();
+
+	return net;
+}
+
+static struct dentry *proc_tgid_net_lookup(struct inode *dir,
+		struct dentry *dentry, struct nameidata *nd)
+{
+	struct dentry *de;
+	struct net *net;
+
+	de = ERR_PTR(-ENOENT);
+	net = get_proc_task_net(dir);
+	if (net != NULL) {
+		de = proc_lookup_de(net->proc_net, dir, dentry);
+		put_net(net);
+	}
+	return de;
+}
+
+static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	struct net *net;
+
+	net = get_proc_task_net(inode);
+
+	generic_fillattr(inode, stat);
+
+	if (net != NULL) {
+		stat->nlink = net->proc_net->nlink;
+		put_net(net);
+	}
+
+	return 0;
+}
+
+const struct inode_operations proc_net_inode_operations = {
+	.lookup		= proc_tgid_net_lookup,
+	.getattr	= proc_tgid_net_getattr,
+};
+
+static int proc_tgid_net_readdir(struct file *filp, void *dirent,
+		filldir_t filldir)
+{
+	int ret;
+	struct net *net;
+
+	ret = -EINVAL;
+	net = get_proc_task_net(filp->f_path.dentry->d_inode);
+	if (net != NULL) {
+		ret = proc_readdir_de(net->proc_net, filp, dirent, filldir);
+		put_net(net);
+	}
+	return ret;
+}
+
+const struct file_operations proc_net_operations = {
+	.read		= generic_read_dir,
+	.readdir	= proc_tgid_net_readdir,
+};
+
 
 struct proc_dir_entry *proc_net_fops_create(struct net *net,
 	const char *name, mode_t mode, const struct file_operations *fops)
@@ -83,14 +159,6 @@ struct net *get_proc_net(const struct inode *inode)
 }
 EXPORT_SYMBOL_GPL(get_proc_net);
 
-static struct proc_dir_entry *shadow_pde;
-
-static struct proc_dir_entry *proc_net_shadow(struct task_struct *task,
-						struct proc_dir_entry *de)
-{
-	return task->nsproxy->net_ns->proc_net;
-}
-
 struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
 		struct proc_dir_entry *parent)
 {
@@ -104,45 +172,39 @@ EXPORT_SYMBOL_GPL(proc_net_mkdir);
 
 static __net_init int proc_net_ns_init(struct net *net)
 {
-	struct proc_dir_entry *root, *netd, *net_statd;
+	struct proc_dir_entry *netd, *net_statd;
 	int err;
 
 	err = -ENOMEM;
-	root = kzalloc(sizeof(*root), GFP_KERNEL);
-	if (!root)
+	netd = kzalloc(sizeof(*netd), GFP_KERNEL);
+	if (!netd)
 		goto out;
 
-	err = -EEXIST;
-	netd = proc_net_mkdir(net, "net", root);
-	if (!netd)
-		goto free_root;
+	netd->data = net;
+	netd->nlink = 2;
+	netd->name = "net";
+	netd->namelen = 3;
+	netd->parent = &proc_root;
 
 	err = -EEXIST;
 	net_statd = proc_net_mkdir(net, "stat", netd);
 	if (!net_statd)
 		goto free_net;
 
-	root->data = net;
-
-	net->proc_net_root = root;
 	net->proc_net = netd;
 	net->proc_net_stat = net_statd;
-	err = 0;
+	return 0;
 
+free_net:
+	kfree(netd);
 out:
 	return err;
-free_net:
-	remove_proc_entry("net", root);
-free_root:
-	kfree(root);
-	goto out;
 }
 
 static __net_exit void proc_net_ns_exit(struct net *net)
 {
 	remove_proc_entry("stat", net->proc_net);
-	remove_proc_entry("net", net->proc_net_root);
-	kfree(net->proc_net_root);
+	kfree(net->proc_net);
 }
 
 static struct pernet_operations __net_initdata proc_net_ns_ops = {
@@ -152,8 +214,7 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = {
 
 int __init proc_net_init(void)
 {
-	shadow_pde = proc_mkdir("net", NULL);
-	shadow_pde->shadow_proc = proc_net_shadow;
+	proc_symlink("net", NULL, "self/net");
 
 	return register_pernet_subsys(&proc_net_ns_ops);
 }
-- 
cgit v1.2.3


From 3a4780a85d4a160a471ed887bfce58b414f556b1 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 29 Feb 2008 01:56:06 -0800
Subject: [PATCH] fs/ocfs2/dlm/dlmdomain.c: fix printk warning

fs/ocfs2/dlm/dlmdomain.c: In function 'dlm_send_join_cancels':
fs/ocfs2/dlm/dlmdomain.c:983: warning: format '%u' expects type 'unsigned int', but argument 7 has type 'long unsigned int'

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmdomain.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 638d2ebb892..906974cfbf1 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -937,7 +937,7 @@ static int dlm_send_join_cancels(struct dlm_ctxt *dlm,
 			 sizeof(unsigned long))) {
 		mlog(ML_ERROR,
 		     "map_size %u != BITS_TO_LONGS(O2NM_MAX_NODES) %u\n",
-		     map_size, BITS_TO_LONGS(O2NM_MAX_NODES));
+		     map_size, (unsigned)BITS_TO_LONGS(O2NM_MAX_NODES));
 		return -EINVAL;
 	}
 
-- 
cgit v1.2.3


From 2af37ce82d199d1d8cd6286f42f37d321627a807 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Thu, 28 Feb 2008 10:41:55 +0800
Subject: ocfs2: Use dlm_print_one_lock_resource for lock resource print

__dlm_print_one_lock_resource must be called with spin_lock
the res->spinlock. While in some cases, we use it without this
precondition and lead to the failure of assert_spin_locked.
So call dlm_print_one_lock_resource instead.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmconvert.c  | 2 +-
 fs/ocfs2/dlm/dlmmaster.c   | 4 ++--
 fs/ocfs2/dlm/dlmrecovery.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index ecb4d997221..75997b4deaf 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -487,7 +487,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data,
 			       "cookie=%u:%llu\n",
 		     dlm_get_lock_cookie_node(be64_to_cpu(cnv->cookie)),
 		     dlm_get_lock_cookie_seq(be64_to_cpu(cnv->cookie)));
-		__dlm_print_one_lock_resource(res);
+		dlm_print_one_lock_resource(res);
 		goto leave;
 	}
 
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index c92d1b19fc0..6d318b0bd81 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2348,7 +2348,7 @@ int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
 			mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref "
 		     	"but it is already dropped!\n", dlm->name,
 		     	res->lockname.len, res->lockname.name, node);
-			__dlm_print_one_lock_resource(res);
+			dlm_print_one_lock_resource(res);
 		}
 		ret = 0;
 		goto done;
@@ -2408,7 +2408,7 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
 		mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref "
 		     "but it is already dropped!\n", dlm->name,
 		     res->lockname.len, res->lockname.name, node);
-		__dlm_print_one_lock_resource(res);
+		dlm_print_one_lock_resource(res);
 	}
 
 	dlm_lockres_put(res);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 91f747b8a53..550d4e62b32 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1191,7 +1191,7 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock,
 			    (ml->type == LKM_EXMODE ||
 			     memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) {
 				mlog(ML_ERROR, "mismatched lvbs!\n");
-				__dlm_print_one_lock_resource(lock->lockres);
+				dlm_print_one_lock_resource(lock->lockres);
 				BUG();
 			}
 			memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
-- 
cgit v1.2.3


From 0f71b7b40f55de909e40fa5ab217a5da3439c7d8 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 12 Feb 2008 14:56:25 -0800
Subject: ocfs2: Fix endian bug in o2dlm protocol negotiation.

struct dlm_query_join_packet is made up of four one-byte fields.  They
are effectively in big-endian order already.  However, little-endian
machines swap them before putting the packet on the wire (because
query_join's response is a status, and that status is treated as a u32
on the wire).  Thus, a big-endian and little-endian machines will
treat this structure differently.

The solution is to have little-endian machines swap the structure when
converting from the structure to the u32 representation.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmcommon.h |  20 +++++-----
 fs/ocfs2/dlm/dlmdomain.c | 101 ++++++++++++++++++++++++++++++-----------------
 2 files changed, 76 insertions(+), 45 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 9843ee17ea2..1f939631ab7 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -602,17 +602,19 @@ enum dlm_query_join_response_code {
 	JOIN_PROTOCOL_MISMATCH,
 };
 
+struct dlm_query_join_packet {
+	u8 code;	/* Response code.  dlm_minor and fs_minor
+			   are only valid if this is JOIN_OK */
+	u8 dlm_minor;	/* The minor version of the protocol the
+			   dlm is speaking. */
+	u8 fs_minor;	/* The minor version of the protocol the
+			   filesystem is speaking. */
+	u8 reserved;
+};
+
 union dlm_query_join_response {
 	u32 intval;
-	struct {
-		u8 code;	/* Response code.  dlm_minor and fs_minor
-				   are only valid if this is JOIN_OK */
-		u8 dlm_minor;	/* The minor version of the protocol the
-				   dlm is speaking. */
-		u8 fs_minor;	/* The minor version of the protocol the
-				   filesystem is speaking. */
-		u8 reserved;
-	} packet;
+	struct dlm_query_join_packet packet;
 };
 
 struct dlm_lock_request
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 906974cfbf1..0879d86113e 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -713,14 +713,46 @@ static int dlm_query_join_proto_check(char *proto_type, int node,
 	return rc;
 }
 
+/*
+ * struct dlm_query_join_packet is made up of four one-byte fields.  They
+ * are effectively in big-endian order already.  However, little-endian
+ * machines swap them before putting the packet on the wire (because
+ * query_join's response is a status, and that status is treated as a u32
+ * on the wire).  Thus, a big-endian and little-endian machines will treat
+ * this structure differently.
+ *
+ * The solution is to have little-endian machines swap the structure when
+ * converting from the structure to the u32 representation.  This will
+ * result in the structure having the correct format on the wire no matter
+ * the host endian format.
+ */
+static void dlm_query_join_packet_to_wire(struct dlm_query_join_packet *packet,
+					  u32 *wire)
+{
+	union dlm_query_join_response response;
+
+	response.packet = *packet;
+	*wire = cpu_to_be32(response.intval);
+}
+
+static void dlm_query_join_wire_to_packet(u32 wire,
+					  struct dlm_query_join_packet *packet)
+{
+	union dlm_query_join_response response;
+
+	response.intval = cpu_to_be32(wire);
+	*packet = response.packet;
+}
+
 static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
 				  void **ret_data)
 {
 	struct dlm_query_join_request *query;
-	union dlm_query_join_response response = {
-		.packet.code = JOIN_DISALLOW,
+	struct dlm_query_join_packet packet = {
+		.code = JOIN_DISALLOW,
 	};
 	struct dlm_ctxt *dlm = NULL;
+	u32 response;
 	u8 nodenum;
 
 	query = (struct dlm_query_join_request *) msg->buf;
@@ -737,11 +769,11 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
 		mlog(0, "node %u is not in our live map yet\n",
 		     query->node_idx);
 
-		response.packet.code = JOIN_DISALLOW;
+		packet.code = JOIN_DISALLOW;
 		goto respond;
 	}
 
-	response.packet.code = JOIN_OK_NO_MAP;
+	packet.code = JOIN_OK_NO_MAP;
 
 	spin_lock(&dlm_domain_lock);
 	dlm = __dlm_lookup_domain_full(query->domain, query->name_len);
@@ -760,7 +792,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
 				mlog(0, "disallow join as node %u does not "
 				     "have node %u in its nodemap\n",
 				     query->node_idx, nodenum);
-				response.packet.code = JOIN_DISALLOW;
+				packet.code = JOIN_DISALLOW;
 				goto unlock_respond;
 			}
 		}
@@ -780,23 +812,23 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
 			/*If this is a brand new context and we
 			 * haven't started our join process yet, then
 			 * the other node won the race. */
-			response.packet.code = JOIN_OK_NO_MAP;
+			packet.code = JOIN_OK_NO_MAP;
 		} else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) {
 			/* Disallow parallel joins. */
-			response.packet.code = JOIN_DISALLOW;
+			packet.code = JOIN_DISALLOW;
 		} else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
 			mlog(0, "node %u trying to join, but recovery "
 			     "is ongoing.\n", bit);
-			response.packet.code = JOIN_DISALLOW;
+			packet.code = JOIN_DISALLOW;
 		} else if (test_bit(bit, dlm->recovery_map)) {
 			mlog(0, "node %u trying to join, but it "
 			     "still needs recovery.\n", bit);
-			response.packet.code = JOIN_DISALLOW;
+			packet.code = JOIN_DISALLOW;
 		} else if (test_bit(bit, dlm->domain_map)) {
 			mlog(0, "node %u trying to join, but it "
 			     "is still in the domain! needs recovery?\n",
 			     bit);
-			response.packet.code = JOIN_DISALLOW;
+			packet.code = JOIN_DISALLOW;
 		} else {
 			/* Alright we're fully a part of this domain
 			 * so we keep some state as to who's joining
@@ -807,19 +839,15 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
 			if (dlm_query_join_proto_check("DLM", bit,
 						       &dlm->dlm_locking_proto,
 						       &query->dlm_proto)) {
-				response.packet.code =
-					JOIN_PROTOCOL_MISMATCH;
+				packet.code = JOIN_PROTOCOL_MISMATCH;
 			} else if (dlm_query_join_proto_check("fs", bit,
 							      &dlm->fs_locking_proto,
 							      &query->fs_proto)) {
-				response.packet.code =
-					JOIN_PROTOCOL_MISMATCH;
+				packet.code = JOIN_PROTOCOL_MISMATCH;
 			} else {
-				response.packet.dlm_minor =
-					query->dlm_proto.pv_minor;
-				response.packet.fs_minor =
-					query->fs_proto.pv_minor;
-				response.packet.code = JOIN_OK;
+				packet.dlm_minor = query->dlm_proto.pv_minor;
+				packet.fs_minor = query->fs_proto.pv_minor;
+				packet.code = JOIN_OK;
 				__dlm_set_joining_node(dlm, query->node_idx);
 			}
 		}
@@ -830,9 +858,10 @@ unlock_respond:
 	spin_unlock(&dlm_domain_lock);
 
 respond:
-	mlog(0, "We respond with %u\n", response.packet.code);
+	mlog(0, "We respond with %u\n", packet.code);
 
-	return response.intval;
+	dlm_query_join_packet_to_wire(&packet, &response);
+	return response;
 }
 
 static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
@@ -968,7 +997,8 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
 {
 	int status;
 	struct dlm_query_join_request join_msg;
-	union dlm_query_join_response join_resp;
+	struct dlm_query_join_packet packet;
+	u32 join_resp;
 
 	mlog(0, "querying node %d\n", node);
 
@@ -984,11 +1014,12 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
 
 	status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
 				    sizeof(join_msg), node,
-				    &join_resp.intval);
+				    &join_resp);
 	if (status < 0 && status != -ENOPROTOOPT) {
 		mlog_errno(status);
 		goto bail;
 	}
+	dlm_query_join_wire_to_packet(join_resp, &packet);
 
 	/* -ENOPROTOOPT from the net code means the other side isn't
 	    listening for our message type -- that's fine, it means
@@ -997,10 +1028,10 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
 	if (status == -ENOPROTOOPT) {
 		status = 0;
 		*response = JOIN_OK_NO_MAP;
-	} else if (join_resp.packet.code == JOIN_DISALLOW ||
-		   join_resp.packet.code == JOIN_OK_NO_MAP) {
-		*response = join_resp.packet.code;
-	} else if (join_resp.packet.code == JOIN_PROTOCOL_MISMATCH) {
+	} else if (packet.code == JOIN_DISALLOW ||
+		   packet.code == JOIN_OK_NO_MAP) {
+		*response = packet.code;
+	} else if (packet.code == JOIN_PROTOCOL_MISMATCH) {
 		mlog(ML_NOTICE,
 		     "This node requested DLM locking protocol %u.%u and "
 		     "filesystem locking protocol %u.%u.  At least one of "
@@ -1012,14 +1043,12 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
 		     dlm->fs_locking_proto.pv_minor,
 		     node);
 		status = -EPROTO;
-		*response = join_resp.packet.code;
-	} else if (join_resp.packet.code == JOIN_OK) {
-		*response = join_resp.packet.code;
+		*response = packet.code;
+	} else if (packet.code == JOIN_OK) {
+		*response = packet.code;
 		/* Use the same locking protocol as the remote node */
-		dlm->dlm_locking_proto.pv_minor =
-			join_resp.packet.dlm_minor;
-		dlm->fs_locking_proto.pv_minor =
-			join_resp.packet.fs_minor;
+		dlm->dlm_locking_proto.pv_minor = packet.dlm_minor;
+		dlm->fs_locking_proto.pv_minor = packet.fs_minor;
 		mlog(0,
 		     "Node %d responds JOIN_OK with DLM locking protocol "
 		     "%u.%u and fs locking protocol %u.%u\n",
@@ -1031,11 +1060,11 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
 	} else {
 		status = -EINVAL;
 		mlog(ML_ERROR, "invalid response %d from node %u\n",
-		     join_resp.packet.code, node);
+		     packet.code, node);
 	}
 
 	mlog(0, "status %d, node %d response is %d\n", status, node,
-		  *response);
+	     *response);
 
 bail:
 	return status;
-- 
cgit v1.2.3


From 90d99779a4cc134daaf8910d814b7a8a5d1e8970 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Tue, 22 Jan 2008 20:52:20 +0100
Subject: [PATCH] [OCFS2]: constify function pointer tables

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index f7794306b2b..1f1873bf41f 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2409,7 +2409,7 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations ocfs2_dlm_seq_ops = {
+static const struct seq_operations ocfs2_dlm_seq_ops = {
 	.start =	ocfs2_dlm_seq_start,
 	.stop =		ocfs2_dlm_seq_stop,
 	.next =		ocfs2_dlm_seq_next,
-- 
cgit v1.2.3


From 4338ab6a750303cbae4cc76cc7de5edba6598ebe Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 3 Mar 2008 10:53:02 +0800
Subject: ocfs2: Fix an endian bug in online resize.

In ocfs2_group_add, 'cr' is a disk field of type 'ocfs2_chain_rec', and we
were putting cpu byteorder values into it. Swap things to the right endian
before storing.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/resize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 37835ffcb03..8166968e901 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -597,7 +597,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 		memset(cr, 0, sizeof(struct ocfs2_chain_rec));
 	}
 
-	cr->c_blkno = le64_to_cpu(input->group);
+	cr->c_blkno = cpu_to_le64(input->group);
 	le32_add_cpu(&cr->c_total, input->clusters * cl_bpc);
 	le32_add_cpu(&cr->c_free, input->frees * cl_bpc);
 
-- 
cgit v1.2.3


From 2c5c54aca9d0263f81bd4886232835ba31f7635a Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Sat, 1 Mar 2008 14:04:20 -0800
Subject: ocfs2/dlm: Add missing dlm_lock_put()s

Normally locks for remote nodes are freed when that node sends an UNLOCK
message to the master. The master node tags an DLM_UNLOCK_FREE_LOCK action
to do an extra put on the lock at the end.

However, there are times when the master node has to free the locks for the
remote nodes forcibly.

Two cases when this happens are:
1. When the master has migrated the lockres plus all locks to another node.
2. When the master is clearing all the locks of a dead node.

It was in the above two conditions that the dlm was missing the extra put.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmmaster.c   | 3 +++
 fs/ocfs2/dlm/dlmrecovery.c | 9 +++++++++
 2 files changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 6d318b0bd81..320081d53f2 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2933,6 +2933,9 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
 				dlm_lockres_clear_refmap_bit(lock->ml.node, res);
 				list_del_init(&lock->list);
 				dlm_lock_put(lock);
+				/* In a normal unlock, we would have added a
+				 * DLM_UNLOCK_FREE_LOCK action. Force it. */
+				dlm_lock_put(lock);
 			}
 		}
 		queue++;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 550d4e62b32..db17727594a 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2130,11 +2130,16 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 	assert_spin_locked(&dlm->spinlock);
 	assert_spin_locked(&res->spinlock);
 
+	/* We do two dlm_lock_put(). One for removing from list and the other is
+	 * to force the DLM_UNLOCK_FREE_LOCK action so as to free the locks */
+
 	/* TODO: check pending_asts, pending_basts here */
 	list_for_each_entry_safe(lock, next, &res->granted, list) {
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
+			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
+			dlm_lock_put(lock);
 			freed++;
 		}
 	}
@@ -2142,6 +2147,8 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
+			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
+			dlm_lock_put(lock);
 			freed++;
 		}
 	}
@@ -2149,6 +2156,8 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
+			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
+			dlm_lock_put(lock);
 			freed++;
 		}
 	}
-- 
cgit v1.2.3


From 52987e2ab456c1a828046494aac53819b1454341 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Sat, 1 Mar 2008 14:04:21 -0800
Subject: ocfs2/dlm: Add missing dlm_lockres_put()s in migration path

During migration, the recovery master node may be asked to master a lockres
it may not know about. In that case, it would not only have to create a
lockres and add it to the hash, but also remember to to do the _put_
corresponding to the kref_init in dlm_init_lockres(), as soon as the migration
is completed. Yes, we don't wait for the dlm_purge_lockres() to do that
matching put. Note the ref added for it being in the hash protects the lockres
from being freed prematurely.

This patch adds that missing put, as described above, to plug a memleak.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmcommon.h   |  1 +
 fs/ocfs2/dlm/dlmrecovery.c | 40 ++++++++++++++++++++++++++++++++++------
 2 files changed, 35 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 1f939631ab7..dc8ea666efd 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -176,6 +176,7 @@ struct dlm_mig_lockres_priv
 {
 	struct dlm_lock_resource *lockres;
 	u8 real_master;
+	u8 extra_ref;
 };
 
 struct dlm_assert_master_priv
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index db17727594a..f9468355242 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1327,6 +1327,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
 		(struct dlm_migratable_lockres *)msg->buf;
 	int ret = 0;
 	u8 real_master;
+	u8 extra_refs = 0;
 	char *buf = NULL;
 	struct dlm_work_item *item = NULL;
 	struct dlm_lock_resource *res = NULL;
@@ -1404,16 +1405,28 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
 		__dlm_insert_lockres(dlm, res);
 		spin_unlock(&dlm->spinlock);
 
+		/* Add an extra ref for this lock-less lockres lest the
+		 * dlm_thread purges it before we get the chance to add
+		 * locks to it */
+		dlm_lockres_get(res);
+
+		/* There are three refs that need to be put.
+		 * 1. Taken above.
+		 * 2. kref_init in dlm_new_lockres()->dlm_init_lockres().
+		 * 3. dlm_lookup_lockres()
+		 * The first one is handled at the end of this function. The
+		 * other two are handled in the worker thread after locks have
+		 * been attached. Yes, we don't wait for purge time to match
+		 * kref_init. The lockres will still have atleast one ref
+		 * added because it is in the hash __dlm_insert_lockres() */
+		extra_refs++;
+
 		/* now that the new lockres is inserted,
 		 * make it usable by other processes */
 		spin_lock(&res->spinlock);
 		res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
 		spin_unlock(&res->spinlock);
 		wake_up(&res->wq);
-
-		/* add an extra ref for just-allocated lockres 
-		 * otherwise the lockres will be purged immediately */
-		dlm_lockres_get(res);
 	}
 
 	/* at this point we have allocated everything we need,
@@ -1443,12 +1456,17 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
 	dlm_init_work_item(dlm, item, dlm_mig_lockres_worker, buf);
 	item->u.ml.lockres = res; /* already have a ref */
 	item->u.ml.real_master = real_master;
+	item->u.ml.extra_ref = extra_refs;
 	spin_lock(&dlm->work_lock);
 	list_add_tail(&item->list, &dlm->work_list);
 	spin_unlock(&dlm->work_lock);
 	queue_work(dlm->dlm_worker, &dlm->dispatched_work);
 
 leave:
+	/* One extra ref taken needs to be put here */
+	if (extra_refs)
+		dlm_lockres_put(res);
+
 	dlm_put(dlm);
 	if (ret < 0) {
 		if (buf)
@@ -1464,17 +1482,19 @@ leave:
 
 static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data)
 {
-	struct dlm_ctxt *dlm = data;
+	struct dlm_ctxt *dlm;
 	struct dlm_migratable_lockres *mres;
 	int ret = 0;
 	struct dlm_lock_resource *res;
 	u8 real_master;
+	u8 extra_ref;
 
 	dlm = item->dlm;
 	mres = (struct dlm_migratable_lockres *)data;
 
 	res = item->u.ml.lockres;
 	real_master = item->u.ml.real_master;
+	extra_ref = item->u.ml.extra_ref;
 
 	if (real_master == DLM_LOCK_RES_OWNER_UNKNOWN) {
 		/* this case is super-rare. only occurs if
@@ -1517,6 +1537,12 @@ again:
 	}
 
 leave:
+	/* See comment in dlm_mig_lockres_handler() */
+	if (res) {
+		if (extra_ref)
+			dlm_lockres_put(res);
+		dlm_lockres_put(res);
+	}
 	kfree(data);
 	mlog_exit(ret);
 }
@@ -1644,7 +1670,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
 				/* retry!? */
 				BUG();
 			}
-		}
+		} else /* put.. incase we are not the master */
+			dlm_lockres_put(res);
 		spin_unlock(&res->spinlock);
 	}
 	spin_unlock(&dlm->spinlock);
@@ -1921,6 +1948,7 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
 		     "Recovering res %s:%.*s, is already on recovery list!\n",
 		     dlm->name, res->lockname.len, res->lockname.name);
 		list_del_init(&res->recovering);
+		dlm_lockres_put(res);
 	}
 	/* We need to hold a reference while on the recovery list */
 	dlm_lockres_get(res);
-- 
cgit v1.2.3


From b31cfc0237f89c3a8bc8f31b5da996e71b543214 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Sat, 1 Mar 2008 14:04:22 -0800
Subject: ocfs2/dlm: Add missing dlm_lockres_put()s

dlm_master_request_handler() forgot to put a lockres when
dlm_assert_master_worker() failed or was skipped.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmmaster.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 320081d53f2..ea6b8957786 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1663,7 +1663,12 @@ way_up_top:
 		dlm_put_mle(tmpmle);
 	}
 send_response:
-
+	/*
+	 * __dlm_lookup_lockres() grabbed a reference to this lockres.
+	 * The reference is released by dlm_assert_master_worker() under
+	 * the call to dlm_dispatch_assert_master().  If
+	 * dlm_assert_master_worker() isn't called, we drop it here.
+	 */
 	if (dispatch_assert) {
 		if (response != DLM_MASTER_RESP_YES)
 			mlog(ML_ERROR, "invalid response %d\n", response);
@@ -1678,7 +1683,11 @@ send_response:
 		if (ret < 0) {
 			mlog(ML_ERROR, "failed to dispatch assert master work\n");
 			response = DLM_MASTER_RESP_ERROR;
+			dlm_lockres_put(res);
 		}
+	} else {
+		if (res)
+			dlm_lockres_put(res);
 	}
 
 	dlm_put(dlm);
-- 
cgit v1.2.3


From 535f7026fddafce6d0a0524db01a432c23a0a7b4 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Sat, 1 Mar 2008 14:04:24 -0800
Subject: ocfs2/dlm: Print message showing the recovery master

Knowing the dlm recovery master helps in debugging recovery
issues. This patch prints a message on the recovery master node.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmrecovery.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index f9468355242..bcb9260c373 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -519,9 +519,9 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
 	return 0;
 
 master_here:
-	mlog(0, "(%d) mastering recovery of %s:%u here(this=%u)!\n",
-	     task_pid_nr(dlm->dlm_reco_thread_task),
-	     dlm->name, dlm->reco.dead_node, dlm->node_num);
+	mlog(ML_NOTICE, "(%d) Node %u is the Recovery Master for the Dead Node "
+	     "%u for Domain %s\n", task_pid_nr(dlm->dlm_reco_thread_task),
+	     dlm->node_num, dlm->reco.dead_node, dlm->name);
 
 	status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
 	if (status < 0) {
-- 
cgit v1.2.3


From c824c3c723f2e37a00b3b739a55b28de595fd72e Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Sat, 1 Mar 2008 14:04:25 -0800
Subject: ocfs2/dlm: dlm_thread should not sleep while holding the dlm_spinlock

This patch addresses the bug in which the dlm_thread could go to sleep
while holding the dlm_spinlock.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmthread.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index cebd089f895..4060bb328bc 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -176,12 +176,14 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 	     res->lockname.name, master);
 
 	if (!master) {
+		/* drop spinlock...  retake below */
+		spin_unlock(&dlm->spinlock);
+
 		spin_lock(&res->spinlock);
 		/* This ensures that clear refmap is sent after the set */
 		__dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
 		spin_unlock(&res->spinlock);
-		/* drop spinlock to do messaging, retake below */
-		spin_unlock(&dlm->spinlock);
+
 		/* clear our bit from the master's refmap, ignore errors */
 		ret = dlm_drop_lockres_ref(dlm, res);
 		if (ret < 0) {
-- 
cgit v1.2.3


From cdef59a94c2fc962ada379d4240d556db7b56d55 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 5 Mar 2008 15:49:55 +0800
Subject: ocfs2: Fix NULL pointer dereferences in o2net

In some situations, ocfs2_set_nn_state might get called with sc = NULL and
valid = 0. If sc = NULL, we can't dereference it to get the o2nm_node
member. Instead, do what o2net_initialize_handshake does and use NULL when
calling o2net_reconnect_delay and o2net_idle_timeout.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/tcp.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index ee50c9610e7..b8057c51b20 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -451,9 +451,9 @@ static void o2net_set_nn_state(struct o2net_node *nn,
 		/* delay if we're withing a RECONNECT_DELAY of the
 		 * last attempt */
 		delay = (nn->nn_last_connect_attempt +
-			 msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
+			 msecs_to_jiffies(o2net_reconnect_delay(NULL)))
 			- jiffies;
-		if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
+		if (delay > msecs_to_jiffies(o2net_reconnect_delay(NULL)))
 			delay = 0;
 		mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay);
 		queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay);
@@ -1552,12 +1552,11 @@ static void o2net_connect_expired(struct work_struct *work)
 
 	spin_lock(&nn->nn_lock);
 	if (!nn->nn_sc_valid) {
-		struct o2nm_node *node = nn->nn_sc->sc_node;
 		mlog(ML_ERROR, "no connection established with node %u after "
 		     "%u.%u seconds, giving up and returning errors.\n",
 		     o2net_num_from_nn(nn),
-		     o2net_idle_timeout(node) / 1000,
-		     o2net_idle_timeout(node) % 1000);
+		     o2net_idle_timeout(NULL) / 1000,
+		     o2net_idle_timeout(NULL) % 1000);
 
 		o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
 	}
-- 
cgit v1.2.3


From b2211a361a4289c83971f89da53fe2eb9e72769d Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 11 Mar 2008 18:03:35 -0700
Subject: net: fix build with CONFIG_NET=n

fs/built-in.o:(.rodata+0x1134): undefined reference to `proc_net_inode_operations'
fs/built-in.o:(.rodata+0x1138): undefined reference to `proc_net_operations'

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/proc/base.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index cc43cf0c1fa..3217774d269 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2274,7 +2274,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 	DIR("task",       S_IRUGO|S_IXUGO, task),
 	DIR("fd",         S_IRUSR|S_IXUSR, fd),
 	DIR("fdinfo",     S_IRUSR|S_IXUSR, fdinfo),
+#ifdef CONFIG_NET
 	DIR("net",        S_IRUGO|S_IXUSR, net),
+#endif
 	REG("environ",    S_IRUSR, environ),
 	INF("auxv",       S_IRUSR, pid_auxv),
 	ONE("status",     S_IRUGO, pid_status),
-- 
cgit v1.2.3


From fb39380b8d683b55630ba5ba381f4e43e417420e Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <marcelo@kvack.org>
Date: Thu, 13 Mar 2008 12:32:35 -0700
Subject: pagemap: proper read error handling

Fix pagemap_read() error handling by releasing acquired resources and checking
for get_user_pages() partial failure.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6dc0334815f..4206454734e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -640,17 +640,17 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 
 	ret = -EACCES;
 	if (!ptrace_may_attach(task))
-		goto out;
+		goto out_task;
 
 	ret = -EINVAL;
 	/* file position must be aligned */
 	if (*ppos % PM_ENTRY_BYTES)
-		goto out;
+		goto out_task;
 
 	ret = 0;
 	mm = get_task_mm(task);
 	if (!mm)
-		goto out;
+		goto out_task;
 
 	ret = -ENOMEM;
 	uaddr = (unsigned long)buf & PAGE_MASK;
@@ -658,7 +658,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
 	pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
 	if (!pages)
-		goto out_task;
+		goto out_mm;
 
 	down_read(&current->mm->mmap_sem);
 	ret = get_user_pages(current, current->mm, uaddr, pagecount,
@@ -668,6 +668,12 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	if (ret < 0)
 		goto out_free;
 
+	if (ret != pagecount) {
+		pagecount = ret;
+		ret = -EFAULT;
+		goto out_pages;
+	}
+
 	pm.out = buf;
 	pm.end = buf + count;
 
@@ -699,15 +705,17 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 			ret = pm.out - buf;
 	}
 
+out_pages:
 	for (; pagecount; pagecount--) {
 		page = pages[pagecount-1];
 		if (!PageReserved(page))
 			SetPageDirty(page);
 		page_cache_release(page);
 	}
-	mmput(mm);
 out_free:
 	kfree(pages);
+out_mm:
+	mmput(mm);
 out_task:
 	put_task_struct(task);
 out:
-- 
cgit v1.2.3


From b663c6fd98c9cf586279db03cec3257c413efd00 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Fri, 14 Mar 2008 19:37:11 -0400
Subject: nfsd: fix oops on access from high-numbered ports

This bug was always here, but before my commit 6fa02839bf9412e18e77
("recheck for secure ports in fh_verify"), it could only be triggered by
failure of a kmalloc().  After that commit it could be triggered by a
client making a request from a non-reserved port for access to an export
marked "secure".  (Exports are "secure" by default.)

The result is a struct svc_export with a reference count one too low,
resulting in likely oopses next time the export is accessed.

The reference counting here is not straightforward; a later patch will
clean up fh_verify().

Thanks to Lukas Hejtmanek for the bug report and followup.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Cc: Lukas Hejtmanek <xhejtman@ics.muni.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfsd/nfsfh.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 1eb771d79cc..3e6b3f41ee1 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -232,6 +232,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 		fhp->fh_dentry = dentry;
 		fhp->fh_export = exp;
 		nfsd_nr_verified++;
+		cache_get(&exp->h);
 	} else {
 		/*
 		 * just rechecking permissions
@@ -241,6 +242,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 		dprintk("nfsd: fh_verify - just checking\n");
 		dentry = fhp->fh_dentry;
 		exp = fhp->fh_export;
+		cache_get(&exp->h);
 		/*
 		 * Set user creds for this exportpoint; necessary even
 		 * in the "just checking" case because this may be a
@@ -252,8 +254,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 		if (error)
 			goto out;
 	}
-	cache_get(&exp->h);
-
 
 	error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
 	if (error)
-- 
cgit v1.2.3


From 3d10a15d6919488204bdb264050d156ced20d9aa Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sun, 16 Mar 2008 22:48:08 +0000
Subject: hfs_bnode_find() can fail, resulting in hfs_bnode_split() breakage

oops and fs corruption; the latter can happen even on valid fs in case of oom.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfs/brec.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c
index 878bf25dbc6..92fb358ce82 100644
--- a/fs/hfs/brec.c
+++ b/fs/hfs/brec.c
@@ -229,7 +229,7 @@ skip:
 static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd)
 {
 	struct hfs_btree *tree;
-	struct hfs_bnode *node, *new_node;
+	struct hfs_bnode *node, *new_node, *next_node;
 	struct hfs_bnode_desc node_desc;
 	int num_recs, new_rec_off, new_off, old_rec_off;
 	int data_start, data_end, size;
@@ -248,6 +248,17 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd)
 	new_node->type = node->type;
 	new_node->height = node->height;
 
+	if (node->next)
+		next_node = hfs_bnode_find(tree, node->next);
+	else
+		next_node = NULL;
+
+	if (IS_ERR(next_node)) {
+		hfs_bnode_put(node);
+		hfs_bnode_put(new_node);
+		return next_node;
+	}
+
 	size = tree->node_size / 2 - node->num_recs * 2 - 14;
 	old_rec_off = tree->node_size - 4;
 	num_recs = 1;
@@ -261,6 +272,8 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd)
 		/* panic? */
 		hfs_bnode_put(node);
 		hfs_bnode_put(new_node);
+		if (next_node)
+			hfs_bnode_put(next_node);
 		return ERR_PTR(-ENOSPC);
 	}
 
@@ -315,8 +328,7 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd)
 	hfs_bnode_write(node, &node_desc, 0, sizeof(node_desc));
 
 	/* update next bnode header */
-	if (new_node->next) {
-		struct hfs_bnode *next_node = hfs_bnode_find(tree, new_node->next);
+	if (next_node) {
 		next_node->prev = new_node->this;
 		hfs_bnode_read(next_node, &node_desc, 0, sizeof(node_desc));
 		node_desc.prev = cpu_to_be32(next_node->prev);
-- 
cgit v1.2.3


From 40044ce0bf2b7e548584d91f108444e83ed5eab3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Mon, 17 Mar 2008 21:14:40 +0100
Subject: Revert "unexport bio_{,un}map_user"

Outside users like asmlib uses the mapping functions. API wise, the
export is definitely sane. It's a better idea to keep this export
than to require external users to open-code this piece of code instead.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index 3312fcc3c09..553b5b7960a 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1194,6 +1194,8 @@ EXPORT_SYMBOL(bio_hw_segments);
 EXPORT_SYMBOL(bio_add_page);
 EXPORT_SYMBOL(bio_add_pc_page);
 EXPORT_SYMBOL(bio_get_nr_vecs);
+EXPORT_SYMBOL(bio_map_user);
+EXPORT_SYMBOL(bio_unmap_user);
 EXPORT_SYMBOL(bio_map_kern);
 EXPORT_SYMBOL(bio_pair_release);
 EXPORT_SYMBOL(bio_split);
-- 
cgit v1.2.3


From 1e0bd7550ea9cf474b1ad4c6ff5729a507f75fdc Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 13 Mar 2008 08:15:31 -0400
Subject: [PATCH] export sessionid alongside the loginuid in procfs

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/base.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9a4da0aae02..8a10f6fe24a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1036,6 +1036,26 @@ static const struct file_operations proc_loginuid_operations = {
 	.read		= proc_loginuid_read,
 	.write		= proc_loginuid_write,
 };
+
+static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
+				  size_t count, loff_t *ppos)
+{
+	struct inode * inode = file->f_path.dentry->d_inode;
+	struct task_struct *task = get_proc_task(inode);
+	ssize_t length;
+	char tmpbuf[TMPBUFLEN];
+
+	if (!task)
+		return -ESRCH;
+	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
+				audit_get_sessionid(task));
+	put_task_struct(task);
+	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
+}
+
+static const struct file_operations proc_sessionid_operations = {
+	.read		= proc_sessionid_read,
+};
 #endif
 
 #ifdef CONFIG_FAULT_INJECTION
@@ -2319,6 +2339,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("oom_adj",    S_IRUGO|S_IWUSR, oom_adjust),
 #ifdef CONFIG_AUDITSYSCALL
 	REG("loginuid",   S_IWUSR|S_IRUGO, loginuid),
+	REG("sessionid",  S_IRUSR, sessionid),
 #endif
 #ifdef CONFIG_FAULT_INJECTION
 	REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
@@ -2649,6 +2670,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	REG("oom_adj",   S_IRUGO|S_IWUSR, oom_adjust),
 #ifdef CONFIG_AUDITSYSCALL
 	REG("loginuid",  S_IWUSR|S_IRUGO, loginuid),
+	REG("sessionid",  S_IRUSR, sessionid),
 #endif
 #ifdef CONFIG_FAULT_INJECTION
 	REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
-- 
cgit v1.2.3


From f8512ad0da16cbe156f3a7627971cdf0b39c4138 Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@citi.umich.edu>
Date: Wed, 19 Mar 2008 11:24:39 -0400
Subject: nfs: don't ignore return value from nfs_pageio_add_request

Ignoring the return value from nfs_pageio_add_request can cause deadlocks.

In read path:
  call nfs_pageio_add_request from readpage_async_filler
  assume at this point that there are requests already in desc, that
    can't be merged with the current request.
  so nfs_pageio_doio is fired up to clear out desc.
  assume something goes wrong in setting up the io, so desc->pg_error is set.
  This causes nfs_pageio_add_request to return 0, *WITHOUT* adding the original
    request.
  BUT, since return code is ignored, readpage_async_filler assumes it has
    been added, and does nothing further, leaving page locked.
  do_generic_mapping_read will eventually call lock_page, resulting in deadlock

In write path:
  page is marked dirty by generic_perform_write
  nfs_writepages is called
  call nfs_pageio_add_request from nfs_page_async_flush
  assume at this point that there are requests already in desc, that
    can't be merged with the current request.
  so nfs_pageio_doio is fired up to clear out desc.
  assume something goes wrong in setting up the io, so desc->pg_error is set.
  This causes nfs_page_async_flush to return 0, *WITHOUT* adding the original
    request, yet marking the request as locked (PG_BUSY) and in writeback,
    clearing dirty marks.
  The next time a write is done to the page, deadlock will result as
    nfs_write_end calls nfs_update_request

Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/read.c  | 5 ++++-
 fs/nfs/write.c | 8 +++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 3d7d9631e12..5a70be589bb 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -533,7 +533,10 @@ readpage_async_filler(void *data, struct page *page)
 
 	if (len < PAGE_CACHE_SIZE)
 		zero_user_segment(page, len, PAGE_CACHE_SIZE);
-	nfs_pageio_add_request(desc->pgio, new);
+	if (!nfs_pageio_add_request(desc->pgio, new)) {
+		error = desc->pgio->pg_error;
+		goto out_unlock;
+	}
 	return 0;
 out_error:
 	error = PTR_ERR(new);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 80c61fdb272..bed63416a55 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -39,6 +39,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
 					    unsigned int, unsigned int);
 static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
 				  struct inode *inode, int ioflags);
+static void nfs_redirty_request(struct nfs_page *req);
 static const struct rpc_call_ops nfs_write_partial_ops;
 static const struct rpc_call_ops nfs_write_full_ops;
 static const struct rpc_call_ops nfs_commit_ops;
@@ -288,7 +289,12 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 		BUG();
 	}
 	spin_unlock(&inode->i_lock);
-	nfs_pageio_add_request(pgio, req);
+	if (!nfs_pageio_add_request(pgio, req)) {
+		nfs_redirty_request(req);
+		nfs_end_page_writeback(page);
+		nfs_clear_page_tag_locked(req);
+		return pgio->pg_error;
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3