Merge with /home/shaggy/git/linus-clean/

author: Dave Kleikamp <shaggy@austin.ibm.com> 2005-06-20 08:44:00 -0500
committer: Dave Kleikamp <shaggy@austin.ibm.com> 2005-06-20 08:44:00 -0500
commit: d039ba24f135147f60a13bcaa768189a5b773b6e (patch)
tree: 444b7596ab8312b5954d15c3135052a7c09c6fbe /fs
parent: 72e3148a6e987974e3e949c5668e5ca812d7c818 (diff)
parent: 8b22c249e7de453961e4d253b19fc2a0bdd65d53 (diff)
12 files changed, 201 insertions, 118 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c374be51b04..f8f6b6b7617 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1125,7 +1125,7 @@ static int dump_write(struct file *file, const void *addr, int nr)
 	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
 }
 
-static int dump_seek(struct file *file, off_t off)
+static int dump_seek(struct file *file, loff_t off)
 {
 	if (file->f_op->llseek) {
 		if (file->f_op->llseek(file, off, 0) != off)
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index f0cd67d9d31..c8998dc6688 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -520,7 +520,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 		DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
 
 		down_write(&current->mm->mmap_sem);
-		textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, 0, 0);
+		textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_SHARED, 0);
 		up_write(&current->mm->mmap_sem);
 		if (!textpos  || textpos >= (unsigned long) -4096) {
 			if (!textpos)
@@ -532,7 +532,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 		down_write(&current->mm->mmap_sem);
 		realdatastart = do_mmap(0, 0, data_len + extra +
 				MAX_SHARED_LIBS * sizeof(unsigned long),
-				PROT_READ|PROT_WRITE|PROT_EXEC, 0, 0);
+				PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
 		up_write(&current->mm->mmap_sem);
 
 		if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) {
@@ -574,7 +574,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 		down_write(&current->mm->mmap_sem);
 		textpos = do_mmap(0, 0, text_len + data_len + extra +
 					MAX_SHARED_LIBS * sizeof(unsigned long),
-				PROT_READ | PROT_EXEC | PROT_WRITE, 0, 0);
+				PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
 		up_write(&current->mm->mmap_sem);
 		if (!textpos  || textpos >= (unsigned long) -4096) {
 			if (!textpos)
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 95483baab70..dab4774ee7b 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -6,7 +6,8 @@ kills the cifsd thread (NB: killing the cifs kernel threads is not
 recommended, unmount and rmmod cifs will kill them when they are
 no longer needed).  Fix readdir to ASCII servers (ie older servers
 which do not support Unicode) and also require asterik.
-
+Fix out of memory case in which data could be written one page
+off in the page cache.
 
 Version 1.33
 ------------
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index d00b3bfe1a5..78af5850c55 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -96,5 +96,5 @@ extern ssize_t	cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t	cifs_listxattr(struct dentry *, char *, size_t);
 extern int cifs_ioctl (struct inode * inode, struct file * filep,
 		       unsigned int command, unsigned long arg);
-#define CIFS_VERSION   "1.34"
+#define CIFS_VERSION   "1.35"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index dde2d251fc3..30ab70ce554 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1352,6 +1352,8 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
 				      GFP_KERNEL)) {
 			page_cache_release(page);
 			cFYI(1, ("Add page cache failed"));
+			data += PAGE_CACHE_SIZE;
+			bytes_read -= PAGE_CACHE_SIZE;
 			continue;
 		}
 
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b8b78cbb34c..8d336a90025 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -82,12 +82,12 @@ int cifs_get_inode_info_unix(struct inode **pinode,
 		/* get new inode */
 		if (*pinode == NULL) {
 			*pinode = new_inode(sb);
-			if(*pinode == NULL) 
+			if (*pinode == NULL) 
 				return -ENOMEM;
 			/* Is an i_ino of zero legal? */
 			/* Are there sanity checks we can use to ensure that
 			   the server is really filling in that field? */
-			if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
+			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
 				(*pinode)->i_ino =
 					(unsigned long)findData.UniqueId;
 			} /* note ino incremented to unique num in new_inode */
@@ -134,7 +134,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
 		inode->i_gid = le64_to_cpu(findData.Gid);
 		inode->i_nlink = le64_to_cpu(findData.Nlinks);
 
-		if(is_size_safe_to_change(cifsInfo)) {
+		if (is_size_safe_to_change(cifsInfo)) {
 		/* can not safely change the file size here if the
 		   client is writing to it due to potential races */
 
@@ -162,7 +162,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
 		if (S_ISREG(inode->i_mode)) {
 			cFYI(1, (" File inode "));
 			inode->i_op = &cifs_file_inode_ops;
-			if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
+			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
 				inode->i_fop = &cifs_file_direct_ops;
 			else
 				inode->i_fop = &cifs_file_ops;
@@ -198,17 +198,17 @@ int cifs_get_inode_info(struct inode **pinode,
 	pTcon = cifs_sb->tcon;
 	cFYI(1,("Getting info on %s ", search_path));
 
-	if((pfindData == NULL) && (*pinode != NULL)) {
-		if(CIFS_I(*pinode)->clientCanCacheRead) {
+	if ((pfindData == NULL) && (*pinode != NULL)) {
+		if (CIFS_I(*pinode)->clientCanCacheRead) {
 			cFYI(1,("No need to revalidate cached inode sizes"));
 			return rc;
 		}
 	}
 
 	/* if file info not passed in then get it from server */
-	if(pfindData == NULL) {
+	if (pfindData == NULL) {
 		buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
-		if(buf == NULL)
+		if (buf == NULL)
 			return -ENOMEM;
 		pfindData = (FILE_ALL_INFO *)buf;
 		/* could do find first instead but this returns more info */
@@ -268,7 +268,7 @@ int cifs_get_inode_info(struct inode **pinode,
 			   IndexNumber field is not guaranteed unique? */
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL		
-			if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM){
+			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM){
 				int rc1 = 0;
 				__u64 inode_num;
 
@@ -277,7 +277,7 @@ int cifs_get_inode_info(struct inode **pinode,
 					cifs_sb->local_nls,
 					cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
-				if(rc1) {
+				if (rc1) {
 					cFYI(1,("GetSrvInodeNum rc %d", rc1));
 					/* BB EOPNOSUPP disable SERVER_INUM? */
 				} else /* do we need cast or hash to ino? */
@@ -355,7 +355,7 @@ int cifs_get_inode_info(struct inode **pinode,
 		if (S_ISREG(inode->i_mode)) {
 			cFYI(1, (" File inode "));
 			inode->i_op = &cifs_file_inode_ops;
-			if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
+			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
 				inode->i_fop = &cifs_file_direct_ops;
 			else
 				inode->i_fop = &cifs_file_ops;
@@ -422,7 +422,7 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry)
 			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
 
 	if (!rc) {
-		if(direntry->d_inode)
+		if (direntry->d_inode)
 			direntry->d_inode->i_nlink--;
 	} else if (rc == -ENOENT) {
 		d_drop(direntry);
@@ -441,7 +441,7 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry)
 					      cifs_sb->mnt_cifs_flags & 
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 			CIFSSMBClose(xid, pTcon, netfid);
-			if(direntry->d_inode)
+			if (direntry->d_inode)
 				direntry->d_inode->i_nlink--;
 		}
 	} else if (rc == -EACCES) {
@@ -496,7 +496,7 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry)
 					    cifs_sb->mnt_cifs_flags & 
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 			if (!rc) {
-				if(direntry->d_inode)
+				if (direntry->d_inode)
 					direntry->d_inode->i_nlink--;
 			} else if (rc == -ETXTBSY) {
 				int oplock = FALSE;
@@ -517,14 +517,14 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry)
 						cifs_sb->mnt_cifs_flags &
 						    CIFS_MOUNT_MAP_SPECIAL_CHR);
 					CIFSSMBClose(xid, pTcon, netfid);
-					if(direntry->d_inode)
+					if (direntry->d_inode)
 			                        direntry->d_inode->i_nlink--;
 				}
 			/* BB if rc = -ETXTBUSY goto the rename logic BB */
 			}
 		}
 	}
-	if(direntry->d_inode) {
+	if (direntry->d_inode) {
 		cifsInode = CIFS_I(direntry->d_inode);
 		cifsInode->time = 0;	/* will force revalidate to get info
 					   when needed */
@@ -582,7 +582,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 		if (direntry->d_inode)
 			direntry->d_inode->i_nlink = 2;
 		if (cifs_sb->tcon->ses->capabilities & CAP_UNIX)
-			if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
 				CIFSSMBUnixSetPerms(xid, pTcon, full_path,
 						    mode,
 						    (__u64)current->euid,
diff --git a/fs/exec.c b/fs/exec.c
index e56ee243702..3a4b35a14c0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -649,6 +649,7 @@ static inline int de_thread(struct task_struct *tsk)
 	}
 	sig->group_exit_task = NULL;
 	sig->notify_count = 0;
+	sig->real_timer.data = (unsigned long)current;
 	spin_unlock_irq(lock);
 
 	/*
@@ -675,10 +676,8 @@ static inline int de_thread(struct task_struct *tsk)
 		proc_dentry2 = proc_pid_unhash(leader);
 		write_lock_irq(&tasklist_lock);
 
-		if (leader->tgid != current->tgid)
-			BUG();
-		if (current->pid == current->tgid)
-			BUG();
+		BUG_ON(leader->tgid != current->tgid);
+		BUG_ON(current->pid == current->tgid);
 		/*
 		 * An exec() starts a new thread group with the
 		 * TGID of the previous thread group. Rehash the
@@ -726,8 +725,7 @@ static inline int de_thread(struct task_struct *tsk)
 		proc_pid_flush(proc_dentry1);
 		proc_pid_flush(proc_dentry2);
 
-		if (exit_state != EXIT_ZOMBIE)
-			BUG();
+		BUG_ON(exit_state != EXIT_ZOMBIE);
 		release_task(leader);
         }
 
@@ -772,10 +770,8 @@ no_thread_group:
 			kmem_cache_free(sighand_cachep, oldsighand);
 	}
 
-	if (!thread_group_empty(current))
-		BUG();
-	if (!thread_group_leader(current))
-		BUG();
+	BUG_ON(!thread_group_empty(current));
+	BUG_ON(!thread_group_leader(current));
 	return 0;
 }
 	
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 98d830401c5..5a97e346bd9 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -188,7 +188,6 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
 		} else {
 			jbd_unlock_bh_state(bh);
 		}
-		jh = next_jh;
 	} while (jh != last_jh);
 
 	return ret;
@@ -339,8 +338,10 @@ int log_do_checkpoint(journal_t *journal)
 			}
 		} while (jh != last_jh && !retry);
 
-		if (batch_count)
+		if (batch_count) {
 			__flush_batch(journal, bhs, &batch_count);
+			retry = 1;
+		}
 
 		/*
 		 * If someone cleaned up this transaction while we slept, we're
diff --git a/fs/mpage.c b/fs/mpage.c
index b92c0e64aef..bb9aebe9386 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -79,8 +79,11 @@ static int mpage_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
 		if (--bvec >= bio->bi_io_vec)
 			prefetchw(&bvec->bv_page->flags);
 
-		if (!uptodate)
+		if (!uptodate){
 			SetPageError(page);
+			if (page->mapping)
+				set_bit(AS_EIO, &page->mapping->flags);
+		}
 		end_page_writeback(page);
 	} while (bvec >= bio->bi_io_vec);
 	bio_put(bio);
diff --git a/fs/namei.c b/fs/namei.c
index dd78f01b6de..a7f7f44119b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -493,12 +493,21 @@ fail:
 	return PTR_ERR(link);
 }
 
-static inline int __do_follow_link(struct dentry *dentry, struct nameidata *nd)
+struct path {
+	struct vfsmount *mnt;
+	struct dentry *dentry;
+};
+
+static inline int __do_follow_link(struct path *path, struct nameidata *nd)
 {
 	int error;
+	struct dentry *dentry = path->dentry;
 
-	touch_atime(nd->mnt, dentry);
+	touch_atime(path->mnt, dentry);
 	nd_set_link(nd, NULL);
+
+	if (path->mnt == nd->mnt)
+		mntget(path->mnt);
 	error = dentry->d_inode->i_op->follow_link(dentry, nd);
 	if (!error) {
 		char *s = nd_get_link(nd);
@@ -507,6 +516,8 @@ static inline int __do_follow_link(struct dentry *dentry, struct nameidata *nd)
 		if (dentry->d_inode->i_op->put_link)
 			dentry->d_inode->i_op->put_link(dentry, nd);
 	}
+	dput(dentry);
+	mntput(path->mnt);
 
 	return error;
 }
@@ -518,7 +529,7 @@ static inline int __do_follow_link(struct dentry *dentry, struct nameidata *nd)
  * Without that kind of total limit, nasty chains of consecutive
  * symlinks can cause almost arbitrarily long lookups. 
  */
-static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
+static inline int do_follow_link(struct path *path, struct nameidata *nd)
 {
 	int err = -ELOOP;
 	if (current->link_count >= MAX_NESTED_LINKS)
@@ -527,17 +538,20 @@ static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
 		goto loop;
 	BUG_ON(nd->depth >= MAX_NESTED_LINKS);
 	cond_resched();
-	err = security_inode_follow_link(dentry, nd);
+	err = security_inode_follow_link(path->dentry, nd);
 	if (err)
 		goto loop;
 	current->link_count++;
 	current->total_link_count++;
 	nd->depth++;
-	err = __do_follow_link(dentry, nd);
+	err = __do_follow_link(path, nd);
 	current->link_count--;
 	nd->depth--;
 	return err;
 loop:
+	dput(path->dentry);
+	if (path->mnt != nd->mnt)
+		mntput(path->mnt);
 	path_release(nd);
 	return err;
 }
@@ -565,87 +579,91 @@ int follow_up(struct vfsmount **mnt, struct dentry **dentry)
 /* no need for dcache_lock, as serialization is taken care in
  * namespace.c
  */
-static int follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+static int __follow_mount(struct path *path)
 {
 	int res = 0;
+	while (d_mountpoint(path->dentry)) {
+		struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry);
+		if (!mounted)
+			break;
+		dput(path->dentry);
+		if (res)
+			mntput(path->mnt);
+		path->mnt = mounted;
+		path->dentry = dget(mounted->mnt_root);
+		res = 1;
+	}
+	return res;
+}
+
+static void follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+{
 	while (d_mountpoint(*dentry)) {
 		struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
 		if (!mounted)
 			break;
+		dput(*dentry);
 		mntput(*mnt);
 		*mnt = mounted;
-		dput(*dentry);
 		*dentry = dget(mounted->mnt_root);
-		res = 1;
 	}
-	return res;
 }
 
 /* no need for dcache_lock, as serialization is taken care in
  * namespace.c
  */
-static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
+int follow_down(struct vfsmount **mnt, struct dentry **dentry)
 {
 	struct vfsmount *mounted;
 
 	mounted = lookup_mnt(*mnt, *dentry);
 	if (mounted) {
+		dput(*dentry);
 		mntput(*mnt);
 		*mnt = mounted;
-		dput(*dentry);
 		*dentry = dget(mounted->mnt_root);
 		return 1;
 	}
 	return 0;
 }
 
-int follow_down(struct vfsmount **mnt, struct dentry **dentry)
-{
-	return __follow_down(mnt,dentry);
-}
- 
-static inline void follow_dotdot(struct vfsmount **mnt, struct dentry **dentry)
+static inline void follow_dotdot(struct nameidata *nd)
 {
 	while(1) {
 		struct vfsmount *parent;
-		struct dentry *old = *dentry;
+		struct dentry *old = nd->dentry;
 
                 read_lock(&current->fs->lock);
-		if (*dentry == current->fs->root &&
-		    *mnt == current->fs->rootmnt) {
+		if (nd->dentry == current->fs->root &&
+		    nd->mnt == current->fs->rootmnt) {
                         read_unlock(&current->fs->lock);
 			break;
 		}
                 read_unlock(&current->fs->lock);
 		spin_lock(&dcache_lock);
-		if (*dentry != (*mnt)->mnt_root) {
-			*dentry = dget((*dentry)->d_parent);
+		if (nd->dentry != nd->mnt->mnt_root) {
+			nd->dentry = dget(nd->dentry->d_parent);
 			spin_unlock(&dcache_lock);
 			dput(old);
 			break;
 		}
 		spin_unlock(&dcache_lock);
 		spin_lock(&vfsmount_lock);
-		parent = (*mnt)->mnt_parent;
-		if (parent == *mnt) {
+		parent = nd->mnt->mnt_parent;
+		if (parent == nd->mnt) {
 			spin_unlock(&vfsmount_lock);
 			break;
 		}
 		mntget(parent);
-		*dentry = dget((*mnt)->mnt_mountpoint);
+		nd->dentry = dget(nd->mnt->mnt_mountpoint);
 		spin_unlock(&vfsmount_lock);
 		dput(old);
-		mntput(*mnt);
-		*mnt = parent;
+		mntput(nd->mnt);
+		nd->mnt = parent;
 	}
-	follow_mount(mnt, dentry);
+	follow_mount(&nd->mnt, &nd->dentry);
 }
 
-struct path {
-	struct vfsmount *mnt;
-	struct dentry *dentry;
-};
-
 /*
  *  It's more convoluted than I'd like it to be, but... it's still fairly
  *  small and for now I'd prefer to have fast path as straight as possible.
@@ -664,6 +682,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 done:
 	path->mnt = mnt;
 	path->dentry = dentry;
+	__follow_mount(path);
 	return 0;
 
 need_lookup:
@@ -751,7 +770,7 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
 			case 2:	
 				if (this.name[1] != '.')
 					break;
-				follow_dotdot(&nd->mnt, &nd->dentry);
+				follow_dotdot(nd);
 				inode = nd->dentry->d_inode;
 				/* fallthrough */
 			case 1:
@@ -771,8 +790,6 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
 		err = do_lookup(nd, &this, &next);
 		if (err)
 			break;
-		/* Check mountpoints.. */
-		follow_mount(&next.mnt, &next.dentry);
 
 		err = -ENOENT;
 		inode = next.dentry->d_inode;
@@ -783,10 +800,7 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
 			goto out_dput;
 
 		if (inode->i_op->follow_link) {
-			mntget(next.mnt);
-			err = do_follow_link(next.dentry, nd);
-			dput(next.dentry);
-			mntput(next.mnt);
+			err = do_follow_link(&next, nd);
 			if (err)
 				goto return_err;
 			err = -ENOENT;
@@ -798,6 +812,8 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
 				break;
 		} else {
 			dput(nd->dentry);
+			if (nd->mnt != next.mnt)
+				mntput(nd->mnt);
 			nd->mnt = next.mnt;
 			nd->dentry = next.dentry;
 		}
@@ -819,7 +835,7 @@ last_component:
 			case 2:	
 				if (this.name[1] != '.')
 					break;
-				follow_dotdot(&nd->mnt, &nd->dentry);
+				follow_dotdot(nd);
 				inode = nd->dentry->d_inode;
 				/* fallthrough */
 			case 1:
@@ -833,19 +849,17 @@ last_component:
 		err = do_lookup(nd, &this, &next);
 		if (err)
 			break;
-		follow_mount(&next.mnt, &next.dentry);
 		inode = next.dentry->d_inode;
 		if ((lookup_flags & LOOKUP_FOLLOW)
 		    && inode && inode->i_op && inode->i_op->follow_link) {
-			mntget(next.mnt);
-			err = do_follow_link(next.dentry, nd);
-			dput(next.dentry);
-			mntput(next.mnt);
+			err = do_follow_link(&next, nd);
 			if (err)
 				goto return_err;
 			inode = nd->dentry->d_inode;
 		} else {
 			dput(nd->dentry);
+			if (nd->mnt != next.mnt)
+				mntput(nd->mnt);
 			nd->mnt = next.mnt;
 			nd->dentry = next.dentry;
 		}
@@ -885,6 +899,8 @@ return_base:
 		return 0;
 out_dput:
 		dput(next.dentry);
+		if (nd->mnt != next.mnt)
+			mntput(next.mnt);
 		break;
 	}
 	path_release(nd);
@@ -1398,7 +1414,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 {
 	int acc_mode, error = 0;
-	struct dentry *dentry;
+	struct path path;
 	struct dentry *dir;
 	int count = 0;
 
@@ -1442,23 +1458,24 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 	dir = nd->dentry;
 	nd->flags &= ~LOOKUP_PARENT;
 	down(&dir->d_inode->i_sem);
-	dentry = __lookup_hash(&nd->last, nd->dentry, nd);
+	path.dentry = __lookup_hash(&nd->last, nd->dentry, nd);
+	path.mnt = nd->mnt;
 
 do_last:
-	error = PTR_ERR(dentry);
-	if (IS_ERR(dentry)) {
+	error = PTR_ERR(path.dentry);
+	if (IS_ERR(path.dentry)) {
 		up(&dir->d_inode->i_sem);
 		goto exit;
 	}
 
 	/* Negative dentry, just create the file */
-	if (!dentry->d_inode) {
+	if (!path.dentry->d_inode) {
 		if (!IS_POSIXACL(dir->d_inode))
 			mode &= ~current->fs->umask;
-		error = vfs_create(dir->d_inode, dentry, mode, nd);
+		error = vfs_create(dir->d_inode, path.dentry, mode, nd);
 		up(&dir->d_inode->i_sem);
 		dput(nd->dentry);
-		nd->dentry = dentry;
+		nd->dentry = path.dentry;
 		if (error)
 			goto exit;
 		/* Don't check for write permission, don't truncate */
@@ -1476,22 +1493,24 @@ do_last:
 	if (flag & O_EXCL)
 		goto exit_dput;
 
-	if (d_mountpoint(dentry)) {
+	if (__follow_mount(&path)) {
 		error = -ELOOP;
 		if (flag & O_NOFOLLOW)
 			goto exit_dput;
-		while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));
 	}
 	error = -ENOENT;
-	if (!dentry->d_inode)
+	if (!path.dentry->d_inode)
 		goto exit_dput;
-	if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
+	if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
 		goto do_link;
 
 	dput(nd->dentry);
-	nd->dentry = dentry;
+	nd->dentry = path.dentry;
+	if (nd->mnt != path.mnt)
+		mntput(nd->mnt);
+	nd->mnt = path.mnt;
 	error = -EISDIR;
-	if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
+	if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
 		goto exit;
 ok:
 	error = may_open(nd, acc_mode, flag);
@@ -1500,7 +1519,9 @@ ok:
 	return 0;
 
 exit_dput:
-	dput(dentry);
+	dput(path.dentry);
+	if (nd->mnt != path.mnt)
+		mntput(path.mnt);
 exit:
 	path_release(nd);
 	return error;
@@ -1520,18 +1541,15 @@ do_link:
 	 * are done. Procfs-like symlinks just set LAST_BIND.
 	 */
 	nd->flags |= LOOKUP_PARENT;
-	error = security_inode_follow_link(dentry, nd);
+	error = security_inode_follow_link(path.dentry, nd);
 	if (error)
 		goto exit_dput;
-	error = __do_follow_link(dentry, nd);
-	dput(dentry);
+	error = __do_follow_link(&path, nd);
 	if (error)
 		return error;
 	nd->flags &= ~LOOKUP_PARENT;
-	if (nd->last_type == LAST_BIND) {
-		dentry = nd->dentry;
+	if (nd->last_type == LAST_BIND)
 		goto ok;
-	}
 	error = -EISDIR;
 	if (nd->last_type != LAST_NORM)
 		goto exit;
@@ -1546,7 +1564,8 @@ do_link:
 	}
 	dir = nd->dentry;
 	down(&dir->d_inode->i_sem);
-	dentry = __lookup_hash(&nd->last, nd->dentry, nd);
+	path.dentry = __lookup_hash(&nd->last, nd->dentry, nd);
+	path.mnt = nd->mnt;
 	putname(nd->last.name);
 	goto do_last;
 }
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 73f96acd5d3..ff6155f5e8d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -528,19 +528,39 @@ static inline void nfs_renew_times(struct dentry * dentry)
 	dentry->d_time = jiffies;
 }
 
+/*
+ * Return the intent data that applies to this particular path component
+ *
+ * Note that the current set of intents only apply to the very last
+ * component of the path.
+ * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT.
+ */
+static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigned int mask)
+{
+	if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))
+		return 0;
+	return nd->flags & mask;
+}
+
+/*
+ * Inode and filehandle revalidation for lookups.
+ *
+ * We force revalidation in the cases where the VFS sets LOOKUP_REVAL,
+ * or if the intent information indicates that we're about to open this
+ * particular file and the "nocto" mount flag is not set.
+ *
+ */
 static inline
 int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 
 	if (nd != NULL) {
-		int ndflags = nd->flags;
 		/* VFS wants an on-the-wire revalidation */
-		if (ndflags & LOOKUP_REVAL)
+		if (nd->flags & LOOKUP_REVAL)
 			goto out_force;
 		/* This is an open(2) */
-		if ((ndflags & LOOKUP_OPEN) &&
-				!(ndflags & LOOKUP_CONTINUE) &&
+		if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 &&
 				!(server->flags & NFS_MOUNT_NOCTO))
 			goto out_force;
 	}
@@ -560,12 +580,8 @@ static inline
 int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
 		       struct nameidata *nd)
 {
-	int ndflags = 0;
-
-	if (nd)
-		ndflags = nd->flags;
 	/* Don't revalidate a negative dentry if we're creating a new file */
-	if ((ndflags & LOOKUP_CREATE) && !(ndflags & LOOKUP_CONTINUE))
+	if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0)
 		return 0;
 	return !nfs_check_verifier(dir, dentry);
 }
@@ -700,12 +716,16 @@ struct dentry_operations nfs_dentry_operations = {
 	.d_iput		= nfs_dentry_iput,
 };
 
+/*
+ * Use intent information to check whether or not we're going to do
+ * an O_EXCL create using this path component.
+ */
 static inline
 int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
 {
 	if (NFS_PROTO(dir)->version == 2)
 		return 0;
-	if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE))
+	if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0)
 		return 0;
 	return (nd->intent.open.flags & O_EXCL) != 0;
 }
@@ -772,12 +792,13 @@ struct dentry_operations nfs4_dentry_operations = {
 	.d_iput		= nfs_dentry_iput,
 };
 
+/*
+ * Use intent information to determine whether we need to substitute
+ * the NFSv4-style stateful OPEN for the LOOKUP call
+ */
 static int is_atomic_open(struct inode *dir, struct nameidata *nd)
 {
-	if (!nd)
-		return 0;
-	/* Check that we are indeed trying to open this file */
-	if ((nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_OPEN))
+	if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0)
 		return 0;
 	/* NFS does not (yet) have a stateful open for directories */
 	if (nd->flags & LOOKUP_DIRECTORY)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f06eee6dcff..55c90759249 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -37,6 +37,7 @@
 
 static int nfs_file_open(struct inode *, struct file *);
 static int nfs_file_release(struct inode *, struct file *);
+static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
 static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
 static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
 static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
@@ -48,7 +49,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
 static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
 
 struct file_operations nfs_file_operations = {
-	.llseek		= remote_llseek,
+	.llseek		= nfs_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read		= nfs_file_read,
@@ -114,6 +115,45 @@ nfs_file_release(struct inode *inode, struct file *filp)
 	return NFS_PROTO(inode)->file_release(inode, filp);
 }
 
+/**
+ * nfs_revalidate_size - Revalidate the file size
+ * @inode - pointer to inode struct
+ * @file - pointer to struct file
+ *
+ * Revalidates the file length. This is basically a wrapper around
+ * nfs_revalidate_inode() that takes into account the fact that we may
+ * have cached writes (in which case we don't care about the server's
+ * idea of what the file length is), or O_DIRECT (in which case we
+ * shouldn't trust the cache).
+ */
+static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	if (server->flags & NFS_MOUNT_NOAC)
+		goto force_reval;
+	if (filp->f_flags & O_DIRECT)
+		goto force_reval;
+	if (nfsi->npages != 0)
+		return 0;
+	return nfs_revalidate_inode(server, inode);
+force_reval:
+	return __nfs_revalidate_inode(server, inode);
+}
+
+static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
+{
+	/* origin == SEEK_END => we must revalidate the cached file length */
+	if (origin == 2) {
+		struct inode *inode = filp->f_mapping->host;
+		int retval = nfs_revalidate_file_size(inode, filp);
+		if (retval < 0)
+			return (loff_t)retval;
+	}
+	return remote_llseek(filp, offset, origin);
+}
+
 /*
  * Flush all dirty pages, and check for write errors.
  *
author	Dave Kleikamp <shaggy@austin.ibm.com>	2005-06-20 08:44:00 -0500
committer	Dave Kleikamp <shaggy@austin.ibm.com>	2005-06-20 08:44:00 -0500
commit	d039ba24f135147f60a13bcaa768189a5b773b6e (patch)
tree	444b7596ab8312b5954d15c3135052a7c09c6fbe /fs
parent	72e3148a6e987974e3e949c5668e5ca812d7c818 (diff)
parent	8b22c249e7de453961e4d253b19fc2a0bdd65d53 (diff)