From cbc3d65ebcb0c494183d45cf202a53352cbf3871 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 27 Jul 2005 09:17:57 -0500 Subject: JFS: Improve sync barrier processing Under heavy load, hot metadata pages are often locked by non-committed transactions, making them difficult to flush to disk. This prevents the sync point from advancing past a transaction that had modified the page. There is a point during the sync barrier processing where all outstanding transactions have been committed to disk, but no new transaction have been allowed to proceed. This is the best time to write the metadata. Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_logmgr.c | 36 +++++++++++++++++++----------------- fs/jfs/jfs_logmgr.h | 2 +- fs/jfs/jfs_txnmgr.c | 10 +++++----- fs/jfs/super.c | 2 +- 4 files changed, 26 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 22815e88e7c..d27bac6acaa 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -191,7 +191,7 @@ static int lbmIOWait(struct lbuf * bp, int flag); static bio_end_io_t lbmIODone; static void lbmStartIO(struct lbuf * bp); static void lmGCwrite(struct jfs_log * log, int cant_block); -static int lmLogSync(struct jfs_log * log, int nosyncwait); +static int lmLogSync(struct jfs_log * log, int hard_sync); @@ -915,19 +915,17 @@ static void lmPostGC(struct lbuf * bp) * if new sync address is available * (normally the case if sync() is executed by back-ground * process). - * if not, explicitly run jfs_blogsync() to initiate - * getting of new sync address. * calculate new value of i_nextsync which determines when * this code is called again. * * PARAMETERS: log - log structure - * nosyncwait - 1 if called asynchronously + * hard_sync - 1 to force all metadata to be written * * RETURN: 0 * * serialization: LOG_LOCK() held on entry/exit */ -static int lmLogSync(struct jfs_log * log, int nosyncwait) +static int lmLogSync(struct jfs_log * log, int hard_sync) { int logsize; int written; /* written since last syncpt */ @@ -941,11 +939,18 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait) unsigned long flags; /* push dirty metapages out to disk */ - list_for_each_entry(sbi, &log->sb_list, log_list) { - filemap_flush(sbi->ipbmap->i_mapping); - filemap_flush(sbi->ipimap->i_mapping); - filemap_flush(sbi->direct_inode->i_mapping); - } + if (hard_sync) + list_for_each_entry(sbi, &log->sb_list, log_list) { + filemap_fdatawrite(sbi->ipbmap->i_mapping); + filemap_fdatawrite(sbi->ipimap->i_mapping); + filemap_fdatawrite(sbi->direct_inode->i_mapping); + } + else + list_for_each_entry(sbi, &log->sb_list, log_list) { + filemap_flush(sbi->ipbmap->i_mapping); + filemap_flush(sbi->ipimap->i_mapping); + filemap_flush(sbi->direct_inode->i_mapping); + } /* * forward syncpt @@ -1021,10 +1026,6 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait) /* next syncpt trigger = written + more */ log->nextsync = written + more; - /* return if lmLogSync() from outside of transaction, e.g., sync() */ - if (nosyncwait) - return lsn; - /* if number of bytes written from last sync point is more * than 1/4 of the log size, stop new transactions from * starting until all current transactions are completed @@ -1049,11 +1050,12 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait) * * FUNCTION: write log SYNCPT record for specified log * - * PARAMETERS: log - log structure + * PARAMETERS: log - log structure + * hard_sync - set to 1 to force metadata to be written */ -void jfs_syncpt(struct jfs_log *log) +void jfs_syncpt(struct jfs_log *log, int hard_sync) { LOG_LOCK(log); - lmLogSync(log, 1); + lmLogSync(log, hard_sync); LOG_UNLOCK(log); } diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h index 747114cd38b..e4978b5b65e 100644 --- a/fs/jfs/jfs_logmgr.h +++ b/fs/jfs/jfs_logmgr.h @@ -510,6 +510,6 @@ extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); extern int lmGroupCommit(struct jfs_log *, struct tblock *); extern int jfsIOWait(void *); extern void jfs_flush_journal(struct jfs_log * log, int wait); -extern void jfs_syncpt(struct jfs_log *log); +extern void jfs_syncpt(struct jfs_log *log, int hard_sync); #endif /* _H_JFS_LOGMGR */ diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 121c981ff45..3555acff12d 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -552,6 +552,11 @@ void txEnd(tid_t tid) * synchronize with logsync barrier */ if (test_bit(log_SYNCBARRIER, &log->flag)) { + TXN_UNLOCK(); + + /* write dirty metadata & forward log syncpt */ + jfs_syncpt(log, 1); + jfs_info("log barrier off: 0x%x", log->lsn); /* enable new transactions start */ @@ -560,11 +565,6 @@ void txEnd(tid_t tid) /* wakeup all waitors for logsync barrier */ TXN_WAKEUP(&log->syncwait); - TXN_UNLOCK(); - - /* forward log syncpt */ - jfs_syncpt(log); - goto wakeup; } } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index ee32211288c..c2abdaee0c0 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -531,7 +531,7 @@ static int jfs_sync_fs(struct super_block *sb, int wait) /* log == NULL indicates read-only mount */ if (log) { jfs_flush_journal(log, wait); - jfs_syncpt(log); + jfs_syncpt(log, 0); } return 0; -- cgit v1.2.3 From 30db1ae8640d3527ca7ac8df4bcbf14ccc6ae9cd Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 1 Aug 2005 16:54:26 -0500 Subject: JFS: Check for invalid inodes in jfs_delete_inode Some error paths may iput an invalid inode with i_nlink=0. jfs should not try to actually delete such an inode. Signed-off-by: Dave Kleikamp --- fs/jfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 2137138c59b..767c7ecb429 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -128,6 +128,10 @@ void jfs_delete_inode(struct inode *inode) { jfs_info("In jfs_delete_inode, inode = 0x%p", inode); + if (is_bad_inode(inode) || + (JFS_IP(inode)->fileset != cpu_to_le32(FILESYSTEM_I))) + return; + if (test_cflag(COMMIT_Freewmap, inode)) jfs_free_zero_link(inode); -- cgit v1.2.3 From 8a9cd6d676728792aaee31f30015d284acd154a3 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 10 Aug 2005 11:14:39 -0500 Subject: JFS: Fix race in txLock TxAnchor.anon_list is protected by jfsTxnLock (TXN_LOCK), but there was a place in txLock() that was removing an entry from the list without holding the spinlock. Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_txnmgr.c | 2 ++ fs/jfs/super.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 3555acff12d..c7a92f9deb2 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -657,7 +657,9 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, /* only anonymous txn. * Remove from anon_list */ + TXN_LOCK(); list_del_init(&jfs_ip->anon_inode_list); + TXN_UNLOCK(); } jfs_ip->atlhead = tlck->next; } else { diff --git a/fs/jfs/super.c b/fs/jfs/super.c index c2abdaee0c0..9ff89720f93 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -114,6 +114,8 @@ static void jfs_destroy_inode(struct inode *inode) { struct jfs_inode_info *ji = JFS_IP(inode); + BUG_ON(!list_empty(&ji->anon_inode_list)); + spin_lock_irq(&ji->ag_lock); if (ji->active_ag != -1) { struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap; -- cgit v1.2.3 From 0bf955ce98cb3cf40e20d0cc435299eb76e8819e Mon Sep 17 00:00:00 2001 From: Robert Love Date: Mon, 15 Aug 2005 12:27:54 -0400 Subject: [PATCH] inotify: fix idr_get_new_above usage We are saving the wrong thing in ->last_wd. We want the wd, not the return value. Signed-off-by: Robert Love Signed-off-by: Linus Torvalds --- fs/inotify.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/inotify.c b/fs/inotify.c index 27ebcac5e07..868901b1e77 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -402,7 +402,7 @@ static struct inotify_watch *create_watch(struct inotify_device *dev, return ERR_PTR(ret); } - dev->last_wd = ret; + dev->last_wd = watch->wd; watch->mask = mask; atomic_set(&watch->count, 0); INIT_LIST_HEAD(&watch->d_list); -- cgit v1.2.3 From 89204c40a03346cd951e698d854105db4cfedc28 Mon Sep 17 00:00:00 2001 From: John McCutchan Date: Mon, 15 Aug 2005 12:13:28 -0400 Subject: [PATCH] inotify: add MOVE_SELF event This adds a MOVE_SELF event to inotify. It is sent whenever the inode you are watching is moved. We need this event so that we can catch something like this: - app1: watch /etc/mtab - app2: cp /etc/mtab /tmp/mtab-work mv /etc/mtab /etc/mtab~ mv /tmp/mtab-work /etc/mtab app1 still thinks it's watching /etc/mtab but it's actually watching /etc/mtab~. Signed-off-by: John McCutchan Signed-off-by: Robert Love Signed-off-by: Linus Torvalds --- fs/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 57046d98a74..b85f158aef0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2216,7 +2216,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); if (!error) { const char *new_name = old_dentry->d_name.name; - fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir, new_dentry->d_inode); + fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir, + new_dentry->d_inode, old_dentry->d_inode); } fsnotify_oldname_free(old_name); -- cgit v1.2.3 From e74589ac250e463973361774a90fee2c9d71da02 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Tue, 16 Aug 2005 16:38:28 +0100 Subject: =?UTF-8?q?NTFS:=20Fix=20bug=20in=20mft=20record=20writing=20where?= =?UTF-8?q?=20we=20forgot=20to=20set=20the=20device=20in=20=20=20=20=20=20?= =?UTF-8?q?=20the=20buffers=20when=20mapping=20them=20after=20the=20VM=20h?= =?UTF-8?q?ad=20discarded=20them.=20=20=20=20=20=20=20Thanks=20to=20Martin?= =?UTF-8?q?=20MOKREJ=C5=A0=20for=20the=20bug=20report.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 3 +++ fs/ntfs/mft.c | 2 ++ 2 files changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 9709fac6531..21e21fe519e 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -174,6 +174,9 @@ ToDo/Notes: fact that the vfs and ntfs inodes are one struct in memory to find the ntfs inode in memory if present. Also, the ntfs inode has its own locking so it does not matter if the vfs inode is locked. + - Fix bug in mft record writing where we forgot to set the device in + the buffers when mapping them after the VM had discarded them + Thanks to Martin MOKREJŠ for the bug report. 2.1.22 - Many bug and race fixes and error handling improvements. diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index ac9ff39aa83..317f7c679fd 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -533,6 +533,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, LCN lcn; unsigned int vcn_ofs; + bh->b_bdev = vol->sb->s_bdev; /* Obtain the vcn and offset of the current block. */ vcn = ((VCN)mft_no << vol->mft_record_size_bits) + (block_start - m_start); @@ -725,6 +726,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) LCN lcn; unsigned int vcn_ofs; + bh->b_bdev = vol->sb->s_bdev; /* Obtain the vcn and offset of the current block. */ vcn = ((VCN)ni->mft_no << vol->mft_record_size_bits) + (block_start - m_start); -- cgit v1.2.3 From 58fcb8df0bf663bb6b8f46cd3010bfe8d13d97cf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 10 Aug 2005 18:15:12 -0400 Subject: [PATCH] NFS: Ensure ACL xdr code doesn't overflow. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs_common/nfsacl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index 18c58c32e32..251e5a1bb1c 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -239,6 +239,7 @@ nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, if (xdr_decode_word(buf, base, &entries) || entries > NFS_ACL_MAX_ENTRIES) return -EINVAL; + nfsacl_desc.desc.array_maxlen = entries; err = xdr_decode_array2(buf, base + 4, &nfsacl_desc.desc); if (err) return err; -- cgit v1.2.3 From 65e4308d2500e7daf60c3dccc202c61ffb066c63 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Aug 2005 11:49:44 -0400 Subject: [PATCH] NFS: Ensure we always update inode->i_mode when doing O_EXCL creates When the client performs an exclusive create and opens the file for writing, a Netapp filer will first create the file using the mode 01777. It does this since an NFSv3/v4 exclusive create cannot immediately set the mode bits. The 01777 mode then gets put into the inode->i_mode. After the file creation is successful, we then do a setattr to change the mode to the correct value (as per the NFS spec). The problem is that nfs_refresh_inode() no longer updates inode->i_mode, so the latter retains the 01777 mode. A bit later, the VFS notices this, and calls remove_suid(). This of course now resets the file mode to inode->i_mode & 0777. Hey presto, the file mode on the server is now magically changed to 0777. Duh... Fixes http://bugzilla.linux-nfs.org/show_bug.cgi?id=32 Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/inode.c | 37 ++++++++++++++++++++++++------------- fs/nfs/nfs3proc.c | 4 ++++ fs/nfs/nfs4proc.c | 10 ++++++++-- fs/nfs/proc.c | 2 ++ 4 files changed, 38 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 4845911f1c6..bb7ca022bcb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -814,28 +814,39 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) nfs_wb_all(inode); } error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); - if (error == 0) { + if (error == 0) nfs_refresh_inode(inode, &fattr); + nfs_end_data_update(inode); + unlock_kernel(); + return error; +} + +/** + * nfs_setattr_update_inode - Update inode metadata after a setattr call. + * @inode: pointer to struct inode + * @attr: pointer to struct iattr + * + * Note: we do this in the *proc.c in order to ensure that + * it works for things like exclusive creates too. + */ +void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) +{ + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { if ((attr->ia_valid & ATTR_MODE) != 0) { - int mode; - mode = inode->i_mode & ~S_IALLUGO; - mode |= attr->ia_mode & S_IALLUGO; + int mode = attr->ia_mode & S_IALLUGO; + mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; } if ((attr->ia_valid & ATTR_UID) != 0) inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; - if ((attr->ia_valid & ATTR_SIZE) != 0) { - inode->i_size = attr->ia_size; - vmtruncate(inode, attr->ia_size); - } - } - if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - nfs_end_data_update(inode); - unlock_kernel(); - return error; + } + if ((attr->ia_valid & ATTR_SIZE) != 0) { + inode->i_size = attr->ia_size; + vmtruncate(inode, attr->ia_size); + } } /* diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7851569b31c..2681485cf2d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -120,6 +120,8 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, dprintk("NFS call setattr\n"); fattr->valid = 0; status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0); + if (status == 0) + nfs_setattr_update_inode(inode, sattr); dprintk("NFS reply setattr: %d\n", status); return status; } @@ -370,6 +372,8 @@ again: * not sure this buys us anything (and I'd have * to revamp the NFSv3 XDR code) */ status = nfs3_proc_setattr(dentry, &fattr, sattr); + if (status == 0) + nfs_setattr_update_inode(dentry->d_inode, sattr); nfs_refresh_inode(dentry->d_inode, &fattr); dprintk("NFS reply setattr (post-create): %d\n", status); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1b76f80aedb..0c5a308e496 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -753,6 +753,7 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, .rpc_argp = &arg, .rpc_resp = &res, }; + int status; fattr->valid = 0; @@ -762,7 +763,8 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, } else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); - return rpc_call_sync(server->client, &msg, 0); + status = rpc_call_sync(server->client, &msg, 0); + return status; } static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, @@ -1145,6 +1147,8 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, status = nfs4_do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, state); + if (status == 0) + nfs_setattr_update_inode(inode, sattr); if (state != NULL) nfs4_close_state(state, FMODE_WRITE); put_rpccred(cred); @@ -1449,8 +1453,10 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct nfs_fattr fattr; status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, NFS_FH(state->inode), sattr, state); - if (status == 0) + if (status == 0) { + nfs_setattr_update_inode(state->inode, sattr); goto out; + } } else if (flags != 0) goto out; nfs4_close_state(state, flags); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index cedf636bcf3..be23c3fb926 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -114,6 +114,8 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, dprintk("NFS call setattr\n"); fattr->valid = 0; status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0); + if (status == 0) + nfs_setattr_update_inode(inode, sattr); dprintk("NFS reply setattr: %d\n", status); return status; } -- cgit v1.2.3 From 481d0374217f3fefaf98efbd8d21d73c138dd928 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Tue, 16 Aug 2005 19:42:56 +0100 Subject: NTFS: Complete the previous fix for the unset device when mapping buffers for mft record writing. I had missed the writepage based mft record write code path. Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 2 +- fs/ntfs/aops.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 21e21fe519e..9eecc9939df 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -175,7 +175,7 @@ ToDo/Notes: the ntfs inode in memory if present. Also, the ntfs inode has its own locking so it does not matter if the vfs inode is locked. - Fix bug in mft record writing where we forgot to set the device in - the buffers when mapping them after the VM had discarded them + the buffers when mapping them after the VM had discarded them. Thanks to Martin MOKREJŠ for the bug report. 2.1.22 - Many bug and race fixes and error handling improvements. diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 3f43bfe6184..78adad7a988 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -924,6 +924,7 @@ static int ntfs_write_mst_block(struct page *page, LCN lcn; unsigned int vcn_ofs; + bh->b_bdev = vol->sb->s_bdev; /* Obtain the vcn and offset of the current block. */ vcn = (VCN)block << bh_size_bits; vcn_ofs = vcn & vol->cluster_size_mask; -- cgit v1.2.3 From c4f92dba97f4e3aa757500896f87001569f4604b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 17 Aug 2005 14:25:23 -0400 Subject: [PATCH] nfsd to unlock kernel before exiting The nfsd holds the big kernel lock upon exit, when it really shouldn't. Not to mention that this breaks Ingo's RT patch. This is a trivial fix to release the lock. Ingo, this patch also works with your kernel, and stops the problem with nfsd. Note, there's a "goto out;" where "out:" is right above svc_exit_thread. The point of the goto also holds the kernel_lock, so I don't see any problem here in releasing it. Signed-off-by: Steven Rostedt Signed-off-by: Linus Torvalds --- fs/nfsd/nfssvc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 07b9a065e9d..1697539a717 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -287,6 +287,7 @@ out: svc_exit_thread(rqstp); /* Release module */ + unlock_kernel(); module_put_and_exit(0); } -- cgit v1.2.3 From 5529680981807b44abf3be30fb6d612ff04f68ff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 Aug 2005 11:24:09 -0700 Subject: [PATCH] NFS: split nfsi->flags into two fields Certain bits in nfsi->flags can be manipulated with atomic bitops, and some are better manipulated via logical bitmask operations. This patch splits the flags field into two. The next patch introduces atomic bitops for one of the fields. Test plan: Millions of fsx ops on SMP clients. Signed-off-by: Chuck Lever Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 16 ++++++++------- fs/nfs/file.c | 5 +++-- fs/nfs/inode.c | 61 +++++++++++++++++++++++++++++--------------------------- fs/nfs/nfs3acl.c | 2 +- fs/nfs/read.c | 4 ++-- 5 files changed, 47 insertions(+), 41 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b38a57e78a6..5732e13cd0d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -189,7 +189,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) goto error; } SetPageUptodate(page); - NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * through inode->i_sem or some other mechanism. @@ -462,7 +462,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, page, NFS_SERVER(inode)->dtsize, desc->plus); - NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { @@ -608,7 +608,7 @@ static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { if (IS_ROOT(dentry)) return 1; - if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0 + if ((NFS_I(dir)->cache_validity & NFS_INO_INVALID_ATTR) != 0 || nfs_attribute_timeout(dir)) return 0; return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata); @@ -1575,11 +1575,12 @@ out: int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) { - struct nfs_access_entry *cache = &NFS_I(inode)->cache_access; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_access_entry *cache = &nfsi->cache_access; if (cache->cred != cred || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) - || (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS)) + || (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)) return -ENOENT; memcpy(res, cache, sizeof(*res)); return 0; @@ -1587,14 +1588,15 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) { - struct nfs_access_entry *cache = &NFS_I(inode)->cache_access; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_access_entry *cache = &nfsi->cache_access; if (cache->cred != set->cred) { if (cache->cred) put_rpccred(cache->cred); cache->cred = get_rpccred(set->cred); } - NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS; + nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; cache->jiffies = set->jiffies; cache->mask = set->mask; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5621ba9885f..f6b9eda925c 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -134,9 +134,10 @@ nfs_file_release(struct inode *inode, struct file *filp) */ static int nfs_revalidate_file(struct inode *inode, struct file *filp) { + struct nfs_inode *nfsi = NFS_I(inode); int retval = 0; - if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) + if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); nfs_revalidate_mapping(inode, filp->f_mapping); return 0; @@ -164,7 +165,7 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) goto force_reval; if (nfsi->npages != 0) return 0; - if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) + if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) return 0; force_reval: return __nfs_revalidate_inode(server, inode); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bb7ca022bcb..62218455351 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -620,9 +620,9 @@ nfs_zap_caches(struct inode *inode) memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; } static void nfs_zap_acl_cache(struct inode *inode) @@ -632,7 +632,7 @@ static void nfs_zap_acl_cache(struct inode *inode) clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; if (clear_acl_cache != NULL) clear_acl_cache(inode); - NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL; + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; } /* @@ -841,7 +841,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; - NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } if ((attr->ia_valid & ATTR_SIZE) != 0) { inode->i_size = attr->ia_size; @@ -872,8 +872,7 @@ nfs_wait_on_inode(struct inode *inode, int flag) int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; - struct nfs_inode *nfsi = NFS_I(inode); - int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME; + int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; if (__IS_FLG(inode, MS_NOATIME)) @@ -1019,7 +1018,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) struct nfs_fattr fattr; struct nfs_inode *nfsi = NFS_I(inode); unsigned long verifier; - unsigned int flags; + unsigned long cache_validity; dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1036,7 +1035,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) goto out_nowait; if (NFS_ATTRTIMEO(inode) == 0) continue; - if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) + if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) continue; status = NFS_STALE(inode) ? -ESTALE : 0; goto out_nowait; @@ -1065,18 +1064,21 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) (long long)NFS_FILEID(inode), status); goto out; } - flags = nfsi->flags; - nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE; + cache_validity = nfsi->cache_validity; + nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; + /* * We may need to keep the attributes marked as invalid if * we raced with nfs_end_attr_update(). */ if (verifier == nfsi->cache_change_attribute) - nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); - /* Do the page cache invalidation */ + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + nfs_revalidate_mapping(inode, inode->i_mapping); - if (flags & NFS_INO_INVALID_ACL) + + if (cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1107,7 +1109,7 @@ int nfs_attribute_timeout(struct inode *inode) */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { - if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) + if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) && !nfs_attribute_timeout(inode)) return NFS_STALE(inode) ? -ESTALE : 0; return __nfs_revalidate_inode(server, inode); @@ -1122,14 +1124,14 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); - if (nfsi->flags & NFS_INO_INVALID_DATA) { + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { if (S_ISREG(inode->i_mode)) { if (filemap_fdatawrite(mapping) == 0) filemap_fdatawait(mapping); nfs_wb_all(inode); } invalidate_inode_pages2(mapping); - nfsi->flags &= ~NFS_INO_INVALID_DATA; + nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; if (S_ISDIR(inode->i_mode)) { memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); /* This ensures we revalidate child dentries */ @@ -1164,10 +1166,10 @@ void nfs_end_data_update(struct inode *inode) if (!nfs_have_delegation(inode, FMODE_READ)) { /* Mark the attribute cache for revalidation */ - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; /* Directories and symlinks: invalidate page cache too */ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - nfsi->flags |= NFS_INO_INVALID_DATA; + nfsi->cache_validity |= NFS_INO_INVALID_DATA; } nfsi->cache_change_attribute ++; atomic_dec(&nfsi->data_updates); @@ -1200,9 +1202,9 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) && nfsi->change_attr == fattr->pre_change_attr) nfsi->change_attr = fattr->change_attr; if (nfsi->change_attr != fattr->change_attr) { - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) - nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } } @@ -1227,28 +1229,28 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) /* Verify a few of the more important attributes */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) - nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } if (cur_size != new_isize) { - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (nfsi->npages == 0) - nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ if (inode->i_nlink != fattr->nlink) - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!timespec_equal(&inode->i_atime, &fattr->atime)) - nfsi->flags |= NFS_INO_INVALID_ATIME; + nfsi->cache_validity |= NFS_INO_INVALID_ATIME; nfsi->read_cache_jiffies = fattr->timestamp; return 0; @@ -1384,7 +1386,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; if (!nfs_have_delegation(inode, FMODE_READ)) - nfsi->flags |= invalid; + nfsi->cache_validity |= invalid; return 0; out_changed: @@ -1961,7 +1963,8 @@ static struct inode *nfs_alloc_inode(struct super_block *sb) nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL); if (!nfsi) return NULL; - nfsi->flags = 0; + nfsi->flags = 0UL; + nfsi->cache_validity = 0UL; #ifdef CONFIG_NFS_V3_ACL nfsi->acl_access = ERR_PTR(-EAGAIN); nfsi->acl_default = ERR_PTR(-EAGAIN); diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 1b7a3ef2f81..a020e650ffc 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -308,7 +308,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, nfs_begin_data_update(inode); status = rpc_call(server->client_acl, ACLPROC3_SETACL, &args, &fattr, 0); - NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; nfs_end_data_update(inode); dprintk("NFS reply setacl: %d\n", status); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6f866b8aa2d..90df0500ca1 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -140,7 +140,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, if (rdata->res.eof != 0 || result == 0) break; } while (count); - NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; if (count) memclear_highpage_flush(page, rdata->args.pgbase, count); @@ -473,7 +473,7 @@ void nfs_readpage_result(struct rpc_task *task) } task->tk_status = -EIO; } - NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME; + NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; data->complete(data, status); } -- cgit v1.2.3 From 412d582ec1dd59aab2353f8cb7e74f2c79cd20b9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 Aug 2005 11:24:11 -0700 Subject: [PATCH] NFS: use atomic bitops to manipulate flags in nfsi->flags Introduce atomic bitops to manipulate the bits in the nfs_inode structure's "flags" field. Using bitops means we can use a generic wait_on_bit call instead of an ad hoc locking scheme in fs/nfs/inode.c, so we can remove the "nfs_i_wait" field from nfs_inode at the same time. The other new flags field will continue to use bitmask and logic AND and OR. This permits several flags to be set at the same time efficiently. The following patch adds a spin lock to protect these flags, and this spin lock will later cover other fields in the nfs_inode structure, amortizing the cost of using this type of serialization. Test plan: Millions of fsx ops on SMP clients. Signed-off-by: Chuck Lever Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 4 ++-- fs/nfs/inode.c | 69 ++++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 45 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5732e13cd0d..27cf5577f23 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -182,7 +182,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) /* We requested READDIRPLUS, but the server doesn't grok it */ if (error == -ENOTSUPP && desc->plus) { NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; - NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); desc->plus = 0; goto again; } @@ -545,7 +545,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) break; } if (res == -ETOOSMALL && desc->plus) { - NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); nfs_zap_caches(inode); desc->plus = 0; desc->entry->eof = 0; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 62218455351..ee27578277f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -739,7 +739,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_fop = &nfs_dir_operations; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) - NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; + set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; else @@ -849,26 +849,43 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) } } +static int nfs_wait_schedule(void *word) +{ + if (signal_pending(current)) + return -ERESTARTSYS; + schedule(); + return 0; +} + /* * Wait for the inode to get unlocked. - * (Used for NFS_INO_LOCKED and NFS_INO_REVALIDATING). */ -static int -nfs_wait_on_inode(struct inode *inode, int flag) +static int nfs_wait_on_inode(struct inode *inode) { struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_inode *nfsi = NFS_I(inode); - + sigset_t oldmask; int error; - if (!(NFS_FLAGS(inode) & flag)) - return 0; + atomic_inc(&inode->i_count); - error = nfs_wait_event(clnt, nfsi->nfs_i_wait, - !(NFS_FLAGS(inode) & flag)); + rpc_clnt_sigmask(clnt, &oldmask); + error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING, + nfs_wait_schedule, TASK_INTERRUPTIBLE); + rpc_clnt_sigunmask(clnt, &oldmask); iput(inode); + return error; } +static void nfs_wake_up_inode(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + clear_bit(NFS_INO_REVALIDATING, &nfsi->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING); +} + int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; @@ -1029,18 +1046,19 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (NFS_STALE(inode)) goto out_nowait; - while (NFS_REVALIDATING(inode)) { - status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); - if (status < 0) - goto out_nowait; - if (NFS_ATTRTIMEO(inode) == 0) - continue; - if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) - continue; - status = NFS_STALE(inode) ? -ESTALE : 0; - goto out_nowait; + status = nfs_wait_on_inode(inode); + if (status < 0) + goto out; + if (NFS_STALE(inode)) { + status = -ESTALE; + /* Do we trust the cached ESTALE? */ + if (NFS_ATTRTIMEO(inode) != 0) { + if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) { + /* no */ + } else + goto out; + } } - NFS_FLAGS(inode) |= NFS_INO_REVALIDATING; /* Protect against RPC races by saving the change attribute */ verifier = nfs_save_change_attribute(inode); @@ -1052,7 +1070,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (status == -ESTALE) { nfs_zap_caches(inode); if (!S_ISDIR(inode->i_mode)) - NFS_FLAGS(inode) |= NFS_INO_STALE; + set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); } goto out; } @@ -1083,9 +1101,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) inode->i_sb->s_id, (long long)NFS_FILEID(inode)); -out: - NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; - wake_up(&nfsi->nfs_i_wait); + out: + nfs_wake_up_inode(inode); + out_nowait: unlock_kernel(); return status; @@ -1404,7 +1422,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign */ nfs_invalidate_inode(inode); out_err: - NFS_FLAGS(inode) |= NFS_INO_STALE; + set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); return -ESTALE; } @@ -1996,7 +2014,6 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) nfsi->ndirty = 0; nfsi->ncommit = 0; nfsi->npages = 0; - init_waitqueue_head(&nfsi->nfs_i_wait); nfs4_init_once(nfsi); } } -- cgit v1.2.3 From dc59250c6ebed099a9bc0a11298e2281dd896657 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 Aug 2005 11:24:12 -0700 Subject: [PATCH] NFS: Introduce the use of inode->i_lock to protect fields in nfsi Down the road we want to eliminate the use of the global kernel lock entirely from the NFS client. To do this, we need to protect the fields in the nfs_inode structure adequately. Start by serializing updates to the "cache_validity" field. Note this change addresses an SMP hang found by njw@osdl.org, where processes deadlock because nfs_end_data_update and nfs_revalidate_mapping update the "cache_validity" field without proper serialization. Test plan: Millions of fsx ops on SMP clients. Run Nick Wilson's breaknfs program on large SMP clients. Signed-off-by: Chuck Lever Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 7 +++++++ fs/nfs/inode.c | 34 +++++++++++++++++++++++++++++++--- fs/nfs/nfs3acl.c | 2 ++ fs/nfs/read.c | 4 ++++ 4 files changed, 44 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 27cf5577f23..147cbf9261c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -189,7 +189,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) goto error; } SetPageUptodate(page); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&inode->i_lock); /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * through inode->i_sem or some other mechanism. @@ -462,7 +464,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, page, NFS_SERVER(inode)->dtsize, desc->plus); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&inode->i_lock); desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { @@ -1596,7 +1600,10 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) put_rpccred(cache->cred); cache->cred = get_rpccred(set->cred); } + /* FIXME: replace current access_cache BKL reliance with inode->i_lock */ + spin_lock(&inode->i_lock); nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; + spin_unlock(&inode->i_lock); cache->jiffies = set->jiffies; cache->mask = set->mask; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ee27578277f..541b418327c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -615,6 +615,8 @@ nfs_zap_caches(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); int mode = inode->i_mode; + spin_lock(&inode->i_lock); + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; @@ -623,6 +625,8 @@ nfs_zap_caches(struct inode *inode) nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + + spin_unlock(&inode->i_lock); } static void nfs_zap_acl_cache(struct inode *inode) @@ -632,7 +636,9 @@ static void nfs_zap_acl_cache(struct inode *inode) clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; if (clear_acl_cache != NULL) clear_acl_cache(inode); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; + spin_unlock(&inode->i_lock); } /* @@ -841,7 +847,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + spin_unlock(&inode->i_lock); } if ((attr->ia_valid & ATTR_SIZE) != 0) { inode->i_size = attr->ia_size; @@ -1082,6 +1090,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) (long long)NFS_FILEID(inode), status); goto out; } + spin_lock(&inode->i_lock); cache_validity = nfsi->cache_validity; nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; @@ -1091,6 +1100,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) */ if (verifier == nfsi->cache_change_attribute) nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + spin_unlock(&inode->i_lock); nfs_revalidate_mapping(inode, inode->i_mapping); @@ -1149,12 +1159,16 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) nfs_wb_all(inode); } invalidate_inode_pages2(mapping); + + spin_lock(&inode->i_lock); nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; if (S_ISDIR(inode->i_mode)) { memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); /* This ensures we revalidate child dentries */ nfsi->cache_change_attribute++; } + spin_unlock(&inode->i_lock); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1184,10 +1198,12 @@ void nfs_end_data_update(struct inode *inode) if (!nfs_have_delegation(inode, FMODE_READ)) { /* Mark the attribute cache for revalidation */ + spin_lock(&inode->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR; /* Directories and symlinks: invalidate page cache too */ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; + spin_unlock(&inode->i_lock); } nfsi->cache_change_attribute ++; atomic_dec(&nfsi->data_updates); @@ -1212,6 +1228,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) if (nfs_have_delegation(inode, FMODE_READ)) return 0; + spin_lock(&inode->i_lock); + /* Are we in the process of updating data on the server? */ data_unstable = nfs_caches_unstable(inode); @@ -1226,13 +1244,17 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) } } - if ((fattr->valid & NFS_ATTR_FATTR) == 0) + if ((fattr->valid & NFS_ATTR_FATTR) == 0) { + spin_unlock(&inode->i_lock); return 0; + } /* Has the inode gone and changed behind our back? */ if (nfsi->fileid != fattr->fileid - || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + spin_unlock(&inode->i_lock); return -EIO; + } cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); @@ -1271,6 +1293,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->cache_validity |= NFS_INO_INVALID_ATIME; nfsi->read_cache_jiffies = fattr->timestamp; + spin_unlock(&inode->i_lock); return 0; } @@ -1309,11 +1332,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign goto out_err; } + spin_lock(&inode->i_lock); + /* * Make sure the inode's type hasn't changed. */ - if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + spin_unlock(&inode->i_lock); goto out_changed; + } /* * Update the read time so we don't revalidate too often. @@ -1406,6 +1433,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign if (!nfs_have_delegation(inode, FMODE_READ)) nfsi->cache_validity |= invalid; + spin_unlock(&inode->i_lock); return 0; out_changed: /* diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index a020e650ffc..6a5bbc0ae94 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -308,7 +308,9 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, nfs_begin_data_update(inode); status = rpc_call(server->client_acl, ACLPROC3_SETACL, &args, &fattr, 0); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; + spin_unlock(&inode->i_lock); nfs_end_data_update(inode); dprintk("NFS reply setacl: %d\n", status); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 90df0500ca1..6ceb1d471f2 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -140,7 +140,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, if (rdata->res.eof != 0 || result == 0) break; } while (count); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&inode->i_lock); if (count) memclear_highpage_flush(page, rdata->args.pgbase, count); @@ -473,7 +475,9 @@ void nfs_readpage_result(struct rpc_task *task) } task->tk_status = -EIO; } + spin_lock(&data->inode->i_lock); NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&data->inode->i_lock); data->complete(data, status); } -- cgit v1.2.3 From d86c390ffbf5746df9a3cc2c5f7b75d27704580f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 18 Aug 2005 11:24:17 -0700 Subject: [PATCH] reiserfs+acl+quota deadlock fix When i_acl_default is set to some error we do not hold the lock (hence we are not allowed to drop it and reacquire later). Signed-off-by: Jan Kara Cc: Jeff Mahoney Cc: Chris Mason Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index d9f614a5773..ff291c973a5 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1985,7 +1985,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, * iput doesn't deadlock in reiserfs_delete_xattrs. The locking * code really needs to be reworked, but this will take care of it * for now. -jeffm */ - if (REISERFS_I(dir)->i_acl_default) { + if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) { reiserfs_write_unlock_xattrs(dir->i_sb); iput(inode); reiserfs_write_lock_xattrs(dir->i_sb); -- cgit v1.2.3 From 2fb1e3086df9b454538491fba8121298da37cd23 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Aug 2005 22:42:16 +0100 Subject: [PATCH] jffs2: fix symlink error handling The current calling conventions for ->follow_link() are already fairly complex. What we have is 1) you can return -error; then you must release nameidata yourself and ->put_link() will _not_ be called. 2) you can do nd_set_link(nd, ERR_PTR(-error)) and return 0 3) you can do nd_set_link(nd, path) and return 0 4) you can return 0 (after having moved nameidata yourself) jffs2 follow_link() is broken - it has an exit where it returns -EIO and leaks nameidata. Signed-off-by: Linus Torvalds --- fs/jffs2/symlink.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c index 65ab6b001dc..073633e11ea 100644 --- a/fs/jffs2/symlink.c +++ b/fs/jffs2/symlink.c @@ -30,6 +30,7 @@ struct inode_operations jffs2_symlink_inode_operations = static int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd) { struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode); + char *p = (char *)f->dents; /* * We don't acquire the f->sem mutex here since the only data we @@ -45,13 +46,14 @@ static int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd) * nd_set_link() call. */ - if (!f->dents) { + if (!p) { printk(KERN_ERR "jffs2_follow_link(): can't find symlink taerget\n"); - return -EIO; + p = ERR_PTR(-EIO); + } else { + D1(printk(KERN_DEBUG "jffs2_follow_link(): target path is '%s'\n", (char *) f->dents)); } - D1(printk(KERN_DEBUG "jffs2_follow_link(): target path is '%s'\n", (char *) f->dents)); - nd_set_link(nd, (char *)f->dents); + nd_set_link(nd, p); /* * We unlock the f->sem mutex but VFS will use the f->dents string. This is safe -- cgit v1.2.3 From cc314eef0128a807e50fa03baf2d0abc0647952c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 19 Aug 2005 18:02:56 -0700 Subject: Fix nasty ncpfs symlink handling bug. This bug could cause oopses and page state corruption, because ncpfs used the generic page-cache symlink handlign functions. But those functions only work if the page cache is guaranteed to be "stable", ie a page that was installed when the symlink walk was started has to still be installed in the page cache at the end of the walk. We could have fixed ncpfs to not use the generic helper routines, but it is in many ways much cleaner to instead improve on the symlink walking helper routines so that they don't require that absolute stability. We do this by allowing "follow_link()" to return a error-pointer as a cookie, which is fed back to the cleanup "put_link()" routine. This also simplifies NFS symlink handling. Signed-off-by: Linus Torvalds --- fs/autofs/symlink.c | 5 +++-- fs/cifs/cifsfs.h | 4 ++-- fs/cifs/link.c | 6 +++--- fs/ext2/symlink.c | 4 ++-- fs/ext3/symlink.c | 4 ++-- fs/namei.c | 40 +++++++++++++++++++++------------------- fs/nfs/symlink.c | 37 ++++++++----------------------------- fs/sysfs/symlink.c | 6 +++--- 8 files changed, 44 insertions(+), 62 deletions(-) (limited to 'fs') diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c index f028396f138..52e8772b066 100644 --- a/fs/autofs/symlink.c +++ b/fs/autofs/symlink.c @@ -12,11 +12,12 @@ #include "autofs_i.h" -static int autofs_follow_link(struct dentry *dentry, struct nameidata *nd) +/* Nothing to release.. */ +static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd) { char *s=((struct autofs_symlink *)dentry->d_inode->u.generic_ip)->data; nd_set_link(nd, s); - return 0; + return NULL; } struct inode_operations autofs_symlink_inode_operations = { diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 78af5850c55..1fd21f66f24 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -83,8 +83,8 @@ extern int cifs_dir_notify(struct file *, unsigned long arg); extern struct dentry_operations cifs_dentry_ops; /* Functions related to symlinks */ -extern int cifs_follow_link(struct dentry *direntry, struct nameidata *nd); -extern void cifs_put_link(struct dentry *direntry, struct nameidata *nd); +extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd); +extern void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *); extern int cifs_readlink(struct dentry *direntry, char __user *buffer, int buflen); extern int cifs_symlink(struct inode *inode, struct dentry *direntry, diff --git a/fs/cifs/link.c b/fs/cifs/link.c index bde0fabfece..ab925ef4f86 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -92,7 +92,7 @@ cifs_hl_exit: return rc; } -int +void * cifs_follow_link(struct dentry *direntry, struct nameidata *nd) { struct inode *inode = direntry->d_inode; @@ -148,7 +148,7 @@ out: out_no_free: FreeXid(xid); nd_set_link(nd, target_path); - return 0; + return NULL; /* No cookie */ } int @@ -330,7 +330,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) return rc; } -void cifs_put_link(struct dentry *direntry, struct nameidata *nd) +void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie) { char *p = nd_get_link(nd); if (!IS_ERR(p)) diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c index 9f7bac01d55..1e67d87cfa9 100644 --- a/fs/ext2/symlink.c +++ b/fs/ext2/symlink.c @@ -21,11 +21,11 @@ #include "xattr.h" #include -static int ext2_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd) { struct ext2_inode_info *ei = EXT2_I(dentry->d_inode); nd_set_link(nd, (char *)ei->i_data); - return 0; + return NULL; } struct inode_operations ext2_symlink_inode_operations = { diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c index 8c3e72818fb..4f79122cde6 100644 --- a/fs/ext3/symlink.c +++ b/fs/ext3/symlink.c @@ -23,11 +23,11 @@ #include #include "xattr.h" -static int ext3_follow_link(struct dentry *dentry, struct nameidata *nd) +static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd) { struct ext3_inode_info *ei = EXT3_I(dentry->d_inode); nd_set_link(nd, (char*)ei->i_data); - return 0; + return NULL; } struct inode_operations ext3_symlink_inode_operations = { diff --git a/fs/namei.c b/fs/namei.c index b85f158aef0..6ec1f0fefc5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -501,6 +501,7 @@ struct path { static inline int __do_follow_link(struct path *path, struct nameidata *nd) { int error; + void *cookie; struct dentry *dentry = path->dentry; touch_atime(path->mnt, dentry); @@ -508,13 +509,15 @@ static inline int __do_follow_link(struct path *path, struct nameidata *nd) if (path->mnt == nd->mnt) mntget(path->mnt); - error = dentry->d_inode->i_op->follow_link(dentry, nd); - if (!error) { + cookie = dentry->d_inode->i_op->follow_link(dentry, nd); + error = PTR_ERR(cookie); + if (!IS_ERR(cookie)) { char *s = nd_get_link(nd); + error = 0; if (s) error = __vfs_follow_link(nd, s); if (dentry->d_inode->i_op->put_link) - dentry->d_inode->i_op->put_link(dentry, nd); + dentry->d_inode->i_op->put_link(dentry, nd, cookie); } dput(dentry); mntput(path->mnt); @@ -2344,15 +2347,17 @@ out: int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct nameidata nd; - int res; + void *cookie; + nd.depth = 0; - res = dentry->d_inode->i_op->follow_link(dentry, &nd); - if (!res) { - res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); + cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); + if (!IS_ERR(cookie)) { + int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); if (dentry->d_inode->i_op->put_link) - dentry->d_inode->i_op->put_link(dentry, &nd); + dentry->d_inode->i_op->put_link(dentry, &nd, cookie); + cookie = ERR_PTR(res); } - return res; + return PTR_ERR(cookie); } int vfs_follow_link(struct nameidata *nd, const char *link) @@ -2395,23 +2400,20 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) return res; } -int page_follow_link_light(struct dentry *dentry, struct nameidata *nd) +void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd) { - struct page *page; + struct page *page = NULL; nd_set_link(nd, page_getlink(dentry, &page)); - return 0; + return page; } -void page_put_link(struct dentry *dentry, struct nameidata *nd) +void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) { - if (!IS_ERR(nd_get_link(nd))) { - struct page *page; - page = find_get_page(dentry->d_inode->i_mapping, 0); - if (!page) - BUG(); + struct page *page = cookie; + + if (page) { kunmap(page); page_cache_release(page); - page_cache_release(page); } } diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 35f10659914..18dc95b0b64 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -27,26 +27,14 @@ /* Symlink caching in the page cache is even more simplistic * and straight-forward than readdir caching. - * - * At the beginning of the page we store pointer to struct page in question, - * simplifying nfs_put_link() (if inode got invalidated we can't find the page - * to be freed via pagecache lookup). - * The NUL-terminated string follows immediately thereafter. */ -struct nfs_symlink { - struct page *page; - char body[0]; -}; - static int nfs_symlink_filler(struct inode *inode, struct page *page) { - const unsigned int pgbase = offsetof(struct nfs_symlink, body); - const unsigned int pglen = PAGE_SIZE - pgbase; int error; lock_kernel(); - error = NFS_PROTO(inode)->readlink(inode, page, pgbase, pglen); + error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE); unlock_kernel(); if (error < 0) goto error; @@ -60,11 +48,10 @@ error: return -EIO; } -static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; struct page *page; - struct nfs_symlink *p; void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode)); if (err) goto read_failed; @@ -78,28 +65,20 @@ static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd) err = ERR_PTR(-EIO); goto getlink_read_error; } - p = kmap(page); - p->page = page; - nd_set_link(nd, p->body); - return 0; + nd_set_link(nd, kmap(page)); + return page; getlink_read_error: page_cache_release(page); read_failed: nd_set_link(nd, err); - return 0; + return NULL; } -static void nfs_put_link(struct dentry *dentry, struct nameidata *nd) +static void nfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) { - char *s = nd_get_link(nd); - if (!IS_ERR(s)) { - struct nfs_symlink *p; - struct page *page; - - p = container_of(s, struct nfs_symlink, body[0]); - page = p->page; - + if (cookie) { + struct page *page = cookie; kunmap(page); page_cache_release(page); } diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index fae57c83a72..de402fa915f 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -151,17 +151,17 @@ static int sysfs_getlink(struct dentry *dentry, char * path) } -static int sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) { int error = -ENOMEM; unsigned long page = get_zeroed_page(GFP_KERNEL); if (page) error = sysfs_getlink(dentry, (char *) page); nd_set_link(nd, error ? ERR_PTR(error) : (char *)page); - return 0; + return NULL; } -static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd) +static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) { char *page = nd_get_link(nd); if (!IS_ERR(page)) -- cgit v1.2.3 From 008b150a3c4d971cd65d02d107b8fcc860bc959c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2005 00:17:39 +0100 Subject: [PATCH] Fix up symlink function pointers This fixes up the symlink functions for the calling convention change: * afs, autofs4, befs, devfs, freevxfs, jffs2, jfs, ncpfs, procfs, smbfs, sysvfs, ufs, xfs - prototype change for ->follow_link() * befs, smbfs, xfs - same for ->put_link() Signed-off-by: Linus Torvalds --- fs/afs/mntpt.c | 8 ++++---- fs/autofs4/symlink.c | 4 ++-- fs/befs/linuxvfs.c | 8 ++++---- fs/devfs/base.c | 4 ++-- fs/freevxfs/vxfs_immed.c | 4 ++-- fs/jffs2/symlink.c | 6 +++--- fs/jfs/symlink.c | 4 ++-- fs/proc/base.c | 8 ++++---- fs/proc/generic.c | 4 ++-- fs/smbfs/symlink.c | 6 +++--- fs/sysv/symlink.c | 4 ++-- fs/ufs/symlink.c | 4 ++-- fs/xfs/linux-2.6/xfs_iops.c | 10 +++++----- 13 files changed, 37 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index bfc28abe1cb..31ee06590de 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -30,7 +30,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); static int afs_mntpt_open(struct inode *inode, struct file *file); -static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd); +static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd); struct file_operations afs_mntpt_file_operations = { .open = afs_mntpt_open, @@ -233,7 +233,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) /* * follow a link from a mountpoint directory, thus causing it to be mounted */ -static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) { struct vfsmount *newmnt; struct dentry *old_dentry; @@ -249,7 +249,7 @@ static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) newmnt = afs_mntpt_do_automount(dentry); if (IS_ERR(newmnt)) { path_release(nd); - return PTR_ERR(newmnt); + return (void *)newmnt; } old_dentry = nd->dentry; @@ -267,7 +267,7 @@ static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) } kleave(" = %d", err); - return err; + return ERR_PTR(err); } /* end afs_mntpt_follow_link() */ /*****************************************************************************/ diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c index c265a66edf0..2ea2c98fd84 100644 --- a/fs/autofs4/symlink.c +++ b/fs/autofs4/symlink.c @@ -12,11 +12,11 @@ #include "autofs_i.h" -static int autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) { struct autofs_info *ino = autofs4_dentry_ino(dentry); nd_set_link(nd, (char *)ino->u.symlink); - return 0; + return NULL; } struct inode_operations autofs4_symlink_inode_operations = { diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index de5bb280a82..abf950e5b64 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -41,8 +41,8 @@ static struct inode *befs_alloc_inode(struct super_block *sb); static void befs_destroy_inode(struct inode *inode); static int befs_init_inodecache(void); static void befs_destroy_inodecache(void); -static int befs_follow_link(struct dentry *, struct nameidata *); -static void befs_put_link(struct dentry *, struct nameidata *); +static void *befs_follow_link(struct dentry *, struct nameidata *); +static void befs_put_link(struct dentry *, struct nameidata *, void *); static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, char **out, int *out_len); static int befs_nls2utf(struct super_block *sb, const char *in, int in_len, @@ -487,10 +487,10 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd) } nd_set_link(nd, link); - return 0; + return NULL; } -static void befs_put_link(struct dentry *dentry, struct nameidata *nd) +static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) { befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { diff --git a/fs/devfs/base.c b/fs/devfs/base.c index 1ecfe1f184d..8b679b67e5e 100644 --- a/fs/devfs/base.c +++ b/fs/devfs/base.c @@ -2491,11 +2491,11 @@ static int devfs_mknod(struct inode *dir, struct dentry *dentry, int mode, return 0; } /* End Function devfs_mknod */ -static int devfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *devfs_follow_link(struct dentry *dentry, struct nameidata *nd) { struct devfs_entry *p = get_devfs_entry_from_vfs_inode(dentry->d_inode); nd_set_link(nd, p ? p->u.symlink.linkname : ERR_PTR(-ENODEV)); - return 0; + return NULL; } /* End Function devfs_follow_link */ static struct inode_operations devfs_iops = { diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c index ac677ab262b..b20c3efa364 100644 --- a/fs/freevxfs/vxfs_immed.c +++ b/fs/freevxfs/vxfs_immed.c @@ -38,7 +38,7 @@ #include "vxfs_inode.h" -static int vxfs_immed_follow_link(struct dentry *, struct nameidata *); +static void * vxfs_immed_follow_link(struct dentry *, struct nameidata *); static int vxfs_immed_readpage(struct file *, struct page *); @@ -77,7 +77,7 @@ vxfs_immed_follow_link(struct dentry *dp, struct nameidata *np) { struct vxfs_inode_info *vip = VXFS_INO(dp->d_inode); nd_set_link(np, vip->vii_immed.vi_immed); - return 0; + return NULL; } /** diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c index 073633e11ea..82ef484f5e1 100644 --- a/fs/jffs2/symlink.c +++ b/fs/jffs2/symlink.c @@ -18,7 +18,7 @@ #include #include "nodelist.h" -static int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd); +static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd); struct inode_operations jffs2_symlink_inode_operations = { @@ -27,7 +27,7 @@ struct inode_operations jffs2_symlink_inode_operations = .setattr = jffs2_setattr }; -static int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd) { struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode); char *p = (char *)f->dents; @@ -60,6 +60,6 @@ static int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd) * since the only way that may cause f->dents to be changed is iput() operation. * But VFS will not use f->dents after iput() has been called. */ - return 0; + return NULL; } diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c index 287d8d6c3cf..16477b3835e 100644 --- a/fs/jfs/symlink.c +++ b/fs/jfs/symlink.c @@ -22,11 +22,11 @@ #include "jfs_inode.h" #include "jfs_xattr.h" -static int jfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd) { char *s = JFS_IP(dentry->d_inode)->i_inline; nd_set_link(nd, s); - return 0; + return NULL; } struct inode_operations jfs_symlink_inode_operations = { diff --git a/fs/proc/base.c b/fs/proc/base.c index ace151fa487..491f2d9f89a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -890,7 +890,7 @@ static struct file_operations proc_seccomp_operations = { }; #endif /* CONFIG_SECCOMP */ -static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; int error = -EACCES; @@ -907,7 +907,7 @@ static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); nd->last_type = LAST_BIND; out: - return error; + return ERR_PTR(error); } static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, @@ -1692,11 +1692,11 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer, return vfs_readlink(dentry,buffer,buflen,tmp); } -static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { char tmp[30]; sprintf(tmp, "%d", current->tgid); - return vfs_follow_link(nd,tmp); + return ERR_PTR(vfs_follow_link(nd,tmp)); } static struct inode_operations proc_self_inode_operations = { diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 6c6315d0402..abe8920313f 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -329,10 +329,10 @@ static void release_inode_number(unsigned int inum) spin_unlock(&proc_inum_lock); } -static int proc_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) { nd_set_link(nd, PDE(dentry->d_inode)->data); - return 0; + return NULL; } static struct inode_operations proc_link_inode_operations = { diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c index 8b069e06433..0c64bc3a012 100644 --- a/fs/smbfs/symlink.c +++ b/fs/smbfs/symlink.c @@ -34,7 +34,7 @@ int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname) return smb_proc_symlink(server_from_dentry(dentry), dentry, oldname); } -static int smb_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *smb_follow_link(struct dentry *dentry, struct nameidata *nd) { char *link = __getname(); DEBUG1("followlink of %s/%s\n", DENTRY_PATH(dentry)); @@ -52,10 +52,10 @@ static int smb_follow_link(struct dentry *dentry, struct nameidata *nd) } } nd_set_link(nd, link); - return 0; + return NULL; } -static void smb_put_link(struct dentry *dentry, struct nameidata *nd) +static void smb_put_link(struct dentry *dentry, struct nameidata *nd, void *p) { char *s = nd_get_link(nd); if (!IS_ERR(s)) diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c index ed637db2dcb..b85ce61d635 100644 --- a/fs/sysv/symlink.c +++ b/fs/sysv/symlink.c @@ -8,10 +8,10 @@ #include "sysv.h" #include -static int sysv_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *sysv_follow_link(struct dentry *dentry, struct nameidata *nd) { nd_set_link(nd, (char *)SYSV_I(dentry->d_inode)->i_data); - return 0; + return NULL; } struct inode_operations sysv_fast_symlink_inode_operations = { diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c index a0e49149098..337512ed578 100644 --- a/fs/ufs/symlink.c +++ b/fs/ufs/symlink.c @@ -29,11 +29,11 @@ #include #include -static int ufs_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd) { struct ufs_inode_info *p = UFS_I(dentry->d_inode); nd_set_link(nd, (char*)p->i_u1.i_symlink); - return 0; + return NULL; } struct inode_operations ufs_fast_symlink_inode_operations = { diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 407e9935939..f252605514e 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -374,7 +374,7 @@ linvfs_rename( * we need to be very careful about how much stack we use. * uio is kmalloced for this reason... */ -STATIC int +STATIC void * linvfs_follow_link( struct dentry *dentry, struct nameidata *nd) @@ -391,14 +391,14 @@ linvfs_follow_link( link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL); if (!link) { nd_set_link(nd, ERR_PTR(-ENOMEM)); - return 0; + return NULL; } uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL); if (!uio) { kfree(link); nd_set_link(nd, ERR_PTR(-ENOMEM)); - return 0; + return NULL; } vp = LINVFS_GET_VP(dentry->d_inode); @@ -422,10 +422,10 @@ linvfs_follow_link( kfree(uio); nd_set_link(nd, link); - return 0; + return NULL; } -static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd) +static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) { char *s = nd_get_link(nd); if (!IS_ERR(s)) -- cgit v1.2.3 From 01c314a0c0f6367960a7cb1ffb5796560ccaa1c1 Mon Sep 17 00:00:00 2001 From: Steve Dickson Date: Fri, 19 Aug 2005 17:57:48 -0700 Subject: [PATCH] NFSv4: unbalanced BKL in nfs_atomic_lookup() Added missing unlock_kernel() to NFSv4 atomic lookup. Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 147cbf9261c..2df639f143e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -939,6 +939,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry error = nfs_revalidate_inode(NFS_SERVER(dir), dir); if (error < 0) { res = ERR_PTR(error); + unlock_kernel(); goto out; } -- cgit v1.2.3 From db873896d168217e213902c7163fda7ee798781b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 20 Aug 2005 13:20:01 -0700 Subject: befs: fix up missed follow_link declaration change We'd updated the prototype and the return value, but not the function declaration itself. --- fs/befs/linuxvfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index abf950e5b64..e0a6025f1d0 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -461,7 +461,7 @@ befs_destroy_inodecache(void) * The data stream become link name. Unless the LONG_SYMLINK * flag is set. */ -static int +static void * befs_follow_link(struct dentry *dentry, struct nameidata *nd) { befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); -- cgit v1.2.3 From a5ea169c9581553662bb79a1c8c98fed1ee84246 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 21 Aug 2005 01:08:36 +0400 Subject: [PATCH] freevxfs: fix breakage introduced by symlink fixes Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- fs/freevxfs/vxfs_immed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c index b20c3efa364..d0401dc68d4 100644 --- a/fs/freevxfs/vxfs_immed.c +++ b/fs/freevxfs/vxfs_immed.c @@ -72,7 +72,7 @@ struct address_space_operations vxfs_immed_aops = { * Returns: * Zero on success, else a negative error code. */ -static int +static void * vxfs_immed_follow_link(struct dentry *dp, struct nameidata *np) { struct vxfs_inode_info *vip = VXFS_INO(dp->d_inode); -- cgit v1.2.3 From f6fdd7d9c273bb2a20ab467cb57067494f932fa3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 20 Aug 2005 18:51:29 -0700 Subject: Don't allow normal users to set idle IO priority It has all the normal priority inversion problems. Signed-off-by: Linus Torvalds --- fs/ioprio.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ioprio.c b/fs/ioprio.c index 97e1f088ba0..d1c1f2b2c9d 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -62,6 +62,8 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; case IOPRIO_CLASS_IDLE: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; break; default: return -EINVAL; -- cgit v1.2.3