diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/buffer.c | 10 | ||||
-rw-r--r-- | fs/cifs/Kconfig | 1 | ||||
-rw-r--r-- | fs/cifs/cifsfs.c | 93 | ||||
-rw-r--r-- | fs/cifs/cifsglob.h | 21 | ||||
-rw-r--r-- | fs/cifs/cifsproto.h | 11 | ||||
-rw-r--r-- | fs/cifs/cifssmb.c | 1 | ||||
-rw-r--r-- | fs/cifs/connect.c | 1 | ||||
-rw-r--r-- | fs/cifs/dir.c | 64 | ||||
-rw-r--r-- | fs/cifs/file.c | 137 | ||||
-rw-r--r-- | fs/cifs/misc.c | 34 | ||||
-rw-r--r-- | fs/cifs/readdir.c | 4 | ||||
-rw-r--r-- | fs/cifs/transport.c | 50 | ||||
-rw-r--r-- | fs/ecryptfs/Kconfig | 4 | ||||
-rw-r--r-- | fs/ecryptfs/crypto.c | 39 | ||||
-rw-r--r-- | fs/ecryptfs/inode.c | 2 | ||||
-rw-r--r-- | fs/ecryptfs/keystore.c | 39 | ||||
-rw-r--r-- | fs/ecryptfs/kthread.c | 24 | ||||
-rw-r--r-- | fs/ecryptfs/main.c | 3 | ||||
-rw-r--r-- | fs/ecryptfs/mmap.c | 4 | ||||
-rw-r--r-- | fs/ecryptfs/read_write.c | 32 | ||||
-rw-r--r-- | fs/ecryptfs/super.c | 2 | ||||
-rw-r--r-- | fs/fs-writeback.c | 165 | ||||
-rw-r--r-- | fs/nfs/super.c | 2 | ||||
-rw-r--r-- | fs/proc/array.c | 7 |
24 files changed, 366 insertions, 384 deletions
diff --git a/fs/buffer.c b/fs/buffer.c index 24afd7422ae..6fa530256bf 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -280,7 +280,7 @@ void invalidate_bdev(struct block_device *bdev) EXPORT_SYMBOL(invalidate_bdev); /* - * Kick pdflush then try to free up some ZONE_NORMAL memory. + * Kick the writeback threads then try to free up some ZONE_NORMAL memory. */ static void free_more_memory(void) { @@ -1709,9 +1709,9 @@ static int __block_write_full_page(struct inode *inode, struct page *page, /* * If it's a fully non-blocking write attempt and we cannot * lock the buffer then redirty the page. Note that this can - * potentially cause a busy-wait loop from pdflush and kswapd - * activity, but those code paths have their own higher-level - * throttling. + * potentially cause a busy-wait loop from writeback threads + * and kswapd activity, but those code paths have their own + * higher-level throttling. */ if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { lock_buffer(bh); @@ -3208,7 +3208,7 @@ EXPORT_SYMBOL(block_sync_page); * still running obsolete flush daemons, so we terminate them here. * * Use of bdflush() is deprecated and will be removed in a future kernel. - * The `pdflush' kernel threads fully replace bdflush daemons and this call. + * The `flush-X' kernel threads fully replace bdflush daemons and this call. */ SYSCALL_DEFINE2(bdflush, int, func, long, data) { diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 6994a0f54f0..80f35259680 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -2,6 +2,7 @@ config CIFS tristate "CIFS support (advanced network filesystem, SMBFS successor)" depends on INET select NLS + select SLOW_WORK help This is the client VFS module for the Common Internet File System (CIFS) protocol which is the successor to the Server Message Block diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 90c5b39f031..9a5e4f5f312 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -64,9 +64,6 @@ unsigned int multiuser_mount = 0; unsigned int extended_security = CIFSSEC_DEF; /* unsigned int ntlmv2_support = 0; */ unsigned int sign_CIFS_PDUs = 1; -extern struct task_struct *oplockThread; /* remove sparse warning */ -struct task_struct *oplockThread = NULL; -/* extern struct task_struct * dnotifyThread; remove sparse warning */ static const struct super_operations cifs_super_ops; unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; module_param(CIFSMaxBufSize, int, 0); @@ -972,89 +969,12 @@ cifs_destroy_mids(void) kmem_cache_destroy(cifs_oplock_cachep); } -static int cifs_oplock_thread(void *dummyarg) -{ - struct oplock_q_entry *oplock_item; - struct cifsTconInfo *pTcon; - struct inode *inode; - __u16 netfid; - int rc, waitrc = 0; - - set_freezable(); - do { - if (try_to_freeze()) - continue; - - spin_lock(&cifs_oplock_lock); - if (list_empty(&cifs_oplock_list)) { - spin_unlock(&cifs_oplock_lock); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(39*HZ); - } else { - oplock_item = list_entry(cifs_oplock_list.next, - struct oplock_q_entry, qhead); - cFYI(1, ("found oplock item to write out")); - pTcon = oplock_item->tcon; - inode = oplock_item->pinode; - netfid = oplock_item->netfid; - spin_unlock(&cifs_oplock_lock); - DeleteOplockQEntry(oplock_item); - /* can not grab inode sem here since it would - deadlock when oplock received on delete - since vfs_unlink holds the i_mutex across - the call */ - /* mutex_lock(&inode->i_mutex);*/ - if (S_ISREG(inode->i_mode)) { -#ifdef CONFIG_CIFS_EXPERIMENTAL - if (CIFS_I(inode)->clientCanCacheAll == 0) - break_lease(inode, FMODE_READ); - else if (CIFS_I(inode)->clientCanCacheRead == 0) - break_lease(inode, FMODE_WRITE); -#endif - rc = filemap_fdatawrite(inode->i_mapping); - if (CIFS_I(inode)->clientCanCacheRead == 0) { - waitrc = filemap_fdatawait( - inode->i_mapping); - invalidate_remote_inode(inode); - } - if (rc == 0) - rc = waitrc; - } else - rc = 0; - /* mutex_unlock(&inode->i_mutex);*/ - if (rc) - CIFS_I(inode)->write_behind_rc = rc; - cFYI(1, ("Oplock flush inode %p rc %d", - inode, rc)); - - /* releasing stale oplock after recent reconnect - of smb session using a now incorrect file - handle is not a data integrity issue but do - not bother sending an oplock release if session - to server still is disconnected since oplock - already released by the server in that case */ - if (!pTcon->need_reconnect) { - rc = CIFSSMBLock(0, pTcon, netfid, - 0 /* len */ , 0 /* offset */, 0, - 0, LOCKING_ANDX_OPLOCK_RELEASE, - false /* wait flag */); - cFYI(1, ("Oplock release rc = %d", rc)); - } - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); /* yield in case q were corrupt */ - } - } while (!kthread_should_stop()); - - return 0; -} - static int __init init_cifs(void) { int rc = 0; cifs_proc_init(); INIT_LIST_HEAD(&cifs_tcp_ses_list); - INIT_LIST_HEAD(&cifs_oplock_list); #ifdef CONFIG_CIFS_EXPERIMENTAL INIT_LIST_HEAD(&GlobalDnotifyReqList); INIT_LIST_HEAD(&GlobalDnotifyRsp_Q); @@ -1083,7 +1003,6 @@ init_cifs(void) rwlock_init(&GlobalSMBSeslock); rwlock_init(&cifs_tcp_ses_lock); spin_lock_init(&GlobalMid_Lock); - spin_lock_init(&cifs_oplock_lock); if (cifs_max_pending < 2) { cifs_max_pending = 2; @@ -1118,16 +1037,13 @@ init_cifs(void) if (rc) goto out_unregister_key_type; #endif - oplockThread = kthread_run(cifs_oplock_thread, NULL, "cifsoplockd"); - if (IS_ERR(oplockThread)) { - rc = PTR_ERR(oplockThread); - cERROR(1, ("error %d create oplock thread", rc)); - goto out_unregister_dfs_key_type; - } + rc = slow_work_register_user(); + if (rc) + goto out_unregister_resolver_key; return 0; - out_unregister_dfs_key_type: + out_unregister_resolver_key: #ifdef CONFIG_CIFS_DFS_UPCALL unregister_key_type(&key_type_dns_resolver); out_unregister_key_type: @@ -1164,7 +1080,6 @@ exit_cifs(void) cifs_destroy_inodecache(); cifs_destroy_mids(); cifs_destroy_request_bufs(); - kthread_stop(oplockThread); } MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>"); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6cfc81a3270..5d0fde18039 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -18,6 +18,7 @@ */ #include <linux/in.h> #include <linux/in6.h> +#include <linux/slow-work.h> #include "cifs_fs_sb.h" #include "cifsacl.h" /* @@ -346,14 +347,16 @@ struct cifsFileInfo { /* lock scope id (0 if none) */ struct file *pfile; /* needed for writepage */ struct inode *pInode; /* needed for oplock break */ + struct vfsmount *mnt; struct mutex lock_mutex; struct list_head llist; /* list of byte range locks we have. */ bool closePend:1; /* file is marked to close */ bool invalidHandle:1; /* file closed via session abend */ - bool messageMode:1; /* for pipes: message vs byte mode */ + bool oplock_break_cancelled:1; atomic_t count; /* reference count */ struct mutex fh_mutex; /* prevents reopen race after dead ses*/ struct cifs_search_info srch_inf; + struct slow_work oplock_break; /* slow_work job for oplock breaks */ }; /* Take a reference on the file private data */ @@ -365,8 +368,10 @@ static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file) /* Release a reference on the file private data */ static inline void cifsFileInfo_put(struct cifsFileInfo *cifs_file) { - if (atomic_dec_and_test(&cifs_file->count)) + if (atomic_dec_and_test(&cifs_file->count)) { + iput(cifs_file->pInode); kfree(cifs_file); + } } /* @@ -382,7 +387,6 @@ struct cifsInodeInfo { unsigned long time; /* jiffies of last update/check of inode */ bool clientCanCacheRead:1; /* read oplock */ bool clientCanCacheAll:1; /* read and writebehind oplock */ - bool oplockPending:1; bool delete_pending:1; /* DELETE_ON_CLOSE is set */ u64 server_eof; /* current file size on server */ u64 uniqueid; /* server inode number */ @@ -585,9 +589,9 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_LANMAN 0x10010 #define CIFSSEC_MUST_PLNTXT 0x20020 #ifdef CONFIG_CIFS_UPCALL -#define CIFSSEC_MASK 0xAF0AF /* allows weak security but also krb5 */ +#define CIFSSEC_MASK 0xBF0BF /* allows weak security but also krb5 */ #else -#define CIFSSEC_MASK 0xA70A7 /* current flags supported if weak */ +#define CIFSSEC_MASK 0xB70B7 /* current flags supported if weak */ #endif /* UPCALL */ #else /* do not allow weak pw hash */ #ifdef CONFIG_CIFS_UPCALL @@ -669,12 +673,6 @@ GLOBAL_EXTERN rwlock_t cifs_tcp_ses_lock; */ GLOBAL_EXTERN rwlock_t GlobalSMBSeslock; -/* Global list of oplocks */ -GLOBAL_EXTERN struct list_head cifs_oplock_list; - -/* Protects the cifs_oplock_list */ -GLOBAL_EXTERN spinlock_t cifs_oplock_lock; - /* Outstanding dir notify requests */ GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; /* DirNotify response queue */ @@ -725,3 +723,4 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */ GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */ GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ +extern const struct slow_work_ops cifs_oplock_break_ops; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index da8fbf56599..6928c24d1d4 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -86,18 +86,17 @@ extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, const int stage, const struct nls_table *nls_cp); extern __u16 GetNextMid(struct TCP_Server_Info *server); -extern struct oplock_q_entry *AllocOplockQEntry(struct inode *, u16, - struct cifsTconInfo *); -extern void DeleteOplockQEntry(struct oplock_q_entry *); -extern void DeleteTconOplockQEntries(struct cifsTconInfo *); extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); extern u64 cifs_UnixTimeToNT(struct timespec); extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset); +extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode, + __u16 fileHandle, struct file *file, + struct vfsmount *mnt, unsigned int oflags); extern int cifs_posix_open(char *full_path, struct inode **pinode, - struct super_block *sb, int mode, int oflags, - int *poplock, __u16 *pnetfid, int xid); + struct vfsmount *mnt, int mode, int oflags, + __u32 *poplock, __u16 *pnetfid, int xid); extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, struct cifs_sb_info *cifs_sb); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 301e307e127..941441d3e38 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -94,6 +94,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon) list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { open_file = list_entry(tmp, struct cifsFileInfo, tlist); open_file->invalidHandle = true; + open_file->oplock_break_cancelled = true; } write_unlock(&GlobalSMBSeslock); /* BB Add call to invalidate_inodes(sb) for all superblocks mounted diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d49682433c2..43003e0bef1 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1670,7 +1670,6 @@ cifs_put_tcon(struct cifsTconInfo *tcon) CIFSSMBTDis(xid, tcon); _FreeXid(xid); - DeleteTconOplockQEntries(tcon); tconInfoFree(tcon); cifs_put_smb_ses(ses); } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index a6424cfc012..627a60a6c1b 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -24,6 +24,7 @@ #include <linux/stat.h> #include <linux/slab.h> #include <linux/namei.h> +#include <linux/mount.h> #include "cifsfs.h" #include "cifspdu.h" #include "cifsglob.h" @@ -129,44 +130,45 @@ cifs_bp_rename_retry: return full_path; } -static void -cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle, - struct cifsTconInfo *tcon, bool write_only) +struct cifsFileInfo * +cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle, + struct file *file, struct vfsmount *mnt, unsigned int oflags) { int oplock = 0; struct cifsFileInfo *pCifsFile; struct cifsInodeInfo *pCifsInode; + struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb); pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); - if (pCifsFile == NULL) - return; + return pCifsFile; if (oplockEnabled) oplock = REQ_OPLOCK; pCifsFile->netfid = fileHandle; pCifsFile->pid = current->tgid; - pCifsFile->pInode = newinode; + pCifsFile->pInode = igrab(newinode); + pCifsFile->mnt = mnt; + pCifsFile->pfile = file; pCifsFile->invalidHandle = false; pCifsFile->closePend = false; mutex_init(&pCifsFile->fh_mutex); mutex_init(&pCifsFile->lock_mutex); INIT_LIST_HEAD(&pCifsFile->llist); atomic_set(&pCifsFile->count, 1); + slow_work_init(&pCifsFile->oplock_break, &cifs_oplock_break_ops); - /* set the following in open now - pCifsFile->pfile = file; */ write_lock(&GlobalSMBSeslock); - list_add(&pCifsFile->tlist, &tcon->openFileList); + list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList); pCifsInode = CIFS_I(newinode); if (pCifsInode) { /* if readable file instance put first in list*/ - if (write_only) + if (oflags & FMODE_READ) + list_add(&pCifsFile->flist, &pCifsInode->openFileList); + else list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); - else - list_add(&pCifsFile->flist, &pCifsInode->openFileList); if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { pCifsInode->clientCanCacheAll = true; @@ -176,18 +178,18 @@ cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle, pCifsInode->clientCanCacheRead = true; } write_unlock(&GlobalSMBSeslock); + + return pCifsFile; } int cifs_posix_open(char *full_path, struct inode **pinode, - struct super_block *sb, int mode, int oflags, - int *poplock, __u16 *pnetfid, int xid) + struct vfsmount *mnt, int mode, int oflags, + __u32 *poplock, __u16 *pnetfid, int xid) { int rc; - __u32 oplock; - bool write_only = false; FILE_UNIX_BASIC_INFO *presp_data; __u32 posix_flags = 0; - struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb); struct cifs_fattr fattr; cFYI(1, ("posix open %s", full_path)); @@ -223,12 +225,9 @@ int cifs_posix_open(char *full_path, struct inode **pinode, if (oflags & O_DIRECT) posix_flags |= SMB_O_DIRECT; - if (!(oflags & FMODE_READ)) - write_only = true; - mode &= ~current_umask(); rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode, - pnetfid, presp_data, &oplock, full_path, + pnetfid, presp_data, poplock, full_path, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc) @@ -244,7 +243,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode, /* get new inode and set it up */ if (*pinode == NULL) { - *pinode = cifs_iget(sb, &fattr); + *pinode = cifs_iget(mnt->mnt_sb, &fattr); if (!*pinode) { rc = -ENOMEM; goto posix_open_ret; @@ -253,7 +252,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode, cifs_fattr_to_inode(*pinode, &fattr); } - cifs_fill_fileinfo(*pinode, *pnetfid, cifs_sb->tcon, write_only); + cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, oflags); posix_open_ret: kfree(presp_data); @@ -280,7 +279,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, int rc = -ENOENT; int xid; int create_options = CREATE_NOT_DIR; - int oplock = 0; + __u32 oplock = 0; int oflags; bool posix_create = false; /* @@ -298,7 +297,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, FILE_ALL_INFO *buf = NULL; struct inode *newinode = NULL; int disposition = FILE_OVERWRITE_IF; - bool write_only = false; xid = GetXid(); @@ -323,7 +321,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { - rc = cifs_posix_open(full_path, &newinode, inode->i_sb, + rc = cifs_posix_open(full_path, &newinode, nd->path.mnt, mode, oflags, &oplock, &fileHandle, xid); /* EIO could indicate that (posix open) operation is not supported, despite what server claimed in capability @@ -351,11 +349,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, desiredAccess = 0; if (oflags & FMODE_READ) desiredAccess |= GENERIC_READ; /* is this too little? */ - if (oflags & FMODE_WRITE) { + if (oflags & FMODE_WRITE) desiredAccess |= GENERIC_WRITE; - if (!(oflags & FMODE_READ)) - write_only = true; - } if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) disposition = FILE_CREATE; @@ -470,8 +465,8 @@ cifs_create_set_dentry: /* mknod case - do not leave file open */ CIFSSMBClose(xid, tcon, fileHandle); } else if (!(posix_create) && (newinode)) { - cifs_fill_fileinfo(newinode, fileHandle, - cifs_sb->tcon, write_only); + cifs_new_fileinfo(newinode, fileHandle, NULL, + nd->path.mnt, oflags); } cifs_create_out: kfree(buf); @@ -611,7 +606,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, { int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ - int oplock = 0; + __u32 oplock = 0; __u16 fileHandle = 0; bool posix_open = false; struct cifs_sb_info *cifs_sb; @@ -683,8 +678,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && (nd->intent.open.flags & O_CREAT)) { - rc = cifs_posix_open(full_path, &newInode, - parent_dir_inode->i_sb, + rc = cifs_posix_open(full_path, &newInode, nd->path.mnt, nd->intent.open.create_mode, nd->intent.open.flags, &oplock, &fileHandle, xid); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index fa7beac8b80..429337eb7af 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -30,6 +30,7 @@ #include <linux/writeback.h> #include <linux/task_io_accounting_ops.h> #include <linux/delay.h> +#include <linux/mount.h> #include <asm/div64.h> #include "cifsfs.h" #include "cifspdu.h" @@ -39,27 +40,6 @@ #include "cifs_debug.h" #include "cifs_fs_sb.h" -static inline struct cifsFileInfo *cifs_init_private( - struct cifsFileInfo *private_data, struct inode *inode, - struct file *file, __u16 netfid) -{ - memset(private_data, 0, sizeof(struct cifsFileInfo)); - private_data->netfid = netfid; - private_data->pid = current->tgid; - mutex_init(&private_data->fh_mutex); - mutex_init(&private_data->lock_mutex); - INIT_LIST_HEAD(&private_data->llist); - private_data->pfile = file; /* needed for writepage */ - private_data->pInode = inode; - private_data->invalidHandle = false; - private_data->closePend = false; - /* Initialize reference count to one. The private data is - freed on the release of the last reference */ - atomic_set(&private_data->count, 1); - - return private_data; -} - static inline int cifs_convert_flags(unsigned int flags) { if ((flags & O_ACCMODE) == O_RDONLY) @@ -123,9 +103,11 @@ static inline int cifs_get_disposition(unsigned int flags) } /* all arguments to this function must be checked for validity in caller */ -static inline int cifs_posix_open_inode_helper(struct inode *inode, - struct file *file, struct cifsInodeInfo *pCifsInode, - struct cifsFileInfo *pCifsFile, int oplock, u16 netfid) +static inline int +cifs_posix_open_inode_helper(struct inode *inode, struct file *file, + struct cifsInodeInfo *pCifsInode, + struct cifsFileInfo *pCifsFile, __u32 oplock, + u16 netfid) { write_lock(&GlobalSMBSeslock); @@ -219,17 +201,6 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, struct timespec temp; int rc; - /* want handles we can use to read with first - in the list so we do not have to walk the - list to search for one in write_begin */ - if ((file->f_flags & O_ACCMODE) == O_WRONLY) { - list_add_tail(&pCifsFile->flist, - &pCifsInode->openFileList); - } else { - list_add(&pCifsFile->flist, - &pCifsInode->openFileList); - } - write_unlock(&GlobalSMBSeslock); if (pCifsInode->clientCanCacheRead) { /* we have the inode open somewhere else no need to discard cache data */ @@ -279,7 +250,8 @@ client_can_cache: int cifs_open(struct inode *inode, struct file *file) { int rc = -EACCES; - int xid, oplock; + int xid; + __u32 oplock; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *tcon; struct cifsFileInfo *pCifsFile; @@ -324,7 +296,7 @@ int cifs_open(struct inode *inode, struct file *file) le64_to_cpu(tcon->fsUnixInfo.Capability))) { int oflags = (int) cifs_posix_convert_flags(file->f_flags); /* can not refresh inode info since size could be stale */ - rc = cifs_posix_open(full_path, &inode, inode->i_sb, + rc = cifs_posix_open(full_path, &inode, file->f_path.mnt, cifs_sb->mnt_file_mode /* ignored */, oflags, &oplock, &netfid, xid); if (rc == 0) { @@ -414,24 +386,17 @@ int cifs_open(struct inode *inode, struct file *file) cFYI(1, ("cifs_open returned 0x%x", rc)); goto out; } - file->private_data = - kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); + + pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt, + file->f_flags); + file->private_data = pCifsFile; if (file->private_data == NULL) { rc = -ENOMEM; goto out; } - pCifsFile = cifs_init_private(file->private_data, inode, file, netfid); - write_lock(&GlobalSMBSeslock); - list_add(&pCifsFile->tlist, &tcon->openFileList); - pCifsInode = CIFS_I(file->f_path.dentry->d_inode); - if (pCifsInode) { - rc = cifs_open_inode_helper(inode, file, pCifsInode, - pCifsFile, tcon, - &oplock, buf, full_path, xid); - } else { - write_unlock(&GlobalSMBSeslock); - } + rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon, + &oplock, buf, full_path, xid); if (oplock & CIFS_CREATE_ACTION) { /* time to set mode which we can not set earlier due to @@ -474,7 +439,8 @@ static int cifs_relock_file(struct cifsFileInfo *cifsFile) static int cifs_reopen_file(struct file *file, bool can_flush) { int rc = -EACCES; - int xid, oplock; + int xid; + __u32 oplock; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *tcon; struct cifsFileInfo *pCifsFile; @@ -543,7 +509,7 @@ reopen_error_exit: le64_to_cpu(tcon->fsUnixInfo.Capability))) { int oflags = (int) cifs_posix_convert_flags(file->f_flags); /* can not refresh inode info since size could be stale */ - rc = cifs_posix_open(full_path, NULL, inode->i_sb, + rc = cifs_posix_open(full_path, NULL, file->f_path.mnt, cifs_sb->mnt_file_mode /* ignored */, oflags, &oplock, &netfid, xid); if (rc == 0) { @@ -2308,6 +2274,73 @@ out: return rc; } +static void +cifs_oplock_break(struct slow_work *work) +{ + struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, + oplock_break); + struct inode *inode = cfile->pInode; + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->mnt->mnt_sb); + int rc, waitrc = 0; + + if (inode && S_ISREG(inode->i_mode)) { +#ifdef CONFIG_CIFS_EXPERIMENTAL + if (cinode->clientCanCacheAll == 0) + break_lease(inode, FMODE_READ); + else if (cinode->clientCanCacheRead == 0) + break_lease(inode, FMODE_WRITE); +#endif + rc = filemap_fdatawrite(inode->i_mapping); + if (cinode->clientCanCacheRead == 0) { + waitrc = filemap_fdatawait(inode->i_mapping); + invalidate_remote_inode(inode); + } + if (!rc) + rc = waitrc; + if (rc) + cinode->write_behind_rc = rc; + cFYI(1, ("Oplock flush inode %p rc %d", inode, rc)); + } + + /* + * releasing stale oplock after recent reconnect of smb session using + * a now incorrect file handle is not a data integrity issue but do + * not bother sending an oplock release if session to server still is + * disconnected since oplock already released by the server + */ + if (!cfile->closePend && !cfile->oplock_break_cancelled) { + rc = CIFSSMBLock(0, cifs_sb->tcon, cfile->netfid, 0, 0, 0, 0, + LOCKING_ANDX_OPLOCK_RELEASE, false); + cFYI(1, ("Oplock release rc = %d", rc)); + } +} + +static int +cifs_oplock_break_get(struct slow_work *work) +{ + struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, + oplock_break); + mntget(cfile->mnt); + cifsFileInfo_get(cfile); + return 0; +} + +static void +cifs_oplock_break_put(struct slow_work *work) +{ + struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, + oplock_break); + mntput(cfile->mnt); + cifsFileInfo_put(cfile); +} + +const struct slow_work_ops cifs_oplock_break_ops = { + .get_ref = cifs_oplock_break_get, + .put_ref = cifs_oplock_break_put, + .execute = cifs_oplock_break, +}; + const struct address_space_operations cifs_addr_ops = { .readpage = cifs_readpage, .readpages = cifs_readpages, diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index e079a9190ec..0241b25ac33 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -32,7 +32,6 @@ extern mempool_t *cifs_sm_req_poolp; extern mempool_t *cifs_req_poolp; -extern struct task_struct *oplockThread; /* The xid serves as a useful identifier for each incoming vfs request, in a similar way to the mid which is useful to track each sent smb, @@ -500,6 +499,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) struct cifsTconInfo *tcon; struct cifsInodeInfo *pCifsInode; struct cifsFileInfo *netfile; + int rc; cFYI(1, ("Checking for oplock break or dnotify response")); if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) && @@ -562,30 +562,40 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) continue; cifs_stats_inc(&tcon->num_oplock_brks); - write_lock(&GlobalSMBSeslock); + read_lock(&GlobalSMBSeslock); list_for_each(tmp2, &tcon->openFileList) { netfile = list_entry(tmp2, struct cifsFileInfo, tlist); if (pSMB->Fid != netfile->netfid) continue; - write_unlock(&GlobalSMBSeslock); - read_unlock(&cifs_tcp_ses_lock); + /* + * don't do anything if file is about to be + * closed anyway. + */ + if (netfile->closePend) { + read_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); + return true; + } + cFYI(1, ("file id match, oplock break")); pCifsInode = CIFS_I(netfile->pInode); pCifsInode->clientCanCacheAll = false; if (pSMB->OplockLevel == 0) pCifsInode->clientCanCacheRead = false; - pCifsInode->oplockPending = true; - AllocOplockQEntry(netfile->pInode, - netfile->netfid, tcon); - cFYI(1, ("about to wake up oplock thread")); - if (oplockThread) - wake_up_process(oplockThread); - + rc = slow_work_enqueue(&netfile->oplock_break); + if (rc) { + cERROR(1, ("failed to enqueue oplock " + "break: %d\n", rc)); + } else { + netfile->oplock_break_cancelled = false; + } + read_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); return true; } - write_unlock(&GlobalSMBSeslock); + read_unlock(&GlobalSMBSeslock); read_unlock(&cifs_tcp_ses_lock); cFYI(1, ("No matching file for oplock break")); return true; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index f823a4a208a..1f098ca7163 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -146,7 +146,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) } } -void +static void cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, struct cifs_sb_info *cifs_sb) { @@ -161,7 +161,7 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, cifs_fill_common_info(fattr, cifs_sb); } -void +static void cifs_std_info_to_fattr(struct cifs_fattr *fattr, FIND_FILE_STANDARD_INFO *info, struct cifs_sb_info *cifs_sb) { diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 1da4ab250ea..07b8e71544e 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -103,56 +103,6 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) mempool_free(midEntry, cifs_mid_poolp); } -struct oplock_q_entry * -AllocOplockQEntry(struct inode *pinode, __u16 fid, struct cifsTconInfo *tcon) -{ - struct oplock_q_entry *temp; - if ((pinode == NULL) || (tcon == NULL)) { - cERROR(1, ("Null parms passed to AllocOplockQEntry")); - return NULL; - } - temp = (struct oplock_q_entry *) kmem_cache_alloc(cifs_oplock_cachep, - GFP_KERNEL); - if (temp == NULL) - return temp; - else { - temp->pinode = pinode; - temp->tcon = tcon; - temp->netfid = fid; - spin_lock(&cifs_oplock_lock); - list_add_tail(&temp->qhead, &cifs_oplock_list); - spin_unlock(&cifs_oplock_lock); - } - return temp; -} - -void DeleteOplockQEntry(struct oplock_q_entry *oplockEntry) -{ - spin_lock(&cifs_oplock_lock); - /* should we check if list empty first? */ - list_del(&oplockEntry->qhead); - spin_unlock(&cifs_oplock_lock); - kmem_cache_free(cifs_oplock_cachep, oplockEntry); -} - - -void DeleteTconOplockQEntries(struct cifsTconInfo *tcon) -{ - struct oplock_q_entry *temp; - - if (tcon == NULL) - return; - - spin_lock(&cifs_oplock_lock); - list_for_each_entry(temp, &cifs_oplock_list, qhead) { - if ((temp->tcon) && (temp->tcon == tcon)) { - list_del(&temp->qhead); - kmem_cache_free(cifs_oplock_cachep, temp); - } - } - spin_unlock(&cifs_oplock_lock); -} - static int smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) { diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig index 0c754e64232..8aadb99b763 100644 --- a/fs/ecryptfs/Kconfig +++ b/fs/ecryptfs/Kconfig @@ -1,6 +1,8 @@ config ECRYPT_FS tristate "eCrypt filesystem layer support (EXPERIMENTAL)" - depends on EXPERIMENTAL && KEYS && CRYPTO && NET + depends on EXPERIMENTAL && KEYS && NET + select CRYPTO_ECB + select CRYPTO_CBC help Encrypted filesystem that operates on the VFS layer. See <file:Documentation/filesystems/ecryptfs.txt> to learn more about diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index b91851f1cda..fbb6e5eed69 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -245,13 +245,11 @@ void ecryptfs_destroy_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) crypto_free_blkcipher(crypt_stat->tfm); if (crypt_stat->hash_tfm) crypto_free_hash(crypt_stat->hash_tfm); - mutex_lock(&crypt_stat->keysig_list_mutex); list_for_each_entry_safe(key_sig, key_sig_tmp, &crypt_stat->keysig_list, crypt_stat_list) { list_del(&key_sig->crypt_stat_list); kmem_cache_free(ecryptfs_key_sig_cache, key_sig); } - mutex_unlock(&crypt_stat->keysig_list_mutex); memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); } @@ -511,13 +509,14 @@ int ecryptfs_encrypt_page(struct page *page) + extent_offset), crypt_stat); rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, offset, crypt_stat->extent_size); - if (rc) { + if (rc < 0) { ecryptfs_printk(KERN_ERR, "Error attempting " "to write lower page; rc = [%d]" "\n", rc); goto out; } } + rc = 0; out: if (enc_extent_page) { kunmap(enc_extent_page); @@ -633,7 +632,7 @@ int ecryptfs_decrypt_page(struct page *page) rc = ecryptfs_read_lower(enc_extent_virt, offset, crypt_stat->extent_size, ecryptfs_inode); - if (rc) { + if (rc < 0) { ecryptfs_printk(KERN_ERR, "Error attempting " "to read lower page; rc = [%d]" "\n", rc); @@ -797,6 +796,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat) kfree(full_alg_name); if (IS_ERR(crypt_stat->tfm)) { rc = PTR_ERR(crypt_stat->tfm); + crypt_stat->tfm = NULL; ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): " "Error initializing cipher [%s]\n", crypt_stat->cipher); @@ -925,7 +925,9 @@ static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs( struct ecryptfs_global_auth_tok *global_auth_tok; int rc = 0; + mutex_lock(&crypt_stat->keysig_list_mutex); mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); + list_for_each_entry(global_auth_tok, &mount_crypt_stat->global_auth_tok_list, mount_crypt_stat_list) { @@ -934,13 +936,13 @@ static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs( rc = ecryptfs_add_keysig(crypt_stat, global_auth_tok->sig); if (rc) { printk(KERN_ERR "Error adding keysig; rc = [%d]\n", rc); - mutex_unlock( - &mount_crypt_stat->global_auth_tok_list_mutex); goto out; } } - mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); + out: + mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); + mutex_unlock(&crypt_stat->keysig_list_mutex); return rc; } @@ -1212,14 +1214,15 @@ int ecryptfs_read_and_validate_header_region(char *data, crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE; rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size, ecryptfs_inode); - if (rc) { + if (rc < 0) { printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", __func__, rc); goto out; } if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { rc = -EINVAL; - } + } else + rc = 0; out: return rc; } @@ -1314,10 +1317,11 @@ ecryptfs_write_metadata_to_contents(struct dentry *ecryptfs_dentry, rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt, 0, virt_len); - if (rc) + if (rc < 0) printk(KERN_ERR "%s: Error attempting to write header " - "information to lower file; rc = [%d]\n", __func__, - rc); + "information to lower file; rc = [%d]\n", __func__, rc); + else + rc = 0; return rc; } @@ -1597,7 +1601,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) } rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, ecryptfs_inode); - if (!rc) + if (rc >= 0) rc = ecryptfs_read_headers_virt(page_virt, crypt_stat, ecryptfs_dentry, ECRYPTFS_VALIDATE_HEADER_SIZE); @@ -1702,7 +1706,7 @@ ecryptfs_encrypt_filename(struct ecryptfs_filename *filename, } else { printk(KERN_ERR "%s: No support for requested filename " "encryption method in this release\n", __func__); - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; goto out; } out: @@ -1763,7 +1767,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm, if (IS_ERR(*key_tfm)) { rc = PTR_ERR(*key_tfm); printk(KERN_ERR "Unable to allocate crypto cipher with name " - "[%s]; rc = [%d]\n", cipher_name, rc); + "[%s]; rc = [%d]\n", full_alg_name, rc); goto out; } crypto_blkcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_WEAK_KEY); @@ -1776,7 +1780,8 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm, rc = crypto_blkcipher_setkey(*key_tfm, dummy_key, *key_size); if (rc) { printk(KERN_ERR "Error attempting to set key of size [%zd] for " - "cipher [%s]; rc = [%d]\n", *key_size, cipher_name, rc); + "cipher [%s]; rc = [%d]\n", *key_size, full_alg_name, + rc); rc = -EINVAL; goto out; } @@ -2166,7 +2171,7 @@ int ecryptfs_encrypt_and_encode_filename( (*encoded_name)[(*encoded_name_size)] = '\0'; (*encoded_name_size)++; } else { - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; } if (rc) { printk(KERN_ERR "%s: Error attempting to encode " diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 2f0945d6329..056fed62d0d 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -476,6 +476,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); struct dentry *lower_dir_dentry; + dget(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry); rc = vfs_unlink(lower_dir_inode, lower_dentry); if (rc) { @@ -489,6 +490,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) d_drop(dentry); out_unlock: unlock_dir(lower_dir_dentry); + dput(lower_dentry); return rc; } diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 259525c9abb..a0a7847567e 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -416,7 +416,9 @@ ecryptfs_find_global_auth_tok_for_sig( &mount_crypt_stat->global_auth_tok_list, mount_crypt_stat_list) { if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) { - (*global_auth_tok) = walker; + rc = key_validate(walker->global_auth_tok_key); + if (!rc) + (*global_auth_tok) = walker; goto out; } } @@ -612,7 +614,12 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, } /* TODO: Support other key modules than passphrase for * filename encryption */ - BUG_ON(s->auth_tok->token_type != ECRYPTFS_PASSWORD); + if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) { + rc = -EOPNOTSUPP; + printk(KERN_INFO "%s: Filename encryption only supports " + "password tokens\n", __func__); + goto out_free_unlock; + } sg_init_one( &s->hash_sg, (u8 *)s->auth_tok->token.password.session_key_encryption_key, @@ -910,7 +917,12 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, } /* TODO: Support other key modules than passphrase for * filename encryption */ - BUG_ON(s->auth_tok->token_type != ECRYPTFS_PASSWORD); + if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) { + rc = -EOPNOTSUPP; + printk(KERN_INFO "%s: Filename encryption only supports " + "password tokens\n", __func__); + goto out_free_unlock; + } rc = crypto_blkcipher_setkey( s->desc.tfm, s->auth_tok->token.password.session_key_encryption_key, @@ -1316,8 +1328,10 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, rc = -EINVAL; goto out_free; } - ecryptfs_cipher_code_to_string(crypt_stat->cipher, - (u16)data[(*packet_size)]); + rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher, + (u16)data[(*packet_size)]); + if (rc) + goto out_free; /* A little extra work to differentiate among the AES key * sizes; see RFC2440 */ switch(data[(*packet_size)++]) { @@ -1328,7 +1342,9 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, crypt_stat->key_size = (*new_auth_tok)->session_key.encrypted_key_size; } - ecryptfs_init_crypt_ctx(crypt_stat); + rc = ecryptfs_init_crypt_ctx(crypt_stat); + if (rc) + goto out_free; if (unlikely(data[(*packet_size)++] != 0x03)) { printk(KERN_WARNING "Only S2K ID 3 is currently supported\n"); rc = -ENOSYS; @@ -2366,21 +2382,18 @@ struct kmem_cache *ecryptfs_key_sig_cache; int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig) { struct ecryptfs_key_sig *new_key_sig; - int rc = 0; new_key_sig = kmem_cache_alloc(ecryptfs_key_sig_cache, GFP_KERNEL); if (!new_key_sig) { - rc = -ENOMEM; printk(KERN_ERR "Error allocating from ecryptfs_key_sig_cache\n"); - goto out; + return -ENOMEM; } memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX); - mutex_lock(&crypt_stat->keysig_list_mutex); + /* Caller must hold keysig_list_mutex */ list_add(&new_key_sig->crypt_stat_list, &crypt_stat->keysig_list); - mutex_unlock(&crypt_stat->keysig_list_mutex); -out: - return rc; + + return 0; } struct kmem_cache *ecryptfs_global_auth_tok_cache; diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index c6d7a4d748a..e14cf7e588d 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -136,6 +136,7 @@ int ecryptfs_privileged_open(struct file **lower_file, const struct cred *cred) { struct ecryptfs_open_req *req; + int flags = O_LARGEFILE; int rc = 0; /* Corresponding dput() and mntput() are done when the @@ -143,10 +144,14 @@ int ecryptfs_privileged_open(struct file **lower_file, * destroyed. */ dget(lower_dentry); mntget(lower_mnt); - (*lower_file) = dentry_open(lower_dentry, lower_mnt, - (O_RDWR | O_LARGEFILE), cred); + flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR; + (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); if (!IS_ERR(*lower_file)) goto out; + if (flags & O_RDONLY) { + rc = PTR_ERR((*lower_file)); + goto out; + } req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL); if (!req) { rc = -ENOMEM; @@ -180,21 +185,8 @@ int ecryptfs_privileged_open(struct file **lower_file, __func__); goto out_unlock; } - if (IS_ERR(*req->lower_file)) { + if (IS_ERR(*req->lower_file)) rc = PTR_ERR(*req->lower_file); - dget(lower_dentry); - mntget(lower_mnt); - (*lower_file) = dentry_open(lower_dentry, lower_mnt, - (O_RDONLY | O_LARGEFILE), cred); - if (IS_ERR(*lower_file)) { - rc = PTR_ERR(*req->lower_file); - (*lower_file) = NULL; - printk(KERN_WARNING "%s: Error attempting privileged " - "open of lower file with either RW or RO " - "perms; rc = [%d]. Giving up.\n", - __func__, rc); - } - } out_unlock: mutex_unlock(&req->mux); out_free: diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 9f0aa9883c2..101fe4c7b1e 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -129,11 +129,10 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); rc = ecryptfs_privileged_open(&inode_info->lower_file, lower_dentry, lower_mnt, cred); - if (rc || IS_ERR(inode_info->lower_file)) { + if (rc) { printk(KERN_ERR "Error opening lower persistent file " "for lower_dentry [0x%p] and lower_mnt [0x%p]; " "rc = [%d]\n", lower_dentry, lower_mnt, rc); - rc = PTR_ERR(inode_info->lower_file); inode_info->lower_file = NULL; } } diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 05772aeaa8f..df4ce99d059 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -396,9 +396,11 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode) rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0, sizeof(u64)); kfree(file_size_virt); - if (rc) + if (rc < 0) printk(KERN_ERR "%s: Error writing file size to header; " "rc = [%d]\n", __func__, rc); + else + rc = 0; out: return rc; } diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index a137c6ea2fe..0cc4fafd655 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -34,15 +34,14 @@ * * Write data to the lower file. * - * Returns zero on success; non-zero on error + * Returns bytes written on success; less than zero on error */ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, loff_t offset, size_t size) { struct ecryptfs_inode_info *inode_info; - ssize_t octets_written; mm_segment_t fs_save; - int rc = 0; + ssize_t rc; inode_info = ecryptfs_inode_to_private(ecryptfs_inode); mutex_lock(&inode_info->lower_file_mutex); @@ -50,14 +49,9 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, inode_info->lower_file->f_pos = offset; fs_save = get_fs(); set_fs(get_ds()); - octets_written = vfs_write(inode_info->lower_file, data, size, - &inode_info->lower_file->f_pos); + rc = vfs_write(inode_info->lower_file, data, size, + &inode_info->lower_file->f_pos); set_fs(fs_save); - if (octets_written < 0) { - printk(KERN_ERR "%s: octets_written = [%td]; " - "expected [%td]\n", __func__, octets_written, size); - rc = -EINVAL; - } mutex_unlock(&inode_info->lower_file_mutex); mark_inode_dirty_sync(ecryptfs_inode); return rc; @@ -91,6 +85,8 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, + offset_in_page); virt = kmap(page_for_lower); rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size); + if (rc > 0) + rc = 0; kunmap(page_for_lower); return rc; } @@ -229,30 +225,24 @@ out: * Read @size bytes of data at byte offset @offset from the lower * inode into memory location @data. * - * Returns zero on success; non-zero on error + * Returns bytes read on success; 0 on EOF; less than zero on error */ int ecryptfs_read_lower(char *data, loff_t offset, size_t size, struct inode *ecryptfs_inode) { struct ecryptfs_inode_info *inode_info = ecryptfs_inode_to_private(ecryptfs_inode); - ssize_t octets_read; mm_segment_t fs_save; - int rc = 0; + ssize_t rc; mutex_lock(&inode_info->lower_file_mutex); BUG_ON(!inode_info->lower_file); inode_info->lower_file->f_pos = offset; fs_save = get_fs(); set_fs(get_ds()); - octets_read = vfs_read(inode_info->lower_file, data, size, - &inode_info->lower_file->f_pos); + rc = vfs_read(inode_info->lower_file, data, size, + &inode_info->lower_file->f_pos); set_fs(fs_save); - if (octets_read < 0) { - printk(KERN_ERR "%s: octets_read = [%td]; " - "expected [%td]\n", __func__, octets_read, size); - rc = -EINVAL; - } mutex_unlock(&inode_info->lower_file_mutex); return rc; } @@ -284,6 +274,8 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, offset = ((((loff_t)page_index) << PAGE_CACHE_SHIFT) + offset_in_page); virt = kmap(page_for_ecryptfs); rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode); + if (rc > 0) + rc = 0; kunmap(page_for_ecryptfs); flush_dcache_page(page_for_ecryptfs); return rc; diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 12d649602d3..b15a43a80ab 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -77,7 +77,6 @@ static void ecryptfs_destroy_inode(struct inode *inode) struct ecryptfs_inode_info *inode_info; inode_info = ecryptfs_inode_to_private(inode); - mutex_lock(&inode_info->lower_file_mutex); if (inode_info->lower_file) { struct dentry *lower_dentry = inode_info->lower_file->f_dentry; @@ -89,7 +88,6 @@ static void ecryptfs_destroy_inode(struct inode *inode) d_drop(lower_dentry); } } - mutex_unlock(&inode_info->lower_file_mutex); ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); kmem_cache_free(ecryptfs_inode_info_cache, inode_info); } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8e1e5e19d21..9d5360c4c2a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -41,8 +41,9 @@ struct wb_writeback_args { long nr_pages; struct super_block *sb; enum writeback_sync_modes sync_mode; - int for_kupdate; - int range_cyclic; + int for_kupdate:1; + int range_cyclic:1; + int for_background:1; }; /* @@ -249,14 +250,25 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, * completion. Caller need not hold sb s_umount semaphore. * */ -void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) +void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, + long nr_pages) { struct wb_writeback_args args = { + .sb = sb, .sync_mode = WB_SYNC_NONE, .nr_pages = nr_pages, .range_cyclic = 1, }; + /* + * We treat @nr_pages=0 as the special case to do background writeback, + * ie. to sync pages until the background dirty threshold is reached. + */ + if (!nr_pages) { + args.nr_pages = LONG_MAX; + args.for_background = 1; + } + bdi_alloc_queue_work(bdi, &args); } @@ -310,7 +322,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) * For inodes being constantly redirtied, dirtied_when can get stuck. * It _appears_ to be in the future, but is actually in distant past. * This test is necessary to prevent such wrapped-around relative times - * from permanently stopping the whole pdflush writeback. + * from permanently stopping the whole bdi writeback. */ ret = ret && time_before_eq(inode->dirtied_when, jiffies); #endif @@ -324,13 +336,38 @@ static void move_expired_inodes(struct list_head *delaying_queue, struct list_head *dispatch_queue, unsigned long *older_than_this) { + LIST_HEAD(tmp); + struct list_head *pos, *node; + struct super_block *sb = NULL; + struct inode *inode; + int do_sb_sort = 0; + while (!list_empty(delaying_queue)) { - struct inode *inode = list_entry(delaying_queue->prev, - struct inode, i_list); + inode = list_entry(delaying_queue->prev, struct inode, i_list); if (older_than_this && inode_dirtied_after(inode, *older_than_this)) break; - list_move(&inode->i_list, dispatch_queue); + if (sb && sb != inode->i_sb) + do_sb_sort = 1; + sb = inode->i_sb; + list_move(&inode->i_list, &tmp); + } + + /* just one sb in list, splice to dispatch_queue and we're done */ + if (!do_sb_sort) { + list_splice(&tmp, dispatch_queue); + return; + } + + /* Move inodes from one superblock together */ + while (!list_empty(&tmp)) { + inode = list_entry(tmp.prev, struct inode, i_list); + sb = inode->i_sb; + list_for_each_prev_safe(pos, node, &tmp) { + inode = list_entry(pos, struct inode, i_list); + if (inode->i_sb == sb) + list_move(&inode->i_list, dispatch_queue); + } } } @@ -439,8 +476,18 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) spin_lock(&inode_lock); inode->i_state &= ~I_SYNC; if (!(inode->i_state & (I_FREEING | I_CLEAR))) { - if (!(inode->i_state & I_DIRTY) && - mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { + if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { + /* + * More pages get dirtied by a fast dirtier. + */ + goto select_queue; + } else if (inode->i_state & I_DIRTY) { + /* + * At least XFS will redirty the inode during the + * writeback (delalloc) and on io completion (isize). + */ + redirty_tail(inode); + } else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { /* * We didn't write back all the pages. nfs_writepages() * sometimes bales out without doing anything. Redirty @@ -462,6 +509,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * soon as the queue becomes uncongested. */ inode->i_state |= I_DIRTY_PAGES; +select_queue: if (wbc->nr_to_write <= 0) { /* * slice used up: queue for next turn @@ -484,12 +532,6 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) inode->i_state |= I_DIRTY_PAGES; redirty_tail(inode); } - } else if (inode->i_state & I_DIRTY) { - /* - * Someone redirtied the inode while were writing back - * the pages. - */ - redirty_tail(inode); } else if (atomic_read(&inode->i_count)) { /* * The inode is clean, inuse @@ -506,6 +548,17 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) return ret; } +static void unpin_sb_for_writeback(struct super_block **psb) +{ + struct super_block *sb = *psb; + + if (sb) { + up_read(&sb->s_umount); + put_super(sb); + *psb = NULL; + } +} + /* * For WB_SYNC_NONE writeback, the caller does not have the sb pinned * before calling writeback. So make sure that we do pin it, so it doesn't @@ -515,11 +568,20 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * 1 if we failed. */ static int pin_sb_for_writeback(struct writeback_control *wbc, - struct inode *inode) + struct inode *inode, struct super_block **psb) { struct super_block *sb = inode->i_sb; /* + * If this sb is already pinned, nothing more to do. If not and + * *psb is non-NULL, unpin the old one first + */ + if (sb == *psb) + return 0; + else if (*psb) + unpin_sb_for_writeback(psb); + + /* * Caller must already hold the ref for this */ if (wbc->sync_mode == WB_SYNC_ALL) { @@ -532,7 +594,7 @@ static int pin_sb_for_writeback(struct writeback_control *wbc, if (down_read_trylock(&sb->s_umount)) { if (sb->s_root) { spin_unlock(&sb_lock); - return 0; + goto pinned; } /* * umounted, drop rwsem again and fall through to failure @@ -543,24 +605,15 @@ static int pin_sb_for_writeback(struct writeback_control *wbc, sb->s_count--; spin_unlock(&sb_lock); return 1; -} - -static void unpin_sb_for_writeback(struct writeback_control *wbc, - struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - - if (wbc->sync_mode == WB_SYNC_ALL) - return; - - up_read(&sb->s_umount); - put_super(sb); +pinned: + *psb = sb; + return 0; } static void writeback_inodes_wb(struct bdi_writeback *wb, struct writeback_control *wbc) { - struct super_block *sb = wbc->sb; + struct super_block *sb = wbc->sb, *pin_sb = NULL; const int is_blkdev_sb = sb_is_blkdev_sb(sb); const unsigned long start = jiffies; /* livelock avoidance */ @@ -619,7 +672,7 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, if (inode_dirtied_after(inode, start)) break; - if (pin_sb_for_writeback(wbc, inode)) { + if (pin_sb_for_writeback(wbc, inode, &pin_sb)) { requeue_io(inode); continue; } @@ -628,7 +681,6 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, __iget(inode); pages_skipped = wbc->pages_skipped; writeback_single_inode(inode, wbc); - unpin_sb_for_writeback(wbc, inode); if (wbc->pages_skipped != pages_skipped) { /* * writeback is not making progress due to locked @@ -648,6 +700,8 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, wbc->more_io = 1; } + unpin_sb_for_writeback(&pin_sb); + spin_unlock(&inode_lock); /* Leave any unwritten inodes on b_io */ } @@ -706,6 +760,7 @@ static long wb_writeback(struct bdi_writeback *wb, }; unsigned long oldest_jif; long wrote = 0; + struct inode *inode; if (wbc.for_kupdate) { wbc.older_than_this = &oldest_jif; @@ -719,20 +774,16 @@ static long wb_writeback(struct bdi_writeback *wb, for (;;) { /* - * Don't flush anything for non-integrity writeback where - * no nr_pages was given + * Stop writeback when nr_pages has been consumed */ - if (!args->for_kupdate && args->nr_pages <= 0 && - args->sync_mode == WB_SYNC_NONE) + if (args->nr_pages <= 0) break; /* - * If no specific pages were given and this is just a - * periodic background writeout and we are below the - * background dirty threshold, don't do anything + * For background writeout, stop when we are below the + * background dirty threshold */ - if (args->for_kupdate && args->nr_pages <= 0 && - !over_bground_thresh()) + if (args->for_background && !over_bground_thresh()) break; wbc.more_io = 0; @@ -744,13 +795,32 @@ static long wb_writeback(struct bdi_writeback *wb, wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; /* - * If we ran out of stuff to write, bail unless more_io got set + * If we consumed everything, see if we have more */ - if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { - if (wbc.more_io && !wbc.for_kupdate) - continue; + if (wbc.nr_to_write <= 0) + continue; + /* + * Didn't write everything and we don't have more IO, bail + */ + if (!wbc.more_io) break; + /* + * Did we write something? Try for more + */ + if (wbc.nr_to_write < MAX_WRITEBACK_PAGES) + continue; + /* + * Nothing written. Wait for some inode to + * become available for writeback. Otherwise + * we'll just busyloop. + */ + spin_lock(&inode_lock); + if (!list_empty(&wb->b_more_io)) { + inode = list_entry(wb->b_more_io.prev, + struct inode, i_list); + inode_wait_for_writeback(inode); } + spin_unlock(&inode_lock); } return wrote; @@ -1060,9 +1130,6 @@ EXPORT_SYMBOL(__mark_inode_dirty); * If older_than_this is non-NULL, then only write out inodes which * had their first dirtying at a time earlier than *older_than_this. * - * If we're a pdlfush thread, then implement pdflush collision avoidance - * against the entire list. - * * If `bdi' is non-zero then we're being asked to writeback a specific queue. * This function assumes that the blockdev superblock's inodes are backed by * a variety of queues, so all inodes are searched. For other superblocks, @@ -1141,7 +1208,7 @@ void writeback_inodes_sb(struct super_block *sb) nr_to_write = nr_dirty + nr_unstable + (inodes_stat.nr_inodes - inodes_stat.nr_unused); - bdi_writeback_all(sb, nr_to_write); + bdi_start_writeback(sb->s_bdi, sb, nr_to_write); } EXPORT_SYMBOL(writeback_inodes_sb); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 810770f9681..29786d3b932 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1711,6 +1711,8 @@ static int nfs_validate_mount_data(void *options, if (!(data->flags & NFS_MOUNT_TCP)) args->nfs_server.protocol = XPRT_TRANSPORT_UDP; + else + args->nfs_server.protocol = XPRT_TRANSPORT_TCP; /* N.B. caller will free nfs_server.hostname in all cases */ args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); args->namlen = data->namlen; diff --git a/fs/proc/array.c b/fs/proc/array.c index 0c6bc602e6c..07f77a7945c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -322,6 +322,8 @@ static inline void task_context_switch_counts(struct seq_file *m, p->nivcsw); } +#ifdef CONFIG_MMU + struct stack_stats { struct vm_area_struct *vma; unsigned long startpage; @@ -402,6 +404,11 @@ static inline void task_show_stack_usage(struct seq_file *m, mmput(mm); } } +#else +static void task_show_stack_usage(struct seq_file *m, struct task_struct *task) +{ +} +#endif /* CONFIG_MMU */ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) |