diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/9p/fcall.c | 21 | ||||
-rw-r--r-- | fs/9p/mux.c | 222 | ||||
-rw-r--r-- | fs/9p/mux.h | 4 | ||||
-rw-r--r-- | fs/9p/vfs_file.c | 13 | ||||
-rw-r--r-- | fs/9p/vfs_inode.c | 19 | ||||
-rw-r--r-- | fs/Makefile | 2 | ||||
-rw-r--r-- | fs/autofs4/autofs_i.h | 5 | ||||
-rw-r--r-- | fs/autofs4/root.c | 10 | ||||
-rw-r--r-- | fs/autofs4/waitq.c | 77 | ||||
-rw-r--r-- | fs/binfmt_flat.c | 30 | ||||
-rw-r--r-- | fs/bio.c | 3 | ||||
-rw-r--r-- | fs/compat.c | 177 | ||||
-rw-r--r-- | fs/configfs/dir.c | 137 | ||||
-rw-r--r-- | fs/exportfs/expfs.c | 2 | ||||
-rw-r--r-- | fs/inotify.c | 9 | ||||
-rw-r--r-- | fs/jffs2/nodelist.c | 6 | ||||
-rw-r--r-- | fs/jfs/jfs_metapage.c | 20 | ||||
-rw-r--r-- | fs/namespace.c | 7 | ||||
-rw-r--r-- | fs/nfsd/export.c | 4 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 7 | ||||
-rw-r--r-- | fs/ocfs2/aops.c | 46 | ||||
-rw-r--r-- | fs/ocfs2/aops.h | 4 | ||||
-rw-r--r-- | fs/ocfs2/extent_map.c | 6 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 86 | ||||
-rw-r--r-- | fs/ocfs2/journal.c | 8 | ||||
-rw-r--r-- | fs/ocfs2/uptodate.c | 4 | ||||
-rw-r--r-- | fs/ocfs2/vote.c | 6 | ||||
-rw-r--r-- | fs/open.c | 1 | ||||
-rw-r--r-- | fs/partitions/check.c | 3 | ||||
-rw-r--r-- | fs/smbfs/dir.c | 5 | ||||
-rw-r--r-- | fs/smbfs/request.c | 4 |
31 files changed, 576 insertions, 372 deletions
diff --git a/fs/9p/fcall.c b/fs/9p/fcall.c index 71742ba150c..6f2617820a4 100644 --- a/fs/9p/fcall.c +++ b/fs/9p/fcall.c @@ -98,23 +98,20 @@ v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, int err) { - int fid; + int fid, id; struct v9fs_session_info *v9ses; - if (err) - return; - + id = 0; fid = tc->params.tclunk.fid; - kfree(tc); - - if (!rc) - return; - - v9ses = a; - if (rc->id == RCLUNK) - v9fs_put_idpool(fid, &v9ses->fidpool); + if (rc) + id = rc->id; + kfree(tc); kfree(rc); + if (id == RCLUNK) { + v9ses = a; + v9fs_put_idpool(fid, &v9ses->fidpool); + } } /** diff --git a/fs/9p/mux.c b/fs/9p/mux.c index 3e5b124a721..f4407eb276c 100644 --- a/fs/9p/mux.c +++ b/fs/9p/mux.c @@ -50,15 +50,23 @@ enum { Wpending = 8, /* can write */ }; +enum { + None, + Flushing, + Flushed, +}; + struct v9fs_mux_poll_task; struct v9fs_req { + spinlock_t lock; int tag; struct v9fs_fcall *tcall; struct v9fs_fcall *rcall; int err; v9fs_mux_req_callback cb; void *cba; + int flush; struct list_head req_list; }; @@ -96,8 +104,8 @@ struct v9fs_mux_poll_task { struct v9fs_mux_rpc { struct v9fs_mux_data *m; - struct v9fs_req *req; int err; + struct v9fs_fcall *tcall; struct v9fs_fcall *rcall; wait_queue_head_t wqueue; }; @@ -524,10 +532,9 @@ again: static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req) { - int ecode, tag; + int ecode; struct v9fs_str *ename; - tag = req->tag; if (!req->err && req->rcall->id == RERROR) { ecode = req->rcall->params.rerror.errno; ename = &req->rcall->params.rerror.error; @@ -553,23 +560,6 @@ static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req) if (!req->err) req->err = -EIO; } - - if (req->err == ERREQFLUSH) - return; - - if (req->cb) { - dprintk(DEBUG_MUX, "calling callback tcall %p rcall %p\n", - req->tcall, req->rcall); - - (*req->cb) (req->cba, req->tcall, req->rcall, req->err); - req->cb = NULL; - } else - kfree(req->rcall); - - v9fs_mux_put_tag(m, tag); - - wake_up(&m->equeue); - kfree(req); } /** @@ -669,17 +659,26 @@ static void v9fs_read_work(void *a) list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { if (rreq->tag == rcall->tag) { req = rreq; - req->rcall = rcall; - list_del(&req->req_list); - spin_unlock(&m->lock); - process_request(m, req); + if (req->flush != Flushing) + list_del(&req->req_list); break; } - } + spin_unlock(&m->lock); - if (!req) { - spin_unlock(&m->lock); + if (req) { + req->rcall = rcall; + process_request(m, req); + + if (req->flush != Flushing) { + if (req->cb) + (*req->cb) (req, req->cba); + else + kfree(req->rcall); + + wake_up(&m->equeue); + } + } else { if (err >= 0 && rcall->id != RFLUSH) dprintk(DEBUG_ERROR, "unexpected response mux %p id %d tag %d\n", @@ -746,7 +745,6 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m, return ERR_PTR(-ENOMEM); v9fs_set_tag(tc, n); - if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) { char buf[150]; @@ -754,12 +752,14 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m, printk(KERN_NOTICE "<<< %p %s\n", m, buf); } + spin_lock_init(&req->lock); req->tag = n; req->tcall = tc; req->rcall = NULL; req->err = 0; req->cb = cb; req->cba = cba; + req->flush = None; spin_lock(&m->lock); list_add_tail(&req->req_list, &m->unsent_req_list); @@ -776,72 +776,108 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m, return req; } -static void v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc, - struct v9fs_fcall *rc, int err) +static void v9fs_mux_free_request(struct v9fs_mux_data *m, struct v9fs_req *req) +{ + v9fs_mux_put_tag(m, req->tag); + kfree(req); +} + +static void v9fs_mux_flush_cb(struct v9fs_req *freq, void *a) { v9fs_mux_req_callback cb; int tag; struct v9fs_mux_data *m; - struct v9fs_req *req, *rptr; + struct v9fs_req *req, *rreq, *rptr; m = a; - dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, tc, - rc, err, tc->params.tflush.oldtag); + dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, + freq->tcall, freq->rcall, freq->err, + freq->tcall->params.tflush.oldtag); spin_lock(&m->lock); cb = NULL; - tag = tc->params.tflush.oldtag; - list_for_each_entry_safe(req, rptr, &m->req_list, req_list) { - if (req->tag == tag) { + tag = freq->tcall->params.tflush.oldtag; + req = NULL; + list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { + if (rreq->tag == tag) { + req = rreq; list_del(&req->req_list); - if (req->cb) { - cb = req->cb; - req->cb = NULL; - spin_unlock(&m->lock); - (*cb) (req->cba, req->tcall, req->rcall, - req->err); - } - kfree(req); - wake_up(&m->equeue); break; } } + spin_unlock(&m->lock); - if (!cb) - spin_unlock(&m->lock); + if (req) { + spin_lock(&req->lock); + req->flush = Flushed; + spin_unlock(&req->lock); + + if (req->cb) + (*req->cb) (req, req->cba); + else + kfree(req->rcall); + + wake_up(&m->equeue); + } - v9fs_mux_put_tag(m, tag); - kfree(tc); - kfree(rc); + kfree(freq->tcall); + kfree(freq->rcall); + v9fs_mux_free_request(m, freq); } -static void +static int v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req) { struct v9fs_fcall *fc; + struct v9fs_req *rreq, *rptr; dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag); + /* if a response was received for a request, do nothing */ + spin_lock(&req->lock); + if (req->rcall || req->err) { + spin_unlock(&req->lock); + dprintk(DEBUG_MUX, "mux %p req %p response already received\n", m, req); + return 0; + } + + req->flush = Flushing; + spin_unlock(&req->lock); + + spin_lock(&m->lock); + /* if the request is not sent yet, just remove it from the list */ + list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) { + if (rreq->tag == req->tag) { + dprintk(DEBUG_MUX, "mux %p req %p request is not sent yet\n", m, req); + list_del(&rreq->req_list); + req->flush = Flushed; + spin_unlock(&m->lock); + if (req->cb) + (*req->cb) (req, req->cba); + return 0; + } + } + spin_unlock(&m->lock); + + clear_thread_flag(TIF_SIGPENDING); fc = v9fs_create_tflush(req->tag); v9fs_send_request(m, fc, v9fs_mux_flush_cb, m); + return 1; } static void -v9fs_mux_rpc_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, int err) +v9fs_mux_rpc_cb(struct v9fs_req *req, void *a) { struct v9fs_mux_rpc *r; - if (err == ERREQFLUSH) { - kfree(rc); - dprintk(DEBUG_MUX, "err req flush\n"); - return; - } - + dprintk(DEBUG_MUX, "req %p r %p\n", req, a); r = a; - dprintk(DEBUG_MUX, "mux %p req %p tc %p rc %p err %d\n", r->m, r->req, - tc, rc, err); - r->rcall = rc; - r->err = err; + r->rcall = req->rcall; + r->err = req->err; + + if (req->flush!=None && !req->err) + r->err = -ERESTARTSYS; + wake_up(&r->wqueue); } @@ -856,12 +892,13 @@ int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc) { - int err; + int err, sigpending; unsigned long flags; struct v9fs_req *req; struct v9fs_mux_rpc r; r.err = 0; + r.tcall = tc; r.rcall = NULL; r.m = m; init_waitqueue_head(&r.wqueue); @@ -869,48 +906,50 @@ v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, if (rc) *rc = NULL; + sigpending = 0; + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } + req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r); if (IS_ERR(req)) { err = PTR_ERR(req); dprintk(DEBUG_MUX, "error %d\n", err); - return PTR_ERR(req); + return err; } - r.req = req; - dprintk(DEBUG_MUX, "mux %p tc %p tag %d rpc %p req %p\n", m, tc, - req->tag, &r, req); err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0); if (r.err < 0) err = r.err; if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) { - spin_lock(&m->lock); - req->tcall = NULL; - req->err = ERREQFLUSH; - spin_unlock(&m->lock); + if (v9fs_mux_flush_request(m, req)) { + /* wait until we get response of the flush message */ + do { + clear_thread_flag(TIF_SIGPENDING); + err = wait_event_interruptible(r.wqueue, + r.rcall || r.err); + } while (!r.rcall && !r.err && err==-ERESTARTSYS && + m->trans->status==Connected && !m->err); + } + sigpending = 1; + } - clear_thread_flag(TIF_SIGPENDING); - v9fs_mux_flush_request(m, req); + if (sigpending) { spin_lock_irqsave(¤t->sighand->siglock, flags); recalc_sigpending(); spin_unlock_irqrestore(¤t->sighand->siglock, flags); } - if (!err) { - if (r.rcall) - dprintk(DEBUG_MUX, "got response id %d tag %d\n", - r.rcall->id, r.rcall->tag); - - if (rc) - *rc = r.rcall; - else - kfree(r.rcall); - } else { + if (rc) + *rc = r.rcall; + else kfree(r.rcall); - dprintk(DEBUG_MUX, "got error %d\n", err); - if (err > 0) - err = -EIO; - } + + v9fs_mux_free_request(m, req); + if (err > 0) + err = -EIO; return err; } @@ -951,12 +990,15 @@ void v9fs_mux_cancel(struct v9fs_mux_data *m, int err) struct v9fs_req *req, *rtmp; LIST_HEAD(cancel_list); - dprintk(DEBUG_MUX, "mux %p err %d\n", m, err); + dprintk(DEBUG_ERROR, "mux %p err %d\n", m, err); m->err = err; spin_lock(&m->lock); list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); } + list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { + list_move(&req->req_list, &cancel_list); + } spin_unlock(&m->lock); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { @@ -965,11 +1007,9 @@ void v9fs_mux_cancel(struct v9fs_mux_data *m, int err) req->err = err; if (req->cb) - (*req->cb) (req->cba, req->tcall, req->rcall, req->err); + (*req->cb) (req, req->cba); else kfree(req->rcall); - - kfree(req); } wake_up(&m->equeue); diff --git a/fs/9p/mux.h b/fs/9p/mux.h index e90bfd32ea4..fb10c50186a 100644 --- a/fs/9p/mux.h +++ b/fs/9p/mux.h @@ -24,6 +24,7 @@ */ struct v9fs_mux_data; +struct v9fs_req; /** * v9fs_mux_req_callback - callback function that is called when the @@ -36,8 +37,7 @@ struct v9fs_mux_data; * @rc - response call * @err - error code (non-zero if error occured) */ -typedef void (*v9fs_mux_req_callback)(void *a, struct v9fs_fcall *tc, - struct v9fs_fcall *rc, int err); +typedef void (*v9fs_mux_req_callback)(struct v9fs_req *req, void *a); int v9fs_mux_global_init(void); void v9fs_mux_global_exit(void); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 083dcfcd158..1a8e46084f0 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -72,11 +72,17 @@ int v9fs_file_open(struct inode *inode, struct file *file) return -ENOSPC; } - err = v9fs_t_walk(v9ses, vfid->fid, fid, NULL, NULL); + err = v9fs_t_walk(v9ses, vfid->fid, fid, NULL, &fcall); if (err < 0) { dprintk(DEBUG_ERROR, "rewalk didn't work\n"); - goto put_fid; + if (fcall && fcall->id == RWALK) + goto clunk_fid; + else { + v9fs_put_idpool(fid, &v9ses->fidpool); + goto free_fcall; + } } + kfree(fcall); /* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */ /* translate open mode appropriately */ @@ -109,8 +115,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) clunk_fid: v9fs_t_clunk(v9ses, fid); -put_fid: - v9fs_put_idpool(fid, &v9ses->fidpool); +free_fcall: kfree(fcall); return err; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 133db366d30..2cb87ba4b1c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -270,7 +270,10 @@ v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm, err = v9fs_t_walk(v9ses, pfid, fid, NULL, &fcall); if (err < 0) { PRINT_FCALL_ERROR("clone error", fcall); - goto put_fid; + if (fcall && fcall->id == RWALK) + goto clunk_fid; + else + goto put_fid; } kfree(fcall); @@ -322,6 +325,9 @@ v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry) &fcall); if (err < 0) { + if (fcall && fcall->id == RWALK) + goto clunk_fid; + PRINT_FCALL_ERROR("walk error", fcall); v9fs_put_idpool(nfid, &v9ses->fidpool); goto error; @@ -640,19 +646,26 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, } result = v9fs_t_walk(v9ses, dirfidnum, newfid, - (char *)dentry->d_name.name, NULL); + (char *)dentry->d_name.name, &fcall); + if (result < 0) { - v9fs_put_idpool(newfid, &v9ses->fidpool); + if (fcall && fcall->id == RWALK) + v9fs_t_clunk(v9ses, newfid); + else + v9fs_put_idpool(newfid, &v9ses->fidpool); + if (result == -ENOENT) { d_add(dentry, NULL); dprintk(DEBUG_VFS, "Return negative dentry %p count %d\n", dentry, atomic_read(&dentry->d_count)); + kfree(fcall); return NULL; } dprintk(DEBUG_ERROR, "walk error:%d\n", result); goto FreeFcall; } + kfree(fcall); result = v9fs_t_stat(v9ses, newfid, &fcall); if (result < 0) { diff --git a/fs/Makefile b/fs/Makefile index 2c22e282c77..c731d2c0f40 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_DNOTIFY) += dnotify.o obj-$(CONFIG_PROC_FS) += proc/ obj-y += partitions/ obj-$(CONFIG_SYSFS) += sysfs/ +obj-$(CONFIG_CONFIGFS_FS) += configfs/ obj-y += devpts/ obj-$(CONFIG_PROFILING) += dcookies.o @@ -101,6 +102,5 @@ obj-$(CONFIG_BEFS_FS) += befs/ obj-$(CONFIG_HOSTFS) += hostfs/ obj-$(CONFIG_HPPFS) += hppfs/ obj-$(CONFIG_DEBUG_FS) += debugfs/ -obj-$(CONFIG_CONFIGFS_FS) += configfs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ obj-$(CONFIG_GFS2_FS) += gfs2/ diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 57c4903614e..d6603d02304 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -74,8 +74,8 @@ struct autofs_wait_queue { struct autofs_wait_queue *next; autofs_wqt_t wait_queue_token; /* We use the following to see what we are waiting for */ - int hash; - int len; + unsigned int hash; + unsigned int len; char *name; u32 dev; u64 ino; @@ -85,7 +85,6 @@ struct autofs_wait_queue { pid_t tgid; /* This is for status reporting upon return */ int status; - atomic_t notify; atomic_t wait_ctr; }; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 84e030c8ddd..5100f984783 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -327,6 +327,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = autofs4_dentry_ino(dentry); int oz_mode = autofs4_oz_mode(sbi); unsigned int lookup_type; int status; @@ -340,13 +341,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) if (oz_mode || !lookup_type) goto done; - /* - * If a request is pending wait for it. - * If it's a mount then it won't be expired till at least - * a liitle later and if it's an expire then we might need - * to mount it again. - */ - if (autofs4_ispending(dentry)) { + /* If an expire request is pending wait for it. */ + if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { DPRINTK("waiting for active request %p name=%.*s", dentry, dentry->d_name.len, dentry->d_name.name); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 142ab6aa2aa..ce103e7b0bc 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -189,14 +189,30 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, return len; } +static struct autofs_wait_queue * +autofs4_find_wait(struct autofs_sb_info *sbi, + char *name, unsigned int hash, unsigned int len) +{ + struct autofs_wait_queue *wq; + + for (wq = sbi->queues; wq; wq = wq->next) { + if (wq->hash == hash && + wq->len == len && + wq->name && !memcmp(wq->name, name, len)) + break; + } + return wq; +} + int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, enum autofs_notify notify) { + struct autofs_info *ino; struct autofs_wait_queue *wq; char *name; unsigned int len = 0; unsigned int hash = 0; - int status; + int status, type; /* In catatonic mode, we don't wait for nobody */ if (sbi->catatonic) @@ -223,21 +239,41 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, return -EINTR; } - for (wq = sbi->queues ; wq ; wq = wq->next) { - if (wq->hash == dentry->d_name.hash && - wq->len == len && - wq->name && !memcmp(wq->name, name, len)) - break; - } + wq = autofs4_find_wait(sbi, name, hash, len); + ino = autofs4_dentry_ino(dentry); + if (!wq && ino && notify == NFY_NONE) { + /* + * Either we've betean the pending expire to post it's + * wait or it finished while we waited on the mutex. + * So we need to wait till either, the wait appears + * or the expire finishes. + */ + + while (ino->flags & AUTOFS_INF_EXPIRING) { + mutex_unlock(&sbi->wq_mutex); + schedule_timeout_interruptible(HZ/10); + if (mutex_lock_interruptible(&sbi->wq_mutex)) { + kfree(name); + return -EINTR; + } + wq = autofs4_find_wait(sbi, name, hash, len); + if (wq) + break; + } - if (!wq) { - /* Can't wait for an expire if there's no mount */ - if (notify == NFY_NONE && !d_mountpoint(dentry)) { + /* + * Not ideal but the status has already gone. Of the two + * cases where we wait on NFY_NONE neither depend on the + * return status of the wait. + */ + if (!wq) { kfree(name); mutex_unlock(&sbi->wq_mutex); - return -ENOENT; + return 0; } + } + if (!wq) { /* Create a new wait queue */ wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); if (!wq) { @@ -263,20 +299,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->tgid = current->tgid; wq->status = -EINTR; /* Status return if interrupted */ atomic_set(&wq->wait_ctr, 2); - atomic_set(&wq->notify, 1); - mutex_unlock(&sbi->wq_mutex); - } else { - atomic_inc(&wq->wait_ctr); mutex_unlock(&sbi->wq_mutex); - kfree(name); - DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", - (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); - } - - if (notify != NFY_NONE && atomic_read(&wq->notify)) { - int type; - - atomic_dec(&wq->notify); if (sbi->version < 5) { if (notify == NFY_MOUNT) @@ -299,6 +322,12 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, /* autofs4_notify_daemon() may block */ autofs4_notify_daemon(sbi, wq, type); + } else { + atomic_inc(&wq->wait_ctr); + mutex_unlock(&sbi->wq_mutex); + kfree(name); + DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", + (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); } /* wq->name is NULL if and only if the lock is already released */ diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 69f44dcdb0b..b1c902e319c 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -428,7 +428,6 @@ static int load_flat_file(struct linux_binprm * bprm, loff_t fpos; unsigned long start_code, end_code; int ret; - int exec_fileno; hdr = ((struct flat_hdr *) bprm->buf); /* exec-header */ inode = bprm->file->f_dentry->d_inode; @@ -502,21 +501,12 @@ static int load_flat_file(struct linux_binprm * bprm, goto err; } - /* check file descriptor */ - exec_fileno = get_unused_fd(); - if (exec_fileno < 0) { - ret = -EMFILE; - goto err; - } - get_file(bprm->file); - fd_install(exec_fileno, bprm->file); - /* Flush all traces of the currently running executable */ if (id == 0) { result = flush_old_exec(bprm); if (result) { ret = result; - goto err_close; + goto err; } /* OK, This is the point of no return */ @@ -548,7 +538,7 @@ static int load_flat_file(struct linux_binprm * bprm, textpos = (unsigned long) -ENOMEM; printk("Unable to mmap process text, errno %d\n", (int)-textpos); ret = textpos; - goto err_close; + goto err; } down_write(¤t->mm->mmap_sem); @@ -564,7 +554,7 @@ static int load_flat_file(struct linux_binprm * bprm, (int)-datapos); do_munmap(current->mm, textpos, text_len); ret = realdatastart; - goto err_close; + goto err; } datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); @@ -587,7 +577,7 @@ static int load_flat_file(struct linux_binprm * bprm, do_munmap(current->mm, textpos, text_len); do_munmap(current->mm, realdatastart, data_len + extra); ret = result; - goto err_close; + goto err; } reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len)); @@ -606,7 +596,7 @@ static int load_flat_file(struct linux_binprm * bprm, printk("Unable to allocate RAM for process text/data, errno %d\n", (int)-textpos); ret = textpos; - goto err_close; + goto err; } realdatastart = textpos + ntohl(hdr->data_start); @@ -652,7 +642,7 @@ static int load_flat_file(struct linux_binprm * bprm, do_munmap(current->mm, textpos, text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long)); ret = result; - goto err_close; + goto err; } } @@ -717,7 +707,7 @@ static int load_flat_file(struct linux_binprm * bprm, addr = calc_reloc(*rp, libinfo, id, 0); if (addr == RELOC_FAILED) { ret = -ENOEXEC; - goto err_close; + goto err; } *rp = addr; } @@ -747,7 +737,7 @@ static int load_flat_file(struct linux_binprm * bprm, rp = (unsigned long *) calc_reloc(addr, libinfo, id, 1); if (rp == (unsigned long *)RELOC_FAILED) { ret = -ENOEXEC; - goto err_close; + goto err; } /* Get the pointer's value. */ @@ -762,7 +752,7 @@ static int load_flat_file(struct linux_binprm * bprm, addr = calc_reloc(addr, libinfo, id, 0); if (addr == RELOC_FAILED) { ret = -ENOEXEC; - goto err_close; + goto err; } /* Write back the relocated pointer. */ @@ -783,8 +773,6 @@ static int load_flat_file(struct linux_binprm * bprm, stack_len); return 0; -err_close: - sys_close(exec_fileno); err: return ret; } @@ -1116,6 +1116,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) bp->bio1.bi_io_vec = &bp->bv1; bp->bio2.bi_io_vec = &bp->bv2; + bp->bio1.bi_max_vecs = 1; + bp->bio2.bi_max_vecs = 1; + bp->bio1.bi_end_io = bio_pair_end_1; bp->bio2.bi_end_io = bio_pair_end_2; diff --git a/fs/compat.c b/fs/compat.c index 970888aad84..b1f64786a61 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1913,7 +1913,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, } if (sigmask) { - if (sigsetsize |= sizeof(compat_sigset_t)) + if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; if (copy_from_user(&ss32, sigmask, sizeof(ss32))) return -EFAULT; @@ -2030,109 +2030,115 @@ union compat_nfsctl_res { struct knfsd_fh cr32_getfs; }; -static int compat_nfs_svc_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) +static int compat_nfs_svc_trans(struct nfsctl_arg *karg, + struct compat_nfsctl_arg __user *arg) { - int err; - - err = access_ok(VERIFY_READ, &arg->ca32_svc, sizeof(arg->ca32_svc)); - err |= get_user(karg->ca_version, &arg->ca32_version); - err |= __get_user(karg->ca_svc.svc_port, &arg->ca32_svc.svc32_port); - err |= __get_user(karg->ca_svc.svc_nthreads, &arg->ca32_svc.svc32_nthreads); - return (err) ? -EFAULT : 0; + if (!access_ok(VERIFY_READ, &arg->ca32_svc, sizeof(arg->ca32_svc)) || + get_user(karg->ca_version, &arg->ca32_version) || + __get_user(karg->ca_svc.svc_port, &arg->ca32_svc.svc32_port) || + __get_user(karg->ca_svc.svc_nthreads, + &arg->ca32_svc.svc32_nthreads)) + return -EFAULT; + return 0; } -static int compat_nfs_clnt_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) -{ - int err; - - err = access_ok(VERIFY_READ, &arg->ca32_client, sizeof(arg->ca32_client)); - err |= get_user(karg->ca_version, &arg->ca32_version); - err |= __copy_from_user(&karg->ca_client.cl_ident[0], - &arg->ca32_client.cl32_ident[0], - NFSCLNT_IDMAX); - err |= __get_user(karg->ca_client.cl_naddr, &arg->ca32_client.cl32_naddr); - err |= __copy_from_user(&karg->ca_client.cl_addrlist[0], - &arg->ca32_client.cl32_addrlist[0], - (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)); - err |= __get_user(karg->ca_client.cl_fhkeytype, - &arg->ca32_client.cl32_fhkeytype); - err |= __get_user(karg->ca_client.cl_fhkeylen, - &arg->ca32_client.cl32_fhkeylen); - err |= __copy_from_user(&karg->ca_client.cl_fhkey[0], - &arg->ca32_client.cl32_fhkey[0], - NFSCLNT_KEYMAX); +static int compat_nfs_clnt_trans(struct nfsctl_arg *karg, + struct compat_nfsctl_arg __user *arg) +{ + if (!access_ok(VERIFY_READ, &arg->ca32_client, + sizeof(arg->ca32_client)) || + get_user(karg->ca_version, &arg->ca32_version) || + __copy_from_user(&karg->ca_client.cl_ident[0], + &arg->ca32_client.cl32_ident[0], + NFSCLNT_IDMAX) || + __get_user(karg->ca_client.cl_naddr, + &arg->ca32_client.cl32_naddr) || + __copy_from_user(&karg->ca_client.cl_addrlist[0], + &arg->ca32_client.cl32_addrlist[0], + (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)) || + __get_user(karg->ca_client.cl_fhkeytype, + &arg->ca32_client.cl32_fhkeytype) || + __get_user(karg->ca_client.cl_fhkeylen, + &arg->ca32_client.cl32_fhkeylen) || + __copy_from_user(&karg->ca_client.cl_fhkey[0], + &arg->ca32_client.cl32_fhkey[0], + NFSCLNT_KEYMAX)) + return -EFAULT; - return (err) ? -EFAULT : 0; + return 0; } -static int compat_nfs_exp_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) -{ - int err; - - err = access_ok(VERIFY_READ, &arg->ca32_export, sizeof(arg->ca32_export)); - err |= get_user(karg->ca_version, &arg->ca32_version); - err |= __copy_from_user(&karg->ca_export.ex_client[0], - &arg->ca32_export.ex32_client[0], - NFSCLNT_IDMAX); - err |= __copy_from_user(&karg->ca_export.ex_path[0], - &arg->ca32_export.ex32_path[0], - NFS_MAXPATHLEN); - err |= __get_user(karg->ca_export.ex_dev, - &arg->ca32_export.ex32_dev); - err |= __get_user(karg->ca_export.ex_ino, - &arg->ca32_export.ex32_ino); - err |= __get_user(karg->ca_export.ex_flags, - &arg->ca32_export.ex32_flags); - err |= __get_user(karg->ca_export.ex_anon_uid, - &arg->ca32_export.ex32_anon_uid); - err |= __get_user(karg->ca_export.ex_anon_gid, - &arg->ca32_export.ex32_anon_gid); +static int compat_nfs_exp_trans(struct nfsctl_arg *karg, + struct compat_nfsctl_arg __user *arg) +{ + if (!access_ok(VERIFY_READ, &arg->ca32_export, + sizeof(arg->ca32_export)) || + get_user(karg->ca_version, &arg->ca32_version) || + __copy_from_user(&karg->ca_export.ex_client[0], + &arg->ca32_export.ex32_client[0], + NFSCLNT_IDMAX) || + __copy_from_user(&karg->ca_export.ex_path[0], + &arg->ca32_export.ex32_path[0], + NFS_MAXPATHLEN) || + __get_user(karg->ca_export.ex_dev, + &arg->ca32_export.ex32_dev) || + __get_user(karg->ca_export.ex_ino, + &arg->ca32_export.ex32_ino) || + __get_user(karg->ca_export.ex_flags, + &arg->ca32_export.ex32_flags) || + __get_user(karg->ca_export.ex_anon_uid, + &arg->ca32_export.ex32_anon_uid) || + __get_user(karg->ca_export.ex_anon_gid, + &arg->ca32_export.ex32_anon_gid)) + return -EFAULT; SET_UID(karg->ca_export.ex_anon_uid, karg->ca_export.ex_anon_uid); SET_GID(karg->ca_export.ex_anon_gid, karg->ca_export.ex_anon_gid); - return (err) ? -EFAULT : 0; + return 0; } -static int compat_nfs_getfd_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) -{ - int err; - - err = access_ok(VERIFY_READ, &arg->ca32_getfd, sizeof(arg->ca32_getfd)); - err |= get_user(karg->ca_version, &arg->ca32_version); - err |= __copy_from_user(&karg->ca_getfd.gd_addr, - &arg->ca32_getfd.gd32_addr, - (sizeof(struct sockaddr))); - err |= __copy_from_user(&karg->ca_getfd.gd_path, - &arg->ca32_getfd.gd32_path, - (NFS_MAXPATHLEN+1)); - err |= __get_user(karg->ca_getfd.gd_version, - &arg->ca32_getfd.gd32_version); +static int compat_nfs_getfd_trans(struct nfsctl_arg *karg, + struct compat_nfsctl_arg __user *arg) +{ + if (!access_ok(VERIFY_READ, &arg->ca32_getfd, + sizeof(arg->ca32_getfd)) || + get_user(karg->ca_version, &arg->ca32_version) || + __copy_from_user(&karg->ca_getfd.gd_addr, + &arg->ca32_getfd.gd32_addr, + (sizeof(struct sockaddr))) || + __copy_from_user(&karg->ca_getfd.gd_path, + &arg->ca32_getfd.gd32_path, + (NFS_MAXPATHLEN+1)) || + __get_user(karg->ca_getfd.gd_version, + &arg->ca32_getfd.gd32_version)) + return -EFAULT; - return (err) ? -EFAULT : 0; + return 0; } -static int compat_nfs_getfs_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) +static int compat_nfs_getfs_trans(struct nfsctl_arg *karg, + struct compat_nfsctl_arg __user *arg) { - int err; - - err = access_ok(VERIFY_READ, &arg->ca32_getfs, sizeof(arg->ca32_getfs)); - err |= get_user(karg->ca_version, &arg->ca32_version); - err |= __copy_from_user(&karg->ca_getfs.gd_addr, - &arg->ca32_getfs.gd32_addr, - (sizeof(struct sockaddr))); - err |= __copy_from_user(&karg->ca_getfs.gd_path, - &arg->ca32_getfs.gd32_path, - (NFS_MAXPATHLEN+1)); - err |= __get_user(karg->ca_getfs.gd_maxlen, - &arg->ca32_getfs.gd32_maxlen); + if (!access_ok(VERIFY_READ,&arg->ca32_getfs,sizeof(arg->ca32_getfs)) || + get_user(karg->ca_version, &arg->ca32_version) || + __copy_from_user(&karg->ca_getfs.gd_addr, + &arg->ca32_getfs.gd32_addr, + (sizeof(struct sockaddr))) || + __copy_from_user(&karg->ca_getfs.gd_path, + &arg->ca32_getfs.gd32_path, + (NFS_MAXPATHLEN+1)) || + __get_user(karg->ca_getfs.gd_maxlen, + &arg->ca32_getfs.gd32_maxlen)) + return -EFAULT; - return (err) ? -EFAULT : 0; + return 0; } /* This really doesn't need translations, we are only passing * back a union which contains opaque nfs file handle data. */ -static int compat_nfs_getfh_res_trans(union nfsctl_res *kres, union compat_nfsctl_res __user *res) +static int compat_nfs_getfh_res_trans(union nfsctl_res *kres, + union compat_nfsctl_res __user *res) { int err; @@ -2141,8 +2147,9 @@ static int compat_nfs_getfh_res_trans(union nfsctl_res *kres, union compat_nfsct return (err) ? -EFAULT : 0; } -asmlinkage long compat_sys_nfsservctl(int cmd, struct compat_nfsctl_arg __user *arg, - union compat_nfsctl_res __user *res) +asmlinkage long compat_sys_nfsservctl(int cmd, + struct compat_nfsctl_arg __user *arg, + union compat_nfsctl_res __user *res) { struct nfsctl_arg *karg; union nfsctl_res *kres; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 5638c8f9362..5f952187fc5 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -505,13 +505,15 @@ static int populate_groups(struct config_group *group) int i; if (group->default_groups) { - /* FYI, we're faking mkdir here + /* + * FYI, we're faking mkdir here * I'm not sure we need this semaphore, as we're called * from our parent's mkdir. That holds our parent's * i_mutex, so afaik lookup cannot continue through our * parent to find us, let alone mess with our tree. * That said, taking our i_mutex is closer to mkdir - * emulation, and shouldn't hurt. */ + * emulation, and shouldn't hurt. + */ mutex_lock(&dentry->d_inode->i_mutex); for (i = 0; group->default_groups[i]; i++) { @@ -546,20 +548,34 @@ static void unlink_obj(struct config_item *item) item->ci_group = NULL; item->ci_parent = NULL; + + /* Drop the reference for ci_entry */ config_item_put(item); + /* Drop the reference for ci_parent */ config_group_put(group); } } static void link_obj(struct config_item *parent_item, struct config_item *item) { - /* Parent seems redundant with group, but it makes certain - * traversals much nicer. */ + /* + * Parent seems redundant with group, but it makes certain + * traversals much nicer. + */ item->ci_parent = parent_item; + + /* + * We hold a reference on the parent for the child's ci_parent + * link. + */ item->ci_group = config_group_get(to_config_group(parent_item)); list_add_tail(&item->ci_entry, &item->ci_group->cg_children); + /* + * We hold a reference on the child for ci_entry on the parent's + * cg_children + */ config_item_get(item); } @@ -684,6 +700,10 @@ static void client_drop_item(struct config_item *parent_item, type = parent_item->ci_type; BUG_ON(!type); + /* + * If ->drop_item() exists, it is responsible for the + * config_item_put(). + */ if (type->ct_group_ops && type->ct_group_ops->drop_item) type->ct_group_ops->drop_item(to_config_group(parent_item), item); @@ -694,23 +714,28 @@ static void client_drop_item(struct config_item *parent_item, static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { - int ret; + int ret, module_got = 0; struct config_group *group; struct config_item *item; struct config_item *parent_item; struct configfs_subsystem *subsys; struct configfs_dirent *sd; struct config_item_type *type; - struct module *owner; + struct module *owner = NULL; char *name; - if (dentry->d_parent == configfs_sb->s_root) - return -EPERM; + if (dentry->d_parent == configfs_sb->s_root) { + ret = -EPERM; + goto out; + } sd = dentry->d_parent->d_fsdata; - if (!(sd->s_type & CONFIGFS_USET_DIR)) - return -EPERM; + if (!(sd->s_type & CONFIGFS_USET_DIR)) { + ret = -EPERM; + goto out; + } + /* Get a working ref for the duration of this function */ parent_item = configfs_get_config_item(dentry->d_parent); type = parent_item->ci_type; subsys = to_config_group(parent_item)->cg_subsys; @@ -719,15 +744,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (!type || !type->ct_group_ops || (!type->ct_group_ops->make_group && !type->ct_group_ops->make_item)) { - config_item_put(parent_item); - return -EPERM; /* What lack-of-mkdir returns */ + ret = -EPERM; /* Lack-of-mkdir returns -EPERM */ + goto out_put; } name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); if (!name) { - config_item_put(parent_item); - return -ENOMEM; + ret = -ENOMEM; + goto out_put; } + snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); down(&subsys->su_sem); @@ -748,40 +774,67 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) kfree(name); if (!item) { - config_item_put(parent_item); - return -ENOMEM; + /* + * If item == NULL, then link_obj() was never called. + * There are no extra references to clean up. + */ + ret = -ENOMEM; + goto out_put; } - ret = -EINVAL; + /* + * link_obj() has been called (via link_group() for groups). + * From here on out, errors must clean that up. + */ + type = item->ci_type; - if (type) { - owner = type->ct_owner; - if (try_module_get(owner)) { - if (group) { - ret = configfs_attach_group(parent_item, - item, - dentry); - } else { - ret = configfs_attach_item(parent_item, - item, - dentry); - } + if (!type) { + ret = -EINVAL; + goto out_unlink; + } - if (ret) { - down(&subsys->su_sem); - if (group) - unlink_group(group); - else - unlink_obj(item); - client_drop_item(parent_item, item); - up(&subsys->su_sem); + owner = type->ct_owner; + if (!try_module_get(owner)) { + ret = -EINVAL; + goto out_unlink; + } - config_item_put(parent_item); - module_put(owner); - } - } + /* + * I hate doing it this way, but if there is + * an error, module_put() probably should + * happen after any cleanup. + */ + module_got = 1; + + if (group) + ret = configfs_attach_group(parent_item, item, dentry); + else + ret = configfs_attach_item(parent_item, item, dentry); + +out_unlink: + if (ret) { + /* Tear down everything we built up */ + down(&subsys->su_sem); + if (group) + unlink_group(group); + else + unlink_obj(item); + client_drop_item(parent_item, item); + up(&subsys->su_sem); + + if (module_got) + module_put(owner); } +out_put: + /* + * link_obj()/link_group() took a reference from child->parent, + * so the parent is safely pinned. We can drop our working + * reference. + */ + config_item_put(parent_item); + +out: return ret; } @@ -801,6 +854,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) if (sd->s_type & CONFIGFS_USET_DEFAULT) return -EPERM; + /* Get a working ref until we have the child */ parent_item = configfs_get_config_item(dentry->d_parent); subsys = to_config_group(parent_item)->cg_subsys; BUG_ON(!subsys); @@ -817,6 +871,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) return ret; } + /* Get a working ref for the duration of this function */ item = configfs_get_config_item(dentry); /* Drop reference from above, item already holds one. */ diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index b06b54f1bbb..4c39009350f 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -102,7 +102,7 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, if (acceptable(context, result)) return result; if (S_ISDIR(result->d_inode->i_mode)) { - /* there is no other dentry, so fail */ + err = -EACCES; goto err_result; } diff --git a/fs/inotify.c b/fs/inotify.c index 1f50302849c..732ec4bd577 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -848,7 +848,11 @@ static int inotify_release(struct inode *ignored, struct file *file) inode = watch->inode; mutex_lock(&inode->inotify_mutex); mutex_lock(&dev->mutex); - remove_watch_no_event(watch, dev); + + /* make sure we didn't race with another list removal */ + if (likely(idr_find(&dev->idr, watch->wd))) + remove_watch_no_event(watch, dev); + mutex_unlock(&dev->mutex); mutex_unlock(&inode->inotify_mutex); put_inotify_watch(watch); @@ -890,8 +894,7 @@ static int inotify_ignore(struct inotify_device *dev, s32 wd) mutex_lock(&dev->mutex); /* make sure that we did not race */ - watch = idr_find(&dev->idr, wd); - if (likely(watch)) + if (likely(idr_find(&dev->idr, wd) == watch)) remove_watch(watch, dev); mutex_unlock(&dev->mutex); diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c index d4d0c41490c..1d46677afd1 100644 --- a/fs/jffs2/nodelist.c +++ b/fs/jffs2/nodelist.c @@ -438,7 +438,8 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info if (c->mtd->point) { err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer); if (!err && retlen < tn->csize) { - JFFS2_WARNING("MTD point returned len too short: %u instead of %u.\n", retlen, tn->csize); + JFFS2_WARNING("MTD point returned len too short: %zu " + "instead of %u.\n", retlen, tn->csize); c->mtd->unpoint(c->mtd, buffer, ofs, len); } else if (err) JFFS2_WARNING("MTD point failed: error code %d.\n", err); @@ -461,7 +462,8 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info } if (retlen != len) { - JFFS2_ERROR("short read at %#08x: %d instead of %d.\n", ofs, retlen, len); + JFFS2_ERROR("short read at %#08x: %zd instead of %d.\n", + ofs, retlen, len); err = -EIO; goto free_out; } diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index f28696f235c..2b220dd6b4e 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -542,7 +542,7 @@ add_failed: static int metapage_releasepage(struct page *page, gfp_t gfp_mask) { struct metapage *mp; - int busy = 0; + int ret = 1; unsigned int offset; for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { @@ -552,30 +552,20 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask) continue; jfs_info("metapage_releasepage: mp = 0x%p", mp); - if (mp->count || mp->nohomeok) { + if (mp->count || mp->nohomeok || + test_bit(META_dirty, &mp->flag)) { jfs_info("count = %ld, nohomeok = %d", mp->count, mp->nohomeok); - busy = 1; + ret = 0; continue; } - wait_on_page_writeback(page); - //WARN_ON(test_bit(META_dirty, &mp->flag)); - if (test_bit(META_dirty, &mp->flag)) { - dump_mem("dirty mp in metapage_releasepage", mp, - sizeof(struct metapage)); - dump_mem("page", page, sizeof(struct page)); - dump_stack(); - } if (mp->lsn) remove_from_logsync(mp); remove_metapage(page, mp); INCREMENT(mpStat.pagefree); free_metapage(mp); } - if (busy) - return -1; - - return 0; + return ret; } static void metapage_invalidatepage(struct page *page, unsigned long offset) diff --git a/fs/namespace.c b/fs/namespace.c index 2c5f1f80bdc..bf478addb85 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -899,13 +899,11 @@ static int do_change_type(struct nameidata *nd, int flag) /* * do loopback mount. */ -static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags, int mnt_flags) +static int do_loopback(struct nameidata *nd, char *old_name, int recurse) { struct nameidata old_nd; struct vfsmount *mnt = NULL; - int recurse = flags & MS_REC; int err = mount_is_safe(nd); - if (err) return err; if (!old_name || !*old_name) @@ -939,7 +937,6 @@ static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags spin_unlock(&vfsmount_lock); release_mounts(&umount_list); } - mnt->mnt_flags = mnt_flags; out: up_write(&namespace_sem); @@ -1353,7 +1350,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, data_page); else if (flags & MS_BIND) - retval = do_loopback(&nd, dev_name, flags, mnt_flags); + retval = do_loopback(&nd, dev_name, flags & MS_REC); else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) retval = do_change_type(&nd, flags); else if (flags & MS_MOVE) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 4e0578121d9..3eec30000f3 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1066,9 +1066,11 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, rv = nfserr_perm; else if (IS_ERR(exp)) rv = nfserrno(PTR_ERR(exp)); - else + else { rv = fh_compose(fhp, exp, fsid_key->ek_dentry, NULL); + exp_put(exp); + } cache_put(&fsid_key->h, &svc_expkey_cache); return rv; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6aa92d0e687..1d65f13f458 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1922,11 +1922,10 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) value = kmalloc(size, GFP_KERNEL); if (!value) return -ENOMEM; - size = posix_acl_to_xattr(acl, value, size); - if (size < 0) { - error = size; + error = posix_acl_to_xattr(acl, value, size); + if (error < 0) goto getout; - } + size = error; } else size = 0; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 0d858d0b25b..47152bf9a7f 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -276,13 +276,29 @@ static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) return ret; } +/* This can also be called from ocfs2_write_zero_page() which has done + * it's own cluster locking. */ +int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, + unsigned from, unsigned to) +{ + int ret; + + down_read(&OCFS2_I(inode)->ip_alloc_sem); + + ret = block_prepare_write(page, from, to, ocfs2_get_block); + + up_read(&OCFS2_I(inode)->ip_alloc_sem); + + return ret; +} + /* * ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called * from loopback. It must be able to perform its own locking around * ocfs2_get_block(). */ -int ocfs2_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ocfs2_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) { struct inode *inode = page->mapping->host; int ret; @@ -295,11 +311,7 @@ int ocfs2_prepare_write(struct file *file, struct page *page, goto out; } - down_read(&OCFS2_I(inode)->ip_alloc_sem); - - ret = block_prepare_write(page, from, to, ocfs2_get_block); - - up_read(&OCFS2_I(inode)->ip_alloc_sem); + ret = ocfs2_prepare_write_nolock(inode, page, from, to); ocfs2_meta_unlock(inode, 0); out: @@ -625,11 +637,31 @@ static ssize_t ocfs2_direct_IO(int rw, int ret; mlog_entry_void(); + + /* + * We get PR data locks even for O_DIRECT. This allows + * concurrent O_DIRECT I/O but doesn't let O_DIRECT with + * extending and buffered zeroing writes race. If they did + * race then the buffered zeroing could be written back after + * the O_DIRECT I/O. It's one thing to tell people not to mix + * buffered and O_DIRECT writes, but expecting them to + * understand that file extension is also an implicit buffered + * write is too much. By getting the PR we force writeback of + * the buffered zeroing before proceeding. + */ + ret = ocfs2_data_lock(inode, 0); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + ocfs2_data_unlock(inode, 0); + ret = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ocfs2_direct_IO_get_blocks, ocfs2_dio_end_io); +out: mlog_exit(ret); return ret; } diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index d40456d509a..e88c3f0b8fa 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -22,8 +22,8 @@ #ifndef OCFS2_AOPS_H #define OCFS2_AOPS_H -int ocfs2_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to); +int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, + unsigned from, unsigned to); struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode, struct page *page, diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 4601fc256f1..1a5c69071df 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -569,7 +569,7 @@ static int ocfs2_extent_map_insert(struct inode *inode, ret = -ENOMEM; ctxt.new_ent = kmem_cache_alloc(ocfs2_em_ent_cachep, - GFP_KERNEL); + GFP_NOFS); if (!ctxt.new_ent) { mlog_errno(ret); return ret; @@ -583,14 +583,14 @@ static int ocfs2_extent_map_insert(struct inode *inode, if (ctxt.need_left && !ctxt.left_ent) { ctxt.left_ent = kmem_cache_alloc(ocfs2_em_ent_cachep, - GFP_KERNEL); + GFP_NOFS); if (!ctxt.left_ent) break; } if (ctxt.need_right && !ctxt.right_ent) { ctxt.right_ent = kmem_cache_alloc(ocfs2_em_ent_cachep, - GFP_KERNEL); + GFP_NOFS); if (!ctxt.right_ent) break; } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 581eb451a41..a9559c87453 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -613,7 +613,8 @@ leave: /* Some parts of this taken from generic_cont_expand, which turned out * to be too fragile to do exactly what we need without us having to - * worry about recursive locking in ->commit_write(). */ + * worry about recursive locking in ->prepare_write() and + * ->commit_write(). */ static int ocfs2_write_zero_page(struct inode *inode, u64 size) { @@ -641,7 +642,7 @@ static int ocfs2_write_zero_page(struct inode *inode, goto out; } - ret = ocfs2_prepare_write(NULL, page, offset, offset); + ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); if (ret < 0) { mlog_errno(ret); goto out_unlock; @@ -695,13 +696,26 @@ out: return ret; } +/* + * A tail_to_skip value > 0 indicates that we're being called from + * ocfs2_file_aio_write(). This has the following implications: + * + * - we don't want to update i_size + * - di_bh will be NULL, which is fine because it's only used in the + * case where we want to update i_size. + * - ocfs2_zero_extend() will then only be filling the hole created + * between i_size and the start of the write. + */ static int ocfs2_extend_file(struct inode *inode, struct buffer_head *di_bh, - u64 new_i_size) + u64 new_i_size, + size_t tail_to_skip) { int ret = 0; u32 clusters_to_add; + BUG_ON(!tail_to_skip && !di_bh); + /* setattr sometimes calls us like this. */ if (new_i_size == 0) goto out; @@ -714,27 +728,44 @@ static int ocfs2_extend_file(struct inode *inode, OCFS2_I(inode)->ip_clusters; if (clusters_to_add) { - ret = ocfs2_extend_allocation(inode, clusters_to_add); + /* + * protect the pages that ocfs2_zero_extend is going to + * be pulling into the page cache.. we do this before the + * metadata extend so that we don't get into the situation + * where we've extended the metadata but can't get the data + * lock to zero. + */ + ret = ocfs2_data_lock(inode, 1); if (ret < 0) { mlog_errno(ret); goto out; } - ret = ocfs2_zero_extend(inode, new_i_size); + ret = ocfs2_extend_allocation(inode, clusters_to_add); if (ret < 0) { mlog_errno(ret); - goto out; + goto out_unlock; } - } - /* No allocation required, we just use this helper to - * do a trivial update of i_size. */ - ret = ocfs2_simple_size_update(inode, di_bh, new_i_size); - if (ret < 0) { - mlog_errno(ret); - goto out; + ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip); + if (ret < 0) { + mlog_errno(ret); + goto out_unlock; + } + } + + if (!tail_to_skip) { + /* We're being called from ocfs2_setattr() which wants + * us to update i_size */ + ret = ocfs2_simple_size_update(inode, di_bh, new_i_size); + if (ret < 0) + mlog_errno(ret); } +out_unlock: + if (clusters_to_add) /* this is the only case in which we lock */ + ocfs2_data_unlock(inode, 1); + out: return ret; } @@ -793,7 +824,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) if (i_size_read(inode) > attr->ia_size) status = ocfs2_truncate_file(inode, bh, attr->ia_size); else - status = ocfs2_extend_file(inode, bh, attr->ia_size); + status = ocfs2_extend_file(inode, bh, attr->ia_size, 0); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); @@ -1049,21 +1080,12 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, if (!clusters) break; - ret = ocfs2_extend_allocation(inode, clusters); + ret = ocfs2_extend_file(inode, NULL, newsize, count); if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); goto out; } - - /* Fill any holes which would've been created by this - * write. If we're O_APPEND, this will wind up - * (correctly) being a noop. */ - ret = ocfs2_zero_extend(inode, (u64) newsize - count); - if (ret < 0) { - mlog_errno(ret); - goto out; - } break; } @@ -1146,6 +1168,22 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, ocfs2_iocb_set_rw_locked(iocb); } + /* + * We're fine letting folks race truncates and extending + * writes with read across the cluster, just like they can + * locally. Hence no rw_lock during read. + * + * Take and drop the meta data lock to update inode fields + * like i_size. This allows the checks down below + * generic_file_aio_read() a chance of actually working. + */ + ret = ocfs2_meta_lock(inode, NULL, NULL, 0); + if (ret < 0) { + mlog_errno(ret); + goto bail; + } + ocfs2_meta_unlock(inode, 0); + ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos); if (ret == -EINVAL) mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n"); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 6a610ae5358..eebc3cfa6be 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -117,7 +117,7 @@ struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb) { struct ocfs2_journal_handle *retval = NULL; - retval = kcalloc(1, sizeof(*retval), GFP_KERNEL); + retval = kcalloc(1, sizeof(*retval), GFP_NOFS); if (!retval) { mlog(ML_ERROR, "Failed to allocate memory for journal " "handle!\n"); @@ -870,9 +870,11 @@ static int ocfs2_force_read_journal(struct inode *inode) if (p_blocks > CONCURRENT_JOURNAL_FILL) p_blocks = CONCURRENT_JOURNAL_FILL; + /* We are reading journal data which should not + * be put in the uptodate cache */ status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), p_blkno, p_blocks, bhs, 0, - inode); + NULL); if (status < 0) { mlog_errno(status); goto bail; @@ -982,7 +984,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, { struct ocfs2_la_recovery_item *item; - item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_KERNEL); + item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS); if (!item) { /* Though we wish to avoid it, we are in fact safe in * skipping local alloc cleanup as fsck.ocfs2 is more diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 04a684dfdd9..b8a00a79332 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c @@ -337,7 +337,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi, (unsigned long long)oi->ip_blkno, (unsigned long long)block, expand_tree); - new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_KERNEL); + new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS); if (!new) { mlog_errno(-ENOMEM); return; @@ -349,7 +349,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi, * has no way of tracking that. */ for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) { tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep, - GFP_KERNEL); + GFP_NOFS); if (!tree[i]) { mlog_errno(-ENOMEM); goto out_free; diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c index 53049a20419..ee42765a855 100644 --- a/fs/ocfs2/vote.c +++ b/fs/ocfs2/vote.c @@ -586,7 +586,7 @@ static struct ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(unsigned int response { struct ocfs2_net_wait_ctxt *w; - w = kcalloc(1, sizeof(*w), GFP_KERNEL); + w = kcalloc(1, sizeof(*w), GFP_NOFS); if (!w) { mlog_errno(-ENOMEM); goto bail; @@ -749,7 +749,7 @@ static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb, BUG_ON(!ocfs2_is_valid_vote_request(type)); - request = kcalloc(1, sizeof(*request), GFP_KERNEL); + request = kcalloc(1, sizeof(*request), GFP_NOFS); if (!request) { mlog_errno(-ENOMEM); } else { @@ -1129,7 +1129,7 @@ static int ocfs2_handle_vote_message(struct o2net_msg *msg, struct ocfs2_super *osb = data; struct ocfs2_vote_work *work; - work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_KERNEL); + work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_NOFS); if (!work) { status = -ENOMEM; mlog_errno(status); diff --git a/fs/open.c b/fs/open.c index 53ec28c3677..317b7c7f38a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1124,7 +1124,6 @@ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, prevent_tail_call(ret); return ret; } -EXPORT_SYMBOL_GPL(sys_openat); #ifndef __alpha__ diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 45ae7dd3c65..7ef1f094de9 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -533,6 +533,7 @@ void del_gendisk(struct gendisk *disk) devfs_remove_disk(disk); + kobject_uevent(&disk->kobj, KOBJ_REMOVE); if (disk->holder_dir) kobject_unregister(disk->holder_dir); if (disk->slave_dir) @@ -545,7 +546,7 @@ void del_gendisk(struct gendisk *disk) kfree(disk_name); } put_device(disk->driverfs_dev); + disk->driverfs_dev = NULL; } - kobject_uevent(&disk->kobj, KOBJ_REMOVE); kobject_del(&disk->kobj); } diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index 34c7a11d91f..70d9c5a37f5 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -434,6 +434,11 @@ smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (dentry->d_name.len > SMB_MAXNAMELEN) goto out; + /* Do not allow lookup of names with backslashes in */ + error = -EINVAL; + if (memchr(dentry->d_name.name, '\\', dentry->d_name.len)) + goto out; + lock_kernel(); error = smb_proc_getattr(dentry, &finfo); #ifdef SMBFS_PARANOIA diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c index c71c375863c..c71dd2760d3 100644 --- a/fs/smbfs/request.c +++ b/fs/smbfs/request.c @@ -339,9 +339,11 @@ int smb_add_request(struct smb_request *req) /* * On timeout or on interrupt we want to try and remove the * request from the recvq/xmitq. + * First check if the request is still part of a queue. (May + * have been removed by some error condition) */ smb_lock_server(server); - if (!(req->rq_flags & SMB_REQ_RECEIVED)) { + if (!list_empty(&req->rq_queue)) { list_del_init(&req->rq_queue); smb_rput(req); } |