diff options
Diffstat (limited to 'fs/gfs2')
35 files changed, 1810 insertions, 2749 deletions
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 7f7947e3dfb..ab2f57e3fb8 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -14,23 +14,11 @@ config GFS2_FS GFS is perfect consistency -- changes made to the filesystem on one machine show up immediately on all other machines in the cluster. - To use the GFS2 filesystem, you will need to enable one or more of - the below locking modules. Documentation and utilities for GFS2 can + To use the GFS2 filesystem in a cluster, you will need to enable + the locking module below. Documentation and utilities for GFS2 can be found here: http://sources.redhat.com/cluster -config GFS2_FS_LOCKING_NOLOCK - tristate "GFS2 \"nolock\" locking module" - depends on GFS2_FS - help - Single node locking module for GFS2. - - Use this module if you want to use GFS2 on a single node without - its clustering features. You can still take advantage of the - large file support, and upgrade to running a full cluster later on - if required. - - If you will only be using GFS2 in cluster mode, you do not need this - module. + The "nolock" lock module is now built in to GFS2 by default. config GFS2_FS_LOCKING_DLM tristate "GFS2 DLM locking module" diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index e2350df02a0..ec65851ec80 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -5,6 +5,5 @@ gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ ops_fstype.o ops_inode.o ops_super.o quota.o \ recovery.o rgrp.o super.o sys.o trans.o util.o -obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/ obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/ diff --git a/fs/gfs2/gfs2.h b/fs/gfs2/gfs2.h index 3bb11c0f8b5..ef606e3a5cf 100644 --- a/fs/gfs2/gfs2.h +++ b/fs/gfs2/gfs2.h @@ -16,11 +16,6 @@ enum { }; enum { - NO_WAIT = 0, - WAIT = 1, -}; - -enum { NO_FORCE = 0, FORCE = 1, }; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d636b3e80f5..c962283d4e7 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -45,21 +45,19 @@ struct gfs2_gl_hash_bucket { struct hlist_head hb_list; }; -struct glock_iter { - int hash; /* hash bucket index */ - struct gfs2_sbd *sdp; /* incore superblock */ - struct gfs2_glock *gl; /* current glock struct */ - struct seq_file *seq; /* sequence file for debugfs */ - char string[512]; /* scratch space */ +struct gfs2_glock_iter { + int hash; /* hash bucket index */ + struct gfs2_sbd *sdp; /* incore superblock */ + struct gfs2_glock *gl; /* current glock struct */ + char string[512]; /* scratch space */ }; typedef void (*glock_examiner) (struct gfs2_glock * gl); static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); -static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); -static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); -static void gfs2_glock_drop_th(struct gfs2_glock *gl); -static void run_queue(struct gfs2_glock *gl); +static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); +#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) +static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); static DECLARE_RWSEM(gfs2_umount_flush_sem); static struct dentry *gfs2_root; @@ -123,33 +121,6 @@ static inline rwlock_t *gl_lock_addr(unsigned int x) #endif /** - * relaxed_state_ok - is a requested lock compatible with the current lock mode? - * @actual: the current state of the lock - * @requested: the lock state that was requested by the caller - * @flags: the modifier flags passed in by the caller - * - * Returns: 1 if the locks are compatible, 0 otherwise - */ - -static inline int relaxed_state_ok(unsigned int actual, unsigned requested, - int flags) -{ - if (actual == requested) - return 1; - - if (flags & GL_EXACT) - return 0; - - if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED) - return 1; - - if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY)) - return 1; - - return 0; -} - -/** * gl_hash() - Turn glock number into hash bucket number * @lock: The glock number * @@ -182,7 +153,7 @@ static void glock_free(struct gfs2_glock *gl) struct gfs2_sbd *sdp = gl->gl_sbd; struct inode *aspace = gl->gl_aspace; - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (sdp->sd_lockstruct.ls_ops->lm_put_lock) sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock); if (aspace) @@ -211,17 +182,14 @@ static void gfs2_glock_hold(struct gfs2_glock *gl) int gfs2_glock_put(struct gfs2_glock *gl) { int rv = 0; - struct gfs2_sbd *sdp = gl->gl_sbd; write_lock(gl_lock_addr(gl->gl_hash)); if (atomic_dec_and_test(&gl->gl_ref)) { hlist_del(&gl->gl_list); write_unlock(gl_lock_addr(gl->gl_hash)); - gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); - gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); - gfs2_assert(sdp, list_empty(&gl->gl_holders)); - gfs2_assert(sdp, list_empty(&gl->gl_waiters1)); - gfs2_assert(sdp, list_empty(&gl->gl_waiters3)); + GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_UNLOCKED); + GLOCK_BUG_ON(gl, !list_empty(&gl->gl_reclaim)); + GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); glock_free(gl); rv = 1; goto out; @@ -281,22 +249,401 @@ static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp, return gl; } +/** + * may_grant - check if its ok to grant a new lock + * @gl: The glock + * @gh: The lock request which we wish to grant + * + * Returns: true if its ok to grant the lock + */ + +static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh) +{ + const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list); + if ((gh->gh_state == LM_ST_EXCLUSIVE || + gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head) + return 0; + if (gl->gl_state == gh->gh_state) + return 1; + if (gh->gh_flags & GL_EXACT) + return 0; + if (gl->gl_state == LM_ST_EXCLUSIVE) { + if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED) + return 1; + if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED) + return 1; + } + if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY)) + return 1; + return 0; +} + +static void gfs2_holder_wake(struct gfs2_holder *gh) +{ + clear_bit(HIF_WAIT, &gh->gh_iflags); + smp_mb__after_clear_bit(); + wake_up_bit(&gh->gh_iflags, HIF_WAIT); +} + +/** + * do_promote - promote as many requests as possible on the current queue + * @gl: The glock + * + * Returns: true if there is a blocked holder at the head of the list + */ + +static int do_promote(struct gfs2_glock *gl) +{ + const struct gfs2_glock_operations *glops = gl->gl_ops; + struct gfs2_holder *gh, *tmp; + int ret; + +restart: + list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + continue; + if (may_grant(gl, gh)) { + if (gh->gh_list.prev == &gl->gl_holders && + glops->go_lock) { + spin_unlock(&gl->gl_spin); + /* FIXME: eliminate this eventually */ + ret = glops->go_lock(gh); + spin_lock(&gl->gl_spin); + if (ret) { + gh->gh_error = ret; + list_del_init(&gh->gh_list); + gfs2_holder_wake(gh); + goto restart; + } + set_bit(HIF_HOLDER, &gh->gh_iflags); + gfs2_holder_wake(gh); + goto restart; + } + set_bit(HIF_HOLDER, &gh->gh_iflags); + gfs2_holder_wake(gh); + continue; + } + if (gh->gh_list.prev == &gl->gl_holders) + return 1; + break; + } + return 0; +} + +/** + * do_error - Something unexpected has happened during a lock request + * + */ + +static inline void do_error(struct gfs2_glock *gl, const int ret) +{ + struct gfs2_holder *gh, *tmp; + + list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + continue; + if (ret & LM_OUT_ERROR) + gh->gh_error = -EIO; + else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) + gh->gh_error = GLR_TRYFAILED; + else + continue; + list_del_init(&gh->gh_list); + gfs2_holder_wake(gh); + } +} + +/** + * find_first_waiter - find the first gh that's waiting for the glock + * @gl: the glock + */ + +static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl) +{ + struct gfs2_holder *gh; + + list_for_each_entry(gh, &gl->gl_holders, gh_list) { + if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) + return gh; + } + return NULL; +} + +/** + * state_change - record that the glock is now in a different state + * @gl: the glock + * @new_state the new state + * + */ + +static void state_change(struct gfs2_glock *gl, unsigned int new_state) +{ + int held1, held2; + + held1 = (gl->gl_state != LM_ST_UNLOCKED); + held2 = (new_state != LM_ST_UNLOCKED); + + if (held1 != held2) { + if (held2) + gfs2_glock_hold(gl); + else + gfs2_glock_put(gl); + } + + gl->gl_state = new_state; + gl->gl_tchange = jiffies; +} + +static void gfs2_demote_wake(struct gfs2_glock *gl) +{ + gl->gl_demote_state = LM_ST_EXCLUSIVE; + clear_bit(GLF_DEMOTE, &gl->gl_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&gl->gl_flags, GLF_DEMOTE); +} + +/** + * finish_xmote - The DLM has replied to one of our lock requests + * @gl: The glock + * @ret: The status from the DLM + * + */ + +static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) +{ + const struct gfs2_glock_operations *glops = gl->gl_ops; + struct gfs2_holder *gh; + unsigned state = ret & LM_OUT_ST_MASK; + + spin_lock(&gl->gl_spin); + state_change(gl, state); + gh = find_first_waiter(gl); + + /* Demote to UN request arrived during demote to SH or DF */ + if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && + state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED) + gl->gl_target = LM_ST_UNLOCKED; + + /* Check for state != intended state */ + if (unlikely(state != gl->gl_target)) { + if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { + /* move to back of queue and try next entry */ + if (ret & LM_OUT_CANCELED) { + if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0) + list_move_tail(&gh->gh_list, &gl->gl_holders); + gh = find_first_waiter(gl); + gl->gl_target = gh->gh_state; + goto retry; + } + /* Some error or failed "try lock" - report it */ + if ((ret & LM_OUT_ERROR) || + (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { + gl->gl_target = gl->gl_state; + do_error(gl, ret); + goto out; + } + } + switch(state) { + /* Unlocked due to conversion deadlock, try again */ + case LM_ST_UNLOCKED: +retry: + do_xmote(gl, gh, gl->gl_target); + break; + /* Conversion fails, unlock and try again */ + case LM_ST_SHARED: + case LM_ST_DEFERRED: + do_xmote(gl, gh, LM_ST_UNLOCKED); + break; + default: /* Everything else */ + printk(KERN_ERR "GFS2: wanted %u got %u\n", gl->gl_target, state); + GLOCK_BUG_ON(gl, 1); + } + spin_unlock(&gl->gl_spin); + gfs2_glock_put(gl); + return; + } + + /* Fast path - we got what we asked for */ + if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) + gfs2_demote_wake(gl); + if (state != LM_ST_UNLOCKED) { + if (glops->go_xmote_bh) { + int rv; + spin_unlock(&gl->gl_spin); + rv = glops->go_xmote_bh(gl, gh); + if (rv == -EAGAIN) + return; + spin_lock(&gl->gl_spin); + if (rv) { + do_error(gl, rv); + goto out; + } + } + do_promote(gl); + } +out: + clear_bit(GLF_LOCK, &gl->gl_flags); + spin_unlock(&gl->gl_spin); + gfs2_glock_put(gl); +} + +static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, + unsigned int cur_state, unsigned int req_state, + unsigned int flags) +{ + int ret = LM_OUT_ERROR; + + if (!sdp->sd_lockstruct.ls_ops->lm_lock) + return req_state == LM_ST_UNLOCKED ? 0 : req_state; + + if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state, + req_state, flags); + return ret; +} + +/** + * do_xmote - Calls the DLM to change the state of a lock + * @gl: The lock state + * @gh: The holder (only for promotes) + * @target: The target lock state + * + */ + +static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target) +{ + const struct gfs2_glock_operations *glops = gl->gl_ops; + struct gfs2_sbd *sdp = gl->gl_sbd; + unsigned int lck_flags = gh ? gh->gh_flags : 0; + int ret; + + lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | + LM_FLAG_PRIORITY); + BUG_ON(gl->gl_state == target); + BUG_ON(gl->gl_state == gl->gl_target); + if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && + glops->go_inval) { + set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); + do_error(gl, 0); /* Fail queued try locks */ + } + spin_unlock(&gl->gl_spin); + if (glops->go_xmote_th) + glops->go_xmote_th(gl); + if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) + glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); + clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); + + gfs2_glock_hold(gl); + if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED || + gl->gl_state == LM_ST_DEFERRED) && + !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) + lck_flags |= LM_FLAG_TRY_1CB; + ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, target, lck_flags); + + if (!(ret & LM_OUT_ASYNC)) { + finish_xmote(gl, ret); + gfs2_glock_hold(gl); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put(gl); + } else { + GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC); + } + spin_lock(&gl->gl_spin); +} + +/** + * find_first_holder - find the first "holder" gh + * @gl: the glock + */ + +static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) +{ + struct gfs2_holder *gh; + + if (!list_empty(&gl->gl_holders)) { + gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + return gh; + } + return NULL; +} + +/** + * run_queue - do all outstanding tasks related to a glock + * @gl: The glock in question + * @nonblock: True if we must not block in run_queue + * + */ + +static void run_queue(struct gfs2_glock *gl, const int nonblock) +{ + struct gfs2_holder *gh = NULL; + + if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) + return; + + GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); + + if (test_bit(GLF_DEMOTE, &gl->gl_flags) && + gl->gl_demote_state != gl->gl_state) { + if (find_first_holder(gl)) + goto out; + if (nonblock) + goto out_sched; + set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); + GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); + gl->gl_target = gl->gl_demote_state; + } else { + if (test_bit(GLF_DEMOTE, &gl->gl_flags)) + gfs2_demote_wake(gl); + if (do_promote(gl) == 0) + goto out; + gh = find_first_waiter(gl); + gl->gl_target = gh->gh_state; + if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) + do_error(gl, 0); /* Fail queued try locks */ + } + do_xmote(gl, gh, gl->gl_target); + return; + +out_sched: + gfs2_glock_hold(gl); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put(gl); +out: + clear_bit(GLF_LOCK, &gl->gl_flags); +} + static void glock_work_func(struct work_struct *work) { + unsigned long delay = 0; struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); + if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) + finish_xmote(gl, gl->gl_reply); spin_lock(&gl->gl_spin); - if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)) - set_bit(GLF_DEMOTE, &gl->gl_flags); - run_queue(gl); + if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && + gl->gl_state != LM_ST_UNLOCKED && + gl->gl_demote_state != LM_ST_EXCLUSIVE) { + unsigned long holdtime, now = jiffies; + holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; + if (time_before(now, holdtime)) + delay = holdtime - now; + set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags); + } + run_queue(gl, 0); spin_unlock(&gl->gl_spin); - gfs2_glock_put(gl); + if (!delay || + queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) + gfs2_glock_put(gl); } static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name, void **lockp) { int error = -EIO; + if (!sdp->sd_lockstruct.ls_ops->lm_get_lock) + return 0; if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) error = sdp->sd_lockstruct.ls_ops->lm_get_lock( sdp->sd_lockstruct.ls_lockspace, name, lockp); @@ -342,12 +689,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_name = name; atomic_set(&gl->gl_ref, 1); gl->gl_state = LM_ST_UNLOCKED; + gl->gl_target = LM_ST_UNLOCKED; gl->gl_demote_state = LM_ST_EXCLUSIVE; gl->gl_hash = hash; - gl->gl_owner_pid = NULL; - gl->gl_ip = 0; gl->gl_ops = glops; - gl->gl_req_gh = NULL; gl->gl_stamp = jiffies; gl->gl_tchange = jiffies; gl->gl_object = NULL; @@ -447,13 +792,6 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) gh->gh_ip = 0; } -static void gfs2_holder_wake(struct gfs2_holder *gh) -{ - clear_bit(HIF_WAIT, &gh->gh_iflags); - smp_mb__after_clear_bit(); - wake_up_bit(&gh->gh_iflags, HIF_WAIT); -} - static int just_schedule(void *word) { schedule(); @@ -466,14 +804,6 @@ static void wait_on_holder(struct gfs2_holder *gh) wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); } -static void gfs2_demote_wake(struct gfs2_glock *gl) -{ - gl->gl_demote_state = LM_ST_EXCLUSIVE; - clear_bit(GLF_DEMOTE, &gl->gl_flags); - smp_mb__after_clear_bit(); - wake_up_bit(&gl->gl_flags, GLF_DEMOTE); -} - static void wait_on_demote(struct gfs2_glock *gl) { might_sleep(); @@ -481,217 +811,6 @@ static void wait_on_demote(struct gfs2_glock *gl) } /** - * rq_mutex - process a mutex request in the queue - * @gh: the glock holder - * - * Returns: 1 if the queue is blocked - */ - -static int rq_mutex(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; - - list_del_init(&gh->gh_list); - /* gh->gh_error never examined. */ - set_bit(GLF_LOCK, &gl->gl_flags); - clear_bit(HIF_WAIT, &gh->gh_iflags); - smp_mb(); - wake_up_bit(&gh->gh_iflags, HIF_WAIT); - - return 1; -} - -/** - * rq_promote - process a promote request in the queue - * @gh: the glock holder - * - * Acquire a new inter-node lock, or change a lock state to more restrictive. - * - * Returns: 1 if the queue is blocked - */ - -static int rq_promote(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; - - if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { - if (list_empty(&gl->gl_holders)) { - gl->gl_req_gh = gh; - set_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - gfs2_glock_xmote_th(gh->gh_gl, gh); - spin_lock(&gl->gl_spin); - } - return 1; - } - - if (list_empty(&gl->gl_holders)) { - set_bit(HIF_FIRST, &gh->gh_iflags); - set_bit(GLF_LOCK, &gl->gl_flags); - } else { - struct gfs2_holder *next_gh; - if (gh->gh_state == LM_ST_EXCLUSIVE) - return 1; - next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder, - gh_list); - if (next_gh->gh_state == LM_ST_EXCLUSIVE) - return 1; - } - - list_move_tail(&gh->gh_list, &gl->gl_holders); - gh->gh_error = 0; - set_bit(HIF_HOLDER, &gh->gh_iflags); - - gfs2_holder_wake(gh); - - return 0; -} - -/** - * rq_demote - process a demote request in the queue - * @gh: the glock holder - * - * Returns: 1 if the queue is blocked - */ - -static int rq_demote(struct gfs2_glock *gl) -{ - if (!list_empty(&gl->gl_holders)) - return 1; - - if (gl->gl_state == gl->gl_demote_state || - gl->gl_state == LM_ST_UNLOCKED) { - gfs2_demote_wake(gl); - return 0; - } - - set_bit(GLF_LOCK, &gl->gl_flags); - set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); - - if (gl->gl_demote_state == LM_ST_UNLOCKED || - gl->gl_state != LM_ST_EXCLUSIVE) { - spin_unlock(&gl->gl_spin); - gfs2_glock_drop_th(gl); - } else { - spin_unlock(&gl->gl_spin); - gfs2_glock_xmote_th(gl, NULL); - } - - spin_lock(&gl->gl_spin); - clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); - - return 0; -} - -/** - * run_queue - process holder structures on a glock - * @gl: the glock - * - */ -static void run_queue(struct gfs2_glock *gl) -{ - struct gfs2_holder *gh; - int blocked = 1; - - for (;;) { - if (test_bit(GLF_LOCK, &gl->gl_flags)) - break; - - if (!list_empty(&gl->gl_waiters1)) { - gh = list_entry(gl->gl_waiters1.next, - struct gfs2_holder, gh_list); - blocked = rq_mutex(gh); - } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { - blocked = rq_demote(gl); - if (test_bit(GLF_WAITERS2, &gl->gl_flags) && - !blocked) { - set_bit(GLF_DEMOTE, &gl->gl_flags); - gl->gl_demote_state = LM_ST_UNLOCKED; - } - clear_bit(GLF_WAITERS2, &gl->gl_flags); - } else if (!list_empty(&gl->gl_waiters3)) { - gh = list_entry(gl->gl_waiters3.next, - struct gfs2_holder, gh_list); - blocked = rq_promote(gh); - } else - break; - - if (blocked) - break; - } -} - -/** - * gfs2_glmutex_lock - acquire a local lock on a glock - * @gl: the glock - * - * Gives caller exclusive access to manipulate a glock structure. - */ - -static void gfs2_glmutex_lock(struct gfs2_glock *gl) -{ - spin_lock(&gl->gl_spin); - if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { - struct gfs2_holder gh; - - gfs2_holder_init(gl, 0, 0, &gh); - set_bit(HIF_WAIT, &gh.gh_iflags); - list_add_tail(&gh.gh_list, &gl->gl_waiters1); - spin_unlock(&gl->gl_spin); - wait_on_holder(&gh); - gfs2_holder_uninit(&gh); - } else { - gl->gl_owner_pid = get_pid(task_pid(current)); - gl->gl_ip = (unsigned long)__builtin_return_address(0); - spin_unlock(&gl->gl_spin); - } -} - -/** - * gfs2_glmutex_trylock - try to acquire a local lock on a glock - * @gl: the glock - * - * Returns: 1 if the glock is acquired - */ - -static int gfs2_glmutex_trylock(struct gfs2_glock *gl) -{ - int acquired = 1; - - spin_lock(&gl->gl_spin); - if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { - acquired = 0; - } else { - gl->gl_owner_pid = get_pid(task_pid(current)); - gl->gl_ip = (unsigned long)__builtin_return_address(0); - } - spin_unlock(&gl->gl_spin); - - return acquired; -} - -/** - * gfs2_glmutex_unlock - release a local lock on a glock - * @gl: the glock - * - */ - -static void gfs2_glmutex_unlock(struct gfs2_glock *gl) -{ - struct pid *pid; - - spin_lock(&gl->gl_spin); - clear_bit(GLF_LOCK, &gl->gl_flags); - pid = gl->gl_owner_pid; - gl->gl_owner_pid = NULL; - gl->gl_ip = 0; - run_queue(gl); - spin_unlock(&gl->gl_spin); - - put_pid(pid); -} - -/** * handle_callback - process a demote request * @gl: the glock * @state: the state the caller wants us to change to @@ -705,398 +824,45 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, { int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; - spin_lock(&gl->gl_spin); set_bit(bit, &gl->gl_flags); if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { gl->gl_demote_state = state; gl->gl_demote_time = jiffies; if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && - gl->gl_object) { + gl->gl_object) gfs2_glock_schedule_for_reclaim(gl); - spin_unlock(&gl->gl_spin); - return; - } } else if (gl->gl_demote_state != LM_ST_UNLOCKED && gl->gl_demote_state != state) { - if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) - set_bit(GLF_WAITERS2, &gl->gl_flags); - else - gl->gl_demote_state = LM_ST_UNLOCKED; - } - spin_unlock(&gl->gl_spin); -} - -/** - * state_change - record that the glock is now in a different state - * @gl: the glock - * @new_state the new state - * - */ - -static void state_change(struct gfs2_glock *gl, unsigned int new_state) -{ - int held1, held2; - - held1 = (gl->gl_state != LM_ST_UNLOCKED); - held2 = (new_state != LM_ST_UNLOCKED); - - if (held1 != held2) { - if (held2) - gfs2_glock_hold(gl); - else - gfs2_glock_put(gl); - } - - gl->gl_state = new_state; - gl->gl_tchange = jiffies; -} - -/** - * drop_bh - Called after a lock module unlock completes - * @gl: the glock - * @ret: the return status - * - * Doesn't wake up the process waiting on the struct gfs2_holder (if any) - * Doesn't drop the reference on the glock the top half took out - * - */ - -static void drop_bh(struct gfs2_glock *gl, unsigned int ret) -{ - struct gfs2_sbd *sdp = gl->gl_sbd; - struct gfs2_holder *gh = gl->gl_req_gh; - - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); - gfs2_assert_warn(sdp, !ret); - - state_change(gl, LM_ST_UNLOCKED); - - if (test_and_clear_bit(GLF_CONV_DEADLK, &gl->gl_flags)) { - spin_lock(&gl->gl_spin); - gh->gh_error = 0; - spin_unlock(&gl->gl_spin); - gfs2_glock_xmote_th(gl, gl->gl_req_gh); - gfs2_glock_put(gl); - return; + gl->gl_demote_state = LM_ST_UNLOCKED; } - - spin_lock(&gl->gl_spin); - gfs2_demote_wake(gl); - clear_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - gfs2_glock_put(gl); } /** - * xmote_bh - Called after the lock module is done acquiring a lock - * @gl: The glock in question - * @ret: the int returned from the lock module - * - */ - -static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) -{ - struct gfs2_sbd *sdp = gl->gl_sbd; - const struct gfs2_glock_operations *glops = gl->gl_ops; - struct gfs2_holder *gh = gl->gl_req_gh; - int op_done = 1; - - if (!gh && (ret & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) { - drop_bh(gl, ret); - return; - } - - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); - gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); - - state_change(gl, ret & LM_OUT_ST_MASK); - - /* Deal with each possible exit condition */ - - if (!gh) { - gl->gl_stamp = jiffies; - if (ret & LM_OUT_CANCELED) { - op_done = 0; - } else { - spin_lock(&gl->gl_spin); - if (gl->gl_state != gl->gl_demote_state) { - spin_unlock(&gl->gl_spin); - gfs2_glock_drop_th(gl); - gfs2_glock_put(gl); - return; - } - gfs2_demote_wake(gl); - spin_unlock(&gl->gl_spin); - } - } else { - spin_lock(&gl->gl_spin); - if (ret & LM_OUT_CONV_DEADLK) { - gh->gh_error = 0; - set_bit(GLF_CONV_DEADLK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - gfs2_glock_drop_th(gl); - gfs2_glock_put(gl); - return; - } - list_del_init(&gh->gh_list); - gh->gh_error = -EIO; - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - goto out; - gh->gh_error = GLR_CANCELED; - if (ret & LM_OUT_CANCELED) - goto out; - if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { - list_add_tail(&gh->gh_list, &gl->gl_holders); - gh->gh_error = 0; - set_bit(HIF_HOLDER, &gh->gh_iflags); - set_bit(HIF_FIRST, &gh->gh_iflags); - op_done = 0; - goto out; - } - gh->gh_error = GLR_TRYFAILED; - if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) - goto out; - gh->gh_error = -EINVAL; - if (gfs2_assert_withdraw(sdp, 0) == -1) - fs_err(sdp, "ret = 0x%.8X\n", ret); -out: - spin_unlock(&gl->gl_spin); - } - - if (glops->go_xmote_bh) - glops->go_xmote_bh(gl); - - if (op_done) { - spin_lock(&gl->gl_spin); - gl->gl_req_gh = NULL; - clear_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - } - - gfs2_glock_put(gl); - - if (gh) - gfs2_holder_wake(gh); -} - -static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, - unsigned int cur_state, unsigned int req_state, - unsigned int flags) -{ - int ret = 0; - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state, - req_state, flags); - return ret; -} - -/** - * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock - * @gl: The glock in question - * @state: the requested state - * @flags: modifier flags to the lock call - * - */ - -static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) -{ - struct gfs2_sbd *sdp = gl->gl_sbd; - int flags = gh ? gh->gh_flags : 0; - unsigned state = gh ? gh->gh_state : gl->gl_demote_state; - const struct gfs2_glock_operations *glops = gl->gl_ops; - int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | - LM_FLAG_NOEXP | LM_FLAG_ANY | - LM_FLAG_PRIORITY); - unsigned int lck_ret; - - if (glops->go_xmote_th) - glops->go_xmote_th(gl); - if (state == LM_ST_DEFERRED && glops->go_inval) - glops->go_inval(gl, DIO_METADATA); - - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); - gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED); - gfs2_assert_warn(sdp, state != gl->gl_state); - - gfs2_glock_hold(gl); - - lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags); - - if (gfs2_assert_withdraw(sdp, !(lck_ret & LM_OUT_ERROR))) - return; - - if (lck_ret & LM_OUT_ASYNC) - gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC); - else - xmote_bh(gl, lck_ret); -} - -static unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock, - unsigned int cur_state) -{ - int ret = 0; - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state); - return ret; -} - -/** - * gfs2_glock_drop_th - call into the lock module to unlock a lock - * @gl: the glock - * - */ - -static void gfs2_glock_drop_th(struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_sbd; - const struct gfs2_glock_operations *glops = gl->gl_ops; - unsigned int ret; - - if (glops->go_xmote_th) - glops->go_xmote_th(gl); - if (glops->go_inval) - glops->go_inval(gl, DIO_METADATA); - - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); - gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); - - gfs2_glock_hold(gl); - - ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state); - - if (gfs2_assert_withdraw(sdp, !(ret & LM_OUT_ERROR))) - return; - - if (!ret) - drop_bh(gl, ret); - else - gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC); -} - -/** - * do_cancels - cancel requests for locks stuck waiting on an expire flag - * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock - * - * Don't cancel GL_NOCANCEL requests. - */ - -static void do_cancels(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; - - spin_lock(&gl->gl_spin); - - while (gl->gl_req_gh != gh && - !test_bit(HIF_HOLDER, &gh->gh_iflags) && - !list_empty(&gh->gh_list)) { - if (!(gl->gl_req_gh && (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) { - spin_unlock(&gl->gl_spin); - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock); - msleep(100); - spin_lock(&gl->gl_spin); - } else { - spin_unlock(&gl->gl_spin); - msleep(100); - spin_lock(&gl->gl_spin); - } - } - - spin_unlock(&gl->gl_spin); -} - -/** - * glock_wait_internal - wait on a glock acquisition + * gfs2_glock_wait - wait on a glock acquisition * @gh: the glock holder * * Returns: 0 on success */ -static int glock_wait_internal(struct gfs2_holder *gh) +int gfs2_glock_wait(struct gfs2_holder *gh) { - struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; - const struct gfs2_glock_operations *glops = gl->gl_ops; - - if (test_bit(HIF_ABORTED, &gh->gh_iflags)) - return -EIO; - - if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { - spin_lock(&gl->gl_spin); - if (gl->gl_req_gh != gh && - !test_bit(HIF_HOLDER, &gh->gh_iflags) && - !list_empty(&gh->gh_list)) { - list_del_init(&gh->gh_list); - gh->gh_error = GLR_TRYFAILED; - run_queue(gl); - spin_unlock(&gl->gl_spin); - return gh->gh_error; - } - spin_unlock(&gl->gl_spin); - } - - if (gh->gh_flags & LM_FLAG_PRIORITY) - do_cancels(gh); - wait_on_holder(gh); - if (gh->gh_error) - return gh->gh_error; - - gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags)); - gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state, gh->gh_state, - gh->gh_flags)); - - if (test_bit(HIF_FIRST, &gh->gh_iflags)) { - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - - if (glops->go_lock) { - gh->gh_error = glops->go_lock(gh); - if (gh->gh_error) { - spin_lock(&gl->gl_spin); - list_del_init(&gh->gh_list); - spin_unlock(&gl->gl_spin); - } - } - - spin_lock(&gl->gl_spin); - gl->gl_req_gh = NULL; - clear_bit(GLF_LOCK, &gl->gl_flags); - run_queue(gl); - spin_unlock(&gl->gl_spin); - } - return gh->gh_error; } -static inline struct gfs2_holder * -find_holder_by_owner(struct list_head *head, struct pid *pid) -{ - struct gfs2_holder *gh; - - list_for_each_entry(gh, head, gh_list) { - if (gh->gh_owner_pid == pid) - return gh; - } - - return NULL; -} - -static void print_dbg(struct glock_iter *gi, const char *fmt, ...) +void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) { va_list args; va_start(args, fmt); - if (gi) { + if (seq) { + struct gfs2_glock_iter *gi = seq->private; vsprintf(gi->string, fmt, args); - seq_printf(gi->seq, gi->string); - } - else + seq_printf(seq, gi->string); + } else { + printk(KERN_ERR " "); vprintk(fmt, args); + } va_end(args); } @@ -1104,50 +870,76 @@ static void print_dbg(struct glock_iter *gi, const char *fmt, ...) * add_to_queue - Add a holder to the wait queue (but look for recursion) * @gh: the holder structure to add * + * Eventually we should move the recursive locking trap to a + * debugging option or something like that. This is the fast + * path and needs to have the minimum number of distractions. + * */ -static void add_to_queue(struct gfs2_holder *gh) +static inline void add_to_queue(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_holder *existing; + struct gfs2_sbd *sdp = gl->gl_sbd; + struct list_head *insert_pt = NULL; + struct gfs2_holder *gh2; + int try_lock = 0; BUG_ON(gh->gh_owner_pid == NULL); if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) BUG(); - if (!(gh->gh_flags & GL_FLOCK)) { - existing = find_holder_by_owner(&gl->gl_holders, - gh->gh_owner_pid); - if (existing) { - print_symbol(KERN_WARNING "original: %s\n", - existing->gh_ip); - printk(KERN_INFO "pid : %d\n", - pid_nr(existing->gh_owner_pid)); - printk(KERN_INFO "lock type : %d lock state : %d\n", - existing->gh_gl->gl_name.ln_type, - existing->gh_gl->gl_state); - print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); - printk(KERN_INFO "pid : %d\n", - pid_nr(gh->gh_owner_pid)); - printk(KERN_INFO "lock type : %d lock state : %d\n", - gl->gl_name.ln_type, gl->gl_state); - BUG(); - } - - existing = find_holder_by_owner(&gl->gl_waiters3, - gh->gh_owner_pid); - if (existing) { - print_symbol(KERN_WARNING "original: %s\n", - existing->gh_ip); - print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); - BUG(); + if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { + if (test_bit(GLF_LOCK, &gl->gl_flags)) + try_lock = 1; + if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) + goto fail; + } + + list_for_each_entry(gh2, &gl->gl_holders, gh_list) { + if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && + (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) + goto trap_recursive; + if (try_lock && + !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) && + !may_grant(gl, gh)) { +fail: + gh->gh_error = GLR_TRYFAILED; + gfs2_holder_wake(gh); + return; } + if (test_bit(HIF_HOLDER, &gh2->gh_iflags)) + continue; + if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) + insert_pt = &gh2->gh_list; + } + if (likely(insert_pt == NULL)) { + list_add_tail(&gh->gh_list, &gl->gl_holders); + if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) + goto do_cancel; + return; + } + list_add_tail(&gh->gh_list, insert_pt); +do_cancel: + gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); + if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { + spin_unlock(&gl->gl_spin); + if (sdp->sd_lockstruct.ls_ops->lm_cancel) + sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock); + spin_lock(&gl->gl_spin); } + return; - if (gh->gh_flags & LM_FLAG_PRIORITY) - list_add(&gh->gh_list, &gl->gl_waiters3); - else - list_add_tail(&gh->gh_list, &gl->gl_waiters3); +trap_recursive: + print_symbol(KERN_ERR "original: %s\n", gh2->gh_ip); + printk(KERN_ERR "pid: %d\n", pid_nr(gh2->gh_owner_pid)); + printk(KERN_ERR "lock type: %d req lock state : %d\n", + gh2->gh_gl->gl_name.ln_type, gh2->gh_state); + print_symbol(KERN_ERR "new: %s\n", gh->gh_ip); + printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid)); + printk(KERN_ERR "lock type: %d req lock state : %d\n", + gh->gh_gl->gl_name.ln_type, gh->gh_state); + __dump_glock(NULL, gl); + BUG(); } /** @@ -1165,24 +957,16 @@ int gfs2_glock_nq(struct gfs2_holder *gh) struct gfs2_sbd *sdp = gl->gl_sbd; int error = 0; -restart: - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { - set_bit(HIF_ABORTED, &gh->gh_iflags); + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) return -EIO; - } spin_lock(&gl->gl_spin); add_to_queue(gh); - run_queue(gl); + run_queue(gl, 1); spin_unlock(&gl->gl_spin); - if (!(gh->gh_flags & GL_ASYNC)) { - error = glock_wait_internal(gh); - if (error == GLR_CANCELED) { - msleep(100); - goto restart; - } - } + if (!(gh->gh_flags & GL_ASYNC)) + error = gfs2_glock_wait(gh); return error; } @@ -1196,48 +980,7 @@ restart: int gfs2_glock_poll(struct gfs2_holder *gh) { - struct gfs2_glock *gl = gh->gh_gl; - int ready = 0; - - spin_lock(&gl->gl_spin); - - if (test_bit(HIF_HOLDER, &gh->gh_iflags)) - ready = 1; - else if (list_empty(&gh->gh_list)) { - if (gh->gh_error == GLR_CANCELED) { - spin_unlock(&gl->gl_spin); - msleep(100); - if (gfs2_glock_nq(gh)) - return 1; - return 0; - } else - ready = 1; - } - - spin_unlock(&gl->gl_spin); - - return ready; -} - -/** - * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC - * @gh: the holder structure - * - * Returns: 0, GLR_TRYFAILED, or errno on failure - */ - -int gfs2_glock_wait(struct gfs2_holder *gh) -{ - int error; - - error = glock_wait_internal(gh); - if (error == GLR_CANCELED) { - msleep(100); - gh->gh_flags &= ~GL_ASYNC; - error = gfs2_glock_nq(gh); - } - - return error; + return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1; } /** @@ -1251,26 +994,30 @@ void gfs2_glock_dq(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned delay = 0; + int fast_path = 0; + spin_lock(&gl->gl_spin); if (gh->gh_flags & GL_NOCACHE) handle_callback(gl, LM_ST_UNLOCKED, 0, 0); - gfs2_glmutex_lock(gl); - - spin_lock(&gl->gl_spin); list_del_init(&gh->gh_list); - - if (list_empty(&gl->gl_holders)) { + if (find_first_holder(gl) == NULL) { if (glops->go_unlock) { + GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags)); spin_unlock(&gl->gl_spin); glops->go_unlock(gh); spin_lock(&gl->gl_spin); + clear_bit(GLF_LOCK, &gl->gl_flags); } gl->gl_stamp = jiffies; + if (list_empty(&gl->gl_holders) && + !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && + !test_bit(GLF_DEMOTE, &gl->gl_flags)) + fast_path = 1; } - - clear_bit(GLF_LOCK, &gl->gl_flags); spin_unlock(&gl->gl_spin); + if (likely(fast_path)) + return; gfs2_glock_hold(gl); if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && @@ -1454,6 +1201,8 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp) { int error = -EIO; + if (!sdp->sd_lockstruct.ls_ops->lm_hold_lvb) + return 0; if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp); return error; @@ -1469,20 +1218,14 @@ int gfs2_lvb_hold(struct gfs2_glock *gl) { int error; - gfs2_glmutex_lock(gl); - if (!atomic_read(&gl->gl_lvb_count)) { error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb); - if (error) { - gfs2_glmutex_unlock(gl); + if (error) return error; - } gfs2_glock_hold(gl); } atomic_inc(&gl->gl_lvb_count); - gfs2_glmutex_unlock(gl); - return 0; } @@ -1497,17 +1240,13 @@ void gfs2_lvb_unhold(struct gfs2_glock *gl) struct gfs2_sbd *sdp = gl->gl_sbd; gfs2_glock_hold(gl); - gfs2_glmutex_lock(gl); - gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0); if (atomic_dec_and_test(&gl->gl_lvb_count)) { - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (sdp->sd_lockstruct.ls_ops->lm_unhold_lvb) sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb); gl->gl_lvb = NULL; gfs2_glock_put(gl); } - - gfs2_glmutex_unlock(gl); gfs2_glock_put(gl); } @@ -1526,8 +1265,12 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; if (time_before(now, holdtime)) delay = holdtime - now; + if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) + delay = gl->gl_ops->go_min_hold_time; + spin_lock(&gl->gl_spin); handle_callback(gl, state, 1, delay); + spin_unlock(&gl->gl_spin); if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); } @@ -1568,7 +1311,8 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) gl = gfs2_glock_find(sdp, &async->lc_name); if (gfs2_assert_warn(sdp, gl)) return; - xmote_bh(gl, async->lc_ret); + gl->gl_reply = async->lc_ret; + set_bit(GLF_REPLY_PENDING, &gl->gl_flags); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); up_read(&gfs2_umount_flush_sem); @@ -1581,11 +1325,6 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) wake_up_process(sdp->sd_recoverd_process); return; - case LM_CB_DROPLOCKS: - gfs2_gl_hash_clear(sdp, NO_WAIT); - gfs2_quota_scan(sdp); - return; - default: gfs2_assert_warn(sdp, 0); return; @@ -1646,6 +1385,7 @@ void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) void gfs2_reclaim_glock(struct gfs2_sbd *sdp) { struct gfs2_glock *gl; + int done_callback = 0; spin_lock(&sdp->sd_reclaim_lock); if (list_empty(&sdp->sd_reclaim_list)) { @@ -1660,14 +1400,16 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp) atomic_dec(&sdp->sd_reclaim_count); atomic_inc(&sdp->sd_reclaimed); - if (gfs2_glmutex_trylock(gl)) { - if (list_empty(&gl->gl_holders) && - gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) - handle_callback(gl, LM_ST_UNLOCKED, 0, 0); - gfs2_glmutex_unlock(gl); + spin_lock(&gl->gl_spin); + if (find_first_holder(gl) == NULL && + gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) { + handle_callback(gl, LM_ST_UNLOCKED, 0, 0); + done_callback = 1; } - - gfs2_glock_put(gl); + spin_unlock(&gl->gl_spin); + if (!done_callback || + queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put(gl); } /** @@ -1724,18 +1466,14 @@ static void scan_glock(struct gfs2_glock *gl) { if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) return; + if (test_bit(GLF_LOCK, &gl->gl_flags)) + return; - if (gfs2_glmutex_trylock(gl)) { - if (list_empty(&gl->gl_holders) && - gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) - goto out_schedule; - gfs2_glmutex_unlock(gl); - } - return; - -out_schedule: - gfs2_glmutex_unlock(gl); - gfs2_glock_schedule_for_reclaim(gl); + spin_lock(&gl->gl_spin); + if (find_first_holder(gl) == NULL && + gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) + gfs2_glock_schedule_for_reclaim(gl); + spin_unlock(&gl->gl_spin); } /** @@ -1760,12 +1498,13 @@ static void clear_glock(struct gfs2_glock *gl) spin_unlock(&sdp->sd_reclaim_lock); } - if (gfs2_glmutex_trylock(gl)) { - if (list_empty(&gl->gl_holders) && - gl->gl_state != LM_ST_UNLOCKED) - handle_callback(gl, LM_ST_UNLOCKED, 0, 0); - gfs2_glmutex_unlock(gl); - } + spin_lock(&gl->gl_spin); + if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED) + handle_callback(gl, LM_ST_UNLOCKED, 0, 0); + spin_unlock(&gl->gl_spin); + gfs2_glock_hold(gl); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put(gl); } /** @@ -1773,11 +1512,10 @@ static void clear_glock(struct gfs2_glock *gl) * @sdp: the filesystem * @wait: wait until it's all gone * - * Called when unmounting the filesystem, or when inter-node lock manager - * requests DROPLOCKS because it is running out of capacity. + * Called when unmounting the filesystem. */ -void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) +void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) { unsigned long t; unsigned int x; @@ -1792,7 +1530,7 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) cont = 1; } - if (!wait || !cont) + if (!cont) break; if (time_after_eq(jiffies, @@ -1810,180 +1548,162 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) } } -/* - * Diagnostic routines to help debug distributed deadlock - */ - -static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt, - unsigned long address) +static const char *state2str(unsigned state) { - char buffer[KSYM_SYMBOL_LEN]; - - sprint_symbol(buffer, address); - print_dbg(gi, fmt, buffer); + switch(state) { + case LM_ST_UNLOCKED: + return "UN"; + case LM_ST_SHARED: + return "SH"; + case LM_ST_DEFERRED: + return "DF"; + case LM_ST_EXCLUSIVE: + return "EX"; + } + return "??"; +} + +static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) +{ + char *p = buf; + if (flags & LM_FLAG_TRY) + *p++ = 't'; + if (flags & LM_FLAG_TRY_1CB) + *p++ = 'T'; + if (flags & LM_FLAG_NOEXP) + *p++ = 'e'; + if (flags & LM_FLAG_ANY) + *p++ = 'a'; + if (flags & LM_FLAG_PRIORITY) + *p++ = 'p'; + if (flags & GL_ASYNC) + *p++ = 'a'; + if (flags & GL_EXACT) + *p++ = 'E'; + if (flags & GL_NOCACHE) + *p++ = 'c'; + if (test_bit(HIF_HOLDER, &iflags)) + *p++ = 'H'; + if (test_bit(HIF_WAIT, &iflags)) + *p++ = 'W'; + if (test_bit(HIF_FIRST, &iflags)) + *p++ = 'F'; + *p = 0; + return buf; } /** * dump_holder - print information about a glock holder - * @str: a string naming the type of holder + * @seq: the seq_file struct * @gh: the glock holder * * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_holder(struct glock_iter *gi, char *str, - struct gfs2_holder *gh) +static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) { - unsigned int x; - struct task_struct *gh_owner; + struct task_struct *gh_owner = NULL; + char buffer[KSYM_SYMBOL_LEN]; + char flags_buf[32]; - print_dbg(gi, " %s\n", str); - if (gh->gh_owner_pid) { - print_dbg(gi, " owner = %ld ", - (long)pid_nr(gh->gh_owner_pid)); + sprint_symbol(buffer, gh->gh_ip); + if (gh->gh_owner_pid) gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); - if (gh_owner) - print_dbg(gi, "(%s)\n", gh_owner->comm); - else - print_dbg(gi, "(ended)\n"); - } else - print_dbg(gi, " owner = -1\n"); - print_dbg(gi, " gh_state = %u\n", gh->gh_state); - print_dbg(gi, " gh_flags ="); - for (x = 0; x < 32; x++) - if (gh->gh_flags & (1 << x)) - print_dbg(gi, " %u", x); - print_dbg(gi, " \n"); - print_dbg(gi, " error = %d\n", gh->gh_error); - print_dbg(gi, " gh_iflags ="); - for (x = 0; x < 32; x++) - if (test_bit(x, &gh->gh_iflags)) - print_dbg(gi, " %u", x); - print_dbg(gi, " \n"); - gfs2_print_symbol(gi, " initialized at: %s\n", gh->gh_ip); - + gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n", + state2str(gh->gh_state), + hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), + gh->gh_error, + gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, + gh_owner ? gh_owner->comm : "(ended)", buffer); return 0; } -/** - * dump_inode - print information about an inode - * @ip: the inode - * - * Returns: 0 on success, -ENOBUFS when we run out of space - */ - -static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip) -{ - unsigned int x; - - print_dbg(gi, " Inode:\n"); - print_dbg(gi, " num = %llu/%llu\n", - (unsigned long long)ip->i_no_formal_ino, - (unsigned long long)ip->i_no_addr); - print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); - print_dbg(gi, " i_flags ="); - for (x = 0; x < 32; x++) - if (test_bit(x, &ip->i_flags)) - print_dbg(gi, " %u", x); - print_dbg(gi, " \n"); - return 0; +static const char *gflags2str(char *buf, const unsigned long *gflags) +{ + char *p = buf; + if (test_bit(GLF_LOCK, gflags)) + *p++ = 'l'; + if (test_bit(GLF_STICKY, gflags)) + *p++ = 's'; + if (test_bit(GLF_DEMOTE, gflags)) + *p++ = 'D'; + if (test_bit(GLF_PENDING_DEMOTE, gflags)) + *p++ = 'd'; + if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags)) + *p++ = 'p'; + if (test_bit(GLF_DIRTY, gflags)) + *p++ = 'y'; + if (test_bit(GLF_LFLUSH, gflags)) + *p++ = 'f'; + if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags)) + *p++ = 'i'; + if (test_bit(GLF_REPLY_PENDING, gflags)) + *p++ = 'r'; + *p = 0; + return buf; } /** - * dump_glock - print information about a glock + * __dump_glock - print information about a glock + * @seq: The seq_file struct * @gl: the glock - * @count: where we are in the buffer + * + * The file format is as follows: + * One line per object, capital letters are used to indicate objects + * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented, + * other objects are indented by a single space and follow the glock to + * which they are related. Fields are indicated by lower case letters + * followed by a colon and the field value, except for strings which are in + * [] so that its possible to see if they are composed of spaces for + * example. The field's are n = number (id of the object), f = flags, + * t = type, s = state, r = refcount, e = error, p = pid. * * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) +static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) { - struct gfs2_holder *gh; - unsigned int x; - int error = -ENOBUFS; - struct task_struct *gl_owner; + const struct gfs2_glock_operations *glops = gl->gl_ops; + unsigned long long dtime; + const struct gfs2_holder *gh; + char gflags_buf[32]; + int error = 0; - spin_lock(&gl->gl_spin); + dtime = jiffies - gl->gl_demote_time; + dtime *= 1000000/HZ; /* demote time in uSec */ + if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) + dtime = 0; + gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu l:%d a:%d r:%d\n", + state2str(gl->gl_state), + gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number, + gflags2str(gflags_buf, &gl->gl_flags), + state2str(gl->gl_target), + state2str(gl->gl_demote_state), dtime, + atomic_read(&gl->gl_lvb_count), + atomic_read(&gl->gl_ail_count), + atomic_read(&gl->gl_ref)); - print_dbg(gi, "Glock 0x%p (%u, 0x%llx)\n", gl, gl->gl_name.ln_type, - (unsigned long long)gl->gl_name.ln_number); - print_dbg(gi, " gl_flags ="); - for (x = 0; x < 32; x++) { - if (test_bit(x, &gl->gl_flags)) - print_dbg(gi, " %u", x); - } - if (!test_bit(GLF_LOCK, &gl->gl_flags)) - print_dbg(gi, " (unlocked)"); - print_dbg(gi, " \n"); - print_dbg(gi, " gl_ref = %d\n", atomic_read(&gl->gl_ref)); - print_dbg(gi, " gl_state = %u\n", gl->gl_state); - if (gl->gl_owner_pid) { - gl_owner = pid_task(gl->gl_owner_pid, PIDTYPE_PID); - if (gl_owner) - print_dbg(gi, " gl_owner = pid %d (%s)\n", - pid_nr(gl->gl_owner_pid), gl_owner->comm); - else - print_dbg(gi, " gl_owner = %d (ended)\n", - pid_nr(gl->gl_owner_pid)); - } else - print_dbg(gi, " gl_owner = -1\n"); - print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); - print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); - print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); - print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); - print_dbg(gi, " reclaim = %s\n", - (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); - if (gl->gl_aspace) - print_dbg(gi, " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, - gl->gl_aspace->i_mapping->nrpages); - else - print_dbg(gi, " aspace = no\n"); - print_dbg(gi, " ail = %d\n", atomic_read(&gl->gl_ail_count)); - if (gl->gl_req_gh) { - error = dump_holder(gi, "Request", gl->gl_req_gh); - if (error) - goto out; - } list_for_each_entry(gh, &gl->gl_holders, gh_list) { - error = dump_holder(gi, "Holder", gh); - if (error) - goto out; - } - list_for_each_entry(gh, &gl->gl_waiters1, gh_list) { - error = dump_holder(gi, "Waiter1", gh); + error = dump_holder(seq, gh); if (error) goto out; } - list_for_each_entry(gh, &gl->gl_waiters3, gh_list) { - error = dump_holder(gi, "Waiter3", gh); - if (error) - goto out; - } - if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { - print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n", - gl->gl_demote_state, (unsigned long long) - (jiffies - gl->gl_demote_time)*(1000000/HZ)); - } - if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { - if (!test_bit(GLF_LOCK, &gl->gl_flags) && - list_empty(&gl->gl_holders)) { - error = dump_inode(gi, gl->gl_object); - if (error) - goto out; - } else { - error = -ENOBUFS; - print_dbg(gi, " Inode: busy\n"); - } - } - - error = 0; - + if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) + error = glops->go_dump(seq, gl); out: - spin_unlock(&gl->gl_spin); return error; } +static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) +{ + int ret; + spin_lock(&gl->gl_spin); + ret = __dump_glock(seq, gl); + spin_unlock(&gl->gl_spin); + return ret; +} + /** * gfs2_dump_lockstate - print out the current lockstate * @sdp: the filesystem @@ -2086,7 +1806,7 @@ void gfs2_glock_exit(void) module_param(scand_secs, uint, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs"); -static int gfs2_glock_iter_next(struct glock_iter *gi) +static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) { struct gfs2_glock *gl; @@ -2096,15 +1816,17 @@ restart: if (gl) { gi->gl = hlist_entry(gl->gl_list.next, struct gfs2_glock, gl_list); - if (gi->gl) - gfs2_glock_hold(gi->gl); + } else { + gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, + struct gfs2_glock, gl_list); } + if (gi->gl) + gfs2_glock_hold(gi->gl); read_unlock(gl_lock_addr(gi->hash)); if (gl) gfs2_glock_put(gl); - if (gl && gi->gl == NULL) + while (gi->gl == NULL) { gi->hash++; - while(gi->gl == NULL) { if (gi->hash >= GFS2_GL_HASH_SIZE) return 1; read_lock(gl_lock_addr(gi->hash)); @@ -2113,7 +1835,6 @@ restart: if (gi->gl) gfs2_glock_hold(gi->gl); read_unlock(gl_lock_addr(gi->hash)); - gi->hash++; } if (gi->sdp != gi->gl->gl_sbd) @@ -2122,58 +1843,34 @@ restart: return 0; } -static void gfs2_glock_iter_free(struct glock_iter *gi) +static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi) { if (gi->gl) gfs2_glock_put(gi->gl); - kfree(gi); -} - -static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp) -{ - struct glock_iter *gi; - - gi = kmalloc(sizeof (*gi), GFP_KERNEL); - if (!gi) - return NULL; - - gi->sdp = sdp; - gi->hash = 0; - gi->seq = NULL; gi->gl = NULL; - memset(gi->string, 0, sizeof(gi->string)); - - if (gfs2_glock_iter_next(gi)) { - gfs2_glock_iter_free(gi); - return NULL; - } - - return gi; } -static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos) +static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) { - struct glock_iter *gi; + struct gfs2_glock_iter *gi = seq->private; loff_t n = *pos; - gi = gfs2_glock_iter_init(file->private); - if (!gi) - return NULL; + gi->hash = 0; - while(n--) { + do { if (gfs2_glock_iter_next(gi)) { gfs2_glock_iter_free(gi); return NULL; } - } + } while (n--); - return gi; + return gi->gl; } -static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, +static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) { - struct glock_iter *gi = iter_ptr; + struct gfs2_glock_iter *gi = seq->private; (*pos)++; @@ -2182,24 +1879,18 @@ static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, return NULL; } - return gi; + return gi->gl; } -static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr) +static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) { - struct glock_iter *gi = iter_ptr; - if (gi) - gfs2_glock_iter_free(gi); + struct gfs2_glock_iter *gi = seq->private; + gfs2_glock_iter_free(gi); } -static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) +static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) { - struct glock_iter *gi = iter_ptr; - - gi->seq = file; - dump_glock(gi, gi->gl); - - return 0; + return dump_glock(seq, iter_ptr); } static const struct seq_operations gfs2_glock_seq_ops = { @@ -2211,17 +1902,14 @@ static const struct seq_operations gfs2_glock_seq_ops = { static int gfs2_debugfs_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int ret; - - ret = seq_open(file, &gfs2_glock_seq_ops); - if (ret) - return ret; - - seq = file->private_data; - seq->private = inode->i_private; - - return 0; + int ret = seq_open_private(file, &gfs2_glock_seq_ops, + sizeof(struct gfs2_glock_iter)); + if (ret == 0) { + struct seq_file *seq = file->private_data; + struct gfs2_glock_iter *gi = seq->private; + gi->sdp = inode->i_private; + } + return ret; } static const struct file_operations gfs2_debug_fops = { @@ -2229,7 +1917,7 @@ static const struct file_operations gfs2_debug_fops = { .open = gfs2_debugfs_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release + .release = seq_release_private, }; int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index cdad3e6f815..695c6b19361 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -24,13 +24,9 @@ #define GL_ASYNC 0x00000040 #define GL_EXACT 0x00000080 #define GL_SKIP 0x00000100 -#define GL_ATIME 0x00000200 #define GL_NOCACHE 0x00000400 -#define GL_FLOCK 0x00000800 -#define GL_NOCANCEL 0x00001000 #define GLR_TRYFAILED 13 -#define GLR_CANCELED 14 static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) { @@ -41,6 +37,8 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock * spin_lock(&gl->gl_spin); pid = task_pid(current); list_for_each_entry(gh, &gl->gl_holders, gh_list) { + if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) + break; if (gh->gh_owner_pid == pid) goto out; } @@ -70,7 +68,7 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl) { int ret; spin_lock(&gl->gl_spin); - ret = test_bit(GLF_DEMOTE, &gl->gl_flags) || !list_empty(&gl->gl_waiters3); + ret = test_bit(GLF_DEMOTE, &gl->gl_flags); spin_unlock(&gl->gl_spin); return ret; } @@ -98,6 +96,7 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp, int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); +void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); /** * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock @@ -130,10 +129,9 @@ int gfs2_lvb_hold(struct gfs2_glock *gl); void gfs2_lvb_unhold(struct gfs2_glock *gl); void gfs2_glock_cb(void *cb_data, unsigned int type, void *data); - void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); void gfs2_reclaim_glock(struct gfs2_sbd *sdp); -void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); +void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); int __init gfs2_glock_init(void); void gfs2_glock_exit(void); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 07d84d16cda..c6c318c2a0f 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -13,6 +13,7 @@ #include <linux/buffer_head.h> #include <linux/gfs2_ondisk.h> #include <linux/lm_interface.h> +#include <linux/bio.h> #include "gfs2.h" #include "incore.h" @@ -172,26 +173,6 @@ static void inode_go_sync(struct gfs2_glock *gl) } /** - * inode_go_xmote_bh - After promoting/demoting a glock - * @gl: the glock - * - */ - -static void inode_go_xmote_bh(struct gfs2_glock *gl) -{ - struct gfs2_holder *gh = gl->gl_req_gh; - struct buffer_head *bh; - int error; - - if (gl->gl_state != LM_ST_UNLOCKED && - (!gh || !(gh->gh_flags & GL_SKIP))) { - error = gfs2_meta_read(gl, gl->gl_name.ln_number, 0, &bh); - if (!error) - brelse(bh); - } -} - -/** * inode_go_inval - prepare a inode glock to be released * @gl: the glock * @flags: @@ -267,6 +248,26 @@ static int inode_go_lock(struct gfs2_holder *gh) } /** + * inode_go_dump - print information about an inode + * @seq: The iterator + * @ip: the inode + * + * Returns: 0 on success, -ENOBUFS when we run out of space + */ + +static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) +{ + const struct gfs2_inode *ip = gl->gl_object; + if (ip == NULL) + return 0; + gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%08lx\n", + (unsigned long long)ip->i_no_formal_ino, + (unsigned long long)ip->i_no_addr, + IF2DT(ip->i_inode.i_mode), ip->i_flags); + return 0; +} + +/** * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock * @gl: the glock * @@ -306,6 +307,22 @@ static void rgrp_go_unlock(struct gfs2_holder *gh) } /** + * rgrp_go_dump - print out an rgrp + * @seq: The iterator + * @gl: The glock in question + * + */ + +static int rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) +{ + const struct gfs2_rgrpd *rgd = gl->gl_object; + if (rgd == NULL) + return 0; + gfs2_print_dbg(seq, " R: n:%llu\n", (unsigned long long)rgd->rd_addr); + return 0; +} + +/** * trans_go_sync - promote/demote the transaction glock * @gl: the glock * @state: the requested state @@ -330,7 +347,7 @@ static void trans_go_sync(struct gfs2_glock *gl) * */ -static void trans_go_xmote_bh(struct gfs2_glock *gl) +static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) { struct gfs2_sbd *sdp = gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); @@ -338,8 +355,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl) struct gfs2_log_header_host head; int error; - if (gl->gl_state != LM_ST_UNLOCKED && - test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); error = gfs2_find_jhead(sdp->sd_jdesc, &head); @@ -354,6 +370,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl) gfs2_log_pointers_init(sdp, head.lh_blkno); } } + return 0; } /** @@ -375,12 +392,12 @@ const struct gfs2_glock_operations gfs2_meta_glops = { const struct gfs2_glock_operations gfs2_inode_glops = { .go_xmote_th = inode_go_sync, - .go_xmote_bh = inode_go_xmote_bh, .go_inval = inode_go_inval, .go_demote_ok = inode_go_demote_ok, .go_lock = inode_go_lock, + .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, - .go_min_hold_time = HZ / 10, + .go_min_hold_time = HZ / 5, }; const struct gfs2_glock_operations gfs2_rgrp_glops = { @@ -389,8 +406,9 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_demote_ok = rgrp_go_demote_ok, .go_lock = rgrp_go_lock, .go_unlock = rgrp_go_unlock, + .go_dump = rgrp_go_dump, .go_type = LM_TYPE_RGRP, - .go_min_hold_time = HZ / 10, + .go_min_hold_time = HZ / 5, }; const struct gfs2_glock_operations gfs2_trans_glops = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index eabe5eac41d..f566ec1b4e8 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -77,7 +77,6 @@ struct gfs2_rgrp_host { struct gfs2_rgrpd { struct list_head rd_list; /* Link with superblock */ struct list_head rd_list_mru; - struct list_head rd_recent; /* Recently used rgrps */ struct gfs2_glock *rd_gl; /* Glock for this rgrp */ u64 rd_addr; /* grp block disk address */ u64 rd_data0; /* first data location */ @@ -128,20 +127,20 @@ struct gfs2_bufdata { struct gfs2_glock_operations { void (*go_xmote_th) (struct gfs2_glock *gl); - void (*go_xmote_bh) (struct gfs2_glock *gl); + int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh); void (*go_inval) (struct gfs2_glock *gl, int flags); int (*go_demote_ok) (struct gfs2_glock *gl); int (*go_lock) (struct gfs2_holder *gh); void (*go_unlock) (struct gfs2_holder *gh); + int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); const int go_type; const unsigned long go_min_hold_time; }; enum { /* States */ - HIF_HOLDER = 6, + HIF_HOLDER = 6, /* Set for gh that "holds" the glock */ HIF_FIRST = 7, - HIF_ABORTED = 9, HIF_WAIT = 10, }; @@ -154,20 +153,20 @@ struct gfs2_holder { unsigned gh_flags; int gh_error; - unsigned long gh_iflags; + unsigned long gh_iflags; /* HIF_... */ unsigned long gh_ip; }; enum { - GLF_LOCK = 1, - GLF_STICKY = 2, - GLF_DEMOTE = 3, - GLF_PENDING_DEMOTE = 4, - GLF_DIRTY = 5, - GLF_DEMOTE_IN_PROGRESS = 6, - GLF_LFLUSH = 7, - GLF_WAITERS2 = 8, - GLF_CONV_DEADLK = 9, + GLF_LOCK = 1, + GLF_STICKY = 2, + GLF_DEMOTE = 3, + GLF_PENDING_DEMOTE = 4, + GLF_DEMOTE_IN_PROGRESS = 5, + GLF_DIRTY = 6, + GLF_LFLUSH = 7, + GLF_INVALIDATE_IN_PROGRESS = 8, + GLF_REPLY_PENDING = 9, }; struct gfs2_glock { @@ -179,19 +178,14 @@ struct gfs2_glock { spinlock_t gl_spin; unsigned int gl_state; + unsigned int gl_target; + unsigned int gl_reply; unsigned int gl_hash; unsigned int gl_demote_state; /* state requested by remote node */ unsigned long gl_demote_time; /* time of first demote request */ - struct pid *gl_owner_pid; - unsigned long gl_ip; struct list_head gl_holders; - struct list_head gl_waiters1; /* HIF_MUTEX */ - struct list_head gl_waiters3; /* HIF_PROMOTE */ const struct gfs2_glock_operations *gl_ops; - - struct gfs2_holder *gl_req_gh; - void *gl_lock; char *gl_lvb; atomic_t gl_lvb_count; @@ -392,20 +386,21 @@ struct gfs2_statfs_change_host { #define GFS2_DATA_ORDERED 2 struct gfs2_args { - char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ - char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ - char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ - int ar_spectator; /* Don't get a journal because we're always RO */ - int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */ - int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */ - int ar_localcaching; /* Local-style caching (dangerous on multihost) */ - int ar_debug; /* Oops on errors instead of trying to be graceful */ - int ar_upgrade; /* Upgrade ondisk/multihost format */ - unsigned int ar_num_glockd; /* Number of glockd threads */ - int ar_posix_acl; /* Enable posix acls */ - int ar_quota; /* off/account/on */ - int ar_suiddir; /* suiddir support */ - int ar_data; /* ordered/writeback */ + char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ + char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ + char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ + unsigned int ar_spectator:1; /* Don't get a journal */ + unsigned int ar_ignore_local_fs:1; /* Ignore optimisations */ + unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ + unsigned int ar_localcaching:1; /* Local caching */ + unsigned int ar_debug:1; /* Oops on errors */ + unsigned int ar_upgrade:1; /* Upgrade ondisk format */ + unsigned int ar_posix_acl:1; /* Enable posix acls */ + unsigned int ar_quota:2; /* off/account/on */ + unsigned int ar_suiddir:1; /* suiddir support */ + unsigned int ar_data:2; /* ordered/writeback */ + unsigned int ar_meta:1; /* mount metafs */ + unsigned int ar_num_glockd; /* Number of glockd threads */ }; struct gfs2_tune { @@ -425,9 +420,7 @@ struct gfs2_tune { unsigned int gt_quota_scale_den; /* Denominator */ unsigned int gt_quota_cache_secs; unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ - unsigned int gt_atime_quantum; /* Min secs between atime updates */ unsigned int gt_new_files_jdata; - unsigned int gt_new_files_directio; unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ unsigned int gt_stall_secs; /* Detects trouble! */ unsigned int gt_complain_secs; @@ -439,7 +432,7 @@ enum { SDF_JOURNAL_CHECKED = 0, SDF_JOURNAL_LIVE = 1, SDF_SHUTDOWN = 2, - SDF_NOATIME = 3, + SDF_NOBARRIERS = 3, }; #define GFS2_FSNAME_LEN 256 @@ -468,7 +461,6 @@ struct gfs2_sb_host { struct gfs2_sbd { struct super_block *sd_vfs; - struct super_block *sd_vfs_meta; struct kobject sd_kobj; unsigned long sd_flags; /* SDF_... */ struct gfs2_sb_host sd_sb; @@ -506,7 +498,9 @@ struct gfs2_sbd { /* Inode Stuff */ - struct inode *sd_master_dir; + struct dentry *sd_master_dir; + struct dentry *sd_root_dir; + struct inode *sd_jindex; struct inode *sd_inum_inode; struct inode *sd_statfs_inode; @@ -534,7 +528,6 @@ struct gfs2_sbd { struct mutex sd_rindex_mutex; struct list_head sd_rindex_list; struct list_head sd_rindex_mru_list; - struct list_head sd_rindex_recent_list; struct gfs2_rgrpd *sd_rindex_forward; unsigned int sd_rgrps; @@ -642,7 +635,6 @@ struct gfs2_sbd { /* Debugging crud */ unsigned long sd_last_warning; - struct vfsmount *sd_gfs2mnt; struct dentry *debugfs_dir; /* debugfs directory */ struct dentry *debugfs_dentry_glocks; /* for debugfs */ }; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 09453d057e4..7cee695fa44 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -18,6 +18,7 @@ #include <linux/crc32.h> #include <linux/lm_interface.h> #include <linux/security.h> +#include <linux/time.h> #include "gfs2.h" #include "incore.h" @@ -249,6 +250,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) { struct gfs2_dinode_host *di = &ip->i_di; const struct gfs2_dinode *str = buf; + struct timespec atime; u16 height, depth; if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) @@ -275,8 +277,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) di->di_size = be64_to_cpu(str->di_size); i_size_write(&ip->i_inode, di->di_size); gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); - ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); - ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); + atime.tv_sec = be64_to_cpu(str->di_atime); + atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); + if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) + ip->i_inode.i_atime = atime; ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); @@ -448,7 +452,7 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) struct qstr qstr; struct inode *inode; gfs2_str2qstr(&qstr, name); - inode = gfs2_lookupi(dip, &qstr, 1, NULL); + inode = gfs2_lookupi(dip, &qstr, 1); /* gfs2_lookupi has inconsistent callers: vfs * related routines expect NULL for no entry found, * gfs2_lookup_simple callers expect ENOENT @@ -477,7 +481,7 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) */ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, - int is_root, struct nameidata *nd) + int is_root) { struct super_block *sb = dir->i_sb; struct gfs2_inode *dip = GFS2_I(dir); @@ -504,7 +508,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, } if (!is_root) { - error = permission(dir, MAY_EXEC, NULL); + error = gfs2_permission(dir, MAY_EXEC); if (error) goto out; } @@ -667,7 +671,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, { int error; - error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL); + error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); if (error) return error; @@ -789,13 +793,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) || gfs2_tune_get(sdp, gt_new_files_jdata)) di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); - if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) || - gfs2_tune_get(sdp, gt_new_files_directio)) - di->di_flags |= cpu_to_be32(GFS2_DIF_DIRECTIO); } else if (S_ISDIR(mode)) { di->di_flags |= cpu_to_be32(dip->i_di.di_flags & - GFS2_DIF_INHERIT_DIRECTIO); - di->di_flags |= cpu_to_be32(dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA); } @@ -1038,13 +1037,11 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, if (bh) brelse(bh); - if (!inode) - return ERR_PTR(-ENOMEM); return inode; fail_gunlock2: gfs2_glock_dq_uninit(ghs + 1); - if (inode) + if (inode && !IS_ERR(inode)) iput(inode); fail_gunlock: gfs2_glock_dq(ghs); @@ -1134,7 +1131,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, if (IS_APPEND(&dip->i_inode)) return -EPERM; - error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL); + error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); if (error) return error; @@ -1145,54 +1142,6 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, return 0; } -/* - * gfs2_ok_to_move - check if it's ok to move a directory to another directory - * @this: move this - * @to: to here - * - * Follow @to back to the root and make sure we don't encounter @this - * Assumes we already hold the rename lock. - * - * Returns: errno - */ - -int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) -{ - struct inode *dir = &to->i_inode; - struct super_block *sb = dir->i_sb; - struct inode *tmp; - struct qstr dotdot; - int error = 0; - - gfs2_str2qstr(&dotdot, ".."); - - igrab(dir); - - for (;;) { - if (dir == &this->i_inode) { - error = -EINVAL; - break; - } - if (dir == sb->s_root->d_inode) { - error = 0; - break; - } - - tmp = gfs2_lookupi(dir, &dotdot, 1, NULL); - if (IS_ERR(tmp)) { - error = PTR_ERR(tmp); - break; - } - - iput(dir); - dir = tmp; - } - - iput(dir); - - return error; -} - /** * gfs2_readlinki - return the contents of a symlink * @ip: the symlink's inode @@ -1212,8 +1161,8 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) unsigned int x; int error; - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); - error = gfs2_glock_nq_atime(&i_gh); + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); + error = gfs2_glock_nq(&i_gh); if (error) { gfs2_holder_uninit(&i_gh); return error; @@ -1248,101 +1197,6 @@ out: return error; } -/** - * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and - * conditionally update the inode's atime - * @gh: the holder to acquire - * - * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap - * Update if the difference between the current time and the inode's current - * atime is greater than an interval specified at mount. - * - * Returns: errno - */ - -int gfs2_glock_nq_atime(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; - struct gfs2_inode *ip = gl->gl_object; - s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum); - unsigned int state; - int flags; - int error; - struct timespec tv = CURRENT_TIME; - - if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) || - gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) || - gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops)) - return -EINVAL; - - state = gh->gh_state; - flags = gh->gh_flags; - - error = gfs2_glock_nq(gh); - if (error) - return error; - - if (test_bit(SDF_NOATIME, &sdp->sd_flags) || - (sdp->sd_vfs->s_flags & MS_RDONLY)) - return 0; - - if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { - gfs2_glock_dq(gh); - gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY, - gh); - error = gfs2_glock_nq(gh); - if (error) - return error; - - /* Verify that atime hasn't been updated while we were - trying to get exclusive lock. */ - - tv = CURRENT_TIME; - if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { - struct buffer_head *dibh; - struct gfs2_dinode *di; - - error = gfs2_trans_begin(sdp, RES_DINODE, 0); - if (error == -EROFS) - return 0; - if (error) - goto fail; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - goto fail_end_trans; - - ip->i_inode.i_atime = tv; - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - di = (struct gfs2_dinode *)dibh->b_data; - di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); - di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); - brelse(dibh); - - gfs2_trans_end(sdp); - } - - /* If someone else has asked for the glock, - unlock and let them have it. Then reacquire - in the original state. */ - if (gfs2_glock_is_blocking(gl)) { - gfs2_glock_dq(gh); - gfs2_holder_reinit(state, flags, gh); - return gfs2_glock_nq(gh); - } - } - - return 0; - -fail_end_trans: - gfs2_trans_end(sdp); -fail: - gfs2_glock_dq(gh); - return error; -} - static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) { diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 580da454b38..2d43f69610a 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -72,7 +72,6 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip, } -void gfs2_inode_attr_in(struct gfs2_inode *ip); void gfs2_set_iop(struct inode *inode); struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, u64 no_addr, u64 no_formal_ino, @@ -84,16 +83,15 @@ int gfs2_inode_refresh(struct gfs2_inode *ip); int gfs2_dinode_dealloc(struct gfs2_inode *inode); int gfs2_change_nlink(struct gfs2_inode *ip, int diff); struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, - int is_root, struct nameidata *nd); + int is_root); struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, unsigned int mode, dev_t dev); int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, struct gfs2_inode *ip); int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, const struct gfs2_inode *ip); -int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); +int gfs2_permission(struct inode *inode, int mask); int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); -int gfs2_glock_nq_atime(struct gfs2_holder *gh); int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c index 663fee72878..523243a13a2 100644 --- a/fs/gfs2/locking.c +++ b/fs/gfs2/locking.c @@ -23,12 +23,54 @@ struct lmh_wrapper { const struct lm_lockops *lw_ops; }; +static int nolock_mount(char *table_name, char *host_data, + lm_callback_t cb, void *cb_data, + unsigned int min_lvb_size, int flags, + struct lm_lockstruct *lockstruct, + struct kobject *fskobj); + /* List of registered low-level locking protocols. A file system selects one of them by name at mount time, e.g. lock_nolock, lock_dlm. */ +static const struct lm_lockops nolock_ops = { + .lm_proto_name = "lock_nolock", + .lm_mount = nolock_mount, +}; + +static struct lmh_wrapper nolock_proto = { + .lw_list = LIST_HEAD_INIT(nolock_proto.lw_list), + .lw_ops = &nolock_ops, +}; + static LIST_HEAD(lmh_list); static DEFINE_MUTEX(lmh_lock); +static int nolock_mount(char *table_name, char *host_data, + lm_callback_t cb, void *cb_data, + unsigned int min_lvb_size, int flags, + struct lm_lockstruct *lockstruct, + struct kobject *fskobj) +{ + char *c; + unsigned int jid; + + c = strstr(host_data, "jid="); + if (!c) + jid = 0; + else { + c += 4; + sscanf(c, "%u", &jid); + } + + lockstruct->ls_jid = jid; + lockstruct->ls_first = 1; + lockstruct->ls_lvb_size = min_lvb_size; + lockstruct->ls_ops = &nolock_ops; + lockstruct->ls_flags = LM_LSFLAG_LOCAL; + + return 0; +} + /** * gfs2_register_lockproto - Register a low-level locking protocol * @proto: the protocol definition @@ -116,9 +158,13 @@ int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data, int try = 0; int error, found; + retry: mutex_lock(&lmh_lock); + if (list_empty(&nolock_proto.lw_list)) + list_add(&nolock_proto.lw_list, &lmh_list); + found = 0; list_for_each_entry(lw, &lmh_list, lw_list) { if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) { @@ -139,7 +185,8 @@ retry: goto out; } - if (!try_module_get(lw->lw_ops->lm_owner)) { + if (lw->lw_ops->lm_owner && + !try_module_get(lw->lw_ops->lm_owner)) { try = 0; mutex_unlock(&lmh_lock); msleep(1000); @@ -158,7 +205,8 @@ out: void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct) { mutex_lock(&lmh_lock); - lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace); + if (lockstruct->ls_ops->lm_unmount) + lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace); if (lockstruct->ls_ops->lm_owner) module_put(lockstruct->ls_ops->lm_owner); mutex_unlock(&lmh_lock); diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c index cf7ea8abec8..2482c904750 100644 --- a/fs/gfs2/locking/dlm/lock.c +++ b/fs/gfs2/locking/dlm/lock.c @@ -11,46 +11,60 @@ static char junk_lvb[GDLM_LVB_SIZE]; -static void queue_complete(struct gdlm_lock *lp) + +/* convert dlm lock-mode to gfs lock-state */ + +static s16 gdlm_make_lmstate(s16 dlmmode) { - struct gdlm_ls *ls = lp->ls; + switch (dlmmode) { + case DLM_LOCK_IV: + case DLM_LOCK_NL: + return LM_ST_UNLOCKED; + case DLM_LOCK_EX: + return LM_ST_EXCLUSIVE; + case DLM_LOCK_CW: + return LM_ST_DEFERRED; + case DLM_LOCK_PR: + return LM_ST_SHARED; + } + gdlm_assert(0, "unknown DLM mode %d", dlmmode); + return -1; +} - clear_bit(LFL_ACTIVE, &lp->flags); +/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm + thread gets to it. */ + +static void queue_submit(struct gdlm_lock *lp) +{ + struct gdlm_ls *ls = lp->ls; spin_lock(&ls->async_lock); - list_add_tail(&lp->clist, &ls->complete); + list_add_tail(&lp->delay_list, &ls->submit); spin_unlock(&ls->async_lock); wake_up(&ls->thread_wait); } -static inline void gdlm_ast(void *astarg) +static void wake_up_ast(struct gdlm_lock *lp) { - queue_complete(astarg); + clear_bit(LFL_AST_WAIT, &lp->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&lp->flags, LFL_AST_WAIT); } -static inline void gdlm_bast(void *astarg, int mode) +static void gdlm_delete_lp(struct gdlm_lock *lp) { - struct gdlm_lock *lp = astarg; struct gdlm_ls *ls = lp->ls; - if (!mode) { - printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n", - lp->lockname.ln_type, - (unsigned long long)lp->lockname.ln_number); - return; - } - spin_lock(&ls->async_lock); - if (!lp->bast_mode) { - list_add_tail(&lp->blist, &ls->blocking); - lp->bast_mode = mode; - } else if (lp->bast_mode < mode) - lp->bast_mode = mode; + if (!list_empty(&lp->delay_list)) + list_del_init(&lp->delay_list); + ls->all_locks_count--; spin_unlock(&ls->async_lock); - wake_up(&ls->thread_wait); + + kfree(lp); } -void gdlm_queue_delayed(struct gdlm_lock *lp) +static void gdlm_queue_delayed(struct gdlm_lock *lp) { struct gdlm_ls *ls = lp->ls; @@ -59,6 +73,236 @@ void gdlm_queue_delayed(struct gdlm_lock *lp) spin_unlock(&ls->async_lock); } +static void process_complete(struct gdlm_lock *lp) +{ + struct gdlm_ls *ls = lp->ls; + struct lm_async_cb acb; + + memset(&acb, 0, sizeof(acb)); + + if (lp->lksb.sb_status == -DLM_ECANCEL) { + log_info("complete dlm cancel %x,%llx flags %lx", + lp->lockname.ln_type, + (unsigned long long)lp->lockname.ln_number, + lp->flags); + + lp->req = lp->cur; + acb.lc_ret |= LM_OUT_CANCELED; + if (lp->cur == DLM_LOCK_IV) + lp->lksb.sb_lkid = 0; + goto out; + } + + if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) { + if (lp->lksb.sb_status != -DLM_EUNLOCK) { + log_info("unlock sb_status %d %x,%llx flags %lx", + lp->lksb.sb_status, lp->lockname.ln_type, + (unsigned long long)lp->lockname.ln_number, + lp->flags); + return; + } + + lp->cur = DLM_LOCK_IV; + lp->req = DLM_LOCK_IV; + lp->lksb.sb_lkid = 0; + + if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) { + gdlm_delete_lp(lp); + return; + } + goto out; + } + + if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID) + memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE); + + if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) { + if (lp->req == DLM_LOCK_PR) + lp->req = DLM_LOCK_CW; + else if (lp->req == DLM_LOCK_CW) + lp->req = DLM_LOCK_PR; + } + + /* + * A canceled lock request. The lock was just taken off the delayed + * list and was never even submitted to dlm. + */ + + if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) { + log_info("complete internal cancel %x,%llx", + lp->lockname.ln_type, + (unsigned long long)lp->lockname.ln_number); + lp->req = lp->cur; + acb.lc_ret |= LM_OUT_CANCELED; + goto out; + } + + /* + * An error occured. + */ + + if (lp->lksb.sb_status) { + /* a "normal" error */ + if ((lp->lksb.sb_status == -EAGAIN) && + (lp->lkf & DLM_LKF_NOQUEUE)) { + lp->req = lp->cur; + if (lp->cur == DLM_LOCK_IV) + lp->lksb.sb_lkid = 0; + goto out; + } + + /* this could only happen with cancels I think */ + log_info("ast sb_status %d %x,%llx flags %lx", + lp->lksb.sb_status, lp->lockname.ln_type, + (unsigned long long)lp->lockname.ln_number, + lp->flags); + return; + } + + /* + * This is an AST for an EX->EX conversion for sync_lvb from GFS. + */ + + if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) { + wake_up_ast(lp); + return; + } + + /* + * A lock has been demoted to NL because it initially completed during + * BLOCK_LOCKS. Now it must be requested in the originally requested + * mode. + */ + + if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) { + gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx", + lp->lockname.ln_type, + (unsigned long long)lp->lockname.ln_number); + gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx", + lp->lockname.ln_type, + (unsigned long long)lp->lockname.ln_number); + + lp->cur = DLM_LOCK_NL; + lp->req = lp->prev_req; + lp->prev_req = DLM_LOCK_IV; + lp->lkf &= ~DLM_LKF_CONVDEADLK; + + set_bit(LFL_NOCACHE, &lp->flags); + + if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) && + !test_bit(LFL_NOBLOCK, &lp->flags)) + gdlm_queue_delayed(lp); + else + queue_submit(lp); + return; + } + + /* + * A request is granted during dlm recovery. It may be granted + * because the locks of a failed node were cleared. In that case, + * there may be inconsistent data beneath this lock and we must wait + * for recovery to complete to use it. When gfs recovery is done this + * granted lock will be converted to NL and then reacquired in this + * granted state. + */ + + if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) && + !test_bit(LFL_NOBLOCK, &lp->flags) && + lp->req != DLM_LOCK_NL) { + + lp->cur = lp->req; + lp->prev_req = lp->req; + lp->req = DLM_LOCK_NL; + lp->lkf |= DLM_LKF_CONVERT; + lp->lkf &= ~DLM_LKF_CONVDEADLK; + + log_debug("rereq %x,%llx id %x %d,%d", + lp->lockname.ln_type, + (unsigned long long)lp->lockname.ln_number, + lp->lksb.sb_lkid, lp->cur, lp->req); + + set_bit(LFL_REREQUEST, &lp->flags); + queue_submit(lp); + return; + } + + /* + * DLM demoted the lock to NL before it was granted so GFS must be + * told it cannot cache data for this lock. + */ + + if (lp->lksb.sb_flags & DLM_SBF_DEMOTED) + set_bit(LFL_NOCACHE, &lp->flags); + +out: + /* + * This is an internal lock_dlm lock + */ + + if (test_bit(LFL_INLOCK, &lp->flags)) { + clear_bit(LFL_NOBLOCK, &lp->flags); + lp->cur = lp->req; + wake_up_ast(lp); + return; + } + + /* + * Normal completion of a lock request. Tell GFS it now has the lock. + */ + + clear_bit(LFL_NOBLOCK, &lp->flags); + lp->cur = lp->req; + + acb.lc_name = lp->lockname; + acb.lc_ret |= gdlm_make_lmstate(lp->cur); + + ls->fscb(ls->sdp, LM_CB_ASYNC, &acb); +} + +static void gdlm_ast(void *astarg) +{ + struct gdlm_lock *lp = astarg; + clear_bit(LFL_ACTIVE, &lp->flags); + process_complete(lp); +} + +static void process_blocking(struct gdlm_lock *lp, int bast_mode) +{ + struct gdlm_ls *ls = lp->ls; + unsigned int cb = 0; + + switch (gdlm_make_lmstate(bast_mode)) { + case LM_ST_EXCLUSIVE: + cb = LM_CB_NEED_E; + break; + case LM_ST_DEFERRED: + cb = LM_CB_NEED_D; + break; + case LM_ST_SHARED: + cb = LM_CB_NEED_S; + break; + default: + gdlm_assert(0, "unknown bast mode %u", bast_mode); + } + + ls->fscb(ls->sdp, cb, &lp->lockname); +} + + +static void gdlm_bast(void *astarg, int mode) +{ + struct gdlm_lock *lp = astarg; + + if (!mode) { + printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n", + lp->lockname.ln_type, + (unsigned long long)lp->lockname.ln_number); + return; + } + + process_blocking(lp, mode); +} + /* convert gfs lock-state to dlm lock-mode */ static s16 make_mode(s16 lmstate) @@ -77,24 +321,6 @@ static s16 make_mode(s16 lmstate) return -1; } -/* convert dlm lock-mode to gfs lock-state */ - -s16 gdlm_make_lmstate(s16 dlmmode) -{ - switch (dlmmode) { - case DLM_LOCK_IV: - case DLM_LOCK_NL: - return LM_ST_UNLOCKED; - case DLM_LOCK_EX: - return LM_ST_EXCLUSIVE; - case DLM_LOCK_CW: - return LM_ST_DEFERRED; - case DLM_LOCK_PR: - return LM_ST_SHARED; - } - gdlm_assert(0, "unknown DLM mode %d", dlmmode); - return -1; -} /* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */ @@ -134,14 +360,6 @@ static inline unsigned int make_flags(struct gdlm_lock *lp, if (lp->lksb.sb_lkid != 0) { lkf |= DLM_LKF_CONVERT; - - /* Conversion deadlock avoidance by DLM */ - - if (!(lp->ls->fsflags & LM_MFLAG_CONV_NODROP) && - !test_bit(LFL_FORCE_PROMOTE, &lp->flags) && - !(lkf & DLM_LKF_NOQUEUE) && - cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req) - lkf |= DLM_LKF_CONVDEADLK; } if (lp->lvb) @@ -173,14 +391,9 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name, make_strname(name, &lp->strname); lp->ls = ls; lp->cur = DLM_LOCK_IV; - lp->lvb = NULL; - lp->hold_null = NULL; - INIT_LIST_HEAD(&lp->clist); - INIT_LIST_HEAD(&lp->blist); INIT_LIST_HEAD(&lp->delay_list); spin_lock(&ls->async_lock); - list_add(&lp->all_list, &ls->all_locks); ls->all_locks_count++; spin_unlock(&ls->async_lock); @@ -188,26 +401,6 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name, return 0; } -void gdlm_delete_lp(struct gdlm_lock *lp) -{ - struct gdlm_ls *ls = lp->ls; - - spin_lock(&ls->async_lock); - if (!list_empty(&lp->clist)) - list_del_init(&lp->clist); - if (!list_empty(&lp->blist)) - list_del_init(&lp->blist); - if (!list_empty(&lp->delay_list)) - list_del_init(&lp->delay_list); - gdlm_assert(!list_empty(&lp->all_list), "%x,%llx", lp->lockname.ln_type, - (unsigned long long)lp->lockname.ln_number); - list_del_init(&lp->all_list); - ls->all_locks_count--; - spin_unlock(&ls->async_lock); - - kfree(lp); -} - int gdlm_get_lock(void *lockspace, struct lm_lockname *name, void **lockp) { @@ -261,7 +454,7 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp) if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) { lp->lksb.sb_status = -EAGAIN; - queue_complete(lp); + gdlm_ast(lp); error = 0; } @@ -308,6 +501,12 @@ unsigned int gdlm_lock(void *lock, unsigned int cur_state, { struct gdlm_lock *lp = lock; + if (req_state == LM_ST_UNLOCKED) + return gdlm_unlock(lock, cur_state); + + if (req_state == LM_ST_UNLOCKED) + return gdlm_unlock(lock, cur_state); + clear_bit(LFL_DLM_CANCEL, &lp->flags); if (flags & LM_FLAG_NOEXP) set_bit(LFL_NOBLOCK, &lp->flags); @@ -351,7 +550,7 @@ void gdlm_cancel(void *lock) if (delay_list) { set_bit(LFL_CANCEL, &lp->flags); set_bit(LFL_ACTIVE, &lp->flags); - queue_complete(lp); + gdlm_ast(lp); return; } @@ -507,22 +706,3 @@ void gdlm_submit_delayed(struct gdlm_ls *ls) wake_up(&ls->thread_wait); } -int gdlm_release_all_locks(struct gdlm_ls *ls) -{ - struct gdlm_lock *lp, *safe; - int count = 0; - - spin_lock(&ls->async_lock); - list_for_each_entry_safe(lp, safe, &ls->all_locks, all_list) { - list_del_init(&lp->all_list); - - if (lp->lvb && lp->lvb != junk_lvb) - kfree(lp->lvb); - kfree(lp); - count++; - } - spin_unlock(&ls->async_lock); - - return count; -} - diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index a243cf69c54..3c98e7c6f93 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h @@ -72,19 +72,12 @@ struct gdlm_ls { int recover_jid_done; int recover_jid_status; spinlock_t async_lock; - struct list_head complete; - struct list_head blocking; struct list_head delayed; struct list_head submit; - struct list_head all_locks; u32 all_locks_count; wait_queue_head_t wait_control; - struct task_struct *thread1; - struct task_struct *thread2; + struct task_struct *thread; wait_queue_head_t thread_wait; - unsigned long drop_time; - int drop_locks_count; - int drop_locks_period; }; enum { @@ -117,12 +110,7 @@ struct gdlm_lock { u32 lkf; /* dlm flags DLM_LKF_ */ unsigned long flags; /* lock_dlm flags LFL_ */ - int bast_mode; /* protected by async_lock */ - - struct list_head clist; /* complete */ - struct list_head blist; /* blocking */ struct list_head delay_list; /* delayed */ - struct list_head all_list; /* all locks for the fs */ struct gdlm_lock *hold_null; /* NL lock for hold_lvb */ }; @@ -159,11 +147,7 @@ void gdlm_release_threads(struct gdlm_ls *); /* lock.c */ -s16 gdlm_make_lmstate(s16); -void gdlm_queue_delayed(struct gdlm_lock *); void gdlm_submit_delayed(struct gdlm_ls *); -int gdlm_release_all_locks(struct gdlm_ls *); -void gdlm_delete_lp(struct gdlm_lock *); unsigned int gdlm_do_lock(struct gdlm_lock *); int gdlm_get_lock(void *, struct lm_lockname *, void **); diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index 470bdf650b5..0c4cbe6c828 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c @@ -22,22 +22,14 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp, if (!ls) return NULL; - ls->drop_locks_count = GDLM_DROP_COUNT; - ls->drop_locks_period = GDLM_DROP_PERIOD; ls->fscb = cb; ls->sdp = sdp; ls->fsflags = flags; spin_lock_init(&ls->async_lock); - INIT_LIST_HEAD(&ls->complete); - INIT_LIST_HEAD(&ls->blocking); INIT_LIST_HEAD(&ls->delayed); INIT_LIST_HEAD(&ls->submit); - INIT_LIST_HEAD(&ls->all_locks); init_waitqueue_head(&ls->thread_wait); init_waitqueue_head(&ls->wait_control); - ls->thread1 = NULL; - ls->thread2 = NULL; - ls->drop_time = jiffies; ls->jid = -1; strncpy(buf, table_name, 256); @@ -152,7 +144,8 @@ static int gdlm_mount(char *table_name, char *host_data, error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname), &ls->dlm_lockspace, - DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0), + DLM_LSFL_FS | DLM_LSFL_NEWEXCL | + (nodir ? DLM_LSFL_NODIR : 0), GDLM_LVB_SIZE); if (error) { log_error("dlm_new_lockspace error %d", error); @@ -180,7 +173,6 @@ out: static void gdlm_unmount(void *lockspace) { struct gdlm_ls *ls = lockspace; - int rv; log_debug("unmount flags %lx", ls->flags); @@ -194,9 +186,7 @@ static void gdlm_unmount(void *lockspace) gdlm_kobject_release(ls); dlm_release_lockspace(ls->dlm_lockspace, 2); gdlm_release_threads(ls); - rv = gdlm_release_all_locks(ls); - if (rv) - log_info("gdlm_unmount: %d stray locks freed", rv); + BUG_ON(ls->all_locks_count); out: kfree(ls); } @@ -232,7 +222,6 @@ static void gdlm_withdraw(void *lockspace) dlm_release_lockspace(ls->dlm_lockspace, 2); gdlm_release_threads(ls); - gdlm_release_all_locks(ls); gdlm_kobject_release(ls); } diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c index a4ff271df9e..4ec571c3d8a 100644 --- a/fs/gfs2/locking/dlm/sysfs.c +++ b/fs/gfs2/locking/dlm/sysfs.c @@ -114,17 +114,6 @@ static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf) return sprintf(buf, "%d\n", ls->recover_jid_status); } -static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf) -{ - return sprintf(buf, "%d\n", ls->drop_locks_count); -} - -static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len) -{ - ls->drop_locks_count = simple_strtol(buf, NULL, 0); - return len; -} - struct gdlm_attr { struct attribute attr; ssize_t (*show)(struct gdlm_ls *, char *); @@ -144,7 +133,6 @@ GDLM_ATTR(first_done, 0444, first_done_show, NULL); GDLM_ATTR(recover, 0644, recover_show, recover_store); GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); -GDLM_ATTR(drop_count, 0644, drop_count_show, drop_count_store); static struct attribute *gdlm_attrs[] = { &gdlm_attr_proto_name.attr, @@ -157,7 +145,6 @@ static struct attribute *gdlm_attrs[] = { &gdlm_attr_recover.attr, &gdlm_attr_recover_done.attr, &gdlm_attr_recover_status.attr, - &gdlm_attr_drop_count.attr, NULL, }; diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c index e53db6fd28a..38823efd698 100644 --- a/fs/gfs2/locking/dlm/thread.c +++ b/fs/gfs2/locking/dlm/thread.c @@ -9,367 +9,60 @@ #include "lock_dlm.h" -/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm - thread gets to it. */ - -static void queue_submit(struct gdlm_lock *lp) -{ - struct gdlm_ls *ls = lp->ls; - - spin_lock(&ls->async_lock); - list_add_tail(&lp->delay_list, &ls->submit); - spin_unlock(&ls->async_lock); - wake_up(&ls->thread_wait); -} - -static void process_blocking(struct gdlm_lock *lp, int bast_mode) -{ - struct gdlm_ls *ls = lp->ls; - unsigned int cb = 0; - - switch (gdlm_make_lmstate(bast_mode)) { - case LM_ST_EXCLUSIVE: - cb = LM_CB_NEED_E; - break; - case LM_ST_DEFERRED: - cb = LM_CB_NEED_D; - break; - case LM_ST_SHARED: - cb = LM_CB_NEED_S; - break; - default: - gdlm_assert(0, "unknown bast mode %u", lp->bast_mode); - } - - ls->fscb(ls->sdp, cb, &lp->lockname); -} - -static void wake_up_ast(struct gdlm_lock *lp) -{ - clear_bit(LFL_AST_WAIT, &lp->flags); - smp_mb__after_clear_bit(); - wake_up_bit(&lp->flags, LFL_AST_WAIT); -} - -static void process_complete(struct gdlm_lock *lp) -{ - struct gdlm_ls *ls = lp->ls; - struct lm_async_cb acb; - s16 prev_mode = lp->cur; - - memset(&acb, 0, sizeof(acb)); - - if (lp->lksb.sb_status == -DLM_ECANCEL) { - log_info("complete dlm cancel %x,%llx flags %lx", - lp->lockname.ln_type, - (unsigned long long)lp->lockname.ln_number, - lp->flags); - - lp->req = lp->cur; - acb.lc_ret |= LM_OUT_CANCELED; - if (lp->cur == DLM_LOCK_IV) - lp->lksb.sb_lkid = 0; - goto out; - } - - if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) { - if (lp->lksb.sb_status != -DLM_EUNLOCK) { - log_info("unlock sb_status %d %x,%llx flags %lx", - lp->lksb.sb_status, lp->lockname.ln_type, - (unsigned long long)lp->lockname.ln_number, - lp->flags); - return; - } - - lp->cur = DLM_LOCK_IV; - lp->req = DLM_LOCK_IV; - lp->lksb.sb_lkid = 0; - - if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) { - gdlm_delete_lp(lp); - return; - } - goto out; - } - - if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID) - memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE); - - if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) { - if (lp->req == DLM_LOCK_PR) - lp->req = DLM_LOCK_CW; - else if (lp->req == DLM_LOCK_CW) - lp->req = DLM_LOCK_PR; - } - - /* - * A canceled lock request. The lock was just taken off the delayed - * list and was never even submitted to dlm. - */ - - if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) { - log_info("complete internal cancel %x,%llx", - lp->lockname.ln_type, - (unsigned long long)lp->lockname.ln_number); - lp->req = lp->cur; - acb.lc_ret |= LM_OUT_CANCELED; - goto out; - } - - /* - * An error occured. - */ - - if (lp->lksb.sb_status) { - /* a "normal" error */ - if ((lp->lksb.sb_status == -EAGAIN) && - (lp->lkf & DLM_LKF_NOQUEUE)) { - lp->req = lp->cur; - if (lp->cur == DLM_LOCK_IV) - lp->lksb.sb_lkid = 0; - goto out; - } - - /* this could only happen with cancels I think */ - log_info("ast sb_status %d %x,%llx flags %lx", - lp->lksb.sb_status, lp->lockname.ln_type, - (unsigned long long)lp->lockname.ln_number, - lp->flags); - if (lp->lksb.sb_status == -EDEADLOCK && - lp->ls->fsflags & LM_MFLAG_CONV_NODROP) { - lp->req = lp->cur; - acb.lc_ret |= LM_OUT_CONV_DEADLK; - if (lp->cur == DLM_LOCK_IV) - lp->lksb.sb_lkid = 0; - goto out; - } else - return; - } - - /* - * This is an AST for an EX->EX conversion for sync_lvb from GFS. - */ - - if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) { - wake_up_ast(lp); - return; - } - - /* - * A lock has been demoted to NL because it initially completed during - * BLOCK_LOCKS. Now it must be requested in the originally requested - * mode. - */ - - if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) { - gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx", - lp->lockname.ln_type, - (unsigned long long)lp->lockname.ln_number); - gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx", - lp->lockname.ln_type, - (unsigned long long)lp->lockname.ln_number); - - lp->cur = DLM_LOCK_NL; - lp->req = lp->prev_req; - lp->prev_req = DLM_LOCK_IV; - lp->lkf &= ~DLM_LKF_CONVDEADLK; - - set_bit(LFL_NOCACHE, &lp->flags); - - if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) && - !test_bit(LFL_NOBLOCK, &lp->flags)) - gdlm_queue_delayed(lp); - else - queue_submit(lp); - return; - } - - /* - * A request is granted during dlm recovery. It may be granted - * because the locks of a failed node were cleared. In that case, - * there may be inconsistent data beneath this lock and we must wait - * for recovery to complete to use it. When gfs recovery is done this - * granted lock will be converted to NL and then reacquired in this - * granted state. - */ - - if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) && - !test_bit(LFL_NOBLOCK, &lp->flags) && - lp->req != DLM_LOCK_NL) { - - lp->cur = lp->req; - lp->prev_req = lp->req; - lp->req = DLM_LOCK_NL; - lp->lkf |= DLM_LKF_CONVERT; - lp->lkf &= ~DLM_LKF_CONVDEADLK; - - log_debug("rereq %x,%llx id %x %d,%d", - lp->lockname.ln_type, - (unsigned long long)lp->lockname.ln_number, - lp->lksb.sb_lkid, lp->cur, lp->req); - - set_bit(LFL_REREQUEST, &lp->flags); - queue_submit(lp); - return; - } - - /* - * DLM demoted the lock to NL before it was granted so GFS must be - * told it cannot cache data for this lock. - */ - - if (lp->lksb.sb_flags & DLM_SBF_DEMOTED) - set_bit(LFL_NOCACHE, &lp->flags); - -out: - /* - * This is an internal lock_dlm lock - */ - - if (test_bit(LFL_INLOCK, &lp->flags)) { - clear_bit(LFL_NOBLOCK, &lp->flags); - lp->cur = lp->req; - wake_up_ast(lp); - return; - } - - /* - * Normal completion of a lock request. Tell GFS it now has the lock. - */ - - clear_bit(LFL_NOBLOCK, &lp->flags); - lp->cur = lp->req; - - acb.lc_name = lp->lockname; - acb.lc_ret |= gdlm_make_lmstate(lp->cur); - - if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) && - (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL)) - acb.lc_ret |= LM_OUT_CACHEABLE; - - ls->fscb(ls->sdp, LM_CB_ASYNC, &acb); -} - -static inline int no_work(struct gdlm_ls *ls, int blocking) +static inline int no_work(struct gdlm_ls *ls) { int ret; spin_lock(&ls->async_lock); - ret = list_empty(&ls->complete) && list_empty(&ls->submit); - if (ret && blocking) - ret = list_empty(&ls->blocking); + ret = list_empty(&ls->submit); spin_unlock(&ls->async_lock); return ret; } -static inline int check_drop(struct gdlm_ls *ls) -{ - if (!ls->drop_locks_count) - return 0; - - if (time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ)) { - ls->drop_time = jiffies; - if (ls->all_locks_count >= ls->drop_locks_count) - return 1; - } - return 0; -} - -static int gdlm_thread(void *data, int blist) +static int gdlm_thread(void *data) { struct gdlm_ls *ls = (struct gdlm_ls *) data; struct gdlm_lock *lp = NULL; - uint8_t complete, blocking, submit, drop; - - /* Only thread1 is allowed to do blocking callbacks since gfs - may wait for a completion callback within a blocking cb. */ while (!kthread_should_stop()) { wait_event_interruptible(ls->thread_wait, - !no_work(ls, blist) || kthread_should_stop()); - - complete = blocking = submit = drop = 0; + !no_work(ls) || kthread_should_stop()); spin_lock(&ls->async_lock); - if (blist && !list_empty(&ls->blocking)) { - lp = list_entry(ls->blocking.next, struct gdlm_lock, - blist); - list_del_init(&lp->blist); - blocking = lp->bast_mode; - lp->bast_mode = 0; - } else if (!list_empty(&ls->complete)) { - lp = list_entry(ls->complete.next, struct gdlm_lock, - clist); - list_del_init(&lp->clist); - complete = 1; - } else if (!list_empty(&ls->submit)) { + if (!list_empty(&ls->submit)) { lp = list_entry(ls->submit.next, struct gdlm_lock, delay_list); list_del_init(&lp->delay_list); - submit = 1; + spin_unlock(&ls->async_lock); + gdlm_do_lock(lp); + spin_lock(&ls->async_lock); } - - drop = check_drop(ls); spin_unlock(&ls->async_lock); - - if (complete) - process_complete(lp); - - else if (blocking) - process_blocking(lp, blocking); - - else if (submit) - gdlm_do_lock(lp); - - if (drop) - ls->fscb(ls->sdp, LM_CB_DROPLOCKS, NULL); - - schedule(); } return 0; } -static int gdlm_thread1(void *data) -{ - return gdlm_thread(data, 1); -} - -static int gdlm_thread2(void *data) -{ - return gdlm_thread(data, 0); -} - int gdlm_init_threads(struct gdlm_ls *ls) { struct task_struct *p; int error; - p = kthread_run(gdlm_thread1, ls, "lock_dlm1"); - error = IS_ERR(p); - if (error) { - log_error("can't start lock_dlm1 thread %d", error); - return error; - } - ls->thread1 = p; - - p = kthread_run(gdlm_thread2, ls, "lock_dlm2"); + p = kthread_run(gdlm_thread, ls, "lock_dlm"); error = IS_ERR(p); if (error) { - log_error("can't start lock_dlm2 thread %d", error); - kthread_stop(ls->thread1); + log_error("can't start lock_dlm thread %d", error); return error; } - ls->thread2 = p; + ls->thread = p; return 0; } void gdlm_release_threads(struct gdlm_ls *ls) { - kthread_stop(ls->thread1); - kthread_stop(ls->thread2); + kthread_stop(ls->thread); } diff --git a/fs/gfs2/locking/nolock/Makefile b/fs/gfs2/locking/nolock/Makefile deleted file mode 100644 index 35e9730bc3a..00000000000 --- a/fs/gfs2/locking/nolock/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += lock_nolock.o -lock_nolock-y := main.o - diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c deleted file mode 100644 index 284a5ece8d9..00000000000 --- a/fs/gfs2/locking/nolock/main.c +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/fs.h> -#include <linux/lm_interface.h> - -struct nolock_lockspace { - unsigned int nl_lvb_size; -}; - -static const struct lm_lockops nolock_ops; - -static int nolock_mount(char *table_name, char *host_data, - lm_callback_t cb, void *cb_data, - unsigned int min_lvb_size, int flags, - struct lm_lockstruct *lockstruct, - struct kobject *fskobj) -{ - char *c; - unsigned int jid; - struct nolock_lockspace *nl; - - c = strstr(host_data, "jid="); - if (!c) - jid = 0; - else { - c += 4; - sscanf(c, "%u", &jid); - } - - nl = kzalloc(sizeof(struct nolock_lockspace), GFP_KERNEL); - if (!nl) - return -ENOMEM; - - nl->nl_lvb_size = min_lvb_size; - - lockstruct->ls_jid = jid; - lockstruct->ls_first = 1; - lockstruct->ls_lvb_size = min_lvb_size; - lockstruct->ls_lockspace = nl; - lockstruct->ls_ops = &nolock_ops; - lockstruct->ls_flags = LM_LSFLAG_LOCAL; - - return 0; -} - -static void nolock_others_may_mount(void *lockspace) -{ -} - -static void nolock_unmount(void *lockspace) -{ - struct nolock_lockspace *nl = lockspace; - kfree(nl); -} - -static void nolock_withdraw(void *lockspace) -{ -} - -/** - * nolock_get_lock - get a lm_lock_t given a descripton of the lock - * @lockspace: the lockspace the lock lives in - * @name: the name of the lock - * @lockp: return the lm_lock_t here - * - * Returns: 0 on success, -EXXX on failure - */ - -static int nolock_get_lock(void *lockspace, struct lm_lockname *name, - void **lockp) -{ - *lockp = lockspace; - return 0; -} - -/** - * nolock_put_lock - get rid of a lock structure - * @lock: the lock to throw away - * - */ - -static void nolock_put_lock(void *lock) -{ -} - -/** - * nolock_lock - acquire a lock - * @lock: the lock to manipulate - * @cur_state: the current state - * @req_state: the requested state - * @flags: modifier flags - * - * Returns: A bitmap of LM_OUT_* - */ - -static unsigned int nolock_lock(void *lock, unsigned int cur_state, - unsigned int req_state, unsigned int flags) -{ - return req_state | LM_OUT_CACHEABLE; -} - -/** - * nolock_unlock - unlock a lock - * @lock: the lock to manipulate - * @cur_state: the current state - * - * Returns: 0 - */ - -static unsigned int nolock_unlock(void *lock, unsigned int cur_state) -{ - return 0; -} - -static void nolock_cancel(void *lock) -{ -} - -/** - * nolock_hold_lvb - hold on to a lock value block - * @lock: the lock the LVB is associated with - * @lvbp: return the lm_lvb_t here - * - * Returns: 0 on success, -EXXX on failure - */ - -static int nolock_hold_lvb(void *lock, char **lvbp) -{ - struct nolock_lockspace *nl = lock; - int error = 0; - - *lvbp = kzalloc(nl->nl_lvb_size, GFP_NOFS); - if (!*lvbp) - error = -ENOMEM; - - return error; -} - -/** - * nolock_unhold_lvb - release a LVB - * @lock: the lock the LVB is associated with - * @lvb: the lock value block - * - */ - -static void nolock_unhold_lvb(void *lock, char *lvb) -{ - kfree(lvb); -} - -static int nolock_plock_get(void *lockspace, struct lm_lockname *name, - struct file *file, struct file_lock *fl) -{ - posix_test_lock(file, fl); - - return 0; -} - -static int nolock_plock(void *lockspace, struct lm_lockname *name, - struct file *file, int cmd, struct file_lock *fl) -{ - int error; - error = posix_lock_file_wait(file, fl); - return error; -} - -static int nolock_punlock(void *lockspace, struct lm_lockname *name, - struct file *file, struct file_lock *fl) -{ - int error; - error = posix_lock_file_wait(file, fl); - return error; -} - -static void nolock_recovery_done(void *lockspace, unsigned int jid, - unsigned int message) -{ -} - -static const struct lm_lockops nolock_ops = { - .lm_proto_name = "lock_nolock", - .lm_mount = nolock_mount, - .lm_others_may_mount = nolock_others_may_mount, - .lm_unmount = nolock_unmount, - .lm_withdraw = nolock_withdraw, - .lm_get_lock = nolock_get_lock, - .lm_put_lock = nolock_put_lock, - .lm_lock = nolock_lock, - .lm_unlock = nolock_unlock, - .lm_cancel = nolock_cancel, - .lm_hold_lvb = nolock_hold_lvb, - .lm_unhold_lvb = nolock_unhold_lvb, - .lm_plock_get = nolock_plock_get, - .lm_plock = nolock_plock, - .lm_punlock = nolock_punlock, - .lm_recovery_done = nolock_recovery_done, - .lm_owner = THIS_MODULE, -}; - -static int __init init_nolock(void) -{ - int error; - - error = gfs2_register_lockproto(&nolock_ops); - if (error) { - printk(KERN_WARNING - "lock_nolock: can't register protocol: %d\n", error); - return error; - } - - printk(KERN_INFO - "Lock_Nolock (built %s %s) installed\n", __DATE__, __TIME__); - return 0; -} - -static void __exit exit_nolock(void) -{ - gfs2_unregister_lockproto(&nolock_ops); -} - -module_init(init_nolock); -module_exit(exit_nolock); - -MODULE_DESCRIPTION("GFS Nolock Locking Module"); -MODULE_AUTHOR("Red Hat, Inc."); -MODULE_LICENSE("GPL"); - diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 548264b1836..ad305854bdc 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -18,6 +18,7 @@ #include <linux/delay.h> #include <linux/kthread.h> #include <linux/freezer.h> +#include <linux/bio.h> #include "gfs2.h" #include "incore.h" @@ -87,6 +88,8 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd) */ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +__releases(&sdp->sd_log_lock) +__acquires(&sdp->sd_log_lock) { struct gfs2_bufdata *bd, *s; struct buffer_head *bh; @@ -582,7 +585,6 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) memset(bh->b_data, 0, bh->b_size); set_buffer_uptodate(bh); clear_buffer_dirty(bh); - unlock_buffer(bh); gfs2_ail1_empty(sdp, 0); tail = current_tail(sdp); @@ -599,8 +601,23 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); lh->lh_hash = cpu_to_be32(hash); - set_buffer_dirty(bh); - if (sync_dirty_buffer(bh)) + bh->b_end_io = end_buffer_write_sync; + if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) + goto skip_barrier; + get_bh(bh); + submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh); + wait_on_buffer(bh); + if (buffer_eopnotsupp(bh)) { + clear_buffer_eopnotsupp(bh); + set_buffer_uptodate(bh); + set_bit(SDF_NOBARRIERS, &sdp->sd_flags); + lock_buffer(bh); +skip_barrier: + get_bh(bh); + submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh); + wait_on_buffer(bh); + } + if (!buffer_uptodate(bh)) gfs2_io_error_bh(sdp, bh); brelse(bh); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 77115281650..7c64510ccfd 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -21,6 +21,7 @@ */ static inline void gfs2_log_lock(struct gfs2_sbd *sdp) +__acquires(&sdp->sd_log_lock) { spin_lock(&sdp->sd_log_lock); } @@ -32,6 +33,7 @@ static inline void gfs2_log_lock(struct gfs2_sbd *sdp) */ static inline void gfs2_log_unlock(struct gfs2_sbd *sdp) +__releases(&sdp->sd_log_lock) { spin_unlock(&sdp->sd_log_lock); } diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 053e2ebbbd5..bb2cc303ac2 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -24,7 +24,7 @@ #include "util.h" #include "glock.h" -static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) +static void gfs2_init_inode_once(void *foo) { struct gfs2_inode *ip = foo; @@ -33,15 +33,13 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) ip->i_alloc = NULL; } -static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) +static void gfs2_init_glock_once(void *foo) { struct gfs2_glock *gl = foo; INIT_HLIST_NODE(&gl->gl_list); spin_lock_init(&gl->gl_spin); INIT_LIST_HEAD(&gl->gl_holders); - INIT_LIST_HEAD(&gl->gl_waiters1); - INIT_LIST_HEAD(&gl->gl_waiters3); gl->gl_lvb = NULL; atomic_set(&gl->gl_lvb_count, 0); INIT_LIST_HEAD(&gl->gl_reclaim); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 78d75f892f8..09853620c95 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -129,7 +129,7 @@ void gfs2_meta_sync(struct gfs2_glock *gl) } /** - * getbuf - Get a buffer with a given address space + * gfs2_getbuf - Get a buffer with a given address space * @gl: the glock * @blkno: the block number (filesystem scope) * @create: 1 if the buffer should be created @@ -137,7 +137,7 @@ void gfs2_meta_sync(struct gfs2_glock *gl) * Returns: the buffer */ -static struct buffer_head *getbuf(struct gfs2_glock *gl, u64 blkno, int create) +struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) { struct address_space *mapping = gl->gl_aspace->i_mapping; struct gfs2_sbd *sdp = gl->gl_sbd; @@ -205,7 +205,7 @@ static void meta_prep_new(struct buffer_head *bh) struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) { struct buffer_head *bh; - bh = getbuf(gl, blkno, CREATE); + bh = gfs2_getbuf(gl, blkno, CREATE); meta_prep_new(bh); return bh; } @@ -223,7 +223,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head **bhp) { - *bhp = getbuf(gl, blkno, CREATE); + *bhp = gfs2_getbuf(gl, blkno, CREATE); if (!buffer_uptodate(*bhp)) { ll_rw_block(READ_META, 1, bhp); if (flags & DIO_WAIT) { @@ -346,7 +346,7 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) struct buffer_head *bh; while (blen) { - bh = getbuf(ip->i_gl, bstart, NO_CREATE); + bh = gfs2_getbuf(ip->i_gl, bstart, NO_CREATE); if (bh) { lock_buffer(bh); gfs2_log_lock(sdp); @@ -421,7 +421,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (extlen > max_ra) extlen = max_ra; - first_bh = getbuf(gl, dblock, CREATE); + first_bh = gfs2_getbuf(gl, dblock, CREATE); if (buffer_uptodate(first_bh)) goto out; @@ -432,7 +432,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) extlen--; while (extlen) { - bh = getbuf(gl, dblock, CREATE); + bh = gfs2_getbuf(gl, dblock, CREATE); if (!buffer_uptodate(bh) && !buffer_locked(bh)) ll_rw_block(READA, 1, &bh); diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 73e3b1c76fe..b1a5f3674d4 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -47,6 +47,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head **bhp); int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); +struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create); void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, int meta); diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index b941f9f9f95..f96eb90a2cf 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c @@ -42,10 +42,11 @@ enum { Opt_nosuiddir, Opt_data_writeback, Opt_data_ordered, + Opt_meta, Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_lockproto, "lockproto=%s"}, {Opt_locktable, "locktable=%s"}, {Opt_hostdata, "hostdata=%s"}, @@ -66,6 +67,7 @@ static match_table_t tokens = { {Opt_nosuiddir, "nosuiddir"}, {Opt_data_writeback, "data=writeback"}, {Opt_data_ordered, "data=ordered"}, + {Opt_meta, "meta"}, {Opt_err, NULL} }; @@ -239,6 +241,11 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) case Opt_data_ordered: args->ar_data = GFS2_DATA_ORDERED; break; + case Opt_meta: + if (remount && args->ar_meta != 1) + goto cant_remount; + args->ar_meta = 1; + break; case Opt_err: default: fs_info(sdp, "unknown option: %s\n", o); diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index f55394e57cb..27563816e1c 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -499,34 +499,34 @@ static int __gfs2_readpage(void *file, struct page *page) * @file: The file to read * @page: The page of the file * - * This deals with the locking required. We use a trylock in order to - * avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE - * in the event that we are unable to get the lock. + * This deals with the locking required. We have to unlock and + * relock the page in order to get the locking in the right + * order. */ static int gfs2_readpage(struct file *file, struct page *page) { - struct gfs2_inode *ip = GFS2_I(page->mapping->host); - struct gfs2_holder *gh; + struct address_space *mapping = page->mapping; + struct gfs2_inode *ip = GFS2_I(mapping->host); + struct gfs2_holder gh; int error; - gh = gfs2_glock_is_locked_by_me(ip->i_gl); - if (!gh) { - gh = kmalloc(sizeof(struct gfs2_holder), GFP_NOFS); - if (!gh) - return -ENOBUFS; - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, gh); + unlock_page(page); + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); + error = gfs2_glock_nq(&gh); + if (unlikely(error)) + goto out; + error = AOP_TRUNCATED_PAGE; + lock_page(page); + if (page->mapping == mapping && !PageUptodate(page)) + error = __gfs2_readpage(file, page); + else unlock_page(page); - error = gfs2_glock_nq_atime(gh); - if (likely(error != 0)) - goto out; - return AOP_TRUNCATED_PAGE; - } - error = __gfs2_readpage(file, page); - gfs2_glock_dq(gh); + gfs2_glock_dq(&gh); out: - gfs2_holder_uninit(gh); - kfree(gh); + gfs2_holder_uninit(&gh); + if (error && error != AOP_TRUNCATED_PAGE) + lock_page(page); return error; } @@ -594,8 +594,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, struct gfs2_holder gh; int ret; - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); - ret = gfs2_glock_nq_atime(&gh); + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); + ret = gfs2_glock_nq(&gh); if (unlikely(ret)) goto out_uninit; if (!gfs2_is_stuffed(ip)) @@ -636,8 +636,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, unsigned to = from + len; struct page *page; - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh); - error = gfs2_glock_nq_atime(&ip->i_gh); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); + error = gfs2_glock_nq(&ip->i_gh); if (unlikely(error)) goto out_uninit; @@ -975,7 +975,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) if (gfs2_is_stuffed(ip)) return 0; - if (offset > i_size_read(&ip->i_inode)) + if (offset >= i_size_read(&ip->i_inode)) return 0; return 1; } @@ -1000,8 +1000,8 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, * unfortunately have the option of only flushing a range like * the VFS does. */ - gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh); - rv = gfs2_glock_nq_atime(&gh); + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); + rv = gfs2_glock_nq(&gh); if (rv) return rv; rv = gfs2_ok_for_dio(ip, rw, offset); diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index 990d9f4bc46..9cda8536530 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c @@ -134,7 +134,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) struct dentry *dentry; gfs2_str2qstr(&dotdot, ".."); - inode = gfs2_lookupi(child->d_inode, &dotdot, 1, NULL); + inode = gfs2_lookupi(child->d_inode, &dotdot, 1); if (!inode) return ERR_PTR(-ENOENT); diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index e1b7d525a06..3a747f8e218 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -15,6 +15,7 @@ #include <linux/uio.h> #include <linux/blkdev.h> #include <linux/mm.h> +#include <linux/mount.h> #include <linux/fs.h> #include <linux/gfs2_ondisk.h> #include <linux/ext2_fs.h> @@ -62,11 +63,11 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (!error) { - error = remote_llseek(file, offset, origin); + error = generic_file_llseek_unlocked(file, offset, origin); gfs2_glock_dq_uninit(&i_gh); } } else - error = remote_llseek(file, offset, origin); + error = generic_file_llseek_unlocked(file, offset, origin); return error; } @@ -88,8 +89,8 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) u64 offset = file->f_pos; int error; - gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); - error = gfs2_glock_nq_atime(&d_gh); + gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); + error = gfs2_glock_nq(&d_gh); if (error) { gfs2_holder_uninit(&d_gh); return error; @@ -133,7 +134,6 @@ static const u32 fsflags_to_gfs2[32] = { [7] = GFS2_DIF_NOATIME, [12] = GFS2_DIF_EXHASH, [14] = GFS2_DIF_INHERIT_JDATA, - [20] = GFS2_DIF_INHERIT_DIRECTIO, }; static const u32 gfs2_to_fsflags[32] = { @@ -142,7 +142,6 @@ static const u32 gfs2_to_fsflags[32] = { [gfs2fl_AppendOnly] = FS_APPEND_FL, [gfs2fl_NoAtime] = FS_NOATIME_FL, [gfs2fl_ExHash] = FS_INDEX_FL, - [gfs2fl_InheritDirectio] = FS_DIRECTIO_FL, [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, }; @@ -154,18 +153,14 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr) int error; u32 fsflags; - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); - error = gfs2_glock_nq_atime(&gh); + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); + error = gfs2_glock_nq(&gh); if (error) return error; fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags); - if (!S_ISDIR(inode->i_mode)) { - if (ip->i_di.di_flags & GFS2_DIF_JDATA) - fsflags |= FS_JOURNAL_DATA_FL; - if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO) - fsflags |= FS_DIRECTIO_FL; - } + if (!S_ISDIR(inode->i_mode) && ip->i_di.di_flags & GFS2_DIF_JDATA) + fsflags |= FS_JOURNAL_DATA_FL; if (put_user(fsflags, ptr)) error = -EFAULT; @@ -194,13 +189,11 @@ void gfs2_set_inode_flags(struct inode *inode) /* Flags that can be set by user space */ #define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \ - GFS2_DIF_DIRECTIO| \ GFS2_DIF_IMMUTABLE| \ GFS2_DIF_APPENDONLY| \ GFS2_DIF_NOATIME| \ GFS2_DIF_SYNC| \ GFS2_DIF_SYSTEM| \ - GFS2_DIF_INHERIT_DIRECTIO| \ GFS2_DIF_INHERIT_JDATA) /** @@ -220,10 +213,14 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) int error; u32 new_flags, flags; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + error = mnt_want_write(filp->f_path.mnt); if (error) return error; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + if (error) + goto out_drop_write; + flags = ip->i_di.di_flags; new_flags = (flags & ~mask) | (reqflags & mask); if ((new_flags ^ flags) == 0) @@ -242,7 +239,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) !capable(CAP_LINUX_IMMUTABLE)) goto out; if (!IS_IMMUTABLE(inode)) { - error = permission(inode, MAY_WRITE, NULL); + error = gfs2_permission(inode, MAY_WRITE); if (error) goto out; } @@ -272,6 +269,8 @@ out_trans_end: gfs2_trans_end(sdp); out: gfs2_glock_dq_uninit(&gh); +out_drop_write: + mnt_drop_write(filp->f_path.mnt); return error; } @@ -285,8 +284,6 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr) if (!S_ISDIR(inode->i_mode)) { if (gfsflags & GFS2_DIF_INHERIT_JDATA) gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA); - if (gfsflags & GFS2_DIF_INHERIT_DIRECTIO) - gfsflags ^= (GFS2_DIF_DIRECTIO | GFS2_DIF_INHERIT_DIRECTIO); return do_gfs2_set_flags(filp, gfsflags, ~0); } return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA); @@ -354,8 +351,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) struct gfs2_alloc *al; int ret; - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh); - ret = gfs2_glock_nq_atime(&gh); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + ret = gfs2_glock_nq(&gh); if (ret) goto out; @@ -437,8 +434,8 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) struct gfs2_holder i_gh; int error; - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); - error = gfs2_glock_nq_atime(&i_gh); + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); + error = gfs2_glock_nq(&i_gh); if (error) { gfs2_holder_uninit(&i_gh); return error; @@ -487,11 +484,6 @@ static int gfs2_open(struct inode *inode, struct file *file) goto fail_gunlock; } - /* Listen to the Direct I/O flag */ - - if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO) - file->f_flags |= O_DIRECT; - gfs2_glock_dq_uninit(&i_gh); } @@ -669,8 +661,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) int error = 0; state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; - flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE - | GL_FLOCK; + flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; mutex_lock(&fp->f_fl_mutex); @@ -683,9 +674,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) gfs2_glock_dq_wait(fl_gh); gfs2_holder_reinit(state, flags, fl_gh); } else { - error = gfs2_glock_get(GFS2_SB(&ip->i_inode), - ip->i_no_addr, &gfs2_flock_glops, - CREATE, &gl); + error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, + &gfs2_flock_glops, CREATE, &gl); if (error) goto out; gfs2_holder_init(gl, state, flags, fl_gh); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b2028c82e8d..b117fcf2c4f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -40,6 +40,44 @@ #define DO 0 #define UNDO 1 +static const u32 gfs2_old_fs_formats[] = { + 0 +}; + +static const u32 gfs2_old_multihost_formats[] = { + 0 +}; + +/** + * gfs2_tune_init - Fill a gfs2_tune structure with default values + * @gt: tune + * + */ + +static void gfs2_tune_init(struct gfs2_tune *gt) +{ + spin_lock_init(>->gt_spin); + + gt->gt_demote_secs = 300; + gt->gt_incore_log_blocks = 1024; + gt->gt_log_flush_secs = 60; + gt->gt_recoverd_secs = 60; + gt->gt_logd_secs = 1; + gt->gt_quotad_secs = 5; + gt->gt_quota_simul_sync = 64; + gt->gt_quota_warn_period = 10; + gt->gt_quota_scale_num = 1; + gt->gt_quota_scale_den = 1; + gt->gt_quota_cache_secs = 300; + gt->gt_quota_quantum = 60; + gt->gt_new_files_jdata = 0; + gt->gt_max_readahead = 1 << 18; + gt->gt_stall_secs = 600; + gt->gt_complain_secs = 10; + gt->gt_statfs_quantum = 30; + gt->gt_statfs_slow = 0; +} + static struct gfs2_sbd *init_sbd(struct super_block *sb) { struct gfs2_sbd *sdp; @@ -64,7 +102,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) mutex_init(&sdp->sd_rindex_mutex); INIT_LIST_HEAD(&sdp->sd_rindex_list); INIT_LIST_HEAD(&sdp->sd_rindex_mru_list); - INIT_LIST_HEAD(&sdp->sd_rindex_recent_list); INIT_LIST_HEAD(&sdp->sd_jindex_list); spin_lock_init(&sdp->sd_jindex_spin); @@ -97,21 +134,271 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) return sdp; } -static void init_vfs(struct super_block *sb, unsigned noatime) + +/** + * gfs2_check_sb - Check superblock + * @sdp: the filesystem + * @sb: The superblock + * @silent: Don't print a message if the check fails + * + * Checks the version code of the FS is one that we understand how to + * read and that the sizes of the various on-disk structures have not + * changed. + */ + +static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) { - struct gfs2_sbd *sdp = sb->s_fs_info; + unsigned int x; - sb->s_magic = GFS2_MAGIC; - sb->s_op = &gfs2_super_ops; - sb->s_export_op = &gfs2_export_ops; - sb->s_time_gran = 1; - sb->s_maxbytes = MAX_LFS_FILESIZE; + if (sb->sb_magic != GFS2_MAGIC || + sb->sb_type != GFS2_METATYPE_SB) { + if (!silent) + printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); + return -EINVAL; + } + + /* If format numbers match exactly, we're done. */ + + if (sb->sb_fs_format == GFS2_FORMAT_FS && + sb->sb_multihost_format == GFS2_FORMAT_MULTI) + return 0; + + if (sb->sb_fs_format != GFS2_FORMAT_FS) { + for (x = 0; gfs2_old_fs_formats[x]; x++) + if (gfs2_old_fs_formats[x] == sb->sb_fs_format) + break; + + if (!gfs2_old_fs_formats[x]) { + printk(KERN_WARNING + "GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk(KERN_WARNING + "GFS2: I don't know how to upgrade this FS\n"); + return -EINVAL; + } + } + + if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { + for (x = 0; gfs2_old_multihost_formats[x]; x++) + if (gfs2_old_multihost_formats[x] == + sb->sb_multihost_format) + break; + + if (!gfs2_old_multihost_formats[x]) { + printk(KERN_WARNING + "GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk(KERN_WARNING + "GFS2: I don't know how to upgrade this FS\n"); + return -EINVAL; + } + } + + if (!sdp->sd_args.ar_upgrade) { + printk(KERN_WARNING + "GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk(KERN_INFO + "GFS2: Use the \"upgrade\" mount option to upgrade " + "the FS\n"); + printk(KERN_INFO "GFS2: See the manual for more details\n"); + return -EINVAL; + } + + return 0; +} + +static void end_bio_io_page(struct bio *bio, int error) +{ + struct page *page = bio->bi_private; - if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME)) - set_bit(noatime, &sdp->sd_flags); + if (!error) + SetPageUptodate(page); + else + printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); + unlock_page(page); +} + +static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) +{ + const struct gfs2_sb *str = buf; + + sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); + sb->sb_type = be32_to_cpu(str->sb_header.mh_type); + sb->sb_format = be32_to_cpu(str->sb_header.mh_format); + sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); + sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); + sb->sb_bsize = be32_to_cpu(str->sb_bsize); + sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); + sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); + sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); + sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); + sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); + + memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); + memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); +} + +/** + * gfs2_read_super - Read the gfs2 super block from disk + * @sdp: The GFS2 super block + * @sector: The location of the super block + * @error: The error code to return + * + * This uses the bio functions to read the super block from disk + * because we want to be 100% sure that we never read cached data. + * A super block is read twice only during each GFS2 mount and is + * never written to by the filesystem. The first time its read no + * locks are held, and the only details which are looked at are those + * relating to the locking protocol. Once locking is up and working, + * the sb is read again under the lock to establish the location of + * the master directory (contains pointers to journals etc) and the + * root directory. + * + * Returns: 0 on success or error + */ + +static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) +{ + struct super_block *sb = sdp->sd_vfs; + struct gfs2_sb *p; + struct page *page; + struct bio *bio; + + page = alloc_page(GFP_NOFS); + if (unlikely(!page)) + return -ENOBUFS; + + ClearPageUptodate(page); + ClearPageDirty(page); + lock_page(page); + + bio = bio_alloc(GFP_NOFS, 1); + if (unlikely(!bio)) { + __free_page(page); + return -ENOBUFS; + } - /* Don't let the VFS update atimes. GFS2 handles this itself. */ - sb->s_flags |= MS_NOATIME | MS_NODIRATIME; + bio->bi_sector = sector * (sb->s_blocksize >> 9); + bio->bi_bdev = sb->s_bdev; + bio_add_page(bio, page, PAGE_SIZE, 0); + + bio->bi_end_io = end_bio_io_page; + bio->bi_private = page; + submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); + wait_on_page_locked(page); + bio_put(bio); + if (!PageUptodate(page)) { + __free_page(page); + return -EIO; + } + p = kmap(page); + gfs2_sb_in(&sdp->sd_sb, p); + kunmap(page); + __free_page(page); + return 0; +} +/** + * gfs2_read_sb - Read super block + * @sdp: The GFS2 superblock + * @gl: the glock for the superblock (assumed to be held) + * @silent: Don't print message if mount fails + * + */ + +static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) +{ + u32 hash_blocks, ind_blocks, leaf_blocks; + u32 tmp_blocks; + unsigned int x; + int error; + + error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); + if (error) { + if (!silent) + fs_err(sdp, "can't read superblock\n"); + return error; + } + + error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); + if (error) + return error; + + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - + GFS2_BASIC_BLOCK_SHIFT; + sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; + sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_dinode)) / sizeof(u64); + sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_meta_header)) / sizeof(u64); + sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); + sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; + sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; + sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64); + sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_meta_header)) / + sizeof(struct gfs2_quota_change); + + /* Compute maximum reservation required to add a entry to a directory */ + + hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH), + sdp->sd_jbsize); + + ind_blocks = 0; + for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { + tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs); + ind_blocks += tmp_blocks; + } + + leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; + + sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; + + sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_dinode); + sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; + for (x = 2;; x++) { + u64 space, d; + u32 m; + + space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; + d = space; + m = do_div(d, sdp->sd_inptrs); + + if (d != sdp->sd_heightsize[x - 1] || m) + break; + sdp->sd_heightsize[x] = space; + } + sdp->sd_max_height = x; + sdp->sd_heightsize[x] = ~0; + gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); + + sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_dinode); + sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; + for (x = 2;; x++) { + u64 space, d; + u32 m; + + space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; + d = space; + m = do_div(d, sdp->sd_inptrs); + + if (d != sdp->sd_jheightsize[x - 1] || m) + break; + sdp->sd_jheightsize[x] = space; + } + sdp->sd_max_jheight = x; + sdp->sd_jheightsize[x] = ~0; + gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); + + return 0; } static int init_names(struct gfs2_sbd *sdp, int silent) @@ -225,51 +512,59 @@ fail: return error; } -static inline struct inode *gfs2_lookup_root(struct super_block *sb, - u64 no_addr) +static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, + u64 no_addr, const char *name) { - return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); + struct gfs2_sbd *sdp = sb->s_fs_info; + struct dentry *dentry; + struct inode *inode; + + inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); + if (IS_ERR(inode)) { + fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); + return PTR_ERR(inode); + } + dentry = d_alloc_root(inode); + if (!dentry) { + fs_err(sdp, "can't alloc %s dentry\n", name); + iput(inode); + return -ENOMEM; + } + dentry->d_op = &gfs2_dops; + *dptr = dentry; + return 0; } -static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) +static int init_sb(struct gfs2_sbd *sdp, int silent) { struct super_block *sb = sdp->sd_vfs; struct gfs2_holder sb_gh; u64 no_addr; - struct inode *inode; - int error = 0; + int ret; - if (undo) { - if (sb->s_root) { - dput(sb->s_root); - sb->s_root = NULL; - } - return 0; + ret = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, + LM_ST_SHARED, 0, &sb_gh); + if (ret) { + fs_err(sdp, "can't acquire superblock glock: %d\n", ret); + return ret; } - error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, - LM_ST_SHARED, 0, &sb_gh); - if (error) { - fs_err(sdp, "can't acquire superblock glock: %d\n", error); - return error; - } - - error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); - if (error) { - fs_err(sdp, "can't read superblock: %d\n", error); + ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); + if (ret) { + fs_err(sdp, "can't read superblock: %d\n", ret); goto out; } /* Set up the buffer cache and SB for real */ if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { - error = -EINVAL; + ret = -EINVAL; fs_err(sdp, "FS block size (%u) is too small for device " "block size (%u)\n", sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); goto out; } if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { - error = -EINVAL; + ret = -EINVAL; fs_err(sdp, "FS block size (%u) is too big for machine " "page size (%u)\n", sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); @@ -279,26 +574,21 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) /* Get the root inode */ no_addr = sdp->sd_sb.sb_root_dir.no_addr; - if (sb->s_type == &gfs2meta_fs_type) - no_addr = sdp->sd_sb.sb_master_dir.no_addr; - inode = gfs2_lookup_root(sb, no_addr); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); - fs_err(sdp, "can't read in root inode: %d\n", error); + ret = gfs2_lookup_root(sb, &sdp->sd_root_dir, no_addr, "root"); + if (ret) goto out; - } - sb->s_root = d_alloc_root(inode); - if (!sb->s_root) { - fs_err(sdp, "can't get root dentry\n"); - error = -ENOMEM; - iput(inode); - } else - sb->s_root->d_op = &gfs2_dops; - + /* Get the master inode */ + no_addr = sdp->sd_sb.sb_master_dir.no_addr; + ret = gfs2_lookup_root(sb, &sdp->sd_master_dir, no_addr, "master"); + if (ret) { + dput(sdp->sd_root_dir); + goto out; + } + sb->s_root = dget(sdp->sd_args.ar_meta ? sdp->sd_master_dir : sdp->sd_root_dir); out: gfs2_glock_dq_uninit(&sb_gh); - return error; + return ret; } /** @@ -364,6 +654,8 @@ static int map_journal_extents(struct gfs2_sbd *sdp) static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) { + if (!sdp->sd_lockstruct.ls_ops->lm_others_may_mount) + return; if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) sdp->sd_lockstruct.ls_ops->lm_others_may_mount( sdp->sd_lockstruct.ls_lockspace); @@ -371,6 +663,7 @@ static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) static int init_journal(struct gfs2_sbd *sdp, int undo) { + struct inode *master = sdp->sd_master_dir->d_inode; struct gfs2_holder ji_gh; struct task_struct *p; struct gfs2_inode *ip; @@ -382,7 +675,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) goto fail_recoverd; } - sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex"); + sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); if (IS_ERR(sdp->sd_jindex)) { fs_err(sdp, "can't lookup journal index: %d\n", error); return PTR_ERR(sdp->sd_jindex); @@ -505,25 +798,17 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) { int error = 0; struct gfs2_inode *ip; - struct inode *inode; + struct inode *master = sdp->sd_master_dir->d_inode; if (undo) goto fail_qinode; - inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); - fs_err(sdp, "can't read in master directory: %d\n", error); - goto fail; - } - sdp->sd_master_dir = inode; - error = init_journal(sdp, undo); if (error) - goto fail_master; + goto fail; /* Read in the master inode number inode */ - sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum"); + sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum"); if (IS_ERR(sdp->sd_inum_inode)) { error = PTR_ERR(sdp->sd_inum_inode); fs_err(sdp, "can't read in inum inode: %d\n", error); @@ -532,7 +817,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) /* Read in the master statfs inode */ - sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs"); + sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs"); if (IS_ERR(sdp->sd_statfs_inode)) { error = PTR_ERR(sdp->sd_statfs_inode); fs_err(sdp, "can't read in statfs inode: %d\n", error); @@ -540,7 +825,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) } /* Read in the resource index inode */ - sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex"); + sdp->sd_rindex = gfs2_lookup_simple(master, "rindex"); if (IS_ERR(sdp->sd_rindex)) { error = PTR_ERR(sdp->sd_rindex); fs_err(sdp, "can't get resource index inode: %d\n", error); @@ -551,7 +836,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) sdp->sd_rindex_uptodate = 0; /* Read in the quota inode */ - sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota"); + sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota"); if (IS_ERR(sdp->sd_quota_inode)) { error = PTR_ERR(sdp->sd_quota_inode); fs_err(sdp, "can't get quota file inode: %d\n", error); @@ -570,8 +855,6 @@ fail_inum: iput(sdp->sd_inum_inode); fail_journal: init_journal(sdp, UNDO); -fail_master: - iput(sdp->sd_master_dir); fail: return error; } @@ -582,6 +865,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) char buf[30]; int error = 0; struct gfs2_inode *ip; + struct inode *master = sdp->sd_master_dir->d_inode; if (sdp->sd_args.ar_spectator) return 0; @@ -589,7 +873,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) if (undo) goto fail_qc_gh; - pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node"); + pn = gfs2_lookup_simple(master, "per_node"); if (IS_ERR(pn)) { error = PTR_ERR(pn); fs_err(sdp, "can't find per_node directory: %d\n", error); @@ -741,8 +1025,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) goto out; } - if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) || - gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) || + if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) || gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >= GFS2_MIN_LVB_SIZE)) { gfs2_unmount_lockproto(&sdp->sd_lockstruct); @@ -800,7 +1083,11 @@ static int fill_super(struct super_block *sb, void *data, int silent) goto fail; } - init_vfs(sb, SDF_NOATIME); + sb->s_magic = GFS2_MAGIC; + sb->s_op = &gfs2_super_ops; + sb->s_export_op = &gfs2_export_ops; + sb->s_time_gran = 1; + sb->s_maxbytes = MAX_LFS_FILESIZE; /* Set up the buffer cache and fill in some fake block size values to allow us to read-in the on-disk superblock. */ @@ -828,7 +1115,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) if (error) goto fail_lm; - error = init_sb(sdp, silent, DO); + error = init_sb(sdp, silent); if (error) goto fail_locking; @@ -869,11 +1156,15 @@ fail_per_node: fail_inodes: init_inodes(sdp, UNDO); fail_sb: - init_sb(sdp, 0, UNDO); + if (sdp->sd_root_dir) + dput(sdp->sd_root_dir); + if (sdp->sd_master_dir) + dput(sdp->sd_master_dir); + sb->s_root = NULL; fail_locking: init_locking(sdp, &mount_gh, UNDO); fail_lm: - gfs2_gl_hash_clear(sdp, WAIT); + gfs2_gl_hash_clear(sdp); gfs2_lm_unmount(sdp); while (invalidate_inodes(sb)) yield(); @@ -887,151 +1178,63 @@ fail: } static int gfs2_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) + const char *dev_name, void *data, struct vfsmount *mnt) { - struct super_block *sb; - struct gfs2_sbd *sdp; - int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); - if (error) - goto out; - sb = mnt->mnt_sb; - sdp = sb->s_fs_info; - sdp->sd_gfs2mnt = mnt; -out: - return error; + return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); } -static int fill_super_meta(struct super_block *sb, struct super_block *new, - void *data, int silent) +static struct super_block *get_gfs2_sb(const char *dev_name) { - struct gfs2_sbd *sdp = sb->s_fs_info; - struct inode *inode; - int error = 0; - - new->s_fs_info = sdp; - sdp->sd_vfs_meta = sb; - - init_vfs(new, SDF_NOATIME); - - /* Get the master inode */ - inode = igrab(sdp->sd_master_dir); - - new->s_root = d_alloc_root(inode); - if (!new->s_root) { - fs_err(sdp, "can't get root dentry\n"); - error = -ENOMEM; - iput(inode); - } else - new->s_root->d_op = &gfs2_dops; - - return error; -} - -static int set_bdev_super(struct super_block *s, void *data) -{ - s->s_bdev = data; - s->s_dev = s->s_bdev->bd_dev; - return 0; -} - -static int test_bdev_super(struct super_block *s, void *data) -{ - return s->s_bdev == data; -} - -static struct super_block* get_gfs2_sb(const char *dev_name) -{ - struct kstat stat; + struct super_block *sb; struct nameidata nd; - struct super_block *sb = NULL, *s; int error; error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); if (error) { - printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n", - dev_name); - goto out; - } - error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat); - - list_for_each_entry(s, &gfs2_fs_type.fs_supers, s_instances) { - if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || - (S_ISDIR(stat.mode) && - s == nd.path.dentry->d_inode->i_sb)) { - sb = s; - goto free_nd; - } + printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", + dev_name, error); + return NULL; } - - printk(KERN_WARNING "GFS2: Unrecognized block device or " - "mount point %s\n", dev_name); - -free_nd: + sb = nd.path.dentry->d_inode->i_sb; + if (sb && (sb->s_type == &gfs2_fs_type)) + atomic_inc(&sb->s_active); + else + sb = NULL; path_put(&nd.path); -out: return sb; } static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - int error = 0; - struct super_block *sb = NULL, *new; + struct super_block *sb = NULL; struct gfs2_sbd *sdp; sb = get_gfs2_sb(dev_name); if (!sb) { printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); - error = -ENOENT; - goto error; + return -ENOENT; } sdp = sb->s_fs_info; - if (sdp->sd_vfs_meta) { - printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n"); - error = -EBUSY; - goto error; - } - down(&sb->s_bdev->bd_mount_sem); - new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev); - up(&sb->s_bdev->bd_mount_sem); - if (IS_ERR(new)) { - error = PTR_ERR(new); - goto error; - } - new->s_flags = flags; - strlcpy(new->s_id, sb->s_id, sizeof(new->s_id)); - sb_set_blocksize(new, sb->s_blocksize); - error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0); - if (error) { - up_write(&new->s_umount); - deactivate_super(new); - goto error; - } - - new->s_flags |= MS_ACTIVE; - - /* Grab a reference to the gfs2 mount point */ - atomic_inc(&sdp->sd_gfs2mnt->mnt_count); - return simple_set_mnt(mnt, new); -error: - return error; + mnt->mnt_sb = sb; + mnt->mnt_root = dget(sdp->sd_master_dir); + return 0; } static void gfs2_kill_sb(struct super_block *sb) { - if (sb->s_fs_info) { - gfs2_delete_debugfs_file(sb->s_fs_info); - gfs2_meta_syncfs(sb->s_fs_info); + struct gfs2_sbd *sdp = sb->s_fs_info; + if (sdp) { + gfs2_meta_syncfs(sdp); + dput(sdp->sd_root_dir); + dput(sdp->sd_master_dir); + sdp->sd_root_dir = NULL; + sdp->sd_master_dir = NULL; } + shrink_dcache_sb(sb); kill_block_super(sb); -} - -static void gfs2_kill_sb_meta(struct super_block *sb) -{ - struct gfs2_sbd *sdp = sb->s_fs_info; - generic_shutdown_super(sb); - sdp->sd_vfs_meta = NULL; - atomic_dec(&sdp->sd_gfs2mnt->mnt_count); + if (sdp) + gfs2_delete_debugfs_file(sdp); } struct file_system_type gfs2_fs_type = { @@ -1046,7 +1249,6 @@ struct file_system_type gfs2meta_fs_type = { .name = "gfs2meta", .fs_flags = FS_REQUIRES_DEV, .get_sb = gfs2_get_sb_meta, - .kill_sb = gfs2_kill_sb_meta, .owner = THIS_MODULE, }; diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 2686ad4c002..534e1e2c65c 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -74,7 +74,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, return PTR_ERR(inode); } - inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd); + inode = gfs2_lookupi(dir, &dentry->d_name, 0); if (inode) { if (!IS_ERR(inode)) { gfs2_holder_uninit(ghs); @@ -109,7 +109,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, dentry->d_op = &gfs2_dops; - inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd); + inode = gfs2_lookupi(dir, &dentry->d_name, 0); if (inode && IS_ERR(inode)) return ERR_CAST(inode); @@ -159,11 +159,15 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); - error = gfs2_glock_nq_m(2, ghs); + error = gfs2_glock_nq(ghs); /* parent */ if (error) - goto out; + goto out_parent; + + error = gfs2_glock_nq(ghs + 1); /* child */ + if (error) + goto out_child; - error = permission(dir, MAY_WRITE | MAY_EXEC, NULL); + error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); if (error) goto out_gunlock; @@ -245,8 +249,10 @@ out_alloc: if (alloc_required) gfs2_alloc_put(dip); out_gunlock: - gfs2_glock_dq_m(2, ghs); -out: + gfs2_glock_dq(ghs + 1); +out_child: + gfs2_glock_dq(ghs); +out_parent: gfs2_holder_uninit(ghs); gfs2_holder_uninit(ghs + 1); if (!error) { @@ -302,7 +308,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) error = gfs2_unlink_ok(dip, &dentry->d_name, ip); if (error) - goto out_rgrp; + goto out_gunlock; error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); if (error) @@ -316,6 +322,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) out_end_trans: gfs2_trans_end(sdp); +out_gunlock: gfs2_glock_dq(ghs + 2); out_rgrp: gfs2_holder_uninit(ghs + 2); @@ -485,7 +492,6 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) struct gfs2_holder ri_gh; int error; - error = gfs2_rindex_hold(sdp, &ri_gh); if (error) return error; @@ -495,9 +501,17 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); - error = gfs2_glock_nq_m(3, ghs); + error = gfs2_glock_nq(ghs); /* parent */ if (error) - goto out; + goto out_parent; + + error = gfs2_glock_nq(ghs + 1); /* child */ + if (error) + goto out_child; + + error = gfs2_glock_nq(ghs + 2); /* rgrp */ + if (error) + goto out_rgrp; error = gfs2_unlink_ok(dip, &dentry->d_name, ip); if (error) @@ -523,11 +537,15 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) gfs2_trans_end(sdp); out_gunlock: - gfs2_glock_dq_m(3, ghs); -out: - gfs2_holder_uninit(ghs); - gfs2_holder_uninit(ghs + 1); + gfs2_glock_dq(ghs + 2); +out_rgrp: gfs2_holder_uninit(ghs + 2); + gfs2_glock_dq(ghs + 1); +out_child: + gfs2_holder_uninit(ghs + 1); + gfs2_glock_dq(ghs); +out_parent: + gfs2_holder_uninit(ghs); gfs2_glock_dq_uninit(&ri_gh); return error; } @@ -571,6 +589,54 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, return 0; } +/* + * gfs2_ok_to_move - check if it's ok to move a directory to another directory + * @this: move this + * @to: to here + * + * Follow @to back to the root and make sure we don't encounter @this + * Assumes we already hold the rename lock. + * + * Returns: errno + */ + +static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) +{ + struct inode *dir = &to->i_inode; + struct super_block *sb = dir->i_sb; + struct inode *tmp; + struct qstr dotdot; + int error = 0; + + gfs2_str2qstr(&dotdot, ".."); + + igrab(dir); + + for (;;) { + if (dir == &this->i_inode) { + error = -EINVAL; + break; + } + if (dir == sb->s_root->d_inode) { + error = 0; + break; + } + + tmp = gfs2_lookupi(dir, &dotdot, 1); + if (IS_ERR(tmp)) { + error = PTR_ERR(tmp); + break; + } + + iput(dir); + dir = tmp; + } + + iput(dir); + + return error; +} + /** * gfs2_rename - Rename a file * @odir: Parent directory of old file name @@ -589,7 +655,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, struct gfs2_inode *ip = GFS2_I(odentry->d_inode); struct gfs2_inode *nip = NULL; struct gfs2_sbd *sdp = GFS2_SB(odir); - struct gfs2_holder ghs[5], r_gh; + struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; struct gfs2_rgrpd *nrgd; unsigned int num_gh; int dir_rename = 0; @@ -603,19 +669,20 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, return 0; } - /* Make sure we aren't trying to move a dirctory into it's subdir */ - if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) { - dir_rename = 1; - - error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0, - &r_gh); + if (odip != ndip) { + error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, + 0, &r_gh); if (error) goto out; - error = gfs2_ok_to_move(ip, ndip); - if (error) - goto out_gunlock_r; + if (S_ISDIR(ip->i_inode.i_mode)) { + dir_rename = 1; + /* don't move a dirctory into it's subdir */ + error = gfs2_ok_to_move(ip, ndip); + if (error) + goto out_gunlock_r; + } } num_gh = 1; @@ -639,9 +706,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); } - error = gfs2_glock_nq_m(num_gh, ghs); - if (error) - goto out_uninit; + for (x = 0; x < num_gh; x++) { + error = gfs2_glock_nq(ghs + x); + if (error) + goto out_gunlock; + } /* Check out the old directory */ @@ -669,7 +738,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, } } } else { - error = permission(ndir, MAY_WRITE | MAY_EXEC, NULL); + error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC); if (error) goto out_gunlock; @@ -704,7 +773,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, /* Check out the dir to be renamed */ if (dir_rename) { - error = permission(odentry->d_inode, MAY_WRITE, NULL); + error = gfs2_permission(odentry->d_inode, MAY_WRITE); if (error) goto out_gunlock; } @@ -804,12 +873,12 @@ out_alloc: if (alloc_required) gfs2_alloc_put(ndip); out_gunlock: - gfs2_glock_dq_m(num_gh, ghs); -out_uninit: - for (x = 0; x < num_gh; x++) + while (x--) { + gfs2_glock_dq(ghs + x); gfs2_holder_uninit(ghs + x); + } out_gunlock_r: - if (dir_rename) + if (r_gh.gh_gl) gfs2_glock_dq_uninit(&r_gh); out: return error; @@ -891,7 +960,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) * Returns: errno */ -static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd) +int gfs2_permission(struct inode *inode, int mask) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder i_gh; @@ -905,7 +974,10 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd) unlock = 1; } - error = generic_permission(inode, mask, gfs2_check_acl); + if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) + error = -EACCES; + else + error = generic_permission(inode, mask, gfs2_check_acl); if (unlock) gfs2_glock_dq_uninit(&i_gh); diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 0b7cc920eb8..d5355d9b592 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -20,6 +20,7 @@ #include <linux/gfs2_ondisk.h> #include <linux/crc32.h> #include <linux/lm_interface.h> +#include <linux/time.h> #include "gfs2.h" #include "incore.h" @@ -38,6 +39,7 @@ #include "dir.h" #include "eattr.h" #include "bmap.h" +#include "meta_io.h" /** * gfs2_write_inode - Make sure the inode is stable on the disk @@ -50,16 +52,74 @@ static int gfs2_write_inode(struct inode *inode, int sync) { struct gfs2_inode *ip = GFS2_I(inode); - - /* Check this is a "normal" inode */ - if (test_bit(GIF_USER, &ip->i_flags)) { - if (current->flags & PF_MEMALLOC) - return 0; - if (sync) - gfs2_log_flush(GFS2_SB(inode), ip->i_gl); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_holder gh; + struct buffer_head *bh; + struct timespec atime; + struct gfs2_dinode *di; + int ret = 0; + + /* Check this is a "normal" inode, etc */ + if (!test_bit(GIF_USER, &ip->i_flags) || + (current->flags & PF_MEMALLOC)) + return 0; + ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + if (ret) + goto do_flush; + ret = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (ret) + goto do_unlock; + ret = gfs2_meta_inode_buffer(ip, &bh); + if (ret == 0) { + di = (struct gfs2_dinode *)bh->b_data; + atime.tv_sec = be64_to_cpu(di->di_atime); + atime.tv_nsec = be32_to_cpu(di->di_atime_nsec); + if (timespec_compare(&inode->i_atime, &atime) > 0) { + gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_dinode_out(ip, bh->b_data); + } + brelse(bh); } + gfs2_trans_end(sdp); +do_unlock: + gfs2_glock_dq_uninit(&gh); +do_flush: + if (sync != 0) + gfs2_log_flush(GFS2_SB(inode), ip->i_gl); + return ret; +} - return 0; +/** + * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one + * @sdp: the filesystem + * + * Returns: errno + */ + +static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) +{ + struct gfs2_holder t_gh; + int error; + + gfs2_quota_sync(sdp); + gfs2_statfs_sync(sdp); + + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, + &t_gh); + if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + return error; + + gfs2_meta_syncfs(sdp); + gfs2_log_shutdown(sdp); + + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + + if (t_gh.gh_gl) + gfs2_glock_dq_uninit(&t_gh); + + gfs2_quota_cleanup(sdp); + + return error; } /** @@ -73,12 +133,6 @@ static void gfs2_put_super(struct super_block *sb) struct gfs2_sbd *sdp = sb->s_fs_info; int error; - if (!sdp) - return; - - if (!strncmp(sb->s_type->name, "gfs2meta", 8)) - return; /* Nothing to do */ - /* Unfreeze the filesystem, if we need to */ mutex_lock(&sdp->sd_freeze_lock); @@ -101,7 +155,6 @@ static void gfs2_put_super(struct super_block *sb) /* Release stuff */ - iput(sdp->sd_master_dir); iput(sdp->sd_jindex); iput(sdp->sd_inum_inode); iput(sdp->sd_statfs_inode); @@ -126,7 +179,7 @@ static void gfs2_put_super(struct super_block *sb) gfs2_clear_rgrpd(sdp); gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ - gfs2_gl_hash_clear(sdp, WAIT); + gfs2_gl_hash_clear(sdp); /* Unmount the locking protocol */ gfs2_lm_unmount(sdp); @@ -152,10 +205,11 @@ static void gfs2_write_super(struct super_block *sb) * * Flushes the log to disk. */ + static int gfs2_sync_fs(struct super_block *sb, int wait) { sb->s_dirt = 0; - if (wait) + if (wait && sb->s_fs_info) gfs2_log_flush(sb->s_fs_info, NULL); return 0; } @@ -270,14 +324,6 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) } } - if (*flags & (MS_NOATIME | MS_NODIRATIME)) - set_bit(SDF_NOATIME, &sdp->sd_flags); - else - clear_bit(SDF_NOATIME, &sdp->sd_flags); - - /* Don't let the VFS update atimes. GFS2 handles this itself. */ - *flags |= MS_NOATIME | MS_NODIRATIME; - return error; } @@ -295,6 +341,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) * inode's blocks, or alternatively pass the baton on to another * node for later deallocation. */ + static void gfs2_drop_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); @@ -333,6 +380,16 @@ static void gfs2_clear_inode(struct inode *inode) } } +static int is_ancestor(const struct dentry *d1, const struct dentry *d2) +{ + do { + if (d1 == d2) + return 1; + d1 = d1->d_parent; + } while (!IS_ROOT(d1)); + return 0; +} + /** * gfs2_show_options - Show mount options for /proc/mounts * @s: seq_file structure @@ -346,6 +403,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; struct gfs2_args *args = &sdp->sd_args; + if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) + seq_printf(s, ",meta"); if (args->ar_lockproto[0]) seq_printf(s, ",lockproto=%s", args->ar_lockproto); if (args->ar_locktable[0]) @@ -414,6 +473,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) * conversion on the iopen lock, but we can change that later. This * is safe, just less efficient. */ + static void gfs2_delete_inode(struct inode *inode) { struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; @@ -478,8 +538,6 @@ out: clear_inode(inode); } - - static struct inode *gfs2_alloc_inode(struct super_block *sb) { struct gfs2_inode *ip; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 56aaf915c59..3e073f5144f 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -904,7 +904,7 @@ static int need_sync(struct gfs2_quota_data *qd) do_sync = 0; else { value *= gfs2_jindex_size(sdp) * num; - do_div(value, den); + value = div_s64(value, den); value += (s64)be64_to_cpu(qd->qd_qb.qb_value); if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit)) do_sync = 0; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 2888e4b4b1c..d5e91f4f6a0 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -428,6 +428,9 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, unsigned int message) { + if (!sdp->sd_lockstruct.ls_ops->lm_recovery_done) + return; + if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) sdp->sd_lockstruct.ls_ops->lm_recovery_done( sdp->sd_lockstruct.ls_lockspace, jid, message); @@ -505,7 +508,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, LM_FLAG_NOEXP | LM_FLAG_PRIORITY | - GL_NOCANCEL | GL_NOCACHE, &t_gh); + GL_NOCACHE, &t_gh); if (error) goto fail_gunlock_ji; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3401628d742..2d90fb25350 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -371,11 +371,6 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp) spin_lock(&sdp->sd_rindex_spin); sdp->sd_rindex_forward = NULL; - head = &sdp->sd_rindex_recent_list; - while (!list_empty(head)) { - rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent); - list_del(&rgd->rd_recent); - } spin_unlock(&sdp->sd_rindex_spin); head = &sdp->sd_rindex_list; @@ -945,107 +940,30 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) } /** - * recent_rgrp_first - get first RG from "recent" list - * @sdp: The GFS2 superblock - * @rglast: address of the rgrp used last - * - * Returns: The first rgrp in the recent list - */ - -static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp, - u64 rglast) -{ - struct gfs2_rgrpd *rgd; - - spin_lock(&sdp->sd_rindex_spin); - - if (rglast) { - list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { - if (rgrp_contains_block(rgd, rglast)) - goto out; - } - } - rgd = NULL; - if (!list_empty(&sdp->sd_rindex_recent_list)) - rgd = list_entry(sdp->sd_rindex_recent_list.next, - struct gfs2_rgrpd, rd_recent); -out: - spin_unlock(&sdp->sd_rindex_spin); - return rgd; -} - -/** * recent_rgrp_next - get next RG from "recent" list * @cur_rgd: current rgrp - * @remove: * * Returns: The next rgrp in the recent list */ -static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd, - int remove) +static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd) { struct gfs2_sbd *sdp = cur_rgd->rd_sbd; struct list_head *head; struct gfs2_rgrpd *rgd; spin_lock(&sdp->sd_rindex_spin); - - head = &sdp->sd_rindex_recent_list; - - list_for_each_entry(rgd, head, rd_recent) { - if (rgd == cur_rgd) { - if (cur_rgd->rd_recent.next != head) - rgd = list_entry(cur_rgd->rd_recent.next, - struct gfs2_rgrpd, rd_recent); - else - rgd = NULL; - - if (remove) - list_del(&cur_rgd->rd_recent); - - goto out; - } + head = &sdp->sd_rindex_mru_list; + if (unlikely(cur_rgd->rd_list_mru.next == head)) { + spin_unlock(&sdp->sd_rindex_spin); + return NULL; } - - rgd = NULL; - if (!list_empty(head)) - rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent); - -out: + rgd = list_entry(cur_rgd->rd_list_mru.next, struct gfs2_rgrpd, rd_list_mru); spin_unlock(&sdp->sd_rindex_spin); return rgd; } /** - * recent_rgrp_add - add an RG to tail of "recent" list - * @new_rgd: The rgrp to add - * - */ - -static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd) -{ - struct gfs2_sbd *sdp = new_rgd->rd_sbd; - struct gfs2_rgrpd *rgd; - unsigned int count = 0; - unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp); - - spin_lock(&sdp->sd_rindex_spin); - - list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { - if (rgd == new_rgd) - goto out; - - if (++count >= max) - goto out; - } - list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list); - -out: - spin_unlock(&sdp->sd_rindex_spin); -} - -/** * forward_rgrp_get - get an rgrp to try next from full list * @sdp: The GFS2 superblock * @@ -1112,9 +1030,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) int loops = 0; int error, rg_locked; - /* Try recently successful rgrps */ - - rgd = recent_rgrp_first(sdp, ip->i_goal); + rgd = gfs2_blk2rgrpd(sdp, ip->i_goal); while (rgd) { rg_locked = 0; @@ -1136,11 +1052,9 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) gfs2_glock_dq_uninit(&al->al_rgd_gh); if (inode) return inode; - rgd = recent_rgrp_next(rgd, 1); - break; - + /* fall through */ case GLR_TRYFAILED: - rgd = recent_rgrp_next(rgd, 0); + rgd = recent_rgrp_next(rgd); break; default: @@ -1199,7 +1113,9 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) out: if (begin) { - recent_rgrp_add(rgd); + spin_lock(&sdp->sd_rindex_spin); + list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); + spin_unlock(&sdp->sd_rindex_spin); rgd = gfs2_rgrpd_get_next(rgd); if (!rgd) rgd = gfs2_rgrpd_get_first(sdp); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 7aeacbc65f3..c3ba3d9d0aa 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -33,314 +33,6 @@ #include "trans.h" #include "util.h" -static const u32 gfs2_old_fs_formats[] = { - 0 -}; - -static const u32 gfs2_old_multihost_formats[] = { - 0 -}; - -/** - * gfs2_tune_init - Fill a gfs2_tune structure with default values - * @gt: tune - * - */ - -void gfs2_tune_init(struct gfs2_tune *gt) -{ - spin_lock_init(>->gt_spin); - - gt->gt_demote_secs = 300; - gt->gt_incore_log_blocks = 1024; - gt->gt_log_flush_secs = 60; - gt->gt_recoverd_secs = 60; - gt->gt_logd_secs = 1; - gt->gt_quotad_secs = 5; - gt->gt_quota_simul_sync = 64; - gt->gt_quota_warn_period = 10; - gt->gt_quota_scale_num = 1; - gt->gt_quota_scale_den = 1; - gt->gt_quota_cache_secs = 300; - gt->gt_quota_quantum = 60; - gt->gt_atime_quantum = 3600; - gt->gt_new_files_jdata = 0; - gt->gt_new_files_directio = 0; - gt->gt_max_readahead = 1 << 18; - gt->gt_stall_secs = 600; - gt->gt_complain_secs = 10; - gt->gt_statfs_quantum = 30; - gt->gt_statfs_slow = 0; -} - -/** - * gfs2_check_sb - Check superblock - * @sdp: the filesystem - * @sb: The superblock - * @silent: Don't print a message if the check fails - * - * Checks the version code of the FS is one that we understand how to - * read and that the sizes of the various on-disk structures have not - * changed. - */ - -int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) -{ - unsigned int x; - - if (sb->sb_magic != GFS2_MAGIC || - sb->sb_type != GFS2_METATYPE_SB) { - if (!silent) - printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); - return -EINVAL; - } - - /* If format numbers match exactly, we're done. */ - - if (sb->sb_fs_format == GFS2_FORMAT_FS && - sb->sb_multihost_format == GFS2_FORMAT_MULTI) - return 0; - - if (sb->sb_fs_format != GFS2_FORMAT_FS) { - for (x = 0; gfs2_old_fs_formats[x]; x++) - if (gfs2_old_fs_formats[x] == sb->sb_fs_format) - break; - - if (!gfs2_old_fs_formats[x]) { - printk(KERN_WARNING - "GFS2: code version (%u, %u) is incompatible " - "with ondisk format (%u, %u)\n", - GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, - sb->sb_fs_format, sb->sb_multihost_format); - printk(KERN_WARNING - "GFS2: I don't know how to upgrade this FS\n"); - return -EINVAL; - } - } - - if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { - for (x = 0; gfs2_old_multihost_formats[x]; x++) - if (gfs2_old_multihost_formats[x] == - sb->sb_multihost_format) - break; - - if (!gfs2_old_multihost_formats[x]) { - printk(KERN_WARNING - "GFS2: code version (%u, %u) is incompatible " - "with ondisk format (%u, %u)\n", - GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, - sb->sb_fs_format, sb->sb_multihost_format); - printk(KERN_WARNING - "GFS2: I don't know how to upgrade this FS\n"); - return -EINVAL; - } - } - - if (!sdp->sd_args.ar_upgrade) { - printk(KERN_WARNING - "GFS2: code version (%u, %u) is incompatible " - "with ondisk format (%u, %u)\n", - GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, - sb->sb_fs_format, sb->sb_multihost_format); - printk(KERN_INFO - "GFS2: Use the \"upgrade\" mount option to upgrade " - "the FS\n"); - printk(KERN_INFO "GFS2: See the manual for more details\n"); - return -EINVAL; - } - - return 0; -} - - -static void end_bio_io_page(struct bio *bio, int error) -{ - struct page *page = bio->bi_private; - - if (!error) - SetPageUptodate(page); - else - printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); - unlock_page(page); -} - -static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) -{ - const struct gfs2_sb *str = buf; - - sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); - sb->sb_type = be32_to_cpu(str->sb_header.mh_type); - sb->sb_format = be32_to_cpu(str->sb_header.mh_format); - sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); - sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); - sb->sb_bsize = be32_to_cpu(str->sb_bsize); - sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); - sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); - sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); - sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); - sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); - - memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); - memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); -} - -/** - * gfs2_read_super - Read the gfs2 super block from disk - * @sdp: The GFS2 super block - * @sector: The location of the super block - * @error: The error code to return - * - * This uses the bio functions to read the super block from disk - * because we want to be 100% sure that we never read cached data. - * A super block is read twice only during each GFS2 mount and is - * never written to by the filesystem. The first time its read no - * locks are held, and the only details which are looked at are those - * relating to the locking protocol. Once locking is up and working, - * the sb is read again under the lock to establish the location of - * the master directory (contains pointers to journals etc) and the - * root directory. - * - * Returns: 0 on success or error - */ - -int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) -{ - struct super_block *sb = sdp->sd_vfs; - struct gfs2_sb *p; - struct page *page; - struct bio *bio; - - page = alloc_page(GFP_NOFS); - if (unlikely(!page)) - return -ENOBUFS; - - ClearPageUptodate(page); - ClearPageDirty(page); - lock_page(page); - - bio = bio_alloc(GFP_NOFS, 1); - if (unlikely(!bio)) { - __free_page(page); - return -ENOBUFS; - } - - bio->bi_sector = sector * (sb->s_blocksize >> 9); - bio->bi_bdev = sb->s_bdev; - bio_add_page(bio, page, PAGE_SIZE, 0); - - bio->bi_end_io = end_bio_io_page; - bio->bi_private = page; - submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); - wait_on_page_locked(page); - bio_put(bio); - if (!PageUptodate(page)) { - __free_page(page); - return -EIO; - } - p = kmap(page); - gfs2_sb_in(&sdp->sd_sb, p); - kunmap(page); - __free_page(page); - return 0; -} - -/** - * gfs2_read_sb - Read super block - * @sdp: The GFS2 superblock - * @gl: the glock for the superblock (assumed to be held) - * @silent: Don't print message if mount fails - * - */ - -int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) -{ - u32 hash_blocks, ind_blocks, leaf_blocks; - u32 tmp_blocks; - unsigned int x; - int error; - - error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); - if (error) { - if (!silent) - fs_err(sdp, "can't read superblock\n"); - return error; - } - - error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); - if (error) - return error; - - sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - - GFS2_BASIC_BLOCK_SHIFT; - sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; - sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - - sizeof(struct gfs2_dinode)) / sizeof(u64); - sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - - sizeof(struct gfs2_meta_header)) / sizeof(u64); - sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); - sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; - sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; - sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64); - sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - - sizeof(struct gfs2_meta_header)) / - sizeof(struct gfs2_quota_change); - - /* Compute maximum reservation required to add a entry to a directory */ - - hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH), - sdp->sd_jbsize); - - ind_blocks = 0; - for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { - tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs); - ind_blocks += tmp_blocks; - } - - leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; - - sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; - - sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - - sizeof(struct gfs2_dinode); - sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; - for (x = 2;; x++) { - u64 space, d; - u32 m; - - space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; - d = space; - m = do_div(d, sdp->sd_inptrs); - - if (d != sdp->sd_heightsize[x - 1] || m) - break; - sdp->sd_heightsize[x] = space; - } - sdp->sd_max_height = x; - sdp->sd_heightsize[x] = ~0; - gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); - - sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - - sizeof(struct gfs2_dinode); - sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; - for (x = 2;; x++) { - u64 space, d; - u32 m; - - space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; - d = space; - m = do_div(d, sdp->sd_inptrs); - - if (d != sdp->sd_jheightsize[x - 1] || m) - break; - sdp->sd_jheightsize[x] = space; - } - sdp->sd_max_jheight = x; - sdp->sd_jheightsize[x] = ~0; - gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); - - return 0; -} - /** * gfs2_jindex_hold - Grab a lock on the jindex * @sdp: The GFS2 superblock @@ -390,7 +82,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) break; INIT_LIST_HEAD(&jd->extent_list); - jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL); + jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { if (!jd->jd_inode) error = -ENOENT; @@ -582,39 +274,6 @@ fail: return error; } -/** - * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one - * @sdp: the filesystem - * - * Returns: errno - */ - -int gfs2_make_fs_ro(struct gfs2_sbd *sdp) -{ - struct gfs2_holder t_gh; - int error; - - gfs2_quota_sync(sdp); - gfs2_statfs_sync(sdp); - - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, - &t_gh); - if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) - return error; - - gfs2_meta_syncfs(sdp); - gfs2_log_shutdown(sdp); - - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - - if (t_gh.gh_gl) - gfs2_glock_dq_uninit(&t_gh); - - gfs2_quota_cleanup(sdp); - - return error; -} - static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) { const struct gfs2_statfs_change *str = buf; @@ -941,8 +600,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, } error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED, - LM_FLAG_PRIORITY | GL_NOCACHE, - t_gh); + GL_NOCACHE, t_gh); list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { error = gfs2_jdesc_check(jd); diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 44361ecc44f..50a4c9b1215 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -12,11 +12,6 @@ #include "incore.h" -void gfs2_tune_init(struct gfs2_tune *gt); - -int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); -int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); -int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector); void gfs2_lm_unmount(struct gfs2_sbd *sdp); static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) @@ -40,7 +35,6 @@ int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, struct gfs2_inode **ipp); int gfs2_make_fs_rw(struct gfs2_sbd *sdp); -int gfs2_make_fs_ro(struct gfs2_sbd *sdp); int gfs2_statfs_init(struct gfs2_sbd *sdp); void gfs2_statfs_change(struct gfs2_sbd *sdp, diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 9ab9fc85ecd..7e1879f1a02 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -110,18 +110,6 @@ static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf, return len; } -static ssize_t shrink_store(struct gfs2_sbd *sdp, const char *buf, size_t len) -{ - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - - if (simple_strtol(buf, NULL, 0) != 1) - return -EINVAL; - - gfs2_gl_hash_clear(sdp, NO_WAIT); - return len; -} - static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { @@ -175,7 +163,6 @@ static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store) GFS2_ATTR(id, 0444, id_show, NULL); GFS2_ATTR(fsname, 0444, fsname_show, NULL); GFS2_ATTR(freeze, 0644, freeze_show, freeze_store); -GFS2_ATTR(shrink, 0200, NULL, shrink_store); GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store); GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store); GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store); @@ -186,7 +173,6 @@ static struct attribute *gfs2_attrs[] = { &gfs2_attr_id.attr, &gfs2_attr_fsname.attr, &gfs2_attr_freeze.attr, - &gfs2_attr_shrink.attr, &gfs2_attr_withdraw.attr, &gfs2_attr_statfs_sync.attr, &gfs2_attr_quota_sync.attr, @@ -283,14 +269,6 @@ ARGS_ATTR(quota, "%u\n"); ARGS_ATTR(suiddir, "%d\n"); ARGS_ATTR(data, "%d\n"); -/* one oddball doesn't fit the macro mold */ -static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%d\n", - !!test_bit(SDF_NOATIME, &sdp->sd_flags)); -} -static struct args_attr args_attr_noatime = __ATTR_RO(noatime); - static struct attribute *args_attrs[] = { &args_attr_lockproto.attr, &args_attr_locktable.attr, @@ -306,7 +284,6 @@ static struct attribute *args_attrs[] = { &args_attr_quota.attr, &args_attr_suiddir.attr, &args_attr_data.attr, - &args_attr_noatime.attr, NULL, }; @@ -421,12 +398,10 @@ TUNE_ATTR(incore_log_blocks, 0); TUNE_ATTR(log_flush_secs, 0); TUNE_ATTR(quota_warn_period, 0); TUNE_ATTR(quota_quantum, 0); -TUNE_ATTR(atime_quantum, 0); TUNE_ATTR(max_readahead, 0); TUNE_ATTR(complain_secs, 0); TUNE_ATTR(statfs_slow, 0); TUNE_ATTR(new_files_jdata, 0); -TUNE_ATTR(new_files_directio, 0); TUNE_ATTR(quota_simul_sync, 1); TUNE_ATTR(quota_cache_secs, 1); TUNE_ATTR(stall_secs, 1); @@ -442,7 +417,6 @@ static struct attribute *tune_attrs[] = { &tune_attr_log_flush_secs.attr, &tune_attr_quota_warn_period.attr, &tune_attr_quota_quantum.attr, - &tune_attr_atime_quantum.attr, &tune_attr_max_readahead.attr, &tune_attr_complain_secs.attr, &tune_attr_statfs_slow.attr, @@ -455,7 +429,6 @@ static struct attribute *tune_attrs[] = { &tune_attr_quotad_secs.attr, &tune_attr_quota_scale.attr, &tune_attr_new_files_jdata.attr, - &tune_attr_new_files_directio.attr, NULL, }; |