diff options
Diffstat (limited to 'fs/dlm')
-rw-r--r-- | fs/dlm/Makefile | 1 | ||||
-rw-r--r-- | fs/dlm/config.c | 50 | ||||
-rw-r--r-- | fs/dlm/config.h | 3 | ||||
-rw-r--r-- | fs/dlm/dlm_internal.h | 9 | ||||
-rw-r--r-- | fs/dlm/lock.c | 5 | ||||
-rw-r--r-- | fs/dlm/lock.h | 1 | ||||
-rw-r--r-- | fs/dlm/main.c | 7 | ||||
-rw-r--r-- | fs/dlm/member.c | 34 | ||||
-rw-r--r-- | fs/dlm/plock.c | 439 | ||||
-rw-r--r-- | fs/dlm/recoverd.c | 1 |
10 files changed, 526 insertions, 24 deletions
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile index d248e60951b..ca1c9124c8c 100644 --- a/fs/dlm/Makefile +++ b/fs/dlm/Makefile @@ -10,6 +10,7 @@ dlm-y := ast.o \ midcomms.o \ netlink.o \ lowcomms.o \ + plock.o \ rcom.o \ recover.o \ recoverd.o \ diff --git a/fs/dlm/config.c b/fs/dlm/config.c index c3ad1dff3b2..eac23bd288b 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -114,7 +114,7 @@ struct cluster_attribute { }; static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, - unsigned int *info_field, int check_zero, + int *info_field, int check_zero, const char *buf, size_t len) { unsigned int x; @@ -284,6 +284,7 @@ struct node { struct list_head list; /* space->members */ int nodeid; int weight; + int new; }; static struct configfs_group_operations clusters_ops = { @@ -565,6 +566,7 @@ static struct config_item *make_node(struct config_group *g, const char *name) config_item_init_type_name(&nd->item, name, &node_type); nd->nodeid = -1; nd->weight = 1; /* default weight of 1 if none is set */ + nd->new = 1; /* set to 0 once it's been read by dlm_nodeid_list() */ mutex_lock(&sp->members_lock); list_add(&nd->list, &sp->members); @@ -805,12 +807,13 @@ static void put_comm(struct comm *cm) } /* caller must free mem */ -int dlm_nodeid_list(char *lsname, int **ids_out) +int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, + int **new_out, int *new_count_out) { struct space *sp; struct node *nd; - int i = 0, rv = 0; - int *ids; + int i = 0, rv = 0, ids_count = 0, new_count = 0; + int *ids, *new; sp = get_space(lsname); if (!sp) @@ -818,23 +821,50 @@ int dlm_nodeid_list(char *lsname, int **ids_out) mutex_lock(&sp->members_lock); if (!sp->members_count) { - rv = 0; + rv = -EINVAL; + printk(KERN_ERR "dlm: zero members_count\n"); goto out; } - ids = kcalloc(sp->members_count, sizeof(int), GFP_KERNEL); + ids_count = sp->members_count; + + ids = kcalloc(ids_count, sizeof(int), GFP_KERNEL); if (!ids) { rv = -ENOMEM; goto out; } - rv = sp->members_count; - list_for_each_entry(nd, &sp->members, list) + list_for_each_entry(nd, &sp->members, list) { ids[i++] = nd->nodeid; + if (nd->new) + new_count++; + } + + if (ids_count != i) + printk(KERN_ERR "dlm: bad nodeid count %d %d\n", ids_count, i); + + if (!new_count) + goto out_ids; + + new = kcalloc(new_count, sizeof(int), GFP_KERNEL); + if (!new) { + kfree(ids); + rv = -ENOMEM; + goto out; + } - if (rv != i) - printk("bad nodeid count %d %d\n", rv, i); + i = 0; + list_for_each_entry(nd, &sp->members, list) { + if (nd->new) { + new[i++] = nd->nodeid; + nd->new = 0; + } + } + *new_count_out = new_count; + *new_out = new; + out_ids: + *ids_count_out = ids_count; *ids_out = ids; out: mutex_unlock(&sp->members_lock); diff --git a/fs/dlm/config.h b/fs/dlm/config.h index a3170fe2209..4f1d6fce58c 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -35,7 +35,8 @@ extern struct dlm_config_info dlm_config; int dlm_config_init(void); void dlm_config_exit(void); int dlm_node_weight(char *lsname, int nodeid); -int dlm_nodeid_list(char *lsname, int **ids_out); +int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, + int **new_out, int *new_count_out); int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); int dlm_our_nodeid(void); diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index d30ea8b433a..5a7ac33b629 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -37,14 +37,11 @@ #include <linux/jhash.h> #include <linux/miscdevice.h> #include <linux/mutex.h> -#include <asm/semaphore.h> #include <asm/uaccess.h> #include <linux/dlm.h> #include "config.h" -#define DLM_LOCKSPACE_LEN 64 - /* Size of the temp buffer midcomms allocates on the stack. We try to make this large enough so most messages fit. FIXME: should sctp make this unnecessary? */ @@ -133,8 +130,10 @@ struct dlm_member { struct dlm_recover { struct list_head list; - int *nodeids; + int *nodeids; /* nodeids of all members */ int node_count; + int *new; /* nodeids of new members */ + int new_count; uint64_t seq; }; @@ -580,6 +579,8 @@ static inline int dlm_no_directory(struct dlm_ls *ls) int dlm_netlink_init(void); void dlm_netlink_exit(void); void dlm_timeout_warn(struct dlm_lkb *lkb); +int dlm_plock_init(void); +void dlm_plock_exit(void); #ifdef CONFIG_DLM_DEBUG int dlm_register_debugfs(void); diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 8f250ac8b92..2d3d1027ce2 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -165,7 +165,7 @@ void dlm_print_lkb(struct dlm_lkb *lkb) lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_ast_type); } -void dlm_print_rsb(struct dlm_rsb *r) +static void dlm_print_rsb(struct dlm_rsb *r) { printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n", r->res_nodeid, r->res_flags, r->res_first_lkid, @@ -1956,8 +1956,7 @@ static void confirm_master(struct dlm_rsb *r, int error) list_del_init(&lkb->lkb_rsb_lookup); r->res_first_lkid = lkb->lkb_id; _request_lock(r, lkb); - } else - r->res_nodeid = -1; + } break; default: diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 05d9c82e646..88e93c80cc2 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -13,7 +13,6 @@ #ifndef __LOCK_DOT_H__ #define __LOCK_DOT_H__ -void dlm_print_rsb(struct dlm_rsb *r); void dlm_dump_rsb(struct dlm_rsb *r); void dlm_print_lkb(struct dlm_lkb *lkb); void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms); diff --git a/fs/dlm/main.c b/fs/dlm/main.c index 58487fb95a4..b80e0aa3cfa 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c @@ -46,10 +46,16 @@ static int __init init_dlm(void) if (error) goto out_user; + error = dlm_plock_init(); + if (error) + goto out_netlink; + printk("DLM (built %s %s) installed\n", __DATE__, __TIME__); return 0; + out_netlink: + dlm_netlink_exit(); out_user: dlm_user_exit(); out_debug: @@ -66,6 +72,7 @@ static int __init init_dlm(void) static void __exit exit_dlm(void) { + dlm_plock_exit(); dlm_netlink_exit(); dlm_user_exit(); dlm_config_exit(); diff --git a/fs/dlm/member.c b/fs/dlm/member.c index fa17f5a2788..26133f05ae3 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -210,6 +210,23 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) } } + /* Add an entry to ls_nodes_gone for members that were removed and + then added again, so that previous state for these nodes will be + cleared during recovery. */ + + for (i = 0; i < rv->new_count; i++) { + if (!dlm_is_member(ls, rv->new[i])) + continue; + log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); + + memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); + if (!memb) + return -ENOMEM; + memb->nodeid = rv->new[i]; + list_add_tail(&memb->list, &ls->ls_nodes_gone); + neg++; + } + /* add new members to ls_nodes */ for (i = 0; i < rv->node_count; i++) { @@ -314,15 +331,16 @@ int dlm_ls_stop(struct dlm_ls *ls) int dlm_ls_start(struct dlm_ls *ls) { struct dlm_recover *rv = NULL, *rv_old; - int *ids = NULL; - int error, count; + int *ids = NULL, *new = NULL; + int error, ids_count = 0, new_count = 0; rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL); if (!rv) return -ENOMEM; - error = count = dlm_nodeid_list(ls->ls_name, &ids); - if (error <= 0) + error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count, + &new, &new_count); + if (error < 0) goto fail; spin_lock(&ls->ls_recover_lock); @@ -337,14 +355,19 @@ int dlm_ls_start(struct dlm_ls *ls) } rv->nodeids = ids; - rv->node_count = count; + rv->node_count = ids_count; + rv->new = new; + rv->new_count = new_count; rv->seq = ++ls->ls_recover_seq; rv_old = ls->ls_recover_args; ls->ls_recover_args = rv; spin_unlock(&ls->ls_recover_lock); if (rv_old) { + log_error(ls, "unused recovery %llx %d", + (unsigned long long)rv_old->seq, rv_old->node_count); kfree(rv_old->nodeids); + kfree(rv_old->new); kfree(rv_old); } @@ -354,6 +377,7 @@ int dlm_ls_start(struct dlm_ls *ls) fail: kfree(rv); kfree(ids); + kfree(new); return error; } diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c new file mode 100644 index 00000000000..d6d6e370f89 --- /dev/null +++ b/fs/dlm/plock.c @@ -0,0 +1,439 @@ +/* + * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License version 2. + */ + +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <linux/poll.h> +#include <linux/dlm.h> +#include <linux/dlm_plock.h> + +#include "dlm_internal.h" +#include "lockspace.h" + +static spinlock_t ops_lock; +static struct list_head send_list; +static struct list_head recv_list; +static wait_queue_head_t send_wq; +static wait_queue_head_t recv_wq; + +struct plock_op { + struct list_head list; + int done; + struct dlm_plock_info info; +}; + +struct plock_xop { + struct plock_op xop; + void *callback; + void *fl; + void *file; + struct file_lock flc; +}; + + +static inline void set_version(struct dlm_plock_info *info) +{ + info->version[0] = DLM_PLOCK_VERSION_MAJOR; + info->version[1] = DLM_PLOCK_VERSION_MINOR; + info->version[2] = DLM_PLOCK_VERSION_PATCH; +} + +static int check_version(struct dlm_plock_info *info) +{ + if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || + (DLM_PLOCK_VERSION_MINOR < info->version[1])) { + log_print("plock device version mismatch: " + "kernel (%u.%u.%u), user (%u.%u.%u)", + DLM_PLOCK_VERSION_MAJOR, + DLM_PLOCK_VERSION_MINOR, + DLM_PLOCK_VERSION_PATCH, + info->version[0], + info->version[1], + info->version[2]); + return -EINVAL; + } + return 0; +} + +static void send_op(struct plock_op *op) +{ + set_version(&op->info); + INIT_LIST_HEAD(&op->list); + spin_lock(&ops_lock); + list_add_tail(&op->list, &send_list); + spin_unlock(&ops_lock); + wake_up(&send_wq); +} + +int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + int cmd, struct file_lock *fl) +{ + struct dlm_ls *ls; + struct plock_op *op; + struct plock_xop *xop; + int rv; + + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + + xop = kzalloc(sizeof(*xop), GFP_KERNEL); + if (!xop) { + rv = -ENOMEM; + goto out; + } + + op = &xop->xop; + op->info.optype = DLM_PLOCK_OP_LOCK; + op->info.pid = fl->fl_pid; + op->info.ex = (fl->fl_type == F_WRLCK); + op->info.wait = IS_SETLKW(cmd); + op->info.fsid = ls->ls_global_id; + op->info.number = number; + op->info.start = fl->fl_start; + op->info.end = fl->fl_end; + if (fl->fl_lmops && fl->fl_lmops->fl_grant) { + /* fl_owner is lockd which doesn't distinguish + processes on the nfs client */ + op->info.owner = (__u64) fl->fl_pid; + xop->callback = fl->fl_lmops->fl_grant; + locks_init_lock(&xop->flc); + locks_copy_lock(&xop->flc, fl); + xop->fl = fl; + xop->file = file; + } else { + op->info.owner = (__u64)(long) fl->fl_owner; + xop->callback = NULL; + } + + send_op(op); + + if (xop->callback == NULL) + wait_event(recv_wq, (op->done != 0)); + else { + rv = -EINPROGRESS; + goto out; + } + + spin_lock(&ops_lock); + if (!list_empty(&op->list)) { + log_error(ls, "dlm_posix_lock: op on list %llx", + (unsigned long long)number); + list_del(&op->list); + } + spin_unlock(&ops_lock); + + rv = op->info.rv; + + if (!rv) { + if (posix_lock_file_wait(file, fl) < 0) + log_error(ls, "dlm_posix_lock: vfs lock error %llx", + (unsigned long long)number); + } + + kfree(xop); +out: + dlm_put_lockspace(ls); + return rv; +} +EXPORT_SYMBOL_GPL(dlm_posix_lock); + +/* Returns failure iff a succesful lock operation should be canceled */ +static int dlm_plock_callback(struct plock_op *op) +{ + struct file *file; + struct file_lock *fl; + struct file_lock *flc; + int (*notify)(void *, void *, int) = NULL; + struct plock_xop *xop = (struct plock_xop *)op; + int rv = 0; + + spin_lock(&ops_lock); + if (!list_empty(&op->list)) { + log_print("dlm_plock_callback: op on list %llx", + (unsigned long long)op->info.number); + list_del(&op->list); + } + spin_unlock(&ops_lock); + + /* check if the following 2 are still valid or make a copy */ + file = xop->file; + flc = &xop->flc; + fl = xop->fl; + notify = xop->callback; + + if (op->info.rv) { + notify(flc, NULL, op->info.rv); + goto out; + } + + /* got fs lock; bookkeep locally as well: */ + flc->fl_flags &= ~FL_SLEEP; + if (posix_lock_file(file, flc, NULL)) { + /* + * This can only happen in the case of kmalloc() failure. + * The filesystem's own lock is the authoritative lock, + * so a failure to get the lock locally is not a disaster. + * As long as the fs cannot reliably cancel locks (especially + * in a low-memory situation), we're better off ignoring + * this failure than trying to recover. + */ + log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p", + (unsigned long long)op->info.number, file, fl); + } + + rv = notify(flc, NULL, 0); + if (rv) { + /* XXX: We need to cancel the fs lock here: */ + log_print("dlm_plock_callback: lock granted after lock request " + "failed; dangling lock!\n"); + goto out; + } + +out: + kfree(xop); + return rv; +} + +int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + struct file_lock *fl) +{ + struct dlm_ls *ls; + struct plock_op *op; + int rv; + + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) { + rv = -ENOMEM; + goto out; + } + + if (posix_lock_file_wait(file, fl) < 0) + log_error(ls, "dlm_posix_unlock: vfs unlock error %llx", + (unsigned long long)number); + + op->info.optype = DLM_PLOCK_OP_UNLOCK; + op->info.pid = fl->fl_pid; + op->info.fsid = ls->ls_global_id; + op->info.number = number; + op->info.start = fl->fl_start; + op->info.end = fl->fl_end; + if (fl->fl_lmops && fl->fl_lmops->fl_grant) + op->info.owner = (__u64) fl->fl_pid; + else + op->info.owner = (__u64)(long) fl->fl_owner; + + send_op(op); + wait_event(recv_wq, (op->done != 0)); + + spin_lock(&ops_lock); + if (!list_empty(&op->list)) { + log_error(ls, "dlm_posix_unlock: op on list %llx", + (unsigned long long)number); + list_del(&op->list); + } + spin_unlock(&ops_lock); + + rv = op->info.rv; + + if (rv == -ENOENT) + rv = 0; + + kfree(op); +out: + dlm_put_lockspace(ls); + return rv; +} +EXPORT_SYMBOL_GPL(dlm_posix_unlock); + +int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, + struct file_lock *fl) +{ + struct dlm_ls *ls; + struct plock_op *op; + int rv; + + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) { + rv = -ENOMEM; + goto out; + } + + op->info.optype = DLM_PLOCK_OP_GET; + op->info.pid = fl->fl_pid; + op->info.ex = (fl->fl_type == F_WRLCK); + op->info.fsid = ls->ls_global_id; + op->info.number = number; + op->info.start = fl->fl_start; + op->info.end = fl->fl_end; + if (fl->fl_lmops && fl->fl_lmops->fl_grant) + op->info.owner = (__u64) fl->fl_pid; + else + op->info.owner = (__u64)(long) fl->fl_owner; + + send_op(op); + wait_event(recv_wq, (op->done != 0)); + + spin_lock(&ops_lock); + if (!list_empty(&op->list)) { + log_error(ls, "dlm_posix_get: op on list %llx", + (unsigned long long)number); + list_del(&op->list); + } + spin_unlock(&ops_lock); + + /* info.rv from userspace is 1 for conflict, 0 for no-conflict, + -ENOENT if there are no locks on the file */ + + rv = op->info.rv; + + fl->fl_type = F_UNLCK; + if (rv == -ENOENT) + rv = 0; + else if (rv > 0) { + fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; + fl->fl_pid = op->info.pid; + fl->fl_start = op->info.start; + fl->fl_end = op->info.end; + rv = 0; + } + + kfree(op); +out: + dlm_put_lockspace(ls); + return rv; +} +EXPORT_SYMBOL_GPL(dlm_posix_get); + +/* a read copies out one plock request from the send list */ +static ssize_t dev_read(struct file *file, char __user *u, size_t count, + loff_t *ppos) +{ + struct dlm_plock_info info; + struct plock_op *op = NULL; + + if (count < sizeof(info)) + return -EINVAL; + + spin_lock(&ops_lock); + if (!list_empty(&send_list)) { + op = list_entry(send_list.next, struct plock_op, list); + list_move(&op->list, &recv_list); + memcpy(&info, &op->info, sizeof(info)); + } + spin_unlock(&ops_lock); + + if (!op) + return -EAGAIN; + + if (copy_to_user(u, &info, sizeof(info))) + return -EFAULT; + return sizeof(info); +} + +/* a write copies in one plock result that should match a plock_op + on the recv list */ +static ssize_t dev_write(struct file *file, const char __user *u, size_t count, + loff_t *ppos) +{ + struct dlm_plock_info info; + struct plock_op *op; + int found = 0; + + if (count != sizeof(info)) + return -EINVAL; + + if (copy_from_user(&info, u, sizeof(info))) + return -EFAULT; + + if (check_version(&info)) + return -EINVAL; + + spin_lock(&ops_lock); + list_for_each_entry(op, &recv_list, list) { + if (op->info.fsid == info.fsid && op->info.number == info.number && + op->info.owner == info.owner) { + list_del_init(&op->list); + found = 1; + op->done = 1; + memcpy(&op->info, &info, sizeof(info)); + break; + } + } + spin_unlock(&ops_lock); + + if (found) { + struct plock_xop *xop; + xop = (struct plock_xop *)op; + if (xop->callback) + count = dlm_plock_callback(op); + else + wake_up(&recv_wq); + } else + log_print("dev_write no op %x %llx", info.fsid, + (unsigned long long)info.number); + return count; +} + +static unsigned int dev_poll(struct file *file, poll_table *wait) +{ + unsigned int mask = 0; + + poll_wait(file, &send_wq, wait); + + spin_lock(&ops_lock); + if (!list_empty(&send_list)) + mask = POLLIN | POLLRDNORM; + spin_unlock(&ops_lock); + + return mask; +} + +static const struct file_operations dev_fops = { + .read = dev_read, + .write = dev_write, + .poll = dev_poll, + .owner = THIS_MODULE +}; + +static struct miscdevice plock_dev_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = DLM_PLOCK_MISC_NAME, + .fops = &dev_fops +}; + +int dlm_plock_init(void) +{ + int rv; + + spin_lock_init(&ops_lock); + INIT_LIST_HEAD(&send_list); + INIT_LIST_HEAD(&recv_list); + init_waitqueue_head(&send_wq); + init_waitqueue_head(&recv_wq); + + rv = misc_register(&plock_dev_misc); + if (rv) + log_print("dlm_plock_init: misc_register failed %d", rv); + return rv; +} + +void dlm_plock_exit(void) +{ + if (misc_deregister(&plock_dev_misc) < 0) + log_print("dlm_plock_exit: misc_deregister failed"); +} + diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 997f9531d59..fd677c8c3d3 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -257,6 +257,7 @@ static void do_ls_recovery(struct dlm_ls *ls) if (rv) { ls_recover(ls, rv); kfree(rv->nodeids); + kfree(rv->new); kfree(rv); } } |