aboutsummaryrefslogtreecommitdiff
path: root/fs/dlm
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dlm')
-rw-r--r--fs/dlm/Kconfig18
-rw-r--r--fs/dlm/config.c154
-rw-r--r--fs/dlm/config.h17
-rw-r--r--fs/dlm/dlm_internal.h20
-rw-r--r--fs/dlm/lock.c87
-rw-r--r--fs/dlm/lockspace.c10
-rw-r--r--fs/dlm/lowcomms-sctp.c151
-rw-r--r--fs/dlm/lowcomms-tcp.c384
-rw-r--r--fs/dlm/midcomms.c4
-rw-r--r--fs/dlm/rcom.c85
-rw-r--r--fs/dlm/recover.c8
-rw-r--r--fs/dlm/recoverd.c22
-rw-r--r--fs/dlm/user.c9
-rw-r--r--fs/dlm/util.c4
14 files changed, 493 insertions, 480 deletions
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index b5654a284fe..6fa7b0d5c04 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,21 +3,21 @@ menu "Distributed Lock Manager"
config DLM
tristate "Distributed Lock Manager (DLM)"
- depends on IPV6 || IPV6=n
+ depends on SYSFS && (IPV6 || IPV6=n)
select CONFIGFS_FS
select IP_SCTP if DLM_SCTP
help
- A general purpose distributed lock manager for kernel or userspace
- applications.
+ A general purpose distributed lock manager for kernel or userspace
+ applications.
choice
prompt "Select DLM communications protocol"
depends on DLM
default DLM_TCP
help
- The DLM Can use TCP or SCTP for it's network communications.
- SCTP supports multi-homed operations whereas TCP doesn't.
- However, SCTP seems to have stability problems at the moment.
+ The DLM Can use TCP or SCTP for it's network communications.
+ SCTP supports multi-homed operations whereas TCP doesn't.
+ However, SCTP seems to have stability problems at the moment.
config DLM_TCP
bool "TCP/IP"
@@ -31,8 +31,8 @@ config DLM_DEBUG
bool "DLM debugging"
depends on DLM
help
- Under the debugfs mount point, the name of each lockspace will
- appear as a file in the "dlm" directory. The output is the
- list of resource and locks the local node knows about.
+ Under the debugfs mount point, the name of each lockspace will
+ appear as a file in the "dlm" directory. The output is the
+ list of resource and locks the local node knows about.
endmenu
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 88553054bbf..8665c88e5af 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -54,6 +54,11 @@ static struct config_item *make_node(struct config_group *, const char *);
static void drop_node(struct config_group *, struct config_item *);
static void release_node(struct config_item *);
+static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
+ char *buf);
+static ssize_t store_cluster(struct config_item *i,
+ struct configfs_attribute *a,
+ const char *buf, size_t len);
static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
char *buf);
static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
@@ -73,6 +78,101 @@ static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len);
static ssize_t node_weight_read(struct node *nd, char *buf);
static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len);
+struct cluster {
+ struct config_group group;
+ unsigned int cl_tcp_port;
+ unsigned int cl_buffer_size;
+ unsigned int cl_rsbtbl_size;
+ unsigned int cl_lkbtbl_size;
+ unsigned int cl_dirtbl_size;
+ unsigned int cl_recover_timer;
+ unsigned int cl_toss_secs;
+ unsigned int cl_scan_secs;
+ unsigned int cl_log_debug;
+};
+
+enum {
+ CLUSTER_ATTR_TCP_PORT = 0,
+ CLUSTER_ATTR_BUFFER_SIZE,
+ CLUSTER_ATTR_RSBTBL_SIZE,
+ CLUSTER_ATTR_LKBTBL_SIZE,
+ CLUSTER_ATTR_DIRTBL_SIZE,
+ CLUSTER_ATTR_RECOVER_TIMER,
+ CLUSTER_ATTR_TOSS_SECS,
+ CLUSTER_ATTR_SCAN_SECS,
+ CLUSTER_ATTR_LOG_DEBUG,
+};
+
+struct cluster_attribute {
+ struct configfs_attribute attr;
+ ssize_t (*show)(struct cluster *, char *);
+ ssize_t (*store)(struct cluster *, const char *, size_t);
+};
+
+static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
+ unsigned int *info_field, int check_zero,
+ const char *buf, size_t len)
+{
+ unsigned int x;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ x = simple_strtoul(buf, NULL, 0);
+
+ if (check_zero && !x)
+ return -EINVAL;
+
+ *cl_field = x;
+ *info_field = x;
+
+ return len;
+}
+
+#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \
+ .attr = { .ca_name = __stringify(_name), \
+ .ca_mode = _mode, \
+ .ca_owner = THIS_MODULE }, \
+ .show = _read, \
+ .store = _write, \
+}
+
+#define CLUSTER_ATTR(name, check_zero) \
+static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
+{ \
+ return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \
+ check_zero, buf, len); \
+} \
+static ssize_t name##_read(struct cluster *cl, char *buf) \
+{ \
+ return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \
+} \
+static struct cluster_attribute cluster_attr_##name = \
+__CONFIGFS_ATTR(name, 0644, name##_read, name##_write)
+
+CLUSTER_ATTR(tcp_port, 1);
+CLUSTER_ATTR(buffer_size, 1);
+CLUSTER_ATTR(rsbtbl_size, 1);
+CLUSTER_ATTR(lkbtbl_size, 1);
+CLUSTER_ATTR(dirtbl_size, 1);
+CLUSTER_ATTR(recover_timer, 1);
+CLUSTER_ATTR(toss_secs, 1);
+CLUSTER_ATTR(scan_secs, 1);
+CLUSTER_ATTR(log_debug, 0);
+
+static struct configfs_attribute *cluster_attrs[] = {
+ [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
+ [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
+ [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
+ [CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr,
+ [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
+ [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
+ [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
+ [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
+ [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
+ NULL,
+};
+
enum {
COMM_ATTR_NODEID = 0,
COMM_ATTR_LOCAL,
@@ -152,10 +252,6 @@ struct clusters {
struct configfs_subsystem subsys;
};
-struct cluster {
- struct config_group group;
-};
-
struct spaces {
struct config_group ss_group;
};
@@ -197,6 +293,8 @@ static struct configfs_group_operations clusters_ops = {
static struct configfs_item_operations cluster_ops = {
.release = release_cluster,
+ .show_attribute = show_cluster,
+ .store_attribute = store_cluster,
};
static struct configfs_group_operations spaces_ops = {
@@ -237,6 +335,7 @@ static struct config_item_type clusters_type = {
static struct config_item_type cluster_type = {
.ct_item_ops = &cluster_ops,
+ .ct_attrs = cluster_attrs,
.ct_owner = THIS_MODULE,
};
@@ -317,6 +416,16 @@ static struct config_group *make_cluster(struct config_group *g,
cl->group.default_groups[1] = &cms->cs_group;
cl->group.default_groups[2] = NULL;
+ cl->cl_tcp_port = dlm_config.ci_tcp_port;
+ cl->cl_buffer_size = dlm_config.ci_buffer_size;
+ cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
+ cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size;
+ cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
+ cl->cl_recover_timer = dlm_config.ci_recover_timer;
+ cl->cl_toss_secs = dlm_config.ci_toss_secs;
+ cl->cl_scan_secs = dlm_config.ci_scan_secs;
+ cl->cl_log_debug = dlm_config.ci_log_debug;
+
space_list = &sps->ss_group;
comm_list = &cms->cs_group;
return &cl->group;
@@ -509,6 +618,25 @@ void dlm_config_exit(void)
* Functions for user space to read/write attributes
*/
+static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
+ char *buf)
+{
+ struct cluster *cl = to_cluster(i);
+ struct cluster_attribute *cla =
+ container_of(a, struct cluster_attribute, attr);
+ return cla->show ? cla->show(cl, buf) : 0;
+}
+
+static ssize_t store_cluster(struct config_item *i,
+ struct configfs_attribute *a,
+ const char *buf, size_t len)
+{
+ struct cluster *cl = to_cluster(i);
+ struct cluster_attribute *cla =
+ container_of(a, struct cluster_attribute, attr);
+ return cla->store ? cla->store(cl, buf, len) : -EINVAL;
+}
+
static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
@@ -775,15 +903,17 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_RECOVER_TIMER 5
#define DEFAULT_TOSS_SECS 10
#define DEFAULT_SCAN_SECS 5
+#define DEFAULT_LOG_DEBUG 0
struct dlm_config_info dlm_config = {
- .tcp_port = DEFAULT_TCP_PORT,
- .buffer_size = DEFAULT_BUFFER_SIZE,
- .rsbtbl_size = DEFAULT_RSBTBL_SIZE,
- .lkbtbl_size = DEFAULT_LKBTBL_SIZE,
- .dirtbl_size = DEFAULT_DIRTBL_SIZE,
- .recover_timer = DEFAULT_RECOVER_TIMER,
- .toss_secs = DEFAULT_TOSS_SECS,
- .scan_secs = DEFAULT_SCAN_SECS
+ .ci_tcp_port = DEFAULT_TCP_PORT,
+ .ci_buffer_size = DEFAULT_BUFFER_SIZE,
+ .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
+ .ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE,
+ .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
+ .ci_recover_timer = DEFAULT_RECOVER_TIMER,
+ .ci_toss_secs = DEFAULT_TOSS_SECS,
+ .ci_scan_secs = DEFAULT_SCAN_SECS,
+ .ci_log_debug = DEFAULT_LOG_DEBUG
};
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 9da7839958a..1e978611a96 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -17,14 +17,15 @@
#define DLM_MAX_ADDR_COUNT 3
struct dlm_config_info {
- int tcp_port;
- int buffer_size;
- int rsbtbl_size;
- int lkbtbl_size;
- int dirtbl_size;
- int recover_timer;
- int toss_secs;
- int scan_secs;
+ int ci_tcp_port;
+ int ci_buffer_size;
+ int ci_rsbtbl_size;
+ int ci_lkbtbl_size;
+ int ci_dirtbl_size;
+ int ci_recover_timer;
+ int ci_toss_secs;
+ int ci_scan_secs;
+ int ci_log_debug;
};
extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 1ee8195e6fc..61d93201e1b 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -41,6 +41,7 @@
#include <asm/uaccess.h>
#include <linux/dlm.h>
+#include "config.h"
#define DLM_LOCKSPACE_LEN 64
@@ -69,12 +70,12 @@ struct dlm_mhandle;
#define log_error(ls, fmt, args...) \
printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args)
-#define DLM_LOG_DEBUG
-#ifdef DLM_LOG_DEBUG
-#define log_debug(ls, fmt, args...) log_error(ls, fmt, ##args)
-#else
-#define log_debug(ls, fmt, args...)
-#endif
+#define log_debug(ls, fmt, args...) \
+do { \
+ if (dlm_config.ci_log_debug) \
+ printk(KERN_DEBUG "dlm: %s: " fmt "\n", \
+ (ls)->ls_name , ##args); \
+} while (0)
#define DLM_ASSERT(x, do) \
{ \
@@ -309,8 +310,8 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
/* dlm_header is first element of all structs sent between nodes */
-#define DLM_HEADER_MAJOR 0x00020000
-#define DLM_HEADER_MINOR 0x00000001
+#define DLM_HEADER_MAJOR 0x00030000
+#define DLM_HEADER_MINOR 0x00000000
#define DLM_MSG 1
#define DLM_RCOM 2
@@ -386,6 +387,8 @@ struct dlm_rcom {
uint32_t rc_type; /* DLM_RCOM_ */
int rc_result; /* multi-purpose */
uint64_t rc_id; /* match reply with request */
+ uint64_t rc_seq; /* sender's ls_recover_seq */
+ uint64_t rc_seq_reply; /* remote ls_recover_seq */
char rc_buf[0];
};
@@ -523,6 +526,7 @@ struct dlm_user_proc {
spinlock_t asts_spin;
struct list_head locks;
spinlock_t locks_spin;
+ struct list_head unlocking;
wait_queue_head_t wait;
};
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 30878defaeb..e725005fafd 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -754,6 +754,11 @@ static void add_to_waiters(struct dlm_lkb *lkb, int mstype)
mutex_unlock(&ls->ls_waiters_mutex);
}
+/* We clear the RESEND flag because we might be taking an lkb off the waiters
+ list as part of process_requestqueue (e.g. a lookup that has an optimized
+ request reply on the requestqueue) between dlm_recover_waiters_pre() which
+ set RESEND and dlm_recover_waiters_post() */
+
static int _remove_from_waiters(struct dlm_lkb *lkb)
{
int error = 0;
@@ -764,6 +769,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb)
goto out;
}
lkb->lkb_wait_type = 0;
+ lkb->lkb_flags &= ~DLM_IFL_RESEND;
list_del(&lkb->lkb_wait_reply);
unhold_lkb(lkb);
out:
@@ -810,7 +816,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b)
list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss,
res_hashchain) {
if (!time_after_eq(jiffies, r->res_toss_time +
- dlm_config.toss_secs * HZ))
+ dlm_config.ci_toss_secs * HZ))
continue;
found = 1;
break;
@@ -2144,12 +2150,24 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
if (lkb->lkb_astaddr)
ms->m_asts |= AST_COMP;
- if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP)
- memcpy(ms->m_extra, r->res_name, r->res_length);
+ /* compare with switch in create_message; send_remove() doesn't
+ use send_args() */
- else if (lkb->lkb_lvbptr)
+ switch (ms->m_type) {
+ case DLM_MSG_REQUEST:
+ case DLM_MSG_LOOKUP:
+ memcpy(ms->m_extra, r->res_name, r->res_length);
+ break;
+ case DLM_MSG_CONVERT:
+ case DLM_MSG_UNLOCK:
+ case DLM_MSG_REQUEST_REPLY:
+ case DLM_MSG_CONVERT_REPLY:
+ case DLM_MSG_GRANT:
+ if (!lkb->lkb_lvbptr)
+ break;
memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
-
+ break;
+ }
}
static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
@@ -2418,8 +2436,12 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb););
- if (receive_lvb(ls, lkb, ms))
- return -ENOMEM;
+ if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
+ /* lkb was just created so there won't be an lvb yet */
+ lkb->lkb_lvbptr = allocate_lvb(ls);
+ if (!lkb->lkb_lvbptr)
+ return -ENOMEM;
+ }
return 0;
}
@@ -3002,7 +3024,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
{
struct dlm_message *ms = (struct dlm_message *) hd;
struct dlm_ls *ls;
- int error;
+ int error = 0;
if (!recovery)
dlm_message_in(ms);
@@ -3119,7 +3141,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
out:
dlm_put_lockspace(ls);
dlm_astd_wake();
- return 0;
+ return error;
}
@@ -3132,6 +3154,7 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
if (middle_conversion(lkb)) {
hold_lkb(lkb);
ls->ls_stub_ms.m_result = -EINPROGRESS;
+ ls->ls_stub_ms.m_flags = lkb->lkb_flags;
_remove_from_waiters(lkb);
_receive_convert_reply(lkb, &ls->ls_stub_ms);
@@ -3205,6 +3228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
case DLM_MSG_UNLOCK:
hold_lkb(lkb);
ls->ls_stub_ms.m_result = -DLM_EUNLOCK;
+ ls->ls_stub_ms.m_flags = lkb->lkb_flags;
_remove_from_waiters(lkb);
_receive_unlock_reply(lkb, &ls->ls_stub_ms);
dlm_put_lkb(lkb);
@@ -3213,6 +3237,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
case DLM_MSG_CANCEL:
hold_lkb(lkb);
ls->ls_stub_ms.m_result = -DLM_ECANCEL;
+ ls->ls_stub_ms.m_flags = lkb->lkb_flags;
_remove_from_waiters(lkb);
_receive_cancel_reply(lkb, &ls->ls_stub_ms);
dlm_put_lkb(lkb);
@@ -3571,6 +3596,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
lock_rsb(r);
switch (error) {
+ case -EBADR:
+ /* There's a chance the new master received our lock before
+ dlm_recover_master_reply(), this wouldn't happen if we did
+ a barrier between recover_masters and recover_locks. */
+ log_debug(ls, "master copy not ready %x r %lx %s", lkb->lkb_id,
+ (unsigned long)r, r->res_name);
+ dlm_send_rcom_lock(r, lkb);
+ goto out;
case -EEXIST:
log_debug(ls, "master copy exists %x", lkb->lkb_id);
/* fall through */
@@ -3585,7 +3618,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
/* an ack for dlm_recover_locks() which waits for replies from
all the locks it sends to new masters */
dlm_recovered_lock(r);
-
+ out:
unlock_rsb(r);
put_rsb(r);
dlm_put_lkb(lkb);
@@ -3610,7 +3643,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
}
if (flags & DLM_LKF_VALBLK) {
- ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
+ ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
if (!ua->lksb.sb_lvbptr) {
kfree(ua);
__put_lkb(ls, lkb);
@@ -3679,7 +3712,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
ua = (struct dlm_user_args *)lkb->lkb_astparam;
if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
- ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
+ ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
if (!ua->lksb.sb_lvbptr) {
error = -ENOMEM;
goto out_put;
@@ -3745,12 +3778,10 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
goto out_put;
spin_lock(&ua->proc->locks_spin);
- list_del_init(&lkb->lkb_ownqueue);
+ /* dlm_user_add_ast() may have already taken lkb off the proc list */
+ if (!list_empty(&lkb->lkb_ownqueue))
+ list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
spin_unlock(&ua->proc->locks_spin);
-
- /* this removes the reference for the proc->locks list added by
- dlm_user_request */
- unhold_lkb(lkb);
out_put:
dlm_put_lkb(lkb);
out:
@@ -3790,9 +3821,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
/* this lkb was removed from the WAITING queue */
if (lkb->lkb_grmode == DLM_LOCK_IV) {
spin_lock(&ua->proc->locks_spin);
- list_del_init(&lkb->lkb_ownqueue);
+ list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
spin_unlock(&ua->proc->locks_spin);
- unhold_lkb(lkb);
}
out_put:
dlm_put_lkb(lkb);
@@ -3853,11 +3883,6 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
mutex_lock(&ls->ls_clear_proc_locks);
list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) {
- if (lkb->lkb_ast_type) {
- list_del(&lkb->lkb_astqueue);
- unhold_lkb(lkb);
- }
-
list_del_init(&lkb->lkb_ownqueue);
if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) {
@@ -3874,6 +3899,20 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
dlm_put_lkb(lkb);
}
+
+ /* in-progress unlocks */
+ list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
+ list_del_init(&lkb->lkb_ownqueue);
+ lkb->lkb_flags |= DLM_IFL_DEAD;
+ dlm_put_lkb(lkb);
+ }
+
+ list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
+ list_del(&lkb->lkb_astqueue);
+ dlm_put_lkb(lkb);
+ }
+
mutex_unlock(&ls->ls_clear_proc_locks);
unlock_recovery(ls);
}
+
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 59012b089e8..f40817b53c6 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -236,7 +236,7 @@ static int dlm_scand(void *data)
while (!kthread_should_stop()) {
list_for_each_entry(ls, &lslist, ls_list)
dlm_scan_rsbs(ls);
- schedule_timeout_interruptible(dlm_config.scan_secs * HZ);
+ schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
}
return 0;
}
@@ -422,7 +422,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_count = 0;
ls->ls_flags = 0;
- size = dlm_config.rsbtbl_size;
+ size = dlm_config.ci_rsbtbl_size;
ls->ls_rsbtbl_size = size;
ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL);
@@ -434,7 +434,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
rwlock_init(&ls->ls_rsbtbl[i].lock);
}
- size = dlm_config.lkbtbl_size;
+ size = dlm_config.ci_lkbtbl_size;
ls->ls_lkbtbl_size = size;
ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL);
@@ -446,7 +446,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_lkbtbl[i].counter = 1;
}
- size = dlm_config.dirtbl_size;
+ size = dlm_config.ci_dirtbl_size;
ls->ls_dirtbl_size = size;
ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL);
@@ -489,7 +489,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
mutex_init(&ls->ls_requestqueue_mutex);
mutex_init(&ls->ls_clear_proc_locks);
- ls->ls_recover_buf = kmalloc(dlm_config.buffer_size, GFP_KERNEL);
+ ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
if (!ls->ls_recover_buf)
goto out_dirfree;
diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c
index fe158d7a928..dc83a9d979b 100644
--- a/fs/dlm/lowcomms-sctp.c
+++ b/fs/dlm/lowcomms-sctp.c
@@ -72,6 +72,8 @@ struct nodeinfo {
struct list_head writequeue; /* outgoing writequeue_entries */
spinlock_t writequeue_lock;
int nodeid;
+ struct work_struct swork; /* Send workqueue */
+ struct work_struct lwork; /* Locking workqueue */
};
static DEFINE_IDR(nodeinfo_idr);
@@ -96,6 +98,7 @@ struct connection {
atomic_t waiting_requests;
struct cbuf cb;
int eagain_flag;
+ struct work_struct work; /* Send workqueue */
};
/* An entry waiting to be sent */
@@ -137,19 +140,23 @@ static void cbuf_eat(struct cbuf *cb, int n)
static LIST_HEAD(write_nodes);
static DEFINE_SPINLOCK(write_nodes_lock);
+
/* Maximum number of incoming messages to process before
* doing a schedule()
*/
#define MAX_RX_MSG_COUNT 25
-/* Manage daemons */
-static struct task_struct *recv_task;
-static struct task_struct *send_task;
-static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait);
+/* Work queues */
+static struct workqueue_struct *recv_workqueue;
+static struct workqueue_struct *send_workqueue;
+static struct workqueue_struct *lock_workqueue;
/* The SCTP connection */
static struct connection sctp_con;
+static void process_send_sockets(struct work_struct *work);
+static void process_recv_sockets(struct work_struct *work);
+static void process_lock_request(struct work_struct *work);
static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
{
@@ -222,6 +229,8 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
spin_lock_init(&ni->lock);
INIT_LIST_HEAD(&ni->writequeue);
spin_lock_init(&ni->writequeue_lock);
+ INIT_WORK(&ni->lwork, process_lock_request);
+ INIT_WORK(&ni->swork, process_send_sockets);
ni->nodeid = nodeid;
if (nodeid > max_nodeid)
@@ -249,11 +258,8 @@ static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc)
/* Data or notification available on socket */
static void lowcomms_data_ready(struct sock *sk, int count_unused)
{
- atomic_inc(&sctp_con.waiting_requests);
if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags))
- return;
-
- wake_up_interruptible(&lowcomms_recv_wait);
+ queue_work(recv_workqueue, &sctp_con.work);
}
@@ -361,10 +367,10 @@ static void init_failed(void)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
+ queue_work(send_workqueue, &ni->swork);
}
}
}
- wake_up_process(send_task);
}
/* Something happened to an association */
@@ -446,8 +452,8 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
+ queue_work(send_workqueue, &ni->swork);
}
- wake_up_process(send_task);
}
break;
@@ -580,8 +586,8 @@ static int receive_from_sock(void)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
+ queue_work(send_workqueue, &ni->swork);
}
- wake_up_process(send_task);
}
}
@@ -590,6 +596,7 @@ static int receive_from_sock(void)
return 0;
cbuf_add(&sctp_con.cb, ret);
+ // PJC: TODO: Add to node's workqueue....can we ??
ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
page_address(sctp_con.rx_page),
sctp_con.cb.base, sctp_con.cb.len,
@@ -635,7 +642,7 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
if (result < 0)
log_print("Can't bind to port %d addr number %d",
- dlm_config.tcp_port, num);
+ dlm_config.ci_tcp_port, num);
return result;
}
@@ -711,7 +718,7 @@ static int init_sock(void)
/* Bind to all interfaces. */
for (i = 0; i < dlm_local_count; i++) {
memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
- make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len);
+ make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len);
result = add_bind_addr(&localaddr, addr_len, num);
if (result)
@@ -820,7 +827,8 @@ void dlm_lowcomms_commit_buffer(void *arg)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
- wake_up_process(send_task);
+
+ queue_work(send_workqueue, &ni->swork);
}
return;
@@ -863,7 +871,7 @@ static void initiate_association(int nodeid)
return;
}
- make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen);
+ make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen);
outmessage.msg_name = &rem_addr;
outmessage.msg_namelen = addrlen;
@@ -1088,101 +1096,75 @@ int dlm_lowcomms_close(int nodeid)
return 0;
}
-static int write_list_empty(void)
+// PJC: The work queue function for receiving.
+static void process_recv_sockets(struct work_struct *work)
{
- int status;
-
- spin_lock_bh(&write_nodes_lock);
- status = list_empty(&write_nodes);
- spin_unlock_bh(&write_nodes_lock);
-
- return status;
-}
-
-static int dlm_recvd(void *data)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- while (!kthread_should_stop()) {
+ if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
+ int ret;
int count = 0;
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&lowcomms_recv_wait, &wait);
- if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
- cond_resched();
- remove_wait_queue(&lowcomms_recv_wait, &wait);
- set_current_state(TASK_RUNNING);
-
- if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
- int ret;
-
- do {
- ret = receive_from_sock();
+ do {
+ ret = receive_from_sock();
- /* Don't starve out everyone else */
- if (++count >= MAX_RX_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
- } while (!kthread_should_stop() && ret >=0);
- }
- cond_resched();
+ /* Don't starve out everyone else */
+ if (++count >= MAX_RX_MSG_COUNT) {
+ cond_resched();
+ count = 0;
+ }
+ } while (!kthread_should_stop() && ret >=0);
}
-
- return 0;
+ cond_resched();
}
-static int dlm_sendd(void *data)
+// PJC: the work queue function for sending
+static void process_send_sockets(struct work_struct *work)
{
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
-
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (write_list_empty())
- cond_resched();
- set_current_state(TASK_RUNNING);
-
- if (sctp_con.eagain_flag) {
- sctp_con.eagain_flag = 0;
- refill_write_queue();
- }
- process_output_queue();
+ if (sctp_con.eagain_flag) {
+ sctp_con.eagain_flag = 0;
+ refill_write_queue();
}
+ process_output_queue();
+}
- remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
-
- return 0;
+// PJC: Process lock requests from a particular node.
+// TODO: can we optimise this out on UP ??
+static void process_lock_request(struct work_struct *work)
+{
}
static void daemons_stop(void)
{
- kthread_stop(recv_task);
- kthread_stop(send_task);
+ destroy_workqueue(recv_workqueue);
+ destroy_workqueue(send_workqueue);
+ destroy_workqueue(lock_workqueue);
}
static int daemons_start(void)
{
- struct task_struct *p;
int error;
+ recv_workqueue = create_workqueue("dlm_recv");
+ error = IS_ERR(recv_workqueue);
+ if (error) {
+ log_print("can't start dlm_recv %d", error);
+ return error;
+ }
- p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
- error = IS_ERR(p);
+ send_workqueue = create_singlethread_workqueue("dlm_send");
+ error = IS_ERR(send_workqueue);
if (error) {
- log_print("can't start dlm_recvd %d", error);
+ log_print("can't start dlm_send %d", error);
+ destroy_workqueue(recv_workqueue);
return error;
}
- recv_task = p;
- p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
- error = IS_ERR(p);
+ lock_workqueue = create_workqueue("dlm_rlock");
+ error = IS_ERR(lock_workqueue);
if (error) {
- log_print("can't start dlm_sendd %d", error);
- kthread_stop(recv_task);
+ log_print("can't start dlm_rlock %d", error);
+ destroy_workqueue(send_workqueue);
+ destroy_workqueue(recv_workqueue);
return error;
}
- send_task = p;
return 0;
}
@@ -1194,6 +1176,8 @@ int dlm_lowcomms_start(void)
{
int error;
+ INIT_WORK(&sctp_con.work, process_recv_sockets);
+
error = init_sock();
if (error)
goto fail_sock;
@@ -1224,4 +1208,3 @@ void dlm_lowcomms_stop(void)
for (i = 0; i < dlm_local_count; i++)
kfree(dlm_local_addr[i]);
}
-
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
index 9be3a440c42..07e0a122c32 100644
--- a/fs/dlm/lowcomms-tcp.c
+++ b/fs/dlm/lowcomms-tcp.c
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -96,10 +96,7 @@ static bool cbuf_empty(struct cbuf *cb)
struct connection {
struct socket *sock; /* NULL if not connected */
uint32_t nodeid; /* So we know who we are in the list */
- struct rw_semaphore sock_sem; /* Stop connect races */
- struct list_head read_list; /* On this list when ready for reading */
- struct list_head write_list; /* On this list when ready for writing */
- struct list_head state_list; /* On this list when ready to connect */
+ struct mutex sock_mutex;
unsigned long flags; /* bit 1,2 = We are on the read/write lists */
#define CF_READ_PENDING 1
#define CF_WRITE_PENDING 2
@@ -112,9 +109,10 @@ struct connection {
struct page *rx_page;
struct cbuf cb;
int retries;
- atomic_t waiting_requests;
#define MAX_CONNECT_RETRIES 3
struct connection *othercon;
+ struct work_struct rwork; /* Receive workqueue */
+ struct work_struct swork; /* Send workqueue */
};
#define sock2con(x) ((struct connection *)(x)->sk_user_data)
@@ -131,14 +129,9 @@ struct writequeue_entry {
static struct sockaddr_storage dlm_local_addr;
-/* Manage daemons */
-static struct task_struct *recv_task;
-static struct task_struct *send_task;
-
-static wait_queue_t lowcomms_send_waitq_head;
-static DECLARE_WAIT_QUEUE_HEAD(lowcomms_send_waitq);
-static wait_queue_t lowcomms_recv_waitq_head;
-static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq);
+/* Work queues */
+static struct workqueue_struct *recv_workqueue;
+static struct workqueue_struct *send_workqueue;
/* An array of pointers to connections, indexed by NODEID */
static struct connection **connections;
@@ -146,17 +139,8 @@ static DECLARE_MUTEX(connections_lock);
static struct kmem_cache *con_cache;
static int conn_array_size;
-/* List of sockets that have reads pending */
-static LIST_HEAD(read_sockets);
-static DEFINE_SPINLOCK(read_sockets_lock);
-
-/* List of sockets which have writes pending */
-static LIST_HEAD(write_sockets);
-static DEFINE_SPINLOCK(write_sockets_lock);
-
-/* List of sockets which have connects pending */
-static LIST_HEAD(state_sockets);
-static DEFINE_SPINLOCK(state_sockets_lock);
+static void process_recv_sockets(struct work_struct *work);
+static void process_send_sockets(struct work_struct *work);
static struct connection *nodeid2con(int nodeid, gfp_t allocation)
{
@@ -186,9 +170,11 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
goto finish;
con->nodeid = nodeid;
- init_rwsem(&con->sock_sem);
+ mutex_init(&con->sock_mutex);
INIT_LIST_HEAD(&con->writequeue);
spin_lock_init(&con->writequeue_lock);
+ INIT_WORK(&con->swork, process_send_sockets);
+ INIT_WORK(&con->rwork, process_recv_sockets);
connections[nodeid] = con;
}
@@ -203,41 +189,22 @@ static void lowcomms_data_ready(struct sock *sk, int count_unused)
{
struct connection *con = sock2con(sk);
- atomic_inc(&con->waiting_requests);
- if (test_and_set_bit(CF_READ_PENDING, &con->flags))
- return;
-
- spin_lock_bh(&read_sockets_lock);
- list_add_tail(&con->read_list, &read_sockets);
- spin_unlock_bh(&read_sockets_lock);
-
- wake_up_interruptible(&lowcomms_recv_waitq);
+ if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
+ queue_work(recv_workqueue, &con->rwork);
}
static void lowcomms_write_space(struct sock *sk)
{
struct connection *con = sock2con(sk);
- if (test_and_set_bit(CF_WRITE_PENDING, &con->flags))
- return;
-
- spin_lock_bh(&write_sockets_lock);
- list_add_tail(&con->write_list, &write_sockets);
- spin_unlock_bh(&write_sockets_lock);
-
- wake_up_interruptible(&lowcomms_send_waitq);
+ if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
+ queue_work(send_workqueue, &con->swork);
}
static inline void lowcomms_connect_sock(struct connection *con)
{
- if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
- return;
-
- spin_lock_bh(&state_sockets_lock);
- list_add_tail(&con->state_list, &state_sockets);
- spin_unlock_bh(&state_sockets_lock);
-
- wake_up_interruptible(&lowcomms_send_waitq);
+ if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
+ queue_work(send_workqueue, &con->swork);
}
static void lowcomms_state_change(struct sock *sk)
@@ -279,7 +246,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
/* Close a remote connection and tidy up */
static void close_connection(struct connection *con, bool and_other)
{
- down_write(&con->sock_sem);
+ mutex_lock(&con->sock_mutex);
if (con->sock) {
sock_release(con->sock);
@@ -294,24 +261,27 @@ static void close_connection(struct connection *con, bool and_other)
con->rx_page = NULL;
}
con->retries = 0;
- up_write(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
}
/* Data received from remote end */
static int receive_from_sock(struct connection *con)
{
int ret = 0;
- struct msghdr msg;
- struct iovec iov[2];
- mm_segment_t fs;
+ struct msghdr msg = {};
+ struct kvec iov[2];
unsigned len;
int r;
int call_again_soon = 0;
+ int nvec;
- down_read(&con->sock_sem);
+ mutex_lock(&con->sock_mutex);
+
+ if (con->sock == NULL) {
+ ret = -EAGAIN;
+ goto out_close;
+ }
- if (con->sock == NULL)
- goto out;
if (con->rx_page == NULL) {
/*
* This doesn't need to be atomic, but I think it should
@@ -323,21 +293,13 @@ static int receive_from_sock(struct connection *con)
cbuf_init(&con->cb, PAGE_CACHE_SIZE);
}
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_iovlen = 1;
- msg.msg_iov = iov;
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_flags = 0;
-
/*
* iov[0] is the bit of the circular buffer between the current end
* point (cb.base + cb.len) and the end of the buffer.
*/
iov[0].iov_len = con->cb.base - cbuf_data(&con->cb);
iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb);
- iov[1].iov_len = 0;
+ nvec = 1;
/*
* iov[1] is the bit of the circular buffer between the start of the
@@ -347,18 +309,18 @@ static int receive_from_sock(struct connection *con)
iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb);
iov[1].iov_len = con->cb.base;
iov[1].iov_base = page_address(con->rx_page);
- msg.msg_iovlen = 2;
+ nvec = 2;
}
len = iov[0].iov_len + iov[1].iov_len;
- fs = get_fs();
- set_fs(get_ds());
- r = ret = sock_recvmsg(con->sock, &msg, len,
+ r = ret = kernel_recvmsg(con->sock, &msg, iov, nvec, len,
MSG_DONTWAIT | MSG_NOSIGNAL);
- set_fs(fs);
if (ret <= 0)
goto out_close;
+ if (ret == -EAGAIN)
+ goto out_resched;
+
if (ret == len)
call_again_soon = 1;
cbuf_add(&con->cb, ret);
@@ -381,24 +343,26 @@ static int receive_from_sock(struct connection *con)
con->rx_page = NULL;
}
-out:
if (call_again_soon)
goto out_resched;
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
return 0;
out_resched:
- lowcomms_data_ready(con->sock->sk, 0);
- up_read(&con->sock_sem);
- cond_resched();
- return 0;
+ if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
+ queue_work(recv_workqueue, &con->rwork);
+ mutex_unlock(&con->sock_mutex);
+ return -EAGAIN;
out_close:
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) {
close_connection(con, false);
/* Reconnect when there is something to send */
}
+ /* Don't return success if we really got EOF */
+ if (ret == 0)
+ ret = -EAGAIN;
return ret;
}
@@ -412,6 +376,7 @@ static int accept_from_sock(struct connection *con)
int len;
int nodeid;
struct connection *newcon;
+ struct connection *addcon;
memset(&peeraddr, 0, sizeof(peeraddr));
result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM,
@@ -419,7 +384,7 @@ static int accept_from_sock(struct connection *con)
if (result < 0)
return -ENOMEM;
- down_read(&con->sock_sem);
+ mutex_lock_nested(&con->sock_mutex, 0);
result = -ENOTCONN;
if (con->sock == NULL)
@@ -445,7 +410,7 @@ static int accept_from_sock(struct connection *con)
if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
printk("dlm: connect from non cluster node\n");
sock_release(newsock);
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
return -1;
}
@@ -462,7 +427,7 @@ static int accept_from_sock(struct connection *con)
result = -ENOMEM;
goto accept_err;
}
- down_write(&newcon->sock_sem);
+ mutex_lock_nested(&newcon->sock_mutex, 1);
if (newcon->sock) {
struct connection *othercon = newcon->othercon;
@@ -470,41 +435,45 @@ static int accept_from_sock(struct connection *con)
othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
if (!othercon) {
printk("dlm: failed to allocate incoming socket\n");
- up_write(&newcon->sock_sem);
+ mutex_unlock(&newcon->sock_mutex);
result = -ENOMEM;
goto accept_err;
}
othercon->nodeid = nodeid;
othercon->rx_action = receive_from_sock;
- init_rwsem(&othercon->sock_sem);
+ mutex_init(&othercon->sock_mutex);
+ INIT_WORK(&othercon->swork, process_send_sockets);
+ INIT_WORK(&othercon->rwork, process_recv_sockets);
set_bit(CF_IS_OTHERCON, &othercon->flags);
newcon->othercon = othercon;
}
othercon->sock = newsock;
newsock->sk->sk_user_data = othercon;
add_sock(newsock, othercon);
+ addcon = othercon;
}
else {
newsock->sk->sk_user_data = newcon;
newcon->rx_action = receive_from_sock;
add_sock(newsock, newcon);
-
+ addcon = newcon;
}
- up_write(&newcon->sock_sem);
+ mutex_unlock(&newcon->sock_mutex);
/*
* Add it to the active queue in case we got data
* beween processing the accept adding the socket
* to the read_sockets list
*/
- lowcomms_data_ready(newsock->sk, 0);
- up_read(&con->sock_sem);
+ if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
+ queue_work(recv_workqueue, &addcon->rwork);
+ mutex_unlock(&con->sock_mutex);
return 0;
accept_err:
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
sock_release(newsock);
if (result != -EAGAIN)
@@ -525,7 +494,7 @@ static void connect_to_sock(struct connection *con)
return;
}
- down_write(&con->sock_sem);
+ mutex_lock(&con->sock_mutex);
if (con->retries++ > MAX_CONNECT_RETRIES)
goto out;
@@ -548,7 +517,7 @@ static void connect_to_sock(struct connection *con)
sock->sk->sk_user_data = con;
con->rx_action = receive_from_sock;
- make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len);
+ make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
add_sock(sock, con);
@@ -577,7 +546,7 @@ out_err:
result = 0;
}
out:
- up_write(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
return;
}
@@ -616,10 +585,10 @@ static struct socket *create_listen_sock(struct connection *con,
con->sock = sock;
/* Bind to our port */
- make_sockaddr(saddr, dlm_config.tcp_port, &addr_len);
+ make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
if (result < 0) {
- printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port);
+ printk("dlm: Can't bind to port %d\n", dlm_config.ci_tcp_port);
sock_release(sock);
sock = NULL;
con->sock = NULL;
@@ -638,7 +607,7 @@ static struct socket *create_listen_sock(struct connection *con,
result = sock->ops->listen(sock, 5);
if (result < 0) {
- printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port);
+ printk("dlm: Can't listen on port %d\n", dlm_config.ci_tcp_port);
sock_release(sock);
sock = NULL;
goto create_out;
@@ -709,6 +678,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len,
if (!con)
return NULL;
+ spin_lock(&con->writequeue_lock);
e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
if ((&e->list == &con->writequeue) ||
(PAGE_CACHE_SIZE - e->end < len)) {
@@ -747,6 +717,7 @@ void dlm_lowcomms_commit_buffer(void *mh)
struct connection *con = e->con;
int users;
+ spin_lock(&con->writequeue_lock);
users = --e->users;
if (users)
goto out;
@@ -754,12 +725,8 @@ void dlm_lowcomms_commit_buffer(void *mh)
kunmap(e->page);
spin_unlock(&con->writequeue_lock);
- if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) {
- spin_lock_bh(&write_sockets_lock);
- list_add_tail(&con->write_list, &write_sockets);
- spin_unlock_bh(&write_sockets_lock);
-
- wake_up_interruptible(&lowcomms_send_waitq);
+ if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) {
+ queue_work(send_workqueue, &con->swork);
}
return;
@@ -783,7 +750,7 @@ static void send_to_sock(struct connection *con)
struct writequeue_entry *e;
int len, offset;
- down_read(&con->sock_sem);
+ mutex_lock(&con->sock_mutex);
if (con->sock == NULL)
goto out_connect;
@@ -800,6 +767,7 @@ static void send_to_sock(struct connection *con)
offset = e->offset;
BUG_ON(len == 0 && e->users == 0);
spin_unlock(&con->writequeue_lock);
+ kmap(e->page);
ret = 0;
if (len) {
@@ -828,18 +796,18 @@ static void send_to_sock(struct connection *con)
}
spin_unlock(&con->writequeue_lock);
out:
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
return;
send_error:
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
close_connection(con, false);
lowcomms_connect_sock(con);
return;
out_connect:
- up_read(&con->sock_sem);
- lowcomms_connect_sock(con);
+ mutex_unlock(&con->sock_mutex);
+ connect_to_sock(con);
return;
}
@@ -872,7 +840,6 @@ int dlm_lowcomms_close(int nodeid)
if (con) {
clean_one_writequeue(con);
close_connection(con, true);
- atomic_set(&con->waiting_requests, 0);
}
return 0;
@@ -880,102 +847,29 @@ out:
return -1;
}
-/* API send message call, may queue the request */
-/* N.B. This is the old interface - use the new one for new calls */
-int lowcomms_send_message(int nodeid, char *buf, int len, gfp_t allocation)
-{
- struct writequeue_entry *e;
- char *b;
-
- e = dlm_lowcomms_get_buffer(nodeid, len, allocation, &b);
- if (e) {
- memcpy(b, buf, len);
- dlm_lowcomms_commit_buffer(e);
- return 0;
- }
- return -ENOBUFS;
-}
-
/* Look for activity on active sockets */
-static void process_sockets(void)
+static void process_recv_sockets(struct work_struct *work)
{
- struct list_head *list;
- struct list_head *temp;
- int count = 0;
-
- spin_lock_bh(&read_sockets_lock);
- list_for_each_safe(list, temp, &read_sockets) {
-
- struct connection *con =
- list_entry(list, struct connection, read_list);
- list_del(&con->read_list);
- clear_bit(CF_READ_PENDING, &con->flags);
-
- spin_unlock_bh(&read_sockets_lock);
-
- /* This can reach zero if we are processing requests
- * as they come in.
- */
- if (atomic_read(&con->waiting_requests) == 0) {
- spin_lock_bh(&read_sockets_lock);
- continue;
- }
-
- do {
- con->rx_action(con);
-
- /* Don't starve out everyone else */
- if (++count >= MAX_RX_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
+ struct connection *con = container_of(work, struct connection, rwork);
+ int err;
- } while (!atomic_dec_and_test(&con->waiting_requests) &&
- !kthread_should_stop());
-
- spin_lock_bh(&read_sockets_lock);
- }
- spin_unlock_bh(&read_sockets_lock);
+ clear_bit(CF_READ_PENDING, &con->flags);
+ do {
+ err = con->rx_action(con);
+ } while (!err);
}
-/* Try to send any messages that are pending
- */
-static void process_output_queue(void)
-{
- struct list_head *list;
- struct list_head *temp;
-
- spin_lock_bh(&write_sockets_lock);
- list_for_each_safe(list, temp, &write_sockets) {
- struct connection *con =
- list_entry(list, struct connection, write_list);
- clear_bit(CF_WRITE_PENDING, &con->flags);
- list_del(&con->write_list);
- spin_unlock_bh(&write_sockets_lock);
- send_to_sock(con);
- spin_lock_bh(&write_sockets_lock);
- }
- spin_unlock_bh(&write_sockets_lock);
-}
-
-static void process_state_queue(void)
+static void process_send_sockets(struct work_struct *work)
{
- struct list_head *list;
- struct list_head *temp;
-
- spin_lock_bh(&state_sockets_lock);
- list_for_each_safe(list, temp, &state_sockets) {
- struct connection *con =
- list_entry(list, struct connection, state_list);
- list_del(&con->state_list);
- clear_bit(CF_CONNECT_PENDING, &con->flags);
- spin_unlock_bh(&state_sockets_lock);
+ struct connection *con = container_of(work, struct connection, swork);
+ if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
connect_to_sock(con);
- spin_lock_bh(&state_sockets_lock);
}
- spin_unlock_bh(&state_sockets_lock);
+
+ clear_bit(CF_WRITE_PENDING, &con->flags);
+ send_to_sock(con);
}
@@ -992,109 +886,33 @@ static void clean_writequeues(void)
}
}
-static int read_list_empty(void)
-{
- int status;
-
- spin_lock_bh(&read_sockets_lock);
- status = list_empty(&read_sockets);
- spin_unlock_bh(&read_sockets_lock);
-
- return status;
-}
-
-/* DLM Transport comms receive daemon */
-static int dlm_recvd(void *data)
+static void work_stop(void)
{
- init_waitqueue_entry(&lowcomms_recv_waitq_head, current);
- add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head);
-
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (read_list_empty())
- cond_resched();
- set_current_state(TASK_RUNNING);
-
- process_sockets();
- }
-
- return 0;
+ destroy_workqueue(recv_workqueue);
+ destroy_workqueue(send_workqueue);
}
-static int write_and_state_lists_empty(void)
+static int work_start(void)
{
- int status;
-
- spin_lock_bh(&write_sockets_lock);
- status = list_empty(&write_sockets);
- spin_unlock_bh(&write_sockets_lock);
-
- spin_lock_bh(&state_sockets_lock);
- if (list_empty(&state_sockets) == 0)
- status = 0;
- spin_unlock_bh(&state_sockets_lock);
-
- return status;
-}
-
-/* DLM Transport send daemon */
-static int dlm_sendd(void *data)
-{
- init_waitqueue_entry(&lowcomms_send_waitq_head, current);
- add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head);
-
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (write_and_state_lists_empty())
- cond_resched();
- set_current_state(TASK_RUNNING);
-
- process_state_queue();
- process_output_queue();
- }
-
- return 0;
-}
-
-static void daemons_stop(void)
-{
- kthread_stop(recv_task);
- kthread_stop(send_task);
-}
-
-static int daemons_start(void)
-{
- struct task_struct *p;
int error;
-
- p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
- error = IS_ERR(p);
+ recv_workqueue = create_workqueue("dlm_recv");
+ error = IS_ERR(recv_workqueue);
if (error) {
- log_print("can't start dlm_recvd %d", error);
+ log_print("can't start dlm_recv %d", error);
return error;
}
- recv_task = p;
- p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
- error = IS_ERR(p);
+ send_workqueue = create_singlethread_workqueue("dlm_send");
+ error = IS_ERR(send_workqueue);
if (error) {
- log_print("can't start dlm_sendd %d", error);
- kthread_stop(recv_task);
+ log_print("can't start dlm_send %d", error);
+ destroy_workqueue(recv_workqueue);
return error;
}
- send_task = p;
return 0;
}
-/*
- * Return the largest buffer size we can cope with.
- */
-int lowcomms_max_buffer_size(void)
-{
- return PAGE_CACHE_SIZE;
-}
-
void dlm_lowcomms_stop(void)
{
int i;
@@ -1107,7 +925,7 @@ void dlm_lowcomms_stop(void)
connections[i]->flags |= 0xFF;
}
- daemons_stop();
+ work_stop();
clean_writequeues();
for (i = 0; i < conn_array_size; i++) {
@@ -1159,7 +977,7 @@ int dlm_lowcomms_start(void)
if (error)
goto fail_unlisten;
- error = daemons_start();
+ error = work_start();
if (error)
goto fail_unlisten;
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index c9b1c3d535f..a5126e0c68a 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -82,7 +82,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
if (msglen < sizeof(struct dlm_header))
break;
err = -E2BIG;
- if (msglen > dlm_config.buffer_size) {
+ if (msglen > dlm_config.ci_buffer_size) {
log_print("message size %d from %d too big, buf len %d",
msglen, nodeid, len);
break;
@@ -103,7 +103,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
if (msglen > sizeof(__tmp) &&
msg == (struct dlm_header *) __tmp) {
- msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL);
+ msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
if (msg == NULL)
return ret;
}
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 4cc31be9cd9..6bfbd615380 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -56,6 +56,10 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
rc->rc_type = type;
+ spin_lock(&ls->ls_recover_lock);
+ rc->rc_seq = ls->ls_recover_seq;
+ spin_unlock(&ls->ls_recover_lock);
+
*mh_ret = mh;
*rc_ret = rc;
return 0;
@@ -78,8 +82,17 @@ static void make_config(struct dlm_ls *ls, struct rcom_config *rf)
rf->rf_lsflags = ls->ls_exflags;
}
-static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid)
+static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
{
+ struct rcom_config *rf = (struct rcom_config *) rc->rc_buf;
+
+ if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) {
+ log_error(ls, "version mismatch: %x nodeid %d: %x",
+ DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
+ rc->rc_header.h_version);
+ return -EINVAL;
+ }
+
if (rf->rf_lvblen != ls->ls_lvblen ||
rf->rf_lsflags != ls->ls_exflags) {
log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
@@ -125,7 +138,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
goto out;
allow_sync_reply(ls, &rc->rc_id);
- memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
+ memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
send_rcom(ls, mh, rc);
@@ -141,8 +154,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
log_debug(ls, "remote node %d not ready", nodeid);
rc->rc_result = 0;
} else
- error = check_config(ls, (struct rcom_config *) rc->rc_buf,
- nodeid);
+ error = check_config(ls, rc, nodeid);
/* the caller looks at rc_result for the remote recovery status */
out:
return error;
@@ -159,6 +171,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
if (error)
return;
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
rc->rc_result = dlm_recover_status(ls);
make_config(ls, (struct rcom_config *) rc->rc_buf);
@@ -200,7 +213,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
if (nodeid == dlm_our_nodeid()) {
dlm_copy_master_names(ls, last_name, last_len,
ls->ls_recover_buf + len,
- dlm_config.buffer_size - len, nodeid);
+ dlm_config.ci_buffer_size - len, nodeid);
goto out;
}
@@ -210,7 +223,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
memcpy(rc->rc_buf, last_name, last_len);
allow_sync_reply(ls, &rc->rc_id);
- memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
+ memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
send_rcom(ls, mh, rc);
@@ -224,30 +237,17 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
- int error, inlen, outlen;
- int nodeid = rc_in->rc_header.h_nodeid;
- uint32_t status = dlm_recover_status(ls);
-
- /*
- * We can't run dlm_dir_rebuild_send (which uses ls_nodes) while
- * dlm_recoverd is running ls_nodes_reconfig (which changes ls_nodes).
- * It could only happen in rare cases where we get a late NAMES
- * message from a previous instance of recovery.
- */
-
- if (!(status & DLM_RS_NODES)) {
- log_debug(ls, "ignoring RCOM_NAMES from %u", nodeid);
- return;
- }
+ int error, inlen, outlen, nodeid;
nodeid = rc_in->rc_header.h_nodeid;
inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
- outlen = dlm_config.buffer_size - sizeof(struct dlm_rcom);
+ outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom);
error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh);
if (error)
return;
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
nodeid);
@@ -294,6 +294,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
ret_nodeid = error;
rc->rc_result = ret_nodeid;
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
send_rcom(ls, mh, rc);
}
@@ -375,20 +376,13 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock));
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
send_rcom(ls, mh, rc);
}
static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
- uint32_t status = dlm_recover_status(ls);
-
- if (!(status & DLM_RS_DIR)) {
- log_debug(ls, "ignoring RCOM_LOCK_REPLY from %u",
- rc_in->rc_header.h_nodeid);
- return;
- }
-
dlm_recover_process_copy(ls, rc_in);
}
@@ -415,6 +409,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
rc->rc_type = DLM_RCOM_STATUS_REPLY;
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
rc->rc_result = -ESRCH;
rf = (struct rcom_config *) rc->rc_buf;
@@ -426,6 +421,31 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
return 0;
}
+static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
+{
+ uint64_t seq;
+ int rv = 0;
+
+ switch (rc->rc_type) {
+ case DLM_RCOM_STATUS_REPLY:
+ case DLM_RCOM_NAMES_REPLY:
+ case DLM_RCOM_LOOKUP_REPLY:
+ case DLM_RCOM_LOCK_REPLY:
+ spin_lock(&ls->ls_recover_lock);
+ seq = ls->ls_recover_seq;
+ spin_unlock(&ls->ls_recover_lock);
+ if (rc->rc_seq_reply != seq) {
+ log_debug(ls, "ignoring old reply %x from %d "
+ "seq_reply %llx expect %llx",
+ rc->rc_type, rc->rc_header.h_nodeid,
+ (unsigned long long)rc->rc_seq_reply,
+ (unsigned long long)seq);
+ rv = 1;
+ }
+ }
+ return rv;
+}
+
/* Called by dlm_recvd; corresponds to dlm_receive_message() but special
recovery-only comms are sent through here. */
@@ -449,11 +469,14 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
}
if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
- log_error(ls, "ignoring recovery message %x from %d",
+ log_debug(ls, "ignoring recovery message %x from %d",
rc->rc_type, nodeid);
goto out;
}
+ if (is_old_reply(ls, rc))
+ goto out;
+
if (nodeid != rc->rc_header.h_nodeid) {
log_error(ls, "bad rcom nodeid %d from %d",
rc->rc_header.h_nodeid, nodeid);
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index cf9f6831bab..c2cc7694cd1 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -44,7 +44,7 @@
static void dlm_wait_timer_fn(unsigned long data)
{
struct dlm_ls *ls = (struct dlm_ls *) data;
- mod_timer(&ls->ls_timer, jiffies + (dlm_config.recover_timer * HZ));
+ mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ));
wake_up(&ls->ls_wait_general);
}
@@ -55,7 +55,7 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
init_timer(&ls->ls_timer);
ls->ls_timer.function = dlm_wait_timer_fn;
ls->ls_timer.data = (long) ls;
- ls->ls_timer.expires = jiffies + (dlm_config.recover_timer * HZ);
+ ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ);
add_timer(&ls->ls_timer);
wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
@@ -397,7 +397,9 @@ int dlm_recover_masters(struct dlm_ls *ls)
if (dlm_no_directory(ls))
count += recover_master_static(r);
- else if (!is_master(r) && dlm_is_removed(ls, r->res_nodeid)) {
+ else if (!is_master(r) &&
+ (dlm_is_removed(ls, r->res_nodeid) ||
+ rsb_flag(r, RSB_NEW_MASTER))) {
recover_master(r);
count++;
}
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 650536aa513..3cb636d6024 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -77,7 +77,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_members(ls, rv, &neg);
if (error) {
- log_error(ls, "recover_members failed %d", error);
+ log_debug(ls, "recover_members failed %d", error);
goto fail;
}
start = jiffies;
@@ -89,7 +89,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_directory(ls);
if (error) {
- log_error(ls, "recover_directory failed %d", error);
+ log_debug(ls, "recover_directory failed %d", error);
goto fail;
}
@@ -99,7 +99,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_directory_wait(ls);
if (error) {
- log_error(ls, "recover_directory_wait failed %d", error);
+ log_debug(ls, "recover_directory_wait failed %d", error);
goto fail;
}
@@ -129,7 +129,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_masters(ls);
if (error) {
- log_error(ls, "recover_masters failed %d", error);
+ log_debug(ls, "recover_masters failed %d", error);
goto fail;
}
@@ -139,13 +139,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_locks(ls);
if (error) {
- log_error(ls, "recover_locks failed %d", error);
+ log_debug(ls, "recover_locks failed %d", error);
goto fail;
}
error = dlm_recover_locks_wait(ls);
if (error) {
- log_error(ls, "recover_locks_wait failed %d", error);
+ log_debug(ls, "recover_locks_wait failed %d", error);
goto fail;
}
@@ -166,7 +166,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_locks_wait(ls);
if (error) {
- log_error(ls, "recover_locks_wait failed %d", error);
+ log_debug(ls, "recover_locks_wait failed %d", error);
goto fail;
}
}
@@ -184,7 +184,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
dlm_set_recover_status(ls, DLM_RS_DONE);
error = dlm_recover_done_wait(ls);
if (error) {
- log_error(ls, "recover_done_wait failed %d", error);
+ log_debug(ls, "recover_done_wait failed %d", error);
goto fail;
}
@@ -192,19 +192,19 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = enable_locking(ls, rv->seq);
if (error) {
- log_error(ls, "enable_locking failed %d", error);
+ log_debug(ls, "enable_locking failed %d", error);
goto fail;
}
error = dlm_process_requestqueue(ls);
if (error) {
- log_error(ls, "process_requestqueue failed %d", error);
+ log_debug(ls, "process_requestqueue failed %d", error);
goto fail;
}
error = dlm_recover_waiters_post(ls);
if (error) {
- log_error(ls, "recover_waiters_post failed %d", error);
+ log_debug(ls, "recover_waiters_post failed %d", error);
goto fail;
}
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index c37e93e4f2d..d378b7fe2a1 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -180,6 +180,14 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue))
remove_ownqueue = 1;
+ /* unlocks or cancels of waiting requests need to be removed from the
+ proc's unlocking list, again there must be a better way... */
+
+ if (ua->lksb.sb_status == -DLM_EUNLOCK ||
+ (ua->lksb.sb_status == -DLM_ECANCEL &&
+ lkb->lkb_grmode == DLM_LOCK_IV))
+ remove_ownqueue = 1;
+
/* We want to copy the lvb to userspace when the completion
ast is read if the status is 0, the lock has an lvb and
lvb_ops says we should. We could probably have set_lvb_lock()
@@ -523,6 +531,7 @@ static int device_open(struct inode *inode, struct file *file)
proc->lockspace = ls->ls_local_handle;
INIT_LIST_HEAD(&proc->asts);
INIT_LIST_HEAD(&proc->locks);
+ INIT_LIST_HEAD(&proc->unlocking);
spin_lock_init(&proc->asts_spin);
spin_lock_init(&proc->locks_spin);
init_waitqueue_head(&proc->wait);
diff --git a/fs/dlm/util.c b/fs/dlm/util.c
index 767197db994..963889cf674 100644
--- a/fs/dlm/util.c
+++ b/fs/dlm/util.c
@@ -134,6 +134,8 @@ void dlm_rcom_out(struct dlm_rcom *rc)
rc->rc_type = cpu_to_le32(rc->rc_type);
rc->rc_result = cpu_to_le32(rc->rc_result);
rc->rc_id = cpu_to_le64(rc->rc_id);
+ rc->rc_seq = cpu_to_le64(rc->rc_seq);
+ rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply);
if (type == DLM_RCOM_LOCK)
rcom_lock_out((struct rcom_lock *) rc->rc_buf);
@@ -151,6 +153,8 @@ void dlm_rcom_in(struct dlm_rcom *rc)
rc->rc_type = le32_to_cpu(rc->rc_type);
rc->rc_result = le32_to_cpu(rc->rc_result);
rc->rc_id = le64_to_cpu(rc->rc_id);
+ rc->rc_seq = le64_to_cpu(rc->rc_seq);
+ rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply);
if (rc->rc_type == DLM_RCOM_LOCK)
rcom_lock_in((struct rcom_lock *) rc->rc_buf);