From 44465a7daf7c4e34199b2b0ebb3c5101619dcb9d Mon Sep 17 00:00:00 2001 From: Kurt Hackel Date: Wed, 18 Jan 2006 17:05:38 -0800 Subject: [PATCH] ocfs2: add dlm_wait_for_node_death * add dlm_wait_for_node_death function to be used after receiving a network error. this will wait for the given timeout to allow the heartbeat callbacks to update the domain map. without this, some paths may spin and consume enough cpu that the heartbeat gets starved and never updates. Signed-off-by: Kurt Hackel Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmconvert.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/ocfs2/dlm/dlmconvert.c') diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index f5c2f1979ad..f66e2d818cc 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -392,6 +392,11 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm, } else { mlog_errno(tmpret); if (dlm_is_host_down(tmpret)) { + /* instead of logging the same network error over + * and over, sleep here and wait for the heartbeat + * to notice the node is dead. times out after 5s. */ + dlm_wait_for_node_death(dlm, res->owner, + DLM_NODE_DEATH_WAIT_MAX); ret = DLM_RECOVERING; mlog(0, "node %u died so returning DLM_RECOVERING " "from convert message!\n", res->owner); -- cgit v1.2.3