about | summary | refs | log | tree | commit | diff
path: root/src/im-sandbox.c
diff options
context:
space:
mode:
author: Thomas White <taw@physics.org> 2016-12-05 10:10:24 +0100
committer: Thomas White <taw@physics.org> 2017-02-02 11:45:16 +0100
commit: c7abdfb3e404a8a238e8352ea5e195f864efefd2 (patch)
tree: c0c1dd6e68062b1561cb2d97e3a6dd2475d002ae /src/im-sandbox.c
parent: 526e5ab3d6fd21b8fe3c435f3984866c7e145a84 (diff)
indexamajig: Add ping mechanism to avoid timing out when trying lots of indexers
Diffstat (limited to 'src/im-sandbox.c')
-rw-r--r-- src/im-sandbox.c 12
1 files changed, 12 insertions, 0 deletions
diff --git a/src/im-sandbox.c b/src/im-sandbox.c
index 979fc136..df8ed1e2 100644
--- a/src/im-sandbox.c
+++ b/src/im-sandbox.c
@@ -78,6 +78,7 @@ struct sandbox
pid_t *pids;
int *running;
time_t *last_response;
+ int last_ping[MAX_NUM_WORKERS];
/* Streams to read from (NB not the same indices as the above) */
int n_read;
@@ -123,6 +124,7 @@ static time_t get_monotonic_seconds()
static void stamp_response(struct sandbox *sb, int n)
{
sb->last_response[n] = get_monotonic_seconds();
+ sb->last_ping[n] = sb->shared->pings[n];
}
@@ -131,13 +133,20 @@ static void check_hung_workers(struct sandbox *sb)
int i;
time_t tnow = get_monotonic_seconds();
for ( i=0; i<sb->n_proc; i++ ) {
+
if ( !sb->running[i] ) continue;
+
+ if ( sb->shared->pings[i] != sb->last_ping[i] ) {
+ stamp_response(sb, i);
+ }
+
if ( tnow - sb->last_response[i] > 240 ) {
STATUS("Worker %i did not respond for 240 seconds - "
"sending it SIGKILL.\n", i);
kill(sb->pids[i], SIGKILL);
stamp_response(sb, i);
}
+
}
}
@@ -534,6 +543,9 @@ static void start_worker_process(struct sandbox *sb, int slot)
return;
}
+ sb->shared->pings[slot] = 0;
+ sb->last_ping[slot] = 0;
+
p = fork();
if ( p == -1 ) {
ERROR("fork() failed!\n");