mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-28 04:48:16 +00:00
dump: add timeout for collecting processes
Currently, criu dump may hang indefinitely, e.g. while waiting for a task that is blocked in vfork(), or for a task that is stuck in D state for some other reason. This patch adds a time limit on collecting tasks during the dump operation. If collecting processes takes too long, the dump process is terminated. The timeout is 5 seconds by default, but it can be changed via the --timeout option. Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com> Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
This commit is contained in:
parent
9bfd62c52b
commit
d0ff73077d
41
cr-dump.c
41
cr-dump.c
@ -1384,6 +1384,25 @@ err_cure_imgset:
|
||||
goto err;
|
||||
}
|
||||
|
||||
typedef void (*sa_handler_t)(int);
|
||||
|
||||
static int setup_alarm_handler(sa_handler_t handler)
|
||||
{
|
||||
struct sigaction sa = {
|
||||
.sa_handler = handler,
|
||||
.sa_flags = 0,
|
||||
};
|
||||
|
||||
sigemptyset(&sa.sa_mask);
|
||||
sigaddset(&sa.sa_mask, SIGALRM);
|
||||
if (sigaction(SIGALRM, &sa, NULL)) {
|
||||
pr_perror("Unable to setup SIGALRM handler");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cr_pre_dump_finish(struct list_head *ctls, int ret)
|
||||
{
|
||||
struct parasite_ctl *ctl, *n;
|
||||
@ -1436,6 +1455,15 @@ static int cr_pre_dump_finish(struct list_head *ctls, int ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
void pre_dump_alarm_handler(int signum)
|
||||
{
|
||||
LIST_HEAD(empty_list);
|
||||
|
||||
pr_err("Timeout reached\n");
|
||||
cr_pre_dump_finish(&empty_list, -1);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
int cr_pre_dump_tasks(pid_t pid)
|
||||
{
|
||||
struct pstree_item *item;
|
||||
@ -1470,6 +1498,9 @@ int cr_pre_dump_tasks(pid_t pid)
|
||||
if (connect_to_page_server())
|
||||
goto err;
|
||||
|
||||
if (setup_alarm_handler(pre_dump_alarm_handler))
|
||||
goto err;
|
||||
|
||||
if (collect_pstree(pid))
|
||||
goto err;
|
||||
|
||||
@ -1570,6 +1601,13 @@ static int cr_dump_finish(int ret)
|
||||
return post_dump_ret ? : (ret != 0);
|
||||
}
|
||||
|
||||
/*
 * SIGALRM handler used on the dump path (installed from cr_dump_tasks()
 * through setup_alarm_handler()).
 *
 * Logs the timeout, runs cr_dump_finish() with a failure status, then
 * terminates the process. @signum is not used.
 *
 * NOTE(review): pr_err(), cr_dump_finish() and exit() are called from
 * signal context; exit() is not async-signal-safe per POSIX and the
 * other two presumably are not either — confirm this is acceptable.
 */
void dump_alarm_handler(int signum)
{
	const int failure = -1;

	pr_err("Timeout reached\n");
	cr_dump_finish(failure);
	exit(failure);
}
|
||||
|
||||
int cr_dump_tasks(pid_t pid)
|
||||
{
|
||||
struct pstree_item *item;
|
||||
@ -1617,6 +1655,9 @@ int cr_dump_tasks(pid_t pid)
|
||||
if (connect_to_page_server())
|
||||
goto err;
|
||||
|
||||
if (setup_alarm_handler(dump_alarm_handler))
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* The collect_pstree will also stop (PTRACE_SEIZE) the tasks
|
||||
* thus ensuring that they don't modify anything we collect
|
||||
|
@ -65,6 +65,7 @@ void init_opts(void)
|
||||
opts.manage_cgroups = CG_MODE_DEFAULT;
|
||||
opts.ps_socket = -1;
|
||||
opts.ghost_limit = DEFAULT_GHOST_LIMIT;
|
||||
opts.timeout = DEFAULT_TIMEOUT;
|
||||
}
|
||||
|
||||
static int parse_ns_string(const char *ptr)
|
||||
@ -255,6 +256,7 @@ int main(int argc, char *argv[], char *envp[])
|
||||
{ "ghost-limit", required_argument, 0, 1069 },
|
||||
{ "irmap-scan-path", required_argument, 0, 1070 },
|
||||
{ "lsm-profile", required_argument, 0, 1071 },
|
||||
{ "timeout", required_argument, 0, 1072 },
|
||||
{ },
|
||||
};
|
||||
|
||||
@ -503,6 +505,8 @@ int main(int argc, char *argv[], char *envp[])
|
||||
case 1071:
|
||||
if (parse_lsm_arg(optarg) < 0)
|
||||
return -1;
|
||||
case 1072:
|
||||
opts.timeout = atoi(optarg);
|
||||
break;
|
||||
case 'M':
|
||||
{
|
||||
|
@ -38,6 +38,8 @@ struct cg_root_opt {
|
||||
*/
|
||||
#define DEFAULT_GHOST_LIMIT (1 << 20)
|
||||
|
||||
/*
 * Default value of opts.timeout, in seconds. The value is handed to
 * alarm() in collect_pstree() to bound how long collecting tasks may take.
 */
#define DEFAULT_TIMEOUT 5
|
||||
|
||||
struct irmap;
|
||||
|
||||
struct irmap_path_opt {
|
||||
@ -97,6 +99,7 @@ struct cr_options {
|
||||
struct list_head irmap_scan_paths;
|
||||
bool lsm_supplied;
|
||||
char *lsm_profile;
|
||||
unsigned int timeout;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
|
9
seize.c
9
seize.c
@ -640,6 +640,13 @@ int collect_pstree(pid_t pid)
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* wait4() may hang for some reason. Enable timer and fire SIGALRM
|
||||
* if timeout reached. SIGALRM handler will do the necessary
|
||||
* cleanups and terminate current process.
|
||||
*/
|
||||
alarm(opts.timeout);
|
||||
|
||||
ret = seize_wait_task(pid, -1, &dmpi(root_item)->pi_creds);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
@ -658,6 +665,8 @@ int collect_pstree(pid_t pid)
|
||||
timing_start(TIME_FROZEN);
|
||||
|
||||
err:
|
||||
/* Freezing stage finished in time - disable timer. */
|
||||
alarm(0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user