2
0
mirror of https://github.com/checkpoint-restore/criu synced 2025-08-28 12:57:57 +00:00

dump: add timeout for collecting processes

Currently criu dump may hang indefinitely, e.g. while waiting for a
task that is blocked in vfork(), or for a task that is in D state for
some other reason. This patch adds a time limit on collecting tasks
during the dump operation. If collecting processes takes too long, the
dump process will be terminated. The timeout is 5 seconds by default,
but it can be changed via the --timeout parameter.

Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
This commit is contained in:
Andrey Ryabinin 2015-12-16 14:59:51 +03:00 committed by Pavel Emelyanov
parent 9bfd62c52b
commit d0ff73077d
4 changed files with 57 additions and 0 deletions

View File

@ -1384,6 +1384,25 @@ err_cure_imgset:
goto err; goto err;
} }
typedef void (*sa_handler_t)(int);
static int setup_alarm_handler(sa_handler_t handler)
{
struct sigaction sa = {
.sa_handler = handler,
.sa_flags = 0,
};
sigemptyset(&sa.sa_mask);
sigaddset(&sa.sa_mask, SIGALRM);
if (sigaction(SIGALRM, &sa, NULL)) {
pr_perror("Unable to setup SIGALRM handler");
return -1;
}
return 0;
}
static int cr_pre_dump_finish(struct list_head *ctls, int ret) static int cr_pre_dump_finish(struct list_head *ctls, int ret)
{ {
struct parasite_ctl *ctl, *n; struct parasite_ctl *ctl, *n;
@ -1436,6 +1455,15 @@ static int cr_pre_dump_finish(struct list_head *ctls, int ret)
return ret; return ret;
} }
void pre_dump_alarm_handler(int signum)
{
LIST_HEAD(empty_list);
pr_err("Timeout reached\n");
cr_pre_dump_finish(&empty_list, -1);
exit(-1);
}
int cr_pre_dump_tasks(pid_t pid) int cr_pre_dump_tasks(pid_t pid)
{ {
struct pstree_item *item; struct pstree_item *item;
@ -1470,6 +1498,9 @@ int cr_pre_dump_tasks(pid_t pid)
if (connect_to_page_server()) if (connect_to_page_server())
goto err; goto err;
if (setup_alarm_handler(pre_dump_alarm_handler))
goto err;
if (collect_pstree(pid)) if (collect_pstree(pid))
goto err; goto err;
@ -1570,6 +1601,13 @@ static int cr_dump_finish(int ret)
return post_dump_ret ? : (ret != 0); return post_dump_ret ? : (ret != 0);
} }
/*
 * SIGALRM handler used on the dump path.
 *
 * Logs that the timeout fired, runs cr_dump_finish() with a failure
 * status, and then terminates the process with exit(-1).
 *
 * NOTE(review): pr_err() and exit() are not on the POSIX list of
 * async-signal-safe functions -- confirm this is acceptable here.
 */
void dump_alarm_handler(int signum)
{
	const int failure = -1;

	pr_err("Timeout reached\n");
	cr_dump_finish(failure);
	exit(failure);
}
int cr_dump_tasks(pid_t pid) int cr_dump_tasks(pid_t pid)
{ {
struct pstree_item *item; struct pstree_item *item;
@ -1617,6 +1655,9 @@ int cr_dump_tasks(pid_t pid)
if (connect_to_page_server()) if (connect_to_page_server())
goto err; goto err;
if (setup_alarm_handler(dump_alarm_handler))
goto err;
/* /*
* The collect_pstree will also stop (PTRACE_SEIZE) the tasks * The collect_pstree will also stop (PTRACE_SEIZE) the tasks
* thus ensuring that they don't modify anything we collect * thus ensuring that they don't modify anything we collect

View File

@ -65,6 +65,7 @@ void init_opts(void)
opts.manage_cgroups = CG_MODE_DEFAULT; opts.manage_cgroups = CG_MODE_DEFAULT;
opts.ps_socket = -1; opts.ps_socket = -1;
opts.ghost_limit = DEFAULT_GHOST_LIMIT; opts.ghost_limit = DEFAULT_GHOST_LIMIT;
opts.timeout = DEFAULT_TIMEOUT;
} }
static int parse_ns_string(const char *ptr) static int parse_ns_string(const char *ptr)
@ -255,6 +256,7 @@ int main(int argc, char *argv[], char *envp[])
{ "ghost-limit", required_argument, 0, 1069 }, { "ghost-limit", required_argument, 0, 1069 },
{ "irmap-scan-path", required_argument, 0, 1070 }, { "irmap-scan-path", required_argument, 0, 1070 },
{ "lsm-profile", required_argument, 0, 1071 }, { "lsm-profile", required_argument, 0, 1071 },
{ "timeout", required_argument, 0, 1072 },
{ }, { },
}; };
@ -503,6 +505,8 @@ int main(int argc, char *argv[], char *envp[])
case 1071: case 1071:
if (parse_lsm_arg(optarg) < 0) if (parse_lsm_arg(optarg) < 0)
return -1; return -1;
case 1072:
opts.timeout = atoi(optarg);
break; break;
case 'M': case 'M':
{ {

View File

@ -38,6 +38,8 @@ struct cg_root_opt {
*/ */
#define DEFAULT_GHOST_LIMIT (1 << 20) #define DEFAULT_GHOST_LIMIT (1 << 20)
/*
 * Default value for opts.timeout, in seconds (the value is handed to
 * alarm() while tasks are being collected).
 */
#define DEFAULT_TIMEOUT 5
struct irmap; struct irmap;
struct irmap_path_opt { struct irmap_path_opt {
@ -97,6 +99,7 @@ struct cr_options {
struct list_head irmap_scan_paths; struct list_head irmap_scan_paths;
bool lsm_supplied; bool lsm_supplied;
char *lsm_profile; char *lsm_profile;
unsigned int timeout;
}; };
extern struct cr_options opts; extern struct cr_options opts;

View File

@ -640,6 +640,13 @@ int collect_pstree(pid_t pid)
goto err; goto err;
} }
/*
* wait4() may hang for some reason. Enable timer and fire SIGALRM
* if timeout reached. SIGALRM handler will do the necessary
* cleanups and terminate current process.
*/
alarm(opts.timeout);
ret = seize_wait_task(pid, -1, &dmpi(root_item)->pi_creds); ret = seize_wait_task(pid, -1, &dmpi(root_item)->pi_creds);
if (ret < 0) if (ret < 0)
goto err; goto err;
@ -658,6 +665,8 @@ int collect_pstree(pid_t pid)
timing_start(TIME_FROZEN); timing_start(TIME_FROZEN);
err: err:
/* Freezing stage finished in time - disable timer. */
alarm(0);
return ret; return ret;
} }