mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-22 09:58:09 +00:00
restore: zombies should collect their children
Consider when there is a double fork of helpers or zombies, e.g. when a zombie has a session id which doesn't match its pid. If the child dies and exits before the grandchild, the grandchild reparents to init, and when the task dies init doesn't have it in the helper list, so init dies as well, viz. the log below. (00.118789) Add a helper 293 for restoring SID 293 (00.118792) Attach 294 to the temporary task 293 ... (01.394403) 294: Restoring zombie with 0 code ... pie: Task 294 exited, status= 0 (01.434279) Error (cr-restore.c:1308): 12097 killed by signal 19 (01.434420) Error (cr-restore.c:1308): 12097 killed by signal 19 (01.450258) Switching to new ns to clean ghosts (01.450324) Error (cr-restore.c:2138): Restoring FAILED. Let's have the helpers reap their children before they exit to avoid this. v2: block SIGCHLD when waiting on helpers so that it doesn't race with the SIGCHLD handler v3: * only helpers should collect their children, zombies can't have kids * don't double decrement nr_tasks in zombie case Signed-off-by: Tycho Andersen <tycho.andersen@canonical.com> Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
This commit is contained in:
parent
a6c6ec89ad
commit
03e13d475d
@ -1008,6 +1008,45 @@ static inline int sig_fatal(int sig)
|
||||
struct task_entries *task_entries;
|
||||
static unsigned long task_entries_pos;
|
||||
|
||||
static int wait_on_helpers_zombies(void)
|
||||
{
|
||||
struct pstree_item *pi;
|
||||
sigset_t blockmask, oldmask;
|
||||
|
||||
sigemptyset(&blockmask);
|
||||
sigaddset(&blockmask, SIGCHLD);
|
||||
|
||||
if (sigprocmask(SIG_BLOCK, &blockmask, &oldmask) == -1) {
|
||||
pr_perror("Can not set mask of blocked signals");
|
||||
return -1;
|
||||
}
|
||||
|
||||
list_for_each_entry(pi, ¤t->children, sibling) {
|
||||
pid_t pid = pi->pid.virt;
|
||||
int status;
|
||||
|
||||
switch (pi->state) {
|
||||
case TASK_DEAD:
|
||||
if (waitid(P_PID, pid, NULL, WNOWAIT | WEXITED) < 0) {
|
||||
pr_perror("Wait on %d zombie failed\n", pid);
|
||||
return -1;
|
||||
}
|
||||
case TASK_HELPER:
|
||||
if (waitpid(pid, &status, 0) != pid) {
|
||||
pr_perror("waitpid for helper %d failed", pid);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (sigprocmask(SIG_SETMASK, &oldmask, NULL) == -1) {
|
||||
pr_perror("Can not unset mask of blocked signals");
|
||||
BUG();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int restore_one_zombie(CoreEntry *core)
|
||||
{
|
||||
int exit_code = core->tc->exit_code;
|
||||
@ -1093,7 +1132,12 @@ static int restore_one_task(int pid, CoreEntry *core)
|
||||
ret = restore_one_zombie(core);
|
||||
else if (current->state == TASK_HELPER) {
|
||||
restore_finish_stage(CR_STATE_RESTORE);
|
||||
ret = 0;
|
||||
if (wait_on_helpers_zombies()) {
|
||||
pr_err("failed to wait on helpers and zombies\n");
|
||||
ret = -1;
|
||||
} else {
|
||||
ret = 0;
|
||||
}
|
||||
} else {
|
||||
pr_err("Unknown state in code %d\n", (int)core->tc->task_state);
|
||||
ret = -1;
|
||||
|
Loading…
x
Reference in New Issue
Block a user