diff --git a/cr-restore.c b/cr-restore.c index 270d1d162..dc560593c 100644 --- a/cr-restore.c +++ b/cr-restore.c @@ -1374,7 +1374,7 @@ static int restore_switch_stage(int next_stage) return restore_wait_inprogress_tasks(); } -static int attach_to_tasks() +static int attach_to_tasks(bool root_seized) { struct pstree_item *item; @@ -1394,9 +1394,21 @@ static int attach_to_tasks() for (i = 0; i < item->nr_threads; i++) { pid = item->threads[i].real; - if (ptrace(PTRACE_ATTACH, pid, 0, 0)) { - pr_perror("Can't attach to %d", pid); - return -1; + if (item != root_item || !root_seized) { + if (ptrace(PTRACE_ATTACH, pid, 0, 0)) { + pr_perror("Can't attach to %d", pid); + return -1; + } + } else { + /* + * Root item is SEIZE-d, so we only need + * to stop one (INTERRUPT) to make wait4 + * and SYSCALL below work. + */ + if (ptrace(PTRACE_INTERRUPT, pid, 0, 0)) { + pr_perror("Can't interrupt task"); + return -1; + } } if (wait4(pid, &status, __WALL, NULL) != pid) { @@ -1513,10 +1525,43 @@ static int restore_root_task(struct pstree_item *init) futex_set(&task_entries->nr_in_progress, stage_participants(CR_STATE_RESTORE_NS)); + /* + * This means we're called from lib's criu_restore_child(). + * In that case create the root task as the child one to+ + * the caller. This is the only way to correctly restore the + * pdeath_sig of the root task. But also looks nice. + */ + if (opts.swrk_restore) + init->rst->clone_flags |= CLONE_PARENT; + ret = fork_with_pid(init); if (ret < 0) return -1; + if (opts.swrk_restore) { + /* + * Root task is not our sibling. This means, that + * we will not notice when (if) it dies in SIGCHLD + * handler, but we should. To do this -- attach to + * the guy with ptrace and (!) make the kernel + * deliver us the signal when it will get stopped. + * It will in case of e.g. segfault before handling + * the signal. + */ + + act.sa_flags &= ~SA_NOCLDSTOP; + ret = sigaction(SIGCHLD, &act, NULL); + if (ret < 0) { + pr_perror("sigaction() failed"); + goto out; + } + + if (ptrace(PTRACE_SEIZE, init->pid.real, 0, 0)) { + pr_perror("Can't attach to init"); + goto out; + } + } + pr_info("Wait until namespaces are created\n"); ret = restore_wait_inprogress_tasks(); if (ret) @@ -1570,7 +1615,7 @@ static int restore_root_task(struct pstree_item *init) timing_stop(TIME_RESTORE); - ret = attach_to_tasks(); + ret = attach_to_tasks(opts.swrk_restore); pr_info("Restore finished successfully. Resuming tasks.\n"); futex_set_and_wake(&task_entries->start, CR_STATE_COMPLETE); diff --git a/crtools.c b/crtools.c index c3a2e270b..b662fff36 100644 --- a/crtools.c +++ b/crtools.c @@ -183,14 +183,16 @@ int main(int argc, char *argv[]) if (init_service_fd()) return 1; - if (!strcmp(argv[1], "swrk")) + if (!strcmp(argv[1], "swrk")) { /* * This is to start criu service worker from libcriu calls. * The usage is "criu swrk " and is not for CLI/scripts. * The arguments semantics can change at any tyme with the * corresponding lib call change. */ + opts.swrk_restore = true; return cr_service_work(atoi(argv[2])); + } while (1) { idx = -1; diff --git a/include/cr_options.h b/include/cr_options.h index 2732e58d6..55ca70b32 100644 --- a/include/cr_options.h +++ b/include/cr_options.h @@ -34,6 +34,7 @@ struct cr_options { bool link_remap_ok; unsigned int rst_namespaces_flags; bool log_file_per_pid; + bool swrk_restore; char *output; char *root; char *pidfile; diff --git a/lib/criu.c b/lib/criu.c index 86256fd0a..7c1ac0785 100644 --- a/lib/criu.c +++ b/lib/criu.c @@ -15,10 +15,6 @@ #include "rpc.pb-c.h" #include "cr-service-const.h" -#ifndef PR_SET_CHILD_SUBREAPER -#define PR_SET_CHILD_SUBREAPER 36 -#endif - const char *criu_lib_version = CRIU_VERSION; static char *service_address = CR_DEFAULT_SERVICE_ADDRESS; @@ -582,15 +578,6 @@ int criu_restore_child(void) if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sks)) goto out; - /* - * Set us as child subreaper so that after the swrk - * finishes restore and exits the restored subtree - * gets reparented to us. - */ - - if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0)) - goto err; - pid = fork(); if (pid < 0) goto err; @@ -633,8 +620,6 @@ int criu_restore_child(void) close(sks[0]); waitpid(pid, NULL, 0); - /* Drop the subreaper role _after_ swrk exits */ - prctl(PR_SET_CHILD_SUBREAPER, 0, 0, 0); if (!ret) { ret = resp->success ? resp->restore->pid : -EBADE;