2
0
mirror of https://github.com/checkpoint-restore/criu synced 2025-08-22 09:58:09 +00:00

ctrools: Rewrite task/threads stopping engine is back

This commit brings the former "Rewrite task/threads stopping engine"
commit back. Handling it separately is too complex so better try
to handle it in-place.

Note some tests might fault, it's expected.
---

Stopping tasks with STOP and proceeding with SEIZE is actually excessive --
the SEIZE is enough. Moreover, just killing a task with STOP is also racy,
since task should be given some time to come to sleep before its proc
can be parsed.

Rewrite all this code to SEIZE task and all its threads from the very beginning.

With this we can distinguish stopped task state and migrate it properly (not
supported now, need to implement).

This thing however has one BIG problem -- after we SEIZE-d a task we should
seize its threads, but we should do it in a loop -- reading /proc/pid/task and
seizing them again and again, until the contents of this dir stop changing
(not done now).

Besides, after we seized a task and all its threads we cannot scan its children
list once -- task can get reparented to init and any task's child can call clone
with CLONE_PARENT flag thus repopulating the children list of the already seized
task (not done also)

This patch is ugly, yes, but splitting it doesn't help to review it much, sorry
:(

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
This commit is contained in:
Cyrill Gorcunov 2012-02-01 19:45:31 +04:00
parent ab82c2de98
commit e61605169f
9 changed files with 228 additions and 153 deletions

254
cr-dump.c
View File

@ -724,7 +724,7 @@ err:
return ret;
}
static int parse_threads(pid_t pid, int pid_dir, struct pstree_item *item)
static int parse_threads(struct pstree_item *item, int pid_dir)
{
struct dirent *de;
DIR *dir;
@ -733,7 +733,7 @@ static int parse_threads(pid_t pid, int pid_dir, struct pstree_item *item)
dir = opendir_proc(pid_dir, "task");
if (!dir) {
pr_perror("Can't open %d/task", pid);
pr_perror("Can't open %d/task", item->pid);
return -1;
}
@ -762,7 +762,7 @@ static int parse_threads(pid_t pid, int pid_dir, struct pstree_item *item)
return 0;
}
static int parse_children(pid_t pid, int pid_dir, struct pstree_item *item)
static int parse_children(struct pstree_item *item, int pid_dir)
{
FILE *file;
char *tok;
@ -774,7 +774,7 @@ static int parse_children(pid_t pid, int pid_dir, struct pstree_item *item)
file = fopen_proc(pid_dir, "task/%d/children", item->threads[i]);
if (!file) {
pr_perror("Can't open %d children %d",
pid, item->threads[i]);
item->pid, item->threads[i]);
goto err;
}
@ -806,91 +806,154 @@ err:
return -1;
}
static struct pstree_item *add_pstree_entry(pid_t pid, int pid_dir, struct list_head *list)
/*
 * Release every entry of item->threads[] from ptrace, leaving each one
 * in the state requested by @st.
 *
 * There is no separate call for item->pid: the leader is expected to be
 * listed in threads[] as well (seize_threads() skips the entry that
 * equals item->pid for the same reason).
 */
static void unseize_task_and_threads(struct pstree_item *item, enum cr_task_state st)
{
	unsigned int idx = 0;

	while (idx < item->nr_threads) {
		unseize_task(item->threads[idx], st);
		idx++;
	}
}
/*
 * Walk the collected process tree and unseize each item (with all of
 * its threads), putting it into opts->final_state.
 *
 * When opts->leader_only is set only the first list entry is handled.
 */
static void pstree_switch_state(struct list_head *list, struct cr_options *opts)
{
	struct pstree_item *pos;

	list_for_each_entry(pos, list, list) {
		unseize_task_and_threads(pos, opts->final_state);

		/* Single-process mode: nothing past the first entry to release */
		if (opts->leader_only)
			return;
	}
}
/*
 * Seize every non-leader thread listed in item->threads[].
 *
 * The entry equal to item->pid is skipped -- the leader has already been
 * seized by the caller. Zombie leaders with extra threads, threads that
 * cannot be seized (possible zombies) and stopped threads are all
 * rejected as unsupported.
 *
 * Returns 0 when all threads are seized, -1 otherwise. On failure every
 * thread seized by this call is detached again, so the caller only has
 * the leader left to deal with.
 */
static int seize_threads(struct pstree_item *item)
{
	int i = 0, ret;

	if ((item->state == TASK_DEAD) && (item->nr_threads > 1)) {
		pr_err("Zombies with threads are not supported\n");
		goto err;
	}

	for (i = 0; i < item->nr_threads; i++) {
		if (item->pid == item->threads[i])
			continue;

		pr_info("\tSeizing %d's %d thread\n", item->pid, item->threads[i]);
		ret = seize_task(item->threads[i]);
		if (ret < 0)
			goto err;

		/* PTRACE_SEIZE itself failed, so there is nothing to detach */
		if (ret == TASK_SHOULD_BE_DEAD) {
			pr_err("Potentially zombie thread not supported\n");
			goto err;
		}

		if (ret == TASK_STOPPED) {
			pr_err("Stopped threads not supported\n");
			/*
			 * This thread *was* attached successfully; the rollback
			 * below starts at i - 1, so release it here or it would
			 * stay seized.
			 */
			unseize_task(item->threads[i], CR_TASK_STOP /* FIXME */);
			goto err;
		}
	}

	return 0;

err:
	/* Roll back the threads seized before the failing one */
	for (i--; i >= 0; i--) {
		if (item->pid == item->threads[i])
			continue;

		unseize_task(item->threads[i], CR_TASK_STOP /* FIXME */);
	}

	return -1;
}
static struct pstree_item *collect_task(pid_t pid, struct list_head *list)
{
int ret, pid_dir;
struct pstree_item *item;
item = xzalloc(sizeof(*item));
if (!item)
goto err;
if (parse_threads(pid, pid_dir, item))
goto err_free;
if (parse_children(pid, pid_dir, item))
ret = seize_task(pid);
if (ret < 0)
goto err_free;
pr_info("Seized task %d, state %d\n", pid, ret);
item->pid = pid;
item->state = ret;
pid_dir = open_pid_proc(pid);
if (pid_dir < 0)
goto err_free;
if (item->state == TASK_SHOULD_BE_DEAD) {
struct proc_pid_stat_small ps;
ret = parse_pid_stat_small(pid, pid_dir, &ps);
if (ret < 0)
goto err_close;
if (ps.state != 'Z') {
pr_err("Unseizeable non-zombie %d found, state %c\n",
item->pid, ps.state);
goto err_close;
}
item->state = TASK_DEAD;
}
ret = parse_threads(item, pid_dir);
if (ret < 0)
goto err_close;
ret = seize_threads(item);
if (ret < 0)
goto err_close;
ret = parse_children(item, pid_dir);
if (ret < 0)
goto err_close;
if ((item->state == TASK_DEAD) && (item->nr_children > 0)) {
pr_err("Zombie with children?! O_o Run, run, run!\n");
goto err_close;
}
close(pid_dir);
list_add_tail(&item->list, list);
pr_info("Collected %d in %d state\n", item->pid, item->state);
return item;
err_close:
close(pid_dir);
err_free:
xfree(item->threads);
xfree(item->children);
xfree(item->threads);
xfree(item);
err:
return NULL;
}
static const int state_sigs[] = {
[CR_TASK_STOP] = SIGSTOP,
[CR_TASK_RUN] = SIGCONT,
[CR_TASK_KILL] = SIGKILL,
};
static int ps_switch_state(int pid, enum cr_task_state state)
{
return kill(pid, state_sigs[state]);
}
static void pstree_switch_state(struct list_head *list,
enum cr_task_state state, int leader_only)
static int collect_pstree(pid_t pid, struct list_head *pstree_list, int leader_only)
{
struct pstree_item *item;
int i;
/*
* Since ptrace-seize doesn't work on frozen tasks
* we stick with explicit tasks stopping via stop
* signal, but in future it's aimed to switch to
* kernel freezer.
*/
pr_info("Collecting tasks starting from %d\n", pid);
item = collect_task(pid, pstree_list);
if (item == NULL)
return -1;
list_for_each_entry(item, list, list) {
kill(item->pid, state_sigs[state]);
if (leader_only)
break;
}
}
if (leader_only)
return 0;
static int collect_pstree(pid_t pid, struct list_head *pstree_list)
{
struct pstree_item *item;
unsigned long i;
int pid_dir;
int ret = -1;
for (i = 0; i < item->nr_children; i++)
if (collect_pstree(item->children[i], pstree_list, 0) < 0)
return -1;
pid_dir = open_pid_proc(pid);
if (pid_dir < 0)
goto err;
if (ps_switch_state(pid, CR_TASK_STOP))
goto err;
item = add_pstree_entry(pid, pid_dir, pstree_list);
if (!item)
goto err;
for (i = 0; i < item->nr_children; i++) {
ret = collect_pstree(item->children[i], pstree_list);
if (ret)
goto err_close;
}
ret = 0;
err_close:
close(pid_dir);
err:
return ret;
return 0;
}
static int dump_pstree(pid_t pid, struct list_head *pstree_list, struct cr_fdset *cr_fdset)
@ -1085,25 +1148,12 @@ static int dump_task_thread(pid_t pid, struct cr_fdset *cr_fdset)
if (!core)
goto err;
ret = seize_task(pid);
if (ret) {
pr_err("Failed to seize thread (pid: %d) with %d\n",
pid, ret);
goto err_free;
}
pr_info("Dumping GP/FPU registers ... ");
ret = get_task_regs(pid, core);
if (ret)
goto err_free;
pr_info("OK\n");
ret = unseize_task(pid);
if (ret) {
pr_err("Can't unsieze thread (pid: %d)\n", pid);
goto err_free;
}
core->tc.task_state = TASK_ALIVE;
core->tc.exit_code = 0;
@ -1122,16 +1172,6 @@ static int dump_one_zombie(struct pstree_item *item, struct proc_pid_stat *pps,
{
struct core_entry *core;
if (item->nr_children) {
pr_err("Zombie %d with children.\n", item->pid);
return -1;
}
if (item->nr_threads > 1) {
pr_err("Zombie %d with threads.\n", item->pid);
return -1;
}
cr_fdset = cr_fdset_open(item->pid, CR_FD_DESC_CORE, cr_fdset);
if (cr_fdset == NULL)
return -1;
@ -1191,6 +1231,11 @@ static int dump_one_task(struct pstree_item *item, struct cr_fdset *cr_fdset)
pr_info("Dumping task (pid: %d)\n", pid);
pr_info("========================================\n");
if (item->state == TASK_STOPPED) {
pr_err("Stopped tasks are not supported\n");
goto err;
}
pid_dir = open_pid_proc(pid);
if (pid_dir < 0) {
pr_perror("Can't open %d proc dir", pid);
@ -1202,17 +1247,8 @@ static int dump_one_task(struct pstree_item *item, struct cr_fdset *cr_fdset)
if (ret < 0)
goto err;
switch (pps_buf.state) {
case 'Z':
if (item->state == TASK_DEAD)
return dump_one_zombie(item, &pps_buf, cr_fdset);
case 'T':
/* Stopped -- can dump one */
break;
default:
ret = -1;
pr_err("Task in bad state: %c\n", pps_buf.state);
goto err;
};
ret = -1;
if (!cr_fdset_open(item->pid, CR_FD_DESC_TASK, cr_fdset))
@ -1224,13 +1260,6 @@ static int dump_one_task(struct pstree_item *item, struct cr_fdset *cr_fdset)
goto err;
}
ret = seize_task(pid);
if (ret) {
pr_err("Failed to seize task (pid: %d) with %d\n",
pid, ret);
goto err;
}
ret = dump_task_core_seized(pid, pid_dir, &pps_buf, cr_fdset);
if (ret) {
pr_err("Dump core (pid: %d) failed with %d\n", pid, ret);
@ -1273,12 +1302,6 @@ static int dump_one_task(struct pstree_item *item, struct cr_fdset *cr_fdset)
goto err;
}
ret = unseize_task(pid);
if (ret) {
pr_err("Can't unsieze (pid: %d) task\n", pid);
goto err;
}
ret = dump_task_files(pid, pid_dir, cr_fdset);
if (ret) {
pr_err("Dump files (pid: %d) failed with %d\n", pid, ret);
@ -1326,7 +1349,7 @@ int cr_dump_tasks(pid_t pid, struct cr_options *opts)
pr_info("Dumping process (pid: %d)\n", pid);
pr_info("========================================\n");
if (collect_pstree(pid, &pstree_list))
if (collect_pstree(pid, &pstree_list, opts->leader_only))
goto err;
if (opts->namespaces_flags) {
@ -1367,17 +1390,8 @@ int cr_dump_tasks(pid_t pid, struct cr_options *opts)
ret = 0;
err:
switch (opts->final_state) {
case CR_TASK_RUN:
case CR_TASK_KILL:
pstree_switch_state(&pstree_list,
opts->final_state, opts->leader_only);
case CR_TASK_STOP: /* they are already stopped */
break;
}
pstree_switch_state(&pstree_list, opts);
free_pstree(&pstree_list);
close_cr_fdset(&cr_fdset);
return ret;

View File

@ -289,7 +289,7 @@ int main(int argc, char *argv[])
int action = -1;
int log_inited = 0;
static const char short_opts[] = "df:p:t:hcD:o:n:";
static const char short_opts[] = "dsf:p:t:hcD:o:n:";
BUILD_BUG_ON(PAGE_SIZE != PAGE_IMAGE_SIZE);
@ -306,6 +306,9 @@ int main(int argc, char *argv[])
for (opt = getopt_long(argc - 1, argv + 1, short_opts, NULL, &idx); opt != -1;
opt = getopt_long(argc - 1, argv + 1, short_opts, NULL, &idx)) {
switch (opt) {
case 's':
opts.final_state = CR_TASK_STOP;
break;
case 'p':
pid = atoi(optarg);
opts.leader_only = true;
@ -316,7 +319,6 @@ int main(int argc, char *argv[])
break;
case 'c':
opts.show_pages_content = true;
opts.final_state = CR_TASK_RUN;
break;
case 'f':
opts.show_dump_file = optarg;
@ -398,10 +400,9 @@ usage:
printk(" -p checkpoint/restore only a single process identified by pid\n");
printk(" -t checkpoint/restore the whole process tree identified by pid\n");
printk(" -f show contents of a checkpoint file\n");
printk(" -c in case of checkpoint -- continue running the process after\n"
" checkpoint complete, in case of showing file contents --\n"
" show contents of pages dumped in hexdump format\n");
printk(" -c show contents of pages dumped in hexdump format\n");
printk(" -d detach after restore\n");
printk(" -s leave tasks in stopped state after checkpoint instead of killing them\n");
printk(" -n checkpoint/restore namespaces - values must be separated by comma\n");
printk(" supported: uts, ipc\n");

View File

@ -43,7 +43,6 @@ enum {
};
enum cr_task_state {
CR_TASK_RUN,
CR_TASK_STOP,
CR_TASK_KILL,
};
@ -139,6 +138,7 @@ struct vma_area {
struct pstree_item {
struct list_head list;
pid_t pid; /* leader pid */
int state; /* TASK_XXX constants */
u32 nr_children; /* number of children */
u32 nr_threads; /* number of threads */
u32 *threads; /* array of threads */

View File

@ -317,9 +317,10 @@ struct core_entry {
};
} __packed;
#define TASK_ALIVE 0x1
#define TASK_DEAD 0x2
#define TASK_STOPPED 0x3 /* FIXME - implement */
#define TASK_SHOULD_BE_DEAD 0x0
#define TASK_ALIVE 0x1
#define TASK_DEAD 0x2
#define TASK_STOPPED 0x3 /* FIXME - implement */
#endif /* CONFIG_X86_64 */

View File

@ -4,6 +4,12 @@
#define PROC_TASK_COMM_LEN 32
#define PROC_TASK_COMM_LEN_FMT "(%31s"
struct proc_pid_stat_small {
int pid;
char comm[PROC_TASK_COMM_LEN];
char state;
};
struct proc_pid_stat {
int pid;
char comm[PROC_TASK_COMM_LEN];
@ -72,6 +78,7 @@ struct proc_status_creds {
};
extern int parse_pid_stat(pid_t pid, int pid_dir, struct proc_pid_stat *s);
extern int parse_pid_stat_small(pid_t pid, int pid_dir, struct proc_pid_stat_small *s);
extern int parse_maps(pid_t pid, int pid_dir, struct list_head *vma_area_list, bool use_map_files);
extern int parse_pid_status(int pid_dir, struct proc_status_creds *);

View File

@ -33,7 +33,7 @@
#define PTRACE_O_TRACEEXIT 0x00000040
extern int seize_task(pid_t pid);
extern int unseize_task(pid_t pid);
extern int unseize_task(pid_t pid, enum cr_task_state st);
extern int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes);
extern int ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes);
extern int ptrace_show_area(pid_t pid, void *addr, long bytes);

View File

@ -15,6 +15,7 @@
#include <sys/wait.h>
#include <sys/socket.h>
#include "crtools.h"
#include "compiler.h"
#include "syscall.h"
#include "types.h"

View File

@ -175,6 +175,36 @@ err_bogus_mapping:
goto err;
}
/*
 * Read only the first three fields (pid, comm, state) of a task's
 * proc "stat" file into @s.
 *
 * @pid     is used for diagnostics only.
 * @pid_dir is the already opened proc directory of the task, passed to
 *          fopen_proc().
 * @s       is zeroed and then filled in; comm is NUL-terminated and cut
 *          at the closing ')'.
 *
 * Returns 0 on success, -1 if the file cannot be opened or fewer than
 * three fields were parsed. The stream is closed on every path.
 *
 * NOTE(review): the comm field is scanned with %s, which stops at
 * whitespace -- a command name containing spaces would make the state
 * character come out wrong. Left as is here.
 */
int parse_pid_stat_small(pid_t pid, int pid_dir, struct proc_pid_stat_small *s)
{
	FILE *f;
	char *tok;
	int n;

	f = fopen_proc(pid_dir, "stat");
	if (f == NULL) {
		pr_perror("Can't open %d's stat", pid);
		return -1;
	}

	memset(s, 0, sizeof(*s));
	n = fscanf(f, "%d " PROC_TASK_COMM_LEN_FMT " %c",
		   &s->pid, s->comm, &s->state);

	/* Nothing more is read from the file; close it before any return */
	fclose(f);

	if (n < 3) {
		pr_err("Parsing %d's stat failed (#fields do not match)\n", pid);
		return -1;
	}

	s->comm[PROC_TASK_COMM_LEN-1] = '\0';
	tok = strchr(s->comm, ')');
	if (tok)
		*tok = '\0';

	return 0;
}
int parse_pid_stat(pid_t pid, int pid_dir, struct proc_pid_stat *s)
{
FILE *f;

View File

@ -13,14 +13,22 @@
#include <sys/resource.h>
#include <sys/wait.h>
#include "crtools.h"
#include "compiler.h"
#include "types.h"
#include "util.h"
#include "ptrace.h"
int unseize_task(pid_t pid)
int unseize_task(pid_t pid, enum cr_task_state st)
{
return ptrace(PTRACE_DETACH, pid, NULL, NULL);
if (st == CR_TASK_STOP)
return ptrace(PTRACE_DETACH, pid, NULL, NULL);
else if (st == CR_TASK_KILL)
return ptrace(PTRACE_KILL, pid, NULL, NULL);
else {
BUG_ON(1);
return -1;
}
}
/*
@ -30,48 +38,61 @@ int unseize_task(pid_t pid)
* of it so the task would not know if it was saddled
* up with someone else.
*/
int seize_task(pid_t pid)
{
siginfo_t si;
int status;
int ret = 0;
int ret;
ret = ptrace(PTRACE_SEIZE, pid, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (ret < 0) {
pr_perror("Can't seize task");
goto err;
}
if (ret < 0)
return TASK_SHOULD_BE_DEAD; /* Caller should verify it's really dead */
ret = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);
if (ret < 0) {
pr_perror("Can't interrupt task");
pr_perror("SEIZE %d: can't interrupt task", pid);
goto err;
}
ret = -10;
if (wait4(pid, &status, __WALL, NULL) != pid)
ret = wait4(pid, &status, __WALL, NULL);
if (ret < 0) {
pr_perror("SEIZE %d: can't wait task", pid);
goto err;
}
ret = -20;
if (!WIFSTOPPED(status))
if (ret != pid) {
pr_err("SEIZE %d: wrong task attached (%d)\n", pid, ret);
goto err;
}
jerr_rc(ptrace(PTRACE_GETSIGINFO, pid, NULL, &si), ret, err_cont);
if (!WIFSTOPPED(status)) {
pr_err("SEIZE %d: task not stopped after seize\n", pid);
goto err;
}
ret = -30;
if ((si.si_code >> 8) != PTRACE_EVENT_STOP)
goto err_cont;
ret = ptrace(PTRACE_GETSIGINFO, pid, NULL, &si);
if (ret < 0) {
pr_perror("SEIZE %d: can't read signfo", pid);
goto err;
}
jerr_rc(ptrace(PTRACE_SETOPTIONS, pid, NULL,
(void *)(unsigned long)PTRACE_O_TRACEEXIT), ret, err_cont);
if ((si.si_code >> 8) != PTRACE_EVENT_STOP) {
pr_err("SEIZE %d: wrong stop event received 0x%x\n", pid,
(unsigned int)si.si_code);
goto err;
}
if (si.si_signo == SIGTRAP)
return TASK_ALIVE;
else if (si.si_signo == SIGSTOP)
return TASK_STOPPED;
pr_err("SEIZE %d: unsupported stop signal %d\n", pid, si.si_signo);
err:
return ret;
err_cont:
kill(pid, SIGCONT);
goto err;
unseize_task(pid, CR_TASK_STOP);
return -1;
}
int ptrace_show_area_r(pid_t pid, void *addr, long bytes)