From cf63c1d9e8c239d93c2e42409581c20817bf564c Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Thu, 31 May 2012 14:50:00 +0400 Subject: [PATCH] crtools: link pstree_item-s in a tree (v3) because they describe a process TREE. It's useful when we dump tasks from another pid namespace, because the real pid is obtained from the parasite. In the previous version we needed to update the pid in two places: one in a pstree_item and one in a children array. A process tree will be necessary to restore sid and pgid, because we should add fake tasks to the tree, for example if a session leader is absent. v2: fix rollback actions v3: fix comments from Pavel Emelyanov * add macro for_each_pstree_item * and fix a few bugs Signed-off-by: Andrey Vagin Signed-off-by: Pavel Emelyanov --- cr-dump.c | 224 +++++++++++++++++++++++++++------------------- cr-restore.c | 92 ++++++++++--------- cr-show.c | 24 ++--- include/crtools.h | 12 ++- include/image.h | 2 +- 5 files changed, 196 insertions(+), 158 deletions(-) diff --git a/cr-dump.c b/cr-dump.c index 1bd2d90a9..3292007ef 100644 --- a/cr-dump.c +++ b/cr-dump.c @@ -49,17 +49,24 @@ static char big_buffer[PATH_MAX]; static char loc_buf[PAGE_SIZE]; -void free_pstree(struct list_head *pstree_list) -{ - struct pstree_item *item, *p; +static struct pstree_item *root_item = NULL; - list_for_each_entry_safe(item, p, pstree_list, list) { - xfree(item->children); +static void free_pstree(struct pstree_item *root_item) +{ + struct pstree_item *item = root_item, *parent; + + while (item) { + if (!list_empty(&item->children)) { + item = list_first_entry(&item->children, struct pstree_item, list); + continue; + } + + parent = item->parent; + list_del(&item->list); xfree(item->threads); xfree(item); + item = parent; } - - INIT_LIST_HEAD(pstree_list); } void free_mappings(struct list_head *vma_area_list) @@ -1073,9 +1080,44 @@ err: return -1; } +struct pstree_item *alloc_pstree_item() +{ + struct pstree_item *item; + + item = xzalloc(sizeof(*item)); + if (!item) + return 
NULL; + + INIT_LIST_HEAD(&item->children); + item->threads = NULL; + item->nr_threads = 0; + + return item; +} + static int get_children(struct pstree_item *item) { - return parse_children(item, &item->children, &item->nr_children); + u32 *ch; + int ret, i, nr_children; + struct pstree_item *c; + + ret = parse_children(item, &ch, &nr_children); + if (ret < 0) + return ret; + + for (i = 0; i < nr_children; i++) { + c = alloc_pstree_item(); + if (c == NULL) { + ret = -1; + goto free; + } + c->pid = ch[i]; + c->parent = item; + list_add_tail(&c->list, &item->children); + } +free: + xfree(ch); + return ret; } static void unseize_task_and_threads(const struct pstree_item *item, int st) @@ -1086,12 +1128,36 @@ static void unseize_task_and_threads(const struct pstree_item *item, int st) unseize_task(item->threads[i], st); /* item->pid will be here */ } -static void pstree_switch_state(const struct list_head *list, int st) +struct pstree_item *pstree_item_next(struct pstree_item *item) { - struct pstree_item *item; + if (!list_empty(&item->children)) { + item = list_first_entry(&item->children, struct pstree_item, list); + return item; + } + + while (1) { + if (item->parent == NULL) { + item = NULL; + break; + } + if (item->list.next == &item->parent->children) { + item = item->parent; + continue; + } else { + item = list_entry(item->list.next, struct pstree_item, list); + break; + } + } + + return item; +} + +static void pstree_switch_state(struct pstree_item *root_item, int st) +{ + struct pstree_item *item = root_item; pr_info("Unfreezing tasks into %d\n", st); - list_for_each_entry(item, list, list) + for_each_pstree_item(item) unseize_task_and_threads(item, st); } @@ -1180,11 +1246,11 @@ static int collect_threads(struct pstree_item *item) * unsupported (FIXME #2). 
*/ -static int check_xids(struct list_head *list) +static int check_xids(struct pstree_item *root_item) { struct pstree_item *p, *tmp; - list_for_each_entry(p, list, list) { + for_each_pstree_item(p) { if (p->parent == NULL) continue; @@ -1196,35 +1262,28 @@ static int check_xids(struct list_head *list) } /* Easing #2 for pgids */ - list_for_each_entry(tmp, list, list) + for_each_pstree_item(tmp) if (tmp->pid == p->pgid) break; - if (&tmp->list == list) { + if (tmp == NULL) { pr_err("PGIG mismatch on %d (%d)\n", p->pid, p->pgid); return -1; } } + return 0; } -static struct pstree_item *collect_task(pid_t pid, struct pstree_item *parent, - struct list_head *list) +static int collect_task(struct pstree_item *item) { int ret; - struct pstree_item *item; - - item = xzalloc(sizeof(*item)); - if (!item) - goto err; - - item->pid = pid; - item->parent = parent; + pid_t pid = item->pid; ret = seize_task(pid, item_ppid(item), &item->pgid, &item->sid); if (ret < 0) - goto err_free; + goto err; pr_info("Seized task %d, state %d\n", pid, ret); item->state = ret; @@ -1237,40 +1296,43 @@ static struct pstree_item *collect_task(pid_t pid, struct pstree_item *parent, if (ret < 0) goto err_close; - if ((item->state == TASK_DEAD) && (item->nr_children > 0)) { + if ((item->state == TASK_DEAD) && !list_empty(&item->children)) { pr_err("Zombie with children?! 
O_o Run, run, run!\n"); goto err_close; } close_pid_proc(); - list_add_tail(&item->list, list); pr_info("Collected %d in %d state\n", item->pid, item->state); - return item; + return 0; err_close: close_pid_proc(); unseize_task(pid, item->state); -err_free: - xfree(item->children); - xfree(item->threads); - xfree(item); err: - return NULL; + return -1; } static int check_subtree(const struct pstree_item *item) { u32 *ch; - int nr, ret; + int nr, ret, i; + struct pstree_item *child; ret = parse_children(item, &ch, &nr); if (ret < 0) return ret; - ret = ((nr == item->nr_children) && !memcmp(ch, item->children, nr)); + i = 0; + list_for_each_entry(child, &item->children, list) { + if (child->pid != ch[i]) + break; + i++; + if (i > nr) + break; + } xfree(ch); - if (!ret) { + if (i != nr) { pr_info("Children set has changed while suspending\n"); return -1; } @@ -1278,23 +1340,25 @@ static int check_subtree(const struct pstree_item *item) return 0; } -static int collect_subtree(pid_t pid, struct pstree_item *parent, - struct list_head *pstree_list, int leader_only) +static int collect_subtree(struct pstree_item *item, int leader_only) { - struct pstree_item *item; - int i; + struct pstree_item *child; + pid_t pid = item->pid; + int ret; pr_info("Collecting tasks starting from %d\n", pid); - item = collect_task(pid, parent, pstree_list); - if (item == NULL) + ret = collect_task(item); + if (ret) return -1; if (leader_only) return 0; - for (i = 0; i < item->nr_children; i++) - if (collect_subtree(item->children[i], item, pstree_list, 0) < 0) + list_for_each_entry(child, &item->children, list) { + ret = collect_subtree(child, 0); + if (ret < 0) return -1; + } if (check_subtree(item)) return -1; @@ -1302,40 +1366,36 @@ static int collect_subtree(pid_t pid, struct pstree_item *parent, return 0; } -static int dump_pstree(pid_t pid, const struct list_head *pstree_list); +static int dump_pstree(struct pstree_item *item); -static int collect_dump_pstree(pid_t pid, struct 
list_head *pstree_list, - const struct cr_options *opts) +static int collect_pstree(pid_t pid, const struct cr_options *opts) { int ret, attempts = 5; while (1) { - struct pstree_item *item; + root_item = alloc_pstree_item(); + if (root_item == NULL) + return -1; - ret = collect_subtree(pid, NULL, pstree_list, opts->leader_only); + root_item->pid = pid; + INIT_LIST_HEAD(&root_item->list); + + ret = collect_subtree(root_item, opts->leader_only); if (ret == 0) { /* * Some tasks could have been reparented to * namespaces' reaper. Check this. */ if (opts->namespaces_flags & CLONE_NEWPID) { - item = list_first_entry(pstree_list, - struct pstree_item, list); - BUG_ON(item->pid != 1); + BUG_ON(root_item->pid != 1); - if (check_subtree(item)) + if (check_subtree(root_item)) goto try_again; } break; } - if (list_empty(pstree_list)) - /* - * No items at all -- no need in re-scanning it again - */ - break; - /* * Old tasks can die and new ones can appear while we * try to seize the swarm. It's much simpler (and reliable) @@ -1349,61 +1409,46 @@ try_again: attempts--; pr_info("Trying to suspend tasks again\n"); - while (!list_empty(pstree_list)) { - item = list_first_entry(pstree_list, - struct pstree_item, list); - list_del(&item->list); - - unseize_task_and_threads(item, TASK_ALIVE); - - xfree(item->children); - xfree(item->threads); - xfree(item); - } + pstree_switch_state(root_item, TASK_ALIVE); + free_pstree(root_item); } if (!ret) - ret = check_xids(pstree_list); + ret = check_xids(root_item); if (ret) return ret; - return dump_pstree(pid, pstree_list); + return dump_pstree(root_item); } -static int dump_pstree(pid_t pid, const struct list_head *pstree_list) +static int dump_pstree(struct pstree_item *root_item) { - const struct pstree_item *item; + struct pstree_item *item = root_item; struct pstree_entry e; int ret = -1; int pstree_fd; pr_info("\n"); - pr_info("Dumping pstree (pid: %d)\n", pid); + pr_info("Dumping pstree (pid: %d)\n", root_item->pid); 
pr_info("----------------------------------------\n"); pstree_fd = open_image(CR_FD_PSTREE, O_DUMP); if (pstree_fd < 0) return -1; - list_for_each_entry(item, pstree_list, list) { - - pr_info("Process: %d (%d children)\n", - item->pid, item->nr_children); + for_each_pstree_item(item) { + pr_info("Process: %d\n", item->pid); e.pid = item->pid; + e.ppid = item->parent ? item->parent->pid : 0; e.pgid = item->pgid; e.sid = item->sid; - e.nr_children = item->nr_children; e.nr_threads = item->nr_threads; if (write_img(pstree_fd, &e)) goto err; - if (write_img_buf(pstree_fd, item->children, - item->nr_children * sizeof(u32))) - goto err; - if (write_img_buf(pstree_fd, item->threads, item->nr_threads * sizeof(u32))) goto err; @@ -1647,7 +1692,6 @@ err_cure: int cr_dump_tasks(pid_t pid, const struct cr_options *opts) { - LIST_HEAD(pstree_list); struct pstree_item *item; int ret = -1; @@ -1655,7 +1699,7 @@ int cr_dump_tasks(pid_t pid, const struct cr_options *opts) pr_info("Dumping process %s(pid: %d)\n", !opts->leader_only ? "group " : "", pid); pr_info("========================================\n"); - if (collect_dump_pstree(pid, &pstree_list, opts)) + if (collect_pstree(pid, opts)) goto err; if (opts->namespaces_flags) { @@ -1682,7 +1726,7 @@ int cr_dump_tasks(pid_t pid, const struct cr_options *opts) if (init_pipes_dump()) goto err; - list_for_each_entry(item, &pstree_list, list) { + for_each_pstree_item(item) { if (dump_one_task(item)) goto err; @@ -1711,9 +1755,9 @@ err: */ if (ret) tcp_unlock_all(); - pstree_switch_state(&pstree_list, + pstree_switch_state(root_item, ret ? 
TASK_ALIVE : opts->final_state); - free_pstree(&pstree_list); + free_pstree(root_item); return ret; } diff --git a/cr-restore.c b/cr-restore.c index 62e84e8b8..4df2258ee 100644 --- a/cr-restore.c +++ b/cr-restore.c @@ -48,7 +48,7 @@ static struct task_entries *task_entries; static struct pstree_item *me; -static LIST_HEAD(tasks); +static struct pstree_item *root_item = NULL; static int restore_task_with_children(void *); static int sigreturn_restore(pid_t pid, struct list_head *vmas, int nr_vmas); @@ -70,6 +70,7 @@ static int shmem_remap(void *old_addr, void *new_addr, unsigned long size) static int prepare_pstree(void) { int ret = 0, ps_fd; + struct pstree_item *pi, *parent = NULL; pr_info("Reading image tree\n"); @@ -88,37 +89,46 @@ static int prepare_pstree(void) while (1) { struct pstree_entry e; - struct pstree_item *pi; ret = read_img_eof(ps_fd, &e); if (ret <= 0) break; ret = -1; - pi = xmalloc(sizeof(*pi)); + pi = alloc_pstree_item(); if (pi == NULL) break; pi->rst = xzalloc(sizeof(*pi->rst)); - if (pi->rst == NULL) + if (pi->rst == NULL) { + xfree(pi); break; + } pi->pid = e.pid; pi->pgid = e.pgid; pi->sid = e.sid; - ret = -1; - pi->nr_children = e.nr_children; - pi->children = xmalloc(e.nr_children * sizeof(u32)); - if (!pi->children) - break; + if (e.ppid == 0) { + BUG_ON(root_item); + root_item = pi; + pi->parent = NULL; + INIT_LIST_HEAD(&pi->list); + } else { + for_each_pstree_item(parent) + if (parent->pid == e.ppid) + break; - ret = read_img_buf(ps_fd, pi->children, - e.nr_children * sizeof(u32)); - if (ret < 0) - break; + if (parent == NULL) { + pr_err("Can't find a parent for %d", pi->pid); + xfree(pi); + break; + } + + pi->parent = parent; + list_add(&pi->list, &parent->children); + } - ret = -1; pi->nr_threads = e.nr_threads; pi->threads = xmalloc(e.nr_threads * sizeof(u32)); if (!pi->threads) @@ -129,7 +139,6 @@ static int prepare_pstree(void) if (ret < 0) break; - list_add_tail(&pi->list, &tasks); task_entries->nr += e.nr_threads; 
task_entries->nr_tasks++; } @@ -178,7 +187,7 @@ static int prepare_shared(void) if (collect_inotify()) return -1; - list_for_each_entry(pi, &tasks, list) { + for_each_pstree_item(pi) { ret = prepare_shmem_pid(pi->pid); if (ret < 0) break; @@ -464,16 +473,18 @@ static int restore_one_task(int pid) */ #define STACK_SIZE (8 * 4096) struct cr_clone_arg { - int pid, fd; + struct pstree_item *item; unsigned long clone_flags; + int fd; }; -static inline int fork_with_pid(int pid, unsigned long ns_clone_flags) +static inline int fork_with_pid(struct pstree_item *item, unsigned long ns_clone_flags) { int ret = -1; char buf[32]; struct cr_clone_arg ca; void *stack; + pid_t pid = item->pid; pr_info("Forking task with %d pid (flags 0x%lx)\n", pid, ns_clone_flags); @@ -485,7 +496,7 @@ static inline int fork_with_pid(int pid, unsigned long ns_clone_flags) } snprintf(buf, sizeof(buf), "%d", pid - 1); - ca.pid = pid; + ca.item = item; ca.clone_flags = ns_clone_flags; ca.fd = open(LAST_PID_PATH, O_RDWR); if (ca.fd < 0) { @@ -591,15 +602,18 @@ static void restore_pgid(void) static int restore_task_with_children(void *_arg) { struct cr_clone_arg *ca = _arg; + struct pstree_item *child; pid_t pid; - int ret, i; + int ret; sigset_t blockmask; close_safe(&ca->fd); + me = ca->item; + pid = getpid(); - if (ca->pid != pid) { - pr_err("Pid %d do not match expected %d\n", pid, ca->pid); + if (me->pid != pid) { + pr_err("Pid %d do not match expected %d\n", pid, me->pid); exit(-1); } @@ -607,15 +621,6 @@ static int restore_task_with_children(void *_arg) if (ret < 0) exit(1); - list_for_each_entry(me, &tasks, list) - if (me->pid == pid) - break; - - if (me == list_entry(&tasks, struct pstree_item, list)) { - pr_err("Pid %d not found in pstree image\n", pid); - exit(1); - } - if (ca->clone_flags) { ret = prepare_namespace(me->pid, ca->clone_flags); if (ret) @@ -637,9 +642,9 @@ static int restore_task_with_children(void *_arg) exit(1); } - pr_info("Restoring %d children:\n", me->nr_children); - 
for (i = 0; i < me->nr_children; i++) { - ret = fork_with_pid(me->children[i], 0); + pr_info("Restoring children:\n"); + list_for_each_entry(child, &me->children, list) { + ret = fork_with_pid(child, 0); if (ret < 0) exit(1); } @@ -652,11 +657,10 @@ static int restore_task_with_children(void *_arg) return restore_one_task(me->pid); } -static int restore_root_task(pid_t pid, struct cr_options *opts) +static int restore_root_task(struct pstree_item *init, struct cr_options *opts) { int ret; struct sigaction act, old_act; - struct pstree_item *init; ret = sigaction(SIGCHLD, NULL, &act); if (ret < 0) { @@ -672,13 +676,6 @@ static int restore_root_task(pid_t pid, struct cr_options *opts) return -1; } - init = list_first_entry(&tasks, struct pstree_item, list); - if (init->pid != pid) { - pr_err("Pids mismatch. Init has pid %d, requested %d\n", - init->pid, pid); - return -1; - } - /* * FIXME -- currently we assume that all the tasks live * in the same set of namespaces. This is done to debug @@ -686,7 +683,7 @@ static int restore_root_task(pid_t pid, struct cr_options *opts) * this later. 
*/ - ret = fork_with_pid(init->pid, opts->namespaces_flags); + ret = fork_with_pid(init, opts->namespaces_flags); if (ret < 0) return -1; @@ -705,11 +702,12 @@ static int restore_root_task(pid_t pid, struct cr_options *opts) out: if (ret < 0) { - pr_err("Someone can't be restored\n"); struct pstree_item *pi; + pr_err("Someone can't be restored\n"); - list_for_each_entry(pi, &tasks, list) + for_each_pstree_item(pi) kill(pi->pid, SIGKILL); + return 1; } @@ -746,7 +744,7 @@ static int restore_all_tasks(pid_t pid, struct cr_options *opts) if (prepare_shared() < 0) return -1; - return restore_root_task(pid, opts); + return restore_root_task(root_item, opts); } #define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE) diff --git a/cr-show.c b/cr-show.c index 7b0c2a6c3..31c15655b 100644 --- a/cr-show.c +++ b/cr-show.c @@ -418,8 +418,8 @@ static int show_collect_pstree(int fd_pstree, struct list_head *collect) ret = read_img_eof(fd_pstree, &e); if (ret <= 0) goto out; - pr_msg("pid: %8d pgid: %8d sid %8d nr_children: %8d nr_threads: %8d\n", - e.pid, e.pgid, e.sid, e.nr_children, e.nr_threads); + pr_msg("pid: %8d ppid %8d pgid: %8d sid %8d nr_threads: %8d\n", + e.pid, e.ppid, e.pgid, e.sid, e.nr_threads); if (collect) { item = xzalloc(sizeof(struct pstree_item)); @@ -437,18 +437,6 @@ static int show_collect_pstree(int fd_pstree, struct list_head *collect) list_add_tail(&item->list, collect); } - if (e.nr_children) { - pr_msg("\\\n"); - pr_msg(" +--- children: "); - while (e.nr_children--) { - ret = read_img(fd_pstree, &pid); - if (ret < 0) - goto out; - pr_msg(" %6d", pid); - } - pr_msg("\n"); - } - if (e.nr_threads) { pr_msg(" \\\n"); pr_msg(" --- threads: "); @@ -640,7 +628,7 @@ err: static int cr_show_all(struct cr_options *opts) { - struct pstree_item *item = NULL; + struct pstree_item *item = NULL, *tmp; LIST_HEAD(pstree_list); int i, ret = -1, fd, pid; @@ -709,7 +697,11 @@ static int cr_show_all(struct cr_options *opts) } out: - free_pstree(&pstree_list); + 
list_for_each_entry_safe(item, tmp, &pstree_list, list) { + list_del(&item->list); + xfree(item->threads); + xfree(item); + } return ret; } diff --git a/include/crtools.h b/include/crtools.h index fdfb59b81..447f988d4 100644 --- a/include/crtools.h +++ b/include/crtools.h @@ -3,13 +3,12 @@ #include +#include "list.h" #include "types.h" #include "list.h" #include "util.h" #include "image.h" -extern void free_pstree(struct list_head *pstree_list); - #define CR_FD_PERM (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH) #define CR_FD_PERM_DUMP (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH) @@ -180,16 +179,21 @@ struct pstree_item { struct list_head list; pid_t pid; /* leader pid */ struct pstree_item *parent; + struct list_head children; /* array of children */ pid_t pgid; pid_t sid; int state; /* TASK_XXX constants */ - int nr_children; /* number of children */ int nr_threads; /* number of threads */ u32 *threads; /* array of threads */ - u32 *children; /* array of children */ struct rst_info *rst; }; +extern struct pstree_item *alloc_pstree_item(void); +extern struct pstree_item *pstree_item_next(struct pstree_item *item); + +#define for_each_pstree_item(pi) \ + for (pi = root_item; pi != NULL; pi = pstree_item_next(pi)) + static inline int in_vma_area(struct vma_area *vma, unsigned long addr) { return addr >= (unsigned long)vma->vma.start && diff --git a/include/image.h b/include/image.h index c3716de2d..cb953307c 100644 --- a/include/image.h +++ b/include/image.h @@ -141,9 +141,9 @@ struct fs_entry { struct pstree_entry { u32 pid; + u32 ppid; u32 pgid; u32 sid; - u32 nr_children; u32 nr_threads; } __packed;