#include #include #include #include "cr_options.h" #include "pstree.h" #include "util.h" #include "lock.h" #include "namespaces.h" #include "files.h" #include "tty.h" #include "asm/dump.h" #include "protobuf.h" #include "protobuf/pstree.pb-c.h" struct pstree_item *root_item; void core_entry_free(CoreEntry *core) { if (core) { arch_free_thread_info(core); if (core->thread_core) xfree(core->thread_core->sas); xfree(core->thread_core); xfree(core->tc); xfree(core->ids); } } CoreEntry *core_entry_alloc(int alloc_thread_info, int alloc_tc) { CoreEntry *core; TaskCoreEntry *tc; core = xmalloc(sizeof(*core)); if (!core) return NULL; core_entry__init(core); core->mtype = CORE_ENTRY__MARCH; if (alloc_thread_info) { ThreadCoreEntry *thread_core; ThreadSasEntry *sas; if (arch_alloc_thread_info(core)) goto err; thread_core = xmalloc(sizeof(*thread_core)); if (!thread_core) goto err; thread_core_entry__init(thread_core); core->thread_core = thread_core; sas = xmalloc(sizeof(*sas)); if (!sas) goto err; thread_sas_entry__init(sas); core->thread_core->sas = sas; } if (alloc_tc) { tc = xzalloc(sizeof(*tc) + TASK_COMM_LEN); if (!tc) goto err; task_core_entry__init(tc); tc->comm = (void *)tc + sizeof(*tc); core->tc = tc; } return core; err: core_entry_free(core); return NULL; } int pstree_alloc_cores(struct pstree_item *item) { unsigned int i; item->core = xzalloc(sizeof(*item->core) * item->nr_threads); if (!item->core) return -1; for (i = 0; i < item->nr_threads; i++) { if (item->threads[i].real == item->pid.real) item->core[i] = core_entry_alloc(1, 1); else item->core[i] = core_entry_alloc(1, 0); if (!item->core[i]) goto err; } return 0; err: pstree_free_cores(item); return -1; } void pstree_free_cores(struct pstree_item *item) { unsigned int i; if (item->core) { for (i = 1; i < item->nr_threads; i++) core_entry_free(item->core[i]); xfree(item->core); item->core = NULL; } } void free_pstree(struct pstree_item *root_item) { struct pstree_item *item = root_item, *parent; while (item) { if (!list_empty(&item->children)) { item = list_first_entry(&item->children, struct pstree_item, sibling); continue; } parent = item->parent; list_del(&item->sibling); pstree_free_cores(item); xfree(item->threads); xfree(item); item = parent; } } struct pstree_item *__alloc_pstree_item(bool rst) { struct pstree_item *item; if (!rst) { item = xzalloc(sizeof(*item)); if (!item) return NULL; } else { item = shmalloc(sizeof(*item) + sizeof(item->rst[0])); if (!item) return NULL; memset(item, 0, sizeof(*item) + sizeof(item->rst[0])); } INIT_LIST_HEAD(&item->children); INIT_LIST_HEAD(&item->sibling); item->pid.virt = -1; item->pid.real = -1; item->born_sid = -1; return item; } /* Deep first search on children */ struct pstree_item *pstree_item_next(struct pstree_item *item) { if (!list_empty(&item->children)) return list_first_entry(&item->children, struct pstree_item, sibling); while (item->parent) { if (item->sibling.next != &item->parent->children) return list_entry(item->sibling.next, struct pstree_item, sibling); item = item->parent; } return NULL; } int dump_pstree(struct pstree_item *root_item) { struct pstree_item *item = root_item; PstreeEntry e = PSTREE_ENTRY__INIT; int ret = -1, i; int pstree_fd; pr_info("\n"); pr_info("Dumping pstree (pid: %d)\n", root_item->pid.real); pr_info("----------------------------------------\n"); /* * Make sure we're dumping session leader, if not an * appropriate option must be passed. * * Also note that if we're not a session leader we * can't get the situation where the leader sits somewhere * deeper in process tree, thus top-level checking for * leader is enough. */ if (root_item->pid.virt != root_item->sid) { if (!opts.shell_job) { pr_err("The root process %d is not a session leader. " "Consider using --" OPT_SHELL_JOB " option\n", item->pid.virt); return -1; } } pstree_fd = open_image(CR_FD_PSTREE, O_DUMP); if (pstree_fd < 0) return -1; for_each_pstree_item(item) { pr_info("Process: %d(%d)\n", item->pid.virt, item->pid.real); e.pid = item->pid.virt; e.ppid = item->parent ? item->parent->pid.virt : 0; e.pgid = item->pgid; e.sid = item->sid; e.n_threads = item->nr_threads; e.threads = xmalloc(sizeof(e.threads[0]) * e.n_threads); if (!e.threads) goto err; for (i = 0; i < item->nr_threads; i++) e.threads[i] = item->threads[i].virt; ret = pb_write_one(pstree_fd, &e, PB_PSTREE); xfree(e.threads); if (ret) goto err; } ret = 0; err: pr_info("----------------------------------------\n"); close(pstree_fd); return ret; } static int max_pid = 0; static int prepare_pstree_for_shell_job(void) { pid_t current_sid = getsid(getpid()); pid_t current_gid = getpgid(getpid()); struct pstree_item *pi; pid_t old_sid; pid_t old_gid; if (!opts.shell_job) return 0; if (root_item->sid == root_item->pid.virt) return 0; /* * Migration of a root task group leader is a bit tricky. * When a task yields SIGSTOP, the kernel notifies the parent * with SIGCHLD. This means when task is running in a * shell, the shell obtains SIGCHLD and sends a task to * the background. * * The situation gets changed once we restore the * program -- our tool become an additional stub between * the restored program and the shell. So to be able to * notify the shell with SIGCHLD from our restored * program -- we make the root task to inherit the * process group from us. * * Not that clever solution but at least it works. */ old_sid = root_item->sid; old_gid = root_item->pgid; pr_info("Migrating process tree (GID %d->%d SID %d->%d)\n", old_gid, current_gid, old_sid, current_sid); for_each_pstree_item(pi) { if (pi->pgid == old_gid) pi->pgid = current_gid; if (pi->sid == old_sid) pi->sid = current_sid; } max_pid = max((int)current_sid, max_pid); max_pid = max((int)current_gid, max_pid); return 0; } static int read_pstree_image(void) { int ret = 0, i, ps_fd, fd; struct pstree_item *pi, *parent = NULL; pr_info("Reading image tree\n"); ps_fd = open_image(CR_FD_PSTREE, O_RSTR); if (ps_fd < 0) return ps_fd; while (1) { PstreeEntry *e; ret = pb_read_one_eof(ps_fd, &e, PB_PSTREE); if (ret <= 0) break; ret = -1; pi = alloc_pstree_item_with_rst(); if (pi == NULL) break; pi->pid.virt = e->pid; max_pid = max((int)e->pid, max_pid); pi->pgid = e->pgid; max_pid = max((int)e->pgid, max_pid); pi->sid = e->sid; max_pid = max((int)e->sid, max_pid); if (e->ppid == 0) { if (root_item) { pr_err("Parent missed on non-root task " "with pid %d, image corruption!\n", e->pid); goto err; } root_item = pi; pi->parent = NULL; } else { /* * Fast path -- if the pstree image is not edited, the * parent of any item should have already being restored * and sit among the last item's ancestors. */ while (parent) { if (parent->pid.virt == e->ppid) break; parent = parent->parent; } if (parent == NULL) { for_each_pstree_item(parent) { if (parent->pid.virt == e->ppid) break; } if (parent == NULL) { pr_err("Can't find a parent for %d\n", pi->pid.virt); pstree_entry__free_unpacked(e, NULL); xfree(pi); goto err; } } pi->parent = parent; list_add(&pi->sibling, &parent->children); } parent = pi; pi->nr_threads = e->n_threads; pi->threads = xmalloc(e->n_threads * sizeof(struct pid)); if (!pi->threads) break; for (i = 0; i < e->n_threads; i++) { pi->threads[i].real = -1; pi->threads[i].virt = e->threads[i]; } task_entries->nr_threads += e->n_threads; task_entries->nr_tasks++; pstree_entry__free_unpacked(e, NULL); fd = open_image(CR_FD_IDS, O_RSTR, pi->pid.virt); if (fd < 0) { if (errno == ENOENT) continue; goto err; } ret = pb_read_one(fd, &pi->ids, PB_IDS); close(fd); if (ret != 1) goto err; } err: close(ps_fd); return ret; } static int prepare_pstree_ids(void) { struct pstree_item *item, *child, *helper, *tmp; LIST_HEAD(helpers); pid_t current_pgid = getpgid(getpid()); /* * Some task can be reparented to init. A helper task should be added * for restoring sid of such tasks. The helper tasks will be exited * immediately after forking children and all children will be * reparented to init. */ list_for_each_entry(item, &root_item->children, sibling) { /* * If a child belongs to the root task's session or it's * a session leader himself -- this is a simple case, we * just proceed in a normal way. */ if (item->sid == root_item->sid || item->sid == item->pid.virt) continue; helper = alloc_pstree_item_with_rst(); if (helper == NULL) return -1; helper->sid = item->sid; helper->pgid = item->sid; helper->pid.virt = item->sid; helper->state = TASK_HELPER; helper->parent = root_item; list_add_tail(&helper->sibling, &helpers); task_entries->nr_helpers++; pr_info("Add a helper %d for restoring SID %d\n", helper->pid.virt, helper->sid); child = list_entry(item->sibling.prev, struct pstree_item, sibling); item = child; /* * Stack on helper task all children with target sid. */ list_for_each_entry_safe_continue(child, tmp, &root_item->children, sibling) { if (child->sid != helper->sid) continue; if (child->sid == child->pid.virt) continue; pr_info("Attach %d to the temporary task %d\n", child->pid.virt, helper->pid.virt); child->parent = helper; list_move(&child->sibling, &helper->children); } } /* Try to connect helpers to session leaders */ for_each_pstree_item(item) { if (!item->parent) /* skip the root task */ continue; if (item->state == TASK_HELPER) continue; if (item->sid != item->pid.virt) { struct pstree_item *parent; if (item->parent->sid == item->sid) continue; /* the task could fork a child before and after setsid() */ parent = item->parent; while (parent && parent->pid.virt != item->sid) { if (parent->born_sid != -1 && parent->born_sid != item->sid) { pr_err("Can't determinate with which sid (%d or %d)" "the process %d was born\n", parent->born_sid, item->sid, parent->pid.virt); return -1; } parent->born_sid = item->sid; pr_info("%d was born with sid %d\n", parent->pid.virt, item->sid); parent = parent->parent; } if (parent == NULL) { pr_err("Can't find a session leader for %d\n", item->sid); return -1; } continue; } pr_info("Session leader %d\n", item->sid); /* Try to find helpers, who should be connected to the leader */ list_for_each_entry(child, &helpers, sibling) { if (child->state != TASK_HELPER) continue; if (child->sid != item->sid) continue; child->pgid = item->pgid; child->pid.virt = ++max_pid; child->parent = item; list_move(&child->sibling, &item->children); pr_info("Attach %d to the task %d\n", child->pid.virt, item->pid.virt); break; } } /* All other helpers are session leaders for own sessions */ list_splice(&helpers, &root_item->children); /* Add a process group leader if it is absent */ for_each_pstree_item(item) { struct pstree_item *gleader; if (!item->pgid || item->pid.virt == item->pgid) continue; for_each_pstree_item(gleader) { if (gleader->pid.virt == item->pgid) break; } if (gleader) { item->rst->pgrp_leader = gleader; continue; } /* * If the PGID is eq to current one -- this * means we're inheriting group from the current * task so we need to escape creating a helper here. */ if (current_pgid == item->pgid) continue; helper = alloc_pstree_item_with_rst(); if (helper == NULL) return -1; helper->sid = item->sid; helper->pgid = item->pgid; helper->pid.virt = item->pgid; helper->state = TASK_HELPER; helper->parent = item; list_add(&helper->sibling, &item->children); task_entries->nr_helpers++; item->rst->pgrp_leader = helper; pr_info("Add a helper %d for restoring PGID %d\n", helper->pid.virt, helper->pgid); } return 0; } static unsigned long get_clone_mask(TaskKobjIdsEntry *i, TaskKobjIdsEntry *p) { unsigned long mask = 0; if (i->files_id == p->files_id) mask |= CLONE_FILES; if (i->pid_ns_id != p->pid_ns_id) mask |= CLONE_NEWPID; if (i->net_ns_id != p->net_ns_id) mask |= CLONE_NEWNET; if (i->ipc_ns_id != p->ipc_ns_id) mask |= CLONE_NEWIPC; if (i->uts_ns_id != p->uts_ns_id) mask |= CLONE_NEWUTS; if (i->mnt_ns_id != p->mnt_ns_id) mask |= CLONE_NEWNS; return mask; } static int prepare_pstree_kobj_ids(void) { struct pstree_item *item; /* Find a process with minimal pid for shared fd tables */ for_each_pstree_item(item) { struct pstree_item *parent = item->parent; TaskKobjIdsEntry *ids; unsigned long cflags; if (!item->ids) { if (item == root_item) { cflags = opts.rst_namespaces_flags; goto set_mask; } continue; } if (parent) ids = parent->ids; else ids = root_ids; /* * Add some sanity check on image data. */ if (unlikely(!ids)) { pr_err("No kIDs provided, image corruption\n"); return -1; } cflags = get_clone_mask(item->ids, ids); if (cflags & CLONE_FILES) { int ret; /* * There might be a case when kIDs for * root task are the same as in root_ids, * thus it's image corruption and we should * exit out. */ if (unlikely(!item->parent)) { pr_err("Image corruption on kIDs data\n"); return -1; } ret = shared_fdt_prepare(item); if (ret) return ret; } set_mask: item->rst->clone_flags = cflags; /* * Workaround for current namespaces model -- * all tasks should be in one namespace. And * this namespace is either inherited from the * criu or is created for the init task (only) */ if (item == root_item) { pr_info("Will restore in %lx namespaces\n", cflags); current_ns_mask = cflags & CLONE_ALLNS; } else if (cflags & CLONE_ALLNS) { pr_err("Can't restore sub-task in NS\n"); return -1; } } pr_debug("NS mask to use %lx\n", current_ns_mask); return 0; } int prepare_pstree(void) { int ret; ret = read_pstree_image(); if (!ret) /* * Shell job may inherit sid/pgid from the current * shell, not from image. Set things up for this. */ ret = prepare_pstree_for_shell_job(); if (!ret) /* * Walk the collected tree and prepare for restoring * of shared objects at clone time */ ret = prepare_pstree_kobj_ids(); if (!ret) /* * Session/Group leaders might be dead. Need to fix * pstree with properly injected helper tasks. */ ret = prepare_pstree_ids(); return ret; } bool restore_before_setsid(struct pstree_item *child) { int csid = child->born_sid == -1 ? child->sid : child->born_sid; if (child->parent->born_sid == csid) return true; return false; } bool pid_in_pstree(pid_t pid) { struct pstree_item *item; for_each_pstree_item(item) { if (item->pid.real == pid) return true; } return false; }