#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "types.h" #include "list.h" #include "file-ids.h" #include "kcmp-ids.h" #include "compiler.h" #include "crtools.h" #include "syscall.h" #include "ptrace.h" #include "util.h" #include "sockets.h" #include "namespaces.h" #include "image.h" #include "proc_parse.h" #include "parasite.h" #include "parasite-syscall.h" #include "files.h" #include "pipes.h" #include "shmem.h" #include "sk-inet.h" #include "eventfd.h" #include "eventpoll.h" #include "inotify.h" #ifndef CONFIG_X86_64 # error No x86-32 support yet #endif static char big_buffer[PATH_MAX]; static char loc_buf[PAGE_SIZE]; static struct pstree_item *root_item = NULL; static void free_pstree(struct pstree_item *root_item) { struct pstree_item *item = root_item, *parent; while (item) { if (!list_empty(&item->children)) { item = list_first_entry(&item->children, struct pstree_item, list); continue; } parent = item->parent; list_del(&item->list); xfree(item->threads); xfree(item); item = parent; } } void free_mappings(struct list_head *vma_area_list) { struct vma_area *vma_area, *p; list_for_each_entry_safe(vma_area, p, vma_area_list, list) { if (vma_area->vm_file_fd > 0) close(vma_area->vm_file_fd); free(vma_area); } INIT_LIST_HEAD(vma_area_list); } static int collect_mappings(pid_t pid, struct list_head *vma_area_list) { int ret = -1; pr_info("\n"); pr_info("Collecting mappings (pid: %d)\n", pid); pr_info("----------------------------------------\n"); ret = parse_smaps(pid, vma_area_list, true); if (ret < 0) goto err; pr_info_vma_list(vma_area_list); pr_info("----------------------------------------\n"); ret = 0; err: return ret; } struct cr_fdset *glob_fdset; static int collect_fds(pid_t pid, int *fd, int *nr_fd) { struct dirent *de; DIR *fd_dir; int n; pr_info("\n"); pr_info("Collecting fds (pid: %d)\n", pid); pr_info("----------------------------------------\n"); fd_dir = opendir_proc(pid, "fd"); if (!fd_dir) return -1; n = 0; while ((de = readdir(fd_dir))) { if (!strcmp(de->d_name, ".")) continue; if (!strcmp(de->d_name, "..")) continue; if (n > *nr_fd - 1) return -ENOMEM; fd[n++] = atoi(de->d_name); } *nr_fd = n; pr_info("Found %d file descriptors\n", n); pr_info("----------------------------------------\n"); closedir(fd_dir); return 0; } /* * Ghost files are those not visible from the FS. Dumping them is * nasty and the only way we have -- just carry its contents with * us. Any brave soul to implement link unlinked file back? */ struct ghost_file { u32 dev; u32 ino; u32 id; struct list_head list; }; static u32 ghost_file_ids = 1; static LIST_HEAD(ghost_files); /* * This constant is selected without any calculations. Just do not * want to pick up too big files with us in the image. */ #define MAX_GHOST_FILE_SIZE (1 * 1024 * 1024) static int dump_ghost_file(int _fd, u32 id, const struct stat *st) { int img, fd; struct ghost_file_entry gfe; char lpath[32]; pr_info("Dumping ghost file contents (id %#x)\n", id); img = open_image(CR_FD_GHOST_FILE, O_DUMP, id); if (img < 0) return -1; /* * Reopen file locally since it may have no read * permissions when drained */ snprintf(lpath, sizeof(lpath), "/proc/self/fd/%d", _fd); fd = open(lpath, O_RDONLY); if (fd < 0) { pr_perror("Can't open ghost original file"); return -1; } gfe.uid = st->st_uid; gfe.gid = st->st_gid; gfe.mode = st->st_mode; if (write_img(img, &gfe)) return -1; if (copy_file(fd, img, st->st_size)) return -1; close(fd); close(img); return 0; } static int dump_ghost_remap(char *path, const struct stat *st, int lfd, u32 id) { struct ghost_file *gf; struct remap_file_path_entry rpe; pr_info("Dumping ghost file for fd %d id %#x\n", lfd, id); if (st->st_size > MAX_GHOST_FILE_SIZE) { pr_err("Can't dump ghost file %s of %lu size\n", path, st->st_size); return -1; } list_for_each_entry(gf, &ghost_files, list) if ((gf->dev == st->st_dev) && (gf->ino == st->st_ino)) goto dump_entry; gf = xmalloc(sizeof(*gf)); if (gf == NULL) return -1; gf->dev = st->st_dev; gf->ino = st->st_ino; gf->id = ghost_file_ids++; list_add_tail(&gf->list, &ghost_files); if (dump_ghost_file(lfd, gf->id, st)) return -1; dump_entry: rpe.orig_id = id; rpe.remap_id = gf->id | REMAP_GHOST; return write_img(fdset_fd(glob_fdset, CR_FD_REMAP_FPATH), &rpe); } static int check_path_remap(char *path, const struct stat *ost, int lfd, u32 id) { int ret; struct stat pst; if (ost->st_nlink == 0) /* * Unpleasant, but easy case. File is completely invisible * from the FS. Just dump its contents and that's it. But * be careful whether anybody still has any of its hardlinks * also open. */ return dump_ghost_remap(path, ost, lfd, id); ret = stat(path, &pst); if (ret < 0) { /* * FIXME linked file, but path is not accessible (unless any * other error occurred). We can create a temporary link to it * uning linkat with AT_EMPTY_PATH flag and remap it to this * name. */ pr_perror("Can't stat path"); return -1; } if ((pst.st_ino != ost->st_ino) || (pst.st_dev != ost->st_dev)) { /* * FIXME linked file, but the name we see it by is reused * by somebody else. */ pr_err("Unaccessible path opened %u:%u, need %u:%u\n", (int)pst.st_dev, (int)pst.st_ino, (int)ost->st_dev, (int)ost->st_ino); return -1; } /* * File is linked and visible by the name it is opened by * this task. Go ahead and dump it. */ return 0; } static int dump_one_reg_file(int lfd, u32 id, const struct fd_parms *p) { char fd_str[128]; int len, rfd; struct reg_file_entry rfe; snprintf(fd_str, sizeof(fd_str), "/proc/self/fd/%d", lfd); len = readlink(fd_str, big_buffer, sizeof(big_buffer) - 1); if (len < 0) { pr_perror("Can't readlink %s", fd_str); return len; } big_buffer[len] = '\0'; pr_info("Dumping path for %d fd via self %d [%s]\n", p->fd, lfd, big_buffer); if (check_path_remap(big_buffer, &p->stat, lfd, id)) return -1; rfe.len = len; rfe.flags = p->flags; rfe.pos = p->pos; rfe.id = id; rfe.fown = p->fown; rfd = fdset_fd(glob_fdset, CR_FD_REG_FILES); if (write_img(rfd, &rfe)) return -1; if (write_img_buf(rfd, big_buffer, len)) return -1; return 0; } u32 make_gen_id(const struct fd_parms *p) { return MAKE_FD_GENID(p->stat.st_dev, p->stat.st_ino, p->pos); } int do_dump_gen_file(struct fd_parms *p, int lfd, const struct fdtype_ops *ops, const struct cr_fdset *cr_fdset) { struct fdinfo_entry e; int ret = -1; e.type = ops->type; e.id = p->id = ops->make_gen_id(p); e.fd = p->fd; e.flags = p->fd_flags; ret = fd_id_generate(p->pid, &e); if (ret == 1) /* new ID generated */ ret = ops->dump(lfd, e.id, p); if (ret < 0) goto err; pr_info("fdinfo: type: 0x%2x flags: 0x%4x pos: 0x%8lx fd: %d\n", ops->type, p->flags, p->pos, p->fd); if (write_img(fdset_fd(cr_fdset, CR_FD_FDINFO), &e)) goto err; ret = 0; err: return ret; } static const struct fdtype_ops regfile_ops = { .type = FDINFO_REG, .make_gen_id = make_gen_id, .dump = dump_one_reg_file, }; static int dump_reg_file(struct fd_parms *p, int lfd, const struct cr_fdset *cr_fdset) { return do_dump_gen_file(p, lfd, ®file_ops, cr_fdset); } static int dump_task_exe_link(pid_t pid, struct mm_entry *mm) { struct fd_parms params = { }; int fd, ret; fd = open_proc(pid, "exe"); if (fd < 0) return -1; if (fstat(fd, ¶ms.stat) < 0) { pr_perror("Can't fstat exe link"); return -1; } params.fd = FD_DESC_INVALID; mm->exe_file_id = fd_id_generate_special(); ret = dump_one_reg_file(fd, mm->exe_file_id, ¶ms); close(fd); return ret; } static int fill_fd_params(pid_t pid, int fd, int lfd, char fd_flags, struct fd_parms *p) { struct f_owner_ex owner_ex; u32 v[2]; if (fstat(lfd, &p->stat) < 0) { pr_perror("Can't stat fd %d\n", lfd); return -1; } p->fd = fd; p->pos = lseek(lfd, 0, SEEK_CUR); p->flags = fcntl(lfd, F_GETFL); p->pid = pid; p->id = FD_ID_INVALID; p->fd_flags = fd_flags; p->fown = (fown_t){ }; pr_info("%d fdinfo %d: pos: 0x%16lx flags: %16o/%#x\n", pid, fd, p->pos, p->flags, (int)fd_flags); p->fown.signum = fcntl(lfd, F_GETSIG, 0); if (p->fown.signum < 0) { pr_perror("Can't get owner signum on %d\n", lfd); return -1; } if (fcntl(lfd, F_GETOWN_EX, (long)&owner_ex)) { pr_perror("Can't get owners on %d\n", lfd); return -1; } /* * Simple case -- nothing is changed. */ if (owner_ex.pid == 0) return 0; if (fcntl(lfd, F_GETOWNER_UIDS, (long)&v)) { pr_perror("Can't get owner uids on %d\n", lfd); return -1; } p->fown.uid = v[0]; p->fown.euid = v[1]; p->fown.pid_type= owner_ex.type; p->fown.pid = owner_ex.pid; return 0; } static int dump_unsupp_fd(const struct fd_parms *p) { pr_err("Can't dump file %d of that type [%#x]\n", p->fd, p->stat.st_mode); return -1; } static int dump_chrdev(struct fd_parms *p, int lfd, const struct cr_fdset *set) { int maj = major(p->stat.st_rdev); switch (maj) { case MEM_MAJOR: return dump_reg_file(p, lfd, set); case TTY_MAJOR: case UNIX98_PTY_SLAVE_MAJOR: if (p->fd < 3) { pr_info("... Skipping tty ... %d\n", p->fd); return 0; } } return dump_unsupp_fd(p); } #ifndef PIPEFS_MAGIC #define PIPEFS_MAGIC 0x50495045 #endif static int dump_one_file(pid_t pid, int fd, int lfd, char fd_flags, const struct cr_fdset *cr_fdset) { struct fd_parms p; struct statfs statfs; if (fill_fd_params(pid, fd, lfd, fd_flags, &p) < 0) { pr_perror("Can't get stat on %d", fd); return -1; } if (S_ISSOCK(p.stat.st_mode)) return dump_socket(&p, lfd, cr_fdset); if (S_ISCHR(p.stat.st_mode)) return dump_chrdev(&p, lfd, cr_fdset); if (fstatfs(lfd, &statfs)) { pr_perror("Can't obtain statfs on fd %d\n", fd); return -1; } if (is_anon_inode(&statfs)) { if (is_eventfd_link(lfd)) return dump_eventfd(&p, lfd, cr_fdset); else if (is_eventpoll_link(lfd)) return dump_eventpoll(&p, lfd, cr_fdset); else if (is_inotify_link(lfd)) return dump_inotify(&p, lfd, cr_fdset); else return dump_unsupp_fd(&p); } if (S_ISREG(p.stat.st_mode) || S_ISDIR(p.stat.st_mode)) return dump_reg_file(&p, lfd, cr_fdset); if (S_ISFIFO(p.stat.st_mode) && (statfs.f_type == PIPEFS_MAGIC)) return dump_pipe(&p, lfd, cr_fdset); return dump_unsupp_fd(&p); } static int dump_task_files_seized(struct parasite_ctl *ctl, const struct cr_fdset *cr_fdset, int *fds, int nr_fds) { int *lfds; char *flags; int i, ret = -1; pr_info("\n"); pr_info("Dumping opened files (pid: %d)\n", ctl->pid); pr_info("----------------------------------------\n"); lfds = xmalloc(PARASITE_MAX_FDS * sizeof(int)); if (!lfds) goto err; flags = xmalloc(PARASITE_MAX_FDS * sizeof(char)); if (!flags) goto err1; ret = parasite_drain_fds_seized(ctl, fds, lfds, nr_fds, flags); if (ret) goto err2; for (i = 0; i < nr_fds; i++) { ret = dump_one_file(ctl->pid, fds[i], lfds[i], flags[i], cr_fdset); close(lfds[i]); if (ret) goto err2; } pr_info("----------------------------------------\n"); err2: xfree(flags); err1: xfree(lfds); err: return ret; } static int dump_task_fs(pid_t pid, struct cr_fdset *fdset) { struct fd_parms p = { .fd = FD_DESC_INVALID, }; struct fs_entry fe; int fd, ret; fd = open_proc(pid, "cwd"); if (fd < 0) return -1; if (fstat(fd, &p.stat) < 0) { pr_perror("Can't stat cwd"); return -1; } fe.cwd_id = fd_id_generate_special(); ret = dump_one_reg_file(fd, fe.cwd_id, &p); if (ret < 0) return ret; close(fd); fd = open_proc(pid, "root"); if (fd < 0) return -1; if (fstat(fd, &p.stat) < 0) { pr_perror("Can't stat root"); return -1; } fe.root_id = fd_id_generate_special(); ret = dump_one_reg_file(fd, fe.root_id, &p); if (ret < 0) return ret; close(fd); pr_info("Dumping task cwd id %#x root id %#x\n", fe.cwd_id, fe.root_id); return write_img(fdset_fd(fdset, CR_FD_FS), &fe); } static int dump_filemap(pid_t pid, struct vma_entry *vma, int file_fd, const struct cr_fdset *fdset) { struct fd_parms p = { }; if (fstat(file_fd, &p.stat) < 0) { pr_perror("Can't stat file for vma"); return -1; } p.fd = FD_DESC_INVALID; if ((vma->prot & PROT_WRITE) && vma_entry_is(vma, VMA_FILE_SHARED)) p.flags = O_RDWR; else p.flags = O_RDONLY; vma->shmid = fd_id_generate_special(); return dump_one_reg_file(file_fd, vma->shmid, &p); } static int dump_task_mappings(pid_t pid, const struct list_head *vma_area_list, const struct cr_fdset *cr_fdset) { struct vma_area *vma_area; int ret = -1, fd; pr_info("\n"); pr_info("Dumping mappings (pid: %d)\n", pid); pr_info("----------------------------------------\n"); fd = fdset_fd(cr_fdset, CR_FD_VMAS); list_for_each_entry(vma_area, vma_area_list, list) { struct vma_entry *vma = &vma_area->vma; pr_info_vma(vma_area); if (!vma_entry_is(vma, VMA_AREA_REGULAR) || vma_entry_is(vma, VMA_AREA_SYSVIPC)) ret = 0; else if (vma_entry_is(vma, VMA_ANON_SHARED)) ret = add_shmem_area(pid, vma); else if (vma_entry_is(vma, VMA_FILE_PRIVATE) || vma_entry_is(vma, VMA_FILE_SHARED)) ret = dump_filemap(pid, vma, vma_area->vm_file_fd, cr_fdset); else ret = 0; if (!ret) ret = write_img(fd, vma); if (ret) goto err; } ret = 0; pr_info("----------------------------------------\n"); err: return ret; } static int dump_task_creds(pid_t pid, const struct parasite_dump_misc *misc, const struct cr_fdset *fds) { int ret, i; struct proc_status_creds cr; struct creds_entry ce; pr_info("\n"); pr_info("Dumping creds for %d)\n", pid); pr_info("----------------------------------------\n"); ret = parse_pid_status(pid, &cr); if (ret < 0) return ret; ce.uid = cr.uids[0]; ce.gid = cr.gids[0]; ce.euid = cr.uids[1]; ce.egid = cr.gids[1]; ce.suid = cr.uids[2]; ce.sgid = cr.gids[2]; ce.fsuid = cr.uids[3]; ce.fsgid = cr.gids[3]; BUILD_BUG_ON(CR_CAP_SIZE != PROC_CAP_SIZE); for (i = 0; i < CR_CAP_SIZE; i++) { ce.cap_inh[i] = cr.cap_inh[i]; ce.cap_prm[i] = cr.cap_prm[i]; ce.cap_eff[i] = cr.cap_eff[i]; ce.cap_bnd[i] = cr.cap_bnd[i]; } ce.secbits = misc->secbits; ret = write_img(fdset_fd(fds, CR_FD_CREDS), &ce); if (ret < 0) return ret; return 0; } #define assign_reg(dst, src, e) dst.e = (__typeof__(dst.e))src.e #define assign_array(dst, src, e) memcpy(&dst.e, &src.e, sizeof(dst.e)) static int get_task_auxv(pid_t pid, struct mm_entry *mm) { int fd, ret, i; pr_info("Obtainting task auvx ... "); fd = open_proc(pid, "auxv"); if (fd < 0) return -1; for (i = 0; i < AT_VECTOR_SIZE; i++) { ret = read(fd, &mm->mm_saved_auxv[i], sizeof(mm->mm_saved_auxv[0])); if (ret == 0) break; else if (ret != sizeof(mm->mm_saved_auxv[0])) { ret = -1; pr_perror("Error readind %d's auxv[%d]", pid, i); goto err; } } ret = 0; err: close_safe(&fd); return ret; } static int dump_task_mm(pid_t pid, const struct proc_pid_stat *stat, const struct parasite_dump_misc *misc, const struct cr_fdset *fdset) { struct mm_entry mme; mme.mm_start_code = stat->start_code; mme.mm_end_code = stat->end_code; mme.mm_start_data = stat->start_data; mme.mm_end_data = stat->end_data; mme.mm_start_stack = stat->start_stack; mme.mm_start_brk = stat->start_brk; mme.mm_arg_start = stat->arg_start; mme.mm_arg_end = stat->arg_end; mme.mm_env_start = stat->env_start; mme.mm_env_end = stat->env_end; mme.mm_brk = misc->brk; if (get_task_auxv(pid, &mme)) return -1; pr_info("OK\n"); if (dump_task_exe_link(pid, &mme)) return -1; return write_img(fdset_fd(fdset, CR_FD_MM), &mme); } static int get_task_personality(pid_t pid, u32 *personality) { FILE *file = NULL; int ret = -1; pr_info("Obtainting personality ... "); file = fopen_proc(pid, "personality"); if (!file) goto err; if (!fgets(loc_buf, sizeof(loc_buf), file)) { perror("Can't read task personality"); goto err; } *personality = atoi(loc_buf); ret = 0; err: if (file) fclose(file); return ret; } static int get_task_regs(pid_t pid, struct core_entry *core, const struct parasite_ctl *ctl) { user_fpregs_struct_t fpregs = {-1}; user_regs_struct_t regs = {-1}; int ret = -1; pr_info("Dumping GP/FPU registers ... "); if (ctl) regs = ctl->regs_orig; else { if (ptrace(PTRACE_GETREGS, pid, NULL, ®s)) { pr_err("Can't obtain GP registers for %d\n", pid); goto err; } } if (ptrace(PTRACE_GETFPREGS, pid, NULL, &fpregs)) { pr_err("Can't obtain FPU registers for %d\n", pid); goto err; } /* Did we come from a system call? */ if ((int)regs.orig_ax >= 0) { /* Restart the system call */ switch ((long)(int)regs.ax) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: regs.ax = regs.orig_ax; regs.ip -= 2; break; case -ERESTART_RESTARTBLOCK: regs.ax = __NR_restart_syscall; regs.ip -= 2; break; } } assign_reg(core->arch.gpregs, regs, r15); assign_reg(core->arch.gpregs, regs, r14); assign_reg(core->arch.gpregs, regs, r13); assign_reg(core->arch.gpregs, regs, r12); assign_reg(core->arch.gpregs, regs, bp); assign_reg(core->arch.gpregs, regs, bx); assign_reg(core->arch.gpregs, regs, r11); assign_reg(core->arch.gpregs, regs, r10); assign_reg(core->arch.gpregs, regs, r9); assign_reg(core->arch.gpregs, regs, r8); assign_reg(core->arch.gpregs, regs, ax); assign_reg(core->arch.gpregs, regs, cx); assign_reg(core->arch.gpregs, regs, dx); assign_reg(core->arch.gpregs, regs, si); assign_reg(core->arch.gpregs, regs, di); assign_reg(core->arch.gpregs, regs, orig_ax); assign_reg(core->arch.gpregs, regs, ip); assign_reg(core->arch.gpregs, regs, cs); assign_reg(core->arch.gpregs, regs, flags); assign_reg(core->arch.gpregs, regs, sp); assign_reg(core->arch.gpregs, regs, ss); assign_reg(core->arch.gpregs, regs, fs_base); assign_reg(core->arch.gpregs, regs, gs_base); assign_reg(core->arch.gpregs, regs, ds); assign_reg(core->arch.gpregs, regs, es); assign_reg(core->arch.gpregs, regs, fs); assign_reg(core->arch.gpregs, regs, gs); assign_reg(core->arch.fpregs, fpregs, cwd); assign_reg(core->arch.fpregs, fpregs, swd); assign_reg(core->arch.fpregs, fpregs, twd); assign_reg(core->arch.fpregs, fpregs, fop); assign_reg(core->arch.fpregs, fpregs, rip); assign_reg(core->arch.fpregs, fpregs, rdp); assign_reg(core->arch.fpregs, fpregs, mxcsr); assign_reg(core->arch.fpregs, fpregs, mxcsr_mask); assign_array(core->arch.fpregs, fpregs, st_space); assign_array(core->arch.fpregs, fpregs, xmm_space); assign_array(core->arch.fpregs, fpregs, padding); ret = 0; err: return ret; } static int dump_task_core(struct core_entry *core, int fd_core) { pr_info("Dumping header ... "); core->header.version = HEADER_VERSION; core->header.arch = HEADER_ARCH_X86_64; core->header.flags = 0; return write_img(fd_core, core); } static DECLARE_KCMP_TREE(vm_tree, KCMP_VM); static DECLARE_KCMP_TREE(fs_tree, KCMP_FS); static DECLARE_KCMP_TREE(files_tree, KCMP_FILES); static DECLARE_KCMP_TREE(sighand_tree, KCMP_SIGHAND); static int dump_task_kobj_ids(pid_t pid, struct core_entry *core) { int new; struct kid_elem elem; elem.pid = pid; elem.idx = 0; /* really 0 for all */ elem.genid = 0; /* FIXME optimize */ new = 0; core->ids.vm_id = kid_generate_gen(&vm_tree, &elem, &new); if (!core->ids.vm_id || !new) { pr_err("Can't make VM id for %d\n", pid); return -1; } new = 0; core->ids.fs_id = kid_generate_gen(&fs_tree, &elem, &new); if (!core->ids.fs_id || !new) { pr_err("Can't make FS id for %d\n", pid); return -1; } new = 0; core->ids.files_id = kid_generate_gen(&files_tree, &elem, &new); if (!core->ids.files_id || !new) { pr_err("Can't make FILES id for %d\n", pid); return -1; } new = 0; core->ids.sighand_id = kid_generate_gen(&sighand_tree, &elem, &new); if (!core->ids.sighand_id || !new) { pr_err("Can't make IO id for %d\n", pid); return -1; } return 0; } static int dump_task_core_all(pid_t pid, const struct proc_pid_stat *stat, const struct parasite_dump_misc *misc, const struct parasite_ctl *ctl, const struct cr_fdset *cr_fdset) { struct core_entry *core; int ret = -1; pr_info("\n"); pr_info("Dumping core (pid: %d)\n", pid); pr_info("----------------------------------------\n"); core = xzalloc(sizeof(*core)); if (!core) goto err; ret = dump_task_kobj_ids(pid, core); if (ret) goto err_free; ret = dump_task_mm(pid, stat, misc, cr_fdset); if (ret) goto err_free; ret = get_task_regs(pid, core, ctl); if (ret) goto err_free; ret = get_task_personality(pid, &core->tc.personality); if (ret) goto err_free; strncpy((char *)core->tc.comm, stat->comm, TASK_COMM_LEN); core->tc.flags = stat->flags; BUILD_BUG_ON(sizeof(core->tc.blk_sigset) != sizeof(k_rtsigset_t)); memcpy(&core->tc.blk_sigset, &misc->blocked, sizeof(k_rtsigset_t)); core->tc.task_state = TASK_ALIVE; core->tc.exit_code = 0; ret = dump_task_core(core, fdset_fd(cr_fdset, CR_FD_CORE)); if (ret) goto err_free; pr_info("OK\n"); err_free: free(core); err: pr_info("----------------------------------------\n"); return ret; } static int parse_threads(const struct pstree_item *item, struct pid **_t, int *_n) { struct dirent *de; DIR *dir; struct pid *t = NULL; int nr = 1; dir = opendir_proc(item->pid.real_pid, "task"); if (!dir) return -1; while ((de = readdir(dir))) { struct pid *tmp; /* We expect numbers only here */ if (de->d_name[0] == '.') continue; tmp = xrealloc(t, nr * sizeof(struct pid)); if (!tmp) { xfree(t); return -1; } t = tmp; t[nr - 1].real_pid = atoi(de->d_name); nr++; } closedir(dir); *_t = t; *_n = nr - 1; return 0; } static int get_threads(struct pstree_item *item) { return parse_threads(item, &item->threads, &item->nr_threads); } static int check_threads(const struct pstree_item *item) { struct pid *t; int nr, ret; ret = parse_threads(item, &t, &nr); if (ret) return ret; ret = ((nr == item->nr_threads) && !memcmp(t, item->threads, nr)); xfree(t); if (!ret) { pr_info("Threads set has changed while suspending\n"); return -1; } return 0; } static int parse_children(const struct pstree_item *item, u32 **_c, int *_n) { FILE *file; char *tok; u32 *ch = NULL; int nr = 1, i; for (i = 0; i < item->nr_threads; i++) { file = fopen_proc(item->pid.real_pid, "task/%d/children", item->threads[i].real_pid); if (!file) goto err; if (!(fgets(loc_buf, sizeof(loc_buf), file))) loc_buf[0] = 0; fclose(file); tok = strtok(loc_buf, " \n"); while (tok) { u32 *tmp = xrealloc(ch, nr * sizeof(u32)); if (!tmp) goto err; ch = tmp; ch[nr - 1] = atoi(tok); nr++; tok = strtok(NULL, " \n"); } } *_c = ch; *_n = nr - 1; return 0; err: xfree(ch); return -1; } struct pstree_item *__alloc_pstree_item(bool rst) { struct pstree_item *item; item = xzalloc(sizeof(*item) + (rst ? sizeof(item->rst[0]) : 0)); if (!item) return NULL; INIT_LIST_HEAD(&item->children); item->threads = NULL; item->nr_threads = 0; return item; } static int get_children(struct pstree_item *item) { u32 *ch; int ret, i, nr_children; struct pstree_item *c; ret = parse_children(item, &ch, &nr_children); if (ret < 0) return ret; for (i = 0; i < nr_children; i++) { c = alloc_pstree_item(); if (c == NULL) { ret = -1; goto free; } c->pid.real_pid = ch[i]; c->parent = item; list_add_tail(&c->list, &item->children); } free: xfree(ch); return ret; } static void unseize_task_and_threads(const struct pstree_item *item, int st) { int i; for (i = 0; i < item->nr_threads; i++) unseize_task(item->threads[i].real_pid, st); /* item->pid will be here */ } struct pstree_item *pstree_item_next(struct pstree_item *item) { if (!list_empty(&item->children)) { item = list_first_entry(&item->children, struct pstree_item, list); return item; } while (1) { if (item->parent == NULL) { item = NULL; break; } if (item->list.next == &item->parent->children) { item = item->parent; continue; } else { item = list_entry(item->list.next, struct pstree_item, list); break; } } return item; } static void pstree_switch_state(struct pstree_item *root_item, int st) { struct pstree_item *item = root_item; pr_info("Unfreezing tasks into %d\n", st); for_each_pstree_item(item) unseize_task_and_threads(item, st); } static pid_t item_ppid(const struct pstree_item *item) { item = item->parent; return item ? item->pid.real_pid : -1; } static int seize_threads(const struct pstree_item *item) { int i = 0, ret; if ((item->state == TASK_DEAD) && (item->nr_threads > 1)) { pr_err("Zombies with threads are not supported\n"); goto err; } for (i = 0; i < item->nr_threads; i++) { pid_t pid = item->threads[i].real_pid; if (item->pid.real_pid == pid) continue; pr_info("\tSeizing %d's %d thread\n", item->pid.real_pid, pid); ret = seize_task(pid, item_ppid(item), NULL, NULL); if (ret < 0) goto err; if (ret == TASK_DEAD) { pr_err("Zombie thread not supported\n"); goto err; } if (ret == TASK_STOPPED) { pr_err("Stopped threads not supported\n"); goto err; } } return 0; err: for (i--; i >= 0; i--) { if (item->pid.real_pid == item->threads[i].real_pid) continue; unseize_task(item->threads[i].real_pid, TASK_ALIVE); } return -1; } static int collect_threads(struct pstree_item *item) { int ret; ret = get_threads(item); if (!ret) ret = seize_threads(item); if (!ret) ret = check_threads(item); return ret; } /* * Few words about sid and pgid handling. * * An axiom: session can only be changed on self, group -- on self or * on one of self children. * * Conclusions: * 1. both should better be saved in pstree.img so we can restore sid * at the time we fork kids and pgid is just for harmony; * 2. if we seized the parent these IDs* shouldn't change and it's safe * to read and check them right at the seizing time. * * Easings: * 1. with the existing setsid we can only reset sid to task's pid. * Thus, if task escaped from its ancestors with sid we will not be * able to easily restore this construction. Thus for now this is * treated as "unsupported" (FIXME); * * 2. when task whose pid is equal to pgid/sid (i.e. leader) exits we * lose the ability to restore the grp/session easily with the * existing interfaces, thus we also treat this as temporarily * unsupported (FIXME #2). */ static int check_xids(struct pstree_item *root_item) { struct pstree_item *p, *tmp; for_each_pstree_item(p) { if (p->parent == NULL) continue; /* Easing #1 and #2 for sids */ if ((p->sid != p->pid.pid) && (p->sid != p->parent->sid)) { pr_err("SID mismatch on %d (%d/%d)\n", p->pid.pid, p->sid, p->parent->sid); return -1; } /* Easing #2 for pgids */ for_each_pstree_item(tmp) if (tmp->pid.pid == p->pgid) break; if (tmp == NULL) { pr_err("PGIG mismatch on %d (%d)\n", p->pid.pid, p->pgid); return -1; } } return 0; } static int collect_task(struct pstree_item *item) { int ret; pid_t pid = item->pid.real_pid; ret = seize_task(pid, item_ppid(item), &item->pgid, &item->sid); if (ret < 0) goto err; pr_info("Seized task %d, state %d\n", pid, ret); item->state = ret; ret = collect_threads(item); if (ret < 0) goto err_close; ret = get_children(item); if (ret < 0) goto err_close; if ((item->state == TASK_DEAD) && !list_empty(&item->children)) { pr_err("Zombie with children?! O_o Run, run, run!\n"); goto err_close; } close_pid_proc(); pr_info("Collected %d in %d state\n", item->pid.real_pid, item->state); return 0; err_close: close_pid_proc(); unseize_task(pid, item->state); err: return -1; } static int check_subtree(const struct pstree_item *item) { u32 *ch; int nr, ret, i; struct pstree_item *child; ret = parse_children(item, &ch, &nr); if (ret < 0) return ret; i = 0; list_for_each_entry(child, &item->children, list) { if (child->pid.real_pid != ch[i]) break; i++; if (i > nr) break; } xfree(ch); if (i != nr) { pr_info("Children set has changed while suspending\n"); return -1; } return 0; } static int collect_subtree(struct pstree_item *item, int leader_only) { struct pstree_item *child; pid_t pid = item->pid.real_pid; int ret; pr_info("Collecting tasks starting from %d\n", pid); ret = collect_task(item); if (ret) return -1; if (leader_only) return 0; list_for_each_entry(child, &item->children, list) { ret = collect_subtree(child, 0); if (ret < 0) return -1; } if (check_subtree(item)) return -1; return 0; } static int collect_pstree(pid_t pid, const struct cr_options *opts) { int ret, attempts = 5; while (1) { root_item = alloc_pstree_item(); if (root_item == NULL) return -1; root_item->pid.real_pid = pid; INIT_LIST_HEAD(&root_item->list); ret = collect_subtree(root_item, opts->leader_only); if (ret == 0) { /* * Some tasks could have been reparented to * namespaces' reaper. Check this. */ if (opts->namespaces_flags & CLONE_NEWPID) { BUG_ON(root_item->pid.real_pid != 1); if (check_subtree(root_item)) goto try_again; } break; } /* * Old tasks can die and new ones can appear while we * try to seize the swarm. It's much simpler (and reliable) * just to restart the collection from the beginning * rather than trying to chase them. */ try_again: if (attempts == 0) break; attempts--; pr_info("Trying to suspend tasks again\n"); pstree_switch_state(root_item, TASK_ALIVE); free_pstree(root_item); } if (!ret) ret = check_xids(root_item); return ret; } static int dump_pstree(struct pstree_item *root_item) { struct pstree_item *item = root_item; struct pstree_entry e; int ret = -1, i; int pstree_fd; pr_info("\n"); pr_info("Dumping pstree (pid: %d)\n", root_item->pid.real_pid); pr_info("----------------------------------------\n"); pstree_fd = open_image(CR_FD_PSTREE, O_DUMP); if (pstree_fd < 0) return -1; for_each_pstree_item(item) { pr_info("Process: %d(%d)\n", item->pid.pid, item->pid.real_pid); e.pid = item->pid.pid; e.ppid = item->parent ? item->parent->pid.pid : 0; e.pgid = item->pgid; e.sid = item->sid; e.nr_threads = item->nr_threads; if (write_img(pstree_fd, &e)) goto err; for (i = 0; i < item->nr_threads; i++) { if (write_img_buf(pstree_fd, &item->threads[i].pid, sizeof(u32))) goto err; } } ret = 0; err: pr_info("----------------------------------------\n"); close(pstree_fd); return ret; } static int dump_task_thread(struct parasite_ctl *parasite_ctl, struct pid *tid) { struct core_entry *core; int ret = -1, fd_core; unsigned int *taddr; pid_t pid = tid->real_pid; pr_info("\n"); pr_info("Dumping core for thread (pid: %d)\n", pid); pr_info("----------------------------------------\n"); core = xzalloc(sizeof(*core)); if (!core) goto err; ret = get_task_regs(pid, core, NULL); if (ret) goto err_free; ret = parasite_dump_thread_seized(parasite_ctl, pid, &taddr, &tid->pid); if (ret) { pr_err("Can't dump tid address for pid %d", pid); goto err_free; } pr_info("%d: tid_address=%p\n", pid, taddr); core->clear_tid_address = (u64) taddr; pr_info("OK\n"); core->tc.task_state = TASK_ALIVE; core->tc.exit_code = 0; fd_core = open_image(CR_FD_CORE, O_DUMP, pid); if (fd_core < 0) goto err_free; ret = dump_task_core(core, fd_core); close(fd_core); err_free: free(core); err: pr_info("----------------------------------------\n"); return ret; } static int dump_one_zombie(const struct pstree_item *item, const struct proc_pid_stat *pps) { struct core_entry *core; int ret = -1, fd_core; core = xzalloc(sizeof(*core)); if (core == NULL) goto err; core->tc.task_state = TASK_DEAD; core->tc.exit_code = pps->exit_code; fd_core = open_image(CR_FD_CORE, O_DUMP, item->pid.pid); if (fd_core < 0) goto err_free; ret = dump_task_core(core, fd_core); close(fd_core); err_free: xfree(core); err: return ret; } static struct proc_pid_stat pps_buf; static int dump_task_threads(struct parasite_ctl *parasite_ctl, const struct pstree_item *item) { int i; for (i = 0; i < item->nr_threads; i++) { /* Leader is already dumped */ if (item->pid.real_pid == item->threads[i].real_pid) { item->threads[i].pid = item->pid.pid; continue; } if (dump_task_thread(parasite_ctl, &item->threads[i])) return -1; } return 0; } static int dump_one_task(struct pstree_item *item) { pid_t pid = item->pid.real_pid; LIST_HEAD(vma_area_list); struct parasite_ctl *parasite_ctl; int ret = -1; struct parasite_dump_misc misc; struct cr_fdset *cr_fdset = NULL; int nr_fds = PARASITE_MAX_FDS; int *fds = NULL; pr_info("========================================\n"); pr_info("Dumping task (pid: %d)\n", pid); pr_info("========================================\n"); fds = xmalloc(nr_fds * sizeof(int)); if (!fds) goto err_free; if (item->state == TASK_STOPPED) { pr_err("Stopped tasks are not supported\n"); goto err_free; } pr_info("Obtainting task stat ... "); ret = parse_pid_stat(pid, &pps_buf); if (ret < 0) goto err; if (item->state == TASK_DEAD) return dump_one_zombie(item, &pps_buf); ret = -1; cr_fdset = cr_task_fdset_open(item->pid.pid, O_DUMP); if (!cr_fdset) goto err; ret = collect_mappings(pid, &vma_area_list); if (ret) { pr_err("Collect mappings (pid: %d) failed with %d\n", pid, ret); goto err; } ret = collect_fds(pid, fds, &nr_fds); if (ret) { pr_err("Collect fds (pid: %d) failed with %d\n", pid, ret); goto err; } parasite_ctl = parasite_infect_seized(pid, &vma_area_list); if (!parasite_ctl) { ret = -1; pr_err("Can't infect (pid: %d) with parasite\n", pid); goto err; } ret = dump_task_files_seized(parasite_ctl, cr_fdset, fds, nr_fds); if (ret) { pr_err("Dump files (pid: %d) failed with %d\n", pid, ret); goto err_cure; } ret = parasite_dump_pages_seized(parasite_ctl, &vma_area_list, cr_fdset); if (ret) { pr_err("Can't dump pages (pid: %d) with parasite\n", pid); goto err_cure; } ret = parasite_dump_sigacts_seized(parasite_ctl, cr_fdset); if (ret) { pr_err("Can't dump sigactions (pid: %d) with parasite\n", pid); goto err_cure; } ret = parasite_dump_itimers_seized(parasite_ctl, cr_fdset); if (ret) { pr_err("Can't dump itimers (pid: %d)\n", pid); goto err_cure; } ret = parasite_dump_misc_seized(parasite_ctl, &misc); if (ret) { pr_err("Can't dump misc (pid: %d)\n", pid); goto err_cure; } ret = dump_task_core_all(pid, &pps_buf, &misc, parasite_ctl, cr_fdset); if (ret) { pr_err("Dump core (pid: %d) failed with %d\n", pid, ret); goto err_cure; } ret = dump_task_threads(parasite_ctl, item); if (ret) { pr_err("Can't dump threads\n"); goto err_cure; } ret = parasite_cure_seized(parasite_ctl); if (ret) { pr_err("Can't cure (pid: %d) from parasite\n", pid); goto err; } ret = dump_task_mappings(pid, &vma_area_list, cr_fdset); if (ret) { pr_err("Dump mappings (pid: %d) failed with %d\n", pid, ret); goto err; } ret = dump_task_creds(pid, &misc, cr_fdset); if (ret) { pr_err("Dump creds (pid: %d) failed with %d\n", pid, ret); goto err; } ret = dump_task_fs(pid, cr_fdset); if (ret) { pr_err("Dump fs (pid: %d) failed with %d\n", pid, ret); goto err; } err: close_cr_fdset(&cr_fdset); close_pid_proc(); err_free: free_mappings(&vma_area_list); xfree(fds); return ret; err_cure: parasite_cure_seized(parasite_ctl); goto err; } int cr_dump_tasks(pid_t pid, const struct cr_options *opts) { struct pstree_item *item; int ret = -1; pr_info("========================================\n"); pr_info("Dumping process %s(pid: %d)\n", !opts->leader_only ? "group " : "", pid); pr_info("========================================\n"); if (collect_pstree(pid, opts)) goto err; if (dump_pstree(root_item)) goto err; if (opts->namespaces_flags) { if (dump_namespaces(pid, opts->namespaces_flags) < 0) goto err; } /* * Ignore collection errors by now since we may not want * to dump the missed sockets. But later, when we will start * dumping containers, we'll better fail here, rather than * in the dump stage */ collect_sockets(); glob_fdset = cr_glob_fdset_open(O_DUMP); if (!glob_fdset) goto err; if (init_shmem_dump()) goto err; for_each_pstree_item(item) { if (dump_one_task(item)) goto err; if (opts->leader_only) break; } ret = cr_dump_shmem(); if (ret) goto err; ret = fix_external_unix_sockets(); if (ret) goto err; fd_id_show_tree(); err: fini_shmem_dump(); close_cr_fdset(&glob_fdset); /* * If we've failed to do anything -- unlock all TCP sockets * so that the connections can go on. But if we succeeded -- * don't, just close them silently. */ if (ret) tcp_unlock_all(); pstree_switch_state(root_item, ret ? TASK_ALIVE : opts->final_state); free_pstree(root_item); return ret; }