From 9d918c59646b41c063b87d2ff1da902782ba1454 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 19 Jul 2012 13:23:01 +0400 Subject: [PATCH] protobuf: Convert core_entry to PB format v5 This requires some explanations - Since we use protobuf data in restorer code we need to carry a copy of appropriate PB entities in resident memory. For this sake task_restore_core_args and thread_restore_args were significantly reworked. In short -- the caller code fills PB structures into task arguments space. v3: - Combine everything arch related to thread_info field, and make it optional - Drop "version" field from message, we check version in another specific message - Don't forget to call core_entry__free_unpacked where needed - We continue dumping FPU state, still it's not yet restored v4: - Don't carry task_core_entry and task_kobj_ids_entry for threads, and yield error if present in image. v5: - Allocate core_entry depending on type of task being dumped Signed-off-by: Cyrill Gorcunov Signed-off-by: Pavel Emelyanov --- cr-dump.c | 270 +++++++++++++++++++++++++++++--------------- cr-restore.c | 117 ++++++++++++------- cr-show.c | 161 ++++++++++++-------------- include/image.h | 119 ------------------- include/restorer.h | 10 +- protobuf/Makefile | 1 + protobuf/core.proto | 80 +++++++++++++ restorer.c | 46 ++------ 8 files changed, 433 insertions(+), 371 deletions(-) create mode 100644 protobuf/core.proto diff --git a/cr-dump.c b/cr-dump.c index de1839e1b..e41654d2c 100644 --- a/cr-dump.c +++ b/cr-dump.c @@ -51,6 +51,7 @@ #include "protobuf/fs.pb-c.h" #include "protobuf/mm.pb-c.h" #include "protobuf/creds.pb-c.h" +#include "protobuf/core.pb-c.h" #ifndef CONFIG_X86_64 # error No x86-32 support yet @@ -490,8 +491,8 @@ static int dump_task_creds(pid_t pid, const struct parasite_dump_misc *misc, return pb_write(fdset_fd(fds, CR_FD_CREDS), &ce, creds_entry); } -#define assign_reg(dst, src, e) dst.e = (__typeof__(dst.e))src.e -#define assign_array(dst, src, e) memcpy(&dst.e, 
&src.e, sizeof(dst.e)) +#define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))src.e +#define assign_array(dst, src, e) memcpy(dst->e, &src.e, sizeof(src.e)) static int get_task_auxv(pid_t pid, MmEntry *mm) { @@ -585,10 +586,11 @@ err: return ret; } -static int get_task_regs(pid_t pid, struct core_entry *core, const struct parasite_ctl *ctl) +static int get_task_regs(pid_t pid, CoreEntry *core, const struct parasite_ctl *ctl) { user_fpregs_struct_t fpregs = {-1}; user_regs_struct_t regs = {-1}; + int ret = -1; pr_info("Dumping GP/FPU registers ... "); @@ -624,46 +626,51 @@ static int get_task_regs(pid_t pid, struct core_entry *core, const struct parasi } } - assign_reg(core->arch.gpregs, regs, r15); - assign_reg(core->arch.gpregs, regs, r14); - assign_reg(core->arch.gpregs, regs, r13); - assign_reg(core->arch.gpregs, regs, r12); - assign_reg(core->arch.gpregs, regs, bp); - assign_reg(core->arch.gpregs, regs, bx); - assign_reg(core->arch.gpregs, regs, r11); - assign_reg(core->arch.gpregs, regs, r10); - assign_reg(core->arch.gpregs, regs, r9); - assign_reg(core->arch.gpregs, regs, r8); - assign_reg(core->arch.gpregs, regs, ax); - assign_reg(core->arch.gpregs, regs, cx); - assign_reg(core->arch.gpregs, regs, dx); - assign_reg(core->arch.gpregs, regs, si); - assign_reg(core->arch.gpregs, regs, di); - assign_reg(core->arch.gpregs, regs, orig_ax); - assign_reg(core->arch.gpregs, regs, ip); - assign_reg(core->arch.gpregs, regs, cs); - assign_reg(core->arch.gpregs, regs, flags); - assign_reg(core->arch.gpregs, regs, sp); - assign_reg(core->arch.gpregs, regs, ss); - assign_reg(core->arch.gpregs, regs, fs_base); - assign_reg(core->arch.gpregs, regs, gs_base); - assign_reg(core->arch.gpregs, regs, ds); - assign_reg(core->arch.gpregs, regs, es); - assign_reg(core->arch.gpregs, regs, fs); - assign_reg(core->arch.gpregs, regs, gs); + assign_reg(core->thread_info->gpregs, regs, r15); + assign_reg(core->thread_info->gpregs, regs, r14); + assign_reg(core->thread_info->gpregs, 
regs, r13); + assign_reg(core->thread_info->gpregs, regs, r12); + assign_reg(core->thread_info->gpregs, regs, bp); + assign_reg(core->thread_info->gpregs, regs, bx); + assign_reg(core->thread_info->gpregs, regs, r11); + assign_reg(core->thread_info->gpregs, regs, r10); + assign_reg(core->thread_info->gpregs, regs, r9); + assign_reg(core->thread_info->gpregs, regs, r8); + assign_reg(core->thread_info->gpregs, regs, ax); + assign_reg(core->thread_info->gpregs, regs, cx); + assign_reg(core->thread_info->gpregs, regs, dx); + assign_reg(core->thread_info->gpregs, regs, si); + assign_reg(core->thread_info->gpregs, regs, di); + assign_reg(core->thread_info->gpregs, regs, orig_ax); + assign_reg(core->thread_info->gpregs, regs, ip); + assign_reg(core->thread_info->gpregs, regs, cs); + assign_reg(core->thread_info->gpregs, regs, flags); + assign_reg(core->thread_info->gpregs, regs, sp); + assign_reg(core->thread_info->gpregs, regs, ss); + assign_reg(core->thread_info->gpregs, regs, fs_base); + assign_reg(core->thread_info->gpregs, regs, gs_base); + assign_reg(core->thread_info->gpregs, regs, ds); + assign_reg(core->thread_info->gpregs, regs, es); + assign_reg(core->thread_info->gpregs, regs, fs); + assign_reg(core->thread_info->gpregs, regs, gs); - assign_reg(core->arch.fpregs, fpregs, cwd); - assign_reg(core->arch.fpregs, fpregs, swd); - assign_reg(core->arch.fpregs, fpregs, twd); - assign_reg(core->arch.fpregs, fpregs, fop); - assign_reg(core->arch.fpregs, fpregs, rip); - assign_reg(core->arch.fpregs, fpregs, rdp); - assign_reg(core->arch.fpregs, fpregs, mxcsr); - assign_reg(core->arch.fpregs, fpregs, mxcsr_mask); + assign_reg(core->thread_info->fpregs, fpregs, cwd); + assign_reg(core->thread_info->fpregs, fpregs, swd); + assign_reg(core->thread_info->fpregs, fpregs, twd); + assign_reg(core->thread_info->fpregs, fpregs, fop); + assign_reg(core->thread_info->fpregs, fpregs, rip); + assign_reg(core->thread_info->fpregs, fpregs, rdp); + assign_reg(core->thread_info->fpregs, 
fpregs, mxcsr); + assign_reg(core->thread_info->fpregs, fpregs, mxcsr_mask); - assign_array(core->arch.fpregs, fpregs, st_space); - assign_array(core->arch.fpregs, fpregs, xmm_space); - assign_array(core->arch.fpregs, fpregs, padding); + /* Make sure we have enough space */ + BUG_ON(core->thread_info->fpregs->n_st_space != ARRAY_SIZE(fpregs.st_space)); + BUG_ON(core->thread_info->fpregs->n_xmm_space != ARRAY_SIZE(fpregs.xmm_space)); + BUG_ON(core->thread_info->fpregs->n_padding != ARRAY_SIZE(fpregs.padding)); + + assign_array(core->thread_info->fpregs, fpregs, st_space); + assign_array(core->thread_info->fpregs, fpregs, xmm_space); + assign_array(core->thread_info->fpregs, fpregs, padding); ret = 0; @@ -671,22 +678,12 @@ err: return ret; } -static int dump_task_core(struct core_entry *core, int fd_core) -{ - pr_info("Dumping header ... "); - - core->header.arch = HEADER_ARCH_X86_64; - core->header.flags = 0; - - return write_img(fd_core, core); -} - static DECLARE_KCMP_TREE(vm_tree, KCMP_VM); static DECLARE_KCMP_TREE(fs_tree, KCMP_FS); static DECLARE_KCMP_TREE(files_tree, KCMP_FILES); static DECLARE_KCMP_TREE(sighand_tree, KCMP_SIGHAND); -static int dump_task_kobj_ids(pid_t pid, struct core_entry *core) +static int dump_task_kobj_ids(pid_t pid, CoreEntry *core) { int new; struct kid_elem elem; @@ -696,29 +693,29 @@ static int dump_task_kobj_ids(pid_t pid, struct core_entry *core) elem.genid = 0; /* FIXME optimize */ new = 0; - core->ids.vm_id = kid_generate_gen(&vm_tree, &elem, &new); - if (!core->ids.vm_id || !new) { + core->ids->vm_id = kid_generate_gen(&vm_tree, &elem, &new); + if (!core->ids->vm_id || !new) { pr_err("Can't make VM id for %d\n", pid); return -1; } new = 0; - core->ids.fs_id = kid_generate_gen(&fs_tree, &elem, &new); - if (!core->ids.fs_id || !new) { + core->ids->fs_id = kid_generate_gen(&fs_tree, &elem, &new); + if (!core->ids->fs_id || !new) { pr_err("Can't make FS id for %d\n", pid); return -1; } new = 0; - core->ids.files_id = 
kid_generate_gen(&files_tree, &elem, &new); - if (!core->ids.files_id || !new) { + core->ids->files_id = kid_generate_gen(&files_tree, &elem, &new); + if (!core->ids->files_id || !new) { pr_err("Can't make FILES id for %d\n", pid); return -1; } new = 0; - core->ids.sighand_id = kid_generate_gen(&sighand_tree, &elem, &new); - if (!core->ids.sighand_id || !new) { + core->ids->sighand_id = kid_generate_gen(&sighand_tree, &elem, &new); + if (!core->ids->sighand_id || !new) { pr_err("Can't make IO id for %d\n", pid); return -1; } @@ -726,21 +723,114 @@ static int dump_task_kobj_ids(pid_t pid, struct core_entry *core) return 0; } +static void core_entry_free(CoreEntry *core) +{ + if (core) { + if (core->thread_info) { + if (core->thread_info->fpregs) { + xfree(core->thread_info->fpregs->st_space); + xfree(core->thread_info->fpregs->xmm_space); + xfree(core->thread_info->fpregs->padding); + } + xfree(core->thread_info->gpregs); + xfree(core->thread_info->fpregs); + } + xfree(core->thread_info); + xfree(core->tc); + xfree(core->ids); + } +} + +static CoreEntry *core_entry_alloc(int alloc_thread_info, + int alloc_tc, + int alloc_ids) +{ + CoreEntry *core; + ThreadInfoX86 *thread_info; + UserX86RegsEntry *gpregs; + UserX86FpregsEntry *fpregs; + TaskCoreEntry *tc; + TaskKobjIdsEntry *ids; + + core = xmalloc(sizeof(*core)); + if (!core) + return NULL; + core_entry__init(core); + + core->mtype = CORE_ENTRY__MARCH__X86_64; + + if (alloc_thread_info) { + thread_info = xmalloc(sizeof(*thread_info)); + if (!thread_info) + goto err; + thread_info_x86__init(thread_info); + core->thread_info = thread_info; + + gpregs = xmalloc(sizeof(*gpregs)); + if (!gpregs) + goto err; + user_x86_regs_entry__init(gpregs); + thread_info->gpregs = gpregs; + + fpregs = xmalloc(sizeof(*fpregs)); + if (!fpregs) + goto err; + user_x86_fpregs_entry__init(fpregs); + thread_info->fpregs = fpregs; + + /* These are numbers from kernel */ + fpregs->n_st_space = 32; + fpregs->n_xmm_space = 64; + 
fpregs->n_padding = 24; + + fpregs->st_space = xzalloc(pb_repeated_size(fpregs, st_space)); + fpregs->xmm_space = xzalloc(pb_repeated_size(fpregs, xmm_space)); + fpregs->padding = xzalloc(pb_repeated_size(fpregs, padding)); + + if (!fpregs->st_space || !fpregs->xmm_space || !fpregs->padding) + goto err; + + } + + if (alloc_tc) { + tc = xzalloc(sizeof(*tc) + TASK_COMM_LEN); + if (!tc) + goto err; + task_core_entry__init(tc); + tc->comm = (void *)tc + sizeof(*tc); + core->tc = tc; + } + + if (alloc_ids) { + ids = xmalloc(sizeof(*ids)); + if (!ids) + goto err; + task_kobj_ids_entry__init(ids); + core->ids = ids; + } + + return core; +err: + core_entry_free(core); + return NULL; +} + static int dump_task_core_all(pid_t pid, const struct proc_pid_stat *stat, const struct parasite_dump_misc *misc, const struct parasite_ctl *ctl, const struct cr_fdset *cr_fdset) { - struct core_entry *core; + int fd_core = fdset_fd(cr_fdset, CR_FD_CORE); + CoreEntry *core; int ret = -1; + core = core_entry_alloc(1, 1, 1); + if (!core) + return -1; + pr_info("\n"); pr_info("Dumping core (pid: %d)\n", pid); pr_info("----------------------------------------\n"); - core = xzalloc(sizeof(*core)); - if (!core) - goto err; - ret = dump_task_kobj_ids(pid, core); if (ret) goto err_free; @@ -753,26 +843,27 @@ static int dump_task_core_all(pid_t pid, const struct proc_pid_stat *stat, if (ret) goto err_free; - ret = get_task_personality(pid, &core->tc.personality); + ret = get_task_personality(pid, &core->tc->personality); if (ret) goto err_free; - strncpy((char *)core->tc.comm, stat->comm, TASK_COMM_LEN); - core->tc.flags = stat->flags; - BUILD_BUG_ON(sizeof(core->tc.blk_sigset) != sizeof(k_rtsigset_t)); - memcpy(&core->tc.blk_sigset, &misc->blocked, sizeof(k_rtsigset_t)); + strncpy((char *)core->tc->comm, stat->comm, TASK_COMM_LEN); + core->tc->flags = stat->flags; + BUILD_BUG_ON(sizeof(core->tc->blk_sigset) != sizeof(k_rtsigset_t)); + memcpy(&core->tc->blk_sigset, &misc->blocked, 
sizeof(k_rtsigset_t)); - core->tc.task_state = TASK_ALIVE; - core->tc.exit_code = 0; + core->tc->task_state = TASK_ALIVE; + core->tc->exit_code = 0; - ret = dump_task_core(core, fdset_fd(cr_fdset, CR_FD_CORE)); - if (ret) + ret = pb_write(fd_core, core, core_entry); + if (ret < 0) { + pr_info("ERROR\n"); goto err_free; - pr_info("OK\n"); + } else + pr_info("OK\n"); err_free: - free(core); -err: + core_entry_free(core); pr_info("----------------------------------------\n"); return ret; @@ -1121,7 +1212,7 @@ try_again: static int dump_task_thread(struct parasite_ctl *parasite_ctl, struct pid *tid) { - struct core_entry *core; + CoreEntry *core; int ret = -1, fd_core; unsigned int *taddr; pid_t pid = tid->real; @@ -1130,7 +1221,7 @@ static int dump_task_thread(struct parasite_ctl *parasite_ctl, struct pid *tid) pr_info("Dumping core for thread (pid: %d)\n", pid); pr_info("----------------------------------------\n"); - core = xzalloc(sizeof(*core)); + core = core_entry_alloc(1, 0, 0); if (!core) goto err; @@ -1145,22 +1236,19 @@ static int dump_task_thread(struct parasite_ctl *parasite_ctl, struct pid *tid) } pr_info("%d: tid_address=%p\n", pid, taddr); - core->clear_tid_address = (u64) taddr; + core->thread_info->clear_tid_addr = (u64) taddr; pr_info("OK\n"); - core->tc.task_state = TASK_ALIVE; - core->tc.exit_code = 0; - fd_core = open_image(CR_FD_CORE, O_DUMP, tid->virt); if (fd_core < 0) goto err_free; - ret = dump_task_core(core, fd_core); + ret = pb_write(fd_core, core, core_entry); close(fd_core); err_free: - free(core); + core_entry_free(core); err: pr_info("----------------------------------------\n"); return ret; @@ -1169,24 +1257,24 @@ err: static int dump_one_zombie(const struct pstree_item *item, const struct proc_pid_stat *pps) { - struct core_entry *core; + CoreEntry *core; int ret = -1, fd_core; - core = xzalloc(sizeof(*core)); + core = core_entry_alloc(0, 1, 0); if (core == NULL) goto err; - core->tc.task_state = TASK_DEAD; - core->tc.exit_code = 
pps->exit_code; + core->tc->task_state = TASK_DEAD; + core->tc->exit_code = pps->exit_code; fd_core = open_image(CR_FD_CORE, O_DUMP, item->pid); if (fd_core < 0) goto err_free; - ret = dump_task_core(core, fd_core); + ret = pb_write(fd_core, core, core_entry); close(fd_core); err_free: - xfree(core); + core_entry_free(core); err: return ret; } diff --git a/cr-restore.c b/cr-restore.c index d16bf5f3d..74efc2e70 100644 --- a/cr-restore.c +++ b/cr-restore.c @@ -57,7 +57,7 @@ static struct pstree_item *me; static int restore_task_with_children(void *); -static int sigreturn_restore(pid_t pid, struct list_head *vmas, int nr_vmas); +static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *vmas, int nr_vmas); static int shmem_remap(void *old_addr, void *new_addr, unsigned long size) { @@ -186,7 +186,7 @@ static int read_and_open_vmas(int pid, struct list_head *vmas, int *nr_vmas) return ret; } -static int prepare_and_sigreturn(int pid) +static int prepare_and_sigreturn(int pid, CoreEntry *core) { int err, nr_vmas; LIST_HEAD(vma_list); @@ -195,7 +195,7 @@ static int prepare_and_sigreturn(int pid) if (err) return err; - return sigreturn_restore(pid, &vma_list, nr_vmas); + return sigreturn_restore(pid, core, &vma_list, nr_vmas); } static rt_sigaction_t sigchld_act; @@ -276,7 +276,7 @@ static int pstree_wait_helpers() } -static int restore_one_alive_task(int pid) +static int restore_one_alive_task(int pid, CoreEntry *core) { pr_info("Restoring resources\n"); @@ -292,7 +292,7 @@ static int restore_one_alive_task(int pid) if (prepare_sigactions(pid)) return -1; - return prepare_and_sigreturn(pid); + return prepare_and_sigreturn(pid, core); } static void zombie_prepare_signals(void) @@ -391,24 +391,19 @@ static int restore_one_zombie(int pid, int exit_code) return -1; } -static int check_core_header(int pid, struct task_core_entry *tc) +static int check_core_header(int pid, CoreEntry *core) { int fd = -1, ret = -1; - struct image_header hdr; fd = 
open_image_ro(CR_FD_CORE, pid); if (fd < 0) return -1; - if (read_img(fd, &hdr) < 0) - goto out; - - if (hdr.arch != HEADER_ARCH_X86_64) { - pr_err("Core arch mismatch %d\n", (int)hdr.arch); + if (core->mtype != CORE_ENTRY__MARCH__X86_64) { + pr_err("Core march mismatch %d\n", (int)core->mtype); goto out; } - - ret = read_img(fd, tc); + ret = 0; out: close_safe(&fd); return ret < 0 ? ret : 0; @@ -416,23 +411,43 @@ out: static int restore_one_task(int pid) { - struct task_core_entry tc; + int fd, ret; + CoreEntry *core; if (me->state == TASK_HELPER) return restore_one_fake(pid); - if (check_core_header(pid, &tc)) + fd = open_image_ro(CR_FD_CORE, pid); + if (fd < 0) return -1; - switch ((int)tc.task_state) { - case TASK_ALIVE: - return restore_one_alive_task(pid); - case TASK_DEAD: - return restore_one_zombie(pid, tc.exit_code); - default: - pr_err("Unknown state in code %d\n", (int)tc.task_state); + ret = pb_read(fd, &core, core_entry); + close(fd); + + if (ret < 0) return -1; + + if (check_core_header(pid, core)) { + ret = -1; + goto out; } + + switch ((int)core->tc->task_state) { + case TASK_ALIVE: + ret = restore_one_alive_task(pid, core); + break; + case TASK_DEAD: + ret = restore_one_zombie(pid, core->tc->exit_code); + break; + default: + pr_err("Unknown state in code %d\n", (int)core->tc->task_state); + ret = -1; + break; + } + +out: + core_entry__free_unpacked(core, NULL); + return ret; } /* @@ -1117,7 +1132,7 @@ out: return ret; } -static int sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas) +static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_vmas, int nr_vmas) { long restore_code_len, restore_task_vma_len; long restore_thread_vma_len, self_vmas_len, vmas_len; @@ -1134,7 +1149,6 @@ static int sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas) struct thread_restore_args *thread_args; LIST_HEAD(self_vma_list); - int fd_core = -1; int fd_pages = -1; int i; @@ -1159,12 +1173,6 @@ static int 
sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas) BUILD_BUG_ON(SHMEMS_SIZE % PAGE_SIZE); BUILD_BUG_ON(TASK_ENTRIES_SIZE % PAGE_SIZE); - fd_core = open_image_ro(CR_FD_CORE, pid); - if (fd_core < 0) { - pr_perror("Can't open core-out-%d", pid); - goto err; - } - fd_pages = open_image_ro(CR_FD_PAGES, pid); if (fd_pages < 0) { pr_perror("Can't open pages-%d", pid); @@ -1279,12 +1287,24 @@ static int sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas) /* * Arguments for task restoration. */ + + BUG_ON(core->mtype != CORE_ENTRY__MARCH__X86_64); + task_args->pid = pid; - task_args->fd_core = fd_core; task_args->logfd = log_get_fd(); task_args->sigchld_act = sigchld_act; task_args->fd_pages = fd_pages; + strncpy(task_args->comm, core->tc->comm, sizeof(task_args->comm)); + + task_args->clear_tid_addr = core->thread_info->clear_tid_addr; + task_args->ids = *core->ids; + task_args->gpregs = *core->thread_info->gpregs; + task_args->blk_sigset = core->tc->blk_sigset; + + /* No longer need it */ + core_entry__free_unpacked(core, NULL); + ret = prepare_itimers(pid, task_args); if (ret < 0) goto err; @@ -1310,18 +1330,40 @@ static int sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas) * Fill up per-thread data. 
*/ for (i = 0; i < me->nr_threads; i++) { + int fd_core; thread_args[i].pid = me->threads[i].virt; /* skip self */ if (thread_args[i].pid == pid) continue; - /* Core files are to be opened */ - thread_args[i].fd_core = open_image_ro(CR_FD_CORE, thread_args[i].pid); - if (thread_args[i].fd_core < 0) + fd_core = open_image_ro(CR_FD_CORE, thread_args[i].pid); + if (fd_core < 0) { + pr_err("Can't open core data for thread %d\n", + thread_args[i].pid); goto err; + } - thread_args[i].rst_lock = &task_args->rst_lock; + ret = pb_read(fd_core, &core, core_entry); + close(fd_core); + + if (core->tc || core->ids) { + pr_err("Thread has optional fields present %d\n", + thread_args[i].pid); + ret = -1; + } + + if (ret < 0) { + pr_err("Can't read core data for thread %d\n", + thread_args[i].pid); + goto err; + } + + thread_args[i].rst_lock = &task_args->rst_lock; + thread_args[i].gpregs = *core->thread_info->gpregs; + thread_args[i].clear_tid_addr = core->thread_info->clear_tid_addr; + + core_entry__free_unpacked(core, NULL); pr_info("Thread %4d stack %8p heap %8p rt_sigframe %8p\n", i, thread_args[i].mem_zone.stack, @@ -1334,12 +1376,10 @@ static int sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas) pr_info("task_args: %p\n" "task_args->pid: %d\n" - "task_args->fd_core: %d\n" "task_args->nr_threads: %d\n" "task_args->clone_restore_fn: %p\n" "task_args->thread_args: %p\n", task_args, task_args->pid, - task_args->fd_core, task_args->nr_threads, task_args->clone_restore_fn, task_args->thread_args); @@ -1362,7 +1402,6 @@ static int sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas) err: free_mappings(&self_vma_list); - close_safe(&fd_core); /* Just to be sure */ exit(1); diff --git a/cr-show.c b/cr-show.c index 9703ac29c..47fe3ba07 100644 --- a/cr-show.c +++ b/cr-show.c @@ -40,6 +40,7 @@ #include "protobuf/mm.pb-c.h" #include "protobuf/vma.pb-c.h" #include "protobuf/creds.pb-c.h" +#include "protobuf/core.pb-c.h" #define DEF_PAGES_PER_LINE 6 @@ 
-51,24 +52,6 @@ #define PR_SYMBOL(sym) \ (isprint(sym) ? sym : '.') -#define pr_regs4(s, n1, n2, n3, n4) \ - pr_msg("%8s: 0x%16lx " \ - "%8s: 0x%16lx " \ - "%8s: 0x%16lx " \ - "%8s: 0x%16lx\n", \ - #n1, s.n1, \ - #n2, s.n2, \ - #n3, s.n3, \ - #n4, s.n4) - -#define pr_regs3(s, n1, n2, n3) \ - pr_msg("%8s: 0x%16lx " \ - "%8s: 0x%16lx " \ - "%8s: 0x%16lx\n", \ - #n1, s.n1, \ - #n2, s.n2, \ - #n3, s.n3) - static LIST_HEAD(pstree_list); void show_files(int fd_files, struct cr_options *o) @@ -370,30 +353,6 @@ void show_pstree(int fd_pstree, struct cr_options *o) show_collect_pstree(fd_pstree, NULL); } -static void show_core_regs(int fd_core) -{ - struct user_regs_entry regs; - - pr_msg("\n\t---[GP registers set]---\n"); - - lseek(fd_core, GET_FILE_OFF(struct core_entry, arch.gpregs), SEEK_SET); - - if (read_img(fd_core, ®s) < 0) - goto err; - - pr_regs4(regs, cs, ip, ds, es); - pr_regs4(regs, ss, sp, fs, gs); - pr_regs4(regs, di, si, dx, cx); - pr_regs4(regs, ax, r8, r9, r10); - pr_regs4(regs, r11, r12, r13, r14); - pr_regs3(regs, r15, bp, bx); - pr_regs4(regs, orig_ax, flags, fs_base, gs_base); - pr_msg("\n"); - -err: - return; -} - static inline char *task_state_str(int state) { switch (state) { @@ -406,68 +365,98 @@ static inline char *task_state_str(int state) } } -static void show_core_rest(int fd_core) +static void show_core_rest(TaskCoreEntry *tc) { - struct task_core_entry tc; + if (!tc) + return; - lseek(fd_core, GET_FILE_OFF(struct core_entry, tc), SEEK_SET); - if (read_img(fd_core, &tc) < 0) - goto err; - - pr_msg("\n\t---[Task parameters]---\n"); - pr_msg("\tPersonality: %#x\n", tc.personality); - pr_msg("\tCommand: %s\n", tc.comm); + pr_msg("\t---[ Task parameters ]---\n"); + pr_msg("\tPersonality: %#x\n", tc->personality); + pr_msg("\tCommand: %s\n", tc->comm); pr_msg("\tState: %d (%s)\n", - (int)tc.task_state, - task_state_str((int)tc.task_state)); + (int)tc->task_state, + task_state_str((int)tc->task_state)); pr_msg("\t Exit code: %u\n", - (unsigned 
int)tc.exit_code); + (unsigned int)tc->exit_code); - pr_msg("\tBlkSig: 0x%lx\n", tc.blk_sigset); + pr_msg("\tBlkSig: 0x%lx\n", tc->blk_sigset); pr_msg("\n"); - -err: - return; } -static void show_core_ids(int fd) +static void show_core_ids(TaskKobjIdsEntry *ids) { - struct core_ids_entry cie; + if (!ids) + return; - lseek(fd, GET_FILE_OFF(struct core_entry, ids), SEEK_SET); - if (read_img(fd, &cie) < 0) - goto err; + pr_msg("\t---[ Task IDS ]---\n"); + pr_msg("\tVM: %#x\n", ids->vm_id); + pr_msg("\tFS: %#x\n", ids->fs_id); + pr_msg("\tFILES: %#x\n", ids->files_id); + pr_msg("\tSIGHAND: %#x\n", ids->sighand_id); +} - pr_msg("\tVM: %#x\n", cie.vm_id); - pr_msg("\tFS: %#x\n", cie.fs_id); - pr_msg("\tFILES: %#x\n", cie.files_id); - pr_msg("\tSIGHAND: %#x\n", cie.sighand_id); -err: - return; +static void show_core_regs(UserX86RegsEntry *regs) +{ +#define pr_regs4(s, n1, n2, n3, n4) \ + pr_msg("\t%8s: 0x%-16lx " \ + "%8s: 0x%-16lx " \ + "%8s: 0x%-16lx " \ + "%8s: 0x%-16lx\n", \ + #n1, s->n1, \ + #n2, s->n2, \ + #n3, s->n3, \ + #n4, s->n4) + +#define pr_regs3(s, n1, n2, n3) \ + pr_msg("\t%8s: 0x%-16lx " \ + "%8s: 0x%-16lx " \ + "%8s: 0x%-16lx\n", \ + #n1, s->n1, \ + #n2, s->n2, \ + #n3, s->n3) + + pr_msg("\t---[ GP registers set ]---\n"); + + pr_regs4(regs, cs, ip, ds, es); + pr_regs4(regs, ss, sp, fs, gs); + pr_regs4(regs, di, si, dx, cx); + pr_regs4(regs, ax, r8, r9, r10); + pr_regs4(regs, r11, r12, r13, r14); + pr_regs3(regs, r15, bp, bx); + pr_regs4(regs, orig_ax, flags, fs_base, gs_base); + pr_msg("\n"); +} + +void show_thread_info(ThreadInfoX86 *thread_info) +{ + pr_msg("\t---[ Thread info ]---\n"); + pr_msg("\tclear_tid_addr: 0x%lx\n", thread_info->clear_tid_addr); + pr_msg("\n"); + + show_core_regs(thread_info->gpregs); } void show_core(int fd_core, struct cr_options *o) { - struct stat stat; - bool is_thread; + CoreEntry *core; - if (fstat(fd_core, &stat)) { - pr_perror("Can't get stat on core file"); - goto out; + pr_img_head(CR_FD_CORE); + if 
(pb_read_eof(fd_core, &core, core_entry) > 0) { + + pr_msg("\t---[ General ]---\n"); + pr_msg("\tmtype: 0x%x\n", core->mtype); + pr_msg("\n"); + + /* Continue if we support it */ + if (core->mtype == CORE_ENTRY__MARCH__X86_64) { + show_thread_info(core->thread_info); + show_core_rest(core->tc); + show_core_ids(core->ids); + } + + core_entry__free_unpacked(core, NULL); } - - is_thread = (stat.st_size == GET_FILE_OFF_AFTER(struct core_entry)); - - if (is_thread) - pr_img_head(CR_FD_CORE, " (thread)"); - else - pr_img_head(CR_FD_CORE); - - show_core_regs(fd_core); - show_core_rest(fd_core); - show_core_ids(fd_core); -out: pr_img_tail(CR_FD_CORE); } diff --git a/include/image.h b/include/image.h index 637e59a62..11df0f416 100644 --- a/include/image.h +++ b/include/image.h @@ -87,138 +87,19 @@ struct page_entry { #define CR_CAP_SIZE 2 -#define HEADER_ARCH_X86_64 1 - -struct image_header { - u32 arch; - u32 flags; -} __packed; - -/* - * PTRACE_GETREGS - * PTRACE_GETFPREGS - * PTRACE_GETFPXREGS dep CONFIG_X86_32 - * PTRACE_GET_THREAD_AREA dep CONFIG_X86_32 || CONFIG_IA32_EMULATION - * PTRACE_GETFDPIC dep CONFIG_BINFMT_ELF_FDPIC - * - * PTRACE_ARCH_PRCTL dep CONFIG_X86_64 - * ARCH_SET_GS/ARCH_GET_FS - * ARCH_SET_FS/ARCH_GET_GS - */ - #ifdef CONFIG_X86_64 -struct user_regs_entry { - u64 r15; - u64 r14; - u64 r13; - u64 r12; - u64 bp; - u64 bx; - u64 r11; - u64 r10; - u64 r9; - u64 r8; - u64 ax; - u64 cx; - u64 dx; - u64 si; - u64 di; - u64 orig_ax; - u64 ip; - u64 cs; - u64 flags; - u64 sp; - u64 ss; - u64 fs_base; - u64 gs_base; - u64 ds; - u64 es; - u64 fs; - u64 gs; -} __packed; - -struct desc_struct { - union { - struct { - u32 a; - u32 b; - } x86_32; - u64 base_addr; - }; -} __packed; - -struct user_fpregs_entry { - u16 cwd; - u16 swd; - u16 twd; /* Note this is not the same as - the 32bit/x87/FSAVE twd */ - u16 fop; - u64 rip; - u64 rdp; - u32 mxcsr; - u32 mxcsr_mask; - u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ - u32 xmm_space[64]; /* 16*16 bytes 
for each XMM-reg = 256 bytes */ - u32 padding[24]; -} __packed; - #define GDT_ENTRY_TLS_ENTRIES 3 #define TASK_COMM_LEN 16 #define TASK_PF_USED_MATH 0x00002000 -#define CKPT_ARCH_SIZE (1 * 4096) - -struct ckpt_arch_entry { - union { - struct { - struct user_regs_entry gpregs; - struct user_fpregs_entry fpregs; - }; - u8 __arch_pad[CKPT_ARCH_SIZE]; /* should be enough for all */ - }; -}; - -#define CKPT_CORE_SIZE (2 * 4096) - #ifdef CONFIG_X86_64 # define AT_VECTOR_SIZE 44 #else # define AT_VECTOR_SIZE 22 /* Not needed at moment */ #endif -struct task_core_entry { - u8 task_state; - u8 pad[3]; - u32 exit_code; - - u32 personality; - u8 comm[TASK_COMM_LEN]; - u32 flags; - u64 blk_sigset; -}; - -struct core_ids_entry { - u32 vm_id; - u32 files_id; - u32 fs_id; - u32 sighand_id; -} __packed; - -struct core_entry { - union { - struct { - struct image_header header; - struct task_core_entry tc; - struct ckpt_arch_entry arch; - struct core_ids_entry ids; - u64 clear_tid_address; - }; - u8 __core_pad[CKPT_CORE_SIZE]; - }; -} __packed; - #define TASK_ALIVE 0x1 #define TASK_DEAD 0x2 #define TASK_STOPPED 0x3 /* FIXME - implement */ diff --git a/include/restorer.h b/include/restorer.h index 1be2168a2..1cf2970e8 100644 --- a/include/restorer.h +++ b/include/restorer.h @@ -14,6 +14,7 @@ #include "../protobuf/mm.pb-c.h" #include "../protobuf/vma.pb-c.h" #include "../protobuf/creds.pb-c.h" +#include "../protobuf/core.pb-c.h" #ifndef CONFIG_X86_64 # error Only x86-64 is supported @@ -60,15 +61,15 @@ struct thread_restore_args { struct restore_mem_zone mem_zone; int pid; - int fd_core; mutex_t *rst_lock; + UserX86RegsEntry gpregs; + u64 clear_tid_addr; } __aligned(sizeof(long)); struct task_restore_core_args { struct restore_mem_zone mem_zone; int pid; /* task pid */ - int fd_core; /* opened core file */ int fd_exe_link; /* opened self->exe file */ int fd_pages; /* opened pages dump file */ int logfd; @@ -95,6 +96,11 @@ struct task_restore_core_args { MmEntry mm; u64 
mm_saved_auxv[AT_VECTOR_SIZE]; + u64 clear_tid_addr; + u64 blk_sigset; + char comm[TASK_COMM_LEN]; + TaskKobjIdsEntry ids; + UserX86RegsEntry gpregs; } __aligned(sizeof(long)); struct pt_regs { diff --git a/protobuf/Makefile b/protobuf/Makefile index 0b66c3bd3..1ffa913cb 100644 --- a/protobuf/Makefile +++ b/protobuf/Makefile @@ -51,6 +51,7 @@ PROTO_FILES += ipc-sem.proto PROTO_FILES += utsns.proto PROTO_FILES += creds.proto PROTO_FILES += vma.proto +PROTO_FILES += core.proto HDRS := $(patsubst %.proto,%.pb-c.h,$(PROTO_FILES)) SRCS := $(patsubst %.proto,%.pb-c.c,$(PROTO_FILES)) diff --git a/protobuf/core.proto b/protobuf/core.proto new file mode 100644 index 000000000..4fd57537a --- /dev/null +++ b/protobuf/core.proto @@ -0,0 +1,80 @@ +message user_x86_regs_entry { + required uint64 r15 = 1; + required uint64 r14 = 2; + required uint64 r13 = 3; + required uint64 r12 = 4; + required uint64 bp = 5; + required uint64 bx = 6; + required uint64 r11 = 7; + required uint64 r10 = 8; + required uint64 r9 = 9; + required uint64 r8 = 10; + required uint64 ax = 11; + required uint64 cx = 12; + required uint64 dx = 13; + required uint64 si = 14; + required uint64 di = 15; + required uint64 orig_ax = 16; + required uint64 ip = 17; + required uint64 cs = 18; + required uint64 flags = 19; + required uint64 sp = 20; + required uint64 ss = 21; + required uint64 fs_base = 22; + required uint64 gs_base = 23; + required uint64 ds = 24; + required uint64 es = 25; + required uint64 fs = 26; + required uint64 gs = 27; +} + +message user_x86_fpregs_entry { + required uint32 cwd = 1; + required uint32 swd = 2; + required uint32 twd = 3; + required uint32 fop = 4; + required uint64 rip = 5; + required uint64 rdp = 6; + required uint32 mxcsr = 7; + required uint32 mxcsr_mask = 8; + repeated uint32 st_space = 9; + repeated uint32 xmm_space = 10; + repeated uint32 padding = 11; +} + +message task_core_entry { + required uint32 task_state = 1; + required uint32 exit_code = 2; + + required uint32 
personality = 3; + required uint32 flags = 4; + required uint64 blk_sigset = 5; + + required string comm = 6; +} + +message task_kobj_ids_entry { + required uint32 vm_id = 1; + required uint32 files_id = 2; + required uint32 fs_id = 3; + required uint32 sighand_id = 4; +} + +message thread_info_x86 { + required uint64 clear_tid_addr = 1; + required user_x86_regs_entry gpregs = 2; + required user_x86_fpregs_entry fpregs = 3; +} + +message core_entry { + enum march { + UNKNOWN = 0; + X86_64 = 1; + } + + required march mtype = 1; + optional thread_info_x86 thread_info = 2; + + optional task_core_entry tc = 3; + optional task_kobj_ids_entry ids = 4; +} diff --git a/restorer.c b/restorer.c index e20a2abed..dfea61a3b 100644 --- a/restorer.c +++ b/restorer.c @@ -131,7 +131,6 @@ static void restore_creds(CredsEntry *ce) long __export_restore_thread(struct thread_restore_args *args) { long ret = -1; - struct core_entry *core_entry; struct rt_sigframe *rt_sigframe; unsigned long new_sp, fsgs_base; int my_pid = sys_gettid(); @@ -143,23 +142,12 @@ long __export_restore_thread(struct thread_restore_args *args) goto core_restore_end; } - core_entry = (struct core_entry *)&args->mem_zone.heap; - - ret = sys_read(args->fd_core, core_entry, sizeof(*core_entry)); - if (ret != sizeof(*core_entry)) { - write_num_n(__LINE__); - goto core_restore_end; - } - - /* We're to close it! 
*/ - sys_close(args->fd_core); - - sys_set_tid_address((int *) core_entry->clear_tid_address); + sys_set_tid_address((int *)args->clear_tid_addr); rt_sigframe = (void *)args->mem_zone.rt_sigframe + 8; -#define CPREGT1(d) rt_sigframe->uc.uc_mcontext.d = core_entry->arch.gpregs.d -#define CPREGT2(d,s) rt_sigframe->uc.uc_mcontext.d = core_entry->arch.gpregs.s +#define CPREGT1(d) rt_sigframe->uc.uc_mcontext.d = args->gpregs.d +#define CPREGT2(d,s) rt_sigframe->uc.uc_mcontext.d = args->gpregs.s CPREGT1(r8); CPREGT1(r9); @@ -183,7 +171,7 @@ long __export_restore_thread(struct thread_restore_args *args) CPREGT1(gs); CPREGT1(fs); - fsgs_base = core_entry->arch.gpregs.fs_base; + fsgs_base = args->gpregs.fs_base; ret = sys_arch_prctl(ARCH_SET_FS, fsgs_base); if (ret) { write_num_n(__LINE__); @@ -191,7 +179,7 @@ long __export_restore_thread(struct thread_restore_args *args) goto core_restore_end; } - fsgs_base = core_entry->arch.gpregs.gs_base; + fsgs_base = args->gpregs.gs_base; ret = sys_arch_prctl(ARCH_SET_GS, fsgs_base); if (ret) { write_num_n(__LINE__); @@ -294,7 +282,6 @@ static u64 restore_mapping(const VmaEntry *vma_entry) long __export_restore_task(struct task_restore_core_args *args) { long ret = -1; - struct core_entry *core_entry; VmaEntry *vma_entry; u64 va; @@ -310,8 +297,6 @@ long __export_restore_task(struct task_restore_core_args *args) restorer_set_logfd(args->logfd); - core_entry = first_on_heap(core_entry, args->mem_zone.heap); - #if 0 write_hex_n((long)args); write_hex_n((long)args->mem_zone.heap); @@ -319,12 +304,6 @@ long __export_restore_task(struct task_restore_core_args *args) write_hex_n((long)vma_entry); #endif - ret = sys_read(args->fd_core, core_entry, sizeof(*core_entry)); - if (ret != sizeof(*core_entry)) { - write_num_n(__LINE__); - goto core_restore_end; - } - for (vma_entry = args->self_vmas; vma_entry->start != 0; vma_entry++) { if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR)) continue; @@ -412,7 +391,6 @@ long __export_restore_task(struct 
task_restore_core_args *args) sys_munmap(args->tgt_vmas, ((void *)(vma_entry + 1) - ((void *)args->tgt_vmas))); - sys_close(args->fd_core); ret = sys_munmap(args->shmems, SHMEMS_SIZE); if (ret < 0) { @@ -421,12 +399,12 @@ long __export_restore_task(struct task_restore_core_args *args) goto core_restore_end; } - sys_set_tid_address((int *) core_entry->clear_tid_address); + sys_set_tid_address((int *)args->clear_tid_addr); /* * Tune up the task fields. */ - ret |= sys_prctl_safe(PR_SET_NAME, (long)core_entry->tc.comm, 0, 0); + ret |= sys_prctl_safe(PR_SET_NAME, (long)args->comm, 0, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_CODE, (long)args->mm.mm_start_code, 0); ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_CODE, (long)args->mm.mm_end_code, 0); @@ -462,8 +440,8 @@ long __export_restore_task(struct task_restore_core_args *args) */ rt_sigframe = (void *)args->mem_zone.rt_sigframe + 8; -#define CPREG1(d) rt_sigframe->uc.uc_mcontext.d = core_entry->arch.gpregs.d -#define CPREG2(d,s) rt_sigframe->uc.uc_mcontext.d = core_entry->arch.gpregs.s +#define CPREG1(d) rt_sigframe->uc.uc_mcontext.d = args->gpregs.d +#define CPREG2(d,s) rt_sigframe->uc.uc_mcontext.d = args->gpregs.s CPREG1(r8); CPREG1(r9); @@ -487,7 +465,7 @@ long __export_restore_task(struct task_restore_core_args *args) CPREG1(gs); CPREG1(fs); - fsgs_base = core_entry->arch.gpregs.fs_base; + fsgs_base = args->gpregs.fs_base; ret = sys_arch_prctl(ARCH_SET_FS, fsgs_base); if (ret) { write_num_n(__LINE__); @@ -495,7 +473,7 @@ long __export_restore_task(struct task_restore_core_args *args) goto core_restore_end; } - fsgs_base = core_entry->arch.gpregs.gs_base; + fsgs_base = args->gpregs.gs_base; ret = sys_arch_prctl(ARCH_SET_GS, fsgs_base); if (ret) { write_num_n(__LINE__); @@ -506,7 +484,7 @@ long __export_restore_task(struct task_restore_core_args *args) /* * Blocked signals. 
*/ - rt_sigframe->uc.uc_sigmask.sig[0] = core_entry->tc.blk_sigset; + rt_sigframe->uc.uc_sigmask.sig[0] = args->blk_sigset; /* * Threads restoration. This requires some more comments. This