2012-01-16 13:53:51 +03:00
|
|
|
#define CR_NOGLIBC
|
2011-10-24 22:23:06 +04:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <sys/stat.h>
|
2012-01-19 01:33:19 +03:00
|
|
|
#include <sys/wait.h>
|
2012-01-24 16:45:19 +04:00
|
|
|
#include <sys/time.h>
|
2012-02-14 20:20:10 +03:00
|
|
|
#include <sys/shm.h>
|
2011-10-24 22:23:06 +04:00
|
|
|
#include <fcntl.h>
|
|
|
|
#include <unistd.h>
|
2011-11-12 19:26:40 +04:00
|
|
|
#include <sched.h>
|
2011-10-24 22:23:06 +04:00
|
|
|
|
|
|
|
#include "compiler.h"
|
|
|
|
#include "types.h"
|
|
|
|
#include "syscall.h"
|
2012-01-17 11:27:29 +03:00
|
|
|
#include "restorer-log.h"
|
2011-10-24 22:23:06 +04:00
|
|
|
#include "util.h"
|
|
|
|
#include "image.h"
|
|
|
|
|
2011-10-25 21:25:42 +04:00
|
|
|
#include "crtools.h"
|
2011-12-26 20:33:09 +04:00
|
|
|
#include "lock.h"
|
2011-10-24 22:23:06 +04:00
|
|
|
#include "restorer.h"
|
|
|
|
|
2012-07-19 12:35:25 +04:00
|
|
|
#include "protobuf/creds.pb-c.h"
|
|
|
|
|
2012-02-07 13:29:01 +04:00
|
|
|
/*
 * Call sys_prctl() and, on failure, log the expansion-site line number
 * and the error value through the restorer log (glibc/printf are not
 * available here -- CR_NOGLIBC).  Evaluates to the prctl return code,
 * so callers can still accumulate or test it.
 */
#define sys_prctl_safe(opcode, val1, val2, val3)			\
	({								\
		long __ret = sys_prctl(opcode, val1, val2, val3, 0);	\
		if (__ret) {						\
			write_num_n(__LINE__);				\
			write_num_n(__ret);				\
		}							\
		__ret;							\
	})
|
|
|
|
|
2012-01-19 01:33:19 +03:00
|
|
|
static struct task_entries *task_entries;
|
|
|
|
|
|
|
|
static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
|
|
|
|
{
|
|
|
|
write_num(siginfo->si_pid);
|
|
|
|
if (siginfo->si_code & CLD_EXITED)
|
|
|
|
write_string(" exited, status=");
|
|
|
|
else if (siginfo->si_code & CLD_KILLED)
|
|
|
|
write_string(" killed by signal ");
|
|
|
|
write_num_n(siginfo->si_status);
|
|
|
|
|
2012-04-03 00:52:00 +04:00
|
|
|
futex_abort_and_wake(&task_entries->nr_in_progress);
|
2012-01-19 01:33:19 +03:00
|
|
|
/* sa_restorer may be unmaped, so we can't go back to userspace*/
|
|
|
|
sys_kill(sys_getpid(), SIGSTOP);
|
2012-06-13 14:29:00 +04:00
|
|
|
sys_exit_group(1);
|
2012-01-19 01:33:19 +03:00
|
|
|
}
|
|
|
|
|
2012-07-19 12:35:25 +04:00
|
|
|
/*
 * Restore the task's credentials (uids/gids, securebits, capability
 * bounding set and cap bits) from the checkpoint image.  Called while
 * still root, and the steps below are strictly ordered -- see the
 * numbered comments.  A NULL @ce means "nothing to restore" (used by
 * the thread path, which currently has no per-thread creds).
 */
static void restore_creds(CredsEntry *ce)
{
	int b, i;
	struct cap_header hdr;
	struct cap_data data[_LINUX_CAPABILITY_U32S_3];

	/*
	 * We're still root here and thus can do it without failures.
	 */

	if (ce == NULL)
		return;

	/*
	 * First -- set the SECURE_NO_SETUID_FIXUP bit not to
	 * lose caps bits when changing xids.
	 */

	sys_prctl(PR_SET_SECUREBITS, 1 << SECURE_NO_SETUID_FIXUP, 0, 0, 0);

	/*
	 * Second -- restore xids. Since we still have the CAP_SETUID
	 * capability nothing should fail. But call the setfsXid last
	 * to override the setresXid settings.
	 */

	sys_setresuid(ce->uid, ce->euid, ce->suid);
	sys_setfsuid(ce->fsuid);
	sys_setresgid(ce->gid, ce->egid, ce->sgid);
	sys_setfsgid(ce->fsgid);

	/*
	 * Third -- restore securebits. We don't need them in any
	 * special state any longer.
	 */

	sys_prctl(PR_SET_SECUREBITS, ce->secbits, 0, 0, 0);

	/*
	 * Fourth -- trim bset. This can only be done while
	 * having the CAP_SETPCAP capability.
	 */

	for (b = 0; b < CR_CAP_SIZE; b++) {
		for (i = 0; i < 32; i++) {
			if (ce->cap_bnd[b] & (1 << i))
				/* already set */
				continue;

			/* drop every bounding-set cap the image didn't have */
			sys_prctl(PR_CAPBSET_DROP, i + b * 32, 0, 0, 0);
		}
	}

	/*
	 * Fifth -- restore caps. Nothing but cap bits are changed
	 * at this stage, so just do it.
	 */

	hdr.version = _LINUX_CAPABILITY_VERSION_3;
	hdr.pid = 0;	/* 0 == current task */

	BUILD_BUG_ON(_LINUX_CAPABILITY_U32S_3 != CR_CAP_SIZE);

	for (i = 0; i < CR_CAP_SIZE; i++) {
		data[i].eff = ce->cap_eff[i];
		data[i].prm = ce->cap_prm[i];
		data[i].inh = ce->cap_inh[i];
	}

	sys_capset(&hdr, data);
}
|
|
|
|
|
2011-11-16 18:19:24 +04:00
|
|
|
/*
 * Threads restoration via sigreturn. Note it's a locked
 * routine: the caller holds args->rst_lock, and this function
 * unlocks it once the thread's registers are staged.
 *
 * Reads the thread's core entry from args->fd_core, fills the
 * pre-allocated rt_sigframe with the saved GP registers, restores
 * fs/gs bases via arch_prctl, synchronizes with the restore
 * state machine and finally jumps into the restored context
 * through rt_sigreturn.  On success this never returns.
 */
long __export_restore_thread(struct thread_restore_args *args)
{
	long ret = -1;
	struct core_entry *core_entry;
	struct rt_sigframe *rt_sigframe;
	unsigned long new_sp, fsgs_base;
	int my_pid = sys_gettid();

	/* The clone dance must have given us exactly the dumped tid. */
	if (my_pid != args->pid) {
		write_num_n(__LINE__);
		write_num_n(my_pid);
		write_num_n(args->pid);
		goto core_restore_end;
	}

	/* The per-thread heap doubles as the core_entry read buffer. */
	core_entry = (struct core_entry *)&args->mem_zone.heap;

	ret = sys_read(args->fd_core, core_entry, sizeof(*core_entry));
	if (ret != sizeof(*core_entry)) {
		write_num_n(__LINE__);
		goto core_restore_end;
	}

	/* We're to close it! */
	sys_close(args->fd_core);

	sys_set_tid_address((int *) core_entry->clear_tid_address);

	/* +8 keeps the sigframe at the alignment sigreturn expects */
	rt_sigframe = (void *)args->mem_zone.rt_sigframe + 8;

/* Copy a saved GP register into the sigframe (same / different name). */
#define CPREGT1(d)	rt_sigframe->uc.uc_mcontext.d = core_entry->arch.gpregs.d
#define CPREGT2(d,s)	rt_sigframe->uc.uc_mcontext.d = core_entry->arch.gpregs.s

	CPREGT1(r8);
	CPREGT1(r9);
	CPREGT1(r10);
	CPREGT1(r11);
	CPREGT1(r12);
	CPREGT1(r13);
	CPREGT1(r14);
	CPREGT1(r15);
	CPREGT2(rdi, di);
	CPREGT2(rsi, si);
	CPREGT2(rbp, bp);
	CPREGT2(rbx, bx);
	CPREGT2(rdx, dx);
	CPREGT2(rax, ax);
	CPREGT2(rcx, cx);
	CPREGT2(rsp, sp);
	CPREGT2(rip, ip);
	CPREGT2(eflags, flags);
	CPREGT1(cs);
	CPREGT1(gs);
	CPREGT1(fs);

	/* TLS / per-thread segment bases must go through arch_prctl. */
	fsgs_base = core_entry->arch.gpregs.fs_base;
	ret = sys_arch_prctl(ARCH_SET_FS, fsgs_base);
	if (ret) {
		write_num_n(__LINE__);
		write_num_n(ret);
		goto core_restore_end;
	}

	fsgs_base = core_entry->arch.gpregs.gs_base;
	ret = sys_arch_prctl(ARCH_SET_GS, fsgs_base);
	if (ret) {
		write_num_n(__LINE__);
		write_num_n(ret);
		goto core_restore_end;
	}

	/* Let the leader spawn the next thread. */
	mutex_unlock(args->rst_lock);

	/*
	 * FIXME -- threads do not share creds, but it looks like
	 * nobody tries to mess with this crap. That said we should
	 * pass the master thread creds here
	 */

	restore_creds(NULL);
	futex_dec_and_wake(&task_entries->nr_in_progress);

	write_num(sys_gettid());
	write_string_n(": Restored");

	/* Rendezvous with the rest of the restore state machine. */
	futex_wait_while(&task_entries->start, CR_STATE_RESTORE);
	futex_dec_and_wake(&task_entries->nr_in_progress);

	/* Point rsp at the sigframe and fire rt_sigreturn -- no return. */
	new_sp = (long)rt_sigframe + 8;
	asm volatile(
		"movq %0, %%rax					\n"
		"movq %%rax, %%rsp				\n"
		"movl $"__stringify(__NR_rt_sigreturn)", %%eax	\n"
		"syscall					\n"
		:
		: "r"(new_sp)
		: "rax","rsp","memory");

core_restore_end:
	write_num_n(__LINE__);
	write_num_n(sys_getpid());
	sys_exit_group(1);
	return -1;
}
|
|
|
|
|
2012-02-07 19:32:11 +04:00
|
|
|
static long restore_self_exe_late(struct task_restore_core_args *args)
|
|
|
|
{
|
2012-03-24 13:22:37 +04:00
|
|
|
int fd = args->fd_exe_link;
|
2012-02-07 19:32:11 +04:00
|
|
|
|
2012-03-24 13:22:37 +04:00
|
|
|
write_string("Restoring EXE\n");
|
|
|
|
sys_prctl_safe(PR_SET_MM, PR_SET_MM_EXE_FILE, fd, 0);
|
|
|
|
sys_close(fd);
|
2012-02-07 19:32:11 +04:00
|
|
|
|
2012-03-24 13:22:37 +04:00
|
|
|
/* FIXME Once kernel side stabilized -- fix error reporting */
|
|
|
|
return 0;
|
2012-02-07 19:32:11 +04:00
|
|
|
}
|
|
|
|
|
2012-07-19 12:43:36 +04:00
|
|
|
/*
 * Recreate one VMA described by @vma_entry at its original address.
 * SysV IPC areas go through shmat(); everything else through a
 * MAP_FIXED mmap().  Returns the mapped address (callers compare it
 * against vma_entry->start to detect failure).  Consumes (closes)
 * vma_entry->fd when one was supplied.
 */
static u64 restore_mapping(const VmaEntry *vma_entry)
{
	int prot = vma_entry->prot;
	int flags = vma_entry->flags | MAP_FIXED;
	u64 addr;

	if (vma_entry_is(vma_entry, VMA_AREA_SYSVIPC))
		return sys_shmat(vma_entry->fd, (void *)vma_entry->start,
				 (vma_entry->prot & PROT_WRITE) ? 0 : SHM_RDONLY);

	/*
	 * Restoring shared mappings is tricky: since we open the
	 * anonymous mapping via map_files/, MAP_ANONYMOUS must be
	 * eliminated so the fd is taken into account by the kernel.
	 */
	if (vma_entry_is(vma_entry, VMA_ANON_SHARED) && (vma_entry->fd != -1UL))
		flags &= ~MAP_ANONYMOUS;

	/* A mapping of file with MAP_SHARED is up to date */
	if (vma_entry->fd == -1 || !(vma_entry->flags & MAP_SHARED))
		prot |= PROT_WRITE;

	/*
	 * Should map memory here. Note we map them as
	 * writable since we're going to restore page
	 * contents.  (PROT_WRITE is dropped again later by the
	 * caller's mprotect pass.)
	 */
	addr = sys_mmap((void *)vma_entry->start,
			vma_entry_len(vma_entry),
			prot, flags,
			vma_entry->fd,
			vma_entry->pgoff);

	if (vma_entry->fd != -1)
		sys_close(vma_entry->fd);

	return addr;
}
|
|
|
|
|
2011-11-16 18:19:24 +04:00
|
|
|
/*
 * The main routine to restore a task via sigreturn.
 * This one is very special: we never return from here,
 * but use the sigreturn facility to restore core registers
 * and jump execution to some predefined ip read from
 * the core file.
 *
 * Rough sequence: install a SIGCHLD watchdog -> read the core
 * entry -> unmap our old VMAs -> map the target VMAs -> read page
 * contents -> fix protections -> tune mm fields via prctl -> fill
 * the sigframe -> clone threads -> restore creds/itimers -> sigreturn.
 */
long __export_restore_task(struct task_restore_core_args *args)
{
	long ret = -1;
	struct core_entry *core_entry;
	VmaEntry *vma_entry;
	u64 va;

	struct rt_sigframe *rt_sigframe;
	unsigned long new_sp, fsgs_base;
	pid_t my_pid = sys_getpid();
	rt_sigaction_t act;

	task_entries = args->task_entries;

	/* Catch children dying mid-restore: reuse the old mask/flags. */
	sys_sigaction(SIGCHLD, NULL, &act, sizeof(rt_sigset_t));
	act.rt_sa_handler = sigchld_handler;
	sys_sigaction(SIGCHLD, &act, NULL, sizeof(rt_sigset_t));

	restorer_set_logfd(args->logfd);

	core_entry = first_on_heap(core_entry, args->mem_zone.heap);

#if 0
	write_hex_n((long)args);
	write_hex_n((long)args->mem_zone.heap);
	write_hex_n((long)core_entry);
	write_hex_n((long)vma_entry);
#endif

	ret = sys_read(args->fd_core, core_entry, sizeof(*core_entry));
	if (ret != sizeof(*core_entry)) {
		write_num_n(__LINE__);
		goto core_restore_end;
	}

	/* Drop our own (pre-restore) regular VMAs to make room. */
	for (vma_entry = args->self_vmas; vma_entry->start != 0; vma_entry++) {
		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
			continue;

		if (sys_munmap((void *)vma_entry->start, vma_entry_len(vma_entry))) {
			write_num_n(__LINE__);
			goto core_restore_end;
		}
	}

	/* The self-vma list itself is no longer needed. */
	sys_munmap(args->self_vmas,
			((void *)(vma_entry + 1) - ((void *)args->self_vmas)));

	/*
	 * OK, lets try to map new one.
	 */
	for (vma_entry = args->tgt_vmas; vma_entry->start != 0; vma_entry++) {
		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
			continue;

		va = restore_mapping(vma_entry);

		if (va != vma_entry->start) {
			write_num_n(__LINE__);
			write_hex_n(vma_entry->start);
			write_hex_n(vma_entry->end);
			write_hex_n(vma_entry->prot);
			write_hex_n(vma_entry->flags);
			write_hex_n(vma_entry->fd);
			write_hex_n(vma_entry->pgoff);
			write_hex_n(va);
			goto core_restore_end;
		}
	}

	/*
	 * Read page contents.  The pages image is a stream of
	 * (address, PAGE_SIZE bytes) records, terminated by EOF.
	 */
	while (1) {
		ret = sys_read(args->fd_pages, &va, sizeof(va));
		if (!ret)
			break;

		if (ret != sizeof(va)) {
			write_num_n(__LINE__);
			write_num_n(ret);
			goto core_restore_end;
		}

		ret = sys_read(args->fd_pages, (void *)va, PAGE_SIZE);
		if (ret != PAGE_SIZE) {
			write_num_n(__LINE__);
			write_num_n(ret);
			goto core_restore_end;
		}
	}

	sys_close(args->fd_pages);

	/*
	 * Walk through all VMAs again to drop PROT_WRITE
	 * if it was not there (restore_mapping forced it on
	 * so the pages could be written above).
	 */
	for (vma_entry = args->tgt_vmas; vma_entry->start != 0; vma_entry++) {
		if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR)))
			continue;

		if (vma_entry_is(vma_entry, VMA_ANON_SHARED)) {
			struct shmem_info *entry;

			/* Tell shmem waiters this segment is populated. */
			entry = find_shmem(args->shmems,
						vma_entry->shmid);
			if (entry && entry->pid == my_pid &&
			    entry->start == vma_entry->start)
				futex_set_and_wake(&entry->lock, 1);
		}

		if (vma_entry->prot & PROT_WRITE)
			continue;

		sys_mprotect((void *)vma_entry->start,
				vma_entry_len(vma_entry),
				vma_entry->prot);
	}

	sys_munmap(args->tgt_vmas,
			((void *)(vma_entry + 1) - ((void *)args->tgt_vmas)));
	sys_close(args->fd_core);

	ret = sys_munmap(args->shmems, SHMEMS_SIZE);
	if (ret < 0) {
		write_num_n(__LINE__);
		write_num_n(ret);
		goto core_restore_end;
	}

	sys_set_tid_address((int *) core_entry->clear_tid_address);

	/*
	 * Tune up the task fields.
	 */
	ret |= sys_prctl_safe(PR_SET_NAME, (long)core_entry->tc.comm, 0, 0);

	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_CODE, (long)args->mm.mm_start_code, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_CODE, (long)args->mm.mm_end_code, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_DATA, (long)args->mm.mm_start_data, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_DATA, (long)args->mm.mm_end_data, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_STACK, (long)args->mm.mm_start_stack, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_BRK, (long)args->mm.mm_start_brk, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_BRK, (long)args->mm.mm_brk, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ARG_START, (long)args->mm.mm_arg_start, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ARG_END, (long)args->mm.mm_arg_end, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ENV_START, (long)args->mm.mm_env_start, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ENV_END, (long)args->mm.mm_env_end, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_AUXV, (long)args->mm_saved_auxv,
				sizeof(args->mm_saved_auxv));
	if (ret)
		goto core_restore_end;

	/*
	 * Because of requirements applied from kernel side
	 * we need to restore /proc/pid/exe symlink late,
	 * after old existing VMAs are superseded with
	 * new ones from image file.
	 */
	ret = restore_self_exe_late(args);
	if (ret)
		goto core_restore_end;

	/*
	 * We need to prepare a valid sigframe here, so
	 * after sigreturn the kernel will pick up the
	 * registers from the frame, set them up and
	 * finally pass execution to the new IP.
	 */
	rt_sigframe = (void *)args->mem_zone.rt_sigframe + 8;

/* Copy a saved GP register into the sigframe (same / different name). */
#define CPREG1(d)	rt_sigframe->uc.uc_mcontext.d = core_entry->arch.gpregs.d
#define CPREG2(d,s)	rt_sigframe->uc.uc_mcontext.d = core_entry->arch.gpregs.s

	CPREG1(r8);
	CPREG1(r9);
	CPREG1(r10);
	CPREG1(r11);
	CPREG1(r12);
	CPREG1(r13);
	CPREG1(r14);
	CPREG1(r15);
	CPREG2(rdi, di);
	CPREG2(rsi, si);
	CPREG2(rbp, bp);
	CPREG2(rbx, bx);
	CPREG2(rdx, dx);
	CPREG2(rax, ax);
	CPREG2(rcx, cx);
	CPREG2(rsp, sp);
	CPREG2(rip, ip);
	CPREG2(eflags, flags);
	CPREG1(cs);
	CPREG1(gs);
	CPREG1(fs);

	/* fs/gs segment bases go through arch_prctl, not the frame. */
	fsgs_base = core_entry->arch.gpregs.fs_base;
	ret = sys_arch_prctl(ARCH_SET_FS, fsgs_base);
	if (ret) {
		write_num_n(__LINE__);
		write_num_n(ret);
		goto core_restore_end;
	}

	fsgs_base = core_entry->arch.gpregs.gs_base;
	ret = sys_arch_prctl(ARCH_SET_GS, fsgs_base);
	if (ret) {
		write_num_n(__LINE__);
		write_num_n(ret);
		goto core_restore_end;
	}

	/*
	 * Blocked signals.
	 */
	rt_sigframe->uc.uc_sigmask.sig[0] = core_entry->tc.blk_sigset;

	/*
	 * Threads restoration. This requires some more comments. This
	 * restorer routine and thread restorer routine has the following
	 * memory map, prepared by a caller code.
	 *
	 * | <-- low addresses                                          high addresses --> |
	 * +-------------------------------------------------------+-----------------------+
	 * | this proc body | own stack | heap | rt_sigframe space  |  thread restore zone  |
	 * +-------------------------------------------------------+-----------------------+
	 *
	 * where each thread restore zone is the following
	 *
	 * | <-- low addresses                                     high addresses --> |
	 * +--------------------------------------------------------------------------+
	 * | thread restore proc | thread1 stack | thread1 heap | thread1 rt_sigframe |
	 * +--------------------------------------------------------------------------+
	 */

	if (args->nr_threads > 1) {
		struct thread_restore_args *thread_args = args->thread_args;
		long clone_flags = CLONE_VM | CLONE_FILES | CLONE_SIGHAND |
				   CLONE_THREAD | CLONE_SYSVSEM;
		long last_pid_len;
		long parent_tid;
		int i, fd;

		/*
		 * Threads must get back their dumped tids; that is done by
		 * writing (tid - 1) into ns_last_pid before each clone,
		 * under an exclusive flock to serialize with other tasks.
		 */
		fd = sys_open(LAST_PID_PATH, O_RDWR, LAST_PID_PERM);
		if (fd < 0) {
			write_num_n(__LINE__);
			write_num_n(fd);
			goto core_restore_end;
		}

		ret = sys_flock(fd, LOCK_EX);
		if (ret) {
			write_num_n(__LINE__);
			write_num_n(ret);
			goto core_restore_end;
		}

		for (i = 0; i < args->nr_threads; i++) {
			char last_pid_buf[16];

			/* skip self */
			if (thread_args[i].pid == args->pid)
				continue;

			/* released by __export_restore_thread() */
			mutex_lock(&args->rst_lock);

			new_sp =
				RESTORE_ALIGN_STACK((long)thread_args[i].mem_zone.stack,
						sizeof(thread_args[i].mem_zone.stack));

			last_pid_len = vprint_num(last_pid_buf, thread_args[i].pid - 1);
			ret = sys_write(fd, last_pid_buf, last_pid_len - 1);
			if (ret < 0) {
				write_num_n(__LINE__);
				write_num_n(ret);
				write_string_n(last_pid_buf);
				goto core_restore_end;
			}

			/*
			 * To achieve functionality like libc's clone()
			 * we need a pure assembly here, because clone()'ed
			 * thread will run with own stack and we must not
			 * have any additional instructions... oh, dear...
			 *
			 * (The child pops clone_restore_fn and its argument
			 * off the freshly built stack and calls it.)
			 */
			asm volatile(
				"clone_emul:				\n"
				"movq %2, %%rsi				\n"
				"subq $16, %%rsi			\n"
				"movq %6, %%rdi				\n"
				"movq %%rdi, 8(%%rsi)			\n"
				"movq %5, %%rdi				\n"
				"movq %%rdi, 0(%%rsi)			\n"
				"movq %1, %%rdi				\n"
				"movq %3, %%rdx				\n"
				"movq %4, %%r10				\n"
				"movl $"__stringify(__NR_clone)", %%eax	\n"
				"syscall				\n"

				"testq %%rax,%%rax			\n"
				"jz thread_run				\n"

				"movq %%rax, %0				\n"
				"jmp clone_end				\n"

				"thread_run:				\n"	/* new stack here */
				"xorq %%rbp, %%rbp			\n"	/* clear ABI frame pointer */
				"popq %%rax				\n"	/* clone_restore_fn -- restore_thread */
				"popq %%rdi				\n"	/* arguments */
				"callq *%%rax				\n"

				"clone_end:				\n"
				: "=r"(ret)
				: "g"(clone_flags),
				  "g"(new_sp),
				  "g"(&parent_tid),
				  "g"(&thread_args[i].pid),
				  "g"(args->clone_restore_fn),
				  "g"(&thread_args[i])
				: "rax", "rdi", "rsi", "rdx", "r10", "memory");
		}

		ret = sys_flock(fd, LOCK_UN);
		if (ret) {
			write_num_n(__LINE__);
			write_num_n(ret);
			goto core_restore_end;
		}

		sys_close(fd);
	}

	/*
	 * Restore creds late to avoid potential problems with
	 * insufficient caps for restoring this or that before
	 */

	restore_creds(&args->creds);

	futex_dec_and_wake(&args->task_entries->nr_in_progress);

	write_num(sys_getpid());
	write_string_n(": Restored");

	/* Rendezvous with the coordinator through the restore states. */
	futex_wait_while(&args->task_entries->start, CR_STATE_RESTORE);

	/* Put back the task's original SIGCHLD disposition. */
	sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(rt_sigset_t));

	futex_dec_and_wake(&args->task_entries->nr_in_progress);

	sys_close(args->logfd);

	futex_wait_while(&args->task_entries->start, CR_STATE_RESTORE_SIGCHLD);

	/*
	 * The code that prepared the itimers makes sure the
	 * code below doesn't fail due to bad timing values.
	 */

#define itimer_armed(args, i)				\
		(args->itimers[i].it_interval.tv_sec ||	\
		 args->itimers[i].it_interval.tv_usec)

	if (itimer_armed(args, 0))
		sys_setitimer(ITIMER_REAL, &args->itimers[0], NULL);
	if (itimer_armed(args, 1))
		sys_setitimer(ITIMER_VIRTUAL, &args->itimers[1], NULL);
	if (itimer_armed(args, 2))
		sys_setitimer(ITIMER_PROF, &args->itimers[2], NULL);

	ret = sys_munmap(args->task_entries, TASK_ENTRIES_SIZE);
	if (ret < 0) {
		/* Encode line and errno into one value for the crash path. */
		ret = ((long)__LINE__ << 32) | -ret;
		goto core_restore_failed;
	}

	/*
	 * Sigframe stack.
	 */
	new_sp = (long)rt_sigframe + 8;

	/*
	 * Prepare the stack and call for sigreturn,
	 * pure assembly since we don't need any additional
	 * code insns from gcc.
	 */
	asm volatile(
		"movq %0, %%rax					\n"
		"movq %%rax, %%rsp				\n"
		"movl $"__stringify(__NR_rt_sigreturn)", %%eax	\n"
		"syscall					\n"
		:
		: "r"(new_sp)
		: "rax","rsp","memory");

core_restore_end:
	write_num_n(__LINE__);
	write_num_n(sys_getpid());
	sys_exit_group(1);
	return -1;

core_restore_failed:
	/*
	 * NOTE(review): "movq 0, %%rax" loads from absolute address 0 and
	 * then jumps there -- presumably a deliberate fault so the encoded
	 * error code in `ret' (placed in rsp) surfaces in the crash dump;
	 * confirm this is intentional.
	 */
	asm volatile(
		"movq %0, %%rsp				\n"
		"movq 0, %%rax				\n"
		"jmp *%%rax				\n"
		:
		: "r"(ret)
		: );
	return ret;
}
|