2011-09-23 12:00:45 +04:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <signal.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <dirent.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include <fcntl.h>
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <sys/vfs.h>
|
|
|
|
#include <sys/ptrace.h>
|
|
|
|
#include <sys/wait.h>
|
2011-12-01 18:21:17 +04:00
|
|
|
#include <sys/file.h>
|
2012-02-14 20:20:10 +03:00
|
|
|
#include <sys/shm.h>
|
2012-06-19 15:53:00 +04:00
|
|
|
#include <sys/mount.h>
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
#include <sched.h>
|
|
|
|
|
|
|
|
#include <sys/sendfile.h>
|
|
|
|
|
|
|
|
#include "compiler.h"
|
|
|
|
#include "types.h"
|
|
|
|
|
|
|
|
#include "image.h"
|
|
|
|
#include "util.h"
|
2011-12-19 18:52:50 +04:00
|
|
|
#include "log.h"
|
2011-10-26 17:35:50 +04:00
|
|
|
#include "syscall.h"
|
2011-10-24 22:23:06 +04:00
|
|
|
#include "restorer.h"
|
2011-12-26 22:12:03 +04:00
|
|
|
#include "sockets.h"
|
2012-08-09 16:17:41 +04:00
|
|
|
#include "sk-packet.h"
|
2011-12-26 20:33:09 +04:00
|
|
|
#include "lock.h"
|
2012-01-10 18:03:00 +04:00
|
|
|
#include "files.h"
|
2012-06-22 16:24:00 +04:00
|
|
|
#include "files-reg.h"
|
2012-05-03 17:36:00 +04:00
|
|
|
#include "pipes.h"
|
2012-06-26 02:36:13 +04:00
|
|
|
#include "fifo.h"
|
2012-04-28 17:38:46 +04:00
|
|
|
#include "sk-inet.h"
|
2012-05-04 13:38:00 +04:00
|
|
|
#include "eventfd.h"
|
2012-05-04 13:38:00 +04:00
|
|
|
#include "eventpoll.h"
|
2012-08-02 12:26:35 +04:00
|
|
|
#include "signalfd.h"
|
2012-01-13 20:52:35 +04:00
|
|
|
#include "proc_parse.h"
|
2012-01-14 21:22:06 +03:00
|
|
|
#include "restorer-blob.h"
|
2011-09-23 12:00:45 +04:00
|
|
|
#include "crtools.h"
|
2012-01-26 15:27:00 +04:00
|
|
|
#include "namespaces.h"
|
2012-05-03 18:01:05 +04:00
|
|
|
#include "shmem.h"
|
2012-05-04 13:38:00 +04:00
|
|
|
#include "mount.h"
|
|
|
|
#include "inotify.h"
|
2012-06-26 14:51:00 +04:00
|
|
|
#include "pstree.h"
|
2012-08-10 19:14:36 +04:00
|
|
|
#include "net.h"
|
2012-09-12 20:00:54 +04:00
|
|
|
#include "tty.h"
|
2012-01-16 23:52:15 +03:00
|
|
|
|
2012-07-18 16:25:06 +04:00
|
|
|
#include "protobuf.h"
|
|
|
|
#include "protobuf/sa.pb-c.h"
|
2012-07-18 16:27:01 +04:00
|
|
|
#include "protobuf/itimer.pb-c.h"
|
2012-07-19 12:43:36 +04:00
|
|
|
#include "protobuf/vma.pb-c.h"
|
2012-07-18 16:25:06 +04:00
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
static struct pstree_item *current;
|
2011-11-13 12:57:16 +04:00
|
|
|
|
2012-01-26 15:26:00 +04:00
|
|
|
static int restore_task_with_children(void *);
|
2012-07-19 13:23:01 +04:00
|
|
|
static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *vmas, int nr_vmas);
|
2012-09-14 14:51:40 +04:00
|
|
|
static int prepare_restorer_blob(void);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-01-17 15:28:13 +03:00
|
|
|
static int shmem_remap(void *old_addr, void *new_addr, unsigned long size)
|
2011-12-26 21:27:03 +04:00
|
|
|
{
|
2012-03-17 11:47:00 +04:00
|
|
|
void *ret;
|
2011-12-26 21:27:03 +04:00
|
|
|
|
2012-03-17 11:47:00 +04:00
|
|
|
ret = mremap(old_addr, size, size,
|
|
|
|
MREMAP_FIXED | MREMAP_MAYMOVE, new_addr);
|
|
|
|
if (new_addr != ret) {
|
|
|
|
pr_perror("mremap failed");
|
2011-12-26 21:27:03 +04:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2012-03-17 11:47:00 +04:00
|
|
|
return 0;
|
2011-12-26 21:27:03 +04:00
|
|
|
}
|
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
static int prepare_shared(void)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2012-01-26 20:30:31 +04:00
|
|
|
int ret = 0;
|
2012-04-05 15:34:31 +04:00
|
|
|
struct pstree_item *pi;
|
2012-01-26 20:30:31 +04:00
|
|
|
|
2011-09-23 12:00:45 +04:00
|
|
|
pr_info("Preparing info about shared resources\n");
|
|
|
|
|
2012-05-03 18:01:05 +04:00
|
|
|
if (prepare_shmem_restore())
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-12-26 21:15:30 +04:00
|
|
|
|
2012-02-22 18:51:27 +04:00
|
|
|
if (prepare_shared_fdinfo())
|
2012-01-11 15:45:00 +04:00
|
|
|
return -1;
|
|
|
|
|
2012-09-14 17:58:46 +04:00
|
|
|
if (prepare_shared_tty())
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
|
2012-04-03 00:50:50 +04:00
|
|
|
if (collect_reg_files())
|
|
|
|
return -1;
|
|
|
|
|
2012-04-05 20:02:00 +04:00
|
|
|
if (collect_pipes())
|
|
|
|
return -1;
|
|
|
|
|
2012-06-26 02:36:13 +04:00
|
|
|
if (collect_fifo())
|
|
|
|
return -1;
|
|
|
|
|
2012-04-03 00:54:52 +04:00
|
|
|
if (collect_inet_sockets())
|
|
|
|
return -1;
|
|
|
|
|
2012-04-06 19:27:08 +04:00
|
|
|
if (collect_unix_sockets())
|
|
|
|
return -1;
|
2012-04-03 00:58:41 +04:00
|
|
|
|
2012-08-09 16:17:41 +04:00
|
|
|
if (collect_packet_sockets())
|
|
|
|
return -1;
|
|
|
|
|
2012-05-04 13:38:00 +04:00
|
|
|
if (collect_eventfd())
|
|
|
|
return -1;
|
|
|
|
|
2012-05-04 13:38:00 +04:00
|
|
|
if (collect_eventpoll())
|
|
|
|
return -1;
|
|
|
|
|
2012-08-02 12:26:35 +04:00
|
|
|
if (collect_signalfd())
|
|
|
|
return -1;
|
|
|
|
|
2012-05-04 13:38:00 +04:00
|
|
|
if (collect_inotify())
|
|
|
|
return -1;
|
|
|
|
|
2012-09-12 20:00:54 +04:00
|
|
|
if (collect_tty())
|
|
|
|
return -1;
|
|
|
|
|
2012-05-31 14:50:00 +04:00
|
|
|
for_each_pstree_item(pi) {
|
2012-08-02 15:54:54 +04:00
|
|
|
if (pi->state == TASK_HELPER)
|
|
|
|
continue;
|
|
|
|
|
2012-06-22 00:38:00 +04:00
|
|
|
ret = prepare_shmem_pid(pi->pid.virt);
|
2012-01-26 20:30:31 +04:00
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-06-22 00:38:00 +04:00
|
|
|
ret = prepare_fd_pid(pi->pid.virt, pi->rst);
|
2012-01-26 20:30:31 +04:00
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2012-09-12 20:00:58 +04:00
|
|
|
|
2012-09-12 20:09:05 +04:00
|
|
|
ret = prepare_ctl_tty(pi->pid.virt, pi->rst, pi->ctl_tty_id);
|
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-09-12 20:11:33 +04:00
|
|
|
if (ret < 0)
|
|
|
|
goto err;
|
|
|
|
|
2012-04-05 20:02:00 +04:00
|
|
|
mark_pipe_master();
|
2012-09-14 17:50:46 +04:00
|
|
|
|
2012-09-14 17:58:46 +04:00
|
|
|
ret = tty_setup_orphan_slavery(&opts);
|
2012-09-12 20:00:54 +04:00
|
|
|
if (ret)
|
|
|
|
goto err;
|
|
|
|
|
2012-04-06 19:27:08 +04:00
|
|
|
ret = resolve_unix_peers();
|
2012-09-12 20:11:33 +04:00
|
|
|
if (ret)
|
|
|
|
goto err;
|
2012-04-05 20:02:00 +04:00
|
|
|
|
2012-09-14 14:51:40 +04:00
|
|
|
ret = prepare_restorer_blob();
|
|
|
|
if (ret)
|
|
|
|
goto err;
|
|
|
|
|
2012-09-12 20:11:33 +04:00
|
|
|
show_saved_shmems();
|
|
|
|
show_saved_files();
|
2012-09-12 20:00:54 +04:00
|
|
|
err:
|
2012-01-26 20:30:31 +04:00
|
|
|
return ret;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
static int read_and_open_vmas(int pid, struct list_head *vmas, int *nr_vmas)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2012-03-27 16:34:00 +04:00
|
|
|
int fd, ret = -1;
|
|
|
|
|
|
|
|
fd = open_image_ro(CR_FD_VMAS, pid);
|
|
|
|
if (fd < 0)
|
|
|
|
return fd;
|
|
|
|
|
|
|
|
*nr_vmas = 0;
|
2011-09-23 12:00:45 +04:00
|
|
|
while (1) {
|
2012-03-27 16:34:00 +04:00
|
|
|
struct vma_area *vma;
|
2012-07-19 12:43:36 +04:00
|
|
|
VmaEntry *e;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
ret = -1;
|
|
|
|
vma = alloc_vma_area();
|
|
|
|
if (!vma)
|
|
|
|
break;
|
|
|
|
|
|
|
|
(*nr_vmas)++;
|
|
|
|
list_add_tail(&vma->list, vmas);
|
2012-08-07 02:42:58 +04:00
|
|
|
ret = pb_read_one_eof(fd, &e, PB_VMAS);
|
2012-03-21 19:36:00 +04:00
|
|
|
if (ret <= 0)
|
2012-03-27 16:34:00 +04:00
|
|
|
break;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-08-10 10:17:50 +04:00
|
|
|
if (e->fd != -1) {
|
|
|
|
ret = -1;
|
2012-08-11 21:34:35 +04:00
|
|
|
pr_err("Error in vma->fd setting (%Ld)\n",
|
2012-08-10 10:17:50 +04:00
|
|
|
(unsigned long long)e->fd);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2012-07-19 12:43:36 +04:00
|
|
|
vma->vma = *e;
|
|
|
|
vma_entry__free_unpacked(e, NULL);
|
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
if (!(vma_entry_is(&vma->vma, VMA_AREA_REGULAR)))
|
2011-09-23 12:00:45 +04:00
|
|
|
continue;
|
|
|
|
|
2012-05-02 14:42:00 +04:00
|
|
|
pr_info("Opening 0x%016lx-0x%016lx 0x%016lx vma\n",
|
|
|
|
vma->vma.start, vma->vma.end, vma->vma.pgoff);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
if (vma_entry_is(&vma->vma, VMA_AREA_SYSVIPC))
|
|
|
|
ret = vma->vma.shmid;
|
|
|
|
else if (vma_entry_is(&vma->vma, VMA_ANON_SHARED))
|
|
|
|
ret = get_shmem_fd(pid, &vma->vma);
|
|
|
|
else if (vma_entry_is(&vma->vma, VMA_FILE_PRIVATE) ||
|
|
|
|
vma_entry_is(&vma->vma, VMA_FILE_SHARED))
|
|
|
|
ret = get_filemap_fd(pid, &vma->vma);
|
2012-03-21 19:38:00 +04:00
|
|
|
else
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (ret < 0) {
|
|
|
|
pr_err("Can't fixup fd\n");
|
2012-03-27 16:34:00 +04:00
|
|
|
break;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
2012-03-21 19:38:00 +04:00
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
vma->vma.fd = ret;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
2012-03-27 16:34:00 +04:00
|
|
|
|
|
|
|
close(fd);
|
|
|
|
return ret;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-07-19 13:23:01 +04:00
|
|
|
/*
 * Load the VMA list of task @pid and hand it, together with @core, to
 * sigreturn_restore().
 *
 * Returns the non-zero result of read_and_open_vmas() if that step fails,
 * otherwise whatever sigreturn_restore() returns.
 */
static int prepare_and_sigreturn(int pid, CoreEntry *core)
{
	LIST_HEAD(vma_list);
	int nr_vmas;
	int rc;

	rc = read_and_open_vmas(pid, &vma_list, &nr_vmas);
	if (rc != 0)
		return rc;

	return sigreturn_restore(pid, core, &vma_list, nr_vmas);
}
|
|
|
|
|
2012-01-19 01:33:19 +03:00
|
|
|
static rt_sigaction_t sigchld_act;
|
2011-11-29 15:12:25 +03:00
|
|
|
static int prepare_sigactions(int pid)
|
|
|
|
{
|
2011-12-03 17:24:05 +04:00
|
|
|
rt_sigaction_t act, oact;
|
2012-02-01 15:24:39 +04:00
|
|
|
int fd_sigact;
|
2012-07-18 16:25:06 +04:00
|
|
|
SaEntry *e;
|
2012-05-18 15:39:00 +04:00
|
|
|
int sig;
|
2012-02-01 15:24:39 +04:00
|
|
|
int ret = -1;
|
2011-11-29 15:12:25 +03:00
|
|
|
|
2011-12-29 19:56:34 +04:00
|
|
|
fd_sigact = open_image_ro(CR_FD_SIGACT, pid);
|
|
|
|
if (fd_sigact < 0)
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-12-01 17:15:00 +04:00
|
|
|
|
2011-11-29 15:12:25 +03:00
|
|
|
for (sig = 1; sig < SIGMAX; sig++) {
|
|
|
|
if (sig == SIGKILL || sig == SIGSTOP)
|
|
|
|
continue;
|
|
|
|
|
2012-08-07 02:42:58 +04:00
|
|
|
ret = pb_read_one(fd_sigact, &e, PB_SIGACT);
|
2012-01-26 20:30:31 +04:00
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2011-11-29 15:12:25 +03:00
|
|
|
|
2012-07-18 16:25:06 +04:00
|
|
|
ASSIGN_TYPED(act.rt_sa_handler, e->sigaction);
|
|
|
|
ASSIGN_TYPED(act.rt_sa_flags, e->flags);
|
|
|
|
ASSIGN_TYPED(act.rt_sa_restorer, e->restorer);
|
|
|
|
ASSIGN_TYPED(act.rt_sa_mask.sig[0], e->mask);
|
|
|
|
|
|
|
|
sa_entry__free_unpacked(e, NULL);
|
2011-12-02 23:17:30 +04:00
|
|
|
|
2012-01-19 01:33:19 +03:00
|
|
|
if (sig == SIGCHLD) {
|
|
|
|
sigchld_act = act;
|
|
|
|
continue;
|
|
|
|
}
|
2011-12-02 23:17:30 +04:00
|
|
|
/*
|
|
|
|
* A pure syscall is used, because glibc
|
|
|
|
* sigaction overwrites se_restorer.
|
|
|
|
*/
|
2012-04-18 01:55:00 +04:00
|
|
|
ret = sys_sigaction(sig, &act, &oact, sizeof(rt_sigset_t));
|
2011-11-29 15:12:25 +03:00
|
|
|
if (ret == -1) {
|
|
|
|
pr_err("%d: Can't restore sigaction: %m\n", pid);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
err:
|
2012-02-29 13:39:21 +03:00
|
|
|
close_safe(&fd_sigact);
|
2011-11-29 15:12:25 +03:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-06-22 00:38:00 +04:00
|
|
|
static int pstree_wait_helpers()
|
|
|
|
{
|
|
|
|
struct pstree_item *pi;
|
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
list_for_each_entry(pi, ¤t->children, list) {
|
2012-06-22 00:38:00 +04:00
|
|
|
int status, ret;
|
|
|
|
|
|
|
|
if (pi->state != TASK_HELPER)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Check, that a helper completed. */
|
|
|
|
ret = waitpid(pi->pid.virt, &status, 0);
|
|
|
|
if (ret == -1) {
|
|
|
|
if (errno == ECHILD)
|
|
|
|
continue; /* It has been waited in sigchld_handler */
|
|
|
|
pr_err("waitpid(%d) failed\n", pi->pid.virt);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (!WIFEXITED(status) || WEXITSTATUS(status)) {
|
|
|
|
pr_err("%d exited with non-zero code (%d,%d)", pi->pid.virt,
|
|
|
|
WEXITSTATUS(status), WTERMSIG(status));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-06-26 14:51:00 +04:00
|
|
|
|
2012-07-19 13:23:01 +04:00
|
|
|
/*
 * Restore a task that was alive at dump time: wait for helper children,
 * then set up file descriptors, fs state and signal actions, close the
 * log directory and finally jump into the sigreturn-based restore.
 *
 * Returns -1 if any preparation step reports failure; otherwise the
 * result of prepare_and_sigreturn().
 */
static int restore_one_alive_task(int pid, CoreEntry *core)
{
	pr_info("Restoring resources\n");

	/* Evaluated left to right; the first failing step stops the chain. */
	if (pstree_wait_helpers() ||
	    prepare_fds(current) ||
	    prepare_fs(pid) ||
	    prepare_sigactions(pid))
		return -1;

	log_closedir();

	return prepare_and_sigreturn(pid, core);
}
|
|
|
|
|
2012-01-22 20:28:30 +04:00
|
|
|
static void zombie_prepare_signals(void)
|
|
|
|
{
|
|
|
|
sigset_t blockmask;
|
|
|
|
int sig;
|
|
|
|
struct sigaction act;
|
|
|
|
|
|
|
|
sigfillset(&blockmask);
|
|
|
|
sigprocmask(SIG_UNBLOCK, &blockmask, NULL);
|
|
|
|
|
|
|
|
memset(&act, 0, sizeof(act));
|
|
|
|
act.sa_handler = SIG_DFL;
|
|
|
|
|
|
|
|
for (sig = 1; sig < SIGMAX; sig++)
|
|
|
|
sigaction(sig, &act, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
#define SIG_FATAL_MASK ( \
|
|
|
|
(1 << SIGHUP) |\
|
|
|
|
(1 << SIGINT) |\
|
|
|
|
(1 << SIGQUIT) |\
|
|
|
|
(1 << SIGILL) |\
|
|
|
|
(1 << SIGTRAP) |\
|
|
|
|
(1 << SIGABRT) |\
|
|
|
|
(1 << SIGIOT) |\
|
|
|
|
(1 << SIGBUS) |\
|
|
|
|
(1 << SIGFPE) |\
|
|
|
|
(1 << SIGKILL) |\
|
|
|
|
(1 << SIGUSR1) |\
|
|
|
|
(1 << SIGSEGV) |\
|
|
|
|
(1 << SIGUSR2) |\
|
|
|
|
(1 << SIGPIPE) |\
|
|
|
|
(1 << SIGALRM) |\
|
|
|
|
(1 << SIGTERM) |\
|
|
|
|
(1 << SIGXCPU) |\
|
|
|
|
(1 << SIGXFSZ) |\
|
|
|
|
(1 << SIGVTALRM)|\
|
|
|
|
(1 << SIGPROF) |\
|
|
|
|
(1 << SIGPOLL) |\
|
|
|
|
(1 << SIGIO) |\
|
|
|
|
(1 << SIGSYS) |\
|
|
|
|
(1 << SIGUNUSED)|\
|
|
|
|
(1 << SIGSTKFLT)|\
|
|
|
|
(1 << SIGPWR) \
|
|
|
|
)
|
|
|
|
|
|
|
|
static inline int sig_fatal(int sig)
|
|
|
|
{
|
|
|
|
return (sig > 0) && (sig < SIGMAX) && (SIG_FATAL_MASK & (1 << sig));
|
|
|
|
}
|
|
|
|
|
2012-06-26 14:51:00 +04:00
|
|
|
struct task_entries *task_entries;
|
|
|
|
|
2012-06-22 00:38:00 +04:00
|
|
|
static int restore_one_fake(int pid)
|
|
|
|
{
|
|
|
|
/* We should wait here, otherwise last_pid will be changed. */
|
|
|
|
futex_wait_while(&task_entries->start, CR_STATE_FORKING);
|
2012-07-02 15:25:00 +04:00
|
|
|
futex_wait_while(&task_entries->start, CR_STATE_RESTORE_PGID);
|
2012-06-22 00:38:00 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-02-14 14:09:01 +03:00
|
|
|
static int restore_one_zombie(int pid, int exit_code)
|
2012-01-22 20:28:30 +04:00
|
|
|
{
|
|
|
|
pr_info("Restoring zombie with %d code\n", exit_code);
|
|
|
|
|
|
|
|
if (task_entries != NULL) {
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_dec_and_wake(&task_entries->nr_in_progress);
|
|
|
|
futex_wait_while(&task_entries->start, CR_STATE_RESTORE);
|
2012-01-22 20:28:30 +04:00
|
|
|
|
|
|
|
zombie_prepare_signals();
|
|
|
|
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_dec_and_wake(&task_entries->nr_in_progress);
|
|
|
|
futex_wait_while(&task_entries->start, CR_STATE_RESTORE_SIGCHLD);
|
2012-01-22 20:28:30 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (exit_code & 0x7f) {
|
|
|
|
int signr;
|
|
|
|
|
|
|
|
signr = exit_code & 0x7F;
|
|
|
|
if (!sig_fatal(signr)) {
|
2012-03-01 18:52:42 +04:00
|
|
|
pr_warn("Exit with non fatal signal ignored\n");
|
2012-01-22 20:28:30 +04:00
|
|
|
signr = SIGABRT;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (kill(pid, signr) < 0)
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("Can't kill myself, will just exit");
|
2012-01-22 20:28:30 +04:00
|
|
|
|
|
|
|
exit_code = 0;
|
|
|
|
}
|
|
|
|
|
2012-01-30 17:04:24 +04:00
|
|
|
exit((exit_code >> 8) & 0x7f);
|
2012-01-22 20:28:30 +04:00
|
|
|
|
|
|
|
/* never reached */
|
|
|
|
BUG_ON(1);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2012-07-20 14:18:53 +04:00
|
|
|
/*
 * Sanity-check an already unpacked core entry of task @pid:
 *  - the machine type must be CORE_ENTRY__MARCH__X86_64,
 *  - the task-state section (tc) must be present,
 *  - the ids section must be present unless the task is TASK_DEAD.
 *
 * The CR_FD_CORE image is opened and closed again around the checks;
 * nothing is read from it here. NOTE(review): this open looks redundant
 * with the caller's own open of the same image - confirm before removing.
 *
 * Returns 0 when the entry is acceptable, -1 otherwise.
 */
static int check_core(int pid, CoreEntry *core)
{
	int img;
	int rc = -1;

	img = open_image_ro(CR_FD_CORE, pid);
	if (img < 0)
		return -1;

	if (core->mtype != CORE_ENTRY__MARCH__X86_64) {
		pr_err("Core march mismatch %d\n", (int)core->mtype);
	} else if (!core->tc) {
		pr_err("Core task state data missed\n");
	} else if (!core->ids && core->tc->task_state != TASK_DEAD) {
		pr_err("Core IDS data missed for non-zombie\n");
	} else {
		rc = 0;
	}

	close_safe(&img);
	return rc;
}
|
|
|
|
|
|
|
|
static int restore_one_task(int pid)
|
|
|
|
{
|
2012-07-19 13:23:01 +04:00
|
|
|
int fd, ret;
|
|
|
|
CoreEntry *core;
|
2012-01-22 20:28:30 +04:00
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
if (current->state == TASK_HELPER)
|
2012-06-22 00:38:00 +04:00
|
|
|
return restore_one_fake(pid);
|
|
|
|
|
2012-07-19 13:23:01 +04:00
|
|
|
fd = open_image_ro(CR_FD_CORE, pid);
|
|
|
|
if (fd < 0)
|
2012-01-22 20:28:30 +04:00
|
|
|
return -1;
|
|
|
|
|
2012-08-07 02:42:58 +04:00
|
|
|
ret = pb_read_one(fd, &core, PB_CORE);
|
2012-07-19 13:23:01 +04:00
|
|
|
close(fd);
|
|
|
|
|
|
|
|
if (ret < 0)
|
|
|
|
return -1;
|
|
|
|
|
2012-07-20 14:18:53 +04:00
|
|
|
if (check_core(pid, core)) {
|
2012-07-19 13:23:01 +04:00
|
|
|
ret = -1;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch ((int)core->tc->task_state) {
|
2012-01-22 20:28:30 +04:00
|
|
|
case TASK_ALIVE:
|
2012-07-19 13:23:01 +04:00
|
|
|
ret = restore_one_alive_task(pid, core);
|
|
|
|
break;
|
2012-01-22 20:28:30 +04:00
|
|
|
case TASK_DEAD:
|
2012-07-19 13:23:01 +04:00
|
|
|
ret = restore_one_zombie(pid, core->tc->exit_code);
|
|
|
|
break;
|
2012-01-22 20:28:30 +04:00
|
|
|
default:
|
2012-07-19 13:23:01 +04:00
|
|
|
pr_err("Unknown state in code %d\n", (int)core->tc->task_state);
|
|
|
|
ret = -1;
|
|
|
|
break;
|
2012-01-22 20:28:30 +04:00
|
|
|
}
|
2012-07-19 13:23:01 +04:00
|
|
|
|
|
|
|
out:
|
|
|
|
core_entry__free_unpacked(core, NULL);
|
|
|
|
return ret;
|
2012-01-22 20:28:30 +04:00
|
|
|
}
|
|
|
|
|
2012-08-28 23:19:28 +04:00
|
|
|
/* All arguments should be above stack, because it grows down */
|
2012-01-26 15:26:00 +04:00
|
|
|
struct cr_clone_arg {
|
2012-08-28 23:19:28 +04:00
|
|
|
char stack[PAGE_SIZE];
|
|
|
|
char stack_ptr[0];
|
2012-05-31 14:50:00 +04:00
|
|
|
struct pstree_item *item;
|
2012-01-26 15:27:00 +04:00
|
|
|
unsigned long clone_flags;
|
2012-05-31 14:50:00 +04:00
|
|
|
int fd;
|
2012-01-26 15:26:00 +04:00
|
|
|
};
|
|
|
|
|
2012-05-31 14:50:00 +04:00
|
|
|
static inline int fork_with_pid(struct pstree_item *item, unsigned long ns_clone_flags)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2012-01-26 15:26:00 +04:00
|
|
|
int ret = -1;
|
|
|
|
struct cr_clone_arg ca;
|
2012-06-22 00:38:00 +04:00
|
|
|
pid_t pid = item->pid.virt;
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2012-04-13 19:44:00 +04:00
|
|
|
pr_info("Forking task with %d pid (flags 0x%lx)\n", pid, ns_clone_flags);
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2012-05-31 14:50:00 +04:00
|
|
|
ca.item = item;
|
2012-01-26 15:27:00 +04:00
|
|
|
ca.clone_flags = ns_clone_flags;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-06-19 15:53:00 +04:00
|
|
|
if (!(ca.clone_flags & CLONE_NEWPID)) {
|
2012-08-14 14:09:20 +04:00
|
|
|
char buf[32];
|
|
|
|
|
|
|
|
ca.fd = open(LAST_PID_PATH, O_RDWR);
|
|
|
|
if (ca.fd < 0) {
|
|
|
|
pr_perror("%d: Can't open %s", pid, LAST_PID_PATH);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (flock(ca.fd, LOCK_EX)) {
|
|
|
|
close(ca.fd);
|
|
|
|
pr_perror("%d: Can't lock %s", pid, LAST_PID_PATH);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
snprintf(buf, sizeof(buf), "%d", pid - 1);
|
2012-06-19 15:53:00 +04:00
|
|
|
if (write_img_buf(ca.fd, buf, strlen(buf)))
|
|
|
|
goto err_unlock;
|
2012-08-14 14:09:20 +04:00
|
|
|
} else {
|
|
|
|
ca.fd = -1;
|
2012-06-19 15:53:00 +04:00
|
|
|
BUG_ON(pid != 1);
|
2012-08-14 14:09:20 +04:00
|
|
|
}
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2012-08-10 19:14:36 +04:00
|
|
|
if (ca.clone_flags & CLONE_NEWNET)
|
|
|
|
/*
|
|
|
|
* When restoring a net namespace we need to communicate
|
|
|
|
* with the original (i.e. -- init) one. Thus, prepare for
|
|
|
|
* that before we leave the existing namespaces.
|
|
|
|
*/
|
|
|
|
if (netns_pre_create())
|
|
|
|
goto err_unlock;
|
|
|
|
|
2012-08-28 23:19:28 +04:00
|
|
|
ret = clone(restore_task_with_children, ca.stack_ptr,
|
2012-06-19 15:53:00 +04:00
|
|
|
ca.clone_flags | SIGCHLD, &ca);
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2012-01-26 15:26:00 +04:00
|
|
|
if (ret < 0)
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("Can't fork for %d", pid);
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2012-08-06 18:31:39 +04:00
|
|
|
if (ca.clone_flags & CLONE_NEWPID)
|
|
|
|
item->pid.real = ret;
|
2012-08-14 12:54:00 +04:00
|
|
|
|
|
|
|
if (opts.pidfile && root_item == item) {
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
fd = open(opts.pidfile, O_WRONLY | O_TRUNC | O_CREAT, 0600);
|
|
|
|
if (fd == -1) {
|
|
|
|
pr_perror("Can't open %s", opts.pidfile);
|
|
|
|
kill(ret, SIGKILL);
|
|
|
|
} else {
|
|
|
|
dprintf(fd, "%d", ret);
|
|
|
|
close(fd);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-12-01 18:21:17 +04:00
|
|
|
err_unlock:
|
2012-08-14 14:09:20 +04:00
|
|
|
if (ca.fd >= 0) {
|
|
|
|
if (flock(ca.fd, LOCK_UN))
|
|
|
|
pr_perror("%d: Can't unlock %s", pid, LAST_PID_PATH);
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2012-08-14 14:09:20 +04:00
|
|
|
close(ca.fd);
|
|
|
|
}
|
2011-12-01 18:21:17 +04:00
|
|
|
err:
|
2011-09-23 12:00:45 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-01-19 01:33:19 +03:00
|
|
|
/*
 * SIGCHLD handler used while the process tree is being restored.
 *
 * A child that reports a non-zero status (or any child at all when
 * `current` is not set) is an error: it is logged and the restore is
 * aborted through task_entries->nr_in_progress. Otherwise pending
 * children are reaped one by one; reaping silently continues only while
 * each reaped child ended with status 0 and is a TASK_HELPER child of
 * `current`.
 *
 * @signal and @data are unused.
 */
static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
{
	struct pstree_item *pi;
	pid_t pid = siginfo->si_pid;
	int status;
	int exited;

	/*
	 * si_code holds an enumerated value, not a bit set: a bitwise test
	 * against CLD_EXITED would also match other odd codes such as
	 * CLD_DUMPED.
	 */
	exited = (siginfo->si_code == CLD_EXITED);
	status = siginfo->si_status;
	if (!current || status)
		goto err;

	/* Skip a helper if it was completed successfully */
	while (pid) {
		pid = waitpid(-1, &status, WNOHANG);
		if (pid <= 0)
			return;

		exited = WIFEXITED(status);
		status = exited ? WEXITSTATUS(status) : WTERMSIG(status);
		if (status)
			break;

		/*
		 * Look up the child that was actually reaped; waitpid(-1)
		 * is not guaranteed to return the sender recorded in
		 * si_pid.
		 */
		list_for_each_entry(pi, &current->children, list) {
			if (pi->state != TASK_HELPER)
				continue;
			if (pi->pid.virt == pid)
				break;
		}

		if (&pi->list == &current->children)
			break; /* The process is not a helper */
	}

err:
	if (exited)
		pr_err("%d exited, status=%d\n", pid, status);
	else
		pr_err("%d killed by signal %d\n", pid, status);

	futex_abort_and_wake(&task_entries->nr_in_progress);
}
|
|
|
|
|
2012-08-11 21:34:35 +04:00
|
|
|
/*
 * Called when a session or process-group id cannot be restored.
 *
 * FIXME: don't fail on xid restore failure. MySQL uses runaway pgid and
 * sid and there is nothing we can do about it yet :(
 *
 * Despite the note above, the body currently terminates the process with
 * exit status 1.
 */
static void xid_fail(void)
{
	exit(1);
}
|
|
|
|
|
2012-04-11 22:11:41 +04:00
|
|
|
static void restore_sid(void)
|
|
|
|
{
|
|
|
|
pid_t sid;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SID can only be reset to pid or inherited from parent.
|
|
|
|
* Thus we restore it right here to let our kids inherit
|
|
|
|
* one in case they need it.
|
|
|
|
*
|
|
|
|
* PGIDs are restored late when all tasks are forked and
|
|
|
|
* we can call setpgid() on custom values.
|
|
|
|
*/
|
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
if (current->pid.virt == current->sid) {
|
|
|
|
pr_info("Restoring %d to %d sid\n", current->pid.virt, current->sid);
|
2012-04-11 22:11:41 +04:00
|
|
|
sid = setsid();
|
2012-09-05 19:52:55 +04:00
|
|
|
if (sid != current->sid) {
|
2012-04-11 22:11:41 +04:00
|
|
|
pr_perror("Can't restore sid (%d)", sid);
|
2012-04-13 19:06:35 +04:00
|
|
|
xid_fail();
|
2012-04-11 22:11:41 +04:00
|
|
|
}
|
|
|
|
} else {
|
2012-06-22 00:39:00 +04:00
|
|
|
sid = getsid(getpid());
|
2012-09-05 19:52:55 +04:00
|
|
|
if (sid != current->sid) {
|
2012-06-22 00:39:00 +04:00
|
|
|
/* Skip the root task if it's not init */
|
2012-09-05 19:52:55 +04:00
|
|
|
if (current == root_item && root_item->pid.virt != 1)
|
2012-06-22 00:39:00 +04:00
|
|
|
return;
|
2012-04-11 22:11:41 +04:00
|
|
|
pr_err("Requested sid %d doesn't match inherited %d\n",
|
2012-09-05 19:52:55 +04:00
|
|
|
current->sid, sid);
|
2012-04-13 19:06:35 +04:00
|
|
|
xid_fail();
|
2012-04-11 22:11:41 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void restore_pgid(void)
|
|
|
|
{
|
|
|
|
pid_t pgid;
|
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
pr_info("Restoring %d to %d pgid\n", current->pid.virt, current->pgid);
|
2012-04-11 22:11:41 +04:00
|
|
|
|
|
|
|
pgid = getpgrp();
|
2012-09-05 19:52:55 +04:00
|
|
|
if (current->pgid == pgid)
|
2012-04-11 22:11:41 +04:00
|
|
|
return;
|
|
|
|
|
|
|
|
pr_info("\twill call setpgid, mine pgid is %d\n", pgid);
|
2012-09-05 19:52:55 +04:00
|
|
|
if (setpgid(0, current->pgid) != 0) {
|
|
|
|
pr_perror("Can't restore pgid (%d/%d->%d)", current->pid.virt, pgid, current->pgid);
|
2012-04-13 19:06:35 +04:00
|
|
|
xid_fail();
|
2012-04-11 22:11:41 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-08-06 18:36:59 +04:00
|
|
|
static int mount_proc(void)
|
2012-06-27 20:57:40 +04:00
|
|
|
{
|
2012-08-01 15:01:13 +04:00
|
|
|
int ret;
|
|
|
|
char proc_mountpoint[PATH_MAX];
|
|
|
|
|
|
|
|
snprintf(proc_mountpoint, sizeof(proc_mountpoint), "crtools-proc.XXXXXX");
|
2012-06-27 20:57:40 +04:00
|
|
|
if (mkdtemp(proc_mountpoint) == NULL) {
|
2012-08-06 18:36:59 +04:00
|
|
|
pr_perror("mkdtemp failed %s", proc_mountpoint);
|
|
|
|
return -1;
|
2012-06-27 20:57:40 +04:00
|
|
|
}
|
|
|
|
|
2012-08-01 15:01:13 +04:00
|
|
|
pr_info("Mount procfs in %s\n", proc_mountpoint);
|
2012-08-06 18:36:59 +04:00
|
|
|
if (mount("proc", proc_mountpoint, "proc", MS_MGC_VAL, NULL)) {
|
|
|
|
pr_perror("mount failed");
|
|
|
|
ret = -1;
|
|
|
|
goto out_rmdir;
|
2012-06-27 20:57:40 +04:00
|
|
|
}
|
2012-08-06 18:36:59 +04:00
|
|
|
|
|
|
|
ret = set_proc_mountpoint(proc_mountpoint);
|
|
|
|
|
2012-08-01 15:01:13 +04:00
|
|
|
if (umount2(proc_mountpoint, MNT_DETACH) == -1) {
|
2012-08-06 18:36:59 +04:00
|
|
|
pr_perror("Can't umount %s", proc_mountpoint);
|
|
|
|
return -1;
|
2012-08-01 15:01:13 +04:00
|
|
|
}
|
2012-08-06 18:36:59 +04:00
|
|
|
|
|
|
|
out_rmdir:
|
2012-08-01 15:01:13 +04:00
|
|
|
if (rmdir(proc_mountpoint) == -1) {
|
2012-08-06 18:36:59 +04:00
|
|
|
pr_perror("Can't remove %s", proc_mountpoint);
|
|
|
|
return -1;
|
2012-08-01 15:01:13 +04:00
|
|
|
}
|
2012-08-06 18:36:59 +04:00
|
|
|
|
|
|
|
return ret;
|
2012-06-27 20:57:40 +04:00
|
|
|
}
|
|
|
|
|
2012-01-26 15:26:00 +04:00
|
|
|
static int restore_task_with_children(void *_arg)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2012-01-26 15:26:00 +04:00
|
|
|
struct cr_clone_arg *ca = _arg;
|
2012-05-31 14:50:00 +04:00
|
|
|
struct pstree_item *child;
|
2012-04-05 15:34:31 +04:00
|
|
|
pid_t pid;
|
2012-05-31 14:50:00 +04:00
|
|
|
int ret;
|
2011-12-02 16:06:00 +04:00
|
|
|
sigset_t blockmask;
|
2012-01-26 15:26:00 +04:00
|
|
|
|
|
|
|
close_safe(&ca->fd);
|
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
current = ca->item;
|
2012-05-31 14:50:00 +04:00
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
pid = getpid();
|
2012-09-05 19:52:55 +04:00
|
|
|
if (current->pid.virt != pid) {
|
|
|
|
pr_err("Pid %d do not match expected %d\n", pid, current->pid.virt);
|
2012-01-26 15:26:00 +04:00
|
|
|
exit(-1);
|
|
|
|
}
|
2011-12-02 16:06:00 +04:00
|
|
|
|
2012-05-02 14:42:00 +04:00
|
|
|
ret = log_init_by_pid();
|
|
|
|
if (ret < 0)
|
|
|
|
exit(1);
|
|
|
|
|
2012-08-06 18:37:13 +04:00
|
|
|
/* Restore root task */
|
2012-09-05 19:52:55 +04:00
|
|
|
if (current->parent == NULL) {
|
2012-07-15 08:43:37 +04:00
|
|
|
if (collect_mount_info())
|
|
|
|
exit(-1);
|
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
if (prepare_namespace(current->pid.virt, ca->clone_flags))
|
2012-01-26 15:27:00 +04:00
|
|
|
exit(-1);
|
2012-06-27 20:57:40 +04:00
|
|
|
|
2012-08-01 15:01:13 +04:00
|
|
|
/*
|
|
|
|
* We need non /proc proc mount for restoring pid and mount
|
|
|
|
* namespaces and do not care for the rest of the cases.
|
|
|
|
* Thus -- mount proc at custom location for any new namespace
|
|
|
|
*/
|
2012-08-06 18:36:59 +04:00
|
|
|
if (mount_proc())
|
|
|
|
exit(-1);
|
2012-01-26 15:27:00 +04:00
|
|
|
|
2012-08-02 16:08:06 +04:00
|
|
|
if (prepare_shared())
|
|
|
|
exit(-1);
|
2012-08-06 18:37:13 +04:00
|
|
|
}
|
2012-08-02 16:08:06 +04:00
|
|
|
|
2012-01-20 00:05:22 +04:00
|
|
|
/*
|
|
|
|
* The block mask will be restored in sigresturn.
|
|
|
|
*
|
|
|
|
* TODO: This code should be removed, when a freezer will be added.
|
|
|
|
*/
|
2011-12-02 16:06:00 +04:00
|
|
|
sigfillset(&blockmask);
|
2012-01-19 01:33:19 +03:00
|
|
|
sigdelset(&blockmask, SIGCHLD);
|
2011-12-02 16:06:00 +04:00
|
|
|
ret = sigprocmask(SIG_BLOCK, &blockmask, NULL);
|
|
|
|
if (ret) {
|
2012-09-05 19:52:55 +04:00
|
|
|
pr_perror("%d: Can't block signals", current->pid.virt);
|
2011-12-02 16:06:00 +04:00
|
|
|
exit(1);
|
|
|
|
}
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-05-31 14:50:00 +04:00
|
|
|
pr_info("Restoring children:\n");
|
2012-09-05 19:52:55 +04:00
|
|
|
list_for_each_entry(child, ¤t->children, list) {
|
2012-06-22 00:39:00 +04:00
|
|
|
if (!restore_before_setsid(child))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
BUG_ON(child->born_sid != -1 && getsid(getpid()) != child->born_sid);
|
|
|
|
|
2012-05-31 14:50:00 +04:00
|
|
|
ret = fork_with_pid(child, 0);
|
2012-02-10 20:18:08 +04:00
|
|
|
if (ret < 0)
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2012-06-22 00:39:00 +04:00
|
|
|
restore_sid();
|
|
|
|
|
|
|
|
pr_info("Restoring children:\n");
|
2012-09-05 19:52:55 +04:00
|
|
|
list_for_each_entry(child, ¤t->children, list) {
|
2012-06-22 00:39:00 +04:00
|
|
|
if (restore_before_setsid(child))
|
|
|
|
continue;
|
|
|
|
ret = fork_with_pid(child, 0);
|
|
|
|
if (ret < 0)
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
if (current->pgid == current->pid.virt)
|
2012-07-02 15:25:00 +04:00
|
|
|
restore_pgid();
|
|
|
|
|
2012-07-02 15:25:00 +04:00
|
|
|
futex_dec_and_wake(&task_entries->nr_in_progress);
|
|
|
|
futex_wait_while(&task_entries->start, CR_STATE_FORKING);
|
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
if (current->pgid != current->pid.virt)
|
2012-07-02 15:25:00 +04:00
|
|
|
restore_pgid();
|
2012-07-02 15:25:00 +04:00
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
if (current->state != TASK_HELPER) {
|
2012-06-22 00:39:00 +04:00
|
|
|
futex_dec_and_wake(&task_entries->nr_in_progress);
|
2012-07-02 15:25:00 +04:00
|
|
|
futex_wait_while(&task_entries->start, CR_STATE_RESTORE_PGID);
|
2012-06-22 00:39:00 +04:00
|
|
|
}
|
2012-04-11 22:06:36 +04:00
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
return restore_one_task(current->pid.virt);
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-05-31 14:50:00 +04:00
|
|
|
static int restore_root_task(struct pstree_item *init, struct cr_options *opts)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2012-05-18 15:39:00 +04:00
|
|
|
int ret;
|
2012-01-27 11:07:11 +04:00
|
|
|
struct sigaction act, old_act;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-01-19 01:33:19 +03:00
|
|
|
ret = sigaction(SIGCHLD, NULL, &act);
|
|
|
|
if (ret < 0) {
|
2012-08-11 21:57:42 +04:00
|
|
|
pr_perror("sigaction() failed\n");
|
2012-01-19 01:33:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2012-06-22 00:38:00 +04:00
|
|
|
act.sa_flags |= SA_NOCLDSTOP | SA_SIGINFO | SA_RESTART;
|
2012-01-19 01:33:19 +03:00
|
|
|
act.sa_sigaction = sigchld_handler;
|
2012-06-22 00:38:00 +04:00
|
|
|
sigemptyset(&act.sa_mask);
|
|
|
|
sigaddset(&act.sa_mask, SIGCHLD);
|
|
|
|
|
2012-01-27 11:07:11 +04:00
|
|
|
ret = sigaction(SIGCHLD, &act, &old_act);
|
2012-01-19 01:33:19 +03:00
|
|
|
if (ret < 0) {
|
2012-08-11 21:57:42 +04:00
|
|
|
pr_perror("sigaction() failed\n");
|
2012-01-19 01:33:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2012-01-26 15:27:00 +04:00
|
|
|
/*
|
|
|
|
* FIXME -- currently we assume that all the tasks live
|
|
|
|
* in the same set of namespaces. This is done to debug
|
|
|
|
* the ns contents dumping/restoring. Need to revisit
|
|
|
|
* this later.
|
|
|
|
*/
|
|
|
|
|
2012-06-22 00:38:00 +04:00
|
|
|
if (init->pid.virt == 1) {
|
2012-06-22 00:38:00 +04:00
|
|
|
if (!(opts->namespaces_flags & CLONE_NEWPID)) {
|
|
|
|
pr_err("This process tree can be restored in a new pid namespace.\n");
|
|
|
|
pr_err("crtools should be re-executed with --namespace pid\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else if (opts->namespaces_flags & CLONE_NEWPID) {
|
|
|
|
pr_err("Can't restore pid namespace without the process init\n");
|
|
|
|
return -1;
|
2012-06-19 15:53:00 +04:00
|
|
|
}
|
|
|
|
|
2012-06-22 00:38:00 +04:00
|
|
|
|
2012-05-31 14:50:00 +04:00
|
|
|
ret = fork_with_pid(init, opts->namespaces_flags);
|
2011-09-23 12:00:45 +04:00
|
|
|
if (ret < 0)
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-04-11 22:06:36 +04:00
|
|
|
pr_info("Wait until all tasks are forked\n");
|
|
|
|
futex_wait_while_gt(&task_entries->nr_in_progress, 0);
|
|
|
|
ret = (int)futex_get(&task_entries->nr_in_progress);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
|
2012-07-02 15:25:00 +04:00
|
|
|
futex_set_and_wake(&task_entries->nr_in_progress, task_entries->nr_tasks);
|
|
|
|
futex_set_and_wake(&task_entries->start, CR_STATE_RESTORE_PGID);
|
|
|
|
|
|
|
|
pr_info("Wait until all tasks restored pgid\n");
|
|
|
|
futex_wait_while_gt(&task_entries->nr_in_progress, 0);
|
|
|
|
ret = (int)futex_get(&task_entries->nr_in_progress);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
|
2012-04-11 22:06:36 +04:00
|
|
|
futex_set_and_wake(&task_entries->nr_in_progress, task_entries->nr);
|
|
|
|
futex_set_and_wake(&task_entries->start, CR_STATE_RESTORE);
|
|
|
|
|
2012-01-27 19:01:51 +03:00
|
|
|
pr_info("Wait until all tasks are restored\n");
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_wait_while_gt(&task_entries->nr_in_progress, 0);
|
|
|
|
ret = (int)futex_get(&task_entries->nr_in_progress);
|
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
out:
|
2012-01-19 01:33:19 +03:00
|
|
|
if (ret < 0) {
|
2012-04-05 15:34:31 +04:00
|
|
|
struct pstree_item *pi;
|
2012-05-31 14:50:00 +04:00
|
|
|
pr_err("Someone can't be restored\n");
|
2012-04-05 15:34:31 +04:00
|
|
|
|
2012-08-06 18:31:39 +04:00
|
|
|
if (opts->namespaces_flags & CLONE_NEWPID) {
|
|
|
|
/* Kill init */
|
|
|
|
if (root_item->pid.real > 0)
|
|
|
|
kill(root_item->pid.real, SIGKILL);
|
|
|
|
} else {
|
|
|
|
for_each_pstree_item(pi)
|
2012-09-07 18:52:59 +04:00
|
|
|
if (pi->pid.virt > 0)
|
|
|
|
kill(pi->pid.virt, SIGKILL);
|
2012-08-06 18:31:39 +04:00
|
|
|
}
|
2012-01-19 01:33:19 +03:00
|
|
|
return 1;
|
|
|
|
}
|
2012-01-19 01:33:16 +03:00
|
|
|
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_set_and_wake(&task_entries->nr_in_progress, task_entries->nr);
|
|
|
|
futex_set_and_wake(&task_entries->start, CR_STATE_RESTORE_SIGCHLD);
|
|
|
|
futex_wait_until(&task_entries->nr_in_progress, 0);
|
2012-01-27 11:07:11 +04:00
|
|
|
|
|
|
|
ret = sigaction(SIGCHLD, &old_act, NULL);
|
|
|
|
if (ret < 0) {
|
2012-08-11 21:57:42 +04:00
|
|
|
pr_perror("sigaction() failed\n");
|
2012-01-27 11:07:11 +04:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2012-04-28 17:38:46 +04:00
|
|
|
tcp_unlock_connections();
|
2012-04-13 17:54:36 +04:00
|
|
|
|
2012-01-16 23:52:15 +03:00
|
|
|
pr_info("Go on!!!\n");
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_set_and_wake(&task_entries->start, CR_STATE_COMPLETE);
|
2012-01-16 23:52:15 +03:00
|
|
|
|
2012-01-26 15:25:00 +04:00
|
|
|
if (!opts->restore_detach)
|
2012-01-18 23:24:37 +04:00
|
|
|
wait(NULL);
|
2011-09-23 12:00:45 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-06-26 14:51:00 +04:00
|
|
|
static int prepare_task_entries()
|
|
|
|
{
|
|
|
|
task_entries = mmap(NULL, TASK_ENTRIES_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
|
|
|
|
if (task_entries == MAP_FAILED) {
|
|
|
|
pr_perror("Can't map shmem");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
task_entries->nr = 0;
|
|
|
|
task_entries->nr_tasks = 0;
|
2012-07-02 15:25:00 +04:00
|
|
|
task_entries->nr_helpers = 0;
|
2012-06-26 14:51:00 +04:00
|
|
|
futex_set(&task_entries->start, CR_STATE_FORKING);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-07-19 17:55:34 +04:00
|
|
|
int cr_restore_tasks(pid_t pid, struct cr_options *opts)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2012-07-19 17:37:25 +04:00
|
|
|
if (check_img_inventory() < 0)
|
|
|
|
return -1;
|
|
|
|
|
2012-06-26 14:51:00 +04:00
|
|
|
if (prepare_task_entries() < 0)
|
|
|
|
return -1;
|
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
if (prepare_pstree() < 0)
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-06-22 00:39:00 +04:00
|
|
|
if (prepare_pstree_ids() < 0)
|
|
|
|
return -1;
|
|
|
|
|
2012-07-02 15:25:00 +04:00
|
|
|
futex_set(&task_entries->nr_in_progress, task_entries->nr_tasks + task_entries->nr_helpers);
|
|
|
|
|
2012-05-31 14:50:00 +04:00
|
|
|
return restore_root_task(root_item, opts);
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-04-07 11:09:00 +04:00
|
|
|
#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
|
2012-04-05 14:08:11 +04:00
|
|
|
static long restorer_get_vma_hint(pid_t pid, struct list_head *tgt_vma_list,
|
|
|
|
struct list_head *self_vma_list, long vma_len)
|
2011-11-06 01:49:57 +04:00
|
|
|
{
|
2012-04-07 11:09:00 +04:00
|
|
|
struct vma_area *t_vma, *s_vma;
|
2012-04-05 14:08:11 +04:00
|
|
|
long prev_vma_end = 0;
|
2012-04-07 11:09:00 +04:00
|
|
|
struct vma_area end_vma;
|
|
|
|
|
|
|
|
end_vma.vma.start = end_vma.vma.end = TASK_SIZE_MAX;
|
|
|
|
prev_vma_end = PAGE_SIZE;
|
2011-11-06 01:49:57 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Here we need some heuristics -- the VMA which restorer will
|
|
|
|
* belong to should not be unmapped, so we need to gueess out
|
|
|
|
* where to put it in.
|
|
|
|
*/
|
|
|
|
|
2012-04-07 11:09:00 +04:00
|
|
|
s_vma = list_first_entry(self_vma_list, struct vma_area, list);
|
|
|
|
t_vma = list_first_entry(tgt_vma_list, struct vma_area, list);
|
2012-03-02 19:28:13 +04:00
|
|
|
|
2012-04-07 11:09:00 +04:00
|
|
|
while (1) {
|
|
|
|
if (prev_vma_end + vma_len > s_vma->vma.start) {
|
|
|
|
if (s_vma->list.next == self_vma_list) {
|
|
|
|
s_vma = &end_vma;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (s_vma == &end_vma)
|
|
|
|
break;
|
|
|
|
if (prev_vma_end < s_vma->vma.end)
|
|
|
|
prev_vma_end = s_vma->vma.end;
|
|
|
|
s_vma = list_entry(s_vma->list.next, struct vma_area, list);
|
|
|
|
continue;
|
|
|
|
}
|
2012-03-02 19:28:13 +04:00
|
|
|
|
2012-04-07 11:09:00 +04:00
|
|
|
if (prev_vma_end + vma_len > t_vma->vma.start) {
|
|
|
|
if (t_vma->list.next == tgt_vma_list) {
|
|
|
|
t_vma = &end_vma;
|
|
|
|
continue;
|
2011-11-06 01:49:57 +04:00
|
|
|
}
|
2012-04-07 11:09:00 +04:00
|
|
|
if (t_vma == &end_vma)
|
|
|
|
break;
|
|
|
|
if (prev_vma_end < t_vma->vma.end)
|
|
|
|
prev_vma_end = t_vma->vma.end;
|
|
|
|
t_vma = list_entry(t_vma->list.next, struct vma_area, list);
|
|
|
|
continue;
|
2012-03-02 19:28:13 +04:00
|
|
|
}
|
|
|
|
|
2012-04-07 11:09:00 +04:00
|
|
|
return prev_vma_end;
|
2011-11-06 01:49:57 +04:00
|
|
|
}
|
2012-04-05 14:08:11 +04:00
|
|
|
|
|
|
|
return -1;
|
2011-11-06 01:49:57 +04:00
|
|
|
}
|
|
|
|
|
2012-01-24 16:45:19 +04:00
|
|
|
#define USEC_PER_SEC	1000000L

/*
 * Check that @tv holds a sane time value: seconds must not be negative
 * and microseconds must lie in [0, USEC_PER_SEC).
 *
 * Returns 1 if the value is valid, 0 otherwise.
 */
static inline int timeval_valid(const struct timeval *tv)
{
	if (tv->tv_sec < 0)
		return 0;

	/* The unsigned cast rejects negative microseconds as well */
	return (unsigned long)tv->tv_usec < USEC_PER_SEC;
}
|
|
|
|
|
2012-07-18 16:27:01 +04:00
|
|
|
/*
 * Convert the image entry @ie into @val.
 *
 * @n:   timer name, used for logging only
 * @ie:  timer entry read from the image
 * @val: itimerval to fill in
 *
 * A timer with both the interval and the value equal to zero is
 * restored as an all-zero itimerval. A zero interval with a non-zero
 * value is a one-shot timer and is preserved as is.
 *
 * Returns 0 on success, -1 if the image carries an invalid time value.
 */
static inline int itimer_restore_and_fix(char *n, ItimerEntry *ie,
		struct itimerval *val)
{
	if (ie->isec == 0 && ie->iusec == 0 &&
	    ie->vsec == 0 && ie->vusec == 0) {
		memzero_p(val);
		return 0;
	}

	val->it_interval.tv_sec = ie->isec;
	val->it_interval.tv_usec = ie->iusec;

	if (!timeval_valid(&val->it_interval)) {
		pr_err("Invalid timer interval\n");
		return -1;
	}

	if (ie->vsec == 0 && ie->vusec == 0) {
		/*
		 * Remaining time was too short. Set it to
		 * interval to make the timer armed and work.
		 */
		val->it_value.tv_sec = ie->isec;
		val->it_value.tv_usec = ie->iusec;
	} else {
		val->it_value.tv_sec = ie->vsec;
		val->it_value.tv_usec = ie->vusec;
	}

	if (!timeval_valid(&val->it_value)) {
		pr_err("Invalid timer value\n");
		return -1;
	}

	pr_info("Restored %s timer to %ld.%ld -> %ld.%ld\n", n,
			val->it_value.tv_sec, val->it_value.tv_usec,
			val->it_interval.tv_sec, val->it_interval.tv_usec);

	return 0;
}
|
|
|
|
|
|
|
|
static int prepare_itimers(int pid, struct task_restore_core_args *args)
|
|
|
|
{
|
|
|
|
int fd, ret = -1;
|
2012-07-18 16:27:01 +04:00
|
|
|
ItimerEntry *ie;
|
2012-01-24 16:45:19 +04:00
|
|
|
|
|
|
|
fd = open_image_ro(CR_FD_ITIMERS, pid);
|
|
|
|
if (fd < 0)
|
|
|
|
return fd;
|
|
|
|
|
2012-08-07 02:42:58 +04:00
|
|
|
ret = pb_read_one(fd, &ie, PB_ITIMERS);
|
2012-07-18 16:27:01 +04:00
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
ret = itimer_restore_and_fix("real", ie, &args->itimers[0]);
|
|
|
|
itimer_entry__free_unpacked(ie, NULL);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
2012-01-24 16:45:19 +04:00
|
|
|
|
2012-08-07 02:42:58 +04:00
|
|
|
ret = pb_read_one(fd, &ie, PB_ITIMERS);
|
2012-07-18 16:27:01 +04:00
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
ret = itimer_restore_and_fix("virt", ie, &args->itimers[1]);
|
|
|
|
itimer_entry__free_unpacked(ie, NULL);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
|
2012-08-07 02:42:58 +04:00
|
|
|
ret = pb_read_one(fd, &ie, PB_ITIMERS);
|
2012-07-18 16:27:01 +04:00
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
ret = itimer_restore_and_fix("prof", ie, &args->itimers[2]);
|
|
|
|
itimer_entry__free_unpacked(ie, NULL);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
out:
|
2012-02-29 13:39:21 +03:00
|
|
|
close_safe(&fd);
|
2012-01-24 16:45:19 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-07-19 12:35:25 +04:00
|
|
|
/*
 * Check that each of the four capability arrays of @ce (inheritable,
 * effective, permitted, bounding) has exactly CR_CAP_SIZE elements.
 *
 * Returns 1 if all sizes match, 0 otherwise.
 */
static inline int verify_cap_size(CredsEntry *ce)
{
	if (ce->n_cap_inh != CR_CAP_SIZE)
		return 0;
	if (ce->n_cap_eff != CR_CAP_SIZE)
		return 0;
	if (ce->n_cap_prm != CR_CAP_SIZE)
		return 0;

	return ce->n_cap_bnd == CR_CAP_SIZE;
}
|
|
|
|
|
2012-01-27 21:43:32 +04:00
|
|
|
static int prepare_creds(int pid, struct task_restore_core_args *args)
|
|
|
|
{
|
|
|
|
int fd, ret;
|
2012-07-19 12:35:25 +04:00
|
|
|
CredsEntry *ce;
|
2012-01-27 21:43:32 +04:00
|
|
|
|
|
|
|
fd = open_image_ro(CR_FD_CREDS, pid);
|
|
|
|
if (fd < 0)
|
|
|
|
return fd;
|
|
|
|
|
2012-08-07 02:42:58 +04:00
|
|
|
ret = pb_read_one(fd, &ce, PB_CREDS);
|
2012-02-29 13:39:21 +03:00
|
|
|
close_safe(&fd);
|
2012-01-27 21:43:32 +04:00
|
|
|
|
2012-07-19 12:35:25 +04:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
if (!verify_cap_size(ce))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
args->creds = *ce;
|
|
|
|
args->creds.cap_inh = args->cap_inh;
|
|
|
|
memcpy(args->cap_inh, ce->cap_inh, sizeof(args->cap_inh));
|
|
|
|
args->creds.cap_eff = args->cap_eff;
|
|
|
|
memcpy(args->cap_eff, ce->cap_eff, sizeof(args->cap_eff));
|
|
|
|
args->creds.cap_prm = args->cap_prm;
|
|
|
|
memcpy(args->cap_prm, ce->cap_prm, sizeof(args->cap_prm));
|
|
|
|
args->creds.cap_bnd = args->cap_bnd;
|
|
|
|
memcpy(args->cap_bnd, ce->cap_bnd, sizeof(args->cap_bnd));
|
|
|
|
|
|
|
|
creds_entry__free_unpacked(ce, NULL);
|
|
|
|
|
2012-01-27 21:43:32 +04:00
|
|
|
/* XXX -- validate creds here? */
|
|
|
|
|
2012-07-19 12:35:25 +04:00
|
|
|
return 0;
|
2012-01-27 21:43:32 +04:00
|
|
|
}
|
|
|
|
|
2012-07-19 12:43:36 +04:00
|
|
|
static VmaEntry *vma_list_remap(void *addr, unsigned long len, struct list_head *vmas)
|
2012-03-27 16:31:00 +04:00
|
|
|
{
|
2012-07-19 12:43:36 +04:00
|
|
|
VmaEntry *vma, *ret;
|
2012-03-27 16:31:00 +04:00
|
|
|
struct vma_area *vma_area;
|
|
|
|
|
|
|
|
ret = vma = mmap(addr, len, PROT_READ | PROT_WRITE,
|
|
|
|
MAP_PRIVATE | MAP_ANON | MAP_FIXED, 0, 0);
|
|
|
|
if (vma != addr) {
|
|
|
|
pr_perror("Can't remap vma area");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
list_for_each_entry(vma_area, vmas, list) {
|
|
|
|
*vma = vma_area->vma;
|
|
|
|
vma++;
|
|
|
|
}
|
|
|
|
|
|
|
|
vma->start = 0;
|
|
|
|
free_mappings(vmas);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-04-09 14:51:37 +04:00
|
|
|
static int prepare_mm(pid_t pid, struct task_restore_core_args *args)
|
|
|
|
{
|
2012-07-18 20:54:00 +04:00
|
|
|
int fd, exe_fd, ret = -1;
|
|
|
|
MmEntry *mm;
|
2012-04-09 14:51:37 +04:00
|
|
|
|
|
|
|
fd = open_image_ro(CR_FD_MM, pid);
|
|
|
|
if (fd < 0)
|
|
|
|
return -1;
|
|
|
|
|
2012-08-07 02:42:58 +04:00
|
|
|
if (pb_read_one(fd, &mm, PB_MM) < 0)
|
2012-04-09 14:51:37 +04:00
|
|
|
return -1;
|
|
|
|
|
2012-07-18 20:54:00 +04:00
|
|
|
args->mm = *mm;
|
|
|
|
args->mm.n_mm_saved_auxv = 0;
|
|
|
|
args->mm.mm_saved_auxv = NULL;
|
|
|
|
|
|
|
|
if (mm->n_mm_saved_auxv != AT_VECTOR_SIZE) {
|
|
|
|
pr_err("Image corrupted on pid %d\n", pid);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(args->mm_saved_auxv, mm->mm_saved_auxv,
|
|
|
|
pb_repeated_size(mm, mm_saved_auxv));
|
|
|
|
|
2012-04-09 15:52:00 +04:00
|
|
|
exe_fd = open_reg_by_id(args->mm.exe_file_id);
|
|
|
|
if (exe_fd < 0)
|
2012-07-18 20:54:00 +04:00
|
|
|
goto out;
|
2012-04-09 15:52:00 +04:00
|
|
|
|
|
|
|
args->fd_exe_link = exe_fd;
|
2012-07-18 20:54:00 +04:00
|
|
|
ret = 0;
|
|
|
|
out:
|
|
|
|
mm_entry__free_unpacked(mm, NULL);
|
2012-04-09 14:51:37 +04:00
|
|
|
close(fd);
|
2012-07-18 20:54:00 +04:00
|
|
|
return ret;
|
2012-04-09 14:51:37 +04:00
|
|
|
}
|
|
|
|
|
2012-09-14 14:51:40 +04:00
|
|
|
static void *restorer;
|
|
|
|
static unsigned long restorer_len;
|
|
|
|
|
|
|
|
static int prepare_restorer_blob(void)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We map anonymous mapping, not mremap the restorer itself later.
|
|
|
|
* Otherwise the resoter vma would be tied to crtools binary which
|
|
|
|
* in turn will lead to set-exe-file prctl to fail with EBUSY.
|
|
|
|
*/
|
|
|
|
|
|
|
|
restorer_len = round_up(sizeof(restorer_blob), PAGE_SIZE);
|
|
|
|
restorer = mmap(NULL, restorer_len,
|
|
|
|
PROT_READ | PROT_WRITE | PROT_EXEC,
|
|
|
|
MAP_PRIVATE | MAP_ANON, 0, 0);
|
|
|
|
if (restorer == MAP_FAILED) {
|
|
|
|
pr_err("Can't map restorer code");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(restorer, &restorer_blob, sizeof(restorer_blob));
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int remap_restorer_blob(void *addr)
|
|
|
|
{
|
|
|
|
void *mem;
|
|
|
|
|
|
|
|
mem = mremap(restorer, restorer_len, restorer_len,
|
|
|
|
MREMAP_FIXED | MREMAP_MAYMOVE, addr);
|
|
|
|
if (mem != addr) {
|
|
|
|
pr_perror("Can't remap restorer blob");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-07-19 13:23:01 +04:00
|
|
|
/*
 * Prepare the arguments for the restorer blob and jump into it.
 *
 * @pid:      pid of the task being restored
 * @core:     unpacked core entry of the task; it is freed here
 * @tgt_vmas: list of VMAs the task has to end up with
 * @nr_vmas:  number of entries in @tgt_vmas
 *
 * One contiguous area is laid out starting at the address returned by
 * restorer_get_vma_hint():
 *
 *   restorer blob | task + thread args | shmems | task entries |
 *   self VMAs array | target VMAs array
 *
 * The function doesn't return to the caller: on success control is
 * passed to the restorer code, on any failure the process exits
 * with status 1.
 */
static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_vmas, int nr_vmas)
{
	long restore_task_vma_len;
	long restore_thread_vma_len, self_vmas_len, vmas_len;

	void *mem = MAP_FAILED;
	void *restore_thread_exec_start;
	void *restore_task_exec_start;
	void *restore_code_start;

	long new_sp, exec_mem_hint;
	long ret;

	struct task_restore_core_args *task_args;
	struct thread_restore_args *thread_args;

	LIST_HEAD(self_vma_list);
	int fd_pages = -1;
	int i;

	pr_info("Restore via sigreturn\n");

	restore_task_vma_len = 0;
	restore_thread_vma_len = 0;

	/* On success ret is used below as the number of our own VMAs */
	ret = parse_smaps(pid, &self_vma_list, false);
	close_proc();
	if (ret < 0)
		goto err;

	/* required to unmap stack _with_ guard page */
	mark_stack_vma((long) &self_vma_list, &self_vma_list);

	/* One extra entry in each array for the end-of-array marker */
	self_vmas_len = round_up((ret + 1) * sizeof(VmaEntry), PAGE_SIZE);
	vmas_len = round_up((nr_vmas + 1) * sizeof(VmaEntry), PAGE_SIZE);

	/* pr_info_vma_list(&self_vma_list); */

	BUILD_BUG_ON(sizeof(struct task_restore_core_args) & 1);
	BUILD_BUG_ON(sizeof(struct thread_restore_args) & 1);
	BUILD_BUG_ON(SHMEMS_SIZE % PAGE_SIZE);
	BUILD_BUG_ON(TASK_ENTRIES_SIZE % PAGE_SIZE);

	fd_pages = open_image_ro(CR_FD_PAGES, pid);
	if (fd_pages < 0) {
		pr_perror("Can't open pages-%d", pid);
		goto err;
	}

	restore_task_vma_len = round_up(sizeof(*task_args), PAGE_SIZE);
	restore_thread_vma_len = round_up(sizeof(*thread_args) * current->nr_threads, PAGE_SIZE);

	pr_info("%d threads require %ldK of memory\n",
			current->nr_threads,
			KBYTES(restore_thread_vma_len));

	exec_mem_hint = restorer_get_vma_hint(pid, tgt_vmas, &self_vma_list,
					      restorer_len +
					      restore_task_vma_len +
					      restore_thread_vma_len +
					      self_vmas_len + vmas_len +
					      SHMEMS_SIZE + TASK_ENTRIES_SIZE);
	if (exec_mem_hint == -1) {
		pr_err("No suitable area for task_restore bootstrap (%ldK)\n",
		       KBYTES(restore_task_vma_len + restore_thread_vma_len));
		goto err;
	}

	pr_info("Found bootstrap VMA hint at: 0x%lx (needs ~%ldK)\n", exec_mem_hint,
			KBYTES(restore_task_vma_len + restore_thread_vma_len));

	ret = remap_restorer_blob((void *)exec_mem_hint);
	if (ret < 0)
		goto err;

	/*
	 * Prepare a memory map for restorer. Note a thread space
	 * might be completely unused so it's here just for convenience.
	 */
	restore_code_start = (void *)exec_mem_hint;
	restore_thread_exec_start = restore_code_start + restorer_blob_offset____export_restore_thread;
	restore_task_exec_start = restore_code_start + restorer_blob_offset____export_restore_task;

	exec_mem_hint += restorer_len;

	/* VMA we need to run task_restore code */
	mem = mmap((void *)exec_mem_hint,
			restore_task_vma_len + restore_thread_vma_len,
			PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANON | MAP_FIXED, 0, 0);
	if (mem != (void *)exec_mem_hint) {
		pr_err("Can't mmap section for restore code\n");
		goto err;
	}

	memzero(mem, restore_task_vma_len + restore_thread_vma_len);
	task_args = mem;
	thread_args = mem + restore_task_vma_len;

	/*
	 * Adjust stack.
	 */
	new_sp = RESTORE_ALIGN_STACK((long)task_args->mem_zone.stack, sizeof(task_args->mem_zone.stack));

	/*
	 * Get a reference to shared memory area which is
	 * used to signal if shmem restoration complete
	 * from low-level restore code.
	 *
	 * This shmem area is mapped right after the whole area of
	 * sigreturn rt code. Note we didn't allocate it before
	 * but this area is taken into account for 'hint' memory
	 * address.
	 */

	mem += restore_task_vma_len + restore_thread_vma_len;
	ret = shmem_remap(rst_shmems, mem, SHMEMS_SIZE);
	if (ret < 0)
		goto err;
	task_args->shmems = mem;

	mem += SHMEMS_SIZE;
	ret = shmem_remap(task_entries, mem, TASK_ENTRIES_SIZE);
	if (ret < 0)
		goto err;
	task_args->task_entries = mem;

	mem += TASK_ENTRIES_SIZE;
	task_args->self_vmas = vma_list_remap(mem, self_vmas_len, &self_vma_list);
	if (!task_args->self_vmas)
		goto err;

	mem += self_vmas_len;
	task_args->tgt_vmas = vma_list_remap(mem, vmas_len, tgt_vmas);
	if (!task_args->tgt_vmas)
		goto err;

	/*
	 * Arguments for task restoration.
	 */

	BUG_ON(core->mtype != CORE_ENTRY__MARCH__X86_64);

	task_args->pid		= pid;
	task_args->logfd	= log_get_fd();
	task_args->loglevel	= log_get_loglevel();
	task_args->sigchld_act	= sigchld_act;
	task_args->fd_pages	= fd_pages;

	/*
	 * The area was zeroed above, so copying at most size - 1 bytes
	 * always leaves the name NUL-terminated.
	 */
	strncpy(task_args->comm, core->tc->comm, sizeof(task_args->comm) - 1);

	task_args->clear_tid_addr	= core->thread_info->clear_tid_addr;
	task_args->ids			= *core->ids;
	task_args->gpregs		= *core->thread_info->gpregs;
	task_args->blk_sigset		= core->tc->blk_sigset;

	if (core->thread_core) {
		task_args->has_futex		= true;
		task_args->futex_rla		= core->thread_core->futex_rla;
		task_args->futex_rla_len	= core->thread_core->futex_rla_len;
	}

	/* No longer need it */
	core_entry__free_unpacked(core, NULL);
	core = NULL;

	ret = prepare_itimers(pid, task_args);
	if (ret < 0)
		goto err;

	ret = prepare_creds(pid, task_args);
	if (ret < 0)
		goto err;

	ret = prepare_mm(pid, task_args);
	if (ret < 0)
		goto err;

	mutex_init(&task_args->rst_lock);

	/*
	 * Now prepare run-time data for threads restore.
	 */
	task_args->nr_threads		= current->nr_threads;
	task_args->clone_restore_fn	= (void *)restore_thread_exec_start;
	task_args->thread_args		= thread_args;

	/*
	 * Fill up per-thread data.
	 */
	for (i = 0; i < current->nr_threads; i++) {
		int fd_core;

		thread_args[i].pid = current->threads[i].virt;

		/* skip self */
		if (thread_args[i].pid == pid)
			continue;

		fd_core = open_image_ro(CR_FD_CORE, thread_args[i].pid);
		if (fd_core < 0) {
			pr_err("Can't open core data for thread %d\n",
			       thread_args[i].pid);
			goto err;
		}

		ret = pb_read_one(fd_core, &core, PB_CORE);
		close(fd_core);

		/* The entry must not be looked at unless the read succeeded */
		if (ret < 0) {
			pr_err("Can't read core data for thread %d\n",
			       thread_args[i].pid);
			goto err;
		}

		if (core->tc || core->ids) {
			pr_err("Thread has optional fields present %d\n",
			       thread_args[i].pid);
			core_entry__free_unpacked(core, NULL);
			goto err;
		}

		thread_args[i].rst_lock		= &task_args->rst_lock;
		thread_args[i].gpregs		= *core->thread_info->gpregs;
		thread_args[i].clear_tid_addr	= core->thread_info->clear_tid_addr;

		if (core->thread_core) {
			thread_args[i].has_futex	= true;
			thread_args[i].futex_rla	= core->thread_core->futex_rla;
			thread_args[i].futex_rla_len	= core->thread_core->futex_rla_len;
		}

		core_entry__free_unpacked(core, NULL);

		pr_info("Thread %4d stack %8p heap %8p rt_sigframe %8p\n",
				i, thread_args[i].mem_zone.stack,
				thread_args[i].mem_zone.heap,
				thread_args[i].mem_zone.rt_sigframe);

	}

	close_image_dir();

	pr_info("task_args: %p\n"
		"task_args->pid: %d\n"
		"task_args->nr_threads: %d\n"
		"task_args->clone_restore_fn: %p\n"
		"task_args->thread_args: %p\n",
		task_args, task_args->pid,
		task_args->nr_threads,
		task_args->clone_restore_fn,
		task_args->thread_args);

	/*
	 * An indirect call to task_restore, note it never returns
	 * and restoring core is extremely destructive.
	 *
	 * The stack pointer is switched to new_sp and task_args is
	 * passed in %rdi.
	 */
	asm volatile(
		"movq %0, %%rbx \n"
		"movq %1, %%rax \n"
		"movq %2, %%rdi \n"
		"movq %%rbx, %%rsp \n"
		"callq *%%rax \n"
		:
		: "g"(new_sp),
		  "g"(restore_task_exec_start),
		  "g"(task_args)
		: "rsp", "rdi", "rsi", "rbx", "rax", "memory");

err:
	free_mappings(&self_vma_list);

	/* Just to be sure */
	exit(1);
	return -1;
}
|