2011-09-23 12:00:45 +04:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <signal.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <dirent.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include <fcntl.h>
|
2012-10-11 16:52:52 +04:00
|
|
|
#include <grp.h>
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <sys/vfs.h>
|
|
|
|
#include <sys/wait.h>
|
2011-12-01 18:21:17 +04:00
|
|
|
#include <sys/file.h>
|
2012-02-14 20:20:10 +03:00
|
|
|
#include <sys/shm.h>
|
2012-06-19 15:53:00 +04:00
|
|
|
#include <sys/mount.h>
|
2013-08-07 13:51:35 +04:00
|
|
|
#include <sys/prctl.h>
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
#include <sched.h>
|
|
|
|
|
|
|
|
#include <sys/sendfile.h>
|
|
|
|
|
2014-07-01 19:48:23 +04:00
|
|
|
#include "ptrace.h"
|
2011-09-23 12:00:45 +04:00
|
|
|
#include "compiler.h"
|
2013-01-09 17:02:47 +04:00
|
|
|
#include "asm/types.h"
|
2013-05-24 16:20:19 +04:00
|
|
|
#include "asm/restorer.h"
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2013-11-06 17:21:11 +04:00
|
|
|
#include "cr_options.h"
|
2013-11-05 20:17:47 +04:00
|
|
|
#include "servicefd.h"
|
2011-09-23 12:00:45 +04:00
|
|
|
#include "image.h"
|
|
|
|
#include "util.h"
|
2013-08-11 20:00:28 +04:00
|
|
|
#include "util-pie.h"
|
2011-12-19 18:52:50 +04:00
|
|
|
#include "log.h"
|
2011-10-26 17:35:50 +04:00
|
|
|
#include "syscall.h"
|
2011-10-24 22:23:06 +04:00
|
|
|
#include "restorer.h"
|
2011-12-26 22:12:03 +04:00
|
|
|
#include "sockets.h"
|
2012-08-09 16:17:41 +04:00
|
|
|
#include "sk-packet.h"
|
2011-12-26 20:33:09 +04:00
|
|
|
#include "lock.h"
|
2012-01-10 18:03:00 +04:00
|
|
|
#include "files.h"
|
2012-06-22 16:24:00 +04:00
|
|
|
#include "files-reg.h"
|
2012-05-03 17:36:00 +04:00
|
|
|
#include "pipes.h"
|
2012-06-26 02:36:13 +04:00
|
|
|
#include "fifo.h"
|
2012-04-28 17:38:46 +04:00
|
|
|
#include "sk-inet.h"
|
2012-05-04 13:38:00 +04:00
|
|
|
#include "eventfd.h"
|
2012-05-04 13:38:00 +04:00
|
|
|
#include "eventpoll.h"
|
2012-08-02 12:26:35 +04:00
|
|
|
#include "signalfd.h"
|
2012-01-13 20:52:35 +04:00
|
|
|
#include "proc_parse.h"
|
2012-01-14 21:22:06 +03:00
|
|
|
#include "restorer-blob.h"
|
2011-09-23 12:00:45 +04:00
|
|
|
#include "crtools.h"
|
2012-01-26 15:27:00 +04:00
|
|
|
#include "namespaces.h"
|
2014-02-03 15:12:08 +04:00
|
|
|
#include "mem.h"
|
2012-05-04 13:38:00 +04:00
|
|
|
#include "mount.h"
|
2013-01-14 20:47:51 +04:00
|
|
|
#include "fsnotify.h"
|
2012-06-26 14:51:00 +04:00
|
|
|
#include "pstree.h"
|
2012-08-10 19:14:36 +04:00
|
|
|
#include "net.h"
|
2012-09-12 20:00:54 +04:00
|
|
|
#include "tty.h"
|
2012-12-21 17:35:36 +04:00
|
|
|
#include "cpu.h"
|
2013-01-17 16:09:34 +08:00
|
|
|
#include "file-lock.h"
|
2013-04-11 17:50:26 +04:00
|
|
|
#include "page-read.h"
|
2013-05-24 01:42:13 +04:00
|
|
|
#include "vdso.h"
|
2013-08-11 13:00:45 +04:00
|
|
|
#include "stats.h"
|
2013-08-23 19:10:15 +04:00
|
|
|
#include "tun.h"
|
2013-11-05 12:33:03 +04:00
|
|
|
#include "vma.h"
|
2013-10-11 17:38:57 +04:00
|
|
|
#include "kerndat.h"
|
2013-11-02 01:05:13 +04:00
|
|
|
#include "rst-malloc.h"
|
2013-12-19 21:35:00 +04:00
|
|
|
#include "plugin.h"
|
2014-05-08 16:37:00 +04:00
|
|
|
#include "cgroup.h"
|
2014-06-30 21:58:05 +04:00
|
|
|
#include "timerfd.h"
|
2012-01-16 23:52:15 +03:00
|
|
|
|
2013-09-23 14:33:34 +04:00
|
|
|
#include "parasite-syscall.h"
|
|
|
|
|
2012-07-18 16:25:06 +04:00
|
|
|
#include "protobuf.h"
|
|
|
|
#include "protobuf/sa.pb-c.h"
|
2013-06-27 23:32:18 +04:00
|
|
|
#include "protobuf/timer.pb-c.h"
|
2012-07-19 12:43:36 +04:00
|
|
|
#include "protobuf/vma.pb-c.h"
|
2013-01-10 20:08:38 +04:00
|
|
|
#include "protobuf/rlimit.pb-c.h"
|
2013-03-12 21:00:05 +04:00
|
|
|
#include "protobuf/pagemap.pb-c.h"
|
2013-03-25 23:39:49 +04:00
|
|
|
#include "protobuf/siginfo.pb-c.h"
|
2012-07-18 16:25:06 +04:00
|
|
|
|
2013-01-09 17:39:23 +04:00
|
|
|
#include "asm/restore.h"
|
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
static struct pstree_item *current;
|
2011-11-13 12:57:16 +04:00
|
|
|
|
2012-01-26 15:26:00 +04:00
|
|
|
static int restore_task_with_children(void *);
|
2012-11-20 20:39:08 +04:00
|
|
|
static int sigreturn_restore(pid_t pid, CoreEntry *core);
|
2012-09-14 14:51:40 +04:00
|
|
|
static int prepare_restorer_blob(void);
|
2014-03-13 14:30:48 +04:00
|
|
|
static int prepare_rlimits(int pid, CoreEntry *core);
|
2014-04-15 21:59:05 +04:00
|
|
|
static int prepare_posix_timers(int pid, CoreEntry *core);
|
2013-11-03 23:47:51 +04:00
|
|
|
static int prepare_signals(int pid);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-01-17 15:28:13 +03:00
|
|
|
static int shmem_remap(void *old_addr, void *new_addr, unsigned long size)
|
2011-12-26 21:27:03 +04:00
|
|
|
{
|
2012-03-17 11:47:00 +04:00
|
|
|
void *ret;
|
2011-12-26 21:27:03 +04:00
|
|
|
|
2012-03-17 11:47:00 +04:00
|
|
|
ret = mremap(old_addr, size, size,
|
|
|
|
MREMAP_FIXED | MREMAP_MAYMOVE, new_addr);
|
|
|
|
if (new_addr != ret) {
|
|
|
|
pr_perror("mremap failed");
|
2011-12-26 21:27:03 +04:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2012-03-17 11:47:00 +04:00
|
|
|
return 0;
|
2011-12-26 21:27:03 +04:00
|
|
|
}
|
|
|
|
|
2013-05-28 21:11:13 +04:00
|
|
|
/*
 * Run the preparation steps for shared resources, in order: shared
 * fdinfo, inet sockets, tty fds and cgroups.  The first step that
 * reports failure (non-zero) stops the sequence.
 *
 * Returns 0 when every step succeeded, -1 otherwise.
 */
static int crtools_prepare_shared(void)
{
	int failed;

	failed = prepare_shared_fdinfo() ||
		 /* Connections are unlocked from criu */
		 collect_inet_sockets() ||
		 tty_prep_fds() ||
		 prepare_cgroup();

	return failed ? -1 : 0;
}
|
|
|
|
|
2014-02-05 15:26:16 +04:00
|
|
|
/*
|
|
|
|
* Collect order information:
|
|
|
|
* - reg_file should be before remap, as the latter needs
|
|
|
|
* to find file_desc objects
|
|
|
|
* - per-pid collects (mm and fd) should be after remap and
|
|
|
|
* reg_file since both per-pid ones need to get fdesc-s
|
|
|
|
* and bump counters on remaps if they exist
|
|
|
|
*/
|
|
|
|
|
2013-08-21 03:52:18 +04:00
|
|
|
static struct collect_image_info *cinfos[] = {
|
|
|
|
®_file_cinfo,
|
|
|
|
&remap_cinfo,
|
|
|
|
&nsfile_cinfo,
|
|
|
|
&pipe_cinfo,
|
|
|
|
&fifo_cinfo,
|
|
|
|
&unix_sk_cinfo,
|
|
|
|
&packet_sk_cinfo,
|
|
|
|
&netlink_sk_cinfo,
|
|
|
|
&eventfd_cinfo,
|
|
|
|
&epoll_tfd_cinfo,
|
|
|
|
&epoll_cinfo,
|
|
|
|
&signalfd_cinfo,
|
|
|
|
&inotify_cinfo,
|
|
|
|
&inotify_mark_cinfo,
|
|
|
|
&fanotify_cinfo,
|
|
|
|
&fanotify_mark_cinfo,
|
|
|
|
&tty_info_cinfo,
|
|
|
|
&tty_cinfo,
|
2013-08-23 19:10:15 +04:00
|
|
|
&tunfile_cinfo,
|
2013-12-20 16:05:17 +04:00
|
|
|
&ext_file_cinfo,
|
2014-06-30 21:58:05 +04:00
|
|
|
&timerfd_cinfo,
|
2013-08-21 03:52:18 +04:00
|
|
|
};
|
|
|
|
|
2012-09-17 20:06:06 +04:00
|
|
|
static int root_prepare_shared(void)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2013-08-21 03:52:18 +04:00
|
|
|
int ret = 0, i;
|
2012-04-05 15:34:31 +04:00
|
|
|
struct pstree_item *pi;
|
2012-01-26 20:30:31 +04:00
|
|
|
|
2011-09-23 12:00:45 +04:00
|
|
|
pr_info("Preparing info about shared resources\n");
|
|
|
|
|
2012-09-14 17:58:46 +04:00
|
|
|
if (prepare_shared_tty())
|
|
|
|
return -1;
|
|
|
|
|
2012-09-17 20:12:58 +04:00
|
|
|
if (prepare_shared_reg_files())
|
|
|
|
return -1;
|
|
|
|
|
2013-08-21 03:52:18 +04:00
|
|
|
for (i = 0; i < ARRAY_SIZE(cinfos); i++) {
|
|
|
|
ret = collect_image(cinfos[i]);
|
|
|
|
if (ret)
|
|
|
|
return -1;
|
|
|
|
}
|
2013-05-18 04:00:05 +04:00
|
|
|
|
2012-04-05 20:02:00 +04:00
|
|
|
if (collect_pipes())
|
|
|
|
return -1;
|
2012-06-26 02:36:13 +04:00
|
|
|
if (collect_fifo())
|
|
|
|
return -1;
|
2012-04-06 19:27:08 +04:00
|
|
|
if (collect_unix_sockets())
|
|
|
|
return -1;
|
2012-04-03 00:58:41 +04:00
|
|
|
|
2013-08-21 03:52:18 +04:00
|
|
|
if (tty_verify_active_pairs())
|
2012-09-12 20:00:54 +04:00
|
|
|
return -1;
|
|
|
|
|
2012-05-31 14:50:00 +04:00
|
|
|
for_each_pstree_item(pi) {
|
2012-08-02 15:54:54 +04:00
|
|
|
if (pi->state == TASK_HELPER)
|
|
|
|
continue;
|
|
|
|
|
2014-02-03 15:12:08 +04:00
|
|
|
ret = prepare_mm_pid(pi);
|
2012-01-26 20:30:31 +04:00
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2013-01-11 18:16:25 +04:00
|
|
|
ret = prepare_fd_pid(pi);
|
2012-01-26 20:30:31 +04:00
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2014-07-03 19:07:44 +04:00
|
|
|
|
|
|
|
ret = prepare_fs_pid(pi);
|
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-09-12 20:11:33 +04:00
|
|
|
if (ret < 0)
|
|
|
|
goto err;
|
|
|
|
|
2012-04-05 20:02:00 +04:00
|
|
|
mark_pipe_master();
|
2012-09-14 17:50:46 +04:00
|
|
|
|
2012-10-15 20:02:29 +04:00
|
|
|
ret = tty_setup_slavery();
|
|
|
|
if (ret)
|
|
|
|
goto err;
|
2012-09-12 20:00:54 +04:00
|
|
|
|
2012-04-06 19:27:08 +04:00
|
|
|
ret = resolve_unix_peers();
|
2012-09-12 20:11:33 +04:00
|
|
|
if (ret)
|
|
|
|
goto err;
|
2012-04-05 20:02:00 +04:00
|
|
|
|
2012-09-14 14:51:40 +04:00
|
|
|
ret = prepare_restorer_blob();
|
|
|
|
if (ret)
|
|
|
|
goto err;
|
|
|
|
|
2012-09-12 20:11:33 +04:00
|
|
|
show_saved_shmems();
|
|
|
|
show_saved_files();
|
2012-09-12 20:00:54 +04:00
|
|
|
err:
|
2012-01-26 20:30:31 +04:00
|
|
|
return ret;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2013-04-12 13:00:06 -07:00
|
|
|
/* Map a private vma, if it is not mapped by a parent yet */
|
2014-04-01 21:40:00 +04:00
|
|
|
static int map_private_vma(pid_t pid, struct vma_area *vma, void **tgt_addr,
|
2012-11-20 20:48:23 +04:00
|
|
|
struct vma_area **pvma, struct list_head *pvma_list)
|
|
|
|
{
|
2012-11-20 20:48:28 +04:00
|
|
|
int ret;
|
2012-11-20 20:48:23 +04:00
|
|
|
void *addr, *paddr = NULL;
|
2013-08-26 17:15:00 +04:00
|
|
|
unsigned long nr_pages, size;
|
2012-11-20 20:48:23 +04:00
|
|
|
struct vma_area *p = *pvma;
|
|
|
|
|
2014-01-31 21:08:41 +04:00
|
|
|
if (vma_area_is(vma, VMA_FILE_PRIVATE)) {
|
2014-02-07 13:51:29 +04:00
|
|
|
ret = get_filemap_fd(vma);
|
2012-11-20 20:48:28 +04:00
|
|
|
if (ret < 0) {
|
2013-03-20 13:31:29 +04:00
|
|
|
pr_err("Can't fixup VMA's fd\n");
|
2012-11-20 20:48:28 +04:00
|
|
|
return -1;
|
|
|
|
}
|
2014-02-04 00:08:16 +04:00
|
|
|
vma->e->fd = ret;
|
2012-11-20 20:48:28 +04:00
|
|
|
}
|
|
|
|
|
2014-02-04 00:08:16 +04:00
|
|
|
nr_pages = vma_entry_len(vma->e) / PAGE_SIZE;
|
2012-11-20 20:48:33 +04:00
|
|
|
vma->page_bitmap = xzalloc(BITS_TO_LONGS(nr_pages) * sizeof(long));
|
|
|
|
if (vma->page_bitmap == NULL)
|
|
|
|
return -1;
|
|
|
|
|
2014-05-14 01:00:31 +04:00
|
|
|
list_for_each_entry_from(p, pvma_list, list) {
|
2014-02-04 00:08:16 +04:00
|
|
|
if (p->e->start > vma->e->start)
|
2012-11-20 20:48:23 +04:00
|
|
|
break;
|
|
|
|
|
2014-02-04 00:08:16 +04:00
|
|
|
if (!vma_priv(p->e))
|
2013-11-22 18:19:23 +04:00
|
|
|
continue;
|
|
|
|
|
2014-02-04 00:08:16 +04:00
|
|
|
if (p->e->end != vma->e->end ||
|
|
|
|
p->e->start != vma->e->start)
|
2013-11-22 18:19:23 +04:00
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Check flags, which must be identical for both vma-s */
|
2014-02-04 00:08:16 +04:00
|
|
|
if ((vma->e->flags ^ p->e->flags) & (MAP_GROWSDOWN | MAP_ANONYMOUS))
|
2013-11-22 18:19:23 +04:00
|
|
|
break;
|
|
|
|
|
2014-02-04 00:08:16 +04:00
|
|
|
if (!(vma->e->flags & MAP_ANONYMOUS) &&
|
|
|
|
vma->e->shmid != p->e->shmid)
|
2012-11-20 20:48:23 +04:00
|
|
|
break;
|
|
|
|
|
2013-11-22 18:19:23 +04:00
|
|
|
pr_info("COW 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" vma\n",
|
2014-02-04 00:08:16 +04:00
|
|
|
vma->e->start, vma->e->end, vma->e->pgoff);
|
2014-05-14 01:00:13 +04:00
|
|
|
paddr = decode_pointer(p->premmaped_addr);
|
2014-05-14 01:00:22 +04:00
|
|
|
|
|
|
|
break;
|
2012-11-20 20:48:23 +04:00
|
|
|
}
|
|
|
|
|
2013-08-26 17:15:00 +04:00
|
|
|
/*
|
|
|
|
* A grow-down VMA has a guard page, which protect a VMA below it.
|
|
|
|
* So one more page is mapped here to restore content of the first page
|
|
|
|
*/
|
2014-02-04 00:08:16 +04:00
|
|
|
if (vma->e->flags & MAP_GROWSDOWN) {
|
|
|
|
vma->e->start -= PAGE_SIZE;
|
2013-08-26 17:15:00 +04:00
|
|
|
if (paddr)
|
|
|
|
paddr -= PAGE_SIZE;
|
|
|
|
}
|
|
|
|
|
2014-02-04 00:08:16 +04:00
|
|
|
size = vma_entry_len(vma->e);
|
2012-11-20 20:48:23 +04:00
|
|
|
if (paddr == NULL) {
|
2013-07-25 18:29:03 +04:00
|
|
|
/*
|
|
|
|
* The respective memory area was NOT found in the parent.
|
|
|
|
* Map a new one.
|
|
|
|
*/
|
2013-01-16 19:20:08 +04:00
|
|
|
pr_info("Map 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" vma\n",
|
2014-02-04 00:08:16 +04:00
|
|
|
vma->e->start, vma->e->end, vma->e->pgoff);
|
2012-11-20 20:48:23 +04:00
|
|
|
|
2014-04-01 21:40:00 +04:00
|
|
|
addr = mmap(*tgt_addr, size,
|
2014-02-04 00:08:16 +04:00
|
|
|
vma->e->prot | PROT_WRITE,
|
|
|
|
vma->e->flags | MAP_FIXED,
|
|
|
|
vma->e->fd, vma->e->pgoff);
|
2012-11-20 20:48:23 +04:00
|
|
|
|
|
|
|
if (addr == MAP_FAILED) {
|
|
|
|
pr_perror("Unable to map ANON_VMA");
|
|
|
|
return -1;
|
|
|
|
}
|
2014-05-14 01:00:31 +04:00
|
|
|
|
|
|
|
*pvma = p;
|
2012-11-20 20:48:23 +04:00
|
|
|
} else {
|
2013-07-25 18:29:03 +04:00
|
|
|
/*
|
|
|
|
* This region was found in parent -- remap it to inherit physical
|
|
|
|
* pages (if any) from it (and COW them later if required).
|
|
|
|
*/
|
2012-11-20 20:48:33 +04:00
|
|
|
vma->ppage_bitmap = p->page_bitmap;
|
|
|
|
|
2013-08-26 17:15:00 +04:00
|
|
|
addr = mremap(paddr, size, size,
|
2014-04-01 21:40:00 +04:00
|
|
|
MREMAP_FIXED | MREMAP_MAYMOVE, *tgt_addr);
|
|
|
|
if (addr != *tgt_addr) {
|
2012-11-20 20:48:23 +04:00
|
|
|
pr_perror("Unable to remap a private vma");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2014-05-14 01:00:31 +04:00
|
|
|
*pvma = list_entry(p->list.next, struct vma_area, list);
|
2012-11-20 20:48:23 +04:00
|
|
|
}
|
|
|
|
|
2013-11-22 18:19:08 +04:00
|
|
|
vma->premmaped_addr = (unsigned long) addr;
|
2013-03-25 14:48:31 +04:00
|
|
|
pr_debug("\tpremap 0x%016"PRIx64"-0x%016"PRIx64" -> %016lx\n",
|
2014-02-04 00:08:16 +04:00
|
|
|
vma->e->start, vma->e->end, (unsigned long)addr);
|
2012-11-20 20:48:23 +04:00
|
|
|
|
2014-02-04 00:08:16 +04:00
|
|
|
if (vma->e->flags & MAP_GROWSDOWN) { /* Skip gurad page */
|
|
|
|
vma->e->start += PAGE_SIZE;
|
2013-11-22 18:19:08 +04:00
|
|
|
vma->premmaped_addr += PAGE_SIZE;
|
2013-08-26 17:15:00 +04:00
|
|
|
}
|
|
|
|
|
2014-01-31 21:08:41 +04:00
|
|
|
if (vma_area_is(vma, VMA_FILE_PRIVATE))
|
2014-02-04 00:08:16 +04:00
|
|
|
close(vma->e->fd);
|
2012-11-20 20:48:28 +04:00
|
|
|
|
2014-04-01 21:40:00 +04:00
|
|
|
*tgt_addr += size;
|
|
|
|
return 0;
|
2012-11-20 20:48:23 +04:00
|
|
|
}
|
|
|
|
|
2012-11-20 20:48:30 +04:00
|
|
|
static int restore_priv_vma_content(pid_t pid)
|
|
|
|
{
|
|
|
|
struct vma_area *vma;
|
2013-04-11 17:50:26 +04:00
|
|
|
int ret = 0;
|
2014-02-03 15:12:22 +04:00
|
|
|
struct list_head *vmas = ¤t->rst->vmas.h;
|
2012-11-20 20:48:30 +04:00
|
|
|
|
2012-11-20 20:48:34 +04:00
|
|
|
unsigned int nr_restored = 0;
|
|
|
|
unsigned int nr_shared = 0;
|
|
|
|
unsigned int nr_droped = 0;
|
2014-02-04 14:03:10 +04:00
|
|
|
unsigned int nr_compared = 0;
|
2013-03-25 14:48:55 +04:00
|
|
|
unsigned long va;
|
2013-04-11 17:50:26 +04:00
|
|
|
struct page_read pr;
|
2012-11-20 20:48:34 +04:00
|
|
|
|
2014-02-03 15:12:22 +04:00
|
|
|
vma = list_first_entry(vmas, struct vma_area, list);
|
2014-02-28 15:19:19 +04:00
|
|
|
|
|
|
|
ret = open_page_read(pid, &pr,
|
2014-03-14 10:16:54 +04:00
|
|
|
opts.auto_dedup ? O_RDWR : O_RSTR, false);
|
2014-02-28 15:19:19 +04:00
|
|
|
if (ret)
|
|
|
|
return -1;
|
2013-03-12 21:00:05 +04:00
|
|
|
|
2012-11-20 20:48:30 +04:00
|
|
|
/*
|
|
|
|
* Read page contents.
|
|
|
|
*/
|
|
|
|
while (1) {
|
2013-03-27 13:12:02 +04:00
|
|
|
unsigned long off, i, nr_pages;;
|
2013-04-11 17:50:26 +04:00
|
|
|
struct iovec iov;
|
2012-11-20 20:48:30 +04:00
|
|
|
|
2013-04-11 17:50:26 +04:00
|
|
|
ret = pr.get_pagemap(&pr, &iov);
|
|
|
|
if (ret <= 0)
|
|
|
|
break;
|
2013-03-27 13:12:02 +04:00
|
|
|
|
2013-04-11 17:50:26 +04:00
|
|
|
va = (unsigned long)iov.iov_base;
|
|
|
|
nr_pages = iov.iov_len / PAGE_SIZE;
|
2012-11-20 20:48:30 +04:00
|
|
|
|
2013-03-27 13:12:02 +04:00
|
|
|
for (i = 0; i < nr_pages; i++) {
|
2013-03-12 21:00:05 +04:00
|
|
|
unsigned char buf[PAGE_SIZE];
|
|
|
|
void *p;
|
2012-11-20 20:48:33 +04:00
|
|
|
|
2013-03-25 17:43:07 +04:00
|
|
|
/*
|
|
|
|
* The lookup is over *all* possible VMAs
|
|
|
|
* read from image file.
|
|
|
|
*/
|
2014-02-04 00:08:16 +04:00
|
|
|
while (va >= vma->e->end) {
|
2014-02-03 15:12:22 +04:00
|
|
|
if (vma->list.next == vmas)
|
2013-03-25 14:48:55 +04:00
|
|
|
goto err_addr;
|
2013-03-15 18:24:27 +04:00
|
|
|
vma = list_entry(vma->list.next, struct vma_area, list);
|
|
|
|
}
|
|
|
|
|
2013-03-25 17:43:07 +04:00
|
|
|
/*
|
|
|
|
* Make sure the page address is inside existing VMA
|
|
|
|
* and the VMA it refers to still private one, since
|
|
|
|
* there is no guarantee that the data from pagemap is
|
|
|
|
* valid.
|
|
|
|
*/
|
2014-02-04 00:08:16 +04:00
|
|
|
if (va < vma->e->start)
|
2013-03-25 17:43:07 +04:00
|
|
|
goto err_addr;
|
2014-02-04 00:08:16 +04:00
|
|
|
else if (unlikely(!vma_priv(vma->e))) {
|
2013-03-25 17:43:07 +04:00
|
|
|
pr_err("Trying to restore page for non-private VMA\n");
|
|
|
|
goto err_addr;
|
|
|
|
}
|
|
|
|
|
2014-02-04 00:08:16 +04:00
|
|
|
off = (va - vma->e->start) / PAGE_SIZE;
|
2013-08-14 11:18:48 +04:00
|
|
|
p = decode_pointer((off) * PAGE_SIZE +
|
2013-11-22 18:19:08 +04:00
|
|
|
vma->premmaped_addr);
|
2013-03-15 18:24:27 +04:00
|
|
|
|
|
|
|
set_bit(off, vma->page_bitmap);
|
2013-08-14 11:18:48 +04:00
|
|
|
if (vma->ppage_bitmap) { /* inherited vma */
|
2013-03-15 18:24:27 +04:00
|
|
|
clear_bit(off, vma->ppage_bitmap);
|
2012-11-20 20:48:33 +04:00
|
|
|
|
2013-08-14 11:18:48 +04:00
|
|
|
ret = pr.read_page(&pr, va, buf);
|
|
|
|
if (ret < 0)
|
2013-12-17 19:45:22 +04:00
|
|
|
goto err_read;
|
2013-08-14 11:18:48 +04:00
|
|
|
va += PAGE_SIZE;
|
|
|
|
|
2014-02-04 14:03:10 +04:00
|
|
|
nr_compared++;
|
|
|
|
|
2013-08-14 11:18:48 +04:00
|
|
|
if (memcmp(p, buf, PAGE_SIZE) == 0) {
|
|
|
|
nr_shared++; /* the page is cowed */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(p, buf, PAGE_SIZE);
|
|
|
|
} else {
|
|
|
|
ret = pr.read_page(&pr, va, p);
|
|
|
|
if (ret < 0)
|
2013-12-17 19:45:22 +04:00
|
|
|
goto err_read;
|
2013-08-14 11:18:48 +04:00
|
|
|
va += PAGE_SIZE;
|
2013-03-12 21:00:05 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
nr_restored++;
|
2012-11-20 20:48:34 +04:00
|
|
|
}
|
2013-04-11 17:50:26 +04:00
|
|
|
|
|
|
|
if (pr.put_pagemap)
|
|
|
|
pr.put_pagemap(&pr);
|
2012-11-20 20:48:30 +04:00
|
|
|
}
|
2013-04-11 17:50:26 +04:00
|
|
|
|
2013-12-17 19:45:22 +04:00
|
|
|
err_read:
|
2013-04-11 17:50:26 +04:00
|
|
|
pr.close(&pr);
|
2013-03-12 21:00:05 +04:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
2012-11-20 20:48:30 +04:00
|
|
|
|
2012-11-20 20:48:33 +04:00
|
|
|
/* Remove pages, which were not shared with a child */
|
2014-02-03 15:12:22 +04:00
|
|
|
list_for_each_entry(vma, vmas, list) {
|
2012-11-20 20:48:33 +04:00
|
|
|
unsigned long size, i = 0;
|
2013-11-22 18:19:08 +04:00
|
|
|
void *addr = decode_pointer(vma->premmaped_addr);
|
2012-11-20 20:48:33 +04:00
|
|
|
|
|
|
|
if (vma->ppage_bitmap == NULL)
|
|
|
|
continue;
|
|
|
|
|
2014-02-04 00:08:16 +04:00
|
|
|
size = vma_entry_len(vma->e) / PAGE_SIZE;
|
2012-11-20 20:48:33 +04:00
|
|
|
while (1) {
|
|
|
|
/* Find all pages, which are not shared with this child */
|
|
|
|
i = find_next_bit(vma->ppage_bitmap, size, i);
|
|
|
|
|
|
|
|
if ( i >= size)
|
|
|
|
break;
|
|
|
|
|
|
|
|
ret = madvise(addr + PAGE_SIZE * i,
|
|
|
|
PAGE_SIZE, MADV_DONTNEED);
|
|
|
|
if (ret < 0) {
|
2013-05-02 22:44:24 +04:00
|
|
|
pr_perror("madvise failed");
|
2012-11-20 20:48:33 +04:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
i++;
|
2012-11-20 20:48:34 +04:00
|
|
|
nr_droped++;
|
2012-11-20 20:48:33 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-04 14:03:10 +04:00
|
|
|
cnt_add(CNT_PAGES_COMPARED, nr_compared);
|
2013-08-11 13:36:24 +04:00
|
|
|
cnt_add(CNT_PAGES_SKIPPED_COW, nr_shared);
|
2014-02-04 14:03:10 +04:00
|
|
|
cnt_add(CNT_PAGES_RESTORED, nr_restored);
|
2013-08-11 13:36:24 +04:00
|
|
|
|
2012-11-20 20:48:34 +04:00
|
|
|
pr_info("nr_restored_pages: %d\n", nr_restored);
|
|
|
|
pr_info("nr_shared_pages: %d\n", nr_shared);
|
|
|
|
pr_info("nr_droped_pages: %d\n", nr_droped);
|
|
|
|
|
2012-11-20 20:48:30 +04:00
|
|
|
return 0;
|
2013-03-25 14:48:55 +04:00
|
|
|
|
|
|
|
err_addr:
|
|
|
|
pr_err("Page entry address %lx outside of VMA %lx-%lx\n",
|
2014-02-04 00:08:16 +04:00
|
|
|
va, (long)vma->e->start, (long)vma->e->end);
|
2013-03-25 14:48:55 +04:00
|
|
|
return -1;
|
2012-11-20 20:48:30 +04:00
|
|
|
}
|
|
|
|
|
2013-08-11 20:10:44 +04:00
|
|
|
static int prepare_mappings(int pid)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2014-02-03 15:12:22 +04:00
|
|
|
int ret = 0;
|
2012-11-20 20:48:23 +04:00
|
|
|
struct vma_area *pvma, *vma;
|
|
|
|
void *addr;
|
2014-02-03 15:12:22 +04:00
|
|
|
struct vm_area_list *vmas;
|
|
|
|
struct list_head *parent_vmas = NULL;
|
|
|
|
LIST_HEAD(empty);
|
2012-11-20 20:48:23 +04:00
|
|
|
|
|
|
|
void *old_premmapped_addr = NULL;
|
|
|
|
unsigned long old_premmapped_len, pstart = 0;
|
2012-03-27 16:34:00 +04:00
|
|
|
|
2014-02-03 15:12:22 +04:00
|
|
|
vmas = ¤t->rst->vmas;
|
|
|
|
if (vmas->nr == 0) /* Zombie */
|
|
|
|
goto out;
|
|
|
|
|
2013-07-25 18:29:03 +04:00
|
|
|
/*
|
|
|
|
* Keep parent vmas at hands to check whether we can "inherit" them.
|
|
|
|
* See comments in map_private_vma.
|
|
|
|
*/
|
2014-02-03 15:12:22 +04:00
|
|
|
if (current->parent)
|
|
|
|
parent_vmas = ¤t->parent->rst->vmas.h;
|
|
|
|
else
|
|
|
|
parent_vmas = ∅
|
2013-04-05 01:44:32 +04:00
|
|
|
|
2012-11-20 20:48:23 +04:00
|
|
|
/* Reserve a place for mapping private vma-s one by one */
|
2014-02-03 15:12:22 +04:00
|
|
|
addr = mmap(NULL, vmas->priv_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
|
2012-11-20 20:48:23 +04:00
|
|
|
if (addr == MAP_FAILED) {
|
2014-02-03 15:12:22 +04:00
|
|
|
pr_perror("Unable to reserve memory (%lu bytes)", vmas->priv_size);
|
2012-11-20 20:48:23 +04:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2012-12-06 13:19:01 +03:00
|
|
|
old_premmapped_addr = current->rst->premmapped_addr;
|
|
|
|
old_premmapped_len = current->rst->premmapped_len;
|
|
|
|
current->rst->premmapped_addr = addr;
|
2014-02-03 15:12:22 +04:00
|
|
|
current->rst->premmapped_len = vmas->priv_size;
|
2012-11-20 20:48:23 +04:00
|
|
|
|
2014-05-14 01:00:31 +04:00
|
|
|
pvma = list_first_entry(parent_vmas, struct vma_area, list);
|
2012-11-20 20:48:23 +04:00
|
|
|
|
2014-02-03 15:12:22 +04:00
|
|
|
list_for_each_entry(vma, &vmas->h, list) {
|
2014-02-04 00:08:16 +04:00
|
|
|
if (pstart > vma->e->start) {
|
2012-11-20 20:48:23 +04:00
|
|
|
ret = -1;
|
|
|
|
pr_err("VMA-s are not sorted in the image file\n");
|
|
|
|
break;
|
|
|
|
}
|
2014-02-04 00:08:16 +04:00
|
|
|
pstart = vma->e->start;
|
2012-11-20 20:48:23 +04:00
|
|
|
|
2014-02-04 00:08:16 +04:00
|
|
|
if (!vma_priv(vma->e))
|
2012-11-20 20:48:23 +04:00
|
|
|
continue;
|
|
|
|
|
2014-04-01 21:40:00 +04:00
|
|
|
ret = map_private_vma(pid, vma, &addr, &pvma, parent_vmas);
|
2012-11-20 20:48:23 +04:00
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2012-11-20 20:39:02 +04:00
|
|
|
}
|
2012-07-19 12:43:36 +04:00
|
|
|
|
2013-08-26 17:15:00 +04:00
|
|
|
if (ret >= 0)
|
2012-11-20 20:48:30 +04:00
|
|
|
ret = restore_priv_vma_content(pid);
|
2012-11-20 20:39:06 +04:00
|
|
|
|
|
|
|
out:
|
2012-11-20 20:48:23 +04:00
|
|
|
if (old_premmapped_addr &&
|
|
|
|
munmap(old_premmapped_addr, old_premmapped_len)) {
|
|
|
|
pr_perror("Unable to unmap %p(%lx)",
|
|
|
|
old_premmapped_addr, old_premmapped_len);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-11-20 20:39:02 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2013-08-26 17:15:00 +04:00
|
|
|
/*
|
|
|
|
* A gard page must be unmapped after restoring content and
|
|
|
|
* forking children to restore COW memory.
|
|
|
|
*/
|
|
|
|
static int unmap_guard_pages()
|
|
|
|
{
|
|
|
|
struct vma_area *vma;
|
2014-02-03 15:12:22 +04:00
|
|
|
struct list_head *vmas = ¤t->rst->vmas.h;
|
2013-08-26 17:15:00 +04:00
|
|
|
|
2014-02-03 15:12:22 +04:00
|
|
|
list_for_each_entry(vma, vmas, list) {
|
2014-02-04 00:08:16 +04:00
|
|
|
if (!vma_priv(vma->e))
|
2013-08-26 17:15:00 +04:00
|
|
|
continue;
|
|
|
|
|
2014-02-04 00:08:16 +04:00
|
|
|
if (vma->e->flags & MAP_GROWSDOWN) {
|
2013-11-22 18:19:08 +04:00
|
|
|
void *addr = decode_pointer(vma->premmaped_addr);
|
2013-08-26 17:15:00 +04:00
|
|
|
|
|
|
|
if (munmap(addr - PAGE_SIZE, PAGE_SIZE)) {
|
|
|
|
pr_perror("Can't unmap guard page\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-12-10 16:08:07 +03:00
|
|
|
static int open_vmas(int pid)
|
2012-11-20 20:39:02 +04:00
|
|
|
{
|
|
|
|
struct vma_area *vma;
|
|
|
|
int ret = 0;
|
2014-02-03 15:12:22 +04:00
|
|
|
struct list_head *vmas = ¤t->rst->vmas.h;
|
2012-11-20 20:39:02 +04:00
|
|
|
|
2014-02-03 15:12:22 +04:00
|
|
|
list_for_each_entry(vma, vmas, list) {
|
2014-01-31 21:08:41 +04:00
|
|
|
if (!(vma_area_is(vma, VMA_AREA_REGULAR)))
|
2011-09-23 12:00:45 +04:00
|
|
|
continue;
|
|
|
|
|
2013-01-16 19:20:08 +04:00
|
|
|
pr_info("Opening 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" (%x) vma\n",
|
2014-02-04 00:08:16 +04:00
|
|
|
vma->e->start, vma->e->end,
|
|
|
|
vma->e->pgoff, vma->e->status);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2014-01-31 21:08:41 +04:00
|
|
|
if (vma_area_is(vma, VMA_AREA_SYSVIPC))
|
2014-02-04 00:08:16 +04:00
|
|
|
ret = vma->e->shmid;
|
2014-01-31 21:08:41 +04:00
|
|
|
else if (vma_area_is(vma, VMA_ANON_SHARED))
|
2014-02-04 00:08:16 +04:00
|
|
|
ret = get_shmem_fd(pid, vma->e);
|
2014-01-31 21:08:41 +04:00
|
|
|
else if (vma_area_is(vma, VMA_FILE_SHARED))
|
2014-02-07 13:51:29 +04:00
|
|
|
ret = get_filemap_fd(vma);
|
2014-01-31 21:08:41 +04:00
|
|
|
else if (vma_area_is(vma, VMA_AREA_SOCKET))
|
2014-02-04 00:08:16 +04:00
|
|
|
ret = get_socket_fd(pid, vma->e);
|
2012-03-21 19:38:00 +04:00
|
|
|
else
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (ret < 0) {
|
|
|
|
pr_err("Can't fixup fd\n");
|
2012-03-27 16:34:00 +04:00
|
|
|
break;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
2012-03-21 19:38:00 +04:00
|
|
|
|
2012-11-02 15:59:20 +03:00
|
|
|
pr_info("\t`- setting %d as mapping fd\n", ret);
|
2014-02-04 00:08:16 +04:00
|
|
|
vma->e->fd = ret;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
2012-03-27 16:34:00 +04:00
|
|
|
|
2012-11-20 20:39:02 +04:00
|
|
|
return ret < 0 ? -1 : 0;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-01-19 01:33:19 +03:00
|
|
|
static rt_sigaction_t sigchld_act;
|
2014-08-06 16:25:38 +04:00
|
|
|
static rt_sigaction_t parent_act[SIGMAX];
|
|
|
|
|
|
|
|
static bool sa_inherited(int sig, rt_sigaction_t *sa)
|
|
|
|
{
|
|
|
|
rt_sigaction_t *pa;
|
|
|
|
|
|
|
|
if (current == root_item)
|
|
|
|
return false; /* XXX -- inherit from CRIU? */
|
|
|
|
|
|
|
|
pa = &parent_act[sig];
|
|
|
|
return pa->rt_sa_handler == sa->rt_sa_handler &&
|
|
|
|
pa->rt_sa_flags == sa->rt_sa_flags &&
|
|
|
|
pa->rt_sa_restorer == sa->rt_sa_restorer &&
|
|
|
|
pa->rt_sa_mask.sig[0] == sa->rt_sa_mask.sig[0];
|
|
|
|
}
|
|
|
|
|
2014-08-06 16:24:17 +04:00
|
|
|
static int prepare_sigactions(void)
|
2011-11-29 15:12:25 +03:00
|
|
|
{
|
2014-08-06 16:24:17 +04:00
|
|
|
int pid = current->pid.virt;
|
2014-08-06 16:23:59 +04:00
|
|
|
rt_sigaction_t act;
|
2012-02-01 15:24:39 +04:00
|
|
|
int fd_sigact;
|
2012-07-18 16:25:06 +04:00
|
|
|
SaEntry *e;
|
2012-05-18 15:39:00 +04:00
|
|
|
int sig;
|
2014-08-06 16:25:38 +04:00
|
|
|
int ret = 0;
|
2011-11-29 15:12:25 +03:00
|
|
|
|
2013-04-09 11:13:51 +04:00
|
|
|
fd_sigact = open_image(CR_FD_SIGACT, O_RSTR, pid);
|
2011-12-29 19:56:34 +04:00
|
|
|
if (fd_sigact < 0)
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-12-01 17:15:00 +04:00
|
|
|
|
2012-12-04 19:26:54 +04:00
|
|
|
for (sig = 1; sig <= SIGMAX; sig++) {
|
2011-11-29 15:12:25 +03:00
|
|
|
if (sig == SIGKILL || sig == SIGSTOP)
|
|
|
|
continue;
|
|
|
|
|
2012-12-04 19:26:54 +04:00
|
|
|
ret = pb_read_one_eof(fd_sigact, &e, PB_SIGACT);
|
|
|
|
if (ret == 0) {
|
|
|
|
if (sig != SIGMAX_OLD + 1) { /* backward compatibility */
|
|
|
|
pr_err("Unexpected EOF %d\n", sig);
|
|
|
|
ret = -1;
|
|
|
|
break;
|
|
|
|
}
|
2013-04-12 13:00:05 -07:00
|
|
|
pr_warn("This format of sigacts-%d.img is deprecated\n", pid);
|
2012-12-04 19:26:54 +04:00
|
|
|
break;
|
|
|
|
}
|
2012-01-26 20:30:31 +04:00
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2011-11-29 15:12:25 +03:00
|
|
|
|
2013-01-18 11:08:38 +04:00
|
|
|
ASSIGN_TYPED(act.rt_sa_handler, decode_pointer(e->sigaction));
|
2012-07-18 16:25:06 +04:00
|
|
|
ASSIGN_TYPED(act.rt_sa_flags, e->flags);
|
2013-01-18 11:08:38 +04:00
|
|
|
ASSIGN_TYPED(act.rt_sa_restorer, decode_pointer(e->restorer));
|
2012-07-18 16:25:06 +04:00
|
|
|
ASSIGN_TYPED(act.rt_sa_mask.sig[0], e->mask);
|
|
|
|
|
|
|
|
sa_entry__free_unpacked(e, NULL);
|
2011-12-02 23:17:30 +04:00
|
|
|
|
2012-01-19 01:33:19 +03:00
|
|
|
if (sig == SIGCHLD) {
|
|
|
|
sigchld_act = act;
|
|
|
|
continue;
|
|
|
|
}
|
2014-08-06 16:25:38 +04:00
|
|
|
|
|
|
|
if (sa_inherited(sig, &act))
|
|
|
|
continue;
|
|
|
|
|
2011-12-02 23:17:30 +04:00
|
|
|
/*
|
|
|
|
* A pure syscall is used, because glibc
|
|
|
|
* sigaction overwrites se_restorer.
|
|
|
|
*/
|
2014-08-06 16:23:59 +04:00
|
|
|
ret = sys_sigaction(sig, &act, NULL, sizeof(k_rtsigset_t));
|
2011-11-29 15:12:25 +03:00
|
|
|
if (ret == -1) {
|
|
|
|
pr_err("%d: Can't restore sigaction: %m\n", pid);
|
|
|
|
goto err;
|
|
|
|
}
|
2014-08-06 16:25:38 +04:00
|
|
|
|
|
|
|
parent_act[sig] = act;
|
2011-11-29 15:12:25 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
err:
|
2012-02-29 13:39:21 +03:00
|
|
|
close_safe(&fd_sigact);
|
2011-11-29 15:12:25 +03:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-06-22 00:38:00 +04:00
|
|
|
static int pstree_wait_helpers()
|
|
|
|
{
|
|
|
|
struct pstree_item *pi;
|
|
|
|
|
2012-10-08 18:59:26 +04:00
|
|
|
list_for_each_entry(pi, ¤t->children, sibling) {
|
2012-06-22 00:38:00 +04:00
|
|
|
int status, ret;
|
|
|
|
|
|
|
|
if (pi->state != TASK_HELPER)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Check, that a helper completed. */
|
|
|
|
ret = waitpid(pi->pid.virt, &status, 0);
|
|
|
|
if (ret == -1) {
|
|
|
|
if (errno == ECHILD)
|
|
|
|
continue; /* It has been waited in sigchld_handler */
|
|
|
|
pr_err("waitpid(%d) failed\n", pi->pid.virt);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (!WIFEXITED(status) || WEXITSTATUS(status)) {
|
2012-11-23 16:43:33 +04:00
|
|
|
pr_err("%d exited with non-zero code (%d,%d)\n", pi->pid.virt,
|
2012-06-22 00:38:00 +04:00
|
|
|
WEXITSTATUS(status), WTERMSIG(status));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-06-26 14:51:00 +04:00
|
|
|
|
2012-07-19 13:23:01 +04:00
|
|
|
/*
 * Restore the resources of a live task and hand control over to
 * sigreturn_restore().
 *
 * After switching the restore allocator to private mode the steps run
 * in this order: waiting for helpers, file descriptors, file locks,
 * mapping descriptors, signals, posix timers and rlimits.  The first
 * failing step aborts the sequence.
 *
 * Returns -1 if a preparation step failed, otherwise whatever
 * sigreturn_restore() returns.
 */
static int restore_one_alive_task(int pid, CoreEntry *core)
{
	pr_info("Restoring resources\n");

	rst_mem_switch_to_private();

	/* These steps report failure with any non-zero value */
	if (pstree_wait_helpers() ||
	    prepare_fds(current) ||
	    prepare_file_locks(pid) ||
	    open_vmas(pid) ||
	    prepare_signals(pid) ||
	    prepare_posix_timers(pid, core))
		return -1;

	/* ... while only a negative value counts for rlimits */
	if (prepare_rlimits(pid, core) < 0)
		return -1;

	return sigreturn_restore(pid, core);
}
|
|
|
|
|
2012-01-22 20:28:30 +04:00
|
|
|
static void zombie_prepare_signals(void)
|
|
|
|
{
|
|
|
|
sigset_t blockmask;
|
|
|
|
int sig;
|
|
|
|
struct sigaction act;
|
|
|
|
|
|
|
|
sigfillset(&blockmask);
|
|
|
|
sigprocmask(SIG_UNBLOCK, &blockmask, NULL);
|
|
|
|
|
|
|
|
memset(&act, 0, sizeof(act));
|
|
|
|
act.sa_handler = SIG_DFL;
|
|
|
|
|
2012-12-04 19:26:54 +04:00
|
|
|
for (sig = 1; sig <= SIGMAX; sig++)
|
2012-01-22 20:28:30 +04:00
|
|
|
sigaction(sig, &act, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Bitmask of the signals sig_fatal() accepts as a zombie's cause of death.
 *
 * Every bit is built from an unsigned long one.  Some of the listed
 * signals are numbered 31 on Linux, and (1 << 31) overflows a signed
 * int; the resulting negative mask would also be sign-extended when
 * and-ed with an unsigned long bit, making bits 32..63 read as set.
 */
#define SIG_FATAL_MASK	(	\
		(1UL << SIGHUP)		|\
		(1UL << SIGINT)		|\
		(1UL << SIGQUIT)	|\
		(1UL << SIGILL)		|\
		(1UL << SIGTRAP)	|\
		(1UL << SIGABRT)	|\
		(1UL << SIGIOT)		|\
		(1UL << SIGBUS)		|\
		(1UL << SIGFPE)		|\
		(1UL << SIGKILL)	|\
		(1UL << SIGUSR1)	|\
		(1UL << SIGSEGV)	|\
		(1UL << SIGUSR2)	|\
		(1UL << SIGPIPE)	|\
		(1UL << SIGALRM)	|\
		(1UL << SIGTERM)	|\
		(1UL << SIGXCPU)	|\
		(1UL << SIGXFSZ)	|\
		(1UL << SIGVTALRM)	|\
		(1UL << SIGPROF)	|\
		(1UL << SIGPOLL)	|\
		(1UL << SIGIO)		|\
		(1UL << SIGSYS)		|\
		(1UL << SIGUNUSED)	|\
		(1UL << SIGSTKFLT)	|\
		(1UL << SIGPWR)		\
	)
|
|
|
|
|
|
|
|
static inline int sig_fatal(int sig)
|
|
|
|
{
|
2012-12-04 19:26:54 +04:00
|
|
|
return (sig > 0) && (sig < SIGMAX) && (SIG_FATAL_MASK & (1UL << sig));
|
2012-01-22 20:28:30 +04:00
|
|
|
}
|
|
|
|
|
2012-06-26 14:51:00 +04:00
|
|
|
/*
 * Restore-wide bookkeeping used throughout this file: the stage futexes
 * (start, nr_in_progress), the nr_tasks/nr_threads/nr_helpers counters
 * and zombie_lock.  May be NULL (restore_one_zombie() checks for that).
 */
struct task_entries *task_entries;
|
|
|
|
|
2013-07-12 18:12:12 +04:00
|
|
|
/*
 * Turn the current process into the zombie described by @core.
 *
 * core->tc->exit_code is treated as a wait status: the low 7 bits are
 * the terminating signal (if any), bits 8..15 the exit status.  The
 * process either kills itself with the recorded signal or exits with the
 * recorded status, so this function does not return in practice.
 *
 * @pid:  pid used as the target of kill() when dying from a signal
 * @core: core image entry of the zombie
 */
static int restore_one_zombie(int pid, CoreEntry *core)
{
	int exit_code = core->tc->exit_code;

	pr_info("Restoring zombie with %d code\n", exit_code);

	sys_prctl(PR_SET_NAME, (long)(void *)core->tc->comm, 0, 0, 0);

	if (task_entries != NULL) {
		restore_finish_stage(CR_STATE_RESTORE);
		zombie_prepare_signals();
		/* Taken and never released here: we die holding it */
		mutex_lock(&task_entries->zombie_lock);
	}

	if (exit_code & 0x7f) {
		int signr;

		/* prevent generating core files */
		if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0))
			pr_perror("Can't drop the dumpable flag");

		signr = exit_code & 0x7F;
		if (!sig_fatal(signr)) {
			pr_warn("Exit with non fatal signal ignored\n");
			signr = SIGABRT;
		}

		if (kill(pid, signr) < 0)
			pr_perror("Can't kill myself, will just exit");

		/* Only reached if the signal did not kill us */
		exit_code = 0;
	}

	/*
	 * The exit status occupies a full byte of the wait status, so mask
	 * with 0xff: 0x7f would mangle statuses in the 128..255 range.
	 */
	exit((exit_code >> 8) & 0xff);

	/* never reached */
	BUG_ON(1);
	return -1;
}
|
|
|
|
|
2013-08-12 09:04:56 +04:00
|
|
|
/*
 * Sanity-check a freshly read core entry.
 *
 * The entry must match this machine type and carry task-core data; for
 * anything but a dead task it must additionally have ids (either in the
 * core or already on @me) and arch-specific thread info.
 *
 * Returns 0 if the entry is usable, -1 (after logging why) otherwise.
 */
static int check_core(CoreEntry *core, struct pstree_item *me)
{
	if (core->mtype != CORE_ENTRY__MARCH) {
		pr_err("Core march mismatch %d\n", (int)core->mtype);
		return -1;
	}

	if (!core->tc) {
		pr_err("Core task state data missed\n");
		return -1;
	}

	/* The remaining checks do not apply to dead tasks */
	if (core->tc->task_state == TASK_DEAD)
		return 0;

	if (!core->ids && !me->ids) {
		pr_err("Core IDS data missed for non-zombie\n");
		return -1;
	}

	if (!CORE_THREAD_ARCH_INFO(core)) {
		pr_err("Core info data missed for non-zombie\n");
		return -1;
	}

	return 0;
}
|
|
|
|
|
2013-03-25 23:39:52 +04:00
|
|
|
/*
 * Dispatch the final restore step according to current->state:
 * alive/stopped tasks and zombies go to their dedicated routines,
 * helpers have nothing left to do.  @core (if any) is freed before
 * returning.
 *
 * NOTE(review): the original carried a "No more fork()-s => no more
 * per-pid logs" remark here with no matching code -- presumably stale.
 */
static int restore_one_task(int pid, CoreEntry *core)
{
	int ret;

	if (current->state == TASK_ALIVE || current->state == TASK_STOPPED) {
		ret = restore_one_alive_task(pid, core);
	} else if (current->state == TASK_DEAD) {
		ret = restore_one_zombie(pid, core);
	} else if (current->state == TASK_HELPER) {
		ret = 0;
	} else {
		pr_err("Unknown state in code %d\n", (int)core->tc->task_state);
		ret = -1;
	}

	if (core)
		core_entry__free_unpacked(core, NULL);

	return ret;
}
|
|
|
|
|
2012-08-28 23:19:28 +04:00
|
|
|
/* All arguments should be above stack, because it grows down */
|
2012-01-26 15:26:00 +04:00
|
|
|
struct cr_clone_arg {
|
2013-01-29 09:21:46 +04:00
|
|
|
char stack[PAGE_SIZE] __attribute__((aligned (8)));
|
2012-08-28 23:19:28 +04:00
|
|
|
char stack_ptr[0];
|
2012-05-31 14:50:00 +04:00
|
|
|
struct pstree_item *item;
|
2012-01-26 15:27:00 +04:00
|
|
|
unsigned long clone_flags;
|
2012-05-31 14:50:00 +04:00
|
|
|
int fd;
|
2013-03-25 23:39:52 +04:00
|
|
|
|
|
|
|
CoreEntry *core;
|
2012-01-26 15:26:00 +04:00
|
|
|
};
|
|
|
|
|
2013-01-19 01:16:19 +04:00
|
|
|
static inline int fork_with_pid(struct pstree_item *item)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2013-03-25 23:39:52 +04:00
|
|
|
int ret = -1, fd;
|
2012-01-26 15:26:00 +04:00
|
|
|
struct cr_clone_arg ca;
|
2012-06-22 00:38:00 +04:00
|
|
|
pid_t pid = item->pid.virt;
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2013-03-25 23:39:52 +04:00
|
|
|
if (item->state != TASK_HELPER) {
|
2013-04-09 11:13:51 +04:00
|
|
|
fd = open_image(CR_FD_CORE, O_RSTR, pid);
|
2013-03-25 23:39:52 +04:00
|
|
|
if (fd < 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
ret = pb_read_one(fd, &ca.core, PB_CORE);
|
|
|
|
close(fd);
|
|
|
|
|
|
|
|
if (ret < 0)
|
|
|
|
return -1;
|
|
|
|
|
2013-08-12 09:04:56 +04:00
|
|
|
if (check_core(ca.core, item))
|
|
|
|
return -1;
|
|
|
|
|
2013-09-23 14:33:33 +04:00
|
|
|
item->state = ca.core->tc->task_state;
|
2014-05-08 16:55:53 +04:00
|
|
|
item->rst->cg_set = ca.core->tc->cg_set;
|
2013-09-23 14:33:33 +04:00
|
|
|
|
2013-10-01 11:21:34 +04:00
|
|
|
switch (item->state) {
|
|
|
|
case TASK_ALIVE:
|
|
|
|
case TASK_STOPPED:
|
|
|
|
break;
|
|
|
|
case TASK_DEAD:
|
2013-03-25 23:39:52 +04:00
|
|
|
item->parent->rst->nr_zombies++;
|
2013-10-01 11:21:34 +04:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
pr_err("Unknown task state %d\n", item->state);
|
|
|
|
return -1;
|
|
|
|
}
|
2014-05-08 16:55:53 +04:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Helper entry will not get moved around and thus
|
|
|
|
* will live in the parent's cgset.
|
|
|
|
*/
|
|
|
|
item->rst->cg_set = item->parent->rst->cg_set;
|
2013-03-25 23:39:52 +04:00
|
|
|
ca.core = NULL;
|
2014-05-08 16:55:53 +04:00
|
|
|
}
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2014-01-06 01:08:06 +04:00
|
|
|
ret = -1;
|
|
|
|
|
2012-05-31 14:50:00 +04:00
|
|
|
ca.item = item;
|
2013-01-19 01:16:19 +04:00
|
|
|
ca.clone_flags = item->rst->clone_flags;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2013-01-19 01:16:19 +04:00
|
|
|
pr_info("Forking task with %d pid (flags 0x%lx)\n", pid, ca.clone_flags);
|
2013-01-12 00:44:26 +04:00
|
|
|
|
2012-06-19 15:53:00 +04:00
|
|
|
if (!(ca.clone_flags & CLONE_NEWPID)) {
|
2012-08-14 14:09:20 +04:00
|
|
|
char buf[32];
|
|
|
|
|
2014-06-05 20:16:41 +04:00
|
|
|
ca.fd = open_proc_rw(PROC_GEN, LAST_PID_PATH);
|
2012-08-14 14:09:20 +04:00
|
|
|
if (ca.fd < 0) {
|
|
|
|
pr_perror("%d: Can't open %s", pid, LAST_PID_PATH);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (flock(ca.fd, LOCK_EX)) {
|
|
|
|
close(ca.fd);
|
|
|
|
pr_perror("%d: Can't lock %s", pid, LAST_PID_PATH);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
snprintf(buf, sizeof(buf), "%d", pid - 1);
|
2012-06-19 15:53:00 +04:00
|
|
|
if (write_img_buf(ca.fd, buf, strlen(buf)))
|
|
|
|
goto err_unlock;
|
2012-08-14 14:09:20 +04:00
|
|
|
} else {
|
|
|
|
ca.fd = -1;
|
2012-10-09 19:57:15 +04:00
|
|
|
BUG_ON(pid != INIT_PID);
|
2012-08-14 14:09:20 +04:00
|
|
|
}
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2012-08-10 19:14:36 +04:00
|
|
|
if (ca.clone_flags & CLONE_NEWNET)
|
|
|
|
/*
|
|
|
|
* When restoring a net namespace we need to communicate
|
|
|
|
* with the original (i.e. -- init) one. Thus, prepare for
|
|
|
|
* that before we leave the existing namespaces.
|
|
|
|
*/
|
|
|
|
if (netns_pre_create())
|
|
|
|
goto err_unlock;
|
|
|
|
|
2012-08-28 23:19:28 +04:00
|
|
|
ret = clone(restore_task_with_children, ca.stack_ptr,
|
2012-06-19 15:53:00 +04:00
|
|
|
ca.clone_flags | SIGCHLD, &ca);
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2014-08-06 22:06:00 +04:00
|
|
|
if (ret < 0) {
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("Can't fork for %d", pid);
|
2014-08-06 22:06:00 +04:00
|
|
|
goto err_unlock;
|
|
|
|
}
|
|
|
|
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2013-09-23 14:33:25 +04:00
|
|
|
if (item == root_item)
|
2012-08-06 18:31:39 +04:00
|
|
|
item->pid.real = ret;
|
2012-08-14 12:54:00 +04:00
|
|
|
|
2013-09-16 15:45:01 +04:00
|
|
|
if (opts.pidfile && root_item == item) {
|
2013-11-03 12:51:13 +04:00
|
|
|
int pid;
|
|
|
|
|
|
|
|
pid = ret;
|
|
|
|
|
2013-11-20 14:26:41 +04:00
|
|
|
ret = write_pidfile(pid);
|
2013-11-03 12:51:13 +04:00
|
|
|
if (ret < 0) {
|
2013-09-16 15:45:01 +04:00
|
|
|
pr_perror("Can't write pidfile");
|
2013-11-03 12:51:13 +04:00
|
|
|
kill(pid, SIGKILL);
|
|
|
|
}
|
2013-09-16 15:45:01 +04:00
|
|
|
}
|
2012-08-14 12:54:00 +04:00
|
|
|
|
2011-12-01 18:21:17 +04:00
|
|
|
err_unlock:
|
2012-08-14 14:09:20 +04:00
|
|
|
if (ca.fd >= 0) {
|
|
|
|
if (flock(ca.fd, LOCK_UN))
|
|
|
|
pr_perror("%d: Can't unlock %s", pid, LAST_PID_PATH);
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2012-08-14 14:09:20 +04:00
|
|
|
close(ca.fd);
|
|
|
|
}
|
2011-12-01 18:21:17 +04:00
|
|
|
err:
|
2013-03-25 23:39:52 +04:00
|
|
|
if (ca.core)
|
|
|
|
core_entry__free_unpacked(ca.core, NULL);
|
2011-09-23 12:00:45 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-01-19 01:33:19 +03:00
|
|
|
/*
 * SIGCHLD handler used while the restore is in progress.
 *
 * A child that terminates with a non-zero status, or any non-helper
 * child that terminates at all, aborts the restore by poisoning
 * task_entries->nr_in_progress.  Helpers that exit with zero are simply
 * reaped.
 */
static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
{
	struct pstree_item *pi;
	pid_t pid = siginfo->si_pid;
	int status;
	int exited;

	exited = (siginfo->si_code == CLD_EXITED);
	status = siginfo->si_status;

	/*
	 * skip scripts: with no current task, a signal from anything but
	 * the root task only makes us poll the root task itself.
	 */
	if (!current && root_item->pid.real != pid) {
		pid = waitpid(root_item->pid.real, &status, WNOHANG);
		if (pid <= 0)
			return;
		exited = WIFEXITED(status);
		status = exited ? WEXITSTATUS(status) : WTERMSIG(status);
	}

	if (!current || status)
		goto err;

	while (pid) {
		pid = waitpid(-1, &status, WNOHANG);
		if (pid <= 0)
			return;

		exited = WIFEXITED(status);
		status = exited ? WEXITSTATUS(status) : WTERMSIG(status);
		if (status)
			break;

		/*
		 * Exited (with zero code) helpers are OK.  Look up the child
		 * that was actually reaped: waitpid(-1) may return a child
		 * other than the one named in siginfo.
		 */
		list_for_each_entry(pi, &current->children, sibling)
			if (pi->pid.virt == pid)
				break;

		BUG_ON(&pi->sibling == &current->children);
		if (pi->state != TASK_HELPER)
			break;
	}

err:
	if (exited)
		pr_err("%d exited, status=%d\n", pid, status);
	else
		pr_err("%d killed by signal %d\n", pid, status);

	futex_abort_and_wake(&task_entries->nr_in_progress);
}
|
|
|
|
|
2014-08-06 16:25:08 +04:00
|
|
|
static int criu_signals_setup(void)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
struct sigaction act;
|
2014-08-06 16:25:25 +04:00
|
|
|
sigset_t blockmask;
|
2014-08-06 16:25:08 +04:00
|
|
|
|
|
|
|
ret = sigaction(SIGCHLD, NULL, &act);
|
|
|
|
if (ret < 0) {
|
|
|
|
pr_perror("sigaction() failed");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
act.sa_flags |= SA_NOCLDSTOP | SA_SIGINFO | SA_RESTART;
|
|
|
|
if (opts.swrk_restore)
|
|
|
|
/*
|
|
|
|
* Root task will be our sibling. This means, that
|
|
|
|
* we will not notice when (if) it dies in SIGCHLD
|
|
|
|
* handler, but we should. To do this -- attach to
|
|
|
|
* the guy with ptrace (below) and (!) make the kernel
|
|
|
|
* deliver us the signal when it will get stopped.
|
|
|
|
* It will in case of e.g. segfault before handling
|
|
|
|
* the signal.
|
|
|
|
*/
|
|
|
|
act.sa_flags &= ~SA_NOCLDSTOP;
|
|
|
|
|
|
|
|
act.sa_sigaction = sigchld_handler;
|
|
|
|
sigemptyset(&act.sa_mask);
|
|
|
|
sigaddset(&act.sa_mask, SIGCHLD);
|
|
|
|
|
|
|
|
ret = sigaction(SIGCHLD, &act, NULL);
|
|
|
|
if (ret < 0) {
|
|
|
|
pr_perror("sigaction() failed");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2014-08-06 16:25:25 +04:00
|
|
|
/*
|
|
|
|
* The block mask will be restored in sigreturn.
|
|
|
|
*
|
|
|
|
* TODO: This code should be removed, when a freezer will be added.
|
|
|
|
*/
|
|
|
|
sigfillset(&blockmask);
|
|
|
|
sigdelset(&blockmask, SIGCHLD);
|
|
|
|
ret = sigprocmask(SIG_BLOCK, &blockmask, NULL);
|
|
|
|
if (ret < 0) {
|
|
|
|
pr_perror("Can't block signals");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2014-08-06 16:25:08 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-04-11 22:11:41 +04:00
|
|
|
static void restore_sid(void)
|
|
|
|
{
|
|
|
|
pid_t sid;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SID can only be reset to pid or inherited from parent.
|
|
|
|
* Thus we restore it right here to let our kids inherit
|
|
|
|
* one in case they need it.
|
|
|
|
*
|
|
|
|
* PGIDs are restored late when all tasks are forked and
|
|
|
|
* we can call setpgid() on custom values.
|
|
|
|
*/
|
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
if (current->pid.virt == current->sid) {
|
|
|
|
pr_info("Restoring %d to %d sid\n", current->pid.virt, current->sid);
|
2012-04-11 22:11:41 +04:00
|
|
|
sid = setsid();
|
2012-09-05 19:52:55 +04:00
|
|
|
if (sid != current->sid) {
|
2012-04-11 22:11:41 +04:00
|
|
|
pr_perror("Can't restore sid (%d)", sid);
|
2012-12-26 18:15:03 +03:00
|
|
|
exit(1);
|
2012-04-11 22:11:41 +04:00
|
|
|
}
|
|
|
|
} else {
|
2012-06-22 00:39:00 +04:00
|
|
|
sid = getsid(getpid());
|
2012-09-05 19:52:55 +04:00
|
|
|
if (sid != current->sid) {
|
2012-06-22 00:39:00 +04:00
|
|
|
/* Skip the root task if it's not init */
|
2012-10-09 19:57:15 +04:00
|
|
|
if (current == root_item && root_item->pid.virt != INIT_PID)
|
2012-06-22 00:39:00 +04:00
|
|
|
return;
|
2012-04-11 22:11:41 +04:00
|
|
|
pr_err("Requested sid %d doesn't match inherited %d\n",
|
2012-09-05 19:52:55 +04:00
|
|
|
current->sid, sid);
|
2012-12-26 18:15:03 +03:00
|
|
|
exit(1);
|
2012-04-11 22:11:41 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void restore_pgid(void)
|
|
|
|
{
|
2013-09-27 04:38:00 +04:00
|
|
|
/*
|
|
|
|
* Unlike sessions, process groups (a.k.a. pgids) can be joined
|
|
|
|
* by any task, provided the task with pid == pgid (group leader)
|
|
|
|
* exists. Thus, in order to restore pgid we must make sure that
|
|
|
|
* group leader was born and created the group, then join one.
|
|
|
|
*
|
|
|
|
* We do this _before_ finishing the forking stage to make sure
|
|
|
|
* helpers are still with us.
|
|
|
|
*/
|
|
|
|
|
|
|
|
pid_t pgid, my_pgid = current->pgid;
|
2012-04-11 22:11:41 +04:00
|
|
|
|
2013-09-27 04:38:00 +04:00
|
|
|
pr_info("Restoring %d to %d pgid\n", current->pid.virt, my_pgid);
|
2012-04-11 22:11:41 +04:00
|
|
|
|
|
|
|
pgid = getpgrp();
|
2013-09-27 04:38:00 +04:00
|
|
|
if (my_pgid == pgid)
|
2012-04-11 22:11:41 +04:00
|
|
|
return;
|
|
|
|
|
2013-09-27 04:38:00 +04:00
|
|
|
if (my_pgid != current->pid.virt) {
|
|
|
|
struct pstree_item *leader;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Wait for leader to become such.
|
|
|
|
* Missing leader means we're going to crtools
|
|
|
|
* group (-j option).
|
|
|
|
*/
|
|
|
|
|
|
|
|
leader = current->rst->pgrp_leader;
|
|
|
|
if (leader) {
|
|
|
|
BUG_ON(my_pgid != leader->pid.virt);
|
|
|
|
futex_wait_until(&leader->rst->pgrp_set, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-04-11 22:11:41 +04:00
|
|
|
pr_info("\twill call setpgid, mine pgid is %d\n", pgid);
|
2013-09-27 04:38:00 +04:00
|
|
|
if (setpgid(0, my_pgid) != 0) {
|
2012-09-05 19:52:55 +04:00
|
|
|
pr_perror("Can't restore pgid (%d/%d->%d)", current->pid.virt, pgid, current->pgid);
|
2012-12-26 18:15:03 +03:00
|
|
|
exit(1);
|
2012-04-11 22:11:41 +04:00
|
|
|
}
|
2013-09-27 04:38:00 +04:00
|
|
|
|
|
|
|
if (my_pgid == current->pid.virt)
|
|
|
|
futex_set_and_wake(¤t->rst->pgrp_set, 1);
|
2012-04-11 22:11:41 +04:00
|
|
|
}
|
|
|
|
|
2012-08-06 18:36:59 +04:00
|
|
|
/*
 * Mount a fresh procfs on a temporary directory, open it through
 * open_detach_mount() and register the resulting fd with set_proc_fd().
 *
 * Returns the set_proc_fd() result on success and a negative value if
 * any step fails.
 */
static int mount_proc(void)
{
	char mnt[] = "crtools-proc.XXXXXX";
	int fd, ret;

	if (!mkdtemp(mnt)) {
		pr_perror("mkdtemp failed %s", mnt);
		return -1;
	}

	pr_info("Mount procfs in %s\n", mnt);
	if (mount("proc", mnt, "proc", MS_MGC_VAL, NULL)) {
		pr_perror("mount failed");
		rmdir(mnt);
		return -1;
	}

	fd = open_detach_mount(mnt);
	if (fd < 0)
		return fd;

	ret = set_proc_fd(fd);
	close(fd);

	return ret;
}
|
|
|
|
|
2013-08-11 20:15:43 +04:00
|
|
|
/*
|
|
|
|
* Tasks cannot change sid (session id) arbitrary, but can either
|
|
|
|
* inherit one from ancestor, or create a new one with id equal to
|
|
|
|
* their pid. Thus sid-s restore is tied with children creation.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int create_children_and_session(void)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
struct pstree_item *child;
|
|
|
|
|
|
|
|
pr_info("Restoring children in alien sessions:\n");
|
|
|
|
list_for_each_entry(child, ¤t->children, sibling) {
|
|
|
|
if (!restore_before_setsid(child))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
BUG_ON(child->born_sid != -1 && getsid(getpid()) != child->born_sid);
|
|
|
|
|
|
|
|
ret = fork_with_pid(child);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
restore_sid();
|
|
|
|
|
|
|
|
pr_info("Restoring children in our session:\n");
|
|
|
|
list_for_each_entry(child, ¤t->children, sibling) {
|
|
|
|
if (restore_before_setsid(child))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ret = fork_with_pid(child);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-01-26 15:26:00 +04:00
|
|
|
static int restore_task_with_children(void *_arg)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2012-01-26 15:26:00 +04:00
|
|
|
struct cr_clone_arg *ca = _arg;
|
2012-04-05 15:34:31 +04:00
|
|
|
pid_t pid;
|
2012-05-31 14:50:00 +04:00
|
|
|
int ret;
|
2012-01-26 15:26:00 +04:00
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
current = ca->item;
|
2012-05-31 14:50:00 +04:00
|
|
|
|
2013-09-23 14:33:25 +04:00
|
|
|
if (current != root_item) {
|
|
|
|
char buf[PATH_MAX];
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
/* Determine PID in CRIU's namespace */
|
|
|
|
fd = get_service_fd(CR_PROC_FD_OFF);
|
|
|
|
if (fd < 0)
|
|
|
|
exit(1);
|
|
|
|
|
|
|
|
ret = readlinkat(fd, "self", buf, sizeof(buf) - 1);
|
|
|
|
if (ret < 0) {
|
|
|
|
pr_perror("Unable to read the /proc/self link");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
buf[ret] = '\0';
|
|
|
|
|
|
|
|
current->pid.real = atoi(buf);
|
|
|
|
pr_debug("PID: real %d virt %d\n",
|
|
|
|
current->pid.real, current->pid.virt);
|
|
|
|
}
|
|
|
|
|
2014-04-21 14:48:05 +04:00
|
|
|
if ( !(ca->clone_flags & CLONE_FILES))
|
|
|
|
close_safe(&ca->fd);
|
|
|
|
|
2013-01-12 00:44:26 +04:00
|
|
|
if (current->state != TASK_HELPER) {
|
|
|
|
ret = clone_service_fd(current->rst->service_fd_id);
|
|
|
|
if (ret)
|
|
|
|
exit(1);
|
|
|
|
}
|
2013-01-11 18:16:24 +04:00
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
pid = getpid();
|
2012-09-05 19:52:55 +04:00
|
|
|
if (current->pid.virt != pid) {
|
|
|
|
pr_err("Pid %d do not match expected %d\n", pid, current->pid.virt);
|
2012-01-26 15:26:00 +04:00
|
|
|
exit(-1);
|
|
|
|
}
|
2011-12-02 16:06:00 +04:00
|
|
|
|
2012-05-02 14:42:00 +04:00
|
|
|
ret = log_init_by_pid();
|
|
|
|
if (ret < 0)
|
|
|
|
exit(1);
|
|
|
|
|
2012-08-06 18:37:13 +04:00
|
|
|
/* Restore root task */
|
2012-09-05 19:52:55 +04:00
|
|
|
if (current->parent == NULL) {
|
2013-12-26 17:00:38 +04:00
|
|
|
if (restore_finish_stage(CR_STATE_RESTORE_NS) < 0)
|
|
|
|
exit(1);
|
|
|
|
|
2013-09-30 17:16:51 +04:00
|
|
|
if (prepare_namespace(current, ca->clone_flags))
|
2013-03-27 14:16:09 +04:00
|
|
|
exit(1);
|
2012-06-27 20:57:40 +04:00
|
|
|
|
2012-08-01 15:01:13 +04:00
|
|
|
/*
|
|
|
|
* We need non /proc proc mount for restoring pid and mount
|
|
|
|
* namespaces and do not care for the rest of the cases.
|
|
|
|
* Thus -- mount proc at custom location for any new namespace
|
|
|
|
*/
|
2012-08-06 18:36:59 +04:00
|
|
|
if (mount_proc())
|
2014-04-21 18:23:19 +04:00
|
|
|
goto err;
|
2012-01-26 15:27:00 +04:00
|
|
|
|
2014-04-11 15:21:00 +04:00
|
|
|
if (close_old_fds(current))
|
|
|
|
exit(1);
|
|
|
|
|
2012-09-17 20:06:06 +04:00
|
|
|
if (root_prepare_shared())
|
2014-04-21 18:23:19 +04:00
|
|
|
goto err;
|
2012-08-06 18:37:13 +04:00
|
|
|
}
|
2012-08-02 16:08:06 +04:00
|
|
|
|
2013-08-11 20:10:44 +04:00
|
|
|
if (prepare_mappings(pid))
|
2014-04-21 18:23:19 +04:00
|
|
|
goto err;
|
2012-11-20 20:39:09 +04:00
|
|
|
|
2014-04-21 14:48:05 +04:00
|
|
|
if (!(ca->clone_flags & CLONE_FILES)) {
|
|
|
|
ret = close_old_fds(current);
|
|
|
|
if (ret)
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2014-05-08 16:55:53 +04:00
|
|
|
/*
|
|
|
|
* Call this _before_ forking to optimize cgroups
|
|
|
|
* restore -- if all tasks live in one set of cgroups
|
|
|
|
* we will only move the root one there, others will
|
|
|
|
* just have it inherited.
|
|
|
|
*/
|
|
|
|
if (prepare_task_cgroup(current) < 0)
|
|
|
|
return -1;
|
|
|
|
|
2014-08-06 16:25:38 +04:00
|
|
|
if (prepare_sigactions() < 0)
|
|
|
|
return -1;
|
|
|
|
|
2013-08-11 20:15:43 +04:00
|
|
|
if (create_children_and_session())
|
2014-04-21 18:23:19 +04:00
|
|
|
goto err;
|
2012-06-22 00:39:00 +04:00
|
|
|
|
2014-04-22 20:40:06 +04:00
|
|
|
if (restore_task_mnt_ns(current))
|
|
|
|
goto err;
|
2014-04-21 18:23:20 +04:00
|
|
|
|
2013-08-26 17:15:00 +04:00
|
|
|
if (unmap_guard_pages())
|
2014-04-21 18:23:19 +04:00
|
|
|
goto err;
|
2013-08-11 20:23:18 +04:00
|
|
|
|
2013-09-27 04:38:00 +04:00
|
|
|
restore_pgid();
|
2012-07-02 15:25:00 +04:00
|
|
|
|
2013-08-20 15:17:30 +04:00
|
|
|
if (restore_finish_stage(CR_STATE_FORKING) < 0)
|
2014-04-21 18:23:19 +04:00
|
|
|
goto err;
|
|
|
|
|
|
|
|
if (current->parent == NULL && fini_mnt_ns())
|
|
|
|
exit (1);
|
2012-07-02 15:25:00 +04:00
|
|
|
|
2013-03-25 23:39:52 +04:00
|
|
|
return restore_one_task(current->pid.virt, ca->core);
|
2014-04-21 18:23:19 +04:00
|
|
|
err:
|
|
|
|
if (current->parent == NULL)
|
|
|
|
fini_mnt_ns();
|
|
|
|
|
|
|
|
exit(1);
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-12-04 17:37:13 +03:00
|
|
|
static inline int stage_participants(int next_stage)
|
|
|
|
{
|
|
|
|
switch (next_stage) {
|
2013-08-20 15:17:30 +04:00
|
|
|
case CR_STATE_FAIL:
|
|
|
|
return 0;
|
2013-05-31 19:01:31 +04:00
|
|
|
case CR_STATE_RESTORE_NS:
|
|
|
|
return 1;
|
2012-12-04 17:37:13 +03:00
|
|
|
case CR_STATE_FORKING:
|
2013-08-16 18:55:26 +04:00
|
|
|
return task_entries->nr_tasks + task_entries->nr_helpers;
|
2012-12-04 17:37:13 +03:00
|
|
|
case CR_STATE_RESTORE:
|
|
|
|
case CR_STATE_RESTORE_SIGCHLD:
|
|
|
|
return task_entries->nr_threads;
|
2013-04-19 15:58:50 +04:00
|
|
|
case CR_STATE_RESTORE_CREDS:
|
|
|
|
return task_entries->nr_threads;
|
2012-12-04 17:37:13 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
BUG();
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2013-05-31 19:01:31 +04:00
|
|
|
static int restore_wait_inprogress_tasks()
|
2012-12-04 17:37:13 +03:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
futex_t *np = &task_entries->nr_in_progress;
|
|
|
|
|
|
|
|
futex_wait_while_gt(np, 0);
|
|
|
|
ret = (int)futex_get(np);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
2013-05-31 19:01:31 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-07-15 18:14:11 +04:00
|
|
|
static void __restore_switch_stage(int next_stage)
|
|
|
|
{
|
|
|
|
futex_set(&task_entries->nr_in_progress,
|
|
|
|
stage_participants(next_stage));
|
|
|
|
futex_set_and_wake(&task_entries->start, next_stage);
|
|
|
|
}
|
|
|
|
|
2013-05-31 19:01:31 +04:00
|
|
|
/*
 * Switch to @next_stage and wait for its participants.
 * Returns what restore_wait_inprogress_tasks() reports.
 */
static int restore_switch_stage(int next_stage)
{
	int ret;

	__restore_switch_stage(next_stage);
	ret = restore_wait_inprogress_tasks();

	return ret;
}
|
|
|
|
|
2014-06-30 20:30:44 +04:00
|
|
|
static int attach_to_tasks(bool root_seized)
|
2013-09-23 14:33:34 +04:00
|
|
|
{
|
|
|
|
struct pstree_item *item;
|
|
|
|
|
|
|
|
for_each_pstree_item(item) {
|
|
|
|
pid_t pid = item->pid.real;
|
|
|
|
int status, i;
|
|
|
|
|
|
|
|
if (item->state == TASK_DEAD)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (item->state == TASK_HELPER)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (parse_threads(item->pid.real, &item->threads, &item->nr_threads))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
for (i = 0; i < item->nr_threads; i++) {
|
|
|
|
pid = item->threads[i].real;
|
|
|
|
|
2014-06-30 20:30:44 +04:00
|
|
|
if (item != root_item || !root_seized) {
|
|
|
|
if (ptrace(PTRACE_ATTACH, pid, 0, 0)) {
|
|
|
|
pr_perror("Can't attach to %d", pid);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Root item is SEIZE-d, so we only need
|
|
|
|
* to stop one (INTERRUPT) to make wait4
|
|
|
|
* and SYSCALL below work.
|
|
|
|
*/
|
|
|
|
if (ptrace(PTRACE_INTERRUPT, pid, 0, 0)) {
|
|
|
|
pr_perror("Can't interrupt task");
|
|
|
|
return -1;
|
|
|
|
}
|
2013-09-23 14:33:34 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (wait4(pid, &status, __WALL, NULL) != pid) {
|
|
|
|
pr_perror("waitpid() failed");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ptrace(PTRACE_SYSCALL, pid, NULL, NULL)) {
|
|
|
|
pr_perror("Unable to start %d", pid);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void finalize_restore(int status)
|
|
|
|
{
|
|
|
|
struct pstree_item *item;
|
|
|
|
|
|
|
|
for_each_pstree_item(item) {
|
|
|
|
pid_t pid = item->pid.real;
|
2013-09-23 14:33:35 +04:00
|
|
|
struct parasite_ctl *ctl;
|
2013-09-23 14:33:34 +04:00
|
|
|
int i;
|
|
|
|
|
|
|
|
if (item->state == TASK_DEAD)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (item->state == TASK_HELPER)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (status < 0)
|
|
|
|
goto detach;
|
|
|
|
|
2013-09-23 14:33:35 +04:00
|
|
|
/* Unmap the restorer blob */
|
|
|
|
ctl = parasite_prep_ctl(pid, NULL);
|
|
|
|
if (ctl == NULL)
|
|
|
|
goto detach;
|
|
|
|
|
|
|
|
parasite_unmap(ctl, (unsigned long) item->rst->munmap_restorer);
|
|
|
|
|
|
|
|
xfree(ctl);
|
|
|
|
|
2013-10-01 11:21:34 +04:00
|
|
|
if (item->state == TASK_STOPPED)
|
|
|
|
kill(item->pid.real, SIGSTOP);
|
2013-09-23 14:33:34 +04:00
|
|
|
detach:
|
|
|
|
for (i = 0; i < item->nr_threads; i++) {
|
|
|
|
pid = item->threads[i].real;
|
|
|
|
if (pid < 0) {
|
|
|
|
BUG_ON(status >= 0);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ptrace(PTRACE_DETACH, pid, NULL, 0))
|
|
|
|
pr_perror("Unable to execute %d", pid);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-08-06 16:24:52 +04:00
|
|
|
static void ignore_kids(void)
|
|
|
|
{
|
|
|
|
struct sigaction sa = { .sa_handler = SIG_DFL };
|
|
|
|
|
|
|
|
if (sigaction(SIGCHLD, &sa, NULL) < 0)
|
|
|
|
pr_perror("Restoring CHLD sigaction failed");
|
|
|
|
}
|
|
|
|
|
2013-05-28 21:11:13 +04:00
|
|
|
static int restore_root_task(struct pstree_item *init)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2013-09-23 14:33:25 +04:00
|
|
|
int ret, fd;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2013-09-23 14:33:25 +04:00
|
|
|
fd = open("/proc", O_DIRECTORY | O_RDONLY);
|
|
|
|
if (fd < 0) {
|
|
|
|
pr_perror("Unable to open /proc");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = install_service_fd(CR_PROC_FD_OFF, fd);
|
|
|
|
close(fd);
|
|
|
|
if (ret < 0)
|
|
|
|
return -1;
|
|
|
|
|
2012-01-26 15:27:00 +04:00
|
|
|
/*
|
|
|
|
* FIXME -- currently we assume that all the tasks live
|
|
|
|
* in the same set of namespaces. This is done to debug
|
|
|
|
* the ns contents dumping/restoring. Need to revisit
|
|
|
|
* this later.
|
|
|
|
*/
|
|
|
|
|
2012-10-09 19:57:15 +04:00
|
|
|
if (init->pid.virt == INIT_PID) {
|
2014-04-21 18:23:22 +04:00
|
|
|
if (!(root_ns_mask & CLONE_NEWPID)) {
|
2012-12-06 10:38:46 +03:00
|
|
|
pr_err("This process tree can only be restored "
|
|
|
|
"in a new pid namespace.\n"
|
2013-05-09 10:58:03 -07:00
|
|
|
"criu should be re-executed with the "
|
2012-12-06 10:38:46 +03:00
|
|
|
"\"--namespace pid\" option.\n");
|
2012-06-22 00:38:00 +04:00
|
|
|
return -1;
|
|
|
|
}
|
2014-04-21 18:23:22 +04:00
|
|
|
} else if (root_ns_mask & CLONE_NEWPID) {
|
2012-06-22 00:38:00 +04:00
|
|
|
pr_err("Can't restore pid namespace without the process init\n");
|
|
|
|
return -1;
|
2012-06-19 15:53:00 +04:00
|
|
|
}
|
|
|
|
|
2013-05-31 19:01:31 +04:00
|
|
|
futex_set(&task_entries->nr_in_progress,
|
|
|
|
stage_participants(CR_STATE_RESTORE_NS));
|
2012-06-22 00:38:00 +04:00
|
|
|
|
2014-06-30 20:30:44 +04:00
|
|
|
/*
|
|
|
|
* This means we're called from lib's criu_restore_child().
|
|
|
|
* In that case create the root task as the child one to+
|
|
|
|
* the caller. This is the only way to correctly restore the
|
|
|
|
* pdeath_sig of the root task. But also looks nice.
|
|
|
|
*/
|
|
|
|
if (opts.swrk_restore)
|
|
|
|
init->rst->clone_flags |= CLONE_PARENT;
|
|
|
|
|
2013-01-19 01:16:19 +04:00
|
|
|
ret = fork_with_pid(init);
|
2011-09-23 12:00:45 +04:00
|
|
|
if (ret < 0)
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2014-06-30 20:30:44 +04:00
|
|
|
if (opts.swrk_restore) {
|
|
|
|
if (ptrace(PTRACE_SEIZE, init->pid.real, 0, 0)) {
|
|
|
|
pr_perror("Can't attach to init");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-05-31 19:01:31 +04:00
|
|
|
pr_info("Wait until namespaces are created\n");
|
|
|
|
ret = restore_wait_inprogress_tasks();
|
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
ret = run_scripts("setup-namespaces");
|
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
|
2013-08-11 21:22:40 +04:00
|
|
|
timing_start(TIME_FORK);
|
|
|
|
|
2013-08-12 06:17:04 +04:00
|
|
|
ret = restore_switch_stage(CR_STATE_FORKING);
|
|
|
|
if (ret < 0)
|
2012-04-11 22:06:36 +04:00
|
|
|
goto out;
|
|
|
|
|
2013-08-11 21:22:40 +04:00
|
|
|
timing_stop(TIME_FORK);
|
|
|
|
|
2013-08-12 06:17:04 +04:00
|
|
|
ret = restore_switch_stage(CR_STATE_RESTORE);
|
2012-09-25 15:54:51 +04:00
|
|
|
if (ret < 0)
|
2013-08-20 15:17:30 +04:00
|
|
|
goto out_kill;
|
2012-03-26 23:11:00 +04:00
|
|
|
|
2013-08-12 06:17:04 +04:00
|
|
|
ret = restore_switch_stage(CR_STATE_RESTORE_SIGCHLD);
|
2013-04-19 15:58:50 +04:00
|
|
|
if (ret < 0)
|
2013-08-20 15:17:30 +04:00
|
|
|
goto out_kill;
|
2013-04-19 15:58:50 +04:00
|
|
|
|
2014-03-31 22:00:00 +04:00
|
|
|
ret = run_scripts("post-restore");
|
|
|
|
if (ret != 0) {
|
|
|
|
pr_err("Aborting restore due to script ret code %d\n", ret);
|
|
|
|
timing_stop(TIME_RESTORE);
|
|
|
|
write_stats(RESTORE_STATS);
|
|
|
|
goto out_kill;
|
|
|
|
}
|
|
|
|
|
2013-07-15 18:14:12 +04:00
|
|
|
/* Unlock network before disabling repair mode on sockets */
|
2012-09-17 20:06:14 +04:00
|
|
|
network_unlock();
|
2013-07-15 18:14:12 +04:00
|
|
|
|
2014-08-06 16:24:52 +04:00
|
|
|
/*
|
|
|
|
* Stop getting sigchld, after we resume the tasks they
|
|
|
|
* may start to exit poking criu in vain.
|
|
|
|
*/
|
|
|
|
ignore_kids();
|
|
|
|
|
2013-07-15 18:14:12 +04:00
|
|
|
/*
|
|
|
|
* -------------------------------------------------------------
|
|
|
|
* Below this line nothing can fail, because network is unlocked
|
|
|
|
*/
|
|
|
|
|
2013-08-12 06:23:11 +04:00
|
|
|
ret = restore_switch_stage(CR_STATE_RESTORE_CREDS);
|
|
|
|
BUG_ON(ret);
|
2012-01-19 01:33:16 +03:00
|
|
|
|
2013-08-11 21:25:42 +04:00
|
|
|
timing_stop(TIME_RESTORE);
|
|
|
|
|
2014-06-30 20:30:44 +04:00
|
|
|
ret = attach_to_tasks(opts.swrk_restore);
|
2013-09-23 14:33:34 +04:00
|
|
|
|
2013-04-12 13:00:05 -07:00
|
|
|
pr_info("Restore finished successfully. Resuming tasks.\n");
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_set_and_wake(&task_entries->start, CR_STATE_COMPLETE);
|
2012-01-16 23:52:15 +03:00
|
|
|
|
2013-09-23 14:33:34 +04:00
|
|
|
if (ret == 0)
|
|
|
|
ret = parasite_stop_on_syscall(task_entries->nr_threads, __NR_rt_sigreturn);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* finalize_restore() always detaches from processes and
|
|
|
|
* they continue run through sigreturn.
|
|
|
|
*/
|
|
|
|
finalize_restore(ret);
|
|
|
|
|
2013-08-11 13:00:45 +04:00
|
|
|
write_stats(RESTORE_STATS);
|
|
|
|
|
2014-03-22 20:14:00 +04:00
|
|
|
if (!opts.restore_detach && !opts.exec_cmd)
|
2012-01-18 23:24:37 +04:00
|
|
|
wait(NULL);
|
2013-08-12 06:33:21 +04:00
|
|
|
|
2011-09-23 12:00:45 +04:00
|
|
|
return 0;
|
2013-08-12 06:33:21 +04:00
|
|
|
|
2013-08-20 15:17:30 +04:00
|
|
|
out_kill:
|
|
|
|
/*
|
|
|
|
* The processes can be killed only when all of them have been created,
|
|
|
|
* otherwise an external proccesses can be killed.
|
|
|
|
*/
|
2014-04-21 18:23:22 +04:00
|
|
|
if (root_ns_mask & CLONE_NEWPID) {
|
2013-08-12 06:33:21 +04:00
|
|
|
/* Kill init */
|
|
|
|
if (root_item->pid.real > 0)
|
|
|
|
kill(root_item->pid.real, SIGKILL);
|
|
|
|
} else {
|
|
|
|
struct pstree_item *pi;
|
|
|
|
|
|
|
|
for_each_pstree_item(pi)
|
|
|
|
if (pi->pid.virt > 0)
|
|
|
|
kill(pi->pid.virt, SIGKILL);
|
|
|
|
}
|
|
|
|
|
2013-08-20 15:17:30 +04:00
|
|
|
out:
|
|
|
|
__restore_switch_stage(CR_STATE_FAIL);
|
2013-08-12 06:33:21 +04:00
|
|
|
pr_err("Restoring FAILED.\n");
|
|
|
|
return 1;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-06-26 14:51:00 +04:00
|
|
|
static int prepare_task_entries()
|
|
|
|
{
|
|
|
|
task_entries = mmap(NULL, TASK_ENTRIES_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
|
|
|
|
if (task_entries == MAP_FAILED) {
|
|
|
|
pr_perror("Can't map shmem");
|
|
|
|
return -1;
|
|
|
|
}
|
2012-12-04 17:22:45 +03:00
|
|
|
task_entries->nr_threads = 0;
|
2012-06-26 14:51:00 +04:00
|
|
|
task_entries->nr_tasks = 0;
|
2012-07-02 15:25:00 +04:00
|
|
|
task_entries->nr_helpers = 0;
|
2013-05-31 19:01:31 +04:00
|
|
|
futex_set(&task_entries->start, CR_STATE_RESTORE_NS);
|
2013-03-25 23:39:53 +04:00
|
|
|
mutex_init(&task_entries->zombie_lock);
|
2012-06-26 14:51:00 +04:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-05-28 21:11:13 +04:00
|
|
|
int cr_restore_tasks(void)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2013-12-19 21:35:00 +04:00
|
|
|
int ret = -1;
|
|
|
|
|
|
|
|
if (cr_plugin_init())
|
2012-07-19 17:37:25 +04:00
|
|
|
return -1;
|
|
|
|
|
2013-12-19 21:35:00 +04:00
|
|
|
if (check_img_inventory() < 0)
|
|
|
|
goto err;
|
|
|
|
|
2013-08-11 13:00:45 +04:00
|
|
|
if (init_stats(RESTORE_STATS))
|
2013-12-19 21:35:00 +04:00
|
|
|
goto err;
|
2013-08-11 13:00:45 +04:00
|
|
|
|
2013-10-11 17:38:57 +04:00
|
|
|
if (kerndat_init_rst())
|
2013-12-19 21:35:00 +04:00
|
|
|
goto err;
|
2013-10-11 17:38:57 +04:00
|
|
|
|
2013-08-11 21:25:42 +04:00
|
|
|
timing_start(TIME_RESTORE);
|
|
|
|
|
2012-12-21 17:35:36 +04:00
|
|
|
if (cpu_init() < 0)
|
2013-12-19 21:35:00 +04:00
|
|
|
goto err;
|
2012-12-21 17:35:36 +04:00
|
|
|
|
2013-05-24 01:42:13 +04:00
|
|
|
if (vdso_init())
|
2013-12-19 21:35:00 +04:00
|
|
|
goto err;
|
2013-05-24 01:42:13 +04:00
|
|
|
|
2012-06-26 14:51:00 +04:00
|
|
|
if (prepare_task_entries() < 0)
|
2013-12-19 21:35:00 +04:00
|
|
|
goto err;
|
2012-06-26 14:51:00 +04:00
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
if (prepare_pstree() < 0)
|
2013-12-19 21:35:00 +04:00
|
|
|
goto err;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2013-05-28 21:11:13 +04:00
|
|
|
if (crtools_prepare_shared() < 0)
|
2013-12-19 21:35:00 +04:00
|
|
|
goto err;
|
2012-09-17 20:06:06 +04:00
|
|
|
|
2014-08-06 16:25:08 +04:00
|
|
|
if (criu_signals_setup() < 0)
|
|
|
|
goto err;
|
|
|
|
|
2013-12-19 21:35:00 +04:00
|
|
|
ret = restore_root_task(root_item);
|
2014-05-08 16:55:53 +04:00
|
|
|
|
|
|
|
fini_cgroup();
|
2013-12-19 21:35:00 +04:00
|
|
|
err:
|
|
|
|
cr_plugin_fini();
|
|
|
|
return ret;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-04-05 14:08:11 +04:00
|
|
|
static long restorer_get_vma_hint(pid_t pid, struct list_head *tgt_vma_list,
|
|
|
|
struct list_head *self_vma_list, long vma_len)
|
2011-11-06 01:49:57 +04:00
|
|
|
{
|
2012-04-07 11:09:00 +04:00
|
|
|
struct vma_area *t_vma, *s_vma;
|
2012-04-05 14:08:11 +04:00
|
|
|
long prev_vma_end = 0;
|
2012-04-07 11:09:00 +04:00
|
|
|
struct vma_area end_vma;
|
2014-02-04 00:08:16 +04:00
|
|
|
VmaEntry end_e;
|
2012-04-07 11:09:00 +04:00
|
|
|
|
2014-02-04 00:08:16 +04:00
|
|
|
end_vma.e = &end_e;
|
|
|
|
end_e.start = end_e.end = TASK_SIZE;
|
2012-12-21 18:58:14 +04:00
|
|
|
prev_vma_end = PAGE_SIZE * 0x10; /* CONFIG_LSM_MMAP_MIN_ADDR=65536 */
|
2011-11-06 01:49:57 +04:00
|
|
|
|
2012-04-07 11:09:00 +04:00
|
|
|
s_vma = list_first_entry(self_vma_list, struct vma_area, list);
|
|
|
|
t_vma = list_first_entry(tgt_vma_list, struct vma_area, list);
|
2012-03-02 19:28:13 +04:00
|
|
|
|
2012-04-07 11:09:00 +04:00
|
|
|
while (1) {
|
2014-02-04 00:08:16 +04:00
|
|
|
if (prev_vma_end + vma_len > s_vma->e->start) {
|
2012-04-07 11:09:00 +04:00
|
|
|
if (s_vma->list.next == self_vma_list) {
|
|
|
|
s_vma = &end_vma;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (s_vma == &end_vma)
|
|
|
|
break;
|
2014-02-04 00:08:16 +04:00
|
|
|
if (prev_vma_end < s_vma->e->end)
|
|
|
|
prev_vma_end = s_vma->e->end;
|
2012-04-07 11:09:00 +04:00
|
|
|
s_vma = list_entry(s_vma->list.next, struct vma_area, list);
|
|
|
|
continue;
|
|
|
|
}
|
2012-03-02 19:28:13 +04:00
|
|
|
|
2014-02-04 00:08:16 +04:00
|
|
|
if (prev_vma_end + vma_len > t_vma->e->start) {
|
2012-04-07 11:09:00 +04:00
|
|
|
if (t_vma->list.next == tgt_vma_list) {
|
|
|
|
t_vma = &end_vma;
|
|
|
|
continue;
|
2011-11-06 01:49:57 +04:00
|
|
|
}
|
2012-04-07 11:09:00 +04:00
|
|
|
if (t_vma == &end_vma)
|
|
|
|
break;
|
2014-02-04 00:08:16 +04:00
|
|
|
if (prev_vma_end < t_vma->e->end)
|
|
|
|
prev_vma_end = t_vma->e->end;
|
2012-04-07 11:09:00 +04:00
|
|
|
t_vma = list_entry(t_vma->list.next, struct vma_area, list);
|
|
|
|
continue;
|
2012-03-02 19:28:13 +04:00
|
|
|
}
|
|
|
|
|
2012-04-07 11:09:00 +04:00
|
|
|
return prev_vma_end;
|
2011-11-06 01:49:57 +04:00
|
|
|
}
|
2012-04-05 14:08:11 +04:00
|
|
|
|
|
|
|
return -1;
|
2011-11-06 01:49:57 +04:00
|
|
|
}
|
|
|
|
|
2012-01-24 16:45:19 +04:00
|
|
|
static inline int timeval_valid(struct timeval *tv)
|
|
|
|
{
|
|
|
|
return (tv->tv_sec >= 0) && ((unsigned long)tv->tv_usec < USEC_PER_SEC);
|
|
|
|
}
|
|
|
|
|
2014-04-15 21:58:49 +04:00
|
|
|
/*
 * Convert the image entry @ie into the itimerval @val.
 *
 * @n: timer name, used for logging only.
 *
 * An entry with a zero interval produces a zeroed @val. If the
 * interval is set but the remaining value is zero, the interval is
 * used as the value.
 *
 * Returns 0 on success, -1 if the interval or the value is not a
 * valid timeval.
 */
static inline int decode_itimer(char *n, ItimerEntry *ie, struct itimerval *val)
{
	if (!ie->isec && !ie->iusec) {
		memzero_p(val);
		return 0;
	}

	val->it_interval.tv_sec = ie->isec;
	val->it_interval.tv_usec = ie->iusec;
	if (!timeval_valid(&val->it_interval)) {
		pr_err("Invalid timer interval\n");
		return -1;
	}

	if (ie->vsec != 0 || ie->vusec != 0) {
		val->it_value.tv_sec = ie->vsec;
		val->it_value.tv_usec = ie->vusec;
	} else {
		/*
		 * Remaining time was too short. Set it to
		 * interval to make the timer armed and work.
		 */
		val->it_value.tv_sec = ie->isec;
		val->it_value.tv_usec = ie->iusec;
	}

	if (!timeval_valid(&val->it_value)) {
		pr_err("Invalid timer value\n");
		return -1;
	}

	pr_info("Restored %s timer to %ld.%ld -> %ld.%ld\n", n,
			val->it_value.tv_sec, val->it_value.tv_usec,
			val->it_interval.tv_sec, val->it_interval.tv_usec);

	return 0;
}
|
|
|
|
|
2014-04-15 21:59:55 +04:00
|
|
|
/*
|
|
|
|
* Legacy itimers restore from CR_FD_ITIMERS
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int prepare_itimers_from_fd(int pid, struct task_restore_args *args)
|
2012-01-24 16:45:19 +04:00
|
|
|
{
|
|
|
|
int fd, ret = -1;
|
2012-07-18 16:27:01 +04:00
|
|
|
ItimerEntry *ie;
|
2012-01-24 16:45:19 +04:00
|
|
|
|
2013-04-09 11:13:51 +04:00
|
|
|
fd = open_image(CR_FD_ITIMERS, O_RSTR, pid);
|
2012-01-24 16:45:19 +04:00
|
|
|
if (fd < 0)
|
|
|
|
return fd;
|
|
|
|
|
2013-08-23 21:47:31 +04:00
|
|
|
ret = pb_read_one(fd, &ie, PB_ITIMER);
|
2012-07-18 16:27:01 +04:00
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
2014-04-15 21:58:49 +04:00
|
|
|
ret = decode_itimer("real", ie, &args->itimers[0]);
|
2012-07-18 16:27:01 +04:00
|
|
|
itimer_entry__free_unpacked(ie, NULL);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
2012-01-24 16:45:19 +04:00
|
|
|
|
2013-08-23 21:47:31 +04:00
|
|
|
ret = pb_read_one(fd, &ie, PB_ITIMER);
|
2012-07-18 16:27:01 +04:00
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
2014-04-15 21:58:49 +04:00
|
|
|
ret = decode_itimer("virt", ie, &args->itimers[1]);
|
2012-07-18 16:27:01 +04:00
|
|
|
itimer_entry__free_unpacked(ie, NULL);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
|
2013-08-23 21:47:31 +04:00
|
|
|
ret = pb_read_one(fd, &ie, PB_ITIMER);
|
2012-07-18 16:27:01 +04:00
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
2014-04-15 21:58:49 +04:00
|
|
|
ret = decode_itimer("prof", ie, &args->itimers[2]);
|
2012-07-18 16:27:01 +04:00
|
|
|
itimer_entry__free_unpacked(ie, NULL);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
out:
|
2012-02-29 13:39:21 +03:00
|
|
|
close_safe(&fd);
|
2012-01-24 16:45:19 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-04-15 21:59:55 +04:00
|
|
|
/*
 * Fill args->itimers[] for task @pid.
 *
 * When the core image carries no timers entry, fall back to the
 * legacy per-task itimers image. Otherwise decode the real, virt and
 * prof timers; all three are always decoded and their results OR-ed.
 */
static int prepare_itimers(int pid, CoreEntry *core, struct task_restore_args *args)
{
	TaskTimersEntry *tte = core->tc->timers;
	int rc_real, rc_virt, rc_prof;

	if (tte == NULL)
		return prepare_itimers_from_fd(pid, args);

	rc_real = decode_itimer("real", tte->real, &args->itimers[0]);
	rc_virt = decode_itimer("virt", tte->virt, &args->itimers[1]);
	rc_prof = decode_itimer("prof", tte->prof, &args->itimers[2]);

	return rc_real | rc_virt | rc_prof;
}
|
|
|
|
|
2013-06-27 23:32:24 +04:00
|
|
|
static inline int timespec_valid(struct timespec *ts)
|
|
|
|
{
|
|
|
|
return (ts->tv_sec >= 0) && ((unsigned long)ts->tv_nsec < NSEC_PER_SEC);
|
|
|
|
}
|
|
|
|
|
2014-04-15 21:59:05 +04:00
|
|
|
/*
 * Convert the image entry @pte into the restorer's timer
 * description @pt.
 *
 * If the remaining value in the image is zero, the interval is used
 * as the value.
 *
 * Returns 0 on success, -1 if the interval or the value is not a
 * valid timespec.
 */
static inline int decode_posix_timer(PosixTimerEntry *pte,
				     struct restore_posix_timer *pt)
{
	pt->val.it_interval.tv_sec = pte->isec;
	pt->val.it_interval.tv_nsec = pte->insec;
	if (!timespec_valid(&pt->val.it_interval)) {
		pr_err("Invalid timer interval(posix)\n");
		return -1;
	}

	if (pte->vsec != 0 || pte->vnsec != 0) {
		pt->val.it_value.tv_sec = pte->vsec;
		pt->val.it_value.tv_nsec = pte->vnsec;
	} else {
		/*
		 * Remaining time was too short. Set it to
		 * interval to make the timer armed and work.
		 */
		pt->val.it_value.tv_sec = pte->isec;
		pt->val.it_value.tv_nsec = pte->insec;
	}

	if (!timespec_valid(&pt->val.it_value)) {
		pr_err("Invalid timer value(posix)\n");
		return -1;
	}

	pt->overrun = pte->overrun;
	pt->spt.it_id = pte->it_id;
	pt->spt.clock_id = pte->clock_id;
	pt->spt.si_signo = pte->si_signo;
	pt->spt.it_sigev_notify = pte->it_sigev_notify;
	pt->spt.sival_ptr = decode_pointer(pte->sival_ptr);

	return 0;
}
|
|
|
|
|
2013-06-27 23:32:25 +04:00
|
|
|
static int cmp_posix_timer_proc_id(const void *p1, const void *p2)
|
|
|
|
{
|
|
|
|
return ((struct restore_posix_timer *)p1)->spt.it_id - ((struct restore_posix_timer *)p2)->spt.it_id;
|
|
|
|
}
|
|
|
|
|
2013-11-03 23:43:44 +04:00
|
|
|
static unsigned long posix_timers_cpos;
|
|
|
|
static unsigned int posix_timers_nr;
|
|
|
|
|
2014-04-15 21:59:55 +04:00
|
|
|
static void sort_posix_timers(void)
|
2013-06-27 23:32:25 +04:00
|
|
|
{
|
2014-04-15 21:59:55 +04:00
|
|
|
/*
|
|
|
|
* This is required for restorer's create_posix_timers(),
|
|
|
|
* it will probe them one-by-one for the desired ID, since
|
|
|
|
* kernel doesn't provide another API for timer creation
|
|
|
|
* with given ID.
|
|
|
|
*/
|
2014-04-15 21:59:05 +04:00
|
|
|
|
2014-04-15 21:59:55 +04:00
|
|
|
if (posix_timers_nr > 0)
|
|
|
|
qsort(rst_mem_remap_ptr(posix_timers_cpos, RM_PRIVATE),
|
|
|
|
posix_timers_nr,
|
|
|
|
sizeof(struct restore_posix_timer),
|
|
|
|
cmp_posix_timer_proc_id);
|
|
|
|
}
|
2014-04-15 21:59:05 +04:00
|
|
|
|
2014-04-15 21:59:55 +04:00
|
|
|
/*
|
|
|
|
* Legacy posix timers restoration from CR_FD_POSIX_TIMERS
|
|
|
|
*/
|
2014-04-15 21:59:05 +04:00
|
|
|
|
2014-04-15 21:59:55 +04:00
|
|
|
static int prepare_posix_timers_from_fd(int pid)
|
|
|
|
{
|
|
|
|
int fd = -1;
|
|
|
|
int ret = -1;
|
|
|
|
struct restore_posix_timer *t;
|
2014-04-15 21:59:05 +04:00
|
|
|
|
2013-06-27 23:32:25 +04:00
|
|
|
fd = open_image(CR_FD_POSIX_TIMERS, O_RSTR, pid);
|
2013-10-29 13:04:47 +04:00
|
|
|
if (fd < 0) {
|
|
|
|
if (errno == ENOENT) /* backward compatibility */
|
|
|
|
return 0;
|
|
|
|
else
|
|
|
|
return fd;
|
|
|
|
}
|
2013-06-27 23:32:25 +04:00
|
|
|
|
|
|
|
while (1) {
|
|
|
|
PosixTimerEntry *pte;
|
|
|
|
|
2013-08-23 21:47:31 +04:00
|
|
|
ret = pb_read_one_eof(fd, &pte, PB_POSIX_TIMER);
|
2014-04-15 21:59:38 +04:00
|
|
|
if (ret <= 0)
|
|
|
|
break;
|
2013-06-27 23:32:25 +04:00
|
|
|
|
2013-11-02 01:05:13 +04:00
|
|
|
t = rst_mem_alloc(sizeof(struct restore_posix_timer), RM_PRIVATE);
|
2013-07-05 15:00:08 +04:00
|
|
|
if (!t)
|
2014-04-15 21:59:38 +04:00
|
|
|
break;
|
2013-06-27 23:32:25 +04:00
|
|
|
|
2014-04-15 21:59:05 +04:00
|
|
|
ret = decode_posix_timer(pte, t);
|
2013-06-27 23:32:25 +04:00
|
|
|
if (ret < 0)
|
2014-04-15 21:59:38 +04:00
|
|
|
break;
|
2013-06-27 23:32:25 +04:00
|
|
|
|
|
|
|
posix_timer_entry__free_unpacked(pte, NULL);
|
2013-11-03 23:43:44 +04:00
|
|
|
posix_timers_nr++;
|
2013-06-27 23:32:25 +04:00
|
|
|
}
|
2014-04-15 21:59:38 +04:00
|
|
|
|
|
|
|
close_safe(&fd);
|
2014-04-15 21:59:55 +04:00
|
|
|
if (!ret)
|
|
|
|
sort_posix_timers();
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Collect the posix timers of task @pid into RM_PRIVATE rst memory,
 * remembering where they start (posix_timers_cpos) and how many
 * there are (posix_timers_nr).
 *
 * When the core image carries no timers entry the legacy per-task
 * image is used instead.
 *
 * Returns 0 on success, -1 on allocation or decode failure.
 */
static int prepare_posix_timers(int pid, CoreEntry *core)
{
	TaskTimersEntry *tte = core->tc->timers;
	struct restore_posix_timer *slot;
	int idx;

	posix_timers_cpos = rst_mem_cpos(RM_PRIVATE);

	if (tte == NULL)
		return prepare_posix_timers_from_fd(pid);

	posix_timers_nr = tte->n_posix;
	for (idx = 0; idx < posix_timers_nr; idx++) {
		slot = rst_mem_alloc(sizeof(struct restore_posix_timer), RM_PRIVATE);
		if (slot == NULL)
			return -1;

		if (decode_posix_timer(tte->posix[idx], slot))
			return -1;
	}

	sort_posix_timers();
	return 0;
}
|
|
|
|
|
2012-07-19 12:35:25 +04:00
|
|
|
/*
 * Returns non-zero only if all four capability sets in @ce
 * (inheritable, effective, permitted, bounding) have exactly
 * CR_CAP_SIZE elements.
 */
static inline int verify_cap_size(CredsEntry *ce)
{
	if (ce->n_cap_inh != CR_CAP_SIZE)
		return 0;
	if (ce->n_cap_eff != CR_CAP_SIZE)
		return 0;
	if (ce->n_cap_prm != CR_CAP_SIZE)
		return 0;

	return ce->n_cap_bnd == CR_CAP_SIZE;
}
|
|
|
|
|
2013-11-08 17:32:07 +04:00
|
|
|
static int prepare_creds(int pid, struct task_restore_args *args)
|
2012-01-27 21:43:32 +04:00
|
|
|
{
|
|
|
|
int fd, ret;
|
2012-07-19 12:35:25 +04:00
|
|
|
CredsEntry *ce;
|
2012-01-27 21:43:32 +04:00
|
|
|
|
2013-04-09 11:13:51 +04:00
|
|
|
fd = open_image(CR_FD_CREDS, O_RSTR, pid);
|
2012-01-27 21:43:32 +04:00
|
|
|
if (fd < 0)
|
|
|
|
return fd;
|
|
|
|
|
2012-08-07 02:42:58 +04:00
|
|
|
ret = pb_read_one(fd, &ce, PB_CREDS);
|
2012-02-29 13:39:21 +03:00
|
|
|
close_safe(&fd);
|
2012-01-27 21:43:32 +04:00
|
|
|
|
2012-07-19 12:35:25 +04:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
2013-03-27 16:32:46 +04:00
|
|
|
if (!verify_cap_size(ce)) {
|
|
|
|
pr_err("Caps size mismatch %d %d %d %d\n",
|
|
|
|
(int)ce->n_cap_inh, (int)ce->n_cap_eff,
|
|
|
|
(int)ce->n_cap_prm, (int)ce->n_cap_bnd);
|
2012-07-19 12:35:25 +04:00
|
|
|
return -1;
|
2013-03-27 16:32:46 +04:00
|
|
|
}
|
2012-07-19 12:35:25 +04:00
|
|
|
|
2013-09-28 15:48:44 +04:00
|
|
|
if (!may_restore(ce))
|
2013-10-02 17:11:17 +04:00
|
|
|
return -1;
|
|
|
|
|
2012-07-19 12:35:25 +04:00
|
|
|
args->creds = *ce;
|
|
|
|
args->creds.cap_inh = args->cap_inh;
|
|
|
|
memcpy(args->cap_inh, ce->cap_inh, sizeof(args->cap_inh));
|
|
|
|
args->creds.cap_eff = args->cap_eff;
|
|
|
|
memcpy(args->cap_eff, ce->cap_eff, sizeof(args->cap_eff));
|
|
|
|
args->creds.cap_prm = args->cap_prm;
|
|
|
|
memcpy(args->cap_prm, ce->cap_prm, sizeof(args->cap_prm));
|
|
|
|
args->creds.cap_bnd = args->cap_bnd;
|
|
|
|
memcpy(args->cap_bnd, ce->cap_bnd, sizeof(args->cap_bnd));
|
|
|
|
|
2012-10-11 16:52:52 +04:00
|
|
|
/*
|
|
|
|
* We can set supplementary groups here. This won't affect any
|
|
|
|
* permission checks for us (we're still root) and will not be
|
|
|
|
* reset by subsequent creds changes in restorer.
|
|
|
|
*/
|
|
|
|
|
|
|
|
BUILD_BUG_ON(sizeof(*ce->groups) != sizeof(gid_t));
|
|
|
|
if (setgroups(ce->n_groups, ce->groups) < 0) {
|
|
|
|
pr_perror("Can't set supplementary groups");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2012-07-19 12:35:25 +04:00
|
|
|
creds_entry__free_unpacked(ce, NULL);
|
|
|
|
|
2013-10-12 00:03:25 +04:00
|
|
|
args->cap_last_cap = kern_last_cap;
|
|
|
|
|
2012-01-27 21:43:32 +04:00
|
|
|
/* XXX -- validate creds here? */
|
|
|
|
|
2012-07-19 12:35:25 +04:00
|
|
|
return 0;
|
2012-01-27 21:43:32 +04:00
|
|
|
}
|
|
|
|
|
2013-11-08 17:32:07 +04:00
|
|
|
static int prepare_mm(pid_t pid, struct task_restore_args *args)
|
2012-04-09 14:51:37 +04:00
|
|
|
{
|
2014-02-04 00:08:44 +04:00
|
|
|
int exe_fd, i, ret = -1;
|
|
|
|
MmEntry *mm = current->rst->mm;
|
2012-04-09 14:51:37 +04:00
|
|
|
|
2012-07-18 20:54:00 +04:00
|
|
|
args->mm = *mm;
|
|
|
|
args->mm.n_mm_saved_auxv = 0;
|
|
|
|
args->mm.mm_saved_auxv = NULL;
|
|
|
|
|
2012-10-29 19:54:12 +04:00
|
|
|
if (mm->n_mm_saved_auxv > AT_VECTOR_SIZE) {
|
2012-07-18 20:54:00 +04:00
|
|
|
pr_err("Image corrupted on pid %d\n", pid);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2013-01-09 17:40:33 +04:00
|
|
|
args->mm_saved_auxv_size = mm->n_mm_saved_auxv*sizeof(auxv_t);
|
|
|
|
for (i = 0; i < mm->n_mm_saved_auxv; ++i) {
|
|
|
|
args->mm_saved_auxv[i] = (auxv_t)mm->mm_saved_auxv[i];
|
|
|
|
}
|
2012-07-18 20:54:00 +04:00
|
|
|
|
2014-02-04 00:08:44 +04:00
|
|
|
exe_fd = open_reg_by_id(mm->exe_file_id);
|
2012-04-09 15:52:00 +04:00
|
|
|
if (exe_fd < 0)
|
2012-07-18 20:54:00 +04:00
|
|
|
goto out;
|
2012-04-09 15:52:00 +04:00
|
|
|
|
|
|
|
args->fd_exe_link = exe_fd;
|
2012-07-18 20:54:00 +04:00
|
|
|
ret = 0;
|
|
|
|
out:
|
|
|
|
return ret;
|
2012-04-09 14:51:37 +04:00
|
|
|
}
|
|
|
|
|
2012-09-14 14:51:40 +04:00
|
|
|
static void *restorer;
|
|
|
|
static unsigned long restorer_len;
|
|
|
|
|
|
|
|
static int prepare_restorer_blob(void)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We map anonymous mapping, not mremap the restorer itself later.
|
2013-05-09 10:58:04 -07:00
|
|
|
* Otherwise the restorer vma would be tied to criu binary which
|
2012-09-14 14:51:40 +04:00
|
|
|
* in turn will lead to set-exe-file prctl to fail with EBUSY.
|
|
|
|
*/
|
|
|
|
|
|
|
|
restorer_len = round_up(sizeof(restorer_blob), PAGE_SIZE);
|
|
|
|
restorer = mmap(NULL, restorer_len,
|
|
|
|
PROT_READ | PROT_WRITE | PROT_EXEC,
|
|
|
|
MAP_PRIVATE | MAP_ANON, 0, 0);
|
|
|
|
if (restorer == MAP_FAILED) {
|
2012-11-23 16:43:33 +04:00
|
|
|
pr_perror("Can't map restorer code");
|
2012-09-14 14:51:40 +04:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(restorer, &restorer_blob, sizeof(restorer_blob));
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int remap_restorer_blob(void *addr)
|
|
|
|
{
|
|
|
|
void *mem;
|
|
|
|
|
|
|
|
mem = mremap(restorer, restorer_len, restorer_len,
|
|
|
|
MREMAP_FIXED | MREMAP_MAYMOVE, addr);
|
|
|
|
if (mem != addr) {
|
|
|
|
pr_perror("Can't remap restorer blob");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-10-17 00:23:25 +04:00
|
|
|
static int validate_sched_parm(struct rst_sched_param *sp)
|
|
|
|
{
|
|
|
|
if ((sp->nice < -20) || (sp->nice > 19))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
switch (sp->policy) {
|
|
|
|
case SCHED_RR:
|
|
|
|
case SCHED_FIFO:
|
|
|
|
return ((sp->prio > 0) && (sp->prio < 100));
|
|
|
|
case SCHED_IDLE:
|
|
|
|
case SCHED_OTHER:
|
|
|
|
case SCHED_BATCH:
|
|
|
|
return sp->prio == 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Fill @sp from the scheduler fields of the thread core entry @tc.
 *
 * When the image carries no scheduling policy the defaults are used:
 * SCHED_OTHER with nice 0 and priority 0.
 *
 * Returns 0 on success, -1 if the values from the image are
 * inconsistent.
 */
static int prep_sched_info(struct rst_sched_param *sp, ThreadCoreEntry *tc)
{
	if (!tc->has_sched_policy) {
		sp->policy = SCHED_OTHER;
		sp->nice = 0;
		/* Don't leave the priority unset; 0 is what SCHED_OTHER requires. */
		sp->prio = 0;
		return 0;
	}

	sp->policy = tc->sched_policy;
	sp->nice = tc->sched_nice;
	sp->prio = tc->sched_prio;

	if (!validate_sched_parm(sp)) {
		pr_err("Inconsistent sched params received (%d.%d.%d)\n",
				sp->policy, sp->nice, sp->prio);
		return -1;
	}

	return 0;
}
|
|
|
|
|
2013-01-10 20:08:38 +04:00
|
|
|
/*
 * Translate an rlimit value from the image: the all-ones value stands
 * for "unlimited" and becomes RLIM_INFINITY, anything else is passed
 * through as is.
 */
static unsigned long decode_rlim(u_int64_t ival)
{
	if (ival == (u_int64_t)-1)
		return RLIM_INFINITY;

	return ival;
}
|
|
|
|
|
2013-11-03 23:40:12 +04:00
|
|
|
static unsigned long rlims_cpos;
|
|
|
|
static unsigned int rlims_nr;
|
|
|
|
|
2014-04-15 22:00:28 +04:00
|
|
|
/*
|
|
|
|
* Legacy rlimits restore from CR_FD_RLIMIT
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int prepare_rlimits_from_fd(int pid)
|
2013-01-10 20:08:38 +04:00
|
|
|
{
|
2013-07-09 00:21:31 +04:00
|
|
|
struct rlimit *r;
|
2013-01-10 20:08:38 +04:00
|
|
|
int fd, ret;
|
|
|
|
|
2014-03-13 14:30:48 +04:00
|
|
|
/*
|
|
|
|
* Old image -- read from the file.
|
|
|
|
*/
|
2014-03-14 17:38:00 +04:00
|
|
|
fd = open_image(CR_FD_RLIMIT, O_RSTR | O_OPT, pid);
|
2013-01-10 20:08:38 +04:00
|
|
|
if (fd < 0) {
|
2014-03-14 17:38:00 +04:00
|
|
|
if (fd == -ENOENT) {
|
2013-01-10 20:08:38 +04:00
|
|
|
pr_info("Skip rlimits for %d\n", pid);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
RlimitEntry *re;
|
|
|
|
|
|
|
|
ret = pb_read_one_eof(fd, &re, PB_RLIMIT);
|
|
|
|
if (ret <= 0)
|
|
|
|
break;
|
|
|
|
|
2013-11-02 01:05:13 +04:00
|
|
|
r = rst_mem_alloc(sizeof(*r), RM_PRIVATE);
|
2013-07-09 00:21:31 +04:00
|
|
|
if (!r) {
|
|
|
|
pr_err("Can't allocate memory for resource %d\n",
|
2013-11-03 23:40:12 +04:00
|
|
|
rlims_nr);
|
2013-07-09 00:21:31 +04:00
|
|
|
return -1;
|
2013-01-10 20:08:38 +04:00
|
|
|
}
|
|
|
|
|
2013-07-09 00:21:31 +04:00
|
|
|
r->rlim_cur = decode_rlim(re->cur);
|
|
|
|
r->rlim_max = decode_rlim(re->max);
|
|
|
|
if (r->rlim_cur > r->rlim_max) {
|
2013-11-03 23:40:12 +04:00
|
|
|
pr_err("Can't restore cur > max for %d.%d\n",
|
|
|
|
pid, rlims_nr);
|
2013-07-09 00:21:31 +04:00
|
|
|
r->rlim_cur = r->rlim_max;
|
2013-01-10 20:08:38 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
rlimit_entry__free_unpacked(re, NULL);
|
|
|
|
|
2013-11-03 23:40:12 +04:00
|
|
|
rlims_nr++;
|
2013-01-10 20:08:38 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
close(fd);
|
2013-11-03 23:40:12 +04:00
|
|
|
|
|
|
|
return 0;
|
2013-01-10 20:08:38 +04:00
|
|
|
}
|
|
|
|
|
2014-04-15 22:00:28 +04:00
|
|
|
/*
 * Collect the resource limits of task @pid into RM_PRIVATE rst
 * memory, remembering where they start (rlims_cpos) and how many
 * there are (rlims_nr).
 *
 * When the core image carries no rlimits entry the legacy per-task
 * image is used instead. A soft limit above the hard one is clamped.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
static int prepare_rlimits(int pid, CoreEntry *core)
{
	TaskRlimitsEntry *rls = core->tc->rlimits;
	struct rlimit *lim;
	int idx;

	rlims_cpos = rst_mem_cpos(RM_PRIVATE);

	if (rls == NULL)
		return prepare_rlimits_from_fd(pid);

	for (idx = 0; idx < rls->n_rlimits; idx++) {
		lim = rst_mem_alloc(sizeof(*lim), RM_PRIVATE);
		if (lim == NULL) {
			pr_err("Can't allocate memory for resource %d\n", idx);
			return -1;
		}

		lim->rlim_cur = decode_rlim(rls->rlimits[idx]->cur);
		lim->rlim_max = decode_rlim(rls->rlimits[idx]->max);
		if (lim->rlim_cur <= lim->rlim_max)
			continue;

		pr_warn("Can't restore cur > max for %d.%d\n", pid, idx);
		lim->rlim_cur = lim->rlim_max;
	}

	rlims_nr = rls->n_rlimits;
	return 0;
}
|
|
|
|
|
2013-11-03 23:47:51 +04:00
|
|
|
static int open_signal_image(int type, pid_t pid, unsigned int *nr)
|
2013-03-25 23:39:49 +04:00
|
|
|
{
|
2013-07-05 15:02:46 +04:00
|
|
|
int fd, ret;
|
2013-03-25 23:39:49 +04:00
|
|
|
|
2014-03-14 17:38:00 +04:00
|
|
|
fd = open_image(type, O_RSTR | O_OPT, pid);
|
2013-10-29 13:04:47 +04:00
|
|
|
if (fd < 0) {
|
2014-03-14 17:38:00 +04:00
|
|
|
if (fd == -ENOENT) /* backward compatibility */
|
2013-10-29 13:04:47 +04:00
|
|
|
return 0;
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
}
|
2013-03-25 23:39:49 +04:00
|
|
|
|
2013-07-05 15:02:46 +04:00
|
|
|
*nr = 0;
|
2013-03-25 23:39:49 +04:00
|
|
|
while (1) {
|
|
|
|
SiginfoEntry *sie;
|
2013-07-05 15:02:46 +04:00
|
|
|
siginfo_t *info, *t;
|
2013-03-25 23:39:49 +04:00
|
|
|
|
|
|
|
ret = pb_read_one_eof(fd, &sie, PB_SIGINFO);
|
|
|
|
if (ret <= 0)
|
|
|
|
break;
|
|
|
|
if (sie->siginfo.len != sizeof(siginfo_t)) {
|
|
|
|
pr_err("Unknown image format");
|
|
|
|
ret = -1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
info = (siginfo_t *) sie->siginfo.data;
|
2013-11-02 01:05:13 +04:00
|
|
|
t = rst_mem_alloc(sizeof(siginfo_t), RM_PRIVATE);
|
2013-07-05 15:02:46 +04:00
|
|
|
if (!t) {
|
|
|
|
ret = -1;
|
|
|
|
break;
|
2013-03-25 23:39:49 +04:00
|
|
|
}
|
|
|
|
|
2013-07-05 15:02:46 +04:00
|
|
|
memcpy(t, info, sizeof(*info));
|
2013-03-25 23:39:49 +04:00
|
|
|
(*nr)++;
|
|
|
|
|
|
|
|
siginfo_entry__free_unpacked(sie, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
close(fd);
|
|
|
|
|
2013-07-05 15:02:46 +04:00
|
|
|
return ret ? : 0;
|
2013-03-25 23:39:49 +04:00
|
|
|
}
|
|
|
|
|
2013-11-03 23:47:51 +04:00
|
|
|
static unsigned long siginfo_cpos;
|
|
|
|
static unsigned int siginfo_nr, *siginfo_priv_nr;
|
|
|
|
|
|
|
|
static int prepare_signals(int pid)
|
|
|
|
{
|
|
|
|
int ret = -1, i;
|
|
|
|
|
|
|
|
siginfo_cpos = rst_mem_cpos(RM_PRIVATE);
|
|
|
|
siginfo_priv_nr = xmalloc(sizeof(int) * current->nr_threads);
|
|
|
|
if (siginfo_priv_nr == NULL)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
ret = open_signal_image(CR_FD_SIGNAL, pid, &siginfo_nr);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
for (i = 0; i < current->nr_threads; i++) {
|
|
|
|
ret = open_signal_image(CR_FD_PSIGNAL,
|
|
|
|
current->threads[i].virt, &siginfo_priv_nr[i]);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-12-25 22:43:14 +04:00
|
|
|
/*
 * Weak, empty definition of __gcov_flush(), so that references to it
 * link even when no other definition is provided.
 */
void __attribute__((weak)) __gcov_flush(void)
{
}
|
|
|
|
|
2012-11-20 20:39:08 +04:00
|
|
|
/*
 * Build the restorer arguments and hand control over to the restorer blob.
 *
 * The function copies VMA, tcp-socket and timerfd data into RM_PRIVATE rst
 * memory, finds a free address range for the bootstrap area (restorer blob,
 * task/thread args, task entries, remapped rst memory and, with CONFIG_VDSO,
 * the runtime vdso), populates task_args and the per-thread thread_args, and
 * finally jumps into the blob with JUMP_TO_RESTORER_BLOB().
 *
 * @pid:  pid of the task being restored; the thread whose pid equals it
 *        reuses @core and becomes task_args->t.
 * @core: already loaded core entry of the task; it is released here with
 *        core_entry__free_unpacked() once no longer needed.
 *
 * On success control is transferred to the restorer and does not come back.
 * On any failure the process calls exit(1); the trailing "return -1" only
 * keeps the signature honest.
 */
static int sigreturn_restore(pid_t pid, CoreEntry *core)
{
	void *mem = MAP_FAILED;
	void *restore_thread_exec_start;
	void *restore_task_exec_start;

	long new_sp, exec_mem_hint;
	long ret;

	long restore_bootstrap_len;

	struct task_restore_args *task_args;
	struct thread_restore_args *thread_args;
	long args_len;

	struct vma_area *vma;
	unsigned long tgt_vmas;

	void *tcp_socks_mem;
	unsigned long tcp_socks;

	void *timerfd_mem;
	unsigned long timerfd_mem_cpos;

#ifdef CONFIG_VDSO
	unsigned long vdso_rt_size = 0;
	unsigned long vdso_rt_delta = 0;
#endif

	struct vm_area_list self_vmas;
	struct vm_area_list *vmas = &current->rst->vmas;
	int i;

	pr_info("Restore via sigreturn\n");

	BUILD_BUG_ON(sizeof(struct task_restore_args) & 1);
	BUILD_BUG_ON(sizeof(struct thread_restore_args) & 1);
	BUILD_BUG_ON(TASK_ENTRIES_SIZE % PAGE_SIZE);

	/* One task_restore_args followed by one thread_restore_args per thread */
	args_len = round_up(sizeof(*task_args) + sizeof(*thread_args) * current->nr_threads, PAGE_SIZE);
	pr_info("%d threads require %ldK of memory\n",
			current->nr_threads, KBYTES(args_len));

	/*
	 * Copy VMAs to private rst memory so that it's able to
	 * walk them and m(un|re)map.
	 */

	tgt_vmas = rst_mem_cpos(RM_PRIVATE);
	list_for_each_entry(vma, &vmas->h, list) {
		VmaEntry *vme;

		vme = rst_mem_alloc(sizeof(*vme), RM_PRIVATE);
		if (!vme)
			goto err_nv;

		*vme = *vma->e;

		if (vma_priv(vma->e))
			vma_premmaped_start(vme) = vma->premmaped_addr;
	}

	/*
	 * Copy tcp sockets fds to rst memory -- restorer will
	 * turn repair off before going sigreturn
	 */

	tcp_socks = rst_mem_cpos(RM_PRIVATE);
	tcp_socks_mem = rst_mem_alloc(rst_tcp_socks_len(), RM_PRIVATE);
	if (!tcp_socks_mem)
		goto err_nv;

	memcpy(tcp_socks_mem, rst_tcp_socks, rst_tcp_socks_len());

	/*
	 * Copy timerfd params for restorer args, the timers have to be
	 * set up at the very last moment.
	 */
	timerfd_mem_cpos = rst_mem_cpos(RM_PRIVATE);
	timerfd_mem = rst_mem_alloc(rst_timerfd_len(), RM_PRIVATE);
	if (!timerfd_mem)
		goto err_nv;
	memcpy(timerfd_mem, rst_timerfd, rst_timerfd_len());

	/*
	 * We're about to search for free VM area and inject the restorer blob
	 * into it. No irrelevant mmaps/mremaps beyond this point, otherwise
	 * this unwanted mapping might get overlapped by the restorer.
	 */

	ret = parse_self_maps_lite(&self_vmas);
	if (ret < 0)
		goto err;

	restore_bootstrap_len = restorer_len + args_len +
				TASK_ENTRIES_SIZE +
				rst_mem_remap_size();

#ifdef CONFIG_VDSO
	/*
	 * Figure out how much memory runtime vdso and vvar will need.
	 */
	vdso_rt_size = vdso_vma_size(&vdso_sym_rt);
	if (vdso_rt_size) {
		/* Padding needed to start the vdso on a page boundary */
		vdso_rt_delta = ALIGN(restore_bootstrap_len, PAGE_SIZE) - restore_bootstrap_len;
		vdso_rt_size += vdso_rt_delta;
		if (vvar_vma_size(&vdso_sym_rt))
			vdso_rt_size += ALIGN(vvar_vma_size(&vdso_sym_rt), PAGE_SIZE);
	}

	restore_bootstrap_len += vdso_rt_size;
#endif

	/*
	 * Restorer is a blob (code + args) that will get mapped in some
	 * place, that should _not_ intersect with both -- current mappings
	 * and mappings of the task we're restoring here. The subsequent
	 * call finds the start address for the restorer.
	 *
	 * After the start address is found we populate it with the restorer
	 * parts one by one (some are remap-ed, some are mmap-ed and copied
	 * or inited from scratch).
	 */

	exec_mem_hint = restorer_get_vma_hint(pid, &vmas->h, &self_vmas.h,
					      restore_bootstrap_len);
	if (exec_mem_hint == -1) {
		/* The format advertises kilobytes, so convert like the pr_info below */
		pr_err("No suitable area for task_restore bootstrap (%ldK)\n",
		       KBYTES(restore_bootstrap_len));
		goto err;
	}

	pr_info("Found bootstrap VMA hint at: 0x%lx (needs ~%ldK)\n", exec_mem_hint,
			KBYTES(restore_bootstrap_len));

	ret = remap_restorer_blob((void *)exec_mem_hint);
	if (ret < 0)
		goto err;

	/*
	 * Prepare a memory map for restorer. Note a thread space
	 * might be completely unused so it's here just for convenience.
	 */
	restore_thread_exec_start	= restorer_sym(exec_mem_hint, __export_restore_thread);
	restore_task_exec_start		= restorer_sym(exec_mem_hint, __export_restore_task);
	current->rst->munmap_restorer	= restorer_sym(exec_mem_hint, __export_unmap);

	exec_mem_hint += restorer_len;

	/* VMA we need to run task_restore code */
	mem = mmap((void *)exec_mem_hint, args_len,
			PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANON | MAP_FIXED, 0, 0);
	if (mem != (void *)exec_mem_hint) {
		pr_err("Can't mmap section for restore code\n");
		goto err;
	}

	/* Back to the start of the bootstrap area for bootstrap_start below */
	exec_mem_hint -= restorer_len;

	memzero(mem, args_len);
	task_args	= mem;
	thread_args	= (struct thread_restore_args *)(task_args + 1);

	/*
	 * Get a reference to shared memory area which is
	 * used to signal if shmem restoration complete
	 * from low-level restore code.
	 *
	 * This shmem area is mapped right after the whole area of
	 * sigreturn rt code. Note we didn't allocate it before
	 * but this area is taken into account for 'hint' memory
	 * address.
	 */

	mem += args_len;
	ret = shmem_remap(task_entries, mem, TASK_ENTRIES_SIZE);
	if (ret < 0)
		goto err;
	mem += TASK_ENTRIES_SIZE;

	if (rst_mem_remap(mem))
		goto err;

	task_args->task_entries = mem - TASK_ENTRIES_SIZE;

	task_args->rst_mem = mem;
	task_args->rst_mem_size = rst_mem_remap_size();

	task_args->bootstrap_start = (void *)exec_mem_hint;
	task_args->bootstrap_len = restore_bootstrap_len;
#ifdef CONFIG_VDSO
	task_args->vdso_rt_size = vdso_rt_size;
#endif

	task_args->premmapped_addr = (unsigned long) current->rst->premmapped_addr;
	task_args->premmapped_len = current->rst->premmapped_len;

	task_args->shmems = rst_mem_remap_ptr(rst_shmems, RM_SHREMAP);
	task_args->nr_shmems = nr_shmems;

	task_args->nr_vmas = vmas->nr;
	task_args->tgt_vmas = rst_mem_remap_ptr(tgt_vmas, RM_PRIVATE);

	task_args->timer_n = posix_timers_nr;
	task_args->posix_timers = rst_mem_remap_ptr(posix_timers_cpos, RM_PRIVATE);

	task_args->timerfd_n = rst_timerfd_nr;
	task_args->timerfd = rst_mem_remap_ptr(timerfd_mem_cpos, RM_PRIVATE);

	task_args->siginfo_nr = siginfo_nr;
	task_args->siginfo = rst_mem_remap_ptr(siginfo_cpos, RM_PRIVATE);

	task_args->tcp_socks_nr = rst_tcp_socks_nr;
	task_args->tcp_socks = rst_mem_remap_ptr(tcp_socks, RM_PRIVATE);

	/*
	 * Arguments for task restoration.
	 */

	BUG_ON(core->mtype != CORE_ENTRY__MARCH);

	task_args->logfd	= log_get_fd();
	task_args->loglevel	= log_get_loglevel();
	task_args->sigchld_act	= sigchld_act;

	/*
	 * strncpy() does not terminate the destination when the source fills
	 * it completely, so copy one byte less and terminate explicitly.
	 */
	strncpy(task_args->comm, core->tc->comm, sizeof(task_args->comm) - 1);
	task_args->comm[sizeof(task_args->comm) - 1] = '\0';

	task_args->nr_rlim = rlims_nr;
	if (rlims_nr)
		task_args->rlims = rst_mem_remap_ptr(rlims_cpos, RM_PRIVATE);

	/*
	 * Fill up per-thread data.
	 */
	for (i = 0; i < current->nr_threads; i++) {
		int fd_core;
		CoreEntry *tcore;
		struct rt_sigframe *sigframe;

		thread_args[i].pid = current->threads[i].virt;
		thread_args[i].siginfo_nr = siginfo_priv_nr[i];
		thread_args[i].siginfo = rst_mem_remap_ptr(siginfo_cpos, RM_PRIVATE);
		/* siginfo_nr doubles as the running offset of this thread's entries */
		thread_args[i].siginfo += siginfo_nr;
		siginfo_nr += thread_args[i].siginfo_nr;

		if (thread_args[i].pid == pid) {
			/* The thread matching @pid reuses the core we were given */
			task_args->t = thread_args + i;
			tcore = core;
		} else {
			fd_core = open_image(CR_FD_CORE, O_RSTR, thread_args[i].pid);
			if (fd_core < 0) {
				pr_err("Can't open core data for thread %d\n",
				       thread_args[i].pid);
				goto err;
			}

			ret = pb_read_one(fd_core, &tcore, PB_CORE);
			close(fd_core);

			/*
			 * Check the read result before touching tcore: on
			 * failure the pointer has not been set.
			 */
			if (ret < 0) {
				pr_err("Can't read core data for thread %d\n",
				       thread_args[i].pid);
				goto err;
			}

			if (tcore->tc || tcore->ids) {
				pr_err("Thread has optional fields present %d\n",
				       thread_args[i].pid);
				goto err;
			}
		}

		thread_args[i].ta		= task_args;
		thread_args[i].gpregs		= *CORE_THREAD_ARCH_INFO(tcore)->gpregs;
		thread_args[i].clear_tid_addr	= CORE_THREAD_ARCH_INFO(tcore)->clear_tid_addr;
		core_get_tls(tcore, &thread_args[i].tls);

		if (tcore->thread_core) {
			thread_args[i].has_futex	= true;
			thread_args[i].futex_rla	= tcore->thread_core->futex_rla;
			thread_args[i].futex_rla_len	= tcore->thread_core->futex_rla_len;
			thread_args[i].pdeath_sig	= tcore->thread_core->pdeath_sig;
			if (tcore->thread_core->pdeath_sig > _KNSIG) {
				pr_err("Pdeath signal is too big\n");
				goto err;
			}

			ret = prep_sched_info(&thread_args[i].sp, tcore->thread_core);
			if (ret)
				goto err;
		}

		sigframe = (struct rt_sigframe *)thread_args[i].mem_zone.rt_sigframe;

		if (construct_sigframe(sigframe, sigframe, tcore))
			goto err;

		/* Only cores read in this loop are ours to free here */
		if (thread_args[i].pid != pid)
			core_entry__free_unpacked(tcore, NULL);

		pr_info("Thread %4d stack %8p rt_sigframe %8p\n",
				i, thread_args[i].mem_zone.stack,
				thread_args[i].mem_zone.rt_sigframe);

	}

#ifdef CONFIG_VDSO
	/*
	 * Restorer needs own copy of vdso parameters. Runtime
	 * vdso must be kept non intersecting with anything else,
	 * since we need it being accessible even when own
	 * self-vmas are unmapped.
	 */
	mem += rst_mem_remap_size();
	task_args->vdso_rt_parked_at = (unsigned long)mem + vdso_rt_delta;
	task_args->vdso_sym_rt = vdso_sym_rt;
#endif

	new_sp = restorer_stack(task_args->t);

	ret = prepare_itimers(pid, core, task_args);
	if (ret < 0)
		goto err;

	ret = prepare_creds(pid, task_args);
	if (ret < 0)
		goto err;

	ret = prepare_mm(pid, task_args);
	if (ret < 0)
		goto err;

	/* No longer need it */
	core_entry__free_unpacked(core, NULL);

	/*
	 * Open the last_pid sysctl early, since restorer (maybe) lives
	 * in chroot and has no access to "/proc/..." paths.
	 */
	task_args->fd_last_pid = open_proc_rw(PROC_GEN, LAST_PID_PATH);
	if (task_args->fd_last_pid < 0) {
		pr_perror("Can't open sys.ns_last_pid");
		goto err;
	}

	/*
	 * Now prepare run-time data for threads restore.
	 */
	task_args->nr_threads		= current->nr_threads;
	task_args->nr_zombies		= current->rst->nr_zombies;
	task_args->clone_restore_fn	= (void *)restore_thread_exec_start;
	task_args->thread_args		= thread_args;

	/*
	 * Make root and cwd restore _that_ late not to break any
	 * attempts to open files by paths above (e.g. /proc).
	 */

	if (restore_fs(current))
		goto err;

	close_image_dir();
	close_proc();
	close_service_fd(ROOT_FD_OFF);

	__gcov_flush();

	pr_info("task_args: %p\n"
		"task_args->pid: %d\n"
		"task_args->nr_threads: %d\n"
		"task_args->clone_restore_fn: %p\n"
		"task_args->thread_args: %p\n",
		task_args, task_args->t->pid,
		task_args->nr_threads,
		task_args->clone_restore_fn,
		task_args->thread_args);

	/*
	 * An indirect call to task_restore, note it never returns
	 * and restoring core is extremely destructive.
	 */

	JUMP_TO_RESTORER_BLOB(new_sp, restore_task_exec_start, task_args);

err:
	free_mappings(&self_vmas);
err_nv:
	/* Just to be sure */
	exit(1);
	return -1;
}
|
|
|
unsigned long timerfd_mem_cpos;
|
|
|
|
|
2014-05-26 11:50:14 +04:00
|
|
|
#ifdef CONFIG_VDSO
|
2013-05-24 01:42:14 +04:00
|
|
|
unsigned long vdso_rt_size = 0;
|
|
|
|
unsigned long vdso_rt_delta = 0;
|
2014-05-26 11:50:14 +04:00
|
|
|
#endif
|
2013-05-24 01:42:14 +04:00
|
|
|
|
2013-03-01 20:11:51 +04:00
|
|
|
struct vm_area_list self_vmas;
|
2014-02-03 15:12:22 +04:00
|
|
|
struct vm_area_list *vmas = ¤t->rst->vmas;
|
2012-03-02 19:30:23 +04:00
|
|
|
int i;
|
2011-11-12 19:26:40 +04:00
|
|
|
|
2012-05-02 14:42:00 +04:00
|
|
|
pr_info("Restore via sigreturn\n");
|
2012-01-01 13:12:37 +04:00
|
|
|
|
2011-11-16 18:19:24 +04:00
|
|
|
/* pr_info_vma_list(&self_vma_list); */
|
2011-10-27 18:59:21 +04:00
|
|
|
|
2013-11-08 17:32:07 +04:00
|
|
|
BUILD_BUG_ON(sizeof(struct task_restore_args) & 1);
|
2011-11-12 19:26:40 +04:00
|
|
|
BUILD_BUG_ON(sizeof(struct thread_restore_args) & 1);
|
2012-01-16 23:52:15 +03:00
|
|
|
BUILD_BUG_ON(TASK_ENTRIES_SIZE % PAGE_SIZE);
|
2011-11-12 19:26:40 +04:00
|
|
|
|
2013-11-08 17:29:55 +04:00
|
|
|
args_len = round_up(sizeof(*task_args) + sizeof(*thread_args) * current->nr_threads, PAGE_SIZE);
|
2012-05-02 14:42:00 +04:00
|
|
|
pr_info("%d threads require %ldK of memory\n",
|
2013-11-08 17:29:55 +04:00
|
|
|
current->nr_threads, KBYTES(args_len));
|
2011-11-12 19:26:40 +04:00
|
|
|
|
2013-11-03 23:51:45 +04:00
|
|
|
/*
|
|
|
|
* Copy VMAs to private rst memory so that it's able to
|
|
|
|
* walk them and m(un|re)map.
|
|
|
|
*/
|
|
|
|
|
2013-11-02 01:05:13 +04:00
|
|
|
tgt_vmas = rst_mem_cpos(RM_PRIVATE);
|
2014-02-03 15:12:22 +04:00
|
|
|
list_for_each_entry(vma, &vmas->h, list) {
|
2013-10-29 12:57:54 +04:00
|
|
|
VmaEntry *vme;
|
|
|
|
|
2013-11-02 01:05:13 +04:00
|
|
|
vme = rst_mem_alloc(sizeof(*vme), RM_PRIVATE);
|
2013-10-29 12:57:54 +04:00
|
|
|
if (!vme)
|
2013-11-03 17:23:31 +04:00
|
|
|
goto err_nv;
|
2013-10-29 12:57:54 +04:00
|
|
|
|
2014-02-04 00:08:16 +04:00
|
|
|
*vme = *vma->e;
|
2013-11-22 18:19:08 +04:00
|
|
|
|
2014-02-04 00:08:16 +04:00
|
|
|
if (vma_priv(vma->e))
|
2013-11-22 18:19:08 +04:00
|
|
|
vma_premmaped_start(vme) = vma->premmaped_addr;
|
2013-10-29 12:57:54 +04:00
|
|
|
}
|
|
|
|
|
2013-11-03 23:51:45 +04:00
|
|
|
/*
|
|
|
|
* Copy tcp sockets fds to rst memory -- restorer will
|
|
|
|
* turn repair off before going sigreturn
|
|
|
|
*/
|
|
|
|
|
2013-11-02 01:05:13 +04:00
|
|
|
tcp_socks = rst_mem_cpos(RM_PRIVATE);
|
|
|
|
tcp_socks_mem = rst_mem_alloc(rst_tcp_socks_len(), RM_PRIVATE);
|
2013-07-05 15:04:57 +04:00
|
|
|
if (!tcp_socks_mem)
|
2013-11-03 17:23:31 +04:00
|
|
|
goto err_nv;
|
2013-07-05 15:04:57 +04:00
|
|
|
|
|
|
|
memcpy(tcp_socks_mem, rst_tcp_socks, rst_tcp_socks_len());
|
|
|
|
|
2014-06-30 21:58:05 +04:00
|
|
|
/*
|
|
|
|
* Copy timerfd params for restorer args, we need to proceed
|
|
|
|
* timer setting at the very late.
|
|
|
|
*/
|
|
|
|
timerfd_mem_cpos = rst_mem_cpos(RM_PRIVATE);
|
|
|
|
timerfd_mem = rst_mem_alloc(rst_timerfd_len(), RM_PRIVATE);
|
|
|
|
if (!timerfd_mem)
|
|
|
|
goto err_nv;
|
|
|
|
memcpy(timerfd_mem, rst_timerfd, rst_timerfd_len());
|
|
|
|
|
2013-11-03 17:23:31 +04:00
|
|
|
/*
|
|
|
|
* We're about to search for free VM area and inject the restorer blob
|
|
|
|
* into it. No irrelevent mmaps/mremaps beyond this point, otherwise
|
|
|
|
* this unwanted mapping might get overlapped by the restorer.
|
|
|
|
*/
|
|
|
|
|
2014-02-07 13:32:21 +04:00
|
|
|
ret = parse_self_maps_lite(&self_vmas);
|
2013-11-03 17:23:31 +04:00
|
|
|
if (ret < 0)
|
|
|
|
goto err;
|
|
|
|
|
2013-11-08 17:29:55 +04:00
|
|
|
restore_bootstrap_len = restorer_len + args_len +
|
2013-11-02 01:06:31 +04:00
|
|
|
TASK_ENTRIES_SIZE +
|
2013-11-02 01:05:13 +04:00
|
|
|
rst_mem_remap_size();
|
2012-11-27 22:16:00 +03:00
|
|
|
|
2014-05-26 11:50:14 +04:00
|
|
|
#ifdef CONFIG_VDSO
|
2013-05-24 01:42:14 +04:00
|
|
|
/*
|
2014-06-20 19:35:08 +04:00
|
|
|
* Figure out how much memory runtime vdso and vvar will need.
|
2013-05-24 01:42:14 +04:00
|
|
|
*/
|
2014-07-18 18:44:51 +04:00
|
|
|
vdso_rt_size = vdso_vma_size(&vdso_sym_rt);
|
|
|
|
if (vdso_rt_size) {
|
2013-05-24 01:42:14 +04:00
|
|
|
vdso_rt_delta = ALIGN(restore_bootstrap_len, PAGE_SIZE) - restore_bootstrap_len;
|
2014-07-18 18:44:51 +04:00
|
|
|
vdso_rt_size += vdso_rt_delta;
|
2014-06-20 19:35:08 +04:00
|
|
|
if (vvar_vma_size(&vdso_sym_rt))
|
|
|
|
vdso_rt_size += ALIGN(vvar_vma_size(&vdso_sym_rt), PAGE_SIZE);
|
2013-05-24 01:42:14 +04:00
|
|
|
}
|
|
|
|
|
2013-09-23 14:33:35 +04:00
|
|
|
restore_bootstrap_len += vdso_rt_size;
|
2014-05-26 11:50:14 +04:00
|
|
|
#endif
|
2013-09-23 14:33:35 +04:00
|
|
|
|
2012-11-27 22:16:00 +03:00
|
|
|
/*
|
|
|
|
* Restorer is a blob (code + args) that will get mapped in some
|
|
|
|
* place, that should _not_ intersect with both -- current mappings
|
|
|
|
* and mappings of the task we're restoring here. The subsequent
|
|
|
|
* call finds the start address for the restorer.
|
|
|
|
*
|
|
|
|
* After the start address is found we populate it with the restorer
|
|
|
|
* parts one by one (some are remap-ed, some are mmap-ed and copied
|
|
|
|
* or inited from scratch).
|
|
|
|
*/
|
|
|
|
|
2014-02-03 15:12:22 +04:00
|
|
|
exec_mem_hint = restorer_get_vma_hint(pid, &vmas->h, &self_vmas.h,
|
2013-09-23 14:33:35 +04:00
|
|
|
restore_bootstrap_len);
|
2011-11-16 18:19:24 +04:00
|
|
|
if (exec_mem_hint == -1) {
|
2012-01-31 15:31:22 +04:00
|
|
|
pr_err("No suitable area for task_restore bootstrap (%ldK)\n",
|
2013-09-23 14:33:35 +04:00
|
|
|
restore_bootstrap_len);
|
2011-11-06 01:49:57 +04:00
|
|
|
goto err;
|
2011-11-16 18:19:24 +04:00
|
|
|
}
|
2011-10-27 00:57:01 +04:00
|
|
|
|
2012-04-13 19:44:00 +04:00
|
|
|
pr_info("Found bootstrap VMA hint at: 0x%lx (needs ~%ldK)\n", exec_mem_hint,
|
2013-09-23 14:33:35 +04:00
|
|
|
KBYTES(restore_bootstrap_len));
|
2012-03-02 19:28:13 +04:00
|
|
|
|
2012-09-14 14:51:40 +04:00
|
|
|
ret = remap_restorer_blob((void *)exec_mem_hint);
|
|
|
|
if (ret < 0)
|
2011-11-06 01:49:57 +04:00
|
|
|
goto err;
|
2011-10-24 22:23:06 +04:00
|
|
|
|
2011-10-26 11:16:00 +04:00
|
|
|
/*
|
2011-11-16 18:19:24 +04:00
|
|
|
* Prepare a memory map for restorer. Note a thread space
|
|
|
|
* might be completely unused so it's here just for convenience.
|
2011-10-26 11:16:00 +04:00
|
|
|
*/
|
2012-11-13 20:15:13 +03:00
|
|
|
restore_thread_exec_start = restorer_sym(exec_mem_hint, __export_restore_thread);
|
|
|
|
restore_task_exec_start = restorer_sym(exec_mem_hint, __export_restore_task);
|
2013-09-23 14:33:35 +04:00
|
|
|
current->rst->munmap_restorer = restorer_sym(exec_mem_hint, __export_unmap);
|
2012-09-13 03:01:48 +04:00
|
|
|
|
2012-09-14 14:51:40 +04:00
|
|
|
exec_mem_hint += restorer_len;
|
2011-10-26 11:16:00 +04:00
|
|
|
|
2012-09-13 04:10:48 +04:00
|
|
|
/* VMA we need to run task_restore code */
|
2013-11-08 17:29:55 +04:00
|
|
|
mem = mmap((void *)exec_mem_hint, args_len,
|
2012-09-13 04:10:48 +04:00
|
|
|
PROT_READ | PROT_WRITE,
|
|
|
|
MAP_PRIVATE | MAP_ANON | MAP_FIXED, 0, 0);
|
|
|
|
if (mem != (void *)exec_mem_hint) {
|
|
|
|
pr_err("Can't mmap section for restore code\n");
|
|
|
|
goto err;
|
|
|
|
}
|
2011-10-26 11:16:00 +04:00
|
|
|
|
2013-11-08 17:57:04 +04:00
|
|
|
exec_mem_hint -= restorer_len;
|
|
|
|
|
2013-11-08 17:29:55 +04:00
|
|
|
memzero(mem, args_len);
|
2012-09-13 04:10:48 +04:00
|
|
|
task_args = mem;
|
2013-11-08 17:29:55 +04:00
|
|
|
thread_args = (struct thread_restore_args *)(task_args + 1);
|
2011-11-12 19:26:40 +04:00
|
|
|
|
2011-10-26 00:30:41 +04:00
|
|
|
/*
|
2012-01-01 13:10:12 +04:00
|
|
|
* Get a reference to shared memory area which is
|
|
|
|
* used to signal if shmem restoration complete
|
|
|
|
* from low-level restore code.
|
|
|
|
*
|
|
|
|
* This shmem area is mapped right after the whole area of
|
|
|
|
* sigreturn rt code. Note we didn't allocated it before
|
|
|
|
* but this area is taken into account for 'hint' memory
|
|
|
|
* address.
|
2011-10-26 00:30:41 +04:00
|
|
|
*/
|
2012-03-02 19:29:35 +04:00
|
|
|
|
2013-11-08 17:29:55 +04:00
|
|
|
mem += args_len;
|
2012-03-02 19:29:35 +04:00
|
|
|
ret = shmem_remap(task_entries, mem, TASK_ENTRIES_SIZE);
|
2012-01-16 23:52:15 +03:00
|
|
|
if (ret < 0)
|
|
|
|
goto err;
|
2012-03-02 19:30:23 +04:00
|
|
|
mem += TASK_ENTRIES_SIZE;
|
|
|
|
|
2013-11-08 17:57:04 +04:00
|
|
|
if (rst_mem_remap(mem))
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
task_args->task_entries = mem - TASK_ENTRIES_SIZE;
|
2012-03-27 16:34:00 +04:00
|
|
|
|
2013-11-02 01:05:13 +04:00
|
|
|
task_args->rst_mem = mem;
|
|
|
|
task_args->rst_mem_size = rst_mem_remap_size();
|
2013-11-08 17:57:04 +04:00
|
|
|
|
|
|
|
task_args->bootstrap_start = (void *)exec_mem_hint;
|
|
|
|
task_args->bootstrap_len = restore_bootstrap_len;
|
2014-05-26 11:50:14 +04:00
|
|
|
#ifdef CONFIG_VDSO
|
2013-11-08 17:57:04 +04:00
|
|
|
task_args->vdso_rt_size = vdso_rt_size;
|
2014-05-26 11:50:14 +04:00
|
|
|
#endif
|
2013-11-08 17:57:04 +04:00
|
|
|
|
|
|
|
task_args->premmapped_addr = (unsigned long) current->rst->premmapped_addr;
|
|
|
|
task_args->premmapped_len = current->rst->premmapped_len;
|
2012-09-17 20:02:57 +04:00
|
|
|
|
2013-11-02 01:06:31 +04:00
|
|
|
task_args->shmems = rst_mem_remap_ptr(rst_shmems, RM_SHREMAP);
|
|
|
|
task_args->nr_shmems = nr_shmems;
|
|
|
|
|
2014-02-03 15:12:22 +04:00
|
|
|
task_args->nr_vmas = vmas->nr;
|
2013-11-02 01:05:13 +04:00
|
|
|
task_args->tgt_vmas = rst_mem_remap_ptr(tgt_vmas, RM_PRIVATE);
|
2013-10-29 12:57:54 +04:00
|
|
|
|
2013-07-05 15:00:08 +04:00
|
|
|
task_args->timer_n = posix_timers_nr;
|
2013-11-03 23:43:44 +04:00
|
|
|
task_args->posix_timers = rst_mem_remap_ptr(posix_timers_cpos, RM_PRIVATE);
|
2013-07-05 15:00:08 +04:00
|
|
|
|
2014-06-30 21:58:05 +04:00
|
|
|
task_args->timerfd_n = rst_timerfd_nr;
|
|
|
|
task_args->timerfd = rst_mem_remap_ptr(timerfd_mem_cpos, RM_PRIVATE);
|
|
|
|
|
2013-07-05 15:02:46 +04:00
|
|
|
task_args->siginfo_nr = siginfo_nr;
|
2013-11-03 23:47:51 +04:00
|
|
|
task_args->siginfo = rst_mem_remap_ptr(siginfo_cpos, RM_PRIVATE);
|
2013-07-05 15:02:46 +04:00
|
|
|
|
2013-07-05 15:04:57 +04:00
|
|
|
task_args->tcp_socks_nr = rst_tcp_socks_nr;
|
2013-11-02 01:05:13 +04:00
|
|
|
task_args->tcp_socks = rst_mem_remap_ptr(tcp_socks, RM_PRIVATE);
|
2013-07-05 14:56:45 +04:00
|
|
|
|
2012-01-01 13:10:12 +04:00
|
|
|
/*
|
|
|
|
* Arguments for task restoration.
|
|
|
|
*/
|
2012-07-19 13:23:01 +04:00
|
|
|
|
2013-01-14 11:25:50 +04:00
|
|
|
BUG_ON(core->mtype != CORE_ENTRY__MARCH);
|
2012-07-19 13:23:01 +04:00
|
|
|
|
2012-03-01 18:52:42 +04:00
|
|
|
task_args->logfd = log_get_fd();
|
2012-09-03 14:44:09 +04:00
|
|
|
task_args->loglevel = log_get_loglevel();
|
2012-01-19 01:33:19 +03:00
|
|
|
task_args->sigchld_act = sigchld_act;
|
2011-11-18 16:09:01 +04:00
|
|
|
|
2012-07-19 13:23:01 +04:00
|
|
|
strncpy(task_args->comm, core->tc->comm, sizeof(task_args->comm));
|
|
|
|
|
2013-11-03 23:40:12 +04:00
|
|
|
task_args->nr_rlim = rlims_nr;
|
|
|
|
if (rlims_nr)
|
|
|
|
task_args->rlims = rst_mem_remap_ptr(rlims_cpos, RM_PRIVATE);
|
2013-01-10 20:08:38 +04:00
|
|
|
|
2012-02-10 20:18:08 +04:00
|
|
|
/*
|
|
|
|
* Fill up per-thread data.
|
|
|
|
*/
|
2012-09-05 19:52:55 +04:00
|
|
|
for (i = 0; i < current->nr_threads; i++) {
|
2012-07-19 13:23:01 +04:00
|
|
|
int fd_core;
|
2012-12-21 18:58:16 +04:00
|
|
|
CoreEntry *tcore;
|
2013-05-24 16:20:19 +04:00
|
|
|
struct rt_sigframe *sigframe;
|
2012-12-21 18:58:23 +04:00
|
|
|
|
2012-09-05 19:52:55 +04:00
|
|
|
thread_args[i].pid = current->threads[i].virt;
|
2013-03-25 23:39:49 +04:00
|
|
|
thread_args[i].siginfo_nr = siginfo_priv_nr[i];
|
2013-11-03 23:47:51 +04:00
|
|
|
thread_args[i].siginfo = rst_mem_remap_ptr(siginfo_cpos, RM_PRIVATE);
|
2013-07-05 15:02:46 +04:00
|
|
|
thread_args[i].siginfo += siginfo_nr;
|
|
|
|
siginfo_nr += thread_args[i].siginfo_nr;
|
2011-11-12 19:26:40 +04:00
|
|
|
|
2012-02-10 20:18:08 +04:00
|
|
|
/* skip self */
|
2012-12-21 18:58:23 +04:00
|
|
|
if (thread_args[i].pid == pid) {
|
|
|
|
task_args->t = thread_args + i;
|
|
|
|
tcore = core;
|
|
|
|
} else {
|
2013-04-09 11:13:51 +04:00
|
|
|
fd_core = open_image(CR_FD_CORE, O_RSTR, thread_args[i].pid);
|
2012-12-21 18:58:23 +04:00
|
|
|
if (fd_core < 0) {
|
|
|
|
pr_err("Can't open core data for thread %d\n",
|
|
|
|
thread_args[i].pid);
|
|
|
|
goto err;
|
|
|
|
}
|
2012-01-16 00:54:43 +04:00
|
|
|
|
2012-12-21 18:58:23 +04:00
|
|
|
ret = pb_read_one(fd_core, &tcore, PB_CORE);
|
|
|
|
close(fd_core);
|
2012-07-19 13:23:01 +04:00
|
|
|
}
|
|
|
|
|
2012-12-21 18:58:23 +04:00
|
|
|
if ((tcore->tc || tcore->ids) && thread_args[i].pid != pid) {
|
2012-07-19 13:23:01 +04:00
|
|
|
pr_err("Thread has optional fields present %d\n",
|
|
|
|
thread_args[i].pid);
|
|
|
|
ret = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ret < 0) {
|
|
|
|
pr_err("Can't read core data for thread %d\n",
|
|
|
|
thread_args[i].pid);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2012-10-30 10:04:37 +03:00
|
|
|
thread_args[i].ta = task_args;
|
2013-01-14 17:19:06 +04:00
|
|
|
thread_args[i].gpregs = *CORE_THREAD_ARCH_INFO(tcore)->gpregs;
|
|
|
|
thread_args[i].clear_tid_addr = CORE_THREAD_ARCH_INFO(tcore)->clear_tid_addr;
|
2013-01-09 18:48:00 +04:00
|
|
|
core_get_tls(tcore, &thread_args[i].tls);
|
2011-11-12 19:26:40 +04:00
|
|
|
|
2012-12-21 18:58:16 +04:00
|
|
|
if (tcore->thread_core) {
|
2012-08-10 20:29:01 +04:00
|
|
|
thread_args[i].has_futex = true;
|
2012-12-21 18:58:16 +04:00
|
|
|
thread_args[i].futex_rla = tcore->thread_core->futex_rla;
|
|
|
|
thread_args[i].futex_rla_len = tcore->thread_core->futex_rla_len;
|
2014-06-27 19:26:52 +04:00
|
|
|
thread_args[i].pdeath_sig = tcore->thread_core->pdeath_sig;
|
|
|
|
if (tcore->thread_core->pdeath_sig > _KNSIG) {
|
|
|
|
pr_err("Pdeath signal is too big\n");
|
|
|
|
goto err;
|
|
|
|
}
|
2012-10-17 00:23:25 +04:00
|
|
|
|
2012-12-21 18:58:16 +04:00
|
|
|
ret = prep_sched_info(&thread_args[i].sp, tcore->thread_core);
|
2012-10-17 00:23:25 +04:00
|
|
|
if (ret)
|
|
|
|
goto err;
|
2012-08-10 20:29:01 +04:00
|
|
|
}
|
|
|
|
|
2013-05-24 16:20:19 +04:00
|
|
|
sigframe = (struct rt_sigframe *)thread_args[i].mem_zone.rt_sigframe;
|
|
|
|
|
|
|
|
if (construct_sigframe(sigframe, sigframe, tcore))
|
2012-12-21 17:35:43 +04:00
|
|
|
goto err;
|
|
|
|
|
2012-12-21 18:58:23 +04:00
|
|
|
if (thread_args[i].pid != pid)
|
|
|
|
core_entry__free_unpacked(tcore, NULL);
|
2011-11-17 00:59:08 +04:00
|
|
|
|
2013-10-29 12:19:13 +04:00
|
|
|
pr_info("Thread %4d stack %8p rt_sigframe %8p\n",
|
2012-01-31 15:31:22 +04:00
|
|
|
i, thread_args[i].mem_zone.stack,
|
2011-11-16 18:19:24 +04:00
|
|
|
thread_args[i].mem_zone.rt_sigframe);
|
2011-11-12 19:26:40 +04:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2014-05-26 11:50:14 +04:00
|
|
|
#ifdef CONFIG_VDSO
|
2013-05-24 01:42:14 +04:00
|
|
|
/*
|
|
|
|
* Restorer needs own copy of vdso parameters. Runtime
|
|
|
|
* vdso must be kept non intersecting with anything else,
|
|
|
|
* since we need it being accessible even when own
|
|
|
|
* self-vmas are unmaped.
|
|
|
|
*/
|
2013-11-02 01:05:13 +04:00
|
|
|
mem += rst_mem_remap_size();
|
2013-05-24 01:42:14 +04:00
|
|
|
task_args->vdso_rt_parked_at = (unsigned long)mem + vdso_rt_delta;
|
|
|
|
task_args->vdso_sym_rt = vdso_sym_rt;
|
2014-05-26 11:50:14 +04:00
|
|
|
#endif
|
2013-05-24 01:42:14 +04:00
|
|
|
|
2013-10-29 12:28:11 +04:00
|
|
|
new_sp = restorer_stack(task_args->t);
|
2012-12-21 18:58:16 +04:00
|
|
|
|
2014-04-15 21:58:49 +04:00
|
|
|
ret = prepare_itimers(pid, core, task_args);
|
2012-12-21 18:58:16 +04:00
|
|
|
if (ret < 0)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
ret = prepare_creds(pid, task_args);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
ret = prepare_mm(pid, task_args);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err;
|
|
|
|
|
2014-07-31 02:00:00 +04:00
|
|
|
/* No longer need it */
|
|
|
|
core_entry__free_unpacked(core, NULL);
|
|
|
|
|
2013-09-25 13:53:02 +04:00
|
|
|
/*
|
|
|
|
* Open the last_pid syscl early, since restorer (maybe) lives
|
|
|
|
* in chroot and has no access to "/proc/..." paths.
|
|
|
|
*/
|
2014-06-05 20:16:41 +04:00
|
|
|
task_args->fd_last_pid = open_proc_rw(PROC_GEN, LAST_PID_PATH);
|
2013-09-25 13:53:02 +04:00
|
|
|
if (task_args->fd_last_pid < 0) {
|
|
|
|
pr_perror("Can't open sys.ns_last_pid");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2012-12-21 18:58:16 +04:00
|
|
|
/*
|
|
|
|
* Now prepare run-time data for threads restore.
|
|
|
|
*/
|
|
|
|
task_args->nr_threads = current->nr_threads;
|
2013-03-25 23:39:53 +04:00
|
|
|
task_args->nr_zombies = current->rst->nr_zombies;
|
2012-12-21 18:58:16 +04:00
|
|
|
task_args->clone_restore_fn = (void *)restore_thread_exec_start;
|
|
|
|
task_args->thread_args = thread_args;
|
|
|
|
|
2013-09-25 13:46:01 +04:00
|
|
|
/*
|
|
|
|
* Make root and cwd restore _that_ late not to break any
|
|
|
|
* attempts to open files by paths above (e.g. /proc).
|
|
|
|
*/
|
|
|
|
|
2014-07-03 19:07:44 +04:00
|
|
|
if (restore_fs(current))
|
2013-09-25 13:46:01 +04:00
|
|
|
goto err;
|
|
|
|
|
2012-03-16 17:24:00 +04:00
|
|
|
close_image_dir();
|
2014-04-21 18:23:30 +04:00
|
|
|
close_proc();
|
2014-04-16 09:04:27 +04:00
|
|
|
close_service_fd(ROOT_FD_OFF);
|
2012-03-16 17:24:00 +04:00
|
|
|
|
2012-12-25 22:43:14 +04:00
|
|
|
__gcov_flush();
|
|
|
|
|
2011-11-16 18:19:24 +04:00
|
|
|
pr_info("task_args: %p\n"
|
|
|
|
"task_args->pid: %d\n"
|
|
|
|
"task_args->nr_threads: %d\n"
|
|
|
|
"task_args->clone_restore_fn: %p\n"
|
|
|
|
"task_args->thread_args: %p\n",
|
2012-12-21 18:58:23 +04:00
|
|
|
task_args, task_args->t->pid,
|
2012-03-02 19:30:23 +04:00
|
|
|
task_args->nr_threads,
|
|
|
|
task_args->clone_restore_fn,
|
2011-11-16 18:19:24 +04:00
|
|
|
task_args->thread_args);
|
|
|
|
|
2011-10-26 17:35:50 +04:00
|
|
|
/*
|
2013-04-12 13:00:06 -07:00
|
|
|
* An indirect call to task_restore, note it never returns
|
|
|
|
* and restoring core is extremely destructive.
|
2011-10-26 17:35:50 +04:00
|
|
|
*/
|
2013-01-09 17:39:23 +04:00
|
|
|
|
|
|
|
JUMP_TO_RESTORER_BLOB(new_sp, restore_task_exec_start, task_args);
|
2011-10-26 11:16:00 +04:00
|
|
|
|
2011-10-26 22:50:46 +04:00
|
|
|
err:
|
2013-03-01 20:11:51 +04:00
|
|
|
free_mappings(&self_vmas);
|
2013-11-03 17:23:31 +04:00
|
|
|
err_nv:
|
2011-10-26 17:35:50 +04:00
|
|
|
/* Just to be sure */
|
2012-01-17 10:56:28 +04:00
|
|
|
exit(1);
|
2012-03-21 19:37:00 +04:00
|
|
|
return -1;
|
2011-10-24 22:23:06 +04:00
|
|
|
}
|