2011-09-23 12:00:45 +04:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <signal.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <dirent.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include <fcntl.h>
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <sys/vfs.h>
|
|
|
|
#include <sys/ptrace.h>
|
|
|
|
#include <sys/user.h>
|
|
|
|
#include <sys/wait.h>
|
2011-12-01 18:21:17 +04:00
|
|
|
#include <sys/file.h>
|
2012-02-14 20:20:10 +03:00
|
|
|
#include <sys/shm.h>
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
#include <sched.h>
|
|
|
|
|
|
|
|
#include <sys/sendfile.h>
|
|
|
|
|
|
|
|
#include "compiler.h"
|
|
|
|
#include "types.h"
|
|
|
|
|
|
|
|
#include "image.h"
|
|
|
|
#include "util.h"
|
2011-12-19 18:52:50 +04:00
|
|
|
#include "log.h"
|
2011-10-26 17:35:50 +04:00
|
|
|
#include "syscall.h"
|
2011-10-24 22:23:06 +04:00
|
|
|
#include "restorer.h"
|
2011-12-26 22:12:03 +04:00
|
|
|
#include "sockets.h"
|
2011-12-26 20:33:09 +04:00
|
|
|
#include "lock.h"
|
2012-01-10 18:03:00 +04:00
|
|
|
#include "files.h"
|
2012-01-13 20:52:35 +04:00
|
|
|
#include "proc_parse.h"
|
2012-01-14 21:22:06 +03:00
|
|
|
#include "restorer-blob.h"
|
2011-09-23 12:00:45 +04:00
|
|
|
#include "crtools.h"
|
2012-01-26 15:27:00 +04:00
|
|
|
#include "namespaces.h"
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2011-10-13 17:31:52 +04:00
|
|
|
/*
 * The real_pid member formerly served cases when
 * no fork-with-pid functionality was in the kernel,
 * so now it is being kept here just in case
 * we need it again.
 */
|
|
|
|
|
2011-09-30 09:00:45 +04:00
|
|
|
/*
 * Bits stored in the status member of struct pipe_info: which pipe ends
 * were recorded for the owning task, plus a flag set by create_pipe()
 * once the pipe has actually been created.
 */
#define PIPE_NONE	(0 << 0)
#define PIPE_RDONLY	(1 << 1)
#define PIPE_WRONLY	(1 << 2)
#define PIPE_RDWR	(PIPE_RDONLY | PIPE_WRONLY)
#define PIPE_MODE_MASK	(0x7)
#define PIPE_CREATED	(1 << 3)

/* True when both the read-end and the write-end bits are set in (p)->status. */
#define pipe_is_rw(p)	(((p)->status & PIPE_MODE_MASK) == PIPE_RDWR)
|
|
|
|
|
2011-09-23 12:00:45 +04:00
|
|
|
struct pipe_info {
|
2011-09-29 00:50:09 +04:00
|
|
|
unsigned int pipeid;
|
2011-09-23 12:00:45 +04:00
|
|
|
int pid;
|
|
|
|
int read_fd;
|
|
|
|
int write_fd;
|
2011-09-30 09:00:45 +04:00
|
|
|
int status;
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_t real_pid;
|
|
|
|
futex_t users;
|
2011-09-23 12:00:45 +04:00
|
|
|
};
|
|
|
|
|
2011-09-28 14:45:30 +04:00
|
|
|
/* Singly linked node describing an address range and its shmem id. */
struct shmem_id {
	struct shmem_id	*next;	/* following node in the list */
	unsigned long	addr;	/* range start */
	unsigned long	end;	/* range end */
	unsigned long	shmid;	/* shared memory identifier */
};
|
|
|
|
|
2011-12-09 15:39:08 +04:00
|
|
|
struct pipe_list_entry {
|
|
|
|
struct list_head list;
|
|
|
|
struct pipe_entry e;
|
|
|
|
off_t offset;
|
|
|
|
};
|
|
|
|
|
2012-01-16 23:52:15 +03:00
|
|
|
/* Task counters and state futexes; mapped MAP_SHARED in prepare_pstree(). */
static struct task_entries *task_entries;

static struct shmem_id *shmem_ids;

/* Table of anonymous shared memory areas; mapped MAP_SHARED in prepare_shared(). */
static struct shmems *shmems;

/*
 * Collected pipes: a 4096-byte MAP_SHARED array set up in prepare_shared(),
 * with nr_pipes counting the slots filled by collect_pipe().
 */
static struct pipe_info *pipes;
static int nr_pipes;

static struct pstree_item *me;

/* Process tree items appended by prepare_pstree(). */
static LIST_HEAD(tasks);
|
2011-11-13 12:57:16 +04:00
|
|
|
|
2012-01-26 15:26:00 +04:00
|
|
|
/* Forward declarations of file-local functions that are used before their definitions. */
static int restore_task_with_children(void *);
static int sigreturn_restore(pid_t pid, struct list_head *vmas, int nr_vmas);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
static void show_saved_shmems(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
pr_info("\tSaved shmems:\n");
|
|
|
|
|
2011-12-26 21:15:30 +04:00
|
|
|
for (i = 0; i < shmems->nr_shmems; i++)
|
2011-09-28 15:55:12 +04:00
|
|
|
pr_info("\t\tstart: %016lx shmid: %lx pid: %d\n",
|
2011-12-26 21:15:30 +04:00
|
|
|
shmems->entries[i].start,
|
|
|
|
shmems->entries[i].shmid,
|
|
|
|
shmems->entries[i].pid);
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static void show_saved_pipes(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
pr_info("\tSaved pipes:\n");
|
|
|
|
for (i = 0; i < nr_pipes; i++)
|
2011-09-30 09:00:45 +04:00
|
|
|
pr_info("\t\tpipeid %x pid %d users %d status %d\n",
|
|
|
|
pipes[i].pipeid, pipes[i].pid,
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_get(&pipes[i].users), pipes[i].status);
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2011-09-28 14:45:30 +04:00
|
|
|
static struct pipe_info *find_pipe(unsigned int pipeid)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
|
|
|
struct pipe_info *pi;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < nr_pipes; i++) {
|
|
|
|
pi = pipes + i;
|
2011-09-29 00:50:09 +04:00
|
|
|
if (pi->pipeid == pipeid)
|
2011-09-23 12:00:45 +04:00
|
|
|
return pi;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2012-01-20 00:05:22 +04:00
|
|
|
static int shmem_wait_and_open(int pid, struct shmem_info *si)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2011-12-26 23:50:45 +04:00
|
|
|
unsigned long time = 1;
|
2011-12-09 15:14:16 +04:00
|
|
|
char path[128];
|
2011-12-26 23:50:45 +04:00
|
|
|
int ret;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
sprintf(path, "/proc/%d/map_files/%lx-%lx",
|
2012-01-20 00:05:22 +04:00
|
|
|
si->pid, si->start, si->end);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-01-20 00:05:22 +04:00
|
|
|
pr_info("%d: Waiting for [%s] to appear\n", pid, path);
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_wait_until(&si->lock, 1);
|
2011-12-26 23:50:45 +04:00
|
|
|
|
2012-01-20 00:05:22 +04:00
|
|
|
pr_info("%d: Opening shmem [%s] \n", pid, path);
|
2011-12-26 23:50:45 +04:00
|
|
|
ret = open(path, O_RDWR);
|
2012-03-21 19:39:00 +04:00
|
|
|
if (ret < 0)
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror(" %d: Can't stat shmem at %s",
|
2012-03-21 19:39:00 +04:00
|
|
|
si->pid, path);
|
2012-01-03 13:07:45 +04:00
|
|
|
return ret;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
static int collect_shmem(int pid, struct vma_entry *vi)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
|
|
|
int i;
|
2011-12-26 21:15:30 +04:00
|
|
|
struct shmem_info *entries = shmems->entries;
|
|
|
|
int nr_shmems = shmems->nr_shmems;
|
2012-03-21 10:12:00 +04:00
|
|
|
unsigned long size = vi->pgoff + vi->end - vi->start;
|
|
|
|
struct shmem_info *si;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
si = find_shmem(shmems, vi->shmid);
|
|
|
|
if (si) {
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
if (si->size < size)
|
|
|
|
si->size = size;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2011-09-28 15:55:12 +04:00
|
|
|
/*
|
2011-11-14 17:23:23 +04:00
|
|
|
* Only the shared mapping with a lowest
|
2011-09-28 15:55:12 +04:00
|
|
|
* pid will be created in real, other processes
|
|
|
|
* will wait until the kernel propagate this mapping
|
|
|
|
* into /proc
|
|
|
|
*/
|
2012-03-21 10:12:00 +04:00
|
|
|
if (si->pid <= pid)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
si->pid = pid;
|
|
|
|
si->start = vi->start;
|
|
|
|
si->end = vi->end;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-12-26 21:15:30 +04:00
|
|
|
if ((nr_shmems + 1) * sizeof(struct shmem_info) +
|
|
|
|
sizeof (struct shmems) >= SHMEMS_SIZE) {
|
2012-03-01 18:52:42 +04:00
|
|
|
pr_err("OOM storing shmems\n");
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
pr_info("Add new shmem %lx (0x016%lx-0x016%lx)",
|
|
|
|
vi->shmid, vi->start, vi->end);
|
2011-09-30 01:13:16 +04:00
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
si = &shmems->entries[nr_shmems];
|
|
|
|
shmems->nr_shmems++;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
si->start = vi->start;
|
|
|
|
si->end = vi->end;
|
|
|
|
si->shmid = vi->shmid;
|
|
|
|
si->pid = pid;
|
|
|
|
si->size = size;
|
|
|
|
si->fd = -1;
|
2012-01-02 20:29:39 +04:00
|
|
|
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_init(&si->lock);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
static int prepare_shmem_pid(int pid)
|
|
|
|
{
|
|
|
|
int fd, ret = -1;
|
|
|
|
struct vma_entry vi;
|
|
|
|
struct task_core_entry tc;
|
|
|
|
struct image_header hdr;
|
|
|
|
|
2012-03-21 14:22:00 +04:00
|
|
|
fd = open_image_ro(CR_FD_VMAS, pid);
|
|
|
|
if (fd < 0) {
|
|
|
|
if (errno == ENOENT)
|
|
|
|
return 0;
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
}
|
2012-03-21 10:12:00 +04:00
|
|
|
|
|
|
|
while (1) {
|
2012-03-21 14:22:00 +04:00
|
|
|
ret = read_img_eof(fd, &vi);
|
|
|
|
if (ret <= 0)
|
|
|
|
break;
|
2012-03-21 10:12:00 +04:00
|
|
|
|
|
|
|
pr_info("%d: vma %lx %lx\n", pid, vi.start, vi.end);
|
|
|
|
|
|
|
|
if (!vma_entry_is(&vi, VMA_ANON_SHARED))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (vma_entry_is(&vi, VMA_AREA_SYSVIPC))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ret = collect_shmem(pid, &vi);
|
|
|
|
if (ret)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
close(fd);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-09-28 01:09:34 +04:00
|
|
|
static int collect_pipe(int pid, struct pipe_entry *e, int p_fd)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2011-09-28 01:09:34 +04:00
|
|
|
/*
|
|
|
|
* All pipes get collected into the one array,
|
|
|
|
* note the highest PID is the sign of which
|
|
|
|
* process pipe should be really created, all other
|
|
|
|
* processes (if they have pipes with pipeid matched)
|
|
|
|
* will be attached.
|
|
|
|
*/
|
2011-09-23 12:00:45 +04:00
|
|
|
for (i = 0; i < nr_pipes; i++) {
|
2011-09-29 00:50:09 +04:00
|
|
|
if (pipes[i].pipeid != e->pipeid)
|
2011-09-23 12:00:45 +04:00
|
|
|
continue;
|
|
|
|
|
2011-09-30 09:00:45 +04:00
|
|
|
if (pipes[i].pid > pid && !pipe_is_rw(&pipes[i])) {
|
2011-09-23 12:00:45 +04:00
|
|
|
pipes[i].pid = pid;
|
2011-12-08 16:27:00 +04:00
|
|
|
pipes[i].status = 0;
|
|
|
|
pipes[i].read_fd = -1;
|
|
|
|
pipes[i].write_fd = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pipes[i].pid == pid) {
|
2011-11-30 10:51:29 +03:00
|
|
|
switch (e->flags & O_ACCMODE) {
|
2011-09-30 09:00:45 +04:00
|
|
|
case O_RDONLY:
|
|
|
|
pipes[i].status |= PIPE_RDONLY;
|
2011-12-08 16:27:00 +04:00
|
|
|
pipes[i].read_fd = e->fd;
|
2011-09-30 09:00:45 +04:00
|
|
|
break;
|
|
|
|
case O_WRONLY:
|
|
|
|
pipes[i].status |= PIPE_WRONLY;
|
2011-12-08 16:27:00 +04:00
|
|
|
pipes[i].write_fd = e->fd;
|
2011-09-30 09:00:45 +04:00
|
|
|
break;
|
|
|
|
}
|
2011-12-08 16:27:00 +04:00
|
|
|
} else
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_inc(&pipes[i].users);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((nr_pipes + 1) * sizeof(struct pipe_info) >= 4096) {
|
2012-03-01 18:52:42 +04:00
|
|
|
pr_err("OOM storing pipes\n");
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
memset(&pipes[nr_pipes], 0, sizeof(pipes[nr_pipes]));
|
|
|
|
|
2011-09-29 00:50:09 +04:00
|
|
|
pipes[nr_pipes].pipeid = e->pipeid;
|
2011-09-28 01:09:34 +04:00
|
|
|
pipes[nr_pipes].pid = pid;
|
2011-12-16 15:32:00 +04:00
|
|
|
pipes[nr_pipes].read_fd = -1;
|
|
|
|
pipes[nr_pipes].write_fd = -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_init(&pipes[nr_pipes].users);
|
|
|
|
|
2011-11-30 10:51:29 +03:00
|
|
|
switch (e->flags & O_ACCMODE) {
|
2011-09-30 09:00:45 +04:00
|
|
|
case O_RDONLY:
|
|
|
|
pipes[nr_pipes].status = PIPE_RDONLY;
|
2011-12-08 16:27:00 +04:00
|
|
|
pipes[i].read_fd = e->fd;
|
2011-09-30 09:00:45 +04:00
|
|
|
break;
|
|
|
|
case O_WRONLY:
|
|
|
|
pipes[nr_pipes].status = PIPE_WRONLY;
|
2011-12-08 16:27:00 +04:00
|
|
|
pipes[i].write_fd = e->fd;
|
2011-09-30 09:00:45 +04:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2011-09-23 12:00:45 +04:00
|
|
|
nr_pipes++;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int prepare_pipes_pid(int pid)
|
|
|
|
{
|
2012-01-26 20:30:31 +04:00
|
|
|
int p_fd, ret = 0;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2011-12-29 19:56:34 +04:00
|
|
|
p_fd = open_image_ro(CR_FD_PIPES, pid);
|
2012-01-22 20:26:51 +04:00
|
|
|
if (p_fd < 0) {
|
|
|
|
if (errno == ENOENT)
|
|
|
|
return 0;
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
}
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
while (1) {
|
|
|
|
struct pipe_entry e;
|
|
|
|
|
2012-01-26 20:30:31 +04:00
|
|
|
ret = read_img_eof(p_fd, &e);
|
|
|
|
if (ret <= 0)
|
2011-09-23 12:00:45 +04:00
|
|
|
break;
|
|
|
|
|
2012-01-26 20:30:31 +04:00
|
|
|
ret = collect_pipe(pid, &e, p_fd);
|
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
if (e.bytes)
|
|
|
|
lseek(p_fd, e.bytes, SEEK_CUR);
|
|
|
|
}
|
|
|
|
|
|
|
|
close(p_fd);
|
2012-01-26 20:30:31 +04:00
|
|
|
return ret;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-01-17 15:28:13 +03:00
|
|
|
static int shmem_remap(void *old_addr, void *new_addr, unsigned long size)
|
2011-12-26 21:27:03 +04:00
|
|
|
{
|
2012-03-17 11:47:00 +04:00
|
|
|
void *ret;
|
2011-12-26 21:27:03 +04:00
|
|
|
|
2012-03-17 11:47:00 +04:00
|
|
|
ret = mremap(old_addr, size, size,
|
|
|
|
MREMAP_FIXED | MREMAP_MAYMOVE, new_addr);
|
|
|
|
if (new_addr != ret) {
|
|
|
|
pr_perror("mremap failed");
|
2011-12-26 21:27:03 +04:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2012-03-17 11:47:00 +04:00
|
|
|
return 0;
|
2011-12-26 21:27:03 +04:00
|
|
|
}
|
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
static int prepare_pstree(void)
|
2012-03-22 21:06:41 +04:00
|
|
|
{
|
2012-04-05 15:34:31 +04:00
|
|
|
int ret = 0, ps_fd;
|
|
|
|
|
|
|
|
pr_info("Reading image tree\n");
|
|
|
|
|
|
|
|
task_entries = mmap(NULL, TASK_ENTRIES_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
|
|
|
|
if (task_entries == MAP_FAILED) {
|
|
|
|
pr_perror("Can't map shmem");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
task_entries->nr = 0;
|
|
|
|
futex_set(&task_entries->start, CR_STATE_RESTORE);
|
|
|
|
|
|
|
|
ps_fd = open_image_ro(CR_FD_PSTREE);
|
|
|
|
if (ps_fd < 0)
|
|
|
|
return ps_fd;
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
struct pstree_entry e;
|
|
|
|
struct pstree_item *pi;
|
|
|
|
|
|
|
|
ret = read_img_eof(ps_fd, &e);
|
|
|
|
if (ret <= 0)
|
|
|
|
break;
|
|
|
|
|
|
|
|
ret = -1;
|
|
|
|
pi = xmalloc(sizeof(*pi));
|
|
|
|
if (pi == NULL)
|
|
|
|
break;
|
|
|
|
|
|
|
|
pi->pid = e.pid;
|
|
|
|
|
|
|
|
ret = -1;
|
|
|
|
pi->nr_children = e.nr_children;
|
|
|
|
pi->children = xmalloc(e.nr_children * sizeof(u32));
|
|
|
|
if (!pi->children)
|
|
|
|
break;
|
|
|
|
|
|
|
|
ret = read_img_buf(ps_fd, pi->children,
|
|
|
|
e.nr_children * sizeof(u32));
|
|
|
|
if (ret < 0)
|
|
|
|
break;
|
|
|
|
|
|
|
|
ret = -1;
|
|
|
|
pi->nr_threads = e.nr_threads;
|
|
|
|
pi->threads = xmalloc(e.nr_threads * sizeof(u32));
|
|
|
|
if (!pi->threads)
|
|
|
|
break;
|
|
|
|
|
|
|
|
ret = read_img_buf(ps_fd, pi->threads,
|
|
|
|
e.nr_threads * sizeof(u32));
|
|
|
|
if (ret < 0)
|
|
|
|
break;
|
|
|
|
|
|
|
|
list_add_tail(&pi->list, &tasks);
|
|
|
|
task_entries->nr += e.nr_threads;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!ret)
|
|
|
|
futex_set(&task_entries->nr_in_progress, task_entries->nr);
|
|
|
|
|
|
|
|
close(ps_fd);
|
|
|
|
return ret;
|
2012-03-22 21:06:41 +04:00
|
|
|
}
|
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
static int prepare_shared(void)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2012-01-26 20:30:31 +04:00
|
|
|
int ret = 0;
|
2012-04-05 15:34:31 +04:00
|
|
|
struct pstree_item *pi;
|
2012-01-26 20:30:31 +04:00
|
|
|
|
2011-09-23 12:00:45 +04:00
|
|
|
pr_info("Preparing info about shared resources\n");
|
|
|
|
|
2011-12-26 21:15:30 +04:00
|
|
|
shmems = mmap(NULL, SHMEMS_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
|
2011-09-23 12:00:45 +04:00
|
|
|
if (shmems == MAP_FAILED) {
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("Can't map shmem");
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2011-12-26 21:15:30 +04:00
|
|
|
shmems->nr_shmems = 0;
|
|
|
|
|
2011-09-23 12:00:45 +04:00
|
|
|
pipes = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
|
|
|
|
if (pipes == MAP_FAILED) {
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("Can't map pipes");
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-02-22 18:51:27 +04:00
|
|
|
if (prepare_shared_fdinfo())
|
2012-01-11 15:45:00 +04:00
|
|
|
return -1;
|
|
|
|
|
2012-04-03 00:50:50 +04:00
|
|
|
if (collect_reg_files())
|
|
|
|
return -1;
|
|
|
|
|
2012-04-03 00:54:52 +04:00
|
|
|
if (collect_inet_sockets())
|
|
|
|
return -1;
|
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
list_for_each_entry(pi, &tasks, list) {
|
|
|
|
ret = collect_unix_sockets(pi->pid);
|
2012-04-03 00:58:41 +04:00
|
|
|
if (ret < 0)
|
|
|
|
return -1;
|
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
ret = prepare_shmem_pid(pi->pid);
|
2012-01-26 20:30:31 +04:00
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
ret = prepare_pipes_pid(pi->pid);
|
2012-01-26 20:30:31 +04:00
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
ret = prepare_fd_pid(pi->pid);
|
2012-01-26 20:30:31 +04:00
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-01-26 20:30:31 +04:00
|
|
|
if (!ret) {
|
|
|
|
show_saved_shmems();
|
|
|
|
show_saved_pipes();
|
2012-04-05 12:48:57 +04:00
|
|
|
show_saved_files();
|
2012-01-26 20:30:31 +04:00
|
|
|
}
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-01-26 20:30:31 +04:00
|
|
|
return ret;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
static int restore_shmem_content(void *addr, struct shmem_info *si)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2012-03-21 10:12:00 +04:00
|
|
|
u64 offset;
|
|
|
|
int fd, ret = 0;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
fd = open_image_ro(CR_FD_SHMEM_PAGES, si->shmid);
|
|
|
|
if (fd < 0) {
|
|
|
|
munmap(addr, si->size);
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2012-03-21 10:12:00 +04:00
|
|
|
}
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
while (1) {
|
2012-04-05 12:57:29 +04:00
|
|
|
ret = read_img_buf_eof(fd, &offset, sizeof(offset));
|
|
|
|
if (ret <= 0)
|
2012-01-26 20:30:31 +04:00
|
|
|
break;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
if (offset + PAGE_SIZE > si->size)
|
|
|
|
break;
|
2012-01-04 18:31:48 +04:00
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
ret = read_img_buf(fd, addr + offset, PAGE_SIZE);
|
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2012-01-04 18:31:48 +04:00
|
|
|
}
|
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
close(fd);
|
|
|
|
return ret;
|
2012-01-04 18:31:48 +04:00
|
|
|
}
|
|
|
|
|
2012-03-21 19:38:00 +04:00
|
|
|
static int get_shmem_fd(int pid, struct vma_entry *vi)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
|
|
|
struct shmem_info *si;
|
2012-02-14 20:19:56 +03:00
|
|
|
struct shmem_id *shmid;
|
2012-02-14 20:20:10 +03:00
|
|
|
int sh_fd;
|
2012-03-21 19:38:00 +04:00
|
|
|
void *addr;
|
|
|
|
int f;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
si = find_shmem(shmems, vi->shmid);
|
|
|
|
pr_info("%d: Search for %016lx shmem %lx %p/%d\n", pid, vi->start, vi->shmid, si, si ? si->pid : -1);
|
|
|
|
if (!si) {
|
|
|
|
pr_err("Can't find my shmem %016lx\n", vi->start);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2012-03-21 19:38:00 +04:00
|
|
|
if (si->pid != pid)
|
|
|
|
return shmem_wait_and_open(pid, si);
|
2012-03-21 10:12:00 +04:00
|
|
|
|
2012-03-21 19:41:00 +04:00
|
|
|
if (si->fd != -1)
|
2012-03-21 19:38:00 +04:00
|
|
|
return dup(si->fd);
|
2012-03-21 10:12:00 +04:00
|
|
|
|
2012-03-21 19:38:00 +04:00
|
|
|
/* The following hack solves problems:
|
|
|
|
* vi->pgoff may be not zero in a target process.
|
|
|
|
* This mapping may be mapped more then once.
|
|
|
|
* The restorer doesn't have snprintf.
|
|
|
|
* Here is a good place to restore content
|
|
|
|
*/
|
|
|
|
addr = mmap(NULL, si->size,
|
|
|
|
PROT_WRITE | PROT_READ,
|
|
|
|
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
|
|
|
if (addr == MAP_FAILED) {
|
|
|
|
pr_err("Can't mmap shmid=0x%lx size=%ld\n",
|
|
|
|
vi->shmid, si->size);
|
|
|
|
return -1;
|
|
|
|
}
|
2012-03-21 10:12:00 +04:00
|
|
|
|
2012-03-21 19:38:00 +04:00
|
|
|
if (restore_shmem_content(addr, si) < 0) {
|
|
|
|
pr_err("Can't restore shmem content\n");
|
|
|
|
return -1;
|
2012-03-21 10:12:00 +04:00
|
|
|
}
|
|
|
|
|
2012-03-21 19:38:00 +04:00
|
|
|
f = open_proc_rw(getpid(), "map_files/%lx-%lx",
|
|
|
|
(unsigned long) addr,
|
|
|
|
(unsigned long) addr + si->size);
|
|
|
|
munmap(addr, si->size);
|
|
|
|
if (f < 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
si->fd = f;
|
|
|
|
return f;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
static int read_and_open_vmas(int pid, struct list_head *vmas, int *nr_vmas)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2012-03-27 16:34:00 +04:00
|
|
|
int fd, ret = -1;
|
|
|
|
|
|
|
|
fd = open_image_ro(CR_FD_VMAS, pid);
|
|
|
|
if (fd < 0)
|
|
|
|
return fd;
|
|
|
|
|
|
|
|
*nr_vmas = 0;
|
2011-09-23 12:00:45 +04:00
|
|
|
while (1) {
|
2012-03-27 16:34:00 +04:00
|
|
|
struct vma_area *vma;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
ret = -1;
|
|
|
|
vma = alloc_vma_area();
|
|
|
|
if (!vma)
|
|
|
|
break;
|
|
|
|
|
|
|
|
(*nr_vmas)++;
|
|
|
|
list_add_tail(&vma->list, vmas);
|
|
|
|
ret = read_img_eof(fd, &vma->vma);
|
2012-03-21 19:36:00 +04:00
|
|
|
if (ret <= 0)
|
2012-03-27 16:34:00 +04:00
|
|
|
break;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
if (!(vma_entry_is(&vma->vma, VMA_AREA_REGULAR)))
|
2011-09-23 12:00:45 +04:00
|
|
|
continue;
|
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
pr_info("%d: Opening %016lx-%016lx %016lx vma\n",
|
|
|
|
pid, vma->vma.start, vma->vma.end, vma->vma.pgoff);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
if (vma_entry_is(&vma->vma, VMA_AREA_SYSVIPC))
|
|
|
|
ret = vma->vma.shmid;
|
|
|
|
else if (vma_entry_is(&vma->vma, VMA_ANON_SHARED))
|
|
|
|
ret = get_shmem_fd(pid, &vma->vma);
|
|
|
|
else if (vma_entry_is(&vma->vma, VMA_FILE_PRIVATE) ||
|
|
|
|
vma_entry_is(&vma->vma, VMA_FILE_SHARED))
|
|
|
|
ret = get_filemap_fd(pid, &vma->vma);
|
2012-03-21 19:38:00 +04:00
|
|
|
else
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (ret < 0) {
|
|
|
|
pr_err("Can't fixup fd\n");
|
2012-03-27 16:34:00 +04:00
|
|
|
break;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
2012-03-21 19:38:00 +04:00
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
vma->vma.fd = ret;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
2012-03-27 16:34:00 +04:00
|
|
|
|
|
|
|
close(fd);
|
|
|
|
return ret;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2011-11-14 17:23:23 +04:00
|
|
|
static int prepare_and_sigreturn(int pid)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2012-03-27 16:34:00 +04:00
|
|
|
int err, nr_vmas;
|
|
|
|
LIST_HEAD(vma_list);
|
2012-03-21 19:37:00 +04:00
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
err = read_and_open_vmas(pid, &vma_list, &nr_vmas);
|
2012-02-29 13:39:21 +03:00
|
|
|
if (err)
|
|
|
|
return err;
|
2012-03-21 19:37:00 +04:00
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
return sigreturn_restore(pid, &vma_list, nr_vmas);
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2011-11-30 10:51:29 +03:00
|
|
|
/* File status flags that set_fd_flags() takes from its caller. */
#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)

/*
 * Apply the SETFL_MASK bits of @flags to @fd, leaving every other
 * status flag of the descriptor as it currently is.
 *
 * Returns the result of fcntl(F_SETFL), or the negative result of
 * fcntl(F_GETFL) if the current flags cannot be read.
 */
static int set_fd_flags(int fd, int flags)
{
	int cur = fcntl(fd, F_GETFL, 0);

	if (cur < 0)
		return cur;

	return fcntl(fd, F_SETFL, (SETFL_MASK & flags) | (cur & ~SETFL_MASK));
}
|
|
|
|
|
2012-01-16 09:40:00 +04:00
|
|
|
/*
 * Place pipe end @src at the descriptor number requested in *@dst.
 *
 * When *@dst is -1 no particular number is requested and *@dst simply
 * becomes @src. Otherwise move_img_fd() is called for *@other and
 * *@pipes_fd against the target number (presumably relocating them if
 * they occupy it - confirm against move_img_fd()), and @src is then
 * reopened as *@dst.
 *
 * Returns 0 on success, a non-zero value on failure.
 */
static int reopen_pipe(int src, int *dst, int *other, int *pipes_fd)
{
	if (*dst != -1) {
		if (move_img_fd(other, *dst))
			return -1;

		if (move_img_fd(pipes_fd, *dst))
			return -1;

		return reopen_fd_as(*dst, src);
	}

	*dst = src;
	return 0;
}
|
|
|
|
|
2011-12-09 20:22:00 +04:00
|
|
|
static int restore_pipe_data(struct pipe_entry *e, int wfd, int pipes_fd)
|
|
|
|
{
|
|
|
|
int ret, size = 0;
|
|
|
|
|
2012-01-15 20:04:13 +04:00
|
|
|
pr_info("\t%x: Splicing data to %d\n", e->pipeid, wfd);
|
2011-12-09 20:22:00 +04:00
|
|
|
|
|
|
|
while (size != e->bytes) {
|
2012-02-01 16:02:02 +03:00
|
|
|
ret = splice(pipes_fd, NULL, wfd, NULL, e->bytes - size, 0);
|
2011-12-09 20:22:00 +04:00
|
|
|
if (ret < 0) {
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("\t%x: Error splicing data", e->pipeid);
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-12-09 20:22:00 +04:00
|
|
|
}
|
|
|
|
if (ret == 0) {
|
2012-01-15 20:04:13 +04:00
|
|
|
pr_err("\t%x: Wanted to restore %d bytes, but got %d\n",
|
2011-12-09 20:22:00 +04:00
|
|
|
e->pipeid, e->bytes, size);
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-12-09 20:22:00 +04:00
|
|
|
}
|
|
|
|
|
2012-01-31 12:58:34 +04:00
|
|
|
size += ret;
|
2011-12-09 20:22:00 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-01-16 09:40:00 +04:00
|
|
|
static int create_pipe(int pid, struct pipe_entry *e, struct pipe_info *pi, int *pipes_fd)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
|
|
|
unsigned long time = 1000;
|
2011-09-27 20:23:26 +04:00
|
|
|
int pfd[2], tmp;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2011-12-08 16:27:00 +04:00
|
|
|
pr_info("\t%d: Creating pipe %x%s\n", pid, e->pipeid, pipe_is_rw(pi) ? "(rw)" : "");
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
if (pipe(pfd) < 0) {
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("%d: Can't create pipe", pid);
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-01-16 09:40:00 +04:00
|
|
|
if (restore_pipe_data(e, pfd[1], *pipes_fd))
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-01-16 09:40:00 +04:00
|
|
|
if (reopen_pipe(pfd[0], &pi->read_fd, &pfd[1], pipes_fd))
|
2011-12-09 20:22:00 +04:00
|
|
|
return -1;
|
2012-01-16 09:40:00 +04:00
|
|
|
if (reopen_pipe(pfd[1], &pi->write_fd, &pi->read_fd, pipes_fd))
|
2011-12-09 20:22:00 +04:00
|
|
|
return -1;
|
2011-12-08 16:27:00 +04:00
|
|
|
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_set_and_wake(&pi->real_pid, pid);
|
2011-09-30 09:00:45 +04:00
|
|
|
|
|
|
|
pi->status |= PIPE_CREATED;
|
|
|
|
|
2011-09-28 01:09:34 +04:00
|
|
|
pr_info("\t%d: Done, waiting for others (users %d) on %d pid with r:%d w:%d\n",
|
2012-03-26 23:11:00 +04:00
|
|
|
pid, futex_get(&pi->users), futex_get(&pi->real_pid), pi->read_fd, pi->write_fd);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-01-16 09:40:00 +04:00
|
|
|
if (!pipe_is_rw(pi)) {
|
|
|
|
pr_info("\t%d: Waiting for %x pipe to attach (%d users left)\n",
|
2012-03-26 23:11:00 +04:00
|
|
|
pid, e->pipeid, futex_get(&pi->users));
|
2012-01-16 09:40:00 +04:00
|
|
|
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_wait_until(&pi->users, 0);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2011-12-08 16:27:00 +04:00
|
|
|
if ((e->flags & O_ACCMODE) == O_WRONLY)
|
2011-09-30 09:00:45 +04:00
|
|
|
close_safe(&pi->read_fd);
|
2011-12-08 16:27:00 +04:00
|
|
|
else
|
2011-09-30 09:00:45 +04:00
|
|
|
close_safe(&pi->write_fd);
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2011-12-09 20:22:00 +04:00
|
|
|
tmp = 0;
|
|
|
|
if (pi->write_fd != e->fd && pi->read_fd != e->fd) {
|
2012-01-16 09:40:00 +04:00
|
|
|
if (move_img_fd(pipes_fd, e->fd))
|
|
|
|
return -1;
|
|
|
|
|
2011-12-09 20:22:00 +04:00
|
|
|
switch (e->flags & O_ACCMODE) {
|
|
|
|
case O_WRONLY:
|
|
|
|
tmp = dup2(pi->write_fd, e->fd);
|
|
|
|
break;
|
|
|
|
case O_RDONLY:
|
|
|
|
tmp = dup2(pi->read_fd, e->fd);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (tmp < 0)
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-12-09 20:22:00 +04:00
|
|
|
|
2011-11-30 10:51:29 +03:00
|
|
|
tmp = set_fd_flags(e->fd, e->flags);
|
2011-09-27 18:48:57 +04:00
|
|
|
if (tmp < 0)
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2011-12-08 16:27:00 +04:00
|
|
|
pr_info("\t%d: All is ok - reopening pipe for %d\n", pid, e->fd);
|
|
|
|
|
2011-09-23 12:00:45 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-01-16 09:40:00 +04:00
|
|
|
static int attach_pipe(int pid, struct pipe_entry *e, struct pipe_info *pi, int *pipes_fd)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
|
|
|
char path[128];
|
|
|
|
int tmp, fd;
|
|
|
|
|
2012-02-01 02:08:04 +04:00
|
|
|
pr_info("\t%d: Waiting for pipe %x to appear\n",
|
2011-09-28 01:09:34 +04:00
|
|
|
pid, e->pipeid);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_wait_while(&pi->real_pid, 0);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-01-16 09:40:00 +04:00
|
|
|
if (move_img_fd(pipes_fd, e->fd))
|
|
|
|
return -1;
|
|
|
|
|
2011-12-08 16:27:00 +04:00
|
|
|
if ((e->flags & O_ACCMODE) == O_WRONLY)
|
2011-09-23 12:00:45 +04:00
|
|
|
tmp = pi->write_fd;
|
|
|
|
else
|
|
|
|
tmp = pi->read_fd;
|
|
|
|
|
2011-12-08 16:27:00 +04:00
|
|
|
if (pid == pi->pid) {
|
|
|
|
if (tmp != e->fd)
|
|
|
|
tmp = dup2(tmp, e->fd);
|
|
|
|
|
|
|
|
if (tmp < 0) {
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("%d: Can't duplicate %d->%d",
|
2011-12-08 16:27:00 +04:00
|
|
|
pid, tmp, e->fd);
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-12-08 16:27:00 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
goto out;
|
2011-09-27 20:23:26 +04:00
|
|
|
}
|
|
|
|
|
2012-03-26 23:11:00 +04:00
|
|
|
sprintf(path, "/proc/%d/fd/%d", futex_get(&pi->real_pid), tmp);
|
2011-09-28 01:09:34 +04:00
|
|
|
pr_info("\t%d: Attaching pipe %s (%d users left)\n",
|
2012-03-26 23:11:00 +04:00
|
|
|
pid, path, futex_get(&pi->users) - 1);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
fd = open(path, e->flags);
|
|
|
|
if (fd < 0) {
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("%d: Can't attach pipe", pid);
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
pr_info("\t%d: Done, reopening for %d\n", pid, e->fd);
|
2012-01-16 11:57:45 +04:00
|
|
|
if (reopen_fd_as(e->fd, fd))
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-11-22 13:12:30 +04:00
|
|
|
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_dec_and_wake(&pi->users);
|
2011-12-08 16:27:00 +04:00
|
|
|
out:
|
2011-11-30 10:51:29 +03:00
|
|
|
tmp = set_fd_flags(e->fd, e->flags);
|
|
|
|
if (tmp < 0)
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-11-30 10:51:29 +03:00
|
|
|
|
2011-09-23 12:00:45 +04:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
static int open_pipe(int pid, struct pipe_entry *e, int *pipes_fd)
|
|
|
|
{
|
|
|
|
struct pipe_info *pi;
|
|
|
|
|
|
|
|
pr_info("\t%d: Opening pipe %x on fd %d\n", pid, e->pipeid, e->fd);
|
|
|
|
|
2011-09-28 14:45:30 +04:00
|
|
|
pi = find_pipe(e->pipeid);
|
2011-09-28 01:09:34 +04:00
|
|
|
if (!pi) {
|
2011-09-30 14:37:12 +04:00
|
|
|
pr_err("BUG: can't find my pipe %x\n", e->pipeid);
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2011-09-28 01:09:34 +04:00
|
|
|
/*
|
2011-09-30 09:00:45 +04:00
|
|
|
* This is somewhat tricky -- in case if a process uses
|
|
|
|
* both pipe ends the pipe should be created but only one
|
|
|
|
* pipe end get connected immediately in create_pipe the
|
|
|
|
* other pipe end should be connected via pipe attaching.
|
2011-09-28 01:09:34 +04:00
|
|
|
*/
|
2011-09-30 09:00:45 +04:00
|
|
|
if (pi->pid == pid && !(pi->status & PIPE_CREATED))
|
2012-01-16 09:40:00 +04:00
|
|
|
return create_pipe(pid, e, pi, pipes_fd);
|
2011-09-23 12:00:45 +04:00
|
|
|
else
|
2012-01-16 09:40:00 +04:00
|
|
|
return attach_pipe(pid, e, pi, pipes_fd);
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-01-19 01:33:19 +03:00
|
|
|
static rt_sigaction_t sigchld_act;
|
2011-11-29 15:12:25 +03:00
|
|
|
static int prepare_sigactions(int pid)
|
|
|
|
{
|
2011-12-03 17:24:05 +04:00
|
|
|
rt_sigaction_t act, oact;
|
2012-02-01 15:24:39 +04:00
|
|
|
int fd_sigact;
|
2011-12-02 23:17:30 +04:00
|
|
|
struct sa_entry e;
|
2011-12-03 17:24:05 +04:00
|
|
|
int sig, i;
|
2012-02-01 15:24:39 +04:00
|
|
|
int ret = -1;
|
2011-11-29 15:12:25 +03:00
|
|
|
|
2011-12-29 19:56:34 +04:00
|
|
|
fd_sigact = open_image_ro(CR_FD_SIGACT, pid);
|
|
|
|
if (fd_sigact < 0)
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-12-01 17:15:00 +04:00
|
|
|
|
2011-11-29 15:12:25 +03:00
|
|
|
for (sig = 1; sig < SIGMAX; sig++) {
|
|
|
|
if (sig == SIGKILL || sig == SIGSTOP)
|
|
|
|
continue;
|
|
|
|
|
2012-01-26 20:30:31 +04:00
|
|
|
ret = read_img(fd_sigact, &e);
|
|
|
|
if (ret < 0)
|
|
|
|
break;
|
2011-11-29 15:12:25 +03:00
|
|
|
|
2011-12-03 17:24:05 +04:00
|
|
|
ASSIGN_TYPED(act.rt_sa_handler, e.sigaction);
|
|
|
|
ASSIGN_TYPED(act.rt_sa_flags, e.flags);
|
|
|
|
ASSIGN_TYPED(act.rt_sa_restorer, e.restorer);
|
2011-12-08 19:04:07 +04:00
|
|
|
ASSIGN_TYPED(act.rt_sa_mask.sig[0], e.mask);
|
2011-12-02 23:17:30 +04:00
|
|
|
|
2012-01-19 01:33:19 +03:00
|
|
|
if (sig == SIGCHLD) {
|
|
|
|
sigchld_act = act;
|
|
|
|
continue;
|
|
|
|
}
|
2011-12-02 23:17:30 +04:00
|
|
|
/*
|
|
|
|
* A pure syscall is used, because glibc
|
|
|
|
* sigaction overwrites se_restorer.
|
|
|
|
*/
|
2011-11-29 15:12:25 +03:00
|
|
|
ret = sys_sigaction(sig, &act, &oact);
|
|
|
|
if (ret == -1) {
|
|
|
|
pr_err("%d: Can't restore sigaction: %m\n", pid);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
err:
|
2012-02-29 13:39:21 +03:00
|
|
|
close_safe(&fd_sigact);
|
2011-11-29 15:12:25 +03:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2011-09-23 12:00:45 +04:00
|
|
|
static int prepare_pipes(int pid)
|
|
|
|
{
|
2012-02-29 13:39:21 +03:00
|
|
|
int ret = -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
int pipes_fd;
|
2011-12-08 16:27:00 +04:00
|
|
|
|
|
|
|
struct pipe_list_entry *le, *buf;
|
|
|
|
int buf_size = PAGE_SIZE;
|
|
|
|
int nr = 0;
|
|
|
|
|
|
|
|
LIST_HEAD(head);
|
2011-09-23 12:00:45 +04:00
|
|
|
|
|
|
|
pr_info("%d: Opening pipes\n", pid);
|
|
|
|
|
2011-12-29 19:56:34 +04:00
|
|
|
pipes_fd = open_image_ro(CR_FD_PIPES, pid);
|
|
|
|
if (pipes_fd < 0)
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-01-16 13:50:15 +04:00
|
|
|
buf = xmalloc(buf_size);
|
2012-02-29 13:39:21 +03:00
|
|
|
if (!buf)
|
|
|
|
goto out;
|
2011-12-08 16:27:00 +04:00
|
|
|
|
2011-09-23 12:00:45 +04:00
|
|
|
while (1) {
|
2011-12-08 16:27:00 +04:00
|
|
|
struct list_head *cur;
|
|
|
|
struct pipe_list_entry *cur_entry;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2011-12-08 16:27:00 +04:00
|
|
|
le = &buf[nr];
|
|
|
|
|
2012-01-26 20:30:31 +04:00
|
|
|
ret = read_img_eof(pipes_fd, &le->e);
|
|
|
|
if (ret <= 0)
|
2011-09-30 09:00:45 +04:00
|
|
|
break;
|
|
|
|
|
2011-12-08 16:27:00 +04:00
|
|
|
list_for_each(cur, &head) {
|
|
|
|
cur_entry = list_entry(cur, struct pipe_list_entry, list);
|
|
|
|
if (cur_entry->e.pipeid > le->e.pipeid)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
list_add_tail(&le->list, cur);
|
|
|
|
|
|
|
|
le->offset = lseek(pipes_fd, 0, SEEK_CUR);
|
|
|
|
lseek(pipes_fd, le->e.bytes, SEEK_CUR);
|
|
|
|
|
|
|
|
nr++;
|
|
|
|
if (nr > buf_size / sizeof(*le)) {
|
2012-01-26 20:30:31 +04:00
|
|
|
ret = -1;
|
2011-12-08 16:27:00 +04:00
|
|
|
pr_err("OOM storing pipes");
|
2012-01-26 20:30:31 +04:00
|
|
|
break;
|
2011-12-08 16:27:00 +04:00
|
|
|
}
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
2011-09-30 09:00:45 +04:00
|
|
|
|
2012-01-26 20:30:31 +04:00
|
|
|
if (!ret)
|
|
|
|
list_for_each_entry(le, &head, list) {
|
|
|
|
lseek(pipes_fd, le->offset, SEEK_SET);
|
|
|
|
if (open_pipe(pid, &le->e, &pipes_fd)) {
|
|
|
|
ret = -1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2011-12-08 16:27:00 +04:00
|
|
|
|
|
|
|
free(buf);
|
2012-02-29 13:39:21 +03:00
|
|
|
out:
|
|
|
|
close_safe(&pipes_fd);
|
2012-01-26 20:30:31 +04:00
|
|
|
return ret;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-01-22 20:28:30 +04:00
|
|
|
/*
 * Restore the resources of a living task @pid: pipes, sockets, file
 * descriptors and signal actions, in that order, then hand over to
 * prepare_and_sigreturn().
 *
 * Returns -1 as soon as one of the preparation steps reports a non-zero
 * result; otherwise returns what prepare_and_sigreturn() returns.
 */
static int restore_one_alive_task(int pid)
{
	pr_info("%d: Restoring resources\n", pid);

	/* Short-circuit evaluation keeps the steps in order and stops at the first failure */
	if (prepare_pipes(pid) ||
	    prepare_sockets(pid) ||
	    prepare_fds(pid) ||
	    prepare_sigactions(pid))
		return -1;

	return prepare_and_sigreturn(pid);
}
|
|
|
|
|
2012-01-22 20:28:30 +04:00
|
|
|
static void zombie_prepare_signals(void)
|
|
|
|
{
|
|
|
|
sigset_t blockmask;
|
|
|
|
int sig;
|
|
|
|
struct sigaction act;
|
|
|
|
|
|
|
|
sigfillset(&blockmask);
|
|
|
|
sigprocmask(SIG_UNBLOCK, &blockmask, NULL);
|
|
|
|
|
|
|
|
memset(&act, 0, sizeof(act));
|
|
|
|
act.sa_handler = SIG_DFL;
|
|
|
|
|
|
|
|
for (sig = 1; sig < SIGMAX; sig++)
|
|
|
|
sigaction(sig, &act, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
#define SIG_FATAL_MASK ( \
|
|
|
|
(1 << SIGHUP) |\
|
|
|
|
(1 << SIGINT) |\
|
|
|
|
(1 << SIGQUIT) |\
|
|
|
|
(1 << SIGILL) |\
|
|
|
|
(1 << SIGTRAP) |\
|
|
|
|
(1 << SIGABRT) |\
|
|
|
|
(1 << SIGIOT) |\
|
|
|
|
(1 << SIGBUS) |\
|
|
|
|
(1 << SIGFPE) |\
|
|
|
|
(1 << SIGKILL) |\
|
|
|
|
(1 << SIGUSR1) |\
|
|
|
|
(1 << SIGSEGV) |\
|
|
|
|
(1 << SIGUSR2) |\
|
|
|
|
(1 << SIGPIPE) |\
|
|
|
|
(1 << SIGALRM) |\
|
|
|
|
(1 << SIGTERM) |\
|
|
|
|
(1 << SIGXCPU) |\
|
|
|
|
(1 << SIGXFSZ) |\
|
|
|
|
(1 << SIGVTALRM)|\
|
|
|
|
(1 << SIGPROF) |\
|
|
|
|
(1 << SIGPOLL) |\
|
|
|
|
(1 << SIGIO) |\
|
|
|
|
(1 << SIGSYS) |\
|
|
|
|
(1 << SIGUNUSED)|\
|
|
|
|
(1 << SIGSTKFLT)|\
|
|
|
|
(1 << SIGPWR) \
|
|
|
|
)
|
|
|
|
|
|
|
|
static inline int sig_fatal(int sig)
|
|
|
|
{
|
|
|
|
return (sig > 0) && (sig < SIGMAX) && (SIG_FATAL_MASK & (1 << sig));
|
|
|
|
}
|
|
|
|
|
2012-02-14 14:09:01 +03:00
|
|
|
static int restore_one_zombie(int pid, int exit_code)
|
2012-01-22 20:28:30 +04:00
|
|
|
{
|
|
|
|
pr_info("Restoring zombie with %d code\n", exit_code);
|
|
|
|
|
|
|
|
if (task_entries != NULL) {
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_dec_and_wake(&task_entries->nr_in_progress);
|
|
|
|
futex_wait_while(&task_entries->start, CR_STATE_RESTORE);
|
2012-01-22 20:28:30 +04:00
|
|
|
|
|
|
|
zombie_prepare_signals();
|
|
|
|
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_dec_and_wake(&task_entries->nr_in_progress);
|
|
|
|
futex_wait_while(&task_entries->start, CR_STATE_RESTORE_SIGCHLD);
|
2012-01-22 20:28:30 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (exit_code & 0x7f) {
|
|
|
|
int signr;
|
|
|
|
|
|
|
|
signr = exit_code & 0x7F;
|
|
|
|
if (!sig_fatal(signr)) {
|
2012-03-01 18:52:42 +04:00
|
|
|
pr_warn("Exit with non fatal signal ignored\n");
|
2012-01-22 20:28:30 +04:00
|
|
|
signr = SIGABRT;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (kill(pid, signr) < 0)
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("Can't kill myself, will just exit");
|
2012-01-22 20:28:30 +04:00
|
|
|
|
|
|
|
exit_code = 0;
|
|
|
|
}
|
|
|
|
|
2012-01-30 17:04:24 +04:00
|
|
|
exit((exit_code >> 8) & 0x7f);
|
2012-01-22 20:28:30 +04:00
|
|
|
|
|
|
|
/* never reached */
|
|
|
|
BUG_ON(1);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int check_core_header(int pid, struct task_core_entry *tc)
|
|
|
|
{
|
2012-02-29 13:39:21 +03:00
|
|
|
int fd = -1, ret = -1;
|
2012-01-22 20:28:30 +04:00
|
|
|
struct image_header hdr;
|
|
|
|
|
|
|
|
fd = open_image_ro(CR_FD_CORE, pid);
|
|
|
|
if (fd < 0)
|
|
|
|
return -1;
|
|
|
|
|
2012-02-29 13:39:21 +03:00
|
|
|
if (read_img(fd, &hdr) < 0)
|
|
|
|
goto out;
|
2012-01-22 20:28:30 +04:00
|
|
|
|
|
|
|
if (hdr.version != HEADER_VERSION) {
|
|
|
|
pr_err("Core version mismatch %d\n", (int)hdr.version);
|
2012-02-29 13:39:21 +03:00
|
|
|
goto out;
|
2012-01-22 20:28:30 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (hdr.arch != HEADER_ARCH_X86_64) {
|
|
|
|
pr_err("Core arch mismatch %d\n", (int)hdr.arch);
|
2012-02-29 13:39:21 +03:00
|
|
|
goto out;
|
2012-01-22 20:28:30 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
ret = read_img(fd, tc);
|
2012-02-29 13:39:21 +03:00
|
|
|
out:
|
|
|
|
close_safe(&fd);
|
2012-01-22 20:28:30 +04:00
|
|
|
return ret < 0 ? ret : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int restore_one_task(int pid)
|
|
|
|
{
|
|
|
|
struct task_core_entry tc;
|
|
|
|
|
|
|
|
if (check_core_header(pid, &tc))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
switch ((int)tc.task_state) {
|
|
|
|
case TASK_ALIVE:
|
|
|
|
return restore_one_alive_task(pid);
|
|
|
|
case TASK_DEAD:
|
2012-02-14 14:09:01 +03:00
|
|
|
return restore_one_zombie(pid, tc.exit_code);
|
2012-01-22 20:28:30 +04:00
|
|
|
default:
|
|
|
|
pr_err("Unknown state in code %d\n", (int)tc.task_state);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-02-22 17:54:26 +04:00
|
|
|
/*
 * Size of the temporary stack a freshly cloned task runs on. It only has
 * to be big enough for the restorer itself: at the final phase the task
 * switches to the stack the program had at checkpoint time.
 */
#define STACK_SIZE	(8 * 4096)

/* Arguments passed to the entry point of a cloned task */
struct cr_clone_arg {
	int		pid;		/* pid the child is expected to have */
	int		fd;		/* descriptor opened on LAST_PID_PATH by the parent */
	unsigned long	clone_flags;	/* namespace flags the child was cloned with */
};
|
|
|
|
|
2012-01-26 15:27:00 +04:00
|
|
|
static inline int fork_with_pid(int pid, unsigned long ns_clone_flags)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2012-01-26 15:26:00 +04:00
|
|
|
int ret = -1;
|
2011-12-01 18:21:17 +04:00
|
|
|
char buf[32];
|
2012-01-26 15:26:00 +04:00
|
|
|
struct cr_clone_arg ca;
|
|
|
|
void *stack;
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2012-01-26 15:27:00 +04:00
|
|
|
pr_info("Forking task with %d pid (flags %lx)\n", pid, ns_clone_flags);
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2012-01-26 15:26:00 +04:00
|
|
|
stack = mmap(NULL, STACK_SIZE, PROT_WRITE | PROT_READ,
|
|
|
|
MAP_PRIVATE | MAP_GROWSDOWN | MAP_ANONYMOUS, -1, 0);
|
|
|
|
if (stack == MAP_FAILED) {
|
2012-02-01 02:08:04 +04:00
|
|
|
pr_perror("Failed to map stack for the child");
|
2012-01-26 15:26:00 +04:00
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
snprintf(buf, sizeof(buf), "%d", pid - 1);
|
|
|
|
ca.pid = pid;
|
2012-01-26 15:27:00 +04:00
|
|
|
ca.clone_flags = ns_clone_flags;
|
2012-01-26 15:26:00 +04:00
|
|
|
ca.fd = open(LAST_PID_PATH, O_RDWR);
|
|
|
|
if (ca.fd < 0) {
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("%d: Can't open %s", pid, LAST_PID_PATH);
|
2011-12-01 18:21:17 +04:00
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2012-01-26 15:26:00 +04:00
|
|
|
if (flock(ca.fd, LOCK_EX)) {
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("%d: Can't lock %s", pid, LAST_PID_PATH);
|
2012-03-05 20:44:00 +04:00
|
|
|
goto err_close;
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-01-26 15:26:00 +04:00
|
|
|
if (write_img_buf(ca.fd, buf, strlen(buf)))
|
2011-12-01 18:21:17 +04:00
|
|
|
goto err_unlock;
|
|
|
|
|
2012-01-26 15:26:00 +04:00
|
|
|
ret = clone(restore_task_with_children, stack + STACK_SIZE,
|
2012-01-26 15:27:00 +04:00
|
|
|
ns_clone_flags | SIGCHLD, &ca);
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2012-01-26 15:26:00 +04:00
|
|
|
if (ret < 0)
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("Can't fork for %d", pid);
|
2011-12-01 18:21:17 +04:00
|
|
|
|
|
|
|
err_unlock:
|
2012-01-26 15:26:00 +04:00
|
|
|
if (flock(ca.fd, LOCK_UN))
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("%d: Can't unlock %s", pid, LAST_PID_PATH);
|
2011-12-01 18:21:17 +04:00
|
|
|
|
2012-03-05 20:44:00 +04:00
|
|
|
err_close:
|
|
|
|
close_safe(&ca.fd);
|
2011-12-01 18:21:17 +04:00
|
|
|
err:
|
2012-01-26 15:26:00 +04:00
|
|
|
if (stack != MAP_FAILED)
|
|
|
|
munmap(stack, STACK_SIZE);
|
2011-09-23 12:00:45 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-01-19 01:33:19 +03:00
|
|
|
/*
 * SIGCHLD handler used while tasks are being restored: a child changing
 * state at this point means the restore went wrong. Log what happened to
 * the child and abort whoever waits on the nr_in_progress futex.
 *
 * @signal and @data are unused.
 */
static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
{
	/*
	 * si_code holds one CLD_* value, it is not a bit mask, so it has
	 * to be compared for equality. A child that dumped core is reported
	 * as killed, too.
	 */
	switch (siginfo->si_code) {
	case CLD_EXITED:
		pr_err("%d exited, status=%d\n",
			siginfo->si_pid, siginfo->si_status);
		break;
	case CLD_KILLED:
	case CLD_DUMPED:
		pr_err("%d killed by signal %d\n",
			siginfo->si_pid, siginfo->si_status);
		break;
	default:
		break;
	}

	futex_abort_and_wake(&task_entries->nr_in_progress);
}
|
|
|
|
|
2012-01-26 15:26:00 +04:00
|
|
|
static int restore_task_with_children(void *_arg)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2012-01-26 15:26:00 +04:00
|
|
|
struct cr_clone_arg *ca = _arg;
|
2012-04-05 15:34:31 +04:00
|
|
|
pid_t pid;
|
|
|
|
int ret, i;
|
2011-12-02 16:06:00 +04:00
|
|
|
sigset_t blockmask;
|
2012-01-26 15:26:00 +04:00
|
|
|
|
|
|
|
close_safe(&ca->fd);
|
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
pid = getpid();
|
|
|
|
if (ca->pid != pid) {
|
2012-02-10 20:18:08 +04:00
|
|
|
pr_err("%d: Pid do not match expected %d\n", me->pid, ca->pid);
|
2012-01-26 15:26:00 +04:00
|
|
|
exit(-1);
|
|
|
|
}
|
2011-12-02 16:06:00 +04:00
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
list_for_each_entry(me, &tasks, list)
|
|
|
|
if (me->pid == pid)
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (me == list_entry(&tasks, struct pstree_item, list)) {
|
|
|
|
pr_err("Pid %d not found in pstree image\n", pid);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2012-01-26 15:27:00 +04:00
|
|
|
if (ca->clone_flags) {
|
2012-02-10 20:18:08 +04:00
|
|
|
ret = prepare_namespace(me->pid, ca->clone_flags);
|
2012-01-26 15:27:00 +04:00
|
|
|
if (ret)
|
|
|
|
exit(-1);
|
|
|
|
}
|
|
|
|
|
2012-01-20 00:05:22 +04:00
|
|
|
/*
|
|
|
|
* The block mask will be restored in sigresturn.
|
|
|
|
*
|
|
|
|
* TODO: This code should be removed, when a freezer will be added.
|
|
|
|
*/
|
2011-12-02 16:06:00 +04:00
|
|
|
sigfillset(&blockmask);
|
2012-01-19 01:33:19 +03:00
|
|
|
sigdelset(&blockmask, SIGCHLD);
|
2011-12-02 16:06:00 +04:00
|
|
|
ret = sigprocmask(SIG_BLOCK, &blockmask, NULL);
|
|
|
|
if (ret) {
|
2012-02-10 20:18:08 +04:00
|
|
|
pr_perror("%d: Can't block signals", me->pid);
|
2011-12-02 16:06:00 +04:00
|
|
|
exit(1);
|
|
|
|
}
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
pr_info("%d: Restoring %d children:\n", me->pid, me->nr_children);
|
2012-02-10 20:18:08 +04:00
|
|
|
for (i = 0; i < me->nr_children; i++) {
|
|
|
|
ret = fork_with_pid(me->children[i], 0);
|
|
|
|
if (ret < 0)
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
return restore_one_task(me->pid);
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
static int restore_root_task(pid_t pid, struct cr_options *opts)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2012-01-16 23:52:15 +03:00
|
|
|
int ret, i;
|
2012-01-27 11:07:11 +04:00
|
|
|
struct sigaction act, old_act;
|
2012-04-05 15:34:31 +04:00
|
|
|
struct pstree_item *init;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-01-19 01:33:19 +03:00
|
|
|
ret = sigaction(SIGCHLD, NULL, &act);
|
|
|
|
if (ret < 0) {
|
|
|
|
perror("sigaction() failed\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
act.sa_flags |= SA_NOCLDWAIT | SA_NOCLDSTOP | SA_SIGINFO | SA_RESTART;
|
|
|
|
act.sa_sigaction = sigchld_handler;
|
2012-01-27 11:07:11 +04:00
|
|
|
ret = sigaction(SIGCHLD, &act, &old_act);
|
2012-01-19 01:33:19 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
perror("sigaction() failed\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
init = list_first_entry(&tasks, struct pstree_item, list);
|
|
|
|
if (init->pid != pid) {
|
|
|
|
pr_err("Pids mismatch. Init has pid %d, requested %d\n",
|
|
|
|
init->pid, pid);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2012-01-26 15:27:00 +04:00
|
|
|
/*
|
|
|
|
* FIXME -- currently we assume that all the tasks live
|
|
|
|
* in the same set of namespaces. This is done to debug
|
|
|
|
* the ns contents dumping/restoring. Need to revisit
|
|
|
|
* this later.
|
|
|
|
*/
|
|
|
|
|
2012-04-05 15:34:31 +04:00
|
|
|
ret = fork_with_pid(init->pid, opts->namespaces_flags);
|
2011-09-23 12:00:45 +04:00
|
|
|
if (ret < 0)
|
2011-12-13 15:03:33 +04:00
|
|
|
return -1;
|
2011-09-23 12:00:45 +04:00
|
|
|
|
2012-01-27 19:01:51 +03:00
|
|
|
pr_info("Wait until all tasks are restored\n");
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_wait_while_gt(&task_entries->nr_in_progress, 0);
|
|
|
|
ret = (int)futex_get(&task_entries->nr_in_progress);
|
|
|
|
|
2012-03-21 10:12:00 +04:00
|
|
|
out:
|
2012-01-19 01:33:19 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
pr_err("Someone can't be restored\n");
|
2012-04-05 15:34:31 +04:00
|
|
|
struct pstree_item *pi;
|
|
|
|
|
|
|
|
list_for_each_entry(pi, &tasks, list)
|
|
|
|
kill(pi->pid, SIGKILL);
|
2012-01-19 01:33:19 +03:00
|
|
|
return 1;
|
|
|
|
}
|
2012-01-19 01:33:16 +03:00
|
|
|
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_set_and_wake(&task_entries->nr_in_progress, task_entries->nr);
|
|
|
|
futex_set_and_wake(&task_entries->start, CR_STATE_RESTORE_SIGCHLD);
|
|
|
|
futex_wait_until(&task_entries->nr_in_progress, 0);
|
2012-01-27 11:07:11 +04:00
|
|
|
|
|
|
|
ret = sigaction(SIGCHLD, &old_act, NULL);
|
|
|
|
if (ret < 0) {
|
|
|
|
perror("sigaction() failed\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2012-01-16 23:52:15 +03:00
|
|
|
pr_info("Go on!!!\n");
|
2012-03-26 23:11:00 +04:00
|
|
|
futex_set_and_wake(&task_entries->start, CR_STATE_COMPLETE);
|
2012-01-16 23:52:15 +03:00
|
|
|
|
2012-01-26 15:25:00 +04:00
|
|
|
if (!opts->restore_detach)
|
2012-01-18 23:24:37 +04:00
|
|
|
wait(NULL);
|
2011-09-23 12:00:45 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-01-26 15:25:00 +04:00
|
|
|
/*
 * Restore the whole process tree rooted at @pid: prepare the process
 * tree, then the shared state, then restore the root task.
 *
 * Returns -1 if a preparation step reports a negative result, otherwise
 * whatever restore_root_task() returns.
 */
static int restore_all_tasks(pid_t pid, struct cr_options *opts)
{
	/* prepare_shared() is not called if prepare_pstree() has failed */
	if (prepare_pstree() < 0 || prepare_shared() < 0)
		return -1;

	return restore_root_task(pid, opts);
}
|
|
|
|
|
2012-04-05 14:08:11 +04:00
|
|
|
static long restorer_get_vma_hint(pid_t pid, struct list_head *tgt_vma_list,
|
|
|
|
struct list_head *self_vma_list, long vma_len)
|
2011-11-06 01:49:57 +04:00
|
|
|
{
|
2012-04-05 14:08:11 +04:00
|
|
|
struct vma_area *t_vma;
|
|
|
|
long prev_vma_end = 0;
|
2011-11-06 01:49:57 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Here we need some heuristics -- the VMA which restorer will
|
|
|
|
* belong to should not be unmapped, so we need to gueess out
|
|
|
|
* where to put it in.
|
|
|
|
*
|
|
|
|
* Yes, I know it's an O(n^2) algorithm, but usually there are
|
|
|
|
* not that many VMAs presented so instead of consuming memory
|
|
|
|
* better to stick with it.
|
|
|
|
*/
|
|
|
|
|
2012-04-05 14:08:11 +04:00
|
|
|
list_for_each_entry(t_vma, tgt_vma_list, list) {
|
|
|
|
if (prev_vma_end && ((t_vma->vma.start - prev_vma_end) > vma_len)) {
|
|
|
|
struct vma_area *s_vma;
|
2012-03-02 19:28:13 +04:00
|
|
|
unsigned long prev_vma_end2 = 0;
|
|
|
|
|
2012-04-05 14:08:11 +04:00
|
|
|
list_for_each_entry(s_vma, self_vma_list, list) {
|
|
|
|
if (prev_vma_end2 && (prev_vma_end2 >= prev_vma_end) &&
|
|
|
|
((s_vma->vma.start - prev_vma_end2) > vma_len))
|
|
|
|
return prev_vma_end2;
|
2012-03-02 19:28:13 +04:00
|
|
|
|
2012-04-05 14:08:11 +04:00
|
|
|
prev_vma_end2 = s_vma->vma.end;
|
2011-11-06 01:49:57 +04:00
|
|
|
}
|
2012-03-02 19:28:13 +04:00
|
|
|
}
|
|
|
|
|
2012-04-05 14:08:11 +04:00
|
|
|
prev_vma_end = t_vma->vma.end;
|
2011-11-06 01:49:57 +04:00
|
|
|
}
|
2012-04-05 14:08:11 +04:00
|
|
|
|
|
|
|
return -1;
|
2011-11-06 01:49:57 +04:00
|
|
|
}
|
|
|
|
|
2012-01-24 16:45:19 +04:00
|
|
|
#define USEC_PER_SEC	1000000L

/*
 * Check that @tv is well-formed: the seconds are not negative and the
 * microseconds are within [0, USEC_PER_SEC). The cast to unsigned makes
 * negative microseconds fail the range check as well.
 *
 * Returns 1 if the value is valid, 0 otherwise.
 */
static inline int timeval_valid(struct timeval *tv)
{
	if (tv->tv_sec < 0)
		return 0;

	return (unsigned long)tv->tv_usec < USEC_PER_SEC;
}
|
|
|
|
|
|
|
|
static inline int itimer_restore_and_fix(char *n, struct itimer_entry *ie,
|
|
|
|
struct itimerval *val)
|
|
|
|
{
|
|
|
|
if (ie->isec == 0 && ie->iusec == 0) {
|
|
|
|
memzero_p(val);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
val->it_interval.tv_sec = ie->isec;
|
|
|
|
val->it_interval.tv_usec = ie->iusec;
|
|
|
|
|
|
|
|
if (!timeval_valid(&val->it_interval)) {
|
|
|
|
pr_err("Invalid timer interval\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ie->vsec == 0 && ie->vusec == 0) {
|
|
|
|
/*
|
|
|
|
* Remaining time was too short. Set it to
|
|
|
|
* interval to make the timer armed and work.
|
|
|
|
*/
|
|
|
|
val->it_value.tv_sec = ie->isec;
|
|
|
|
val->it_value.tv_usec = ie->iusec;
|
|
|
|
} else {
|
|
|
|
val->it_value.tv_sec = ie->vsec;
|
|
|
|
val->it_value.tv_usec = ie->vusec;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!timeval_valid(&val->it_value)) {
|
|
|
|
pr_err("Invalid timer value\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
pr_info("Restored %s timer to %ld.%ld -> %ld.%ld\n", n,
|
|
|
|
val->it_value.tv_sec, val->it_value.tv_usec,
|
|
|
|
val->it_interval.tv_sec, val->it_interval.tv_usec);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int prepare_itimers(int pid, struct task_restore_core_args *args)
|
|
|
|
{
|
|
|
|
int fd, ret = -1;
|
|
|
|
struct itimer_entry ie[3];
|
|
|
|
|
|
|
|
fd = open_image_ro(CR_FD_ITIMERS, pid);
|
|
|
|
if (fd < 0)
|
|
|
|
return fd;
|
|
|
|
|
|
|
|
if (read_img_buf(fd, ie, sizeof(ie)) > 0) {
|
|
|
|
ret = itimer_restore_and_fix("real",
|
|
|
|
&ie[0], &args->itimers[0]);
|
|
|
|
if (!ret)
|
|
|
|
ret = itimer_restore_and_fix("virt",
|
|
|
|
&ie[1], &args->itimers[1]);
|
|
|
|
if (!ret)
|
|
|
|
ret = itimer_restore_and_fix("prof",
|
|
|
|
&ie[2], &args->itimers[2]);
|
|
|
|
}
|
|
|
|
|
2012-02-29 13:39:21 +03:00
|
|
|
close_safe(&fd);
|
2012-01-24 16:45:19 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-01-27 21:43:32 +04:00
|
|
|
static int prepare_creds(int pid, struct task_restore_core_args *args)
|
|
|
|
{
|
|
|
|
int fd, ret;
|
|
|
|
|
|
|
|
fd = open_image_ro(CR_FD_CREDS, pid);
|
|
|
|
if (fd < 0)
|
|
|
|
return fd;
|
|
|
|
|
|
|
|
ret = read_img(fd, &args->creds);
|
|
|
|
|
2012-02-29 13:39:21 +03:00
|
|
|
close_safe(&fd);
|
2012-01-27 21:43:32 +04:00
|
|
|
|
|
|
|
/* XXX -- validate creds here? */
|
|
|
|
|
|
|
|
return ret > 0 ? 0 : -1;
|
|
|
|
}
|
|
|
|
|
2012-03-27 16:31:00 +04:00
|
|
|
static struct vma_entry *vma_list_remap(void *addr, unsigned long len, struct list_head *vmas)
|
|
|
|
{
|
|
|
|
struct vma_entry *vma, *ret;
|
|
|
|
struct vma_area *vma_area;
|
|
|
|
|
|
|
|
ret = vma = mmap(addr, len, PROT_READ | PROT_WRITE,
|
|
|
|
MAP_PRIVATE | MAP_ANON | MAP_FIXED, 0, 0);
|
|
|
|
if (vma != addr) {
|
|
|
|
pr_perror("Can't remap vma area");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
list_for_each_entry(vma_area, vmas, list) {
|
|
|
|
*vma = vma_area->vma;
|
|
|
|
vma++;
|
|
|
|
}
|
|
|
|
|
|
|
|
vma->start = 0;
|
|
|
|
free_mappings(vmas);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
static int sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas)
|
2011-10-24 22:23:06 +04:00
|
|
|
{
|
2012-01-14 21:22:06 +03:00
|
|
|
long restore_code_len, restore_task_vma_len;
|
2012-03-27 16:34:00 +04:00
|
|
|
long restore_thread_vma_len, self_vmas_len, vmas_len;
|
2011-11-16 18:19:24 +04:00
|
|
|
|
2012-03-02 19:29:35 +04:00
|
|
|
void *mem = MAP_FAILED;
|
2011-11-12 19:26:40 +04:00
|
|
|
void *restore_thread_exec_start;
|
|
|
|
void *restore_task_exec_start;
|
2012-01-14 21:22:06 +03:00
|
|
|
void *restore_code_start;
|
2011-11-16 18:19:24 +04:00
|
|
|
|
|
|
|
long new_sp, exec_mem_hint;
|
2011-10-25 21:25:42 +04:00
|
|
|
long ret;
|
2011-10-24 22:23:06 +04:00
|
|
|
|
2011-11-16 18:19:24 +04:00
|
|
|
struct task_restore_core_args *task_args;
|
|
|
|
struct thread_restore_args *thread_args;
|
|
|
|
|
2011-10-26 22:50:46 +04:00
|
|
|
LIST_HEAD(self_vma_list);
|
2011-11-16 18:19:24 +04:00
|
|
|
int fd_core = -1;
|
2012-03-21 09:45:00 +04:00
|
|
|
int fd_pages = -1;
|
2012-03-02 19:30:23 +04:00
|
|
|
int i;
|
2011-11-12 19:26:40 +04:00
|
|
|
|
|
|
|
int *fd_core_threads;
|
|
|
|
|
2012-01-01 13:12:37 +04:00
|
|
|
pr_info("%d: Restore via sigreturn\n", pid);
|
|
|
|
|
2012-01-14 21:22:06 +03:00
|
|
|
restore_code_len = 0;
|
2011-11-12 19:26:40 +04:00
|
|
|
restore_task_vma_len = 0;
|
2012-01-01 02:32:32 +04:00
|
|
|
restore_thread_vma_len = 0;
|
2011-10-26 22:50:46 +04:00
|
|
|
|
2012-02-17 01:39:36 +04:00
|
|
|
ret = parse_maps(pid, &self_vma_list, false);
|
|
|
|
close_pid_proc();
|
2012-03-02 19:28:46 +04:00
|
|
|
if (ret < 0)
|
2011-10-26 22:50:46 +04:00
|
|
|
goto err;
|
|
|
|
|
2012-03-27 16:31:00 +04:00
|
|
|
self_vmas_len = round_up((ret + 1) * sizeof(struct vma_entry), PAGE_SIZE);
|
2012-03-27 16:34:00 +04:00
|
|
|
vmas_len = round_up((nr_vmas + 1) * sizeof(struct vma_entry), PAGE_SIZE);
|
2012-03-02 19:30:23 +04:00
|
|
|
|
2011-11-16 18:19:24 +04:00
|
|
|
/* pr_info_vma_list(&self_vma_list); */
|
2011-10-27 18:59:21 +04:00
|
|
|
|
2011-11-12 19:26:40 +04:00
|
|
|
BUILD_BUG_ON(sizeof(struct task_restore_core_args) & 1);
|
|
|
|
BUILD_BUG_ON(sizeof(struct thread_restore_args) & 1);
|
2012-01-17 15:28:13 +03:00
|
|
|
BUILD_BUG_ON(SHMEMS_SIZE % PAGE_SIZE);
|
2012-01-16 23:52:15 +03:00
|
|
|
BUILD_BUG_ON(TASK_ENTRIES_SIZE % PAGE_SIZE);
|
2011-11-12 19:26:40 +04:00
|
|
|
|
2012-03-21 09:45:00 +04:00
|
|
|
fd_core = open_image_ro(CR_FD_CORE, pid);
|
2012-02-07 19:32:11 +04:00
|
|
|
if (fd_core < 0) {
|
2012-01-31 15:13:05 +04:00
|
|
|
pr_perror("Can't open core-out-%d", pid);
|
2012-02-07 19:32:11 +04:00
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2012-03-21 09:45:00 +04:00
|
|
|
fd_pages = open_image_ro(CR_FD_PAGES, pid);
|
|
|
|
if (fd_pages < 0) {
|
|
|
|
pr_perror("Can't open pages-%d", pid);
|
|
|
|
goto err;
|
|
|
|
}
|
2011-10-26 22:50:46 +04:00
|
|
|
|
2012-01-14 21:22:06 +03:00
|
|
|
restore_code_len = sizeof(restorer_blob);
|
|
|
|
restore_code_len = round_up(restore_code_len, 16);
|
2011-11-12 19:26:40 +04:00
|
|
|
|
2012-01-14 21:22:06 +03:00
|
|
|
restore_task_vma_len = round_up(restore_code_len + sizeof(*task_args), PAGE_SIZE);
|
2011-11-12 19:26:40 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Thread statistics
|
|
|
|
*/
|
|
|
|
|
2012-02-10 20:18:08 +04:00
|
|
|
/*
|
|
|
|
* Compute how many memory we will need
|
|
|
|
* to restore all threads, every thread
|
|
|
|
* requires own stack and heap, it's ~40K
|
|
|
|
* per thread.
|
|
|
|
*/
|
2011-11-12 19:26:40 +04:00
|
|
|
|
2012-02-10 20:18:08 +04:00
|
|
|
restore_thread_vma_len = sizeof(*thread_args) * me->nr_threads;
|
|
|
|
restore_thread_vma_len = round_up(restore_thread_vma_len, 16);
|
2011-10-26 11:16:00 +04:00
|
|
|
|
2012-02-10 20:18:08 +04:00
|
|
|
pr_info("%d: %d threads require %ldK of memory\n",
|
|
|
|
pid, me->nr_threads,
|
2011-11-24 15:07:03 +04:00
|
|
|
KBYTES(restore_thread_vma_len));
|
2011-11-12 19:26:40 +04:00
|
|
|
|
2011-12-26 21:27:03 +04:00
|
|
|
restore_thread_vma_len = round_up(restore_thread_vma_len, PAGE_SIZE);
|
|
|
|
|
2012-04-05 14:08:11 +04:00
|
|
|
exec_mem_hint = restorer_get_vma_hint(pid, tgt_vmas, &self_vma_list,
|
2011-11-16 18:19:24 +04:00
|
|
|
restore_task_vma_len +
|
2011-12-26 21:27:03 +04:00
|
|
|
restore_thread_vma_len +
|
2012-03-02 19:30:23 +04:00
|
|
|
self_vmas_len +
|
2012-01-16 23:52:15 +03:00
|
|
|
SHMEMS_SIZE + TASK_ENTRIES_SIZE);
|
2011-11-16 18:19:24 +04:00
|
|
|
if (exec_mem_hint == -1) {
|
2012-01-31 15:31:22 +04:00
|
|
|
pr_err("No suitable area for task_restore bootstrap (%ldK)\n",
|
2011-11-12 19:26:40 +04:00
|
|
|
restore_task_vma_len + restore_thread_vma_len);
|
2011-11-06 01:49:57 +04:00
|
|
|
goto err;
|
2011-11-16 18:19:24 +04:00
|
|
|
}
|
2011-10-27 00:57:01 +04:00
|
|
|
|
2012-03-02 19:28:13 +04:00
|
|
|
pr_info("Found bootstrap VMA hint at: %lx (needs ~%ldK)\n", exec_mem_hint,
|
|
|
|
KBYTES(restore_task_vma_len + restore_thread_vma_len));
|
|
|
|
|
2011-11-12 19:26:40 +04:00
|
|
|
/* VMA we need to run task_restore code */
|
2012-03-02 19:29:35 +04:00
|
|
|
mem = mmap((void *)exec_mem_hint,
|
2011-11-16 18:19:24 +04:00
|
|
|
restore_task_vma_len + restore_thread_vma_len,
|
2011-10-26 11:16:00 +04:00
|
|
|
PROT_READ | PROT_WRITE | PROT_EXEC,
|
2012-03-02 19:28:13 +04:00
|
|
|
MAP_PRIVATE | MAP_ANON | MAP_FIXED, 0, 0);
|
2012-03-02 19:29:35 +04:00
|
|
|
if (mem != (void *)exec_mem_hint) {
|
2011-10-26 11:16:00 +04:00
|
|
|
pr_err("Can't mmap section for restore code\n");
|
2011-11-06 01:49:57 +04:00
|
|
|
goto err;
|
2011-10-24 22:23:06 +04:00
|
|
|
}
|
|
|
|
|
2011-10-26 11:16:00 +04:00
|
|
|
/*
|
2011-11-16 18:19:24 +04:00
|
|
|
* Prepare a memory map for restorer. Note a thread space
|
|
|
|
* might be completely unused so it's here just for convenience.
|
2011-10-26 11:16:00 +04:00
|
|
|
*/
|
2012-03-02 19:29:35 +04:00
|
|
|
restore_code_start = mem;
|
2012-01-14 21:22:06 +03:00
|
|
|
restore_thread_exec_start = restore_code_start + restorer_blob_offset__restore_thread;
|
|
|
|
restore_task_exec_start = restore_code_start + restorer_blob_offset__restore_task;
|
|
|
|
task_args = restore_code_start + restore_code_len;
|
2012-03-29 14:49:00 +04:00
|
|
|
thread_args = (void *)((long)task_args + sizeof(*task_args));
|
2011-10-26 11:16:00 +04:00
|
|
|
|
2011-11-16 18:19:24 +04:00
|
|
|
memzero_p(task_args);
|
2012-03-29 14:49:00 +04:00
|
|
|
memzero(thread_args, sizeof(*thread_args) * me->nr_threads);
|
2011-10-26 11:16:00 +04:00
|
|
|
|
2011-10-26 17:35:50 +04:00
|
|
|
/*
|
2011-11-16 18:19:24 +04:00
|
|
|
* Code at a new place.
|
2011-10-26 17:35:50 +04:00
|
|
|
*/
|
2012-01-14 21:22:06 +03:00
|
|
|
memcpy(restore_code_start, &restorer_blob, sizeof(restorer_blob));
|
2011-11-12 19:26:40 +04:00
|
|
|
|
|
|
|
/*
|
2011-11-16 18:19:24 +04:00
|
|
|
* Adjust stack.
|
2011-11-12 19:26:40 +04:00
|
|
|
*/
|
2011-11-16 18:19:24 +04:00
|
|
|
new_sp = RESTORE_ALIGN_STACK((long)task_args->mem_zone.stack, sizeof(task_args->mem_zone.stack));
|
2011-10-24 22:23:06 +04:00
|
|
|
|
2011-10-26 00:30:41 +04:00
|
|
|
/*
|
2012-01-01 13:10:12 +04:00
|
|
|
* Get a reference to shared memory area which is
|
|
|
|
* used to signal if shmem restoration complete
|
|
|
|
* from low-level restore code.
|
|
|
|
*
|
|
|
|
* This shmem area is mapped right after the whole area of
|
|
|
|
* sigreturn rt code. Note we didn't allocated it before
|
|
|
|
* but this area is taken into account for 'hint' memory
|
|
|
|
* address.
|
2011-10-26 00:30:41 +04:00
|
|
|
*/
|
2012-03-02 19:29:35 +04:00
|
|
|
|
|
|
|
mem += restore_task_vma_len + restore_thread_vma_len;
|
|
|
|
ret = shmem_remap(shmems, mem, SHMEMS_SIZE);
|
2012-01-03 13:05:50 +04:00
|
|
|
if (ret < 0)
|
2011-12-26 21:27:03 +04:00
|
|
|
goto err;
|
2012-03-02 19:29:35 +04:00
|
|
|
task_args->shmems = mem;
|
2011-12-26 21:27:03 +04:00
|
|
|
|
2012-03-02 19:29:35 +04:00
|
|
|
mem += SHMEMS_SIZE;
|
|
|
|
ret = shmem_remap(task_entries, mem, TASK_ENTRIES_SIZE);
|
2012-01-16 23:52:15 +03:00
|
|
|
if (ret < 0)
|
|
|
|
goto err;
|
2012-03-02 19:29:35 +04:00
|
|
|
task_args->task_entries = mem;
|
2012-01-16 23:52:15 +03:00
|
|
|
|
2012-03-02 19:30:23 +04:00
|
|
|
mem += TASK_ENTRIES_SIZE;
|
2012-03-27 16:31:00 +04:00
|
|
|
task_args->self_vmas = vma_list_remap(mem, self_vmas_len, &self_vma_list);
|
|
|
|
if (!task_args->self_vmas)
|
2012-03-02 19:30:23 +04:00
|
|
|
goto err;
|
|
|
|
|
2012-03-27 16:34:00 +04:00
|
|
|
mem += self_vmas_len;
|
|
|
|
task_args->tgt_vmas = vma_list_remap(mem, vmas_len, tgt_vmas);
|
|
|
|
if (!task_args->tgt_vmas)
|
|
|
|
goto err;
|
|
|
|
|
2012-01-01 13:10:12 +04:00
|
|
|
/*
|
|
|
|
* Arguments for task restoration.
|
|
|
|
*/
|
2011-11-16 18:19:24 +04:00
|
|
|
task_args->pid = pid;
|
|
|
|
task_args->fd_core = fd_core;
|
2012-03-01 18:52:42 +04:00
|
|
|
task_args->logfd = log_get_fd();
|
2012-01-19 01:33:19 +03:00
|
|
|
task_args->sigchld_act = sigchld_act;
|
2012-03-24 13:22:37 +04:00
|
|
|
task_args->fd_exe_link = self_exe_fd;
|
2012-03-21 09:45:00 +04:00
|
|
|
task_args->fd_pages = fd_pages;
|
2011-11-18 16:09:01 +04:00
|
|
|
|
2012-01-24 16:45:19 +04:00
|
|
|
ret = prepare_itimers(pid, task_args);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err;
|
|
|
|
|
2012-01-27 21:43:32 +04:00
|
|
|
ret = prepare_creds(pid, task_args);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err;
|
|
|
|
|
2012-03-26 19:38:00 +04:00
|
|
|
mutex_init(&task_args->rst_lock);
|
2011-11-12 19:26:40 +04:00
|
|
|
|
2012-02-10 20:18:08 +04:00
|
|
|
/*
|
|
|
|
* Now prepare run-time data for threads restore.
|
|
|
|
*/
|
|
|
|
task_args->nr_threads = me->nr_threads;
|
|
|
|
task_args->clone_restore_fn = (void *)restore_thread_exec_start;
|
|
|
|
task_args->thread_args = thread_args;
|
2011-11-03 11:58:45 +04:00
|
|
|
|
2012-02-10 20:18:08 +04:00
|
|
|
/*
|
|
|
|
* Fill up per-thread data.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < me->nr_threads; i++) {
|
|
|
|
thread_args[i].pid = me->threads[i];
|
2011-11-12 19:26:40 +04:00
|
|
|
|
2012-02-10 20:18:08 +04:00
|
|
|
/* skip self */
|
|
|
|
if (thread_args[i].pid == pid)
|
|
|
|
continue;
|
2012-01-16 00:54:43 +04:00
|
|
|
|
2012-02-10 20:18:08 +04:00
|
|
|
/* Core files are to be opened */
|
|
|
|
thread_args[i].fd_core = open_image_ro_nocheck(FMT_FNAME_CORE, thread_args[i].pid);
|
|
|
|
if (thread_args[i].fd_core < 0)
|
|
|
|
goto err;
|
2011-11-12 19:26:40 +04:00
|
|
|
|
2012-02-10 20:18:08 +04:00
|
|
|
thread_args[i].rst_lock = &task_args->rst_lock;
|
2011-11-17 00:59:08 +04:00
|
|
|
|
2012-02-10 20:18:08 +04:00
|
|
|
pr_info("Thread %4d stack %8p heap %8p rt_sigframe %8p\n",
|
2012-01-31 15:31:22 +04:00
|
|
|
i, thread_args[i].mem_zone.stack,
|
2011-11-16 18:19:24 +04:00
|
|
|
thread_args[i].mem_zone.heap,
|
|
|
|
thread_args[i].mem_zone.rt_sigframe);
|
2011-11-12 19:26:40 +04:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2012-03-16 17:24:00 +04:00
|
|
|
close_image_dir();
|
|
|
|
|
2011-11-16 18:19:24 +04:00
|
|
|
pr_info("task_args: %p\n"
|
|
|
|
"task_args->pid: %d\n"
|
|
|
|
"task_args->fd_core: %d\n"
|
|
|
|
"task_args->nr_threads: %d\n"
|
|
|
|
"task_args->clone_restore_fn: %p\n"
|
|
|
|
"task_args->thread_args: %p\n",
|
|
|
|
task_args, task_args->pid,
|
2012-03-02 19:30:23 +04:00
|
|
|
task_args->fd_core,
|
|
|
|
task_args->nr_threads,
|
|
|
|
task_args->clone_restore_fn,
|
2011-11-16 18:19:24 +04:00
|
|
|
task_args->thread_args);
|
|
|
|
|
2011-10-26 17:35:50 +04:00
|
|
|
/*
|
2011-11-12 19:26:40 +04:00
|
|
|
* An indirect call to task_restore, note it never resturns
|
2011-10-26 17:35:50 +04:00
|
|
|
* and restoreing core is extremely destructive.
|
|
|
|
*/
|
2011-10-26 11:16:00 +04:00
|
|
|
asm volatile(
|
2011-11-12 19:26:40 +04:00
|
|
|
"movq %0, %%rbx \n"
|
|
|
|
"movq %1, %%rax \n"
|
2012-01-14 21:22:06 +03:00
|
|
|
"movq %2, %%rdi \n"
|
2011-11-12 19:26:40 +04:00
|
|
|
"movq %%rbx, %%rsp \n"
|
|
|
|
"callq *%%rax \n"
|
2011-10-26 17:35:50 +04:00
|
|
|
:
|
2011-11-16 18:19:24 +04:00
|
|
|
: "g"(new_sp),
|
|
|
|
"g"(restore_task_exec_start),
|
|
|
|
"g"(task_args)
|
|
|
|
: "rsp", "rdi", "rsi", "rbx", "rax", "memory");
|
2011-10-26 11:16:00 +04:00
|
|
|
|
2011-10-26 22:50:46 +04:00
|
|
|
err:
|
2011-11-06 01:49:57 +04:00
|
|
|
free_mappings(&self_vma_list);
|
2011-11-16 18:19:24 +04:00
|
|
|
close_safe(&fd_core);
|
2011-11-12 19:26:40 +04:00
|
|
|
|
2011-10-26 17:35:50 +04:00
|
|
|
/* Just to be sure */
|
2012-01-17 10:56:28 +04:00
|
|
|
exit(1);
|
2012-03-21 19:37:00 +04:00
|
|
|
return -1;
|
2011-10-24 22:23:06 +04:00
|
|
|
}
|
|
|
|
|
2011-10-04 01:50:19 +04:00
|
|
|
int cr_restore_tasks(pid_t pid, struct cr_options *opts)
|
2011-09-23 12:00:45 +04:00
|
|
|
{
|
2011-10-04 01:50:19 +04:00
|
|
|
if (opts->leader_only)
|
2011-09-23 12:00:45 +04:00
|
|
|
return restore_one_task(pid);
|
2012-01-26 15:25:00 +04:00
|
|
|
return restore_all_tasks(pid, opts);
|
2011-09-23 12:00:45 +04:00
|
|
|
}
|