mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-22 01:51:51 +00:00
Initial commit
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
This commit is contained in:
commit
523de23624
9
.gitignore
vendored
Normal file
9
.gitignore
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
*.o
|
||||
*.d
|
||||
*.img
|
||||
*.bin
|
||||
*.elf
|
||||
*.out
|
||||
cscope*
|
||||
tags
|
||||
TAGS
|
171
Makefile
Normal file
171
Makefile
Normal file
@ -0,0 +1,171 @@
|
||||
ifeq ($(strip $(V)),)
|
||||
E = @echo
|
||||
Q = @
|
||||
else
|
||||
E = @\#
|
||||
Q =
|
||||
endif
|
||||
export E Q
|
||||
|
||||
FIND := find
|
||||
CSCOPE := cscope
|
||||
TAGS := ctags
|
||||
RM := rm
|
||||
LD := ld
|
||||
HEXDUMP := hexdump
|
||||
CC := gcc
|
||||
ECHO := echo
|
||||
NM := nm
|
||||
AWK := awk
|
||||
SH := sh
|
||||
|
||||
CFLAGS += -I./include
|
||||
CFLAGS += -O0 -ggdb3
|
||||
|
||||
LIBS += -lrt
|
||||
|
||||
# Additional ARCH settings for x86
|
||||
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
|
||||
-e s/arm.*/arm/ -e s/sa110/arm/ \
|
||||
-e s/s390x/s390/ -e s/parisc64/parisc/ \
|
||||
-e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
|
||||
-e s/sh[234].*/sh/ )
|
||||
|
||||
uname_M := $(shell uname -m | sed -e s/i.86/i386/)
|
||||
ifeq ($(uname_M),i386)
|
||||
ARCH := x86
|
||||
DEFINES += -DCONFIG_X86_32
|
||||
endif
|
||||
ifeq ($(uname_M),x86_64)
|
||||
ARCH := x86
|
||||
DEFINES += -DCONFIG_X86_64
|
||||
endif
|
||||
|
||||
DEFINES += -D_FILE_OFFSET_BITS=64
|
||||
DEFINES += -D_GNU_SOURCE
|
||||
|
||||
ifneq ($(WERROR),0)
|
||||
WARNINGS += -Werror
|
||||
endif
|
||||
|
||||
WARNINGS += -Wall -Wno-unused
|
||||
CFLAGS += $(WARNINGS) $(DEFINES)
|
||||
|
||||
PROGRAM := crtools
|
||||
TESTEE := testee
|
||||
TESTEE-TH := testee-threads
|
||||
TESTEE-STATIC := testee-static
|
||||
|
||||
all: $(PROGRAM) $(TESTEE) $(TESTEE-TH) $(TESTEE-STATIC)
|
||||
|
||||
OBJS += crtools.o
|
||||
OBJS += parasite-syscall.o
|
||||
OBJS += cr-dump.o
|
||||
OBJS += cr-restore.o
|
||||
OBJS += cr-show.o
|
||||
OBJS += util.o
|
||||
OBJS += rbtree.o
|
||||
OBJS += elf.o
|
||||
|
||||
OBJS-TESTEE += testee.o
|
||||
|
||||
OBJS-TESTEE-TH += testee-threads.o
|
||||
|
||||
OBJS-BLOB += parasite.o
|
||||
|
||||
DEPS := $(patsubst %.o,%.d,$(OBJS))
|
||||
DEPS-TESTEE := $(patsubst %.o,%.d,$(OBJS-TESTEE))
|
||||
DEPS-TESTEE-TH := $(patsubst %.o,%.d,$(OBJS-TESTEE-TH))
|
||||
DEPS-BLOB := $(patsubst %.o,%.d,$(OBJS-BLOB))
|
||||
|
||||
SRCS-BLOB += $(patsubst %.o,%.c,$(OBJS-BLOB))
|
||||
|
||||
HEAD-BLOB := $(patsubst %.o,%.h,$(OBJS-BLOB))
|
||||
HEAD-BLOB-GEN := $(patsubst %.o,%-blob.h,$(OBJS-BLOB))
|
||||
HEAD-BIN := $(patsubst %.o,%.bin,$(OBJS-BLOB))
|
||||
HEAD-LDS := $(patsubst %.o,%.lds.S,$(OBJS-BLOB))
|
||||
|
||||
HEAD-IDS := $(patsubst %.h,%_h__,$(subst -,_,$(HEAD-BLOB)))
|
||||
|
||||
$(OBJS-BLOB): $(SRCS-BLOB) $(DEPS-BLOB)
|
||||
$(E) " CC " $@
|
||||
$(Q) $(CC) -c $(CFLAGS) -fpic $< -o $@
|
||||
|
||||
$(HEAD-BIN): $(OBJS-BLOB) $(HEAD-LDS)
|
||||
%.bin: %.o
|
||||
$(E) " GEN " $@
|
||||
$(Q) $(LD) -T $(patsubst %.bin,%.lds.S,$@) $< -o $@
|
||||
$(Q) $(LD) -T $(patsubst %.bin,%-elf.lds.S,$@) $< -o $@.o
|
||||
|
||||
$(HEAD-BLOB): $(DEPS-BLOB) $(HEAD-BIN)
|
||||
%-blob.h: %.bin
|
||||
%.h: %.bin
|
||||
$(E) " GEN " $@
|
||||
$(Q) $(SH) gen-offsets.sh \
|
||||
$(subst -,_,$(patsubst %.h,%,$@))_h__ \
|
||||
$(subst -,_,$(patsubst %.h,%,$@))_blob_offset__ \
|
||||
$(subst -,_,$(patsubst %.h,%,$@))_blob \
|
||||
$(patsubst %.h,%.o,$@) \
|
||||
$(patsubst %.h,%.bin,$@) > $(patsubst %.h,%-blob.h,$@)
|
||||
|
||||
$(OBJS): $(HEAD-BLOB) $(DEPS)
|
||||
$(OBJS-TESTEE): $(DEPS-TESTEE)
|
||||
$(OBJS-TESTEE-TH): $(DEPS-TESTEE-TH)
|
||||
%.o: %.c
|
||||
$(E) " CC " $@
|
||||
$(Q) $(CC) -c $(CFLAGS) $< -o $@
|
||||
|
||||
$(PROGRAM): $(OBJS)
|
||||
$(E) " LINK " $@
|
||||
$(Q) $(CC) $(OBJS) $(LIBS) -o $@
|
||||
|
||||
$(TESTEE): $(OBJS-TESTEE)
|
||||
$(E) " LINK " $@
|
||||
$(Q) $(CC) $(OBJS-TESTEE) -o $@
|
||||
|
||||
$(TESTEE-TH): $(OBJS-TESTEE-TH)
|
||||
$(E) " LINK " $@
|
||||
$(Q) $(CC) $(OBJS-TESTEE-TH) -lpthread -o $@
|
||||
|
||||
# Statically linked test program. It is built with its own flag set
# rather than $(CFLAGS), but uses the configured compiler and the same
# ./include directory as the rest of the tree.
$(TESTEE-STATIC).o: testee-static.c
	$(Q) $(CC) -c -static -I./include -o $@ $<

$(TESTEE-STATIC): $(TESTEE-STATIC).o
	$(Q) $(CC) -o $@ -static $<
|
||||
|
||||
$(DEPS):
|
||||
$(DEPS-TESTEE):
|
||||
$(DEPS-TESTEE-TH):
|
||||
$(DEPS-BLOB):
|
||||
%.d: %.c
|
||||
$(Q) $(CC) -M -MT $(patsubst %.d,%.o,$@) $(CFLAGS) $< -o $@
|
||||
|
||||
# Remove build products, generated images, index files and binaries.
# Uses $(RM) so the tool can be overridden like every other tool here.
clean:
	$(E) " CLEAN"
	$(Q) $(RM) -f ./*.o
	$(Q) $(RM) -f ./*.d
	$(Q) $(RM) -f ./*.img
	$(Q) $(RM) -f ./*.elf
	$(Q) $(RM) -f ./*.out
	$(Q) $(RM) -f ./*.bin
	$(Q) $(RM) -f ./tags
	$(Q) $(RM) -f ./cscope*
	$(Q) $(RM) -f ./$(PROGRAM)
	$(Q) $(RM) -f ./$(TESTEE)
	$(Q) $(RM) -f ./$(TESTEE-STATIC)
	$(Q) $(RM) -f ./$(TESTEE-TH)
	$(Q) $(RM) -f ./$(HEAD-BLOB)
	$(Q) $(RM) -f ./$(HEAD-BLOB-GEN)
.PHONY: clean
|
||||
|
||||
# Rebuild the tags index from scratch over all .h/.c/.S files.
# The indexer comes from $(TAGS) so it can be overridden on the command line.
tags:
	$(E) " GEN" $@
	$(Q) $(RM) -f tags
	$(Q) $(FIND) . -name '*.[hcS]' -print | xargs $(TAGS) -a
.PHONY: tags
|
||||
|
||||
cscope:
|
||||
$(E) " GEN" $@
|
||||
$(Q) $(FIND) . -name '*.[hcS]' -print > cscope.files
|
||||
$(Q) $(CSCOPE) -bkqu
|
||||
.PHONY: cscope
|
15
README
Normal file
15
README
Normal file
@ -0,0 +1,15 @@
|
||||
crtools
|
||||
=======
|
||||
|
||||
A utility to checkpoint/restore tasks.
|
||||
|
||||
Some code is borrowed from
|
||||
|
||||
- Linux kernel (http://kernel.org/)
|
||||
- git (http://git-scm.com/)
|
||||
- kvm-tools (https://github.com/penberg/linux-kvm.git)
|
||||
- ptrace-parasite (https://code.google.com/p/ptrace-parasite/)
|
||||
|
||||
Many thanks to these projects.
|
||||
|
||||
Licensed under GPLv2
|
977
cr-dump.c
Normal file
977
cr-dump.c
Normal file
@ -0,0 +1,977 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
#include <limits.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <dirent.h>
|
||||
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <sys/ptrace.h>
|
||||
#include <sys/user.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include <sys/sendfile.h>
|
||||
|
||||
#include "types.h"
|
||||
#include "list.h"
|
||||
|
||||
#include "compiler.h"
|
||||
#include "crtools.h"
|
||||
#include "syscall.h"
|
||||
#include "util.h"
|
||||
|
||||
#include "image.h"
|
||||
|
||||
#include "parasite.h"
|
||||
#include "parasite-syscall.h"
|
||||
#include "parasite-blob.h"
|
||||
|
||||
#ifndef CONFIG_X86_64
|
||||
# error No x86-32 support yet
|
||||
#endif
|
||||
|
||||
/* VMAs of the task being dumped; filled via parse_maps() in collect_mappings(), emptied by free_mappings(). */
static LIST_HEAD(vma_area_list);
|
||||
/* Process tree items; appended by collect_pstree(), released by free_pstree(). */
static LIST_HEAD(pstree_list);
|
||||
|
||||
/* Scratch buffer: readlink() target in dump_one_reg_file(), fdinfo text in read_fd_params(). */
static char big_buffer[PATH_MAX];
|
||||
/* Parasite handle; assigned from parasite_infect_seized() in dump_one_task(). */
static struct parasite_ctl *parasite_ctl;
|
||||
|
||||
/* Scratch buffer for /proc path names and for the lines read back from those files. */
static char loc_buf[PAGE_SIZE];
|
||||
|
||||
static void free_pstree(void)
|
||||
{
|
||||
struct pstree_item *item, *p;
|
||||
|
||||
list_for_each_entry_safe(item, p, &pstree_list, list) {
|
||||
xfree(item->children);
|
||||
xfree(item);
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&pstree_list);
|
||||
}
|
||||
|
||||
static void free_mappings(void)
|
||||
{
|
||||
struct vma_area *vma_area, *p;
|
||||
|
||||
list_for_each_entry_safe(vma_area, p, &vma_area_list, list) {
|
||||
if (vma_area->vm_file_fd > 0)
|
||||
close(vma_area->vm_file_fd);
|
||||
free(vma_area);
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&vma_area_list);
|
||||
}
|
||||
|
||||
static int collect_mappings(pid_t pid)
|
||||
{
|
||||
struct vma_area *vma_area;
|
||||
int ret = -1;
|
||||
|
||||
pr_info("\n");
|
||||
pr_info("Collecting mappings (pid: %d)\n", pid);
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
ret = parse_maps(pid, &vma_area_list);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
pr_info_vma_list(&vma_area_list);
|
||||
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
err:
|
||||
return ret;
|
||||
|
||||
err_bogus_mapping:
|
||||
pr_error("Bogus mapping %lx-%lx\n",
|
||||
vma_area->vma.start,
|
||||
vma_area->vma.end);
|
||||
goto err;
|
||||
}
|
||||
|
||||
static int dump_one_reg_file(int type, unsigned long fd_name, int lfd,
|
||||
bool do_close, unsigned long pos, unsigned int flags,
|
||||
struct cr_fdset *cr_fdset)
|
||||
{
|
||||
struct fdinfo_entry e;
|
||||
char fd_str[128];
|
||||
int len;
|
||||
int ret = -1;
|
||||
|
||||
snprintf(fd_str, sizeof(fd_str), "/proc/self/fd/%d", lfd);
|
||||
len = readlink(fd_str, big_buffer, sizeof(big_buffer) - 1);
|
||||
if (len < 0) {
|
||||
pr_perror("Can't readlink %s\n", fd_str);
|
||||
goto err;
|
||||
}
|
||||
|
||||
big_buffer[len] = '\0';
|
||||
pr_info("Dumping path for %lx fd via self %d [%s]\n",
|
||||
fd_name, lfd, big_buffer);
|
||||
|
||||
if (do_close)
|
||||
close(lfd);
|
||||
|
||||
e.type = type;
|
||||
e.len = len;
|
||||
e.flags = flags;
|
||||
e.pos = pos;
|
||||
e.addr = fd_name;
|
||||
|
||||
pr_info("fdinfo: type: %2x len: %2x flags: %4x pos: %8x addr: %16lx\n",
|
||||
type, len, flags, pos, fd_name);
|
||||
|
||||
write_ptr_safe(cr_fdset->desc[CR_FD_FDINFO].fd, &e, err);
|
||||
write_safe(cr_fdset->desc[CR_FD_FDINFO].fd, big_buffer, e.len, err);
|
||||
|
||||
ret = 0;
|
||||
err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dump_pipe_and_data(int lfd, struct pipe_entry *e,
|
||||
struct cr_fdset *cr_fdset)
|
||||
{
|
||||
int fd_pipes;
|
||||
int steal_pipe[2];
|
||||
int pipe_size;
|
||||
int has_bytes;
|
||||
int ret = -1;
|
||||
|
||||
fd_pipes = cr_fdset->desc[CR_FD_PIPES].fd;
|
||||
|
||||
pr_info("Dumping data from pipe %x\n", e->pipeid);
|
||||
if (pipe(steal_pipe) < 0) {
|
||||
pr_perror("Can't create pipe for stealing data\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
pipe_size = fcntl(lfd, F_GETPIPE_SZ);
|
||||
if (pipe_size < 0) {
|
||||
pr_error("Can't obtain piped data size\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
has_bytes = tee(lfd, steal_pipe[1], pipe_size, SPLICE_F_NONBLOCK);
|
||||
if (has_bytes < 0) {
|
||||
if (errno != EAGAIN) {
|
||||
pr_perror("Can't pick pipe data\n");
|
||||
goto err_close;
|
||||
} else
|
||||
has_bytes = 0;
|
||||
}
|
||||
|
||||
e->bytes = has_bytes;
|
||||
write_ptr_safe(fd_pipes, e, err_close);
|
||||
|
||||
if (has_bytes) {
|
||||
ret = splice(steal_pipe[0], NULL, fd_pipes,
|
||||
NULL, has_bytes, 0);
|
||||
if (ret < 0) {
|
||||
pr_perror("Can't push pipe data\n");
|
||||
goto err_close;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
err_close:
|
||||
close(steal_pipe[0]);
|
||||
close(steal_pipe[1]);
|
||||
|
||||
err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dump_one_pipe(int fd, int lfd, unsigned int id, unsigned int flags,
|
||||
struct cr_fdset *cr_fdset)
|
||||
{
|
||||
struct pipe_entry e;
|
||||
int ret = -1;
|
||||
|
||||
pr_info("Dumping pipe %d/%x flags %x\n", fd, id, flags);
|
||||
|
||||
e.fd = fd;
|
||||
e.pipeid = id;
|
||||
e.flags = flags;
|
||||
|
||||
if (flags & O_WRONLY) {
|
||||
e.bytes = 0;
|
||||
write_ptr_safe(cr_fdset->desc[CR_FD_PIPES].fd, &e, err);
|
||||
ret = 0;
|
||||
} else
|
||||
ret = dump_pipe_and_data(lfd, &e, cr_fdset);
|
||||
|
||||
err:
|
||||
if (!ret)
|
||||
pr_info("Dumped pipe: fd: %8lx pipeid: %8lx flags: %8lx bytes: %8lx\n",
|
||||
e.fd, e.pipeid, e.flags, e.bytes);
|
||||
else
|
||||
pr_error("Dumping pipe %d/%x flags %x\n", fd, id, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dump_one_fd(char *pid_fd_dir, int dir, char *fd_name, unsigned long pos,
|
||||
unsigned int flags, struct cr_fdset *cr_fdset)
|
||||
{
|
||||
struct statfs stfs_buf;
|
||||
struct stat st_buf;
|
||||
int fd;
|
||||
|
||||
fd = openat(dir, fd_name, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
pr_perror("Failed to openat %s/%d %s\n", pid_fd_dir, dir, fd_name);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (fstat(fd, &st_buf) < 0) {
|
||||
pr_perror("Can't get stat on %s\n", fd_name);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (S_ISREG(st_buf.st_mode))
|
||||
return dump_one_reg_file(FDINFO_FD, atol(fd_name),
|
||||
fd, 1, pos, flags, cr_fdset);
|
||||
|
||||
if (S_ISFIFO(st_buf.st_mode)) {
|
||||
if (fstatfs(fd, &stfs_buf) < 0) {
|
||||
pr_perror("Can't fstatfs on %s\n", fd_name);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (stfs_buf.f_type == PIPEFS_MAGIC)
|
||||
return dump_one_pipe(atol(fd_name), fd,
|
||||
st_buf.st_ino, flags, cr_fdset);
|
||||
}
|
||||
|
||||
if (!strcmp(fd_name, "0")) {
|
||||
pr_info("... Skipping stdin ...\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!strcmp(fd_name, "1")) {
|
||||
pr_info("... Skipping stdout ...\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!strcmp(fd_name, "2")) {
|
||||
pr_info("... Skipping stderr ...\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!strcmp(fd_name, "3")) {
|
||||
pr_info("... Skipping tty ...\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
pr_error("Can't dump file %s of that type [%x]\n", fd_name, st_buf.st_mode);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int read_fd_params(pid_t pid, char *fd, unsigned long *pos, unsigned int *flags)
|
||||
{
|
||||
char fd_str[128];
|
||||
int ifd;
|
||||
|
||||
snprintf(fd_str, sizeof(fd_str), "/proc/%d/fdinfo/%s", pid, fd);
|
||||
|
||||
ifd = open(fd_str, O_RDONLY);
|
||||
if (ifd < 0) {
|
||||
pr_perror("Can't open %s\n", fd_str);
|
||||
return -1;
|
||||
}
|
||||
|
||||
read(ifd, big_buffer, sizeof(big_buffer));
|
||||
close(ifd);
|
||||
|
||||
sscanf(big_buffer, "pos:\t%li\nflags:\t%o\n", pos, flags);
|
||||
|
||||
pr_info("%s: pos: %16lx flags: %16lx\n", fd_str, *pos, *flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Walk /proc/<pid>/fd and dump every descriptor found there; entries
 * whose name starts with '.' are ignored.
 *
 * Returns 0 on success, -1 as soon as one descriptor cannot be read or
 * dumped.
 */
static int dump_task_files(pid_t pid, struct cr_fdset *cr_fdset)
{
	char pid_fd_dir[64];
	struct dirent *de;
	unsigned long pos;
	unsigned int flags;
	DIR *fd_dir;
	int ret = -1;

	pr_info("\n");
	pr_info("Dumping opened files (pid: %d)\n", pid);
	pr_info("----------------------------------------\n");

	snprintf(pid_fd_dir, sizeof(pid_fd_dir), "/proc/%d/fd", pid);
	fd_dir = opendir(pid_fd_dir);
	if (!fd_dir) {
		pr_perror("Can't open %s\n", pid_fd_dir);
		return -1;
	}

	while ((de = readdir(fd_dir))) {
		if (de->d_name[0] == '.')
			continue;
		if (read_fd_params(pid, de->d_name, &pos, &flags))
			goto out;
		if (dump_one_fd(pid_fd_dir, dirfd(fd_dir), de->d_name, pos, flags, cr_fdset))
			goto out;
	}

	pr_info("----------------------------------------\n");
	ret = 0;

out:
	/* The directory stream is released on failure as well. */
	closedir(fd_dir);
	return ret;
}
|
||||
|
||||
static int dump_task_mappings(pid_t pid, struct cr_fdset *cr_fdset)
|
||||
{
|
||||
struct vma_area *vma_area;
|
||||
int ret = -1;
|
||||
|
||||
pr_info("\n");
|
||||
pr_info("Dumping mappings (pid: %d)\n", pid);
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
list_for_each_entry(vma_area, &vma_area_list, list) {
|
||||
|
||||
struct vma_entry *vma = &vma_area->vma;
|
||||
|
||||
if (!(vma->status & VMA_AREA_REGULAR))
|
||||
continue;
|
||||
|
||||
pr_info_vma(vma_area);
|
||||
|
||||
switch (vma->flags) {
|
||||
case MAP_SHARED:
|
||||
case MAP_PRIVATE:
|
||||
|
||||
if ((vma->status & VMA_ANON_SHARED)) {
|
||||
struct shmem_entry e;
|
||||
|
||||
e.start = vma->start;
|
||||
e.end = vma->end;
|
||||
e.shmid = vma_area->shmid;
|
||||
|
||||
pr_info("shmem: s: %16lx e: %16lx shmid: %16lx\n",
|
||||
e.start, e.end, e.shmid);
|
||||
|
||||
write_ptr_safe(cr_fdset->desc[CR_FD_SHMEM].fd, &e, err);
|
||||
} else if ((vma->status & VMA_FILE_PRIVATE) ||
|
||||
(vma->status & VMA_FILE_SHARED)) {
|
||||
|
||||
unsigned int flags;
|
||||
|
||||
if (vma->prot & PROT_WRITE && (vma->status & VMA_FILE_SHARED))
|
||||
flags = O_RDWR;
|
||||
else
|
||||
flags = O_RDONLY;
|
||||
|
||||
ret = dump_one_reg_file(FDINFO_MAP,
|
||||
vma->start,
|
||||
vma_area->vm_file_fd,
|
||||
0, 0, flags,
|
||||
cr_fdset);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
pr_panic("Unknown VMA (pid: %d)\n", pid);
|
||||
goto err;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Copy member @e from structure @src into structure @dst.
 * assign_reg() casts to the destination member type; assign_array()
 * copies sizeof(destination member) bytes. Arguments are parenthesised
 * so that expressions such as *ptr can be passed for @dst and @src.
 */
#define assign_reg(dst, src, e)		((dst).e = (__typeof__((dst).e))(src).e)
#define assign_array(dst, src, e)	memcpy(&(dst).e, &(src).e, sizeof((dst).e))
|
||||
|
||||
/*
 * Read the execution-domain value of task @pid from
 * /proc/<pid>/personality into @personality.
 *
 * The kernel prints this value in hexadecimal (see proc(5)), so it is
 * parsed with base 16.
 *
 * Returns 0 on success, -1 when the file cannot be opened, read or
 * parsed.
 */
static int get_task_personality(pid_t pid, u32 *personality)
{
	FILE *file = NULL;
	unsigned long val;
	char *end;
	int ret = -1;

	snprintf(loc_buf, sizeof(loc_buf), "/proc/%d/personality", pid);
	file = fopen(loc_buf, "r");
	if (!file) {
		perror("Can't open task personality");
		goto err;
	}

	if (!fgets(loc_buf, sizeof(loc_buf), file)) {
		perror("Can't read task personality");
		goto err;
	}

	errno = 0;
	val = strtoul(loc_buf, &end, 16);
	if (end == loc_buf || errno == ERANGE) {
		pr_error("Can't parse task personality: %s\n", loc_buf);
		goto err;
	}

	*personality = (u32)val;
	ret = 0;

err:
	if (file)
		fclose(file);
	return ret;
}
|
||||
|
||||
static int dump_task_tls(pid_t pid, struct desc_struct *tls_array, int size)
|
||||
{
|
||||
FILE *file = NULL;
|
||||
int ret = -1;
|
||||
|
||||
if (size != GDT_ENTRY_TLS_ENTRIES) {
|
||||
pr_error("Wrong TLS storage size: %d\n", size);
|
||||
goto err;
|
||||
}
|
||||
|
||||
snprintf(loc_buf, sizeof(loc_buf), "/proc/%d/tls", pid);
|
||||
file = fopen(loc_buf, "r");
|
||||
if (!file) {
|
||||
perror("Can't open task tls");
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
while (fgets(loc_buf, sizeof(loc_buf), file)) {
|
||||
u32 a, b;
|
||||
if (sscanf(loc_buf, "%x %x", &a, &b) != 2) {
|
||||
pr_error("Can't parse tls entry: %s\n");
|
||||
ret = -1;
|
||||
goto err;
|
||||
}
|
||||
if (ret >= GDT_ENTRY_TLS_ENTRIES) {
|
||||
pr_error("Too many entries in tls\n");
|
||||
ret = -1;
|
||||
goto err;
|
||||
}
|
||||
tls_array[ret].a = a;
|
||||
tls_array[ret].b = b;
|
||||
|
||||
ret++;
|
||||
}
|
||||
|
||||
if (ret != GDT_ENTRY_TLS_ENTRIES) {
|
||||
pr_error("tls returened %i entries instead of %i\n",
|
||||
ret, GDT_ENTRY_TLS_ENTRIES);
|
||||
ret = -1;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
err:
|
||||
if (file)
|
||||
fclose(file);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dump_task_core_seized(pid_t pid, struct cr_fdset *cr_fdset)
|
||||
{
|
||||
struct core_entry *core = xzalloc(sizeof(*core));
|
||||
user_fpregs_struct_t fpregs = {-1};
|
||||
user_regs_struct_t regs = {-1};
|
||||
int fd_core = cr_fdset->desc[CR_FD_CORE].fd;
|
||||
int ret = -1;
|
||||
|
||||
pr_info("\n");
|
||||
pr_info("Dumping core (pid: %d)\n", pid);
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
if (!core)
|
||||
goto err;
|
||||
|
||||
lseek(fd_core, MAGIC_OFFSET, SEEK_SET);
|
||||
|
||||
jerr(ptrace(PTRACE_GETREGS, pid, NULL, ®s), err_free);
|
||||
jerr(ptrace(PTRACE_GETFPREGS, pid, NULL, &fpregs), err_free);
|
||||
|
||||
pr_info("Dumping GP/FPU registers ... ");
|
||||
|
||||
assign_reg(core->gpregs, regs, r15);
|
||||
assign_reg(core->gpregs, regs, r14);
|
||||
assign_reg(core->gpregs, regs, r13);
|
||||
assign_reg(core->gpregs, regs, r12);
|
||||
assign_reg(core->gpregs, regs, bp);
|
||||
assign_reg(core->gpregs, regs, bx);
|
||||
assign_reg(core->gpregs, regs, r11);
|
||||
assign_reg(core->gpregs, regs, r10);
|
||||
assign_reg(core->gpregs, regs, r9);
|
||||
assign_reg(core->gpregs, regs, r8);
|
||||
assign_reg(core->gpregs, regs, ax);
|
||||
assign_reg(core->gpregs, regs, cx);
|
||||
assign_reg(core->gpregs, regs, dx);
|
||||
assign_reg(core->gpregs, regs, si);
|
||||
assign_reg(core->gpregs, regs, di);
|
||||
assign_reg(core->gpregs, regs, orig_ax);
|
||||
assign_reg(core->gpregs, regs, ip);
|
||||
assign_reg(core->gpregs, regs, cs);
|
||||
assign_reg(core->gpregs, regs, flags);
|
||||
assign_reg(core->gpregs, regs, sp);
|
||||
assign_reg(core->gpregs, regs, ss);
|
||||
assign_reg(core->gpregs, regs, fs_base);
|
||||
assign_reg(core->gpregs, regs, gs_base);
|
||||
assign_reg(core->gpregs, regs, ds);
|
||||
assign_reg(core->gpregs, regs, es);
|
||||
assign_reg(core->gpregs, regs, fs);
|
||||
assign_reg(core->gpregs, regs, gs);
|
||||
|
||||
assign_reg(core->fpregs, fpregs, cwd);
|
||||
assign_reg(core->fpregs, fpregs, swd);
|
||||
assign_reg(core->fpregs, fpregs, twd);
|
||||
assign_reg(core->fpregs, fpregs, fop);
|
||||
assign_reg(core->fpregs, fpregs, rip);
|
||||
assign_reg(core->fpregs, fpregs, rdp);
|
||||
assign_reg(core->fpregs, fpregs, mxcsr);
|
||||
assign_reg(core->fpregs, fpregs, mxcsr_mask);
|
||||
|
||||
assign_array(core->fpregs, fpregs, st_space);
|
||||
assign_array(core->fpregs, fpregs, xmm_space);
|
||||
assign_array(core->fpregs, fpregs, padding);
|
||||
|
||||
pr_info("OK\n");
|
||||
|
||||
pr_info("Obtainting TLS ... ");
|
||||
ret = dump_task_tls(pid, core->tls_array, ARRAY_SIZE(core->tls_array));
|
||||
if (ret)
|
||||
goto err_free;
|
||||
pr_info("OK\n");
|
||||
|
||||
pr_info("Obtainting personality ... ");
|
||||
ret = get_task_personality(pid, &core->personality);
|
||||
if (ret)
|
||||
goto err_free;
|
||||
pr_info("OK\n");
|
||||
|
||||
pr_info("Dumping header ... ");
|
||||
core->hdr.version = HEADER_VERSION;
|
||||
core->hdr.arch = HEADER_ARCH_X86_64;
|
||||
core->hdr.flags = 0;
|
||||
|
||||
write_ptr_safe(fd_core, core, err_free);
|
||||
|
||||
pr_info("OK\n");
|
||||
ret = 0;
|
||||
|
||||
err_free:
|
||||
free(core);
|
||||
err:
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct pstree_item *find_children(pid_t pid)
|
||||
{
|
||||
struct pstree_item *item = NULL;
|
||||
u32 *children = NULL;
|
||||
u32 nr_allocated = 0;
|
||||
u32 nr_children = 0;
|
||||
bool found = false;
|
||||
FILE *file;
|
||||
char *tok;
|
||||
|
||||
pr_debug("pid: %d\n", pid);
|
||||
|
||||
snprintf(loc_buf, sizeof(loc_buf), "/proc/%d/status", pid);
|
||||
file = fopen(loc_buf, "r");
|
||||
if (!file) {
|
||||
perror("Can't open task status");
|
||||
goto err;
|
||||
}
|
||||
|
||||
while ((fgets(loc_buf, sizeof(loc_buf), file))) {
|
||||
if (strncmp(loc_buf, "Children:", 9)) {
|
||||
continue;
|
||||
} else {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(file), file = NULL;
|
||||
if (!found) {
|
||||
pr_error("Children marker is not found\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
item = xzalloc(sizeof(*item));
|
||||
if (!item)
|
||||
goto err;
|
||||
|
||||
tok = strtok(&loc_buf[10], " \n");
|
||||
while (tok) {
|
||||
u32 child_pid = atoi(tok);
|
||||
|
||||
pr_debug("child_pid: %d\n", child_pid);
|
||||
|
||||
if (nr_allocated <= nr_children) {
|
||||
nr_allocated += 64;
|
||||
if (xrealloc_safe((void **)&children, nr_allocated)) {
|
||||
xfree(children);
|
||||
xfree(item);
|
||||
item = NULL;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
children[nr_children++] = child_pid;
|
||||
tok = strtok(NULL, " \n");
|
||||
}
|
||||
|
||||
item->pid = pid;
|
||||
item->nr_children = nr_children;
|
||||
item->children = children;
|
||||
|
||||
err:
|
||||
return item;
|
||||
}
|
||||
|
||||
static int collect_pstree(pid_t pid)
|
||||
{
|
||||
struct pstree_item *item;
|
||||
unsigned long i;
|
||||
int ret = -1;
|
||||
|
||||
item = find_children(pid);
|
||||
if (!item)
|
||||
goto err;
|
||||
|
||||
list_add_tail(&item->list, &pstree_list);
|
||||
|
||||
for (i = 0; i < item->nr_children; i++) {
|
||||
ret = collect_pstree(item->children[i]);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dump_pstree(pid_t pid, struct cr_fdset *cr_fdset)
|
||||
{
|
||||
struct pstree_item *item;
|
||||
struct pstree_entry e;
|
||||
unsigned long i;
|
||||
int ret = -1;
|
||||
|
||||
pr_info("\n");
|
||||
pr_info("Dumping pstree (pid: %d)\n", pid);
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
list_for_each_entry(item, &pstree_list, list) {
|
||||
|
||||
pr_info("Process: %d (%d children)\n",
|
||||
item->pid, item->nr_children);
|
||||
|
||||
e.pid = item->pid;
|
||||
e.nr_children = item->nr_children;
|
||||
|
||||
write_ptr_safe(cr_fdset->desc[CR_FD_PSTREE].fd, &e, err);
|
||||
|
||||
pr_info("Children:");
|
||||
for (i = 0; i < item->nr_children; i++) {
|
||||
pr_info(" %d", item->children[i]);
|
||||
write_ptr_safe(cr_fdset->desc[CR_FD_PSTREE].fd,
|
||||
&item->children[i], err);
|
||||
}
|
||||
pr_info("\n");
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
err:
|
||||
pr_info("----------------------------------------\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct vma_area *find_vma_by_addr(unsigned long addr)
|
||||
{
|
||||
struct vma_area *vma_area;
|
||||
|
||||
list_for_each_entry(vma_area, &vma_area_list, list) {
|
||||
if (in_vma_area(vma_area, addr))
|
||||
return vma_area;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* kernel expects a special format in core file */
|
||||
static int finalize_core(pid_t pid, struct cr_fdset *cr_fdset)
|
||||
{
|
||||
int fd_pages, fd_pages_shmem, fd_core;
|
||||
unsigned long num, num_anon;
|
||||
struct vma_area *vma_area;
|
||||
struct vma_entry ve;
|
||||
int ret = -1;
|
||||
u64 va;
|
||||
|
||||
pr_info("\n");
|
||||
pr_info("Finalizing core (pid: %d)\n", pid);
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
fd_core = cr_fdset->desc[CR_FD_CORE].fd;
|
||||
fd_pages = cr_fdset->desc[CR_FD_PAGES].fd;
|
||||
fd_pages_shmem = cr_fdset->desc[CR_FD_PAGES_SHMEM].fd;
|
||||
|
||||
pr_debug("dsc: fd_core %d fd_pages %d fd_pages_shmem %d\n",
|
||||
fd_core, fd_pages, fd_pages_shmem);
|
||||
|
||||
lseek(fd_core, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
|
||||
lseek(fd_pages, MAGIC_OFFSET, SEEK_SET);
|
||||
lseek(fd_pages_shmem, MAGIC_OFFSET, SEEK_SET);
|
||||
|
||||
num = 0;
|
||||
pr_info("Appending VMAs ... ");
|
||||
|
||||
/* All VMAs first */
|
||||
|
||||
list_for_each_entry(vma_area, &vma_area_list, list) {
|
||||
ret = write(fd_core, &vma_area->vma, sizeof(vma_area->vma));
|
||||
if (ret != sizeof(vma_area->vma)) {
|
||||
pr_perror("\nUnable to write vma entry (%li written)\n", num);
|
||||
goto err;
|
||||
}
|
||||
num++;
|
||||
}
|
||||
|
||||
/* Ending marker */
|
||||
memset(&ve, 0, sizeof(ve));
|
||||
write_ptr_safe(fd_core, &ve, err);
|
||||
|
||||
pr_info("OK (%li written)\n", num);
|
||||
|
||||
num = 0;
|
||||
num_anon = 0;
|
||||
|
||||
pr_info("Appending pages ... ");
|
||||
while (1) {
|
||||
ret = read(fd_pages, &va, sizeof(va));
|
||||
if (!ret)
|
||||
break;
|
||||
if (ret != sizeof(va)) {
|
||||
pr_perror("\nUnable to read VA of page (%li written)\n", num);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Ending marker */
|
||||
if (va == 0) {
|
||||
write_ptr_safe(fd_core, &zero_page_entry, err);
|
||||
write_ptr_safe(fd_pages_shmem, &zero_page_entry, err);
|
||||
break;
|
||||
}
|
||||
|
||||
vma_area = find_vma_by_addr((unsigned long)va);
|
||||
if (!vma_area) {
|
||||
pr_panic("\nA page with address %lx is unknown\n", va);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Just in case if someone broke parasite page
|
||||
* dumper code.
|
||||
*/
|
||||
if (!vma_area_has(vma_area, VMA_AREA_REGULAR)) {
|
||||
pr_panic("\nA page with address %lx has a wrong status\n", va);
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (vma_area_has(vma_area, VMA_ANON_PRIVATE) ||
|
||||
vma_area_has(vma_area, VMA_FILE_PRIVATE)) {
|
||||
ret = write(fd_core, &va, sizeof(va));
|
||||
ret += sendfile(fd_core, fd_pages, NULL, PAGE_SIZE);
|
||||
if (ret != sizeof(va) + PAGE_SIZE) {
|
||||
pr_perror("\nUnable to write VMA_FILE_PRIVATE|VMA_ANON_PRIVATE "
|
||||
"page (%li, %li written)\n",
|
||||
num, num_anon);
|
||||
goto err;
|
||||
}
|
||||
num++;
|
||||
} else if (vma_area_has(vma_area, VMA_ANON_SHARED)) {
|
||||
ret = write(fd_pages_shmem, &va, sizeof(va));
|
||||
ret += sendfile(fd_pages_shmem, fd_pages, NULL, PAGE_SIZE);
|
||||
if (ret != sizeof(va) + PAGE_SIZE) {
|
||||
pr_perror("\nUnable to write VMA_ANON_SHARED "
|
||||
"page (%li, %li written)\n",
|
||||
num, num_anon);
|
||||
goto err;
|
||||
}
|
||||
num_anon++;
|
||||
} else {
|
||||
/* skip the page */
|
||||
lseek(fd_pages, PAGE_SIZE, SEEK_CUR);
|
||||
}
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
pr_info("OK (%li written)\n", num + num_anon);
|
||||
|
||||
err:
|
||||
pr_info("----------------------------------------\n");
|
||||
return ret;
|
||||
|
||||
err_strno:
|
||||
pr_perror("Error catched\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
static int dump_one_task(pid_t pid, struct cr_fdset *cr_fdset)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
pr_info("========================================\n");
|
||||
pr_info("Dumping task (pid: %d)\n", pid);
|
||||
pr_info("========================================\n");
|
||||
|
||||
ret = collect_mappings(pid);
|
||||
if (ret) {
|
||||
pr_error("Collect mappings (pid: %d) failed with %d\n", pid, ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = seize_task(pid);
|
||||
if (ret) {
|
||||
pr_error("Failed to seize task (pid: %d) with %d\n",
|
||||
pid, ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = dump_task_core_seized(pid, cr_fdset);
|
||||
if (ret) {
|
||||
pr_error("Dump core (pid: %d) failed with %d\n", pid, ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
parasite_ctl = parasite_infect_seized(pid, NULL, &vma_area_list);
|
||||
if (!parasite_ctl) {
|
||||
pr_error("Can't infect (pid: %d) with parasite\n", pid);
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = parasite_dump_pages_seized(parasite_ctl, &vma_area_list,
|
||||
cr_fdset, CR_FD_PAGES);
|
||||
if (ret) {
|
||||
pr_error("Can't dump pages (pid: %d) with parasite\n", pid);
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = parasite_cure_seized(¶site_ctl, &vma_area_list);
|
||||
if (ret) {
|
||||
pr_error("Can't cure (pid: %d) from parasite\n", pid);
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = unseize_task(pid);
|
||||
if (ret) {
|
||||
pr_error("Can't unsieze (pid: %d) task\n", pid);
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = dump_task_files(pid, cr_fdset);
|
||||
if (ret) {
|
||||
pr_error("Dump files (pid: %d) failed with %d\n", pid, ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = dump_task_mappings(pid, cr_fdset);
|
||||
if (ret) {
|
||||
pr_error("Dump mappings (pid: %d) failed with %d\n", pid, ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = finalize_core(pid, cr_fdset);
|
||||
if (ret) {
|
||||
pr_error("Finalizing core (pid: %d) failed with %d\n", pid, ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
err:
|
||||
free_mappings();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Entry point of the dump: checkpoint task @pid and, unless
 * @leader_only is set, every task collected below it.
 *
 * @leader_only:   stop, dump and resume only the first collected task
 * @leave_stopped: when non-zero the tasks are not resumed afterwards
 *
 * The process tree image is written first, then one image set per
 * task. Returns 0 on success, -1 on failure.
 */
int cr_dump_tasks(pid_t pid, bool leader_only, int leave_stopped)
{
	struct cr_fdset *cr_fdset = NULL;
	struct pstree_item *task;
	int ret = -1;

	if (!leader_only) {
		pr_info("========================================\n");
		pr_info("Dumping process group (pid: %d)\n", pid);
		pr_info("========================================\n");
	}

	if (collect_pstree(pid))
		goto err;

	list_for_each_entry(task, &pstree_list, list) {
		stop_task(task->pid);
		if (leader_only)
			break;
	}

	/* Dump the process tree first */
	cr_fdset = alloc_cr_fdset(pid);
	if (!cr_fdset ||
	    prep_cr_fdset_for_dump(cr_fdset, CR_FD_DESC_USE(CR_FD_PSTREE)) ||
	    dump_pstree(pid, cr_fdset))
		goto err;

	close_cr_fdset(cr_fdset);
	free_cr_fdset(&cr_fdset);

	/* Now all other data */
	list_for_each_entry(task, &pstree_list, list) {

		cr_fdset = alloc_cr_fdset(task->pid);
		if (!cr_fdset ||
		    prep_cr_fdset_for_dump(cr_fdset, CR_FD_DESC_NOPSTREE) ||
		    dump_one_task(task->pid, cr_fdset))
			goto err;

		close_cr_fdset(cr_fdset);
		free_cr_fdset(&cr_fdset);

		if (leader_only)
			break;
	}
	ret = 0;

err:
	if (!leave_stopped) {
		list_for_each_entry(task, &pstree_list, list) {
			continue_task(task->pid);
			if (leader_only)
				break;
		}
	}

	free_pstree();
	close_cr_fdset(cr_fdset);
	free_cr_fdset(&cr_fdset);
	return ret;
}
|
1144
cr-restore.c
Normal file
1144
cr-restore.c
Normal file
File diff suppressed because it is too large
Load Diff
389
cr-show.c
Normal file
389
cr-show.c
Normal file
@ -0,0 +1,389 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
#include <limits.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <dirent.h>
|
||||
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <sys/ptrace.h>
|
||||
#include <sys/user.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include "types.h"
|
||||
#include "list.h"
|
||||
|
||||
#include "compiler.h"
|
||||
#include "crtools.h"
|
||||
#include "syscall.h"
|
||||
#include "util.h"
|
||||
|
||||
#include "image.h"
|
||||
|
||||
#ifndef CONFIG_X86_64
|
||||
# error No x86-32 support yet
|
||||
#endif
|
||||
|
||||
/*
 * Print four members (@n1..@n4) of the structure @s on a single line as
 * "name: value" pairs.  The member names are stringified with '#' and the
 * values are printed with %lx.
 * NOTE(review): %lx assumes every member is unsigned long sized -- confirm
 * against the structure definition.
 */
#define pr_regs4(s, n1, n2, n3, n4)	\
	pr_info("%8s: %16lx "		\
		"%8s: %16lx "		\
		"%8s: %16lx "		\
		"%8s: %16lx\n",		\
		#n1, s.n1,		\
		#n2, s.n2,		\
		#n3, s.n3,		\
		#n4, s.n4)

/* Same as pr_regs4() but for three members (@n1..@n3). */
#define pr_regs3(s, n1, n2, n3)		\
	pr_info("%8s: %16lx "		\
		"%8s: %16lx "		\
		"%8s: %16lx\n",		\
		#n1, s.n1,		\
		#n2, s.n2,		\
		#n3, s.n3)
|
||||
|
||||
static char local_buf[PAGE_SIZE];
|
||||
static LIST_HEAD(pstree_list);
|
||||
|
||||
/* FIXME: same as dump -- unify */
|
||||
static void free_pstree(void)
|
||||
{
|
||||
struct pstree_item *item, *p;
|
||||
|
||||
list_for_each_entry_safe(item, p, &pstree_list, list) {
|
||||
xfree(item->children);
|
||||
xfree(item);
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&pstree_list);
|
||||
}
|
||||
|
||||
static void show_regs(struct cr_fdset *cr_fdset)
|
||||
{
|
||||
struct user_regs_entry regs;
|
||||
struct desc_struct tls;
|
||||
int fd_core, i;
|
||||
|
||||
fd_core = cr_fdset->desc[CR_FD_CORE].fd;
|
||||
if (fd_core < 0)
|
||||
goto err;
|
||||
|
||||
pr_info("\n\t---[GP registers set]---\n");
|
||||
|
||||
lseek(fd_core, GET_FILE_OFF(struct core_entry, gpregs), SEEK_SET);
|
||||
|
||||
read_ptr_safe(fd_core, ®s, err);
|
||||
|
||||
pr_regs4(regs, cs, ip, ds, es);
|
||||
pr_regs4(regs, ss, sp, fs, gs);
|
||||
pr_regs4(regs, di, si, dx, cx);
|
||||
pr_regs4(regs, ax, r8, r9, r10);
|
||||
pr_regs4(regs, r11, r12, r13, r14);
|
||||
pr_regs3(regs, r15, bp, bx);
|
||||
pr_regs4(regs, orig_ax, flags, fs_base, gs_base);
|
||||
|
||||
pr_info("\n\t---[TLS area]---\n");
|
||||
|
||||
lseek(fd_core, GET_FILE_OFF(struct core_entry, tls_array), SEEK_SET);
|
||||
|
||||
for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) {
|
||||
read_ptr_safe(fd_core, &tls, err);
|
||||
pr_info("tls[%2i] = %x %x\n", i, tls.a, tls.b);
|
||||
}
|
||||
|
||||
err:
|
||||
return;
|
||||
}
|
||||
|
||||
static void show_files(struct cr_fdset *cr_fdset)
|
||||
{
|
||||
struct fdinfo_entry e;
|
||||
int fd_files, ret;
|
||||
|
||||
pr_info("\n");
|
||||
pr_info("CR_FD_FDINFO: %s\n", cr_fdset->desc[CR_FD_FDINFO].name);
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
fd_files = cr_fdset->desc[CR_FD_FDINFO].fd;
|
||||
|
||||
lseek(fd_files, MAGIC_OFFSET, SEEK_SET);
|
||||
|
||||
while (1) {
|
||||
ret = read(fd_files, &e, sizeof(e));
|
||||
if (!ret)
|
||||
goto err;
|
||||
if (ret != sizeof(e)) {
|
||||
pr_perror("Can't read fdinfo entry");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (e.len) {
|
||||
ret = read(fd_files, local_buf, e.len);
|
||||
if (ret != e.len) {
|
||||
pr_perror("Can't read %d bytes\n", e.len);
|
||||
goto err;
|
||||
}
|
||||
local_buf[e.len] = 0;
|
||||
pr_info("type: %02x len: %02x flags: %4x pos: %8x addr: %16lx --> %s\n",
|
||||
e.type, e.len, e.flags, e.pos, e.addr, local_buf);
|
||||
} else
|
||||
pr_info("type: %02x len: %02x flags: %4x pos: %8x addr: %16lx\n",
|
||||
e.type, e.len, e.flags, e.pos, e.addr);
|
||||
}
|
||||
|
||||
err:
|
||||
pr_info("----------------------------------------\n");
|
||||
}
|
||||
|
||||
static void show_pipes(struct cr_fdset *cr_fdset)
|
||||
{
|
||||
struct pipe_entry e;
|
||||
int fd_pipes, ret;
|
||||
|
||||
pr_info("\n");
|
||||
pr_info("CR_FD_PIPES: %s\n", cr_fdset->desc[CR_FD_PIPES].name);
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
fd_pipes = cr_fdset->desc[CR_FD_PIPES].fd;
|
||||
|
||||
lseek(fd_pipes, MAGIC_OFFSET, SEEK_SET);
|
||||
|
||||
while (1) {
|
||||
ret = read(fd_pipes, &e, sizeof(e));
|
||||
if (!ret)
|
||||
goto err;
|
||||
if (ret != sizeof(e)) {
|
||||
pr_perror("Can't read pipe entry\n");
|
||||
goto err;
|
||||
}
|
||||
pr_info("fd: %8lx pipeid: %8lx flags: %8lx bytes: %8lx\n",
|
||||
e.fd, e.pipeid, e.flags, e.bytes);
|
||||
if (e.bytes)
|
||||
lseek(fd_pipes, e.bytes, SEEK_CUR);
|
||||
}
|
||||
|
||||
err:
|
||||
pr_info("----------------------------------------\n");
|
||||
}
|
||||
|
||||
static void show_core(struct cr_fdset *cr_fdset)
|
||||
{
|
||||
struct vma_area vma_area = {};
|
||||
struct vma_entry ve;
|
||||
int fd_core, ret;
|
||||
u64 va;
|
||||
|
||||
pr_info("\n");
|
||||
pr_info("CR_FD_CORE: %s\n", cr_fdset->desc[CR_FD_CORE].name);
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
fd_core = cr_fdset->desc[CR_FD_CORE].fd;
|
||||
if (fd_core < 0)
|
||||
goto out;
|
||||
|
||||
show_regs(cr_fdset);
|
||||
|
||||
lseek(fd_core, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
|
||||
|
||||
/*
|
||||
* Start with VMA, then pages.
|
||||
*/
|
||||
pr_info("\n\t---[VMA areas]---\n");
|
||||
while (1) {
|
||||
ret = read(fd_core, &ve, sizeof(ve));
|
||||
if (!ret)
|
||||
break;
|
||||
if (ret != sizeof(ve)) {
|
||||
pr_perror("Unable to read VMA\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (is_ending_vma(&ve)) {
|
||||
pr_info("\n\t---[Pages]---\n");
|
||||
while (1) {
|
||||
ret = read(fd_core, &va, sizeof(va));
|
||||
if (!ret)
|
||||
goto out;
|
||||
if (ret != sizeof(va)) {
|
||||
pr_perror("Unable to read VA\n");
|
||||
goto out;
|
||||
}
|
||||
if (va == 0)
|
||||
goto out;
|
||||
pr_info("page va: %16lx\n", va);
|
||||
lseek(fd_core, PAGE_SIZE, SEEK_CUR);
|
||||
}
|
||||
}
|
||||
|
||||
/* Simply in a sake of fancy printing */
|
||||
vma_area.vma = ve;
|
||||
pr_info_vma(&vma_area);
|
||||
}
|
||||
|
||||
out:
|
||||
pr_info("----------------------------------------\n");
|
||||
}
|
||||
|
||||
static void show_pstree_from_file(int fd, char *name)
|
||||
{
|
||||
int ret;
|
||||
|
||||
pr_info("\n");
|
||||
pr_info("CR_FD_PSTREE: %s\n", name);
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
while (1) {
|
||||
struct pstree_entry e;
|
||||
unsigned long i;
|
||||
u32 child_pid;
|
||||
|
||||
ret = read(fd, &e, sizeof(e));
|
||||
if (!ret)
|
||||
break;
|
||||
if (ret != sizeof(e)) {
|
||||
pr_perror("Bad pstree entry");
|
||||
break;
|
||||
}
|
||||
|
||||
pr_info("Process %d number of children: %d\n",
|
||||
e.pid, e.nr_children);
|
||||
|
||||
for (i = 0; i < e.nr_children; i++) {
|
||||
ret = read(fd, &child_pid,
|
||||
sizeof(child_pid));
|
||||
pr_info(" %d", child_pid);
|
||||
}
|
||||
if (e.nr_children)
|
||||
pr_info("\n");
|
||||
}
|
||||
|
||||
pr_info("----------------------------------------\n");
|
||||
}
|
||||
|
||||
static void show_pstree(struct list_head *head, char *name)
|
||||
{
|
||||
struct pstree_item *item;
|
||||
int i;
|
||||
|
||||
pr_info("\n");
|
||||
pr_info("CR_FD_PSTREE: %s\n", name);
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
list_for_each_entry(item, head, list) {
|
||||
pr_info("Process %d number of children: %d\n",
|
||||
item->pid, item->nr_children);
|
||||
for (i = 0; i < item->nr_children; i++)
|
||||
pr_info(" %d", item->children[i]);
|
||||
if (item->nr_children)
|
||||
pr_info("\n");
|
||||
}
|
||||
|
||||
pr_info("----------------------------------------\n");
|
||||
}
|
||||
|
||||
static int collect_pstree(pid_t pid, struct cr_fdset *cr_fdset)
|
||||
{
|
||||
int fd = cr_fdset->desc[CR_FD_PSTREE].fd;
|
||||
struct pstree_item *item = NULL;
|
||||
struct pstree_entry e;
|
||||
int ret = -1;
|
||||
|
||||
for (;;) {
|
||||
size_t size;
|
||||
|
||||
ret = read(fd, &e, sizeof(e));
|
||||
if (ret && ret != sizeof(e)) {
|
||||
pr_perror("Wrong pstree entry\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
break;
|
||||
|
||||
item = xmalloc(sizeof(*item));
|
||||
if (!item)
|
||||
goto err;
|
||||
|
||||
size = sizeof(u32) * e.nr_children;
|
||||
|
||||
item->pid = e.pid;
|
||||
item->nr_children = e.nr_children;
|
||||
item->children = xmalloc(size);
|
||||
|
||||
if (!item->children) {
|
||||
pr_error("No memory for children pids\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = read(fd, item->children, size);
|
||||
if (ret != size) {
|
||||
pr_error("An error in reading children pids\n");
|
||||
xfree(item->children);
|
||||
goto err;
|
||||
}
|
||||
|
||||
list_add_tail(&item->list, &pstree_list);
|
||||
}
|
||||
|
||||
item = NULL;
|
||||
ret = 0;
|
||||
|
||||
err:
|
||||
xfree(item);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int cr_show(unsigned long pid, bool leader_only)
|
||||
{
|
||||
struct cr_fdset *cr_fdset;
|
||||
struct pstree_item *item;
|
||||
int i, ret = -1;
|
||||
|
||||
cr_fdset = alloc_cr_fdset(pid);
|
||||
if (!cr_fdset)
|
||||
goto out;
|
||||
|
||||
ret = prep_cr_fdset_for_restore(cr_fdset, CR_FD_DESC_ALL);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = collect_pstree(pid, cr_fdset);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
show_pstree(&pstree_list, cr_fdset->desc[CR_FD_PSTREE].name);
|
||||
|
||||
close_cr_fdset(cr_fdset);
|
||||
free_cr_fdset(&cr_fdset);
|
||||
|
||||
list_for_each_entry(item, &pstree_list, list) {
|
||||
|
||||
cr_fdset = alloc_cr_fdset(item->pid);
|
||||
if (!cr_fdset)
|
||||
goto out;
|
||||
|
||||
ret = prep_cr_fdset_for_restore(cr_fdset, CR_FD_DESC_NOPSTREE);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
show_core(cr_fdset);
|
||||
show_pipes(cr_fdset);
|
||||
show_files(cr_fdset);
|
||||
|
||||
if (leader_only)
|
||||
break;
|
||||
}
|
||||
|
||||
out:
|
||||
free_pstree();
|
||||
close_cr_fdset(cr_fdset);
|
||||
free_cr_fdset(&cr_fdset);
|
||||
return ret;
|
||||
}
|
280
crtools.c
Normal file
280
crtools.c
Normal file
@ -0,0 +1,280 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
#include <limits.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <dirent.h>
|
||||
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <sys/ptrace.h>
|
||||
#include <sys/user.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/sendfile.h>
|
||||
|
||||
#include "types.h"
|
||||
#include "list.h"
|
||||
|
||||
#include "compiler.h"
|
||||
#include "crtools.h"
|
||||
#include "util.h"
|
||||
|
||||
struct page_entry zero_page_entry;
|
||||
|
||||
/*
 * Per image type description, indexed by the CR_FD_* constants:
 *  - .fmt is the snprintf() format of the file name; alloc_cr_fdset()
 *    expands it with the pid cast to long, hence the %li conversion,
 *  - .magic is the value written at the start of the file on dump and
 *    verified on restore.
 */
static struct cr_fd_desc_tmpl template[CR_FD_MAX] = {
	[CR_FD_FDINFO] = {
		.fmt	= "fdinfo-%li.img",
		.magic	= FDINFO_MAGIC,
	},
	[CR_FD_PAGES] = {
		.fmt	= "pages-%li.img",
		.magic	= PAGES_MAGIC,
	},
	/* Shares its magic with CR_FD_PAGES. */
	[CR_FD_PAGES_SHMEM] = {
		.fmt	= "pages-shmem-%li.img",
		.magic	= PAGES_MAGIC,
	},
	[CR_FD_CORE] = {
		.fmt	= "core-%li.img",
		.magic	= CORE_MAGIC,
	},
	[CR_FD_PIPES] = {
		.fmt	= "pipes-%li.img",
		.magic	= PIPES_MAGIC,
	},
	[CR_FD_PSTREE] = {
		.fmt	= "pstree-%li.img",
		.magic	= PSTREE_MAGIC,
	},
	[CR_FD_SHMEM] = {
		.fmt	= "shmem-%li.img",
		.magic	= SHMEM_MAGIC,
	},
};
|
||||
|
||||
struct cr_fdset *alloc_cr_fdset(pid_t pid)
|
||||
{
|
||||
struct cr_fdset *cr_fdset;
|
||||
unsigned int i;
|
||||
|
||||
cr_fdset = xzalloc(sizeof(*cr_fdset));
|
||||
if (!cr_fdset)
|
||||
goto err;
|
||||
|
||||
for (i = 0; i < CR_FD_MAX; i++) {
|
||||
cr_fdset->desc[i].tmpl = &template[i];
|
||||
snprintf(cr_fdset->desc[i].name,
|
||||
sizeof(cr_fdset->desc[i].name),
|
||||
cr_fdset->desc[i].tmpl->fmt,
|
||||
(long)pid);
|
||||
cr_fdset->desc[i].fd = -1;
|
||||
}
|
||||
|
||||
err:
|
||||
return cr_fdset;
|
||||
}
|
||||
|
||||
int prep_cr_fdset_for_dump(struct cr_fdset *cr_fdset,
|
||||
unsigned long use_mask)
|
||||
{
|
||||
unsigned int i;
|
||||
u32 magic;
|
||||
int ret = -1;
|
||||
|
||||
if (!cr_fdset)
|
||||
goto err;
|
||||
|
||||
cr_fdset->use_mask = use_mask;
|
||||
|
||||
for (i = 0; i < CR_FD_MAX; i++) {
|
||||
if (!(use_mask & CR_FD_DESC_USE(i)))
|
||||
continue;
|
||||
|
||||
ret = unlink(cr_fdset->desc[i].name);
|
||||
if (ret && errno != ENOENT) {
|
||||
pr_perror("Unable to unlink %s (%s)\n",
|
||||
cr_fdset->desc[i].name,
|
||||
strerror(errno));
|
||||
goto err;
|
||||
} else
|
||||
ret = -1;
|
||||
cr_fdset->desc[i].fd = open(cr_fdset->desc[i].name,
|
||||
O_RDWR | O_CREAT | O_EXCL,
|
||||
CR_FD_PERM);
|
||||
if (cr_fdset->desc[i].fd < 0) {
|
||||
pr_perror("Unable to open %s (%s)\n",
|
||||
cr_fdset->desc[i].name,
|
||||
strerror(errno));
|
||||
goto err;
|
||||
}
|
||||
|
||||
pr_debug("Opened %s with %d\n",
|
||||
cr_fdset->desc[i].name,
|
||||
cr_fdset->desc[i].fd);
|
||||
|
||||
magic = cr_fdset->desc[i].tmpl->magic;
|
||||
write_ptr_safe(cr_fdset->desc[i].fd, &magic, err);
|
||||
|
||||
/*
|
||||
* Make sure it's on disk since we might
|
||||
* need to re-open files in parasite.
|
||||
*/
|
||||
fsync(cr_fdset->desc[i].fd);
|
||||
}
|
||||
ret = 0;
|
||||
err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int prep_cr_fdset_for_restore(struct cr_fdset *cr_fdset,
|
||||
unsigned long use_mask)
|
||||
{
|
||||
unsigned int i;
|
||||
int ret = -1;
|
||||
u32 magic;
|
||||
|
||||
if (!cr_fdset)
|
||||
goto err;
|
||||
|
||||
cr_fdset->use_mask = use_mask;
|
||||
|
||||
for (i = 0; i < CR_FD_MAX; i++) {
|
||||
if (!(use_mask & CR_FD_DESC_USE(i)))
|
||||
continue;
|
||||
|
||||
cr_fdset->desc[i].fd = open(cr_fdset->desc[i].name,
|
||||
O_RDWR, CR_FD_PERM);
|
||||
if (cr_fdset->desc[i].fd < 0) {
|
||||
pr_perror("Unable to open %s (%s)\n",
|
||||
cr_fdset->desc[i].name,
|
||||
strerror(errno));
|
||||
goto err;
|
||||
}
|
||||
|
||||
pr_debug("Opened %s with %d\n",
|
||||
cr_fdset->desc[i].name,
|
||||
cr_fdset->desc[i].fd);
|
||||
|
||||
read_ptr_safe(cr_fdset->desc[i].fd, &magic, err);
|
||||
if (magic != cr_fdset->desc[i].tmpl->magic) {
|
||||
pr_error("Magic doesn't match for %s\n",
|
||||
cr_fdset->desc[i].name);
|
||||
goto err;
|
||||
}
|
||||
|
||||
}
|
||||
ret = 0;
|
||||
err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void close_cr_fdset(struct cr_fdset *cr_fdset)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
if (!cr_fdset)
|
||||
return;
|
||||
|
||||
for (i = 0; i < CR_FD_MAX; i++) {
|
||||
if (!(cr_fdset->use_mask & CR_FD_DESC_USE(i)))
|
||||
continue;
|
||||
|
||||
if (cr_fdset->desc[i].fd >= 0) {
|
||||
pr_debug("Closed %s with %d\n",
|
||||
cr_fdset->desc[i].name,
|
||||
cr_fdset->desc[i].fd);
|
||||
close(cr_fdset->desc[i].fd);
|
||||
cr_fdset->desc[i].fd = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Free the set referenced by @cr_fdset and reset the caller's pointer to
 * NULL.  Does nothing when @cr_fdset or the pointer it refers to is NULL,
 * which makes repeated calls harmless.
 */
void free_cr_fdset(struct cr_fdset **cr_fdset)
{
	if (!cr_fdset || !*cr_fdset)
		return;

	free(*cr_fdset);
	*cr_fdset = NULL;
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
pid_t pid;
|
||||
int ret = -1;
|
||||
|
||||
BUILD_BUG_ON(PAGE_SIZE != PAGE_IMAGE_SIZE);
|
||||
|
||||
if (argc < 3)
|
||||
goto usage;
|
||||
|
||||
memset(&zero_page_entry, 0, sizeof(zero_page_entry));
|
||||
|
||||
if (!strcmp(argv[1], "dump")) {
|
||||
bool leader_only;
|
||||
|
||||
switch (argv[2][1]) {
|
||||
case 'p':
|
||||
pid = atol(argv[3]);
|
||||
leader_only = true;
|
||||
break;
|
||||
case 't':
|
||||
pid = atol(argv[3]);
|
||||
leader_only = false;
|
||||
break;
|
||||
default:
|
||||
goto usage;
|
||||
}
|
||||
|
||||
ret = cr_dump_tasks(pid, leader_only, 1);
|
||||
|
||||
} else if (!strcmp(argv[1], "restore")) {
|
||||
bool leader_only;
|
||||
|
||||
switch (argv[2][1]) {
|
||||
case 'p':
|
||||
pid = atol(argv[3]);
|
||||
leader_only = true;
|
||||
break;
|
||||
case 't':
|
||||
pid = atol(argv[3]);
|
||||
leader_only = false;
|
||||
break;
|
||||
default:
|
||||
goto usage;
|
||||
}
|
||||
|
||||
ret = cr_restore_tasks(pid, leader_only, 1);
|
||||
|
||||
} else if (!strcmp(argv[1], "show")) {
|
||||
bool leader_only = true;
|
||||
|
||||
switch (argv[2][1]) {
|
||||
case 'p':
|
||||
leader_only = true;
|
||||
pid = atol(argv[3]);
|
||||
break;
|
||||
case 't':
|
||||
leader_only = false;
|
||||
pid = atol(argv[3]);
|
||||
break;
|
||||
default:
|
||||
goto usage;
|
||||
}
|
||||
|
||||
ret = cr_show(pid, leader_only);
|
||||
|
||||
} else
|
||||
goto usage;
|
||||
|
||||
return ret;
|
||||
|
||||
usage:
|
||||
printk("\nUsage:\n");
|
||||
printk("\tcrtools (dump|show|restore) (-p|-t) pid\n\n");
|
||||
return -1;
|
||||
}
|
213
elf.c
Normal file
213
elf.c
Normal file
@ -0,0 +1,213 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
#include <limits.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <dirent.h>
|
||||
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <sys/ptrace.h>
|
||||
#include <sys/user.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include <sys/sendfile.h>
|
||||
|
||||
#include "types.h"
|
||||
#include "list.h"
|
||||
|
||||
#include "compiler.h"
|
||||
#include "crtools.h"
|
||||
#include "syscall.h"
|
||||
#include "util.h"
|
||||
|
||||
#include "image.h"
|
||||
#include "elf.h"
|
||||
|
||||
#define ELF_MAX_PHDR ((65536U / sizeof(Elf64_Phdr)) - 1)
|
||||
#define ELF_MAX_PAGES (1 << 10)
|
||||
|
||||
/*
|
||||
* Convert the c/r core file into elf
|
||||
* executable, the kernel will handle it.
|
||||
*/
|
||||
int convert_to_elf(char *elf_path, int fd_core)
|
||||
{
|
||||
Elf64_Ehdr elf_ehdr;
|
||||
Elf64_Phdr elf_phdr;
|
||||
|
||||
Elf64_Half e_phnum = 0;
|
||||
Elf64_Addr e_entry = 0;
|
||||
|
||||
struct page_entry page_entry;
|
||||
unsigned long nrpages = 0;
|
||||
struct core_entry core;
|
||||
struct vma_area area;
|
||||
struct vma_entry vma;
|
||||
u64 va;
|
||||
|
||||
unsigned long phoff = 0;
|
||||
unsigned long phoff_regs, phoff_pages;
|
||||
|
||||
int fd_elf;
|
||||
int ret = -1;
|
||||
|
||||
fd_elf = open(elf_path, O_RDWR | O_CREAT | O_EXCL, 0700);
|
||||
if (fd_elf < 0) {
|
||||
pr_perror("Can't open %s\n", elf_path);
|
||||
goto err;
|
||||
}
|
||||
|
||||
memset(&elf_ehdr, 0, sizeof(elf_ehdr));
|
||||
memset(&area, 0, sizeof(area));
|
||||
|
||||
memcpy(elf_ehdr.e_ident, ELFMAG, SELFMAG);
|
||||
elf_ehdr.e_ident[EI_CLASS] = ELFCLASS64;
|
||||
elf_ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
|
||||
elf_ehdr.e_ident[EI_VERSION] = EV_CURRENT;
|
||||
|
||||
elf_ehdr.e_type = ET_CKPT;
|
||||
elf_ehdr.e_machine = EM_X86_64;
|
||||
elf_ehdr.e_version = EV_CURRENT;
|
||||
elf_ehdr.e_phoff = sizeof(elf_ehdr);
|
||||
elf_ehdr.e_ehsize = sizeof(elf_ehdr);
|
||||
elf_ehdr.e_phentsize = sizeof(Elf64_Phdr);
|
||||
|
||||
/* Get EP */
|
||||
lseek(fd_core, MAGIC_OFFSET, SEEK_SET);
|
||||
read_ptr_safe(fd_core, &core, err_close);
|
||||
|
||||
/*
|
||||
* Count the numbers of segments. Each segment
|
||||
* is the VMA record with appropriate permissions.
|
||||
* Then we need one big segment which would hold
|
||||
* all the pages dumped.
|
||||
*/
|
||||
lseek(fd_core, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
|
||||
while(1) {
|
||||
read_ptr_safe(fd_core, &vma, err_close);
|
||||
if (vma.start == 0 && vma.end == 0)
|
||||
break;
|
||||
e_phnum++;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
read_ptr_safe(fd_core, &va, err_close);
|
||||
nrpages++;
|
||||
if (va == 0)
|
||||
break;
|
||||
lseek(fd_core, PAGE_SIZE, SEEK_CUR);
|
||||
}
|
||||
|
||||
/* Figure out if we're overflowed */
|
||||
if (e_phnum > ELF_MAX_PHDR) {
|
||||
pr_error("Too many VMA areas (%li of %li allowed)\n",
|
||||
e_phnum, ELF_MAX_PHDR);
|
||||
goto err_close;
|
||||
} else if (nrpages > ELF_MAX_PAGES) {
|
||||
pr_error("Too many pages to restore (%li of %li allowed)\n",
|
||||
nrpages, ELF_MAX_PAGES);
|
||||
goto err_close;
|
||||
}
|
||||
|
||||
/*
|
||||
* We can write elf header now.
|
||||
*/
|
||||
lseek(fd_elf, 0, SEEK_SET);
|
||||
elf_ehdr.e_phnum = e_phnum + 2;
|
||||
elf_ehdr.e_entry = core.gpregs.ip;
|
||||
write_ptr_safe(fd_elf, &elf_ehdr, err_close);
|
||||
|
||||
/* Offset in file (after all headers) */
|
||||
phoff = elf_ehdr.e_phnum * sizeof(elf_phdr) + sizeof(elf_ehdr);
|
||||
|
||||
/* VMAs to headers */
|
||||
e_phnum = 0;
|
||||
lseek(fd_core, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
|
||||
while(1) {
|
||||
read_ptr_safe(fd_core, &vma, err_close);
|
||||
if (vma.start == 0 && vma.end == 0)
|
||||
break;
|
||||
|
||||
memset(&elf_phdr, 0, sizeof(elf_phdr));
|
||||
|
||||
elf_phdr.p_type = PT_CKPT_VMA;
|
||||
elf_phdr.p_offset = phoff;
|
||||
elf_phdr.p_vaddr = vma.start;
|
||||
elf_phdr.p_paddr = vma.start;
|
||||
elf_phdr.p_filesz = sizeof(vma);
|
||||
elf_phdr.p_memsz = vma.end - vma.start;
|
||||
elf_phdr.p_align = 0x1000;
|
||||
|
||||
if (vma.prot & PROT_READ)
|
||||
elf_phdr.p_flags |= PF_R;
|
||||
if (vma.prot & PROT_WRITE)
|
||||
elf_phdr.p_flags |= PF_W;
|
||||
if (vma.prot & PROT_EXEC)
|
||||
elf_phdr.p_flags |= PF_X;
|
||||
|
||||
write_ptr_safe(fd_elf, &elf_phdr, err_close);
|
||||
|
||||
phoff += sizeof(vma);
|
||||
}
|
||||
|
||||
/* The binfmt header */
|
||||
memset(&elf_phdr, 0, sizeof(elf_phdr));
|
||||
|
||||
elf_phdr.p_type = PT_CKPT_CORE;
|
||||
elf_phdr.p_flags = PF_R;
|
||||
elf_phdr.p_offset = phoff;
|
||||
elf_phdr.p_vaddr = 0;
|
||||
elf_phdr.p_filesz = sizeof(core);
|
||||
elf_phdr.p_memsz = sizeof(core);
|
||||
elf_phdr.p_align = 0x1000;
|
||||
|
||||
write_ptr_safe(fd_elf, &elf_phdr, err_close);
|
||||
|
||||
phoff += sizeof(core);
|
||||
|
||||
/* The pages and binfmt header */
|
||||
memset(&elf_phdr, 0, sizeof(elf_phdr));
|
||||
|
||||
elf_phdr.p_type = PT_CKPT_PAGES;
|
||||
elf_phdr.p_flags = PF_R;
|
||||
elf_phdr.p_offset = phoff;
|
||||
elf_phdr.p_vaddr = 0;
|
||||
elf_phdr.p_filesz = nrpages * (sizeof(page_entry));
|
||||
elf_phdr.p_memsz = nrpages * (sizeof(page_entry));
|
||||
elf_phdr.p_align = 0x1000;
|
||||
|
||||
write_ptr_safe(fd_elf, &elf_phdr, err_close);
|
||||
|
||||
/* Now write real contents for program segments */
|
||||
lseek(fd_core, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
|
||||
while(1) {
|
||||
read_ptr_safe(fd_core, &vma, err_close);
|
||||
if (vma.start == 0 && vma.end == 0)
|
||||
break;
|
||||
area.vma = vma, pr_info_vma(&area);
|
||||
write_ptr_safe(fd_elf, &vma, err_close);
|
||||
}
|
||||
|
||||
write_ptr_safe(fd_elf, &core, err_close);
|
||||
|
||||
if (sendfile(fd_elf, fd_core, NULL, nrpages * (sizeof(page_entry))) !=
|
||||
nrpages * (sizeof(page_entry))) {
|
||||
pr_perror("Can't send %li bytes to elf\n",
|
||||
(long)(nrpages * (sizeof(page_entry))));
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
err_close:
|
||||
close(fd_elf);
|
||||
err:
|
||||
return ret;
|
||||
}
|
22
gen-offsets.sh
Normal file
22
gen-offsets.sh
Normal file
@ -0,0 +1,22 @@
|
||||
#!/bin/sh
#
# Emit a C header on stdout that contains
#  - one "#define <offset-prefix><symbol> 0x<address>" per text symbol
#    (nm type T or t) of <object-file>, and
#  - the bytes of <binary-file> as the initializer of a static char array
#    named <blob-name>,
# all wrapped into an "#ifndef <ifndef-name>" include guard.
#
# Usage: gen-offsets.sh ifndef-name offset-prefix blob-name object-file binary-file

if [ "$#" -ne 5 ]; then
	echo "Usage: $0 ifndef-name offset-prefix blob-name object-file binary-file" >&2
	exit 1
fi

name_ifndef=$1
name_prefix_offset=$2
name_blob=$3
name_objname=$4
name_bin=$5

echo "/* Autogenerated file, don't edit */"
echo "#ifndef $name_ifndef"
echo "#define $name_ifndef"
echo ""
# The prefix is handed to awk as a variable instead of being spliced into
# the program text; file names are quoted so paths with spaces work.
nm "$name_objname" | grep ' [Tt] ' | \
	awk -v prefix="$name_prefix_offset" '{ print "#define " prefix $3 " 0x" $1; }'
echo ""
echo "static char $name_blob[] = {"
hexdump -v -e '"\t"' -e '8/1 "0x%02x, "' -e '"\n"' "$name_bin"
echo "};"
echo ""
echo "#endif /* $name_ifndef */"
|
||||
|
54
include/bitops.h
Normal file
54
include/bitops.h
Normal file
@ -0,0 +1,54 @@
|
||||
#ifndef CR_BITOPS_H_
|
||||
#define CR_BITOPS_H_
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
#define DIV_ROUND_UP(n,d)	(((n) + (d) - 1) / (d))
#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, 8 * sizeof(long))

/* Declare an array of unsigned long big enough to hold @bits bits. */
#define DECLARE_BITMAP(name, bits)		\
	unsigned long name[BITS_TO_LONGS(bits)]

#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
/* Technically wrong, but this avoids compilation errors on some gcc
   versions. */
#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
#else
#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
#endif

#define ADDR	BITOP_ADDR(addr)

/*
 * Non-atomic bit helpers.
 *
 * - __asm__ is used instead of asm so the header also builds in strict ISO
 *   modes (-std=c99/-std=c11), where asm is not a keyword.
 * - @nr is an int, so the register form already assembled to the 32-bit
 *   instruction; the explicit 'l' suffix only removes the ambiguity of the
 *   immediate form (the "I" constraint limits immediates to 0..31).
 * - With a register bit offset the instructions may touch memory beyond
 *   the single long named by the operand, hence the "memory" clobber on
 *   every helper, not just on set_bit().
 */

/* Set bit @nr in the bitmap at @addr. */
static inline void set_bit(int nr, volatile unsigned long *addr)
{
	__asm__ volatile("btsl %1,%0" : ADDR : "Ir" (nr) : "memory");
}

/* Toggle bit @nr in the bitmap at @addr. */
static inline void change_bit(int nr, volatile unsigned long *addr)
{
	__asm__ volatile("btcl %1,%0" : ADDR : "Ir" (nr) : "memory");
}

/*
 * Test bit @nr in the bitmap at @addr.
 * Returns 0 when the bit is clear and -1 (all ones) when it is set, so
 * callers should only test the result for zero / non-zero.
 */
static inline int test_bit(int nr, volatile const unsigned long *addr)
{
	int oldbit;

	__asm__ volatile("btl %2,%1\n\t"
			 "sbbl %0,%0"
			 : "=r" (oldbit)
			 : "m" (*(unsigned long *)addr), "Ir" (nr)
			 : "memory");

	return oldbit;
}

/* Clear bit @nr in the bitmap at @addr. */
static inline void clear_bit(int nr, volatile unsigned long *addr)
{
	__asm__ volatile("btrl %1,%0" : ADDR : "Ir" (nr) : "memory");
}
|
||||
|
||||
|
||||
#else /* CONFIG_X86_64 */
|
||||
# error x86-32 is not implemented yet
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
#endif /* CR_BITOPS_H_ */
|
57
include/compiler.h
Normal file
57
include/compiler.h
Normal file
@ -0,0 +1,57 @@
|
||||
#ifndef CR_COMPILER_H_
|
||||
#define CR_COMPILER_H_
|
||||
|
||||
/*
|
||||
* Various definitions needed for a successful build,
|
||||
* picked from various places, mostly from
|
||||
* the linux kernel.
|
||||
*/
|
||||
|
||||
/* Number of elements of a real array (not of a pointer). */
#define ARRAY_SIZE(x)		(sizeof(x) / sizeof((x)[0]))
/* Break the build when @condition is true: the array size turns negative. */
#define BUILD_BUG_ON(condition)	((void)sizeof(char[1 - 2*!!(condition)]))

#define __stringify_1(x...)	#x
#define __stringify(x...)	__stringify_1(x)

#define NORETURN		__attribute__((__noreturn__))
#define __packed		__attribute__((__packed__))
#define __used			__attribute__((__used__))

#define __section(S)		__attribute__ ((__section__(#S)))

#ifndef __always_inline
# define __always_inline	inline __attribute__((always_inline))
#endif

#ifndef always_inline
# define always_inline		__always_inline
#endif

#ifndef offsetof
# define offsetof(TYPE, MEMBER)	((size_t) &((TYPE *)0)->MEMBER)
#endif

/*
 * Get the address of the structure of @type that embeds @member at @ptr.
 *
 * __typeof__ is used throughout (as __round_mask() already does) so the
 * macros also work in strict ISO modes, where typeof is not a keyword.
 */
#define container_of(ptr, type, member) ({			\
	const __typeof__( ((type *)0)->member ) *__mptr = (ptr);	\
	(type *)( (char *)__mptr - offsetof(type,member) );})

/* round_up()/round_down() require @y to be a power of two. */
#define __round_mask(x, y)	((__typeof__(x))((y) - 1))
#define round_up(x, y)		((((x) - 1) | __round_mask(x, y)) + 1)
#define round_down(x, y)	((x) & ~__round_mask(x, y))
#define DIV_ROUND_UP(n,d)	(((n) + (d) - 1) / (d))

/*
 * Type-checked min/max: each argument is evaluated once, and the pointer
 * comparison makes the compiler warn when the argument types differ.
 */
#define min(x, y) ({				\
	__typeof__(x) _min1 = (x);		\
	__typeof__(y) _min2 = (y);		\
	(void) (&_min1 == &_min2);		\
	_min1 < _min2 ? _min1 : _min2; })

#define max(x, y) ({				\
	__typeof__(x) _max1 = (x);		\
	__typeof__(y) _max2 = (y);		\
	(void) (&_max1 == &_max2);		\
	_max1 > _max2 ? _max1 : _max2; })

/* True for powers of two -- and for 0, which has no bit set either. */
#define is_log2(v)		(((v) & ((v) - 1)) == 0)
|
||||
|
||||
#endif /* CR_COMPILER_H_ */
|
105
include/crtools.h
Normal file
105
include/crtools.h
Normal file
@ -0,0 +1,105 @@
|
||||
#ifndef CRTOOLS_H_
|
||||
#define CRTOOLS_H_
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "types.h"
|
||||
#include "list.h"
|
||||
|
||||
#include "image.h"
|
||||
|
||||
extern struct page_entry zero_page_entry;
|
||||
|
||||
int cr_dump_tasks(pid_t pid, bool leader_only, int leave_stopped);
|
||||
int cr_restore_tasks(pid_t pid, bool leader_only, int leave_stopped);
|
||||
int cr_show(unsigned long pid, bool leader_only);
|
||||
int convert_to_elf(char *elf_path, int fd_core);
|
||||
|
||||
#define CR_FD_PERM 0600
|
||||
|
||||
/*
 * Image file types.  The values index cr_fdset::desc[] and are turned into
 * use_mask bits by CR_FD_DESC_USE(); CR_FD_MAX is the number of types.
 */
enum {
	CR_FD_FDINFO,
	CR_FD_PAGES,
	CR_FD_PAGES_SHMEM,
	CR_FD_CORE,
	CR_FD_PIPES,
	CR_FD_PSTREE,
	CR_FD_SHMEM,

	CR_FD_MAX
};

/* file descriptors template */
struct cr_fd_desc_tmpl {
	const char	*fmt;			/* format for the name */
	u32		magic;			/* magic in the header */
};

/* file descriptors */
struct cr_fd_desc {
	struct cr_fd_desc_tmpl	*tmpl;		/* template we refer to */
	char			name[64];	/* the name, based on pid */
	int			fd;		/* descriptor for open/close, -1 when closed */
};

/* One descriptor per image type, for a single pid. */
struct cr_fdset {
	struct cr_fd_desc	desc[CR_FD_MAX];
	u32			use_mask;	/*
						 * if descriptor get used,set
						 * bit here
						 */
};
|
||||
|
||||
/* use_mask bit of a single image type */
#define CR_FD_DESC_USE(type)		((1 << (type)))
/* every image type */
#define CR_FD_DESC_ALL			((1 << CR_FD_MAX) - 1)
/* every image type except the process tree */
#define CR_FD_DESC_NOPSTREE		(CR_FD_DESC_ALL & ~(CR_FD_DESC_USE(CR_FD_PSTREE)))
/* no image type at all */
#define CR_FD_DESC_NONE			(0)
|
||||
|
||||
|
||||
struct cr_fdset *alloc_cr_fdset(pid_t pid);
|
||||
int prep_cr_fdset_for_dump(struct cr_fdset *cr_fdset,
|
||||
unsigned long use_mask);
|
||||
int prep_cr_fdset_for_restore(struct cr_fdset *cr_fdset,
|
||||
unsigned long use_mask);
|
||||
void close_cr_fdset(struct cr_fdset *cr_fdset);
|
||||
void free_cr_fdset(struct cr_fdset **cr_fdset);
|
||||
|
||||
/*
 * A vma_entry wrapped so it can be linked into a list.
 * NOTE(review): the meaning of shmid and vm_file_fd is not visible from
 * this header -- presumably the shared memory id and the descriptor of the
 * mapped file; confirm against the users.
 */
struct vma_area {
	struct list_head	list;
	struct vma_entry	vma;
	unsigned long		shmid;
	int			vm_file_fd;
};
|
||||
|
||||
/*
 * vma_area_has() forwards to vma_entry_has() on the embedded vma_entry.
 * The argument is parenthesized so that any pointer expression (for
 * instance "base + i") can be passed, not just a plain identifier.
 */
#define vma_area_has(vma_area, s)	vma_entry_has(&(vma_area)->vma, s)
/* Length in bytes of the address range described by the vma_entry @vma. */
#define vma_entry_len(vma)		((vma)->end - (vma)->start)
|
||||
|
||||
/* One process of the collected process tree. */
struct pstree_item {
	struct list_head	list;
	pid_t			pid;		/* leader pid */
	u32			nr_children;	/* number of children */
	u32			*children;	/* array of children */
};

/* Same layout as struct pstree_item plus a launched flag. */
struct pstree_item_info {
	struct list_head	list;

	pid_t			pid;		/* leader pid */
	u32			nr_children;	/* number of children */
	u32			*children;	/* array of children */

	bool			launched;	/* set if launched */
};
|
||||
|
||||
static inline unsigned long vma_area_size(struct vma_area *vma)
|
||||
{
|
||||
return vma->vma.end - vma->vma.start;
|
||||
}
|
||||
|
||||
static inline int in_vma_area(struct vma_area *vma, unsigned long addr)
|
||||
{
|
||||
return addr >= (unsigned long)vma->vma.start &&
|
||||
addr < (unsigned long)vma->vma.end;
|
||||
}
|
||||
|
||||
#endif /* CRTOOLS_H_ */
|
507
include/elf.h
Normal file
507
include/elf.h
Normal file
@ -0,0 +1,507 @@
|
||||
#ifndef CR_ELF_H
|
||||
#define CR_ELF_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
/* Segment types */
|
||||
#define PT_NULL 0
|
||||
#define PT_LOAD 1
|
||||
#define PT_DYNAMIC 2
|
||||
#define PT_INTERP 3
|
||||
#define PT_NOTE 4
|
||||
#define PT_SHLIB 5
|
||||
#define PT_PHDR 6
|
||||
#define PT_TLS 7
|
||||
#define PT_LOOS 0x60000000
|
||||
#define PT_HIOS 0x6fffffff
|
||||
#define PT_LOPROC 0x70000000
|
||||
#define PT_HIPROC 0x7fffffff
|
||||
#define PT_GNU_EH_FRAME 0x6474e550
|
||||
|
||||
#define PT_CKPT_OFFSET 0x01010101
|
||||
|
||||
#define PT_CKPT_VMA (PT_LOOS + PT_CKPT_OFFSET + 1)
|
||||
#define PT_CKPT_CORE (PT_LOOS + PT_CKPT_OFFSET + 2)
|
||||
#define PT_CKPT_PAGES (PT_LOOS + PT_CKPT_OFFSET + 3)
|
||||
|
||||
/* ELF file types */
|
||||
#define ET_NONE 0
|
||||
#define ET_REL 1
|
||||
#define ET_EXEC 2
|
||||
#define ET_DYN 3
|
||||
#define ET_CORE 4
|
||||
#define ET_CKPT 5
|
||||
#define ET_LOPROC 0xff00
|
||||
#define ET_HIPROC 0xffff
|
||||
|
||||
/* ELF machine types */
|
||||
#define EM_NONE 0
|
||||
#define EM_M32 1
|
||||
#define EM_SPARC 2
|
||||
#define EM_386 3
|
||||
#define EM_68K 4
|
||||
#define EM_88K 5
|
||||
#define EM_486 6 /* Not used in Linux at least */
|
||||
#define EM_860 7
|
||||
#define EM_MIPS 8 /* R3k, bigendian(?) */
|
||||
#define EM_MIPS_RS4_BE 10 /* R4k BE */
|
||||
#define EM_PARISC 15
|
||||
#define EM_SPARC32PLUS 18
|
||||
#define EM_PPC 20
|
||||
#define EM_PPC64 21
|
||||
#define EM_S390 22
|
||||
#define EM_SH 42
|
||||
#define EM_SPARCV9 43 /* v9 = SPARC64 */
|
||||
#define EM_H8_300H 47
|
||||
#define EM_H8S 48
|
||||
#define EM_IA_64 50
|
||||
#define EM_X86_64 62
|
||||
#define EM_CRIS 76
|
||||
#define EM_V850 87
|
||||
#define EM_ALPHA 0x9026 /* Interim Alpha that stuck around */
|
||||
#define EM_CYGNUS_V850 0x9080 /* Old v850 ID used by Cygnus */
|
||||
#define EM_S390_OLD 0xA390 /* Obsolete interim value for S/390 */
|
||||
|
||||
/* Dynamic type values */
|
||||
#define DT_NULL 0
|
||||
#define DT_NEEDED 1
|
||||
#define DT_PLTRELSZ 2
|
||||
#define DT_PLTGOT 3
|
||||
#define DT_HASH 4
|
||||
#define DT_STRTAB 5
|
||||
#define DT_SYMTAB 6
|
||||
#define DT_RELA 7
|
||||
#define DT_RELASZ 8
|
||||
#define DT_RELAENT 9
|
||||
#define DT_STRSZ 10
|
||||
#define DT_SYMENT 11
|
||||
#define DT_INIT 12
|
||||
#define DT_FINI 13
|
||||
#define DT_SONAME 14
|
||||
#define DT_RPATH 15
|
||||
#define DT_SYMBOLIC 16
|
||||
#define DT_REL 17
|
||||
#define DT_RELSZ 18
|
||||
#define DT_RELENT 19
|
||||
#define DT_PLTREL 20
|
||||
#define DT_DEBUG 21
|
||||
#define DT_TEXTREL 22
|
||||
#define DT_JMPREL 23
|
||||
#define DT_LOPROC 0x70000000
|
||||
#define DT_HIPROC 0x7fffffff
|
||||
|
||||
/* Auxiliary table entries */
|
||||
#define AT_NULL 0 /* end of vector */
|
||||
#define AT_IGNORE 1 /* entry should be ignored */
|
||||
#define AT_EXECFD 2 /* file descriptor of program */
|
||||
#define AT_PHDR 3 /* program headers for program */
|
||||
#define AT_PHENT 4 /* size of program header entry */
|
||||
#define AT_PHNUM 5 /* number of program headers */
|
||||
#define AT_PAGESZ 6 /* system page size */
|
||||
#define AT_BASE 7 /* base address of interpreter */
|
||||
#define AT_FLAGS 8 /* flags */
|
||||
#define AT_ENTRY 9 /* entry point of program */
|
||||
#define AT_NOTELF 10 /* program is not ELF */
|
||||
#define AT_UID 11 /* real uid */
|
||||
#define AT_EUID 12 /* effective uid */
|
||||
#define AT_GID 13 /* real gid */
|
||||
#define AT_EGID 14 /* effective gid */
|
||||
#define AT_PLATFORM 15 /* string identifying CPU for optimizations */
|
||||
#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
|
||||
#define AT_CLKTCK 17 /* frequency at which times() increments */
|
||||
/* 18..22 = ? */
|
||||
#define AT_SECURE 23 /* secure mode boolean */
|
||||
|
||||
/* Program header permission flags */
|
||||
#define PF_X 0x1
|
||||
#define PF_W 0x2
|
||||
#define PF_R 0x4
|
||||
|
||||
/* Section header types */
|
||||
#define SHT_NULL 0
|
||||
#define SHT_PROGBITS 1
|
||||
#define SHT_SYMTAB 2
|
||||
#define SHT_STRTAB 3
|
||||
#define SHT_RELA 4
|
||||
#define SHT_HASH 5
|
||||
#define SHT_DYNAMIC 6
|
||||
#define SHT_NOTE 7
|
||||
#define SHT_NOBITS 8
|
||||
#define SHT_REL 9
|
||||
#define SHT_SHLIB 10
|
||||
#define SHT_DYNSYM 11
|
||||
#define SHT_NUM 12
|
||||
#define SHT_LOPROC 0x70000000
|
||||
#define SHT_HIPROC 0x7fffffff
|
||||
#define SHT_LOUSER 0x80000000
|
||||
#define SHT_HIUSER 0xffffffff
|
||||
|
||||
/* Section header flags */
|
||||
#define SHF_WRITE (1 << 0) /* Writable */
|
||||
#define SHF_ALLOC (1 << 1) /* Occupies memory during execution */
|
||||
#define SHF_EXECINSTR (1 << 2) /* Executable */
|
||||
#define SHF_MERGE (1 << 4) /* Might be merged */
|
||||
#define SHF_STRINGS (1 << 5) /* Contains nul-terminated strings */
|
||||
#define SHF_INFO_LINK (1 << 6) /* `sh_info' contains SHT index */
|
||||
#define SHF_LINK_ORDER (1 << 7) /* Preserve order after combining */
|
||||
#define SHF_OS_NONCONFORMING (1 << 8) /* Non-standard OS specific handling required */
|
||||
#define SHF_GROUP (1 << 9) /* Section is member of a group. */
|
||||
#define SHF_TLS (1 << 10) /* Section hold thread-local data. */
|
||||
|
||||
/* Special section numbers */
|
||||
#define SHN_UNDEF 0
|
||||
#define SHN_LORESERVE 0xff00
|
||||
#define SHN_LOPROC 0xff00
|
||||
#define SHN_HIPROC 0xff1f
|
||||
#define SHN_ABS 0xfff1
|
||||
#define SHN_COMMON 0xfff2
|
||||
#define SHN_HIRESERVE 0xffff
|
||||
|
||||
/* Section align flag */
|
||||
#define SHA_ANY 1 /* No alignment constraint */
|
||||
|
||||
/* Length of magic at the start of a file */
|
||||
#define EI_NIDENT 16
|
||||
|
||||
/* Magic number constants... */
|
||||
#define EI_MAG0 0 /* e_ident[] indexes */
|
||||
#define EI_MAG1 1
|
||||
#define EI_MAG2 2
|
||||
#define EI_MAG3 3
|
||||
#define EI_CLASS 4
|
||||
#define EI_DATA 5
|
||||
#define EI_VERSION 6
|
||||
#define EI_OSABI 7
|
||||
#define EI_PAD 8
|
||||
|
||||
#define ELFMAG0 0x7f /* EI_MAG */
|
||||
#define ELFMAG1 'E'
|
||||
#define ELFMAG2 'L'
|
||||
#define ELFMAG3 'F'
|
||||
#define ELFMAG "\177ELF"
|
||||
#define SELFMAG 4
|
||||
|
||||
#define ELFCLASSNONE 0 /* EI_CLASS */
|
||||
#define ELFCLASS32 1
|
||||
#define ELFCLASS64 2
|
||||
#define ELFCLASSNUM 3
|
||||
|
||||
#define ELFDATANONE 0 /* e_ident[EI_DATA] */
|
||||
#define ELFDATA2LSB 1
|
||||
#define ELFDATA2MSB 2
|
||||
|
||||
#define EV_NONE 0 /* e_version, EI_VERSION */
|
||||
#define EV_CURRENT 1
|
||||
#define EV_NUM 2
|
||||
|
||||
#define ELFOSABI_NONE 0
|
||||
#define ELFOSABI_LINUX 3
|
||||
|
||||
/* Legal values for ST_BIND subfield of st_info (symbol binding). */
|
||||
#define STB_LOCAL 0 /* Local symbol */
|
||||
#define STB_GLOBAL 1 /* Global symbol */
|
||||
#define STB_WEAK 2 /* Weak symbol */
|
||||
#define STB_NUM 3 /* Number of defined types. */
|
||||
#define STB_LOOS 10 /* Start of OS-specific */
|
||||
#define STB_HIOS 12 /* End of OS-specific */
|
||||
#define STB_LOPROC 13 /* Start of processor-specific */
|
||||
#define STB_HIPROC 15 /* End of processor-specific */
|
||||
|
||||
/* Symbol types */
|
||||
#define STT_NOTYPE 0 /* Symbol type is unspecified */
|
||||
#define STT_OBJECT 1 /* Symbol is a data object */
|
||||
#define STT_FUNC 2 /* Symbol is a code object */
|
||||
#define STT_SECTION 3 /* Symbol associated with a section */
|
||||
#define STT_FILE 4 /* Symbol's name is file name */
|
||||
#define STT_COMMON 5 /* Symbol is a common data object */
|
||||
#define STT_TLS 6 /* Symbol is thread-local data object */
|
||||
#define STT_NUM 7 /* Number of defined types. */
|
||||
|
||||
/* Symbol visibilities */
|
||||
#define STV_DEFAULT 0 /* Default symbol visibility rules */
|
||||
#define STV_INTERNAL 1 /* Processor specific hidden class */
|
||||
#define STV_HIDDEN 2 /* Sym unavailable in other modules */
|
||||
#define STV_PROTECTED 3 /* Not preemptible, not exported */
|
||||
|
||||
/* Both Elf32_Sym and Elf64_Sym use the same one-byte st_info field */
|
||||
/*
 * st_info packs the symbol binding into the high nibble and the
 * symbol type into the low nibble.
 */
#define ELF32_ST_BIND(i)	((i) >> 4)
#define ELF32_ST_MKBIND(i)	((i) << 4)	/* just a helper */
#define ELF32_ST_TYPE(i)	((i) & 0xf)
/*
 * Compose an st_info value from binding @b and type @i. It must be
 * built from the ELF32_-prefixed helpers: no unprefixed
 * ELF_ST_MKBIND/ELF_ST_TYPE macros exist in this header.
 */
#define ELF32_ST_INFO(b, i)	(ELF32_ST_MKBIND(b) + ELF32_ST_TYPE(i))
|
||||
|
||||
#define ELF64_ST_BIND(i) ELF32_ST_BIND(i)
|
||||
#define ELF64_ST_MKBIND(i) ELF32_ST_MKBIND(i)
|
||||
#define ELF64_ST_TYPE(i) ELF32_ST_TYPE(i)
|
||||
#define ELF64_ST_INFO(b, i) ELF32_ST_INFO(b, i)
|
||||
|
||||
/*
|
||||
* ELF standard typedefs (yet more proof that <stdint.h> was way overdue)
|
||||
*/
|
||||
|
||||
typedef u16 Elf32_Half;
|
||||
typedef s16 Elf32_SHalf;
|
||||
typedef u32 Elf32_Word;
|
||||
typedef s32 Elf32_Sword;
|
||||
typedef u64 Elf32_Xword;
|
||||
typedef s64 Elf32_Sxword;
|
||||
|
||||
typedef u32 Elf32_Off;
|
||||
typedef u32 Elf32_Addr;
|
||||
typedef u16 Elf32_Section;
|
||||
|
||||
typedef u16 Elf64_Half;
|
||||
typedef s16 Elf64_SHalf;
|
||||
typedef u32 Elf64_Word;
|
||||
typedef s32 Elf64_Sword;
|
||||
typedef u64 Elf64_Xword;
|
||||
typedef s64 Elf64_Sxword;
|
||||
|
||||
typedef u64 Elf64_Off;
|
||||
typedef u64 Elf64_Addr;
|
||||
typedef u16 Elf64_Section;
|
||||
|
||||
/*
|
||||
* Dynamic header
|
||||
*/
|
||||
|
||||
typedef struct elf32_dyn {
|
||||
Elf32_Sword d_tag;
|
||||
union {
|
||||
Elf32_Sword d_val;
|
||||
Elf32_Addr d_ptr;
|
||||
} d_un;
|
||||
} Elf32_Dyn;
|
||||
|
||||
typedef struct elf64_dyn {
|
||||
Elf64_Sxword d_tag;
|
||||
union {
|
||||
Elf64_Xword d_val;
|
||||
Elf64_Addr d_ptr;
|
||||
} d_un;
|
||||
} Elf64_Dyn;
|
||||
|
||||
/*
|
||||
* Relocations
|
||||
*/
|
||||
|
||||
#define ELF32_R_SYM(x) ((x) >> 8)
|
||||
#define ELF32_R_TYPE(x) ((x) & 0xff)
|
||||
|
||||
typedef struct elf32_rel {
|
||||
Elf32_Addr r_offset;
|
||||
Elf32_Word r_info;
|
||||
} Elf32_Rel;
|
||||
|
||||
typedef struct elf32_rela {
|
||||
Elf32_Addr r_offset;
|
||||
Elf32_Word r_info;
|
||||
Elf32_Sword r_addend;
|
||||
} Elf32_Rela;
|
||||
|
||||
enum reloc32_type {
|
||||
R_386_32 = 1, /* ordinary absolute relocation */
|
||||
R_386_PC32 = 2, /* PC-relative relocation */
|
||||
R_386_GOT32 = 3, /* an offset into GOT */
|
||||
R_386_PLT32 = 4, /* a PC-relative offset into PLT */
|
||||
R_386_COPY = 5, /* ??? */
|
||||
R_386_GLOB_DAT = 6, /* ??? */
|
||||
R_386_JUMP_SLOT = 7, /* ??? */
|
||||
R_386_RELATIVE = 8, /* ??? */
|
||||
R_386_GOTOFF = 9, /* an offset from GOT base */
|
||||
R_386_GOTPC = 10, /* a PC-relative offset _to_ GOT */
|
||||
R_386_TLS_TPOFF = 14, /* Offset in static TLS block */
|
||||
R_386_TLS_IE = 15, /* Address of GOT entry for static TLS block offset */
|
||||
|
||||
/* These are GNU extensions, but useful */
|
||||
R_386_16 = 20, /* A 16-bit absolute relocation */
|
||||
R_386_PC16 = 21, /* A 16-bit PC-relative relocation */
|
||||
R_386_8 = 22, /* An 8-bit absolute relocation */
|
||||
R_386_PC8 = 23 /* An 8-bit PC-relative relocation */
|
||||
};
|
||||
|
||||
#define ELF64_R_SYM(x) ((x) >> 32)
|
||||
#define ELF64_R_TYPE(x) ((x) & 0xffffffff)
|
||||
|
||||
typedef struct elf64_rel {
|
||||
Elf64_Addr r_offset;
|
||||
Elf64_Xword r_info;
|
||||
} Elf64_Rel;
|
||||
|
||||
typedef struct elf64_rela {
|
||||
Elf64_Addr r_offset;
|
||||
Elf64_Xword r_info;
|
||||
Elf64_Sxword r_addend;
|
||||
} Elf64_Rela;
|
||||
|
||||
enum reloc64_type {
|
||||
R_X86_64_NONE = 0, /* No reloc */
|
||||
R_X86_64_64 = 1, /* Direct 64 bit */
|
||||
R_X86_64_PC32 = 2, /* PC relative 32 bit signed */
|
||||
R_X86_64_GOT32 = 3, /* 32 bit GOT entry */
|
||||
R_X86_64_PLT32 = 4, /* 32 bit PLT address */
|
||||
R_X86_64_COPY = 5, /* Copy symbol at runtime */
|
||||
R_X86_64_GLOB_DAT = 6, /* Create GOT entry */
|
||||
R_X86_64_JUMP_SLOT = 7, /* Create PLT entry */
|
||||
R_X86_64_RELATIVE = 8, /* Adjust by program base */
|
||||
R_X86_64_GOTPCREL = 9, /* 32 bit signed PC relative offset to GOT */
|
||||
R_X86_64_32 = 10, /* Direct 32 bit zero extended */
|
||||
R_X86_64_32S = 11, /* Direct 32 bit sign extended */
|
||||
R_X86_64_16 = 12, /* Direct 16 bit zero extended */
|
||||
R_X86_64_PC16 = 13, /* 16 bit sign extended pc relative */
|
||||
R_X86_64_8 = 14, /* Direct 8 bit sign extended */
|
||||
R_X86_64_PC8 = 15, /* 8 bit sign extended pc relative */
|
||||
R_X86_64_DTPMOD64 = 16, /* ID of module containing symbol */
|
||||
R_X86_64_DTPOFF64 = 17, /* Offset in module's TLS block */
|
||||
R_X86_64_TPOFF64 = 18, /* Offset in initial TLS block */
|
||||
R_X86_64_TLSGD = 19, /* 32 bit signed PC relative offset to two GOT entries for GD symbol */
|
||||
R_X86_64_TLSLD = 20, /* 32 bit signed PC relative offset to two GOT entries for LD symbol */
|
||||
R_X86_64_DTPOFF32 = 21, /* Offset in TLS block */
|
||||
R_X86_64_GOTTPOFF = 22, /* 32 bit signed PC relative offset to GOT entry for IE symbol */
|
||||
R_X86_64_TPOFF32 = 23, /* Offset in initial TLS block */
|
||||
R_X86_64_PC64 = 24, /* word64 S + A - P */
|
||||
R_X86_64_GOTOFF64 = 25, /* word64 S + A - GOT */
|
||||
R_X86_64_GOTPC32 = 26, /* word32 GOT + A - P */
|
||||
R_X86_64_GOT64 = 27, /* word64 G + A */
|
||||
R_X86_64_GOTPCREL64 = 28,/* word64 G + GOT - P + A */
|
||||
R_X86_64_GOTPC64 = 29, /* word64 GOT - P + A */
|
||||
R_X86_64_GOTPLT64 = 30, /* word64 G + A */
|
||||
R_X86_64_PLTOFF64 = 31, /* word64 L - GOT + A */
|
||||
R_X86_64_SIZE32 = 32, /* word32 Z + A */
|
||||
R_X86_64_SIZE64 = 33, /* word64 Z + A */
|
||||
R_X86_64_GOTPC32_TLSDESC = 34, /* word32 */
|
||||
R_X86_64_TLSDESC_CALL = 35, /* none */
|
||||
R_X86_64_TLSDESC = 36 /* word64 x 2 */
|
||||
};
|
||||
|
||||
/*
|
||||
* Symbol
|
||||
*/
|
||||
|
||||
typedef struct elf32_sym {
|
||||
Elf32_Word st_name;
|
||||
Elf32_Addr st_value;
|
||||
Elf32_Word st_size;
|
||||
unsigned char st_info;
|
||||
unsigned char st_other;
|
||||
Elf32_Half st_shndx;
|
||||
} Elf32_Sym;
|
||||
|
||||
typedef struct elf64_sym {
|
||||
Elf64_Word st_name;
|
||||
unsigned char st_info;
|
||||
unsigned char st_other;
|
||||
Elf64_Half st_shndx;
|
||||
Elf64_Addr st_value;
|
||||
Elf64_Xword st_size;
|
||||
} Elf64_Sym;
|
||||
|
||||
/*
|
||||
* Main file header
|
||||
*/
|
||||
|
||||
typedef struct elf32_hdr {
|
||||
unsigned char e_ident[EI_NIDENT];
|
||||
Elf32_Half e_type;
|
||||
Elf32_Half e_machine;
|
||||
Elf32_Word e_version;
|
||||
Elf32_Addr e_entry;
|
||||
Elf32_Off e_phoff;
|
||||
Elf32_Off e_shoff;
|
||||
Elf32_Word e_flags;
|
||||
Elf32_Half e_ehsize;
|
||||
Elf32_Half e_phentsize;
|
||||
Elf32_Half e_phnum;
|
||||
Elf32_Half e_shentsize;
|
||||
Elf32_Half e_shnum;
|
||||
Elf32_Half e_shstrndx;
|
||||
} Elf32_Ehdr;
|
||||
|
||||
typedef struct elf64_hdr {
|
||||
unsigned char e_ident[EI_NIDENT];
|
||||
Elf64_Half e_type;
|
||||
Elf64_Half e_machine;
|
||||
Elf64_Word e_version;
|
||||
Elf64_Addr e_entry;
|
||||
Elf64_Off e_phoff;
|
||||
Elf64_Off e_shoff;
|
||||
Elf64_Word e_flags;
|
||||
Elf64_Half e_ehsize;
|
||||
Elf64_Half e_phentsize;
|
||||
Elf64_Half e_phnum;
|
||||
Elf64_Half e_shentsize;
|
||||
Elf64_Half e_shnum;
|
||||
Elf64_Half e_shstrndx;
|
||||
} Elf64_Ehdr;
|
||||
|
||||
/*
|
||||
* Program header
|
||||
*/
|
||||
|
||||
typedef struct elf32_phdr {
|
||||
Elf32_Word p_type;
|
||||
Elf32_Off p_offset;
|
||||
Elf32_Addr p_vaddr;
|
||||
Elf32_Addr p_paddr;
|
||||
Elf32_Word p_filesz;
|
||||
Elf32_Word p_memsz;
|
||||
Elf32_Word p_flags;
|
||||
Elf32_Word p_align;
|
||||
} Elf32_Phdr;
|
||||
|
||||
typedef struct elf64_phdr {
|
||||
Elf64_Word p_type;
|
||||
Elf64_Word p_flags;
|
||||
Elf64_Off p_offset;
|
||||
Elf64_Addr p_vaddr;
|
||||
Elf64_Addr p_paddr;
|
||||
Elf64_Xword p_filesz;
|
||||
Elf64_Xword p_memsz;
|
||||
Elf64_Xword p_align;
|
||||
} Elf64_Phdr;
|
||||
|
||||
/*
|
||||
* Section headers.
|
||||
*/
|
||||
|
||||
typedef struct elf32_shdr {
|
||||
Elf32_Word sh_name;
|
||||
Elf32_Word sh_type;
|
||||
Elf32_Word sh_flags;
|
||||
Elf32_Addr sh_addr;
|
||||
Elf32_Off sh_offset;
|
||||
Elf32_Word sh_size;
|
||||
Elf32_Word sh_link;
|
||||
Elf32_Word sh_info;
|
||||
Elf32_Word sh_addralign;
|
||||
Elf32_Word sh_entsize;
|
||||
} Elf32_Shdr;
|
||||
|
||||
typedef struct elf64_shdr {
|
||||
Elf64_Word sh_name;
|
||||
Elf64_Word sh_type;
|
||||
Elf64_Xword sh_flags;
|
||||
Elf64_Addr sh_addr;
|
||||
Elf64_Off sh_offset;
|
||||
Elf64_Xword sh_size;
|
||||
Elf64_Word sh_link;
|
||||
Elf64_Word sh_info;
|
||||
Elf64_Xword sh_addralign;
|
||||
Elf64_Xword sh_entsize;
|
||||
} Elf64_Shdr;
|
||||
|
||||
/*
|
||||
* Note header
|
||||
*/
|
||||
typedef struct elf32_note {
|
||||
Elf32_Word n_namesz; /* Name size */
|
||||
Elf32_Word n_descsz; /* Content size */
|
||||
Elf32_Word n_type; /* Content type */
|
||||
} Elf32_Nhdr;
|
||||
|
||||
typedef struct elf64_note {
|
||||
Elf64_Word n_namesz; /* Name size */
|
||||
Elf64_Word n_descsz; /* Content size */
|
||||
Elf64_Word n_type; /* Content type */
|
||||
} Elf64_Nhdr;
|
||||
|
||||
#endif /* CR_ELF_H */
|
191
include/image.h
Normal file
191
include/image.h
Normal file
@ -0,0 +1,191 @@
|
||||
#ifndef CR_IMAGE_H
|
||||
#define CR_IMAGE_H
|
||||
|
||||
#include "types.h"
|
||||
#include "compiler.h"
|
||||
|
||||
#define FDINFO_MAGIC 0x01010101
|
||||
#define PAGES_MAGIC 0x20202020
|
||||
#define CORE_MAGIC 0xa75b8d43
|
||||
#define SHMEM_MAGIC 0x03300330
|
||||
#define PIPEFS_MAGIC 0x50495045
|
||||
#define PSTREE_MAGIC 0x40044004
|
||||
#define PIPES_MAGIC 0x05055050
|
||||
|
||||
#define FDINFO_FD 1
|
||||
#define FDINFO_MAP 2
|
||||
|
||||
#define PAGE_IMAGE_SIZE 4096
|
||||
#define PAGE_RSS 1
|
||||
|
||||
struct fdinfo_entry {
|
||||
u8 type;
|
||||
u8 len;
|
||||
u16 flags;
|
||||
u32 pos;
|
||||
u64 addr;
|
||||
u8 name[0];
|
||||
} __packed;
|
||||
|
||||
struct shmem_entry {
|
||||
u64 start;
|
||||
u64 end;
|
||||
u64 shmid;
|
||||
} __packed;
|
||||
|
||||
struct pstree_entry {
|
||||
u32 pid;
|
||||
u32 nr_children;
|
||||
u32 children[0];
|
||||
} __packed;
|
||||
|
||||
struct pipe_entry {
|
||||
u32 fd;
|
||||
u32 pipeid;
|
||||
u32 flags;
|
||||
u32 bytes;
|
||||
u8 data[0];
|
||||
} __packed;
|
||||
|
||||
#define VMA_AREA_REGULAR (1 << 0)
|
||||
#define VMA_AREA_STACK (1 << 1)
|
||||
#define VMA_AREA_VSYSCALL (1 << 2)
|
||||
#define VMA_AREA_VDSO (1 << 3)
|
||||
#define VMA_FORCE_READ (1 << 4)
|
||||
#define VMA_AREA_HEAP (1 << 5)
|
||||
#define VMA_FILE_PRIVATE (1 << 6)
|
||||
#define VMA_FILE_SHARED (1 << 7)
|
||||
#define VMA_ANON_SHARED (1 << 8)
|
||||
#define VMA_ANON_PRIVATE (1 << 9)
|
||||
#define VMA_FORCE_WRITE (1 << 10)
|
||||
#define VMA_DUMP_ALL (1 << 11)
|
||||
|
||||
#define vma_entry_has(vma, s) (((vma)->status & (s)) == (s))
|
||||
|
||||
struct vma_entry {
|
||||
u64 start;
|
||||
u64 end;
|
||||
u64 pgoff;
|
||||
u32 prot;
|
||||
u32 flags;
|
||||
u32 status;
|
||||
u32 pid;
|
||||
s64 fd;
|
||||
u64 ino;
|
||||
u32 dev_maj;
|
||||
u32 dev_min;
|
||||
} __packed;
|
||||
|
||||
struct page_entry {
|
||||
u64 va;
|
||||
u8 data[PAGE_IMAGE_SIZE];
|
||||
} __packed;
|
||||
|
||||
#define HEADER_VERSION 1
|
||||
#define HEADER_ARCH_X86_64 1
|
||||
|
||||
struct image_header {
|
||||
u16 version;
|
||||
u16 arch;
|
||||
u32 flags;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* PTRACE_GETREGS
|
||||
* PTRACE_GETFPREGS
|
||||
* PTRACE_GETFPXREGS dep CONFIG_X86_32
|
||||
* PTRACE_GET_THREAD_AREA dep CONFIG_X86_32 || CONFIG_IA32_EMULATION
|
||||
* PTRACE_GETFDPIC dep CONFIG_BINFMT_ELF_FDPIC
|
||||
*
|
||||
* PTRACE_ARCH_PRCTL dep CONFIG_X86_64
|
||||
* ARCH_SET_GS/ARCH_GET_FS
|
||||
* ARCH_SET_FS/ARCH_GET_GS
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
struct user_regs_entry {
|
||||
u64 r15;
|
||||
u64 r14;
|
||||
u64 r13;
|
||||
u64 r12;
|
||||
u64 bp;
|
||||
u64 bx;
|
||||
u64 r11;
|
||||
u64 r10;
|
||||
u64 r9;
|
||||
u64 r8;
|
||||
u64 ax;
|
||||
u64 cx;
|
||||
u64 dx;
|
||||
u64 si;
|
||||
u64 di;
|
||||
u64 orig_ax;
|
||||
u64 ip;
|
||||
u64 cs;
|
||||
u64 flags;
|
||||
u64 sp;
|
||||
u64 ss;
|
||||
u64 fs_base;
|
||||
u64 gs_base;
|
||||
u64 ds;
|
||||
u64 es;
|
||||
u64 fs;
|
||||
u64 gs;
|
||||
} __packed;
|
||||
|
||||
struct desc_struct {
|
||||
union {
|
||||
struct {
|
||||
u32 a;
|
||||
u32 b;
|
||||
};
|
||||
struct {
|
||||
u16 limit0;
|
||||
u16 base0;
|
||||
unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1;
|
||||
unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8;
|
||||
};
|
||||
};
|
||||
} __packed;
|
||||
|
||||
struct user_fpregs_entry {
|
||||
u16 cwd;
|
||||
u16 swd;
|
||||
u16 twd; /* Note this is not the same as
|
||||
the 32bit/x87/FSAVE twd */
|
||||
u16 fop;
|
||||
u64 rip;
|
||||
u64 rdp;
|
||||
u32 mxcsr;
|
||||
u32 mxcsr_mask;
|
||||
u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
|
||||
u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
|
||||
u32 padding[24];
|
||||
} __packed;
|
||||
|
||||
#define GDT_ENTRY_TLS_ENTRIES 3
|
||||
|
||||
struct core_entry {
|
||||
struct image_header hdr;
|
||||
struct user_regs_entry gpregs;
|
||||
struct user_fpregs_entry fpregs;
|
||||
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
|
||||
u32 personality;
|
||||
} __packed;
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
#ifndef offsetof
/*
 * Fallback for translation units that did not get offsetof() from
 * <stddef.h>. The compiler builtin is used instead of dereferencing
 * a null pointer, so the result is a proper constant expression.
 * The (long) cast keeps the result type this fallback always had.
 */
# define offsetof(TYPE, MEMBER)	((long) __builtin_offsetof(TYPE, MEMBER))
#endif
|
||||
|
||||
/*
 * There are always 4 magic bytes at the
 * beginning of every file.
 */
|
||||
#define MAGIC_OFFSET (sizeof(u32))
|
||||
#define GET_FILE_OFF(s, m) (offsetof(s,m) + MAGIC_OFFSET)
|
||||
#define GET_FILE_OFF_AFTER(s) (sizeof(s) + MAGIC_OFFSET)
|
||||
|
||||
#endif /* CR_IMAGE_H */
|
286
include/list.h
Normal file
286
include/list.h
Normal file
@ -0,0 +1,286 @@
|
||||
#ifndef CR_LIST_H_
|
||||
#define CR_LIST_H_
|
||||
|
||||
/*
|
||||
* Double linked lists.
|
||||
*/
|
||||
|
||||
#include "compiler.h"
|
||||
|
||||
#define POISON_POINTER_DELTA 0
|
||||
#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA)
|
||||
#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA)
|
||||
|
||||
struct list_head {
|
||||
struct list_head *prev, *next;
|
||||
};
|
||||
|
||||
#define LIST_HEAD_INIT(name) { &(name), &(name) }
|
||||
#define LIST_HEAD(name) struct list_head name = LIST_HEAD_INIT(name)
|
||||
|
||||
static inline void INIT_LIST_HEAD(struct list_head *list)
|
||||
{
|
||||
list->next = list;
|
||||
list->prev = list;
|
||||
}
|
||||
|
||||
static inline void __list_add(struct list_head *new,
|
||||
struct list_head *prev,
|
||||
struct list_head *next)
|
||||
{
|
||||
next->prev = new;
|
||||
new->next = next;
|
||||
new->prev = prev;
|
||||
prev->next = new;
|
||||
}
|
||||
|
||||
static inline void list_add(struct list_head *new, struct list_head *head)
|
||||
{
|
||||
__list_add(new, head, head->next);
|
||||
}
|
||||
|
||||
static inline void list_add_tail(struct list_head *new, struct list_head *head)
|
||||
{
|
||||
__list_add(new, head->prev, head);
|
||||
}
|
||||
|
||||
static inline void __list_del(struct list_head * prev, struct list_head * next)
|
||||
{
|
||||
next->prev = prev;
|
||||
prev->next = next;
|
||||
}
|
||||
|
||||
static inline void __list_del_entry(struct list_head *entry)
|
||||
{
|
||||
__list_del(entry->prev, entry->next);
|
||||
}
|
||||
|
||||
static inline void list_del(struct list_head *entry)
|
||||
{
|
||||
__list_del(entry->prev, entry->next);
|
||||
entry->next = LIST_POISON1;
|
||||
entry->prev = LIST_POISON2;
|
||||
}
|
||||
|
||||
static inline void list_replace(struct list_head *old,
|
||||
struct list_head *new)
|
||||
{
|
||||
new->next = old->next;
|
||||
new->next->prev = new;
|
||||
new->prev = old->prev;
|
||||
new->prev->next = new;
|
||||
}
|
||||
|
||||
/*
 * Put @new into the position occupied by @old, then reset @old to
 * an empty list.
 */
static inline void list_replace_init(struct list_head *old,
				     struct list_head *new)
{
	list_replace(old, new);

	/* @old is no longer reachable from the list; make it empty. */
	INIT_LIST_HEAD(old);
}
|
||||
|
||||
/*
 * Unlink @entry from its list and reinitialise it as an empty list
 * instead of poisoning its links.
 */
static inline void list_del_init(struct list_head *entry)
{
	__list_del_entry(entry);

	/* Leave @entry self-linked so it can be tested or reused. */
	INIT_LIST_HEAD(entry);
}
|
||||
|
||||
/*
 * Unlink @list from wherever it currently is and insert it right
 * after @head.
 */
static inline void list_move(struct list_head *list, struct list_head *head)
{
	/* Unlink first, so the insertion sees the updated neighbours. */
	__list_del_entry(list);

	list_add(list, head);
}
|
||||
|
||||
/*
 * Unlink @list from wherever it currently is and insert it right
 * before @head.
 */
static inline void list_move_tail(struct list_head *list,
				  struct list_head *head)
{
	/* Unlink first, so the insertion sees the updated neighbours. */
	__list_del_entry(list);

	list_add_tail(list, head);
}
|
||||
|
||||
static inline int list_is_last(const struct list_head *list,
|
||||
const struct list_head *head)
|
||||
{
|
||||
return list->next == head;
|
||||
}
|
||||
|
||||
static inline int list_is_first(const struct list_head *list,
|
||||
const struct list_head *head)
|
||||
{
|
||||
return list->prev == head;
|
||||
}
|
||||
|
||||
static inline int list_empty(const struct list_head *head)
|
||||
{
|
||||
return head->next == head;
|
||||
}
|
||||
|
||||
static inline int list_empty_careful(const struct list_head *head)
|
||||
{
|
||||
struct list_head *next = head->next;
|
||||
return (next == head) && (next == head->prev);
|
||||
}
|
||||
static inline void list_rotate_left(struct list_head *head)
|
||||
{
|
||||
struct list_head *first;
|
||||
|
||||
if (!list_empty(head)) {
|
||||
first = head->next;
|
||||
list_move_tail(first, head);
|
||||
}
|
||||
}
|
||||
|
||||
static inline int list_is_singular(const struct list_head *head)
|
||||
{
|
||||
return !list_empty(head) && (head->next == head->prev);
|
||||
}
|
||||
|
||||
static inline void __list_cut_position(struct list_head *list,
|
||||
struct list_head *head, struct list_head *entry)
|
||||
{
|
||||
struct list_head *new_first = entry->next;
|
||||
list->next = head->next;
|
||||
list->next->prev = list;
|
||||
list->prev = entry;
|
||||
entry->next = list;
|
||||
head->next = new_first;
|
||||
new_first->prev = head;
|
||||
}
|
||||
|
||||
static inline void list_cut_position(struct list_head *list,
|
||||
struct list_head *head, struct list_head *entry)
|
||||
{
|
||||
if (list_empty(head))
|
||||
return;
|
||||
if (list_is_singular(head) &&
|
||||
(head->next != entry && head != entry))
|
||||
return;
|
||||
if (entry == head)
|
||||
INIT_LIST_HEAD(list);
|
||||
else
|
||||
__list_cut_position(list, head, entry);
|
||||
}
|
||||
|
||||
static inline void __list_splice(const struct list_head *list,
|
||||
struct list_head *prev,
|
||||
struct list_head *next)
|
||||
{
|
||||
struct list_head *first = list->next;
|
||||
struct list_head *last = list->prev;
|
||||
|
||||
first->prev = prev;
|
||||
prev->next = first;
|
||||
|
||||
last->next = next;
|
||||
next->prev = last;
|
||||
}
|
||||
|
||||
static inline void list_splice(const struct list_head *list,
|
||||
struct list_head *head)
|
||||
{
|
||||
if (!list_empty(list))
|
||||
__list_splice(list, head, head->next);
|
||||
}
|
||||
|
||||
static inline void list_splice_tail(struct list_head *list,
|
||||
struct list_head *head)
|
||||
{
|
||||
if (!list_empty(list))
|
||||
__list_splice(list, head->prev, head);
|
||||
}
|
||||
|
||||
static inline void list_splice_init(struct list_head *list,
|
||||
struct list_head *head)
|
||||
{
|
||||
if (!list_empty(list)) {
|
||||
__list_splice(list, head, head->next);
|
||||
INIT_LIST_HEAD(list);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void list_splice_tail_init(struct list_head *list,
|
||||
struct list_head *head)
|
||||
{
|
||||
if (!list_empty(list)) {
|
||||
__list_splice(list, head->prev, head);
|
||||
INIT_LIST_HEAD(list);
|
||||
}
|
||||
}
|
||||
|
||||
#define list_entry(ptr, type, member) \
|
||||
container_of(ptr, type, member)
|
||||
|
||||
#define list_first_entry(ptr, type, member) \
|
||||
list_entry((ptr)->next, type, member)
|
||||
|
||||
#define list_for_each(pos, head) \
|
||||
for (pos = (head)->next; pos != (head); pos = pos->next)
|
||||
|
||||
#define __list_for_each(pos, head) \
|
||||
for (pos = (head)->next; pos != (head); pos = pos->next)
|
||||
|
||||
#define list_for_each_prev(pos, head) \
|
||||
for (pos = (head)->prev; pos != (head); pos = pos->prev)
|
||||
|
||||
#define list_for_each_safe(pos, n, head) \
|
||||
for (pos = (head)->next, n = pos->next; pos != (head); \
|
||||
pos = n, n = pos->next)
|
||||
|
||||
#define list_for_each_prev_safe(pos, n, head) \
|
||||
for (pos = (head)->prev, n = pos->prev; \
|
||||
pos != (head); \
|
||||
pos = n, n = pos->prev)
|
||||
|
||||
#define list_for_each_entry(pos, head, member) \
|
||||
for (pos = list_entry((head)->next, typeof(*pos), member); \
|
||||
&pos->member != (head); \
|
||||
pos = list_entry(pos->member.next, typeof(*pos), member))
|
||||
|
||||
#define list_for_each_entry_reverse(pos, head, member) \
|
||||
for (pos = list_entry((head)->prev, typeof(*pos), member); \
|
||||
&pos->member != (head); \
|
||||
pos = list_entry(pos->member.prev, typeof(*pos), member))
|
||||
|
||||
#define list_prepare_entry(pos, head, member) \
|
||||
((pos) ? : list_entry(head, typeof(*pos), member))
|
||||
|
||||
#define list_for_each_entry_continue(pos, head, member) \
|
||||
for (pos = list_entry(pos->member.next, typeof(*pos), member); \
|
||||
&pos->member != (head); \
|
||||
pos = list_entry(pos->member.next, typeof(*pos), member))
|
||||
|
||||
#define list_for_each_entry_continue_reverse(pos, head, member) \
|
||||
for (pos = list_entry(pos->member.prev, typeof(*pos), member); \
|
||||
&pos->member != (head); \
|
||||
pos = list_entry(pos->member.prev, typeof(*pos), member))
|
||||
|
||||
#define list_for_each_entry_from(pos, head, member) \
|
||||
for (; &pos->member != (head); \
|
||||
pos = list_entry(pos->member.next, typeof(*pos), member))
|
||||
|
||||
#define list_for_each_entry_safe(pos, n, head, member) \
|
||||
for (pos = list_entry((head)->next, typeof(*pos), member), \
|
||||
n = list_entry(pos->member.next, typeof(*pos), member); \
|
||||
&pos->member != (head); \
|
||||
pos = n, n = list_entry(n->member.next, typeof(*n), member))
|
||||
|
||||
#define list_for_each_entry_safe_continue(pos, n, head, member) \
|
||||
for (pos = list_entry(pos->member.next, typeof(*pos), member), \
|
||||
n = list_entry(pos->member.next, typeof(*pos), member); \
|
||||
&pos->member != (head); \
|
||||
pos = n, n = list_entry(n->member.next, typeof(*n), member))
|
||||
|
||||
#define list_for_each_entry_safe_from(pos, n, head, member) \
|
||||
for (n = list_entry(pos->member.next, typeof(*pos), member); \
|
||||
&pos->member != (head); \
|
||||
pos = n, n = list_entry(n->member.next, typeof(*n), member))
|
||||
|
||||
#define list_for_each_entry_safe_reverse(pos, n, head, member) \
|
||||
for (pos = list_entry((head)->prev, typeof(*pos), member), \
|
||||
n = list_entry(pos->member.prev, typeof(*pos), member); \
|
||||
&pos->member != (head); \
|
||||
pos = n, n = list_entry(n->member.prev, typeof(*n), member))
|
||||
|
||||
#define list_safe_reset_next(pos, n, member) \
|
||||
n = list_entry(pos->member.next, typeof(*pos), member)
|
||||
|
||||
#endif /* CR_LIST_H_ */
|
46
include/parasite-syscall.h
Normal file
46
include/parasite-syscall.h
Normal file
@ -0,0 +1,46 @@
|
||||
#ifndef PARASITE_SYSCALL_H_
|
||||
#define PARASITE_SYSCALL_H_
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include "compiler.h"
|
||||
#include "types.h"
|
||||
#include "list.h"
|
||||
#include "crtools.h"
|
||||
|
||||
#define BUILTIN_SYSCALL_SIZE 8
|
||||
|
||||
/* parasite control block */
|
||||
struct parasite_ctl {
|
||||
pid_t pid; /* process where we live */
|
||||
struct vma_area *vma_area; /* our space */
|
||||
unsigned long parasite_ip; /* service routine start ip */
|
||||
unsigned long parasite_complete_ip; /* where we end execution */
|
||||
unsigned long addr_cmd; /* addr for command */
|
||||
unsigned long addr_args; /* address for arguments */
|
||||
};
|
||||
|
||||
int can_run_syscall(unsigned long ip, unsigned long start, unsigned long end);
|
||||
|
||||
void *mmap_seized(pid_t pid, user_regs_struct_t *regs,
|
||||
void *addr, size_t length, int prot,
|
||||
int flags, int fd, off_t offset);
|
||||
|
||||
int munmap_seized(pid_t pid, user_regs_struct_t *regs,
|
||||
void *addr, size_t length);
|
||||
int kill_seized(pid_t pid, user_regs_struct_t *where);
|
||||
|
||||
|
||||
int syscall_seized(pid_t pid,
|
||||
user_regs_struct_t *where,
|
||||
user_regs_struct_t *params,
|
||||
user_regs_struct_t *result);
|
||||
|
||||
int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_area_list,
|
||||
struct cr_fdset *cr_fdset, int fd_type);
|
||||
|
||||
struct parasite_ctl *parasite_infect_seized(pid_t pid, void *addr_hint, struct list_head *vma_area_list);
|
||||
int parasite_cure_seized(struct parasite_ctl **p_ctrl, struct list_head *vma_area_list);
|
||||
|
||||
#endif /* PARASITE_SYSCALL_H_ */
|
68
include/parasite.h
Normal file
68
include/parasite.h
Normal file
@ -0,0 +1,68 @@
|
||||
#ifndef CR_PARASITE_H_
|
||||
#define CR_PARASITE_H_
|
||||
|
||||
#include "compiler.h"
|
||||
#include "syscall.h"
|
||||
#include "image.h"
|
||||
|
||||
#define __parasite_head __used __section(.parasite.head.text)
|
||||
#define __parasite_text __used __section(.parasite.text)
|
||||
#define __parasite_stack __used __section(.parasite.stack)
|
||||
|
||||
#define PARASITE_STACK_SIZE 2048
|
||||
#define PARASITE_ARG_SIZE 256
|
||||
#define PARASITE_BRK_SIZE 32768
|
||||
|
||||
#define PARASITE_MAX_SIZE (64 << 10)
|
||||
|
||||
/* we need our own error codes for diagnostics */
|
||||
#define PARASITE_ERR_FAIL -1024
|
||||
#define PARASITE_ERR_OPEN -1025
|
||||
#define PARASITE_ERR_MMAP -1026
|
||||
#define PARASITE_ERR_MINCORE -1027
|
||||
#define PARASITE_ERR_MUNMAP -1028
|
||||
#define PARASITE_ERR_CLOSE -1029
|
||||
#define PARASITE_ERR_WRITE -1030
|
||||
#define PARASITE_ERR_MPROTECT -1031
|
||||
#define PARASITE_ERR_CORE_VMA -1032
|
||||
#define PARASITE_ERR_CORE_PAGE -1033
|
||||
|
||||
/* Command codes for the parasite; numbering starts at zero. */
enum {
	PARASITE_CMD_NONE = 0,
	PARASITE_CMD_KILLME,
	PARASITE_CMD_PINGME,
	PARASITE_CMD_DUMPPAGES,
	PARASITE_CMD_RESTORECORE,

	/* one past the last command; keep this entry last */
	PARASITE_CMD_MAX,
};
|
||||
|
||||
/* Command descriptor: a command code plus a sized, untyped argument pointer. */
typedef struct {
	unsigned long	command;	/* presumably one of PARASITE_CMD_* -- confirm */
	unsigned long	args_size;	/* size associated with @args */
	void		*args;
} parasite_args_t;
|
||||
|
||||
typedef struct {
|
||||
struct vma_entry vma_entry;
|
||||
unsigned long nrpages_dumped; /* how many pages are dumped */
|
||||
unsigned long fd;
|
||||
unsigned long open_mode;
|
||||
unsigned long open_flags;
|
||||
char open_path[64];
|
||||
} parasite_args_cmd_dumppages_t;
|
||||
|
||||
/*
 * Translate the start address of a loaded parasite blob into the address
 * of one of its symbols by adding the matching parasite_blob_offset__*
 * constant (defined outside this header).
 */
#define PARASITE_ARGS_ADDR(start)	((start) + parasite_blob_offset__parasite_args)
#define PARASITE_CMD_ADDR(start)	((start) + parasite_blob_offset__parasite_cmd)
#define PARASITE_HEAD_ADDR(start)	((start) + parasite_blob_offset__parasite_head_start)
#define PARASITE_COMPLETE_ADDR(start)	((start) + parasite_blob_offset__parasite_service_complete)
|
||||
|
||||
#endif /* CR_PARASITE_H_ */
|
79
include/rbtree.h
Normal file
79
include/rbtree.h
Normal file
@ -0,0 +1,79 @@
|
||||
/*
|
||||
* RBtree implementation adopted from the Linux
|
||||
* kernel sources.
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_RBTREE_H
|
||||
#define _LINUX_RBTREE_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/* Node colours and the mask covering the low bits of rb_parent_color. */
#define RB_RED		0
#define RB_BLACK	1
#define RB_COLOR_MASK	3
|
||||
|
||||
/*
 * Tree node. rb_parent_color carries the parent pointer with the colour
 * folded into its low bits (see rb_parent()/rb_color()); the structure is
 * aligned to sizeof(long) so those low pointer bits are free.
 */
struct rb_node {
	unsigned long	rb_parent_color;
	struct rb_node	*rb_right;
	struct rb_node	*rb_left;
} __attribute__((aligned(sizeof(long))));
|
||||
|
||||
/* Tree anchor: points at the top node, NULL when the tree is empty. */
struct rb_root {
	struct rb_node	*rb_node;
};
|
||||
|
||||
|
||||
/* Parent pointer: rb_parent_color with the colour bits masked off. */
#define rb_parent(r)						\
	((struct rb_node *)((r)->rb_parent_color & ~RB_COLOR_MASK))

/* Colour bit of a node and the predicates built on top of it. */
#define rb_color(r)	((r)->rb_parent_color & RB_BLACK)
#define rb_is_red(r)	(!rb_color(r))
#define rb_is_black(r)	rb_color(r)

/* Recolour a node in place; the parent pointer bits are left alone. */
#define rb_set_red(r)						\
	do {							\
		(r)->rb_parent_color &= ~RB_BLACK;		\
	} while (0)
#define rb_set_black(r)						\
	do {							\
		(r)->rb_parent_color |= RB_BLACK;		\
	} while (0)
|
||||
|
||||
static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
|
||||
{
|
||||
rb->rb_parent_color = (rb->rb_parent_color & RB_COLOR_MASK) |(unsigned long)p;
|
||||
}
|
||||
|
||||
static inline void rb_set_color(struct rb_node *rb, int color)
|
||||
{
|
||||
rb->rb_parent_color = (rb->rb_parent_color & ~RB_BLACK) | color;
|
||||
}
|
||||
|
||||
/* Initializer for an empty tree. */
#define RB_ROOT	(struct rb_root) { NULL, }

/* Get the structure embedding an rb_node from a pointer to that node. */
#define rb_entry(ptr, type, member)				\
	container_of(ptr, type, member)

/* True when the root has no nodes. */
#define RB_EMPTY_ROOT(root)	((root)->rb_node == NULL)

/*
 * A node whose parent pointer refers to itself is treated as "not linked
 * into any tree"; RB_CLEAR_NODE() puts a node into that state.
 * The arguments are parenthesized so that expression arguments (e.g. a
 * conditional) are compared/passed as a whole.
 */
#define RB_EMPTY_NODE(node)	(rb_parent(node) == (node))
#define RB_CLEAR_NODE(node)	(rb_set_parent((node), (node)))
|
||||
|
||||
static inline void rb_init_node(struct rb_node *rb)
|
||||
{
|
||||
rb->rb_parent_color = 0;
|
||||
rb->rb_right = NULL;
|
||||
rb->rb_left = NULL;
|
||||
RB_CLEAR_NODE(rb);
|
||||
}
|
||||
|
||||
void rb_insert_color(struct rb_node *, struct rb_root *);
|
||||
void rb_erase(struct rb_node *, struct rb_root *);
|
||||
|
||||
struct rb_node *rb_next(const struct rb_node *node);
|
||||
struct rb_node *rb_prev(const struct rb_node *node);
|
||||
struct rb_node *rb_first(const struct rb_root *node);
|
||||
struct rb_node *rb_last(const struct rb_root *node);
|
||||
|
||||
void rb_replace_node(struct rb_node *victim, struct rb_node *new,
|
||||
struct rb_root *root);
|
||||
|
||||
static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
|
||||
struct rb_node **rb_link)
|
||||
{
|
||||
node->rb_parent_color = (unsigned long )parent;
|
||||
node->rb_left = node->rb_right = NULL;
|
||||
|
||||
*rb_link = node;
|
||||
}
|
||||
|
||||
#endif /* _LINUX_RBTREE_H */
|
181
include/syscall.h
Normal file
181
include/syscall.h
Normal file
@ -0,0 +1,181 @@
|
||||
#ifndef CR_SYSCALL_H_
|
||||
#define CR_SYSCALL_H_
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "compiler.h"
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
/*
 * syscall0 - issue system call @nr with no arguments.
 *
 * Returns whatever the kernel leaves in rax (Linux reports failures as a
 * negative errno value). The `syscall` instruction overwrites rcx and r11,
 * so both are declared as clobbers. __asm__ is spelled with underscores so
 * the header also builds in strict ISO C modes.
 */
static long syscall0(int nr)
{
	long ret;

	__asm__ __volatile__("syscall"
			     : "=a" (ret)
			     : "a" ((long)nr)
			     : "rcx", "r11", "memory");
	return ret;
}
|
||||
|
||||
/*
 * syscall1 - issue system call @nr with one argument (rdi).
 *
 * Returns the raw rax value from the kernel (negative errno on failure).
 * rcx and r11 are listed as clobbers because `syscall` overwrites them.
 */
static long syscall1(int nr, unsigned long arg0)
{
	long ret;

	__asm__ __volatile__("syscall"
			     : "=a" (ret)
			     : "a" ((long)nr), "D" (arg0)
			     : "rcx", "r11", "memory");
	return ret;
}
|
||||
|
||||
/*
 * syscall2 - issue system call @nr with two arguments (rdi, rsi).
 *
 * Returns the raw rax value from the kernel (negative errno on failure).
 * rcx and r11 are listed as clobbers because `syscall` overwrites them.
 */
static long syscall2(int nr, unsigned long arg0, unsigned long arg1)
{
	long ret;

	__asm__ __volatile__("syscall"
			     : "=a" (ret)
			     : "a" ((long)nr), "D" (arg0), "S" (arg1)
			     : "rcx", "r11", "memory");
	return ret;
}
|
||||
|
||||
/*
 * syscall3 - issue system call @nr with three arguments (rdi, rsi, rdx).
 *
 * Returns the raw rax value from the kernel (negative errno on failure).
 * rcx and r11 are listed as clobbers because `syscall` overwrites them.
 */
static long syscall3(int nr, unsigned long arg0, unsigned long arg1,
		     unsigned long arg2)
{
	long ret;

	__asm__ __volatile__("syscall"
			     : "=a" (ret)
			     : "a" ((long)nr), "D" (arg0), "S" (arg1), "d" (arg2)
			     : "rcx", "r11", "memory");
	return ret;
}
|
||||
|
||||
/*
 * syscall4 - issue system call @nr with four arguments.
 *
 * The fourth argument travels in r10. The register variable must be named
 * as an asm input: otherwise the compiler is not obliged to keep the value
 * in r10 (or to emit the store at all) when the instruction executes.
 * rcx and r11 are clobbered by `syscall` itself.
 *
 * Returns the raw rax value from the kernel (negative errno on failure).
 */
static long syscall4(int nr, unsigned long arg0, unsigned long arg1,
		     unsigned long arg2, unsigned long arg3)
{
	register unsigned long r10 __asm__("r10") = arg3;
	long ret;

	__asm__ __volatile__("syscall"
			     : "=a" (ret)
			     : "a" ((long)nr), "D" (arg0), "S" (arg1), "d" (arg2),
			       "r" (r10)
			     : "rcx", "r11", "memory");
	return ret;
}
|
||||
|
||||
/*
 * syscall5 - issue system call @nr with five arguments.
 *
 * Arguments four and five travel in r10 and r8. Both register variables
 * are listed as asm inputs so the compiler must have them loaded when the
 * instruction executes; rcx and r11 are clobbered by `syscall` itself.
 *
 * Returns the raw rax value from the kernel (negative errno on failure).
 */
static long syscall5(int nr, unsigned long arg0, unsigned long arg1,
		     unsigned long arg2, unsigned long arg3,
		     unsigned long arg4)
{
	register unsigned long r10 __asm__("r10") = arg3;
	register unsigned long r8 __asm__("r8") = arg4;
	long ret;

	__asm__ __volatile__("syscall"
			     : "=a" (ret)
			     : "a" ((long)nr), "D" (arg0), "S" (arg1), "d" (arg2),
			       "r" (r10), "r" (r8)
			     : "rcx", "r11", "memory");
	return ret;
}
|
||||
|
||||
static long syscall6(int nr, unsigned long arg0, unsigned long arg1,
|
||||
unsigned long arg2, unsigned long arg3,
|
||||
unsigned long arg4, unsigned long arg5)
|
||||
{
|
||||
register unsigned long r10 asm("r10") = r10;
|
||||
register unsigned long r8 asm("r8") = r8;
|
||||
register unsigned long r9 asm("r9") = r9;
|
||||
long ret;
|
||||
|
||||
r10 = arg3;
|
||||
r8 = arg4;
|
||||
r9 = arg5;
|
||||
asm volatile("syscall"
|
||||
: "=a" (ret)
|
||||
: "a" (nr), "D" (arg0), "S" (arg1), "d" (arg2)
|
||||
: "memory");
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * syscall codes
 */
#define __NR_read	0
#define __NR_write	1
#define __NR_open	2
#define __NR_close	3
#define __NR_lseek	8
#define __NR_mmap	9
#define __NR_mprotect	10
#define __NR_munmap	11
#define __NR_mincore	27
#define __NR_dup	32
#define __NR_dup2	33
#define __NR_pause	34
#define __NR_nanosleep	35
#define __NR_getpid	39
#define __NR_exit	60
|
||||
|
||||
static unsigned long sys_pause(void)
|
||||
{
|
||||
return syscall0(__NR_pause);
|
||||
}
|
||||
|
||||
static unsigned long sys_mmap(void *addr, unsigned long len, unsigned long prot,
|
||||
unsigned long flags, unsigned long fd, unsigned long offset)
|
||||
{
|
||||
return syscall6(__NR_mmap, (unsigned long)addr,
|
||||
len, prot, flags, fd, offset);
|
||||
}
|
||||
|
||||
static unsigned long sys_munmap(void *addr,unsigned long len)
|
||||
{
|
||||
return syscall2(__NR_munmap, (unsigned long)addr, len);
|
||||
}
|
||||
|
||||
static long sys_open(const char *filename, unsigned long flags, unsigned long mode)
|
||||
{
|
||||
return syscall3(__NR_open, (unsigned long)filename, flags, mode);
|
||||
}
|
||||
|
||||
static long sys_close(int fd)
|
||||
{
|
||||
return syscall1(__NR_close, fd);
|
||||
}
|
||||
|
||||
static long sys_write(unsigned long fd, const void *buf, unsigned long count)
|
||||
{
|
||||
return syscall3(__NR_write, fd, (unsigned long)buf, count);
|
||||
}
|
||||
|
||||
static long sys_mincore(unsigned long addr, unsigned long size, void *vec)
|
||||
{
|
||||
return syscall3(__NR_mincore, addr, size, (unsigned long)vec);
|
||||
}
|
||||
|
||||
static long sys_lseek(unsigned long fd, unsigned long offset, unsigned long origin)
|
||||
{
|
||||
return syscall3(__NR_lseek, fd, offset, origin);
|
||||
}
|
||||
|
||||
static long sys_mprotect(unsigned long start, unsigned long len, unsigned long prot)
|
||||
{
|
||||
return syscall3(__NR_mprotect, start, len, prot);
|
||||
}
|
||||
|
||||
static long sys_nanosleep(struct timespec *req, struct timespec *rem)
|
||||
{
|
||||
return syscall2(__NR_nanosleep, (unsigned long)req, (unsigned long)rem);
|
||||
}
|
||||
|
||||
static long sys_read(unsigned long fd, void *buf, unsigned long count)
|
||||
{
|
||||
return syscall3(__NR_read, fd, (unsigned long)buf, count);
|
||||
}
|
||||
|
||||
#else /* CONFIG_X86_64 */
|
||||
# error x86-32 bit mode not yet implemented
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
#endif /* CR_SYSCALL_H_ */
|
132
include/types.h
Normal file
132
include/types.h
Normal file
@ -0,0 +1,132 @@
|
||||
#ifndef CR_TYPES_H_
|
||||
#define CR_TYPES_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "bitops.h"
|
||||
|
||||
/* some constants for ptrace */
#define PTRACE_SEIZE		0x4206
#define PTRACE_INTERRUPT	0x4207
#define PTRACE_LISTEN		0x4208

#define PTRACE_SEIZE_DEVEL	0x80000000

#define PTRACE_EVENT_FORK	1
#define PTRACE_EVENT_VFORK	2
#define PTRACE_EVENT_CLONE	3
#define PTRACE_EVENT_EXEC	4
#define PTRACE_EVENT_VFORK_DONE	5
#define PTRACE_EVENT_EXIT	6
#define PTRACE_EVENT_STOP	7

#define PTRACE_O_TRACESYSGOOD	0x00000001
#define PTRACE_O_TRACEFORK	0x00000002
#define PTRACE_O_TRACEVFORK	0x00000004
#define PTRACE_O_TRACECLONE	0x00000008
#define PTRACE_O_TRACEEXEC	0x00000010
#define PTRACE_O_TRACEVFORKDONE	0x00000020
#define PTRACE_O_TRACEEXIT	0x00000040

/* fcntl: pipe-size commands, defined only when the system headers lack them */
#ifndef F_LINUX_SPECIFIC_BASE
#define F_LINUX_SPECIFIC_BASE	1024
#endif
#ifndef F_SETPIPE_SZ
# define F_SETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 7)
#endif
#ifndef F_GETPIPE_SZ
# define F_GETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 8)
#endif

#define CLONE_CHILD_USEPID	0x02000000
#define CLONE_VFORK		0x00004000
|
||||
|
||||
/* Short fixed-width aliases, unsigned/signed pairs from 64 down to 8 bits. */
typedef uint64_t	u64;
typedef int64_t		s64;

typedef unsigned int	u32;
typedef signed int	s32;

typedef unsigned short	u16;
typedef signed short	s16;

typedef unsigned char	u8;
typedef signed char	s8;
|
||||
|
||||
/*
 * Everything above the low 8 bits of a device number.
 * NOTE(review): this is a plain shift with no masking of higher bits --
 * confirm callers only pass device numbers where that is intended.
 */
#define MAJOR(dev)		((dev)>>8)
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
/*
 * General-purpose register set for x86-64: 27 unsigned long slots.
 * Field order is part of the layout and must not change.
 */
typedef struct {
	unsigned long r15, r14, r13, r12;
	unsigned long bp, bx;
	unsigned long r11, r10, r9, r8;
	unsigned long ax, cx, dx, si, di;
	unsigned long orig_ax;
	unsigned long ip, cs, flags, sp, ss;
	unsigned long fs_base, gs_base;
	unsigned long ds, es, fs, gs;
} user_regs_struct_t;
|
||||
|
||||
typedef struct {
|
||||
unsigned short cwd;
|
||||
unsigned short swd;
|
||||
unsigned short twd; /* Note this is not the same as
|
||||
the 32bit/x87/FSAVE twd */
|
||||
unsigned short fop;
|
||||
u64 rip;
|
||||
u64 rdp;
|
||||
u32 mxcsr;
|
||||
u32 mxcsr_mask;
|
||||
u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
|
||||
u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
|
||||
u32 padding[24];
|
||||
} user_fpregs_struct_t;
|
||||
|
||||
#else /* CONFIG_X86_64 */
|
||||
|
||||
/*
 * General-purpose register set used when CONFIG_X86_64 is not defined:
 * 17 unsigned long slots. Field order is part of the layout.
 */
typedef struct {
	unsigned long bx, cx, dx, si, di;
	unsigned long bp, ax;
	unsigned long ds, es, fs, gs;
	unsigned long orig_ax;
	unsigned long ip, cs, flags, sp, ss;
} user_regs_struct_t;
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/* Fallback used only when nothing else has defined PAGE_SIZE. */
#ifndef PAGE_SIZE
# define PAGE_SIZE	4096
#endif
|
||||
|
||||
#endif /* CR_TYPES_H_ */
|
178
include/util.h
Normal file
178
include/util.h
Normal file
@ -0,0 +1,178 @@
|
||||
#ifndef UTIL_H_
|
||||
#define UTIL_H_
|
||||
|
||||
/*
|
||||
* Some bits are stolen from perf and kvm tools
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "compiler.h"
|
||||
#include "types.h"
|
||||
|
||||
extern void printk(const char *format, ...);
|
||||
|
||||
/*
 * Logging front-ends on top of printk(). pr_error() and pr_panic() prefix
 * the message with the calling file and line, pr_warning() with a fixed tag.
 */
#define pr_info(fmt, ...)					\
	printk(fmt, ##__VA_ARGS__)
#define pr_error(fmt, ...)					\
	printk("Error (%s:%d): " fmt, __FILE__, __LINE__, ##__VA_ARGS__)
#define pr_panic(fmt, ...)					\
	printk("PANIC (%s:%d): " fmt, __FILE__, __LINE__, ##__VA_ARGS__)
#define pr_warning(fmt, ...)					\
	printk("Warning: " fmt, ##__VA_ARGS__)
|
||||
|
||||
/* Log the current file/line and jump to @label. */
#define pr_error_jmp(label)					\
	do {							\
		printk("EJMP: %s:%d\n", __FILE__, __LINE__);	\
		goto label;					\
	} while (0)

/* Jump to @label (with a log line) when @code is non-zero. */
#define jerr(code, label)					\
	do {							\
		if ((code)) {					\
			pr_error_jmp(label);			\
		}						\
	} while (0)

/*
 * Same as jerr() but the test is "(code) cond", where @cond is a raw
 * token sequence such as "< 0" pasted after the expression.
 */
#define jerr_cond(code, cond, label)				\
	do {							\
		if ((code) cond) {				\
			pr_error_jmp(label);			\
		}						\
	} while (0)

/* Store @code in @rc first, then jump to @label when it is non-zero. */
#define jerr_rc(code, rc, label)				\
	do {							\
		rc = (code);					\
		if (rc) {					\
			pr_error_jmp(label);			\
		}						\
	} while (0)
|
||||
|
||||
/*
 * Debug output is compiled out: the active definition expands to nothing.
 * Flip the "#if 0" to get messages tagged with function, file and line.
 */
#if 0
#define pr_debug(fmt, ...)					\
	do {							\
		printk("%s (%s:%d): " fmt,			\
		       __func__, __FILE__, __LINE__,		\
		       ##__VA_ARGS__);				\
	} while (0)
#else
#define pr_debug(fmt, ...)
#endif
|
||||
|
||||
/* Print a message tagged with file and line, then terminate with exit(1). */
#define die(fmt, ...)						\
	do {							\
		printk("die (%s:%d): " fmt,			\
		       __FILE__, __LINE__, ##__VA_ARGS__);	\
		exit(1);					\
	} while (0)
|
||||
|
||||
/*
 * pr_error() variant that puts strerror(errno) in front of the message.
 * The including file has to make errno visible (<errno.h>).
 */
#define pr_perror(fmt, ...)					\
	do {							\
		pr_error("%s: " fmt,				\
			 strerror(errno), ##__VA_ARGS__);	\
	} while (0)
|
||||
|
||||
/* Send SIGSTOP / SIGCONT to @pid; the value of kill() is the result. */
#define stop_task(pid)		kill(pid, SIGSTOP)
#define continue_task(pid)	kill(pid, SIGCONT)
|
||||
|
||||
/* write() the object @ptr points to; evaluates to write()'s return value. */
#define write_ptr(fd, ptr)					\
	write(fd, (ptr), sizeof(*(ptr)))

/* Like write_ptr(), but jump to @err unless the whole object was written. */
#define write_ptr_safe(fd, ptr, err)				\
	jerr(write_ptr(fd, ptr) != sizeof(*(ptr)), err)

/* write() @size bytes from @ptr; jump to @err on a short or failed write. */
#define write_safe(fd, ptr, size, err)				\
	jerr(write(fd, (ptr), (size)) != (size), err)

/* Write an immediate value by first copying it into a temporary. */
#define write_safe_imm(fd, imm, err)				\
	do {							\
		typeof(imm) x__ = imm;				\
		write_ptr_safe(fd, &x__, err);			\
	} while (0)
|
||||
|
||||
/* read() exactly @size bytes into @ptr, otherwise jump to @err. */
#define read_safe(fd, ptr, size, err)				\
	jerr(read(fd, ptr, (size)) != (size), err)

/* read() exactly one object into @ptr, otherwise jump to @err. */
#define read_ptr_safe(fd, ptr, err)				\
	jerr(read(fd, ptr, sizeof(*(ptr))) != sizeof(*(ptr)), err)

/*
 * read() @size bytes into @ptr, leaving the result in @rc:
 * 0 bytes jumps to @eof, any other short or failed read to @err.
 */
#define read_safe_eof(fd, ptr, size, rc, err, eof)		\
	do {							\
		rc = read(fd, ptr, (size));			\
		if (!rc)					\
			goto eof;				\
		if (rc != (size))				\
			goto err;				\
	} while (0)

/* read_safe_eof() for exactly one object of the pointed-to type. */
#define read_ptr_safe_eof(fd, ptr, rc, err, eof)		\
	read_safe_eof(fd, ptr, sizeof(*(ptr)), rc, err, eof)
|
||||
|
||||
int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes);
|
||||
int ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes);
|
||||
int ptrace_show_area(pid_t pid, void *addr, long bytes);
|
||||
int ptrace_show_area_r(pid_t pid, void *addr, long bytes);
|
||||
|
||||
int seize_task(pid_t pid);
|
||||
int unseize_task(pid_t pid);
|
||||
|
||||
void printk_registers(user_regs_struct_t *regs);
|
||||
void printk_siginfo(siginfo_t *siginfo);
|
||||
|
||||
struct vma_area;
|
||||
struct list_head;
|
||||
|
||||
void printk_vma(struct vma_area *vma_area);
|
||||
|
||||
/* A special marker: an entry whose start and end are both zero */
#define is_ending_vma(vma)	(!(vma)->start && !(vma)->end)
|
||||
|
||||
/* Print every vma_area linked on @head through its "list" member. */
#define pr_info_vma_list(head)					\
	do {							\
		struct vma_area *vma;				\
								\
		list_for_each_entry(vma, head, list)		\
			pr_info_vma(vma);			\
	} while (0)
|
||||
|
||||
/*
 * Allocate a zeroed vma_area with shmid, vm_file_fd and vma.fd preset
 * to -1. Evaluates to the new object, or NULL when allocation failed.
 */
#define alloc_vma_area()					\
	({							\
		struct vma_area *p__ = xzalloc(sizeof(*p__));	\
								\
		if (p__) {					\
			p__->shmid	= -1;			\
			p__->vm_file_fd	= -1;			\
			p__->vma.fd	= -1;			\
		}						\
		p__;						\
	})
|
||||
|
||||
/* pr_info-style names for the printk_* dump helpers. */
#define pr_info_vma(vma_area)		printk_vma(vma_area)
#define pr_info_registers(regs)		printk_registers(regs)
#define pr_info_siginfo(siginfo)	printk_siginfo(siginfo)
|
||||
|
||||
int reopen_fd_as(int new_fd, int old_fd);
|
||||
int parse_maps(pid_t pid, struct list_head *vma_list);
|
||||
|
||||
/*
 * Call allocator @op with the trailing arguments and log an error naming
 * the calling function and @size when it returns NULL. Evaluates to the
 * allocator's result either way.
 */
#define __xalloc(op, size, ...)						\
	({								\
		void *___p = op( __VA_ARGS__ );				\
									\
		if (!___p)						\
			pr_error("%s: Can't allocate %li bytes\n",	\
				 __func__, (long)(size));		\
		___p;							\
	})

/* malloc / calloc(1, size) / realloc with the failure logging above. */
#define xmalloc(size)		__xalloc(malloc, size, size)
#define xzalloc(size)		__xalloc(calloc, size, 1, size)
#define xrealloc(p, size)	__xalloc(realloc, size, p, size)
|
||||
|
||||
/*
 * Release memory obtained from the x*alloc() helpers.
 * free(NULL) is a no-op, so no guard is needed; expanding to a single
 * call also keeps the macro safe inside an unbraced if/else.
 */
#define xfree(p)		free(p)
|
||||
|
||||
/*
 * Resize the buffer *@pptr to @size bytes without losing it on failure.
 *
 * Evaluates to 0 and updates *@pptr when xrealloc() succeeds; evaluates
 * to -1 and leaves *@pptr untouched otherwise. @pptr is evaluated once
 * and is parenthesized, so any pointer-valued expression may be passed;
 * the internal names carry a suffix so they do not capture identifiers
 * used in the caller's arguments.
 */
#define xrealloc_safe(pptr, size)				\
	({							\
		__typeof__(pptr) pp__xrs = (pptr);		\
		int ret__xrs = -1;				\
		void *new__xrs = xrealloc(*pp__xrs, size);	\
								\
		if (new__xrs) {					\
			*pp__xrs = new__xrs;			\
			ret__xrs = 0;				\
		}						\
		ret__xrs;					\
	})
|
||||
|
||||
#endif /* UTIL_H_ */
|
636
kernel/binfmt-elf-for-cr-4
Normal file
636
kernel/binfmt-elf-for-cr-4
Normal file
@ -0,0 +1,636 @@
|
||||
elf: Add support for loading files
|
||||
|
||||
This patch adds the ability to run checkpoint files by extending
the ELF file format.
|
||||
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
---
|
||||
arch/x86/include/asm/elf.h | 3
|
||||
arch/x86/vdso/vma.c | 22 ++
|
||||
fs/binfmt_elf.c | 404 ++++++++++++++++++++++++++++++++++++++++++++-
|
||||
include/linux/elf_ckpt.h | 135 +++++++++++++++
|
||||
4 files changed, 562 insertions(+), 2 deletions(-)
|
||||
|
||||
Index: linux-2.6.git/arch/x86/include/asm/elf.h
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/arch/x86/include/asm/elf.h
|
||||
+++ linux-2.6.git/arch/x86/include/asm/elf.h
|
||||
@@ -314,7 +314,8 @@ struct linux_binprm;
|
||||
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
|
||||
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
|
||||
int uses_interp);
|
||||
-
|
||||
+extern int arch_setup_additional_pages_at(struct linux_binprm *bprm,
|
||||
+ void *addr, int uses_interp);
|
||||
extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
|
||||
#define compat_arch_setup_additional_pages syscall32_setup_pages
|
||||
|
||||
Index: linux-2.6.git/arch/x86/vdso/vma.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/arch/x86/vdso/vma.c
|
||||
+++ linux-2.6.git/arch/x86/vdso/vma.c
|
||||
@@ -137,6 +137,28 @@ up_fail:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+int arch_setup_additional_pages_at(struct linux_binprm *bprm, void *addr, int uses_interp)
|
||||
+{
|
||||
+ struct mm_struct *mm = current->mm;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (!vdso_enabled)
|
||||
+ return 0;
|
||||
+
|
||||
+ down_write(&mm->mmap_sem);
|
||||
+ current->mm->context.vdso = addr;
|
||||
+ ret = install_special_mapping(mm, (unsigned long)addr, vdso_size,
|
||||
+ VM_READ | VM_EXEC |
|
||||
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC |
|
||||
+ VM_ALWAYSDUMP,
|
||||
+ vdso_pages);
|
||||
+ if (ret)
|
||||
+ current->mm->context.vdso = NULL;
|
||||
+
|
||||
+ up_write(&mm->mmap_sem);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static __init int vdso_setup(char *s)
|
||||
{
|
||||
vdso_enabled = simple_strtoul(s, NULL, 0);
|
||||
Index: linux-2.6.git/fs/binfmt_elf.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/binfmt_elf.c
|
||||
+++ linux-2.6.git/fs/binfmt_elf.c
|
||||
@@ -36,6 +36,11 @@
|
||||
#include <asm/param.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
+#include <linux/elf_ckpt.h>
|
||||
+#include <linux/flex_array.h>
|
||||
+#include <asm/tlbflush.h>
|
||||
+#include <asm/desc.h>
|
||||
+
|
||||
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
|
||||
static int load_elf_library(struct file *);
|
||||
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
|
||||
@@ -556,6 +561,395 @@ static unsigned long randomize_stack_top
|
||||
#endif
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_X86_64
|
||||
+
|
||||
+/*
+ * load_elf_ckpt - replace the current image with an ET_CKPT checkpoint.
+ *
+ * Maps each PT_CKPT_VMA area, fills pages from the PT_CKPT_PAGES
+ * segment and loads registers and TLS from the PT_CKPT_CORE entry.
+ * Returns 0 on success or a negative errno value; after
+ * flush_old_exec() succeeds the old image is already gone.
+ */
+static int load_elf_ckpt(struct linux_binprm *bprm, struct pt_regs *regs,
+			 struct elfhdr *elf_ex, struct elf_phdr *elf_phdr)
+{
+	struct thread_struct *thread = &current->thread;
+	struct elf_phdr *elf_phdr_pages;
+	struct elf_phdr *elf_phdr_core;
+	struct flex_array *fa = NULL;
+	struct vma_entry *vma_entry_ptr;
+	int nr_vma_found, nr_vma_mapped;
+	struct vma_entry vma_entry;
+	struct file *file = NULL;
+	unsigned long map_addr;
+
+	unsigned long start_code, end_code, start_data, end_data;
+	unsigned long start_brk, brk, start_stack;
+	unsigned long vdso;
+
+	struct core_entry core_entry;
+	int i, ret = -ENOEXEC;
+	loff_t off;
+
+	int cpu, seg;
+
+	BUILD_BUG_ON(CKPT_GDT_ENTRY_TLS_ENTRIES != GDT_ENTRY_TLS_ENTRIES);
+	BUILD_BUG_ON(CKPT_PAGE_SIZE != PAGE_SIZE);
+
+	elf_phdr_core = NULL;
+	elf_phdr_pages = NULL;
+	nr_vma_found = 0;
+	nr_vma_mapped = 0;
+
+	start_code = -1UL;
+	end_code = 0;
+
+	start_data = -1UL;
+	end_data = 0;
+
+	start_stack = -1UL;
+	start_brk = -1UL;
+	brk = -1UL;
+
+	vdso = -1UL;
+
+	fa = flex_array_alloc(sizeof(vma_entry), elf_ex->e_phnum, GFP_KERNEL);
+	if (!fa || flex_array_prealloc(fa, 0, elf_ex->e_phnum, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		if (fa) {
+			flex_array_free(fa);
+			fa = NULL;
+		}
+		goto out;
+	}
+
+	/* Flush all traces of the currently running executable */
+	ret = flush_old_exec(bprm);
+	if (ret)
+		goto out;
+
+	/* No return point */
+	current->flags &= ~PF_FORKNOEXEC;
+	current->mm->def_flags = 0;
+
+	/*
+	 * We don't care about parameters passed (such as argc, argv, env)
+	 * when execute checkpoint file because we're to substitute
+	 * all the things anyway -- so drop any previous memory mappings.
+	 */
+	do_munmap(current->mm, 0, TASK_SIZE);
+
+	SET_PERSONALITY(*elf_ex);
+
+	for (i = 0; i < elf_ex->e_phnum; i++) {
+
+		switch (elf_phdr[i].p_type) {
+		case PT_CKPT_VMA:
+			ret = kernel_read(bprm->file, elf_phdr[i].p_offset,
+					  (char *)&vma_entry, sizeof(vma_entry));
+			if (ret != sizeof(vma_entry)) {
+				pr_err("elf-ckpt: Can't read vma_entry\n");
+				ret = -EIO;
+				goto out;
+			}
+			if (flex_array_put(fa, nr_vma_found, &vma_entry, GFP_KERNEL))
+				BUG();
+
+			/* We need to know if there is executable stack */
+			if (vma_entry.status & VMA_AREA_STACK) {
+				if (vma_entry.prot & PROT_EXEC)
+					current->personality |= READ_IMPLIES_EXEC;
+			}
+
+			nr_vma_found++;
+			continue;
+		case PT_CKPT_CORE:
+			elf_phdr_core = &elf_phdr[i];
+			continue;
+		case PT_CKPT_PAGES:
+			elf_phdr_pages = &elf_phdr[i];
+			continue;
+		default:
+			continue;
+		}
+	}
+
+	/* Be sure it has the file structure we expect to see. */
+	if (!elf_phdr_pages || !elf_phdr_core || !nr_vma_found) {
+		send_sig(SIGKILL, current, 0);
+		ret = -ENOEXEC;
+		goto out;
+	}
+
+	/*
+	 * VMA randomization still needs to be set (just in case if
+	 * the program we restore will exec something else later).
+	 */
+	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+		current->flags |= PF_RANDOMIZE;
+
+	setup_new_exec(bprm);
+
+	current->mm->free_area_cache = current->mm->mmap_base;
+	current->mm->cached_hole_size = 0;
+
+	for (i = 0; i < nr_vma_found; i++) {
+		vma_entry_ptr = flex_array_get(fa, i);
+
+		if (vma_entry_ptr->status & VMA_AREA_HEAP) {
+			start_brk = vma_entry_ptr->start;
+		}
+
+		if (vma_entry_ptr->status & VMA_AREA_VDSO) {
+			vdso = vma_entry_ptr->start;
+		}
+
+		if (!(vma_entry_ptr->status & VMA_AREA_REGULAR))
+			continue;
+
+		if (vma_entry_ptr->fd != -1) {
+			file = fget((unsigned int)vma_entry_ptr->fd);
+			if (!file) {
+				send_sig(SIGKILL, current, 0);
+				ret = -EBADF;
+				goto out_unmap;
+			}
+		} else
+			file = NULL;
+
+		down_write(&current->mm->mmap_sem);
+		map_addr = do_mmap(file,
+				   vma_entry_ptr->start,
+				   vma_entry_ptr->end - vma_entry_ptr->start,
+				   vma_entry_ptr->prot,
+				   vma_entry_ptr->flags | MAP_FIXED,
+				   vma_entry_ptr->pgoff);
+		up_write(&current->mm->mmap_sem);
+
+		if (file) {
+			fput(file);
+			do_close((unsigned int)vma_entry_ptr->fd);
+		}
+
+		if (BAD_ADDR(map_addr)) {
+			send_sig(SIGKILL, current, 0);
+			ret = IS_ERR((void *)map_addr) ? PTR_ERR((void*)map_addr) : -EINVAL;
+			goto out_unmap;
+		}
+
+		/*
+		 * FIXME
+		 * Some heuristics to guess previously loaded real
+		 * elf file structure. Probably this things should
+		 * be exported via /proc somewhere instead.
+		 */
+
+		if (vma_entry_ptr->status & VMA_AREA_STACK) {
+			/* Note if stack is VM_GROWSUP -- it should be reversed */
+			start_stack = vma_entry_ptr->start;
+		}
+
+		if (vma_entry_ptr->prot & PROT_EXEC) {
+			if (start_code > vma_entry_ptr->start)
+				start_code = vma_entry_ptr->start;
+			if (end_code < vma_entry_ptr->end)
+				end_code = vma_entry_ptr->end;
+		} else {
+			/*
+			 * Neither .bss nor .data was being file mapped.
+			 * FIXME: .rodata are loaded by interp.
+			 */
+			if (!file) {
+				if (vma_entry_ptr->prot & (PROT_WRITE)) {
+					if (start_data > vma_entry_ptr->start)
+						start_data = vma_entry_ptr->start;
+					if (end_data < vma_entry_ptr->end)
+						end_data = vma_entry_ptr->end;
+				}
+			}
+		}
+
+		nr_vma_mapped++;
+	}
+
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+	if (vdso == -1UL) {
+		pr_err("elf-ckpt: Can't find VDSO address\n");
+		ret = -ENOEXEC;
+		goto out_unmap;
+	}
+#endif
+
+	/* Restore core data */
+	ret = kernel_read(bprm->file, elf_phdr_core->p_offset,
+			  (char *)&core_entry, sizeof(core_entry));
+	if (ret != sizeof(core_entry)) {
+		pr_err("elf-ckpt: Can't read core_entry\n");
+		ret = -EIO;
+		goto out_unmap;
+	}
+
+	bprm->p = start_stack;
+
+	current->mm->start_code = start_code;
+	current->mm->end_code = end_code;
+	current->mm->start_data = start_data;
+	current->mm->end_data = end_data;
+	current->mm->start_stack = start_stack;
+	current->mm->start_brk = start_brk;
+	current->mm->brk = brk;
+
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+	ret = arch_setup_additional_pages_at(bprm, (void *)vdso, 0);
+	if (ret) {
+		pr_err("elf-ckpt: Can't setup additional pages at %lx with %d\n",
+		       vdso, ret);
+		goto out_unmap;
+	}
+#endif
+
+	/*
+	 * Restore pages
+	 */
+	off = elf_phdr_pages->p_offset;
+	while (1) {
+		struct vm_area_struct *vma;
+		struct page *page;
+		void *page_data;
+		__u64 va;
+
+		ret = kernel_read(bprm->file, off, (char *)&va, sizeof(va));
+		if (ret != sizeof(va)) {
+			pr_err("elf-ckpt: Can't read page virtual address: "
+			       "ret = %d off = %lx\n", ret, (unsigned long)off);
+			ret = -EIO;
+			goto out_unmap;
+		}
+
+		/* End of pages reached */
+		if (!va)
+			break;
+
+		vma = find_vma(current->mm, (unsigned long)va);
+		if (!vma) {
+			pr_err("elf-ckpt: No VMA for page: %16lx\n", (unsigned long)va);
+			ret = -ESRCH;
+			goto out_unmap;
+		}
+
+		ret = get_user_pages(current, current->mm, (unsigned long)va,
+				     1, 1, 1, &page, NULL);
+		if (ret != 1) {
+			pr_err("elf-ckpt: Can't get user page: %16lx\n", (unsigned long)va);
+			ret = -EFAULT;
+			goto out_unmap;
+		}
+
+		page_data = kmap(page);
+		ret = kernel_read(bprm->file, off + sizeof(va), page_data, PAGE_SIZE);
+		kunmap(page);
+		put_page(page);
+
+		if (ret != PAGE_SIZE) {
+			pr_err("elf-ckpt: Can't read data on page: %16lx\n", (unsigned long)va);
+			ret = -EFAULT;
+			goto out_unmap;
+		}
+
+		off += sizeof(va) + PAGE_SIZE;
+	}
+
+	set_binfmt(&elf_format);
+
+	/*
+	 * Registers setup.
+	 *
+	 * Since we might be modifying MSRs we're
+	 * to be sure the task wont be preempted
+	 * until modification is complete.
+	 */
+	cpu = get_cpu();
+
+	regs->ip = core_entry.gpregs.ip;
+	regs->sp = core_entry.gpregs.sp;
+	regs->cs = core_entry.gpregs.cs;
+	regs->ss = core_entry.gpregs.ss;
+	regs->flags = core_entry.gpregs.flags;
+	regs->r15 = core_entry.gpregs.r15;
+	regs->r14 = core_entry.gpregs.r14;
+	regs->r13 = core_entry.gpregs.r13;
+	regs->r12 = core_entry.gpregs.r12;
+	regs->bp = core_entry.gpregs.bp;
+	regs->bx = core_entry.gpregs.bx;
+	regs->r11 = core_entry.gpregs.r11;
+	regs->r10 = core_entry.gpregs.r10;
+	regs->r9 = core_entry.gpregs.r9;
+	regs->r8 = core_entry.gpregs.r8;
+	regs->ax = core_entry.gpregs.ax;
+	regs->cx = core_entry.gpregs.cx;
+	regs->dx = core_entry.gpregs.dx;
+	regs->si = core_entry.gpregs.si;
+	regs->di = core_entry.gpregs.di;
+	regs->orig_ax = core_entry.gpregs.orig_ax;
+
+	thread->usersp = core_entry.gpregs.sp;
+	thread->ds = core_entry.gpregs.ds;
+	thread->es = core_entry.gpregs.es;
+	thread->fs = core_entry.gpregs.fs;
+	thread->gs = core_entry.gpregs.gs;
+
+	thread->fsindex = thread->fs;
+	thread->gsindex = thread->gs;
+
+	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) {
+		thread->tls_array[i].a = core_entry.tls_array[i].a;
+		thread->tls_array[i].b = core_entry.tls_array[i].b;
+	}
+
+	load_TLS(thread, cpu);
+
+	seg = thread->fsindex;
+	loadsegment(fs, seg);
+	savesegment(fs, seg);
+
+	if (seg != thread->fsindex) {
+		pr_err("Fixup on FS loading exception: %i %i\n",
+		       thread->fsindex, seg);
+	}
+
+	if (core_entry.gpregs.fs_base)
+		wrmsrl(MSR_FS_BASE, core_entry.gpregs.fs_base);
+
+	if (core_entry.gpregs.gs_base)
+		wrmsrl(MSR_GS_BASE, core_entry.gpregs.gs_base);
+
+	put_cpu();
+
+	ret = 0;
+out:
+	if (fa)
+		flex_array_free(fa);
+	return ret;
+
+out_unmap:
+	for (i = 0; i < nr_vma_mapped; i++) {
+		vma_entry_ptr = flex_array_get(fa, i);
+		down_write(&current->mm->mmap_sem);
+		do_munmap(current->mm, vma_entry_ptr->start,
+			  vma_entry_ptr->end - vma_entry_ptr->start);
+		up_write(&current->mm->mmap_sem);
+	}
+	goto out;
+}
|
||||
+#else
|
||||
+static int load_elf_ckpt(struct linux_binprm *bprm, struct pt_regs *regs,
|
||||
+ struct elfhdr *elf_ex, struct elf_phdr *elf_phdr)
|
||||
+{
|
||||
+ return -ENOEXEC;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
|
||||
{
|
||||
struct file *interpreter = NULL; /* to shut gcc up */
|
||||
@@ -592,7 +986,9 @@ static int load_elf_binary(struct linux_
|
||||
if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
|
||||
goto out;
|
||||
|
||||
- if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
|
||||
+ if (loc->elf_ex.e_type != ET_EXEC &&
|
||||
+ loc->elf_ex.e_type != ET_DYN &&
|
||||
+ loc->elf_ex.e_type != ET_CKPT)
|
||||
goto out;
|
||||
if (!elf_check_arch(&loc->elf_ex))
|
||||
goto out;
|
||||
@@ -619,6 +1015,12 @@ static int load_elf_binary(struct linux_
|
||||
goto out_free_ph;
|
||||
}
|
||||
|
||||
+ if (loc->elf_ex.e_type == ET_CKPT) {
|
||||
+ retval = load_elf_ckpt(bprm, regs, &loc->elf_ex,
|
||||
+ (struct elf_phdr *)elf_phdata);
|
||||
+ goto out_free_ph;
|
||||
+ }
|
||||
+
|
||||
elf_ppnt = elf_phdata;
|
||||
elf_bss = 0;
|
||||
elf_brk = 0;
|
||||
Index: linux-2.6.git/include/linux/elf_ckpt.h
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ linux-2.6.git/include/linux/elf_ckpt.h
|
||||
@@ -0,0 +1,135 @@
|
||||
+#ifndef _LINUX_ELF_CHECKPOINT_H
|
||||
+#define _LINUX_ELF_CHECKPOINT_H
|
||||
+
|
||||
+#include <linux/types.h>
|
||||
+#include <linux/elf-em.h>
|
||||
+
|
||||
+#ifdef __KERNEL__
|
||||
+
|
||||
+#include <asm/elf.h>
|
||||
+
|
||||
+/*
|
||||
+ * Elf extension includes new Elf file type
|
||||
+ * and program header types as well.
|
||||
+ */
|
||||
+#define ET_CKPT 5
|
||||
+
|
||||
+#define PT_CKPT_OFFSET 0x01010101
|
||||
+
|
||||
+#define PT_CKPT_VMA (PT_LOOS + PT_CKPT_OFFSET + 1)
|
||||
+#define PT_CKPT_CORE (PT_LOOS + PT_CKPT_OFFSET + 2)
|
||||
+#define PT_CKPT_PAGES (PT_LOOS + PT_CKPT_OFFSET + 3)
|
||||
+
|
||||
+#define CKPT_PAGE_SIZE 4096
|
||||
+#define CKPT_GDT_ENTRY_TLS_ENTRIES 3
|
||||
+
|
||||
+#define HEADER_VERSION 1
|
||||
+#define HEADER_ARCH_X86_64 1
|
||||
+
|
||||
+#define VMA_AREA_REGULAR (1 << 0)
|
||||
+#define VMA_AREA_STACK (1 << 1)
|
||||
+#define VMA_AREA_VSYSCALL (1 << 2)
|
||||
+#define VMA_AREA_VDSO (1 << 3)
|
||||
+#define VMA_FORCE_READ (1 << 4)
|
||||
+#define VMA_AREA_HEAP (1 << 5)
|
||||
+#define VMA_FILE_PRIVATE (1 << 6)
|
||||
+#define VMA_FILE_SHARED (1 << 7)
|
||||
+#define VMA_ANON_SHARED (1 << 8)
|
||||
+#define VMA_ANON_PRIVATE (1 << 9)
|
||||
+#define VMA_FORCE_WRITE (1 << 10)
|
||||
+
|
||||
+struct vma_entry {
|
||||
+ __u64 start;
|
||||
+ __u64 end;
|
||||
+ __u64 pgoff;
|
||||
+ __u32 prot;
|
||||
+ __u32 flags;
|
||||
+ __u32 status;
|
||||
+ __u32 pid;
|
||||
+ __s64 fd;
|
||||
+ __u64 ino;
|
||||
+ __u32 dev_maj;
|
||||
+ __u32 dev_min;
|
||||
+} __packed;
|
||||
+
|
||||
+struct page_entry {
|
||||
+ __u64 va;
|
||||
+ __u8 data[CKPT_PAGE_SIZE];
|
||||
+} __packed;
|
||||
+
|
||||
+struct image_header {
|
||||
+ __u16 version;
|
||||
+ __u16 arch;
|
||||
+ __u32 flags;
|
||||
+} __packed;
|
||||
+
|
||||
+struct user_regs_entry {
|
||||
+ __u64 r15;
|
||||
+ __u64 r14;
|
||||
+ __u64 r13;
|
||||
+ __u64 r12;
|
||||
+ __u64 bp;
|
||||
+ __u64 bx;
|
||||
+ __u64 r11;
|
||||
+ __u64 r10;
|
||||
+ __u64 r9;
|
||||
+ __u64 r8;
|
||||
+ __u64 ax;
|
||||
+ __u64 cx;
|
||||
+ __u64 dx;
|
||||
+ __u64 si;
|
||||
+ __u64 di;
|
||||
+ __u64 orig_ax;
|
||||
+ __u64 ip;
|
||||
+ __u64 cs;
|
||||
+ __u64 flags;
|
||||
+ __u64 sp;
|
||||
+ __u64 ss;
|
||||
+ __u64 fs_base;
|
||||
+ __u64 gs_base;
|
||||
+ __u64 ds;
|
||||
+ __u64 es;
|
||||
+ __u64 fs;
|
||||
+ __u64 gs;
|
||||
+} __packed;
|
||||
+
|
||||
+struct desc_struct_entry {
|
||||
+ union {
|
||||
+ struct {
|
||||
+ __u32 a;
|
||||
+ __u32 b;
|
||||
+ };
|
||||
+ struct {
|
||||
+ __u16 limit0;
|
||||
+ __u16 base0;
|
||||
+ unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1;
|
||||
+ unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8;
|
||||
+ };
|
||||
+ };
|
||||
+} __packed;
|
||||
+
|
||||
+struct user_fpregs_entry {
|
||||
+ __u16 cwd;
|
||||
+ __u16 swd;
|
||||
+ __u16 twd;
|
||||
+ __u16 fop;
|
||||
+ __u64 rip;
|
||||
+ __u64 rdp;
|
||||
+ __u32 mxcsr;
|
||||
+ __u32 mxcsr_mask;
|
||||
+ __u32 st_space[32];
|
||||
+ __u32 xmm_space[64];
|
||||
+ __u32 padding[24];
|
||||
+} __packed;
|
||||
+
|
||||
+struct core_entry {
|
||||
+ struct image_header header;
|
||||
+ struct user_regs_entry gpregs;
|
||||
+ struct user_fpregs_entry fpregs;
|
||||
+ struct desc_struct tls_array[CKPT_GDT_ENTRY_TLS_ENTRIES];
|
||||
+ __u32 personality;
|
||||
+} __packed;
|
||||
+
|
||||
+#endif /* __KERNEL__ */
|
||||
+
|
||||
+#endif /* _LINUX_ELF_CHECKPOINT_H */
|
172
kernel/cr-clone-with-pid-support
Normal file
172
kernel/cr-clone-with-pid-support
Normal file
@ -0,0 +1,172 @@
|
||||
Allow processes to be created with specified pid
|
||||
|
||||
We will need it to restore processes so they would not
|
||||
even notice that they were being checkpointed.
|
||||
|
||||
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
|
||||
---
|
||||
include/linux/pid.h | 2 -
|
||||
include/linux/sched.h | 1
|
||||
kernel/fork.c | 10 ++++++-
|
||||
kernel/pid.c | 70 ++++++++++++++++++++++++++++++++++++--------------
|
||||
4 files changed, 62 insertions(+), 21 deletions(-)
|
||||
|
||||
Index: linux-2.6.git/include/linux/pid.h
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/include/linux/pid.h
|
||||
+++ linux-2.6.git/include/linux/pid.h
|
||||
@@ -119,7 +119,7 @@ extern struct pid *find_get_pid(int nr);
|
||||
extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
|
||||
int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
|
||||
|
||||
-extern struct pid *alloc_pid(struct pid_namespace *ns);
|
||||
+extern struct pid *alloc_pid(struct pid_namespace *ns, int pid);
|
||||
extern void free_pid(struct pid *pid);
|
||||
|
||||
/*
|
||||
Index: linux-2.6.git/include/linux/sched.h
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/include/linux/sched.h
|
||||
+++ linux-2.6.git/include/linux/sched.h
|
||||
@@ -23,6 +23,7 @@
|
||||
#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
|
||||
/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
|
||||
and is now available for re-use. */
|
||||
+#define CLONE_CHILD_USEPID 0x02000000 /* use the given pid */
|
||||
#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
|
||||
#define CLONE_NEWIPC 0x08000000 /* New ipcs */
|
||||
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
|
||||
Index: linux-2.6.git/kernel/fork.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/kernel/fork.c
|
||||
+++ linux-2.6.git/kernel/fork.c
|
||||
@@ -1239,8 +1239,16 @@ static struct task_struct *copy_process(
|
||||
goto bad_fork_cleanup_io;
|
||||
|
||||
if (pid != &init_struct_pid) {
|
||||
+ int want_pid = 0;
|
||||
+
|
||||
+ if (clone_flags & CLONE_CHILD_USEPID) {
|
||||
+ retval = get_user(want_pid, child_tidptr);
|
||||
+ if (retval)
|
||||
+ goto bad_fork_cleanup_io;
|
||||
+ }
|
||||
+
|
||||
retval = -ENOMEM;
|
||||
- pid = alloc_pid(p->nsproxy->pid_ns);
|
||||
+ pid = alloc_pid(p->nsproxy->pid_ns, want_pid);
|
||||
if (!pid)
|
||||
goto bad_fork_cleanup_io;
|
||||
}
|
||||
Index: linux-2.6.git/kernel/pid.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/kernel/pid.c
|
||||
+++ linux-2.6.git/kernel/pid.c
|
||||
@@ -159,11 +159,55 @@ static void set_last_pid(struct pid_name
|
||||
} while ((prev != last_write) && (pid_before(base, last_write, pid)));
|
||||
}
|
||||
|
||||
-static int alloc_pidmap(struct pid_namespace *pid_ns)
|
||||
+static int alloc_pidmap_page(struct pidmap *map)
|
||||
+{
|
||||
+ if (unlikely(!map->page)) {
|
||||
+ void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
|
||||
+ /*
|
||||
+ * Free the page if someone raced with us
|
||||
+ * installing it:
|
||||
+ */
|
||||
+ spin_lock_irq(&pidmap_lock);
|
||||
+ if (!map->page) {
|
||||
+ map->page = page;
|
||||
+ page = NULL;
|
||||
+ }
|
||||
+ spin_unlock_irq(&pidmap_lock);
|
||||
+ kfree(page);
|
||||
+ if (unlikely(!map->page))
|
||||
+ return -ENOMEM;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int set_pidmap(struct pid_namespace *pid_ns, int pid)
|
||||
+{
|
||||
+ int offset;
|
||||
+ struct pidmap *map;
|
||||
+
|
||||
+ offset = pid & BITS_PER_PAGE_MASK;
|
||||
+ map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
|
||||
+
|
||||
+ if (alloc_pidmap_page(map) < 0)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ if (!test_and_set_bit(offset, map->page)) {
|
||||
+ atomic_dec(&map->nr_free);
|
||||
+ return pid;
|
||||
+ }
|
||||
+
|
||||
+ return -EBUSY;
|
||||
+}
|
||||
+
|
||||
+static int alloc_pidmap(struct pid_namespace *pid_ns, int desired_pid)
|
||||
{
|
||||
int i, offset, max_scan, pid, last = pid_ns->last_pid;
|
||||
struct pidmap *map;
|
||||
|
||||
+ if (desired_pid)
|
||||
+ return set_pidmap(pid_ns, desired_pid);
|
||||
+
|
||||
pid = last + 1;
|
||||
if (pid >= pid_max)
|
||||
pid = RESERVED_PIDS;
|
||||
@@ -176,22 +220,9 @@ static int alloc_pidmap(struct pid_names
|
||||
*/
|
||||
max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset;
|
||||
for (i = 0; i <= max_scan; ++i) {
|
||||
- if (unlikely(!map->page)) {
|
||||
- void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
|
||||
- /*
|
||||
- * Free the page if someone raced with us
|
||||
- * installing it:
|
||||
- */
|
||||
- spin_lock_irq(&pidmap_lock);
|
||||
- if (!map->page) {
|
||||
- map->page = page;
|
||||
- page = NULL;
|
||||
- }
|
||||
- spin_unlock_irq(&pidmap_lock);
|
||||
- kfree(page);
|
||||
- if (unlikely(!map->page))
|
||||
- break;
|
||||
- }
|
||||
+ if (alloc_pidmap_page(map) < 0)
|
||||
+ break;
|
||||
+
|
||||
if (likely(atomic_read(&map->nr_free))) {
|
||||
do {
|
||||
if (!test_and_set_bit(offset, map->page)) {
|
||||
@@ -277,7 +308,7 @@ void free_pid(struct pid *pid)
|
||||
call_rcu(&pid->rcu, delayed_put_pid);
|
||||
}
|
||||
|
||||
-struct pid *alloc_pid(struct pid_namespace *ns)
|
||||
+struct pid *alloc_pid(struct pid_namespace *ns, int this_ns_pid)
|
||||
{
|
||||
struct pid *pid;
|
||||
enum pid_type type;
|
||||
@@ -291,13 +322,14 @@ struct pid *alloc_pid(struct pid_namespa
|
||||
|
||||
tmp = ns;
|
||||
for (i = ns->level; i >= 0; i--) {
|
||||
- nr = alloc_pidmap(tmp);
|
||||
+ nr = alloc_pidmap(tmp, this_ns_pid);
|
||||
if (nr < 0)
|
||||
goto out_free;
|
||||
|
||||
pid->numbers[i].nr = nr;
|
||||
pid->numbers[i].ns = tmp;
|
||||
tmp = tmp->parent;
|
||||
+ this_ns_pid = 0;
|
||||
}
|
||||
|
||||
get_pid_ns(ns);
|
46
kernel/cr-proc-add-children
Normal file
46
kernel/cr-proc-add-children
Normal file
@ -0,0 +1,46 @@
|
||||
proc: Introduce the Children: line in /proc/<pid>/status
|
||||
|
||||
From: Pavel Emelyanov <xemul@parallels.com>
|
||||
|
||||
Although the pids of a task's children can be obtained by other means, it is just
|
||||
more convenient to have them this way.
|
||||
|
||||
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
|
||||
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
---
|
||||
fs/proc/array.c | 14 ++++++++++++++
|
||||
1 file changed, 14 insertions(+)
|
||||
|
||||
Index: linux-2.6.git/fs/proc/array.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/array.c
|
||||
+++ linux-2.6.git/fs/proc/array.c
|
||||
@@ -158,6 +158,18 @@ static inline const char *get_task_state
|
||||
return *p;
|
||||
}
|
||||
|
||||
+static void task_children(struct seq_file *m, struct task_struct *p, struct pid_namespace *ns)
|
||||
+{
|
||||
+ struct task_struct *c;
|
||||
+
|
||||
+ seq_printf(m, "Children:");
|
||||
+ read_lock(&tasklist_lock);
|
||||
+ list_for_each_entry(c, &p->children, sibling)
|
||||
+ seq_printf(m, " %d", pid_nr_ns(task_pid(c), ns));
|
||||
+ read_unlock(&tasklist_lock);
|
||||
+ seq_putc(m, '\n');
|
||||
+}
|
||||
+
|
||||
static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
|
||||
struct pid *pid, struct task_struct *p)
|
||||
{
|
||||
@@ -192,6 +204,8 @@ static inline void task_state(struct seq
|
||||
cred->uid, cred->euid, cred->suid, cred->fsuid,
|
||||
cred->gid, cred->egid, cred->sgid, cred->fsgid);
|
||||
|
||||
+ task_children(m, p, ns);
|
||||
+
|
||||
task_lock(p);
|
||||
if (p->files)
|
||||
fdt = files_fdtable(p->files);
|
522
kernel/cr-proc-map-files-21
Normal file
522
kernel/cr-proc-map-files-21
Normal file
@ -0,0 +1,522 @@
|
||||
fs, proc: Introduce the /proc/<pid>/map_files/ directory v14
|
||||
|
||||
From: Pavel Emelyanov <xemul@parallels.com>
|
||||
|
||||
This one behaves similarly to the /proc/<pid>/fd/ one - it contains symlinks
|
||||
one for each mapping with file, the name of a symlink is "vma->vm_start-vma->vm_end",
|
||||
the target is the file. Opening a symlink results in a file that points exactly
|
||||
to the same inode as the vma's one.
|
||||
|
||||
For example the ls -l of some arbitrary /proc/<pid>/map_files/
|
||||
|
||||
| lr-x------ 1 root root 64 Aug 26 06:40 7f8f80403000-7f8f80404000 -> /lib64/libc-2.5.so
|
||||
| lr-x------ 1 root root 64 Aug 26 06:40 7f8f8061e000-7f8f80620000 -> /lib64/libselinux.so.1
|
||||
| lr-x------ 1 root root 64 Aug 26 06:40 7f8f80826000-7f8f80827000 -> /lib64/libacl.so.1.1.0
|
||||
| lr-x------ 1 root root 64 Aug 26 06:40 7f8f80a2f000-7f8f80a30000 -> /lib64/librt-2.5.so
|
||||
| lr-x------ 1 root root 64 Aug 26 06:40 7f8f80a30000-7f8f80a4c000 -> /lib64/ld-2.5.so
|
||||
|
||||
This *helps* the checkpointing process in three ways:
|
||||
|
||||
1. When dumping a task's mappings we know the exact file that is mapped by a particular
|
||||
region. We do this by opening /proc/$pid/map_files/$address symlink the way we do
|
||||
with file descriptors.
|
||||
|
||||
2. This also helps in determining which anonymous shared mappings are shared with
|
||||
each other by comparing the inodes of them.
|
||||
|
||||
3. When restoring a set of processes in case two of them have a shared mapping, we map
|
||||
the memory by the 1st one and then open its /proc/$pid/map_files/$address file and
|
||||
map it by the 2nd task.
|
||||
|
||||
Using /proc/$pid/maps for this is quite inconvenient since it requires repeated
|
||||
re-reading and reparsing of this text file, which slows down the restore procedure
|
||||
significantly. Also, as pointed out in (3), it is way easier to use the top level
|
||||
shared mapping in children as /proc/$pid/map_files/$address when needed.
|
||||
|
||||
v2: (spotted by Tejun Heo)
|
||||
- /proc/<pid>/mfd changed to /proc/<pid>/map_files
|
||||
- find_vma helper is used instead of linear search
|
||||
- routines are re-grouped
|
||||
- d_revalidate is set now
|
||||
|
||||
v3:
|
||||
- d_revalidate reworked, now it should drop no longer valid dentries (Tejun Heo)
|
||||
- ptrace_may_access added into proc_map_files_lookup (Vasiliy Kulikov)
|
||||
- because of filldir (which eventually might need to lock mmap_sem)
|
||||
the proc_map_files_readdir() was reworked to call proc_fill_cache()
|
||||
with unlocked mmap_sem
|
||||
|
||||
v4: (feedback by Tejun Heo and Vasiliy Kulikov)
|
||||
- instead of saving data in proc_inode we rather make a dentry name
|
||||
to keep both vm_start and vm_end accordingly
|
||||
- d_revalidate now honors task credentials
|
||||
|
||||
v5: (feedback by Kirill A. Shutemov)
|
||||
- don't forget to release mmap_sem on error path
|
||||
|
||||
v6:
|
||||
- sizeof is used in map_files_info, which shrinks the member a bit on
|
||||
x86-32 (by Kirill A. Shutemov)
|
||||
- map_name_to_addr returns -EINVAL instead of -1
|
||||
which is more appropriate (by Tejun Heo)
|
||||
|
||||
v7:
|
||||
- add [get/set]attr handlers for
|
||||
proc_map_files_inode_operations (by Vasiliy Kulikov)
|
||||
|
||||
v8:
|
||||
- Kirill A. Shutemov spotted a parasite semicolon
|
||||
which ruined the ptrace_check call, fixed.
|
||||
|
||||
v9: (feedback by Andrew Morton)
|
||||
- find_exact_vma moved into include/linux/mm.h as an inline helper
|
||||
- proc_map_files_setattr uses either kmalloc or vmalloc depending
|
||||
on how many objects are to be allocated
|
||||
- no more map_name_to_addr but dname_to_vma_addr introduced instead
|
||||
and it uses sscanf because in one case the find_exact_vma() is used
|
||||
only to confirm existence of vma area the boolean flag is used
|
||||
- fancy justification dropped
|
||||
- still the proc_map_files_get/setattr are left untouched
|
||||
until additional fd/ patches applied first.
|
||||
|
||||
v10: (feedback by Andrew Morton)
|
||||
- flex_arrays are used instead of kmalloc/vmalloc calls
|
||||
- map_files_d_revalidate use ptrace_may_access for
|
||||
security reasons (by Vasiliy Kulikov)
|
||||
|
||||
v11:
|
||||
- should use fput and drop !ret test from a loop code
|
||||
(feedback by Andrew Morton)
|
||||
- no need for 'used' variable, use existing
|
||||
nr_files with file->pos predicate
|
||||
- if preallocation fails no need to go further,
|
||||
simply release mmap semaphore and jump out
|
||||
|
||||
v12:
|
||||
- rework map_files_d_revalidate to make sure
|
||||
the task gets released on return (by Vasiliy Kulikov)
|
||||
|
||||
v13:
|
||||
- proc_map_files_inode_operations are set to be the same
|
||||
as proc_fd_inode_operations, ie to include .permission
|
||||
pointing to proc_fd_permission
|
||||
|
||||
v14: (by Vasiliy Kulikov)
|
||||
- for security reasons map_files/ entries are allowed for
|
||||
readers with CAP_SYS_ADMIN credentials granted only
|
||||
|
||||
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
Reviewed-by: Vasiliy Kulikov <segoon@openwall.com>
|
||||
CC: Tejun Heo <tj@kernel.org>
|
||||
CC: Vasiliy Kulikov <segoon@openwall.com>
|
||||
CC: "Kirill A. Shutemov" <kirill@shutemov.name>
|
||||
CC: Alexey Dobriyan <adobriyan@gmail.com>
|
||||
CC: Al Viro <viro@ZenIV.linux.org.uk>
|
||||
CC: Andrew Morton <akpm@linux-foundation.org>
|
||||
CC: Pavel Machek <pavel@ucw.cz>
|
||||
---
|
||||
fs/proc/base.c | 345 +++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
include/linux/mm.h | 12 +
|
||||
2 files changed, 357 insertions(+)
|
||||
|
||||
Index: linux-2.6.git/fs/proc/base.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/base.c
|
||||
+++ linux-2.6.git/fs/proc/base.c
|
||||
@@ -83,6 +83,7 @@
|
||||
#include <linux/pid_namespace.h>
|
||||
#include <linux/fs_struct.h>
|
||||
#include <linux/slab.h>
|
||||
+#include <linux/flex_array.h>
|
||||
#ifdef CONFIG_HARDWALL
|
||||
#include <asm/hardwall.h>
|
||||
#endif
|
||||
@@ -133,6 +134,8 @@ struct pid_entry {
|
||||
NULL, &proc_single_file_operations, \
|
||||
{ .proc_show = show } )
|
||||
|
||||
+static int proc_fd_permission(struct inode *inode, int mask);
|
||||
+
|
||||
/*
|
||||
* Count the number of hardlinks for the pid_entry table, excluding the .
|
||||
* and .. links.
|
||||
@@ -2201,6 +2204,347 @@ static const struct file_operations proc
|
||||
};
|
||||
|
||||
/*
|
||||
+ * dname_to_vma_addr - maps a dentry name into two unsigned longs
|
||||
+ * which represent vma start and end addresses.
|
||||
+ */
|
||||
+static int dname_to_vma_addr(struct dentry *dentry,
|
||||
+ unsigned long *start, unsigned long *end)
|
||||
+{
|
||||
+ if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
|
||||
+{
|
||||
+ unsigned long vm_start, vm_end;
|
||||
+ bool exact_vma_exists = false;
|
||||
+ struct mm_struct *mm = NULL;
|
||||
+ struct task_struct *task;
|
||||
+ const struct cred *cred;
|
||||
+ struct inode *inode;
|
||||
+ int status = 0;
|
||||
+
|
||||
+ if (nd && nd->flags & LOOKUP_RCU)
|
||||
+ return -ECHILD;
|
||||
+
|
||||
+ if (!capable(CAP_SYS_ADMIN)) {
|
||||
+ status = -EACCES;
|
||||
+ goto out_notask;
|
||||
+ }
|
||||
+
|
||||
+ inode = dentry->d_inode;
|
||||
+ task = get_proc_task(inode);
|
||||
+ if (!task)
|
||||
+ goto out_notask;
|
||||
+
|
||||
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
|
||||
+ goto out;
|
||||
+
|
||||
+ mm = get_task_mm(task);
|
||||
+ if (!mm)
|
||||
+ goto out;
|
||||
+
|
||||
+ if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
|
||||
+ down_read(&mm->mmap_sem);
|
||||
+ exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
|
||||
+ up_read(&mm->mmap_sem);
|
||||
+ }
|
||||
+
|
||||
+ mmput(mm);
|
||||
+
|
||||
+ if (exact_vma_exists) {
|
||||
+ if (task_dumpable(task)) {
|
||||
+ rcu_read_lock();
|
||||
+ cred = __task_cred(task);
|
||||
+ inode->i_uid = cred->euid;
|
||||
+ inode->i_gid = cred->egid;
|
||||
+ rcu_read_unlock();
|
||||
+ } else {
|
||||
+ inode->i_uid = 0;
|
||||
+ inode->i_gid = 0;
|
||||
+ }
|
||||
+ security_task_to_inode(task, inode);
|
||||
+ status = 1;
|
||||
+ }
|
||||
+
|
||||
+out:
|
||||
+ put_task_struct(task);
|
||||
+
|
||||
+out_notask:
|
||||
+ if (status <= 0)
|
||||
+ d_drop(dentry);
|
||||
+
|
||||
+ return status;
|
||||
+}
|
||||
+
|
||||
+static const struct dentry_operations tid_map_files_dentry_operations = {
|
||||
+ .d_revalidate = map_files_d_revalidate,
|
||||
+ .d_delete = pid_delete_dentry,
|
||||
+};
|
||||
+
|
||||
+static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
|
||||
+{
|
||||
+ unsigned long vm_start, vm_end;
|
||||
+ struct vm_area_struct *vma;
|
||||
+ struct task_struct *task;
|
||||
+ struct mm_struct *mm;
|
||||
+ int rc;
|
||||
+
|
||||
+ rc = -ENOENT;
|
||||
+ task = get_proc_task(dentry->d_inode);
|
||||
+ if (!task)
|
||||
+ goto out;
|
||||
+
|
||||
+ mm = get_task_mm(task);
|
||||
+ put_task_struct(task);
|
||||
+ if (!mm)
|
||||
+ goto out;
|
||||
+
|
||||
+ rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
|
||||
+ if (rc)
|
||||
+ goto out_mmput;
|
||||
+
|
||||
+ down_read(&mm->mmap_sem);
|
||||
+ vma = find_exact_vma(mm, vm_start, vm_end);
|
||||
+ if (vma && vma->vm_file) {
|
||||
+ *path = vma->vm_file->f_path;
|
||||
+ path_get(path);
|
||||
+ rc = 0;
|
||||
+ }
|
||||
+ up_read(&mm->mmap_sem);
|
||||
+
|
||||
+out_mmput:
|
||||
+ mmput(mm);
|
||||
+out:
|
||||
+ return rc;
|
||||
+}
|
||||
+
|
||||
+struct map_files_info {
|
||||
+ struct file *file;
|
||||
+ unsigned long len;
|
||||
+ unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
|
||||
+};
|
||||
+
|
||||
+static struct dentry *
|
||||
+proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
|
||||
+ struct task_struct *task, const void *ptr)
|
||||
+{
|
||||
+ const struct file *file = ptr;
|
||||
+ struct proc_inode *ei;
|
||||
+ struct inode *inode;
|
||||
+
|
||||
+ if (!file)
|
||||
+ return ERR_PTR(-ENOENT);
|
||||
+
|
||||
+ inode = proc_pid_make_inode(dir->i_sb, task);
|
||||
+ if (!inode)
|
||||
+ return ERR_PTR(-ENOENT);
|
||||
+
|
||||
+ ei = PROC_I(inode);
|
||||
+ ei->op.proc_get_link = proc_map_files_get_link;
|
||||
+
|
||||
+ inode->i_op = &proc_pid_link_inode_operations;
|
||||
+ inode->i_size = 64;
|
||||
+ inode->i_mode = S_IFLNK;
|
||||
+
|
||||
+ if (file->f_mode & FMODE_READ)
|
||||
+ inode->i_mode |= S_IRUSR;
|
||||
+ if (file->f_mode & FMODE_WRITE)
|
||||
+ inode->i_mode |= S_IWUSR;
|
||||
+
|
||||
+ d_set_d_op(dentry, &tid_map_files_dentry_operations);
|
||||
+ d_add(dentry, inode);
|
||||
+
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+static struct dentry *proc_map_files_lookup(struct inode *dir,
|
||||
+ struct dentry *dentry, struct nameidata *nd)
|
||||
+{
|
||||
+ unsigned long vm_start, vm_end;
|
||||
+ struct vm_area_struct *vma;
|
||||
+ struct task_struct *task;
|
||||
+ struct dentry *result;
|
||||
+ struct mm_struct *mm;
|
||||
+
|
||||
+ result = ERR_PTR(-EACCES);
|
||||
+ if (!capable(CAP_SYS_ADMIN))
|
||||
+ goto out;
|
||||
+
|
||||
+ result = ERR_PTR(-ENOENT);
|
||||
+ task = get_proc_task(dir);
|
||||
+ if (!task)
|
||||
+ goto out;
|
||||
+
|
||||
+ result = ERR_PTR(-EACCES);
|
||||
+ if (lock_trace(task))
|
||||
+ goto out_put_task;
|
||||
+
|
||||
+ result = ERR_PTR(-ENOENT);
|
||||
+ if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
|
||||
+ goto out_unlock;
|
||||
+
|
||||
+ mm = get_task_mm(task);
|
||||
+ if (!mm)
|
||||
+ goto out_unlock;
|
||||
+
|
||||
+ down_read(&mm->mmap_sem);
|
||||
+ vma = find_exact_vma(mm, vm_start, vm_end);
|
||||
+ if (!vma)
|
||||
+ goto out_no_vma;
|
||||
+
|
||||
+ result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file);
|
||||
+
|
||||
+out_no_vma:
|
||||
+ up_read(&mm->mmap_sem);
|
||||
+ mmput(mm);
|
||||
+out_unlock:
|
||||
+ unlock_trace(task);
|
||||
+out_put_task:
|
||||
+ put_task_struct(task);
|
||||
+out:
|
||||
+ return result;
|
||||
+}
|
||||
+
|
||||
+static const struct inode_operations proc_map_files_inode_operations = {
|
||||
+ .lookup = proc_map_files_lookup,
|
||||
+ .permission = proc_fd_permission,
|
||||
+ .setattr = proc_setattr,
|
||||
+};
|
||||
+
|
||||
+static int proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
|
||||
+{
|
||||
+ struct dentry *dentry = filp->f_path.dentry;
|
||||
+ struct inode *inode = dentry->d_inode;
|
||||
+ struct vm_area_struct *vma;
|
||||
+ struct task_struct *task;
|
||||
+ struct mm_struct *mm;
|
||||
+ ino_t ino;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = -EACCES;
|
||||
+ if (!capable(CAP_SYS_ADMIN))
|
||||
+ goto out;
|
||||
+
|
||||
+ ret = -ENOENT;
|
||||
+ task = get_proc_task(inode);
|
||||
+ if (!task)
|
||||
+ goto out;
|
||||
+
|
||||
+ ret = -EACCES;
|
||||
+ if (lock_trace(task))
|
||||
+ goto out_put_task;
|
||||
+
|
||||
+ ret = 0;
|
||||
+ switch (filp->f_pos) {
|
||||
+ case 0:
|
||||
+ ino = inode->i_ino;
|
||||
+ if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0)
|
||||
+ goto out_unlock;
|
||||
+ filp->f_pos++;
|
||||
+ case 1:
|
||||
+ ino = parent_ino(dentry);
|
||||
+ if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
|
||||
+ goto out_unlock;
|
||||
+ filp->f_pos++;
|
||||
+ default:
|
||||
+ {
|
||||
+ unsigned long nr_files, pos, i;
|
||||
+ struct flex_array *fa = NULL;
|
||||
+ struct map_files_info info;
|
||||
+ struct map_files_info *p;
|
||||
+
|
||||
+ mm = get_task_mm(task);
|
||||
+ if (!mm)
|
||||
+ goto out_unlock;
|
||||
+ down_read(&mm->mmap_sem);
|
||||
+
|
||||
+ nr_files = 0;
|
||||
+
|
||||
+ /*
|
||||
+ * We need two passes here:
|
||||
+ *
|
||||
+ * 1) Collect vmas of mapped files with mmap_sem taken
|
||||
+ * 2) Release mmap_sem and instantiate entries
|
||||
+ *
|
||||
+ * otherwise lockdep complains, since the filldir()
|
||||
+ * routine might require mmap_sem taken in might_fault().
|
||||
+ */
|
||||
+
|
||||
+ for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
|
||||
+ if (vma->vm_file && ++pos > filp->f_pos)
|
||||
+ nr_files++;
|
||||
+ }
|
||||
+
|
||||
+ if (nr_files) {
|
||||
+ fa = flex_array_alloc(sizeof(info), nr_files, GFP_KERNEL);
|
||||
+ if (!fa || flex_array_prealloc(fa, 0, nr_files, GFP_KERNEL)) {
|
||||
+ ret = -ENOMEM;
|
||||
+ if (fa)
|
||||
+ flex_array_free(fa);
|
||||
+ up_read(&mm->mmap_sem);
|
||||
+ mmput(mm);
|
||||
+ goto out_unlock;
|
||||
+ }
|
||||
+ for (i = 0, vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
|
||||
+ if (!vma->vm_file)
|
||||
+ continue;
|
||||
+ if (++pos <= filp->f_pos)
|
||||
+ continue;
|
||||
+
|
||||
+ get_file(vma->vm_file);
|
||||
+ info.file = vma->vm_file;
|
||||
+ info.len = snprintf(info.name, sizeof(info.name),
|
||||
+ "%lx-%lx", vma->vm_start,
|
||||
+ vma->vm_end);
|
||||
+ if (flex_array_put(fa, i++, &info, GFP_KERNEL))
|
||||
+ BUG();
|
||||
+ }
|
||||
+ }
|
||||
+ up_read(&mm->mmap_sem);
|
||||
+
|
||||
+ for (i = 0; i < nr_files; i++) {
|
||||
+ p = flex_array_get(fa, i);
|
||||
+ ret = proc_fill_cache(filp, dirent, filldir,
|
||||
+ p->name, p->len,
|
||||
+ proc_map_files_instantiate,
|
||||
+ task, p->file);
|
||||
+ if (ret)
|
||||
+ break;
|
||||
+ filp->f_pos++;
|
||||
+ fput(p->file);
|
||||
+ }
|
||||
+ for (; i < nr_files; i++) {
|
||||
+ /*
|
||||
+ * In case of error don't forget
|
||||
+ * to put rest of file refs.
|
||||
+ */
|
||||
+ p = flex_array_get(fa, i);
|
||||
+ fput(p->file);
|
||||
+ }
|
||||
+ if (fa)
|
||||
+ flex_array_free(fa);
|
||||
+ mmput(mm);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+out_unlock:
|
||||
+ unlock_trace(task);
|
||||
+out_put_task:
|
||||
+ put_task_struct(task);
|
||||
+out:
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static const struct file_operations proc_map_files_operations = {
|
||||
+ .read = generic_read_dir,
|
||||
+ .readdir = proc_map_files_readdir,
|
||||
+ .llseek = default_llseek,
|
||||
+};
|
||||
+
|
||||
+/*
|
||||
* /proc/pid/fd needs a special permission handler so that a process can still
|
||||
* access /proc/self/fd after it has executed a setuid().
|
||||
*/
|
||||
@@ -2815,6 +3159,7 @@ static const struct inode_operations pro
|
||||
static const struct pid_entry tgid_base_stuff[] = {
|
||||
DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
|
||||
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
|
||||
+ DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
|
||||
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
|
||||
DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
|
||||
#ifdef CONFIG_NET
|
||||
Index: linux-2.6.git/include/linux/mm.h
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/include/linux/mm.h
|
||||
+++ linux-2.6.git/include/linux/mm.h
|
||||
@@ -1491,6 +1491,18 @@ static inline unsigned long vma_pages(st
|
||||
return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
+/* Look up the first VMA which exactly matches the interval vm_start ... vm_end */
|
||||
+static inline struct vm_area_struct *
|
||||
+find_exact_vma(struct mm_struct *mm, unsigned long vm_start, unsigned long vm_end)
|
||||
+{
|
||||
+ struct vm_area_struct *vma = find_vma(mm, vm_start);
|
||||
+
|
||||
+ if (vma && (vma->vm_start != vm_start || vma->vm_end != vm_end))
|
||||
+ vma = NULL;
|
||||
+
|
||||
+ return vma;
|
||||
+}
|
||||
+
|
||||
#ifdef CONFIG_MMU
|
||||
pgprot_t vm_get_page_prot(unsigned long vm_flags);
|
||||
#else
|
27
kernel/cr-statfs-callback-for-pipefs
Normal file
27
kernel/cr-statfs-callback-for-pipefs
Normal file
@ -0,0 +1,27 @@
|
||||
vfs: Add ->statfs callback for pipefs
|
||||
|
||||
From: Pavel Emelyanov <xemul@parallels.com>
|
||||
|
||||
This is done to make it possible to distinguish pipes
|
||||
from fifos when opening one via /proc/<pid>/fd/ link.
|
||||
|
||||
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
|
||||
Reviewed-by: Tejun Heo <tj@kernel.org>
|
||||
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
---
|
||||
fs/pipe.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
Index: linux-2.6.git/fs/pipe.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/pipe.c
|
||||
+++ linux-2.6.git/fs/pipe.c
|
||||
@@ -1254,6 +1254,7 @@ out:
|
||||
|
||||
static const struct super_operations pipefs_ops = {
|
||||
.destroy_inode = free_inode_nonrcu,
|
||||
+ .statfs = simple_statfs,
|
||||
};
|
||||
|
||||
/*
|
86
kernel/fs-add-do-close
Normal file
86
kernel/fs-add-do-close
Normal file
@ -0,0 +1,86 @@
|
||||
fs: Add do_close helper
|
||||
|
||||
To be able to close file descriptors right from inside
|
||||
kernel space do_close() helper is added. We need it at
|
||||
checkpoint restore time.
|
||||
|
||||
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
---
|
||||
fs/open.c | 32 ++++++++++++++++++++------------
|
||||
include/linux/fs.h | 1 +
|
||||
2 files changed, 21 insertions(+), 12 deletions(-)
|
||||
|
||||
Index: linux-2.6.git/fs/open.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/open.c
|
||||
+++ linux-2.6.git/fs/open.c
|
||||
@@ -1056,17 +1056,11 @@ int filp_close(struct file *filp, fl_own
|
||||
|
||||
EXPORT_SYMBOL(filp_close);
|
||||
|
||||
-/*
|
||||
- * Careful here! We test whether the file pointer is NULL before
|
||||
- * releasing the fd. This ensures that one clone task can't release
|
||||
- * an fd while another clone is opening it.
|
||||
- */
|
||||
-SYSCALL_DEFINE1(close, unsigned int, fd)
|
||||
+int do_close(unsigned int fd)
|
||||
{
|
||||
struct file * filp;
|
||||
struct files_struct *files = current->files;
|
||||
struct fdtable *fdt;
|
||||
- int retval;
|
||||
|
||||
spin_lock(&files->file_lock);
|
||||
fdt = files_fdtable(files);
|
||||
@@ -1079,7 +1073,25 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
|
||||
FD_CLR(fd, fdt->close_on_exec);
|
||||
__put_unused_fd(files, fd);
|
||||
spin_unlock(&files->file_lock);
|
||||
- retval = filp_close(filp, files);
|
||||
+
|
||||
+ return filp_close(filp, files);
|
||||
+
|
||||
+out_unlock:
|
||||
+ spin_unlock(&files->file_lock);
|
||||
+ return -EBADF;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(do_close);
|
||||
+
|
||||
+/*
|
||||
+ * Careful here! We test whether the file pointer is NULL before
|
||||
+ * releasing the fd. This ensures that one clone task can't release
|
||||
+ * an fd while another clone is opening it.
|
||||
+ */
|
||||
+SYSCALL_DEFINE1(close, unsigned int, fd)
|
||||
+{
|
||||
+ int retval;
|
||||
+
|
||||
+ retval = do_close(fd);
|
||||
|
||||
/* can't restart close syscall because file table entry was cleared */
|
||||
if (unlikely(retval == -ERESTARTSYS ||
|
||||
@@ -1089,10 +1101,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
|
||||
retval = -EINTR;
|
||||
|
||||
return retval;
|
||||
-
|
||||
-out_unlock:
|
||||
- spin_unlock(&files->file_lock);
|
||||
- return -EBADF;
|
||||
}
|
||||
EXPORT_SYMBOL(sys_close);
|
||||
|
||||
Index: linux-2.6.git/include/linux/fs.h
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/include/linux/fs.h
|
||||
+++ linux-2.6.git/include/linux/fs.h
|
||||
@@ -2027,6 +2027,7 @@ extern struct file *file_open_root(struc
|
||||
extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
|
||||
const struct cred *);
|
||||
extern int filp_close(struct file *, fl_owner_t id);
|
||||
+extern int do_close(unsigned int fd);
|
||||
extern char * getname(const char __user *);
|
||||
|
||||
/* fs/ioctl.c */
|
45
kernel/fs-proc-add-tls
Normal file
45
kernel/fs-proc-add-tls
Normal file
@ -0,0 +1,45 @@
|
||||
fs, proc: Add /proc/$pid/tls entry
|
||||
|
||||
To be able to restart checkpointed tasks we need
|
||||
to know TLS status at dumping time. Export this
|
||||
information by /proc/$pid/tls entry.
|
||||
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
---
|
||||
fs/proc/base.c | 16 ++++++++++++++++
|
||||
1 file changed, 16 insertions(+)
|
||||
|
||||
Index: linux-2.6.git/fs/proc/base.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/base.c
|
||||
+++ linux-2.6.git/fs/proc/base.c
|
||||
@@ -3150,6 +3150,21 @@ static int proc_pid_personality(struct s
|
||||
return err;
|
||||
}
|
||||
|
||||
+static int proc_pid_tls(struct seq_file *m, struct pid_namespace *ns,
|
||||
+ struct pid *pid, struct task_struct *task)
|
||||
+{
|
||||
+ int err = lock_trace(task);
|
||||
+ if (!err) {
|
||||
+ int i;
|
||||
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
|
||||
+ seq_printf(m, "%x %x\n",
|
||||
+ task->thread.tls_array[i].a,
|
||||
+ task->thread.tls_array[i].b);
|
||||
+ unlock_trace(task);
|
||||
+ }
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Thread groups
|
||||
*/
|
||||
@@ -3169,6 +3184,7 @@ static const struct pid_entry tgid_base_
|
||||
INF("auxv", S_IRUSR, proc_pid_auxv),
|
||||
ONE("status", S_IRUGO, proc_pid_status),
|
||||
ONE("personality", S_IRUGO, proc_pid_personality),
|
||||
+ ONE("tls", S_IRUGO, proc_pid_tls),
|
||||
INF("limits", S_IRUGO, proc_pid_limits),
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
|
108
kernel/fs-proc-switch-to-dentry
Normal file
108
kernel/fs-proc-switch-to-dentry
Normal file
@ -0,0 +1,108 @@
|
||||
fs, proc: Make proc_get_link to use dentry instead of inode
|
||||
|
||||
This patch prepares the ground for the next "map_files"
|
||||
patch which needs a name of a link file to analyse.
|
||||
|
||||
So instead of squashing this change into one big
|
||||
patch the separate one is done.
|
||||
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
CC: Pavel Emelyanov <xemul@parallels.com>
|
||||
CC: Tejun Heo <tj@kernel.org>
|
||||
CC: Vasiliy Kulikov <segoon@openwall.com>
|
||||
CC: "Kirill A. Shutemov" <kirill@shutemov.name>
|
||||
CC: Alexey Dobriyan <adobriyan@gmail.com>
|
||||
CC: Al Viro <viro@ZenIV.linux.org.uk>
|
||||
CC: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
fs/proc/base.c | 20 ++++++++++----------
|
||||
include/linux/proc_fs.h | 2 +-
|
||||
2 files changed, 11 insertions(+), 11 deletions(-)
|
||||
|
||||
Index: linux-2.6.git/fs/proc/base.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/base.c
|
||||
+++ linux-2.6.git/fs/proc/base.c
|
||||
@@ -165,9 +165,9 @@ static int get_task_root(struct task_str
|
||||
return result;
|
||||
}
|
||||
|
||||
-static int proc_cwd_link(struct inode *inode, struct path *path)
|
||||
+static int proc_cwd_link(struct dentry *dentry, struct path *path)
|
||||
{
|
||||
- struct task_struct *task = get_proc_task(inode);
|
||||
+ struct task_struct *task = get_proc_task(dentry->d_inode);
|
||||
int result = -ENOENT;
|
||||
|
||||
if (task) {
|
||||
@@ -182,9 +182,9 @@ static int proc_cwd_link(struct inode *i
|
||||
return result;
|
||||
}
|
||||
|
||||
-static int proc_root_link(struct inode *inode, struct path *path)
|
||||
+static int proc_root_link(struct dentry *dentry, struct path *path)
|
||||
{
|
||||
- struct task_struct *task = get_proc_task(inode);
|
||||
+ struct task_struct *task = get_proc_task(dentry->d_inode);
|
||||
int result = -ENOENT;
|
||||
|
||||
if (task) {
|
||||
@@ -1580,13 +1580,13 @@ static const struct file_operations proc
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
-static int proc_exe_link(struct inode *inode, struct path *exe_path)
|
||||
+static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
|
||||
{
|
||||
struct task_struct *task;
|
||||
struct mm_struct *mm;
|
||||
struct file *exe_file;
|
||||
|
||||
- task = get_proc_task(inode);
|
||||
+ task = get_proc_task(dentry->d_inode);
|
||||
if (!task)
|
||||
return -ENOENT;
|
||||
mm = get_task_mm(task);
|
||||
@@ -1616,7 +1616,7 @@ static void *proc_pid_follow_link(struct
|
||||
if (!proc_fd_access_allowed(inode))
|
||||
goto out;
|
||||
|
||||
- error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
|
||||
+ error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path);
|
||||
out:
|
||||
return ERR_PTR(error);
|
||||
}
|
||||
@@ -1655,7 +1655,7 @@ static int proc_pid_readlink(struct dent
|
||||
if (!proc_fd_access_allowed(inode))
|
||||
goto out;
|
||||
|
||||
- error = PROC_I(inode)->op.proc_get_link(inode, &path);
|
||||
+ error = PROC_I(inode)->op.proc_get_link(dentry, &path);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
@@ -1959,9 +1959,9 @@ out_task:
|
||||
return rc;
|
||||
}
|
||||
|
||||
-static int proc_fd_link(struct inode *inode, struct path *path)
|
||||
+static int proc_fd_link(struct dentry *dentry, struct path *path)
|
||||
{
|
||||
- return proc_fd_info(inode, path, NULL);
|
||||
+ return proc_fd_info(dentry->d_inode, path, NULL);
|
||||
}
|
||||
|
||||
static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
|
||||
Index: linux-2.6.git/include/linux/proc_fs.h
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/include/linux/proc_fs.h
|
||||
+++ linux-2.6.git/include/linux/proc_fs.h
|
||||
@@ -253,7 +253,7 @@ extern const struct proc_ns_operations u
|
||||
extern const struct proc_ns_operations ipcns_operations;
|
||||
|
||||
union proc_op {
|
||||
- int (*proc_get_link)(struct inode *, struct path *);
|
||||
+ int (*proc_get_link)(struct dentry *, struct path *);
|
||||
int (*proc_read)(struct task_struct *task, char *page);
|
||||
int (*proc_show)(struct seq_file *m,
|
||||
struct pid_namespace *ns, struct pid *pid,
|
@ -0,0 +1,28 @@
|
||||
From: Vasiliy Kulikov <segooon@gmail.com>
|
||||
|
||||
In the patch "proc: fix races against execve() of /proc/PID/fd**"
|
||||
proc_pid_fd_link_getattr() leaked task_struct if ptrace check fails.
|
||||
|
||||
Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
|
||||
Reported-by: Cyrill Gorcunov <gorcunov@gmail.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
|
||||
fs/proc/base.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff -puN fs/proc/base.c~proc-fix-races-against-execve-of-proc-pid-fd-fix fs/proc/base.c
|
||||
--- a/fs/proc/base.c~proc-fix-races-against-execve-of-proc-pid-fd-fix
|
||||
+++ a/fs/proc/base.c
|
||||
@@ -1681,9 +1681,9 @@ static int proc_pid_fd_link_getattr(stru
|
||||
|
||||
generic_fillattr(inode, stat);
|
||||
unlock_trace(task);
|
||||
- put_task_struct(task);
|
||||
rc = 0;
|
||||
out_task:
|
||||
+ put_task_struct(task);
|
||||
return rc;
|
||||
}
|
||||
|
||||
_
|
255
kernel/proc-fix-races-against-execve-of-proc-pid-fd.patch
Normal file
255
kernel/proc-fix-races-against-execve-of-proc-pid-fd.patch
Normal file
@ -0,0 +1,255 @@
|
||||
From: Vasiliy Kulikov <segoon@openwall.com>
|
||||
|
||||
fd* files are restricted to the task's owner, and other users may not get
|
||||
direct access to them. But one may open any of these files and run any
|
||||
setuid program, keeping opened file descriptors. As there are permission
|
||||
checks on open(), but not on readdir() and read(), operations on the kept
|
||||
file descriptors will not be checked. It makes it possible to violate
|
||||
procfs permission model.
|
||||
|
||||
Reading fdinfo/* may disclose current fds' position and flags, reading
|
||||
directory contents of fdinfo/ and fd/ may disclose the number of opened
|
||||
files by the target task. This information is not sensitive per se, but it
|
||||
can reveal some private information (like length of a password stored in a
|
||||
file) under certain conditions.
|
||||
|
||||
Used existing (un)lock_trace functions to check for ptrace_may_access(),
|
||||
but instead of using EPERM return code from it use EACCES to be consistent
|
||||
with existing proc_pid_follow_link()/proc_pid_readlink() return code. If
|
||||
they differ, attacker can guess what fds exist by analyzing stat() return
|
||||
code. Patched handlers: stat() for fd/*, stat() and read() for fdinfo/*,
|
||||
readdir() and lookup() for fd/ and fdinfo/.
|
||||
|
||||
Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
|
||||
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
|
||||
Cc: <stable@kernel.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
|
||||
fs/proc/base.c | 146 +++++++++++++++++++++++++++++++++--------------
|
||||
1 file changed, 103 insertions(+), 43 deletions(-)
|
||||
|
||||
diff -puN fs/proc/base.c~proc-fix-races-against-execve-of-proc-pid-fd fs/proc/base.c
|
||||
--- a/fs/proc/base.c~proc-fix-races-against-execve-of-proc-pid-fd
|
||||
+++ a/fs/proc/base.c
|
||||
@@ -1652,12 +1652,46 @@ out:
|
||||
return error;
|
||||
}
|
||||
|
||||
+static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
+ struct kstat *stat)
|
||||
+{
|
||||
+ struct inode *inode = dentry->d_inode;
|
||||
+ struct task_struct *task = get_proc_task(inode);
|
||||
+ int rc;
|
||||
+
|
||||
+ if (task == NULL)
|
||||
+ return -ESRCH;
|
||||
+
|
||||
+ rc = -EACCES;
|
||||
+ if (lock_trace(task))
|
||||
+ goto out_task;
|
||||
+
|
||||
+ generic_fillattr(inode, stat);
|
||||
+ unlock_trace(task);
|
||||
+ put_task_struct(task);
|
||||
+ rc = 0;
|
||||
+out_task:
|
||||
+ return rc;
|
||||
+}
|
||||
+
|
||||
static const struct inode_operations proc_pid_link_inode_operations = {
|
||||
.readlink = proc_pid_readlink,
|
||||
.follow_link = proc_pid_follow_link,
|
||||
.setattr = proc_setattr,
|
||||
};
|
||||
|
||||
+static const struct inode_operations proc_fdinfo_link_inode_operations = {
|
||||
+ .setattr = proc_setattr,
|
||||
+ .getattr = proc_pid_fd_link_getattr,
|
||||
+};
|
||||
+
|
||||
+static const struct inode_operations proc_fd_link_inode_operations = {
|
||||
+ .readlink = proc_pid_readlink,
|
||||
+ .follow_link = proc_pid_follow_link,
|
||||
+ .setattr = proc_setattr,
|
||||
+ .getattr = proc_pid_fd_link_getattr,
|
||||
+};
|
||||
+
|
||||
|
||||
/* building an inode */
|
||||
|
||||
@@ -1889,49 +1923,61 @@ out:
|
||||
|
||||
static int proc_fd_info(struct inode *inode, struct path *path, char *info)
|
||||
{
|
||||
- struct task_struct *task = get_proc_task(inode);
|
||||
- struct files_struct *files = NULL;
|
||||
+ struct task_struct *task;
|
||||
+ struct files_struct *files;
|
||||
struct file *file;
|
||||
int fd = proc_fd(inode);
|
||||
+ int rc;
|
||||
|
||||
- if (task) {
|
||||
- files = get_files_struct(task);
|
||||
- put_task_struct(task);
|
||||
- }
|
||||
- if (files) {
|
||||
- /*
|
||||
- * We are not taking a ref to the file structure, so we must
|
||||
- * hold ->file_lock.
|
||||
- */
|
||||
- spin_lock(&files->file_lock);
|
||||
- file = fcheck_files(files, fd);
|
||||
- if (file) {
|
||||
- unsigned int f_flags;
|
||||
- struct fdtable *fdt;
|
||||
-
|
||||
- fdt = files_fdtable(files);
|
||||
- f_flags = file->f_flags & ~O_CLOEXEC;
|
||||
- if (FD_ISSET(fd, fdt->close_on_exec))
|
||||
- f_flags |= O_CLOEXEC;
|
||||
-
|
||||
- if (path) {
|
||||
- *path = file->f_path;
|
||||
- path_get(&file->f_path);
|
||||
- }
|
||||
- if (info)
|
||||
- snprintf(info, PROC_FDINFO_MAX,
|
||||
- "pos:\t%lli\n"
|
||||
- "flags:\t0%o\n",
|
||||
- (long long) file->f_pos,
|
||||
- f_flags);
|
||||
- spin_unlock(&files->file_lock);
|
||||
- put_files_struct(files);
|
||||
- return 0;
|
||||
+ task = get_proc_task(inode);
|
||||
+ if (!task)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ rc = -EACCES;
|
||||
+ if (lock_trace(task))
|
||||
+ goto out_task;
|
||||
+
|
||||
+ rc = -ENOENT;
|
||||
+ files = get_files_struct(task);
|
||||
+ if (files == NULL)
|
||||
+ goto out_unlock;
|
||||
+
|
||||
+ /*
|
||||
+ * We are not taking a ref to the file structure, so we must
|
||||
+ * hold ->file_lock.
|
||||
+ */
|
||||
+ spin_lock(&files->file_lock);
|
||||
+ file = fcheck_files(files, fd);
|
||||
+ if (file) {
|
||||
+ unsigned int f_flags;
|
||||
+ struct fdtable *fdt;
|
||||
+
|
||||
+ fdt = files_fdtable(files);
|
||||
+ f_flags = file->f_flags & ~O_CLOEXEC;
|
||||
+ if (FD_ISSET(fd, fdt->close_on_exec))
|
||||
+ f_flags |= O_CLOEXEC;
|
||||
+
|
||||
+ if (path) {
|
||||
+ *path = file->f_path;
|
||||
+ path_get(&file->f_path);
|
||||
}
|
||||
- spin_unlock(&files->file_lock);
|
||||
- put_files_struct(files);
|
||||
- }
|
||||
- return -ENOENT;
|
||||
+ if (info)
|
||||
+ snprintf(info, PROC_FDINFO_MAX,
|
||||
+ "pos:\t%lli\n"
|
||||
+ "flags:\t0%o\n",
|
||||
+ (long long) file->f_pos,
|
||||
+ f_flags);
|
||||
+ rc = 0;
|
||||
+ } else
|
||||
+ rc = -ENOENT;
|
||||
+ spin_unlock(&files->file_lock);
|
||||
+ put_files_struct(files);
|
||||
+
|
||||
+out_unlock:
|
||||
+ unlock_trace(task);
|
||||
+out_task:
|
||||
+ put_task_struct(task);
|
||||
+ return rc;
|
||||
}
|
||||
|
||||
static int proc_fd_link(struct inode *inode, struct path *path)
|
||||
@@ -2026,7 +2072,7 @@ static struct dentry *proc_fd_instantiat
|
||||
spin_unlock(&files->file_lock);
|
||||
put_files_struct(files);
|
||||
|
||||
- inode->i_op = &proc_pid_link_inode_operations;
|
||||
+ inode->i_op = &proc_fd_link_inode_operations;
|
||||
inode->i_size = 64;
|
||||
ei->op.proc_get_link = proc_fd_link;
|
||||
d_set_d_op(dentry, &tid_fd_dentry_operations);
|
||||
@@ -2058,7 +2104,12 @@ static struct dentry *proc_lookupfd_comm
|
||||
if (fd == ~0U)
|
||||
goto out;
|
||||
|
||||
+ result = ERR_PTR(-EACCES);
|
||||
+ if (lock_trace(task))
|
||||
+ goto out;
|
||||
+
|
||||
result = instantiate(dir, dentry, task, &fd);
|
||||
+ unlock_trace(task);
|
||||
out:
|
||||
put_task_struct(task);
|
||||
out_no_task:
|
||||
@@ -2078,23 +2129,28 @@ static int proc_readfd_common(struct fil
|
||||
retval = -ENOENT;
|
||||
if (!p)
|
||||
goto out_no_task;
|
||||
+
|
||||
+ retval = -EACCES;
|
||||
+ if (lock_trace(p))
|
||||
+ goto out;
|
||||
+
|
||||
retval = 0;
|
||||
|
||||
fd = filp->f_pos;
|
||||
switch (fd) {
|
||||
case 0:
|
||||
if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
|
||||
- goto out;
|
||||
+ goto out_unlock;
|
||||
filp->f_pos++;
|
||||
case 1:
|
||||
ino = parent_ino(dentry);
|
||||
if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
|
||||
- goto out;
|
||||
+ goto out_unlock;
|
||||
filp->f_pos++;
|
||||
default:
|
||||
files = get_files_struct(p);
|
||||
if (!files)
|
||||
- goto out;
|
||||
+ goto out_unlock;
|
||||
rcu_read_lock();
|
||||
for (fd = filp->f_pos-2;
|
||||
fd < files_fdtable(files)->max_fds;
|
||||
@@ -2118,6 +2174,9 @@ static int proc_readfd_common(struct fil
|
||||
rcu_read_unlock();
|
||||
put_files_struct(files);
|
||||
}
|
||||
+
|
||||
+out_unlock:
|
||||
+ unlock_trace(p);
|
||||
out:
|
||||
put_task_struct(p);
|
||||
out_no_task:
|
||||
@@ -2195,6 +2254,7 @@ static struct dentry *proc_fdinfo_instan
|
||||
ei->fd = fd;
|
||||
inode->i_mode = S_IFREG | S_IRUSR;
|
||||
inode->i_fop = &proc_fdinfo_file_operations;
|
||||
+ inode->i_op = &proc_fdinfo_link_inode_operations;
|
||||
d_set_d_op(dentry, &tid_fd_dentry_operations);
|
||||
d_add(dentry, inode);
|
||||
/* Close the race of the process dying before we return the dentry */
|
||||
_
|
118
kernel/proc-force-dcache-drop-on-unauthorized-access.patch
Normal file
118
kernel/proc-force-dcache-drop-on-unauthorized-access.patch
Normal file
@ -0,0 +1,118 @@
|
||||
From: Vasiliy Kulikov <segoon@openwall.com>
|
||||
|
||||
The patch "proc: fix races against execve() of /proc/PID/fd**" is still a
|
||||
partial fix for a setxid problem. link(2) is yet another way to
|
||||
identify whether a specific fd is opened by a privileged process. By
|
||||
calling link(2) against /proc/PID/fd/* an attacker may identify whether
|
||||
the fd number is valid for PID by analysing link(2) return code.
|
||||
|
||||
Both getattr() and link() can be used by the attacker iff the dentry is
|
||||
present in the dcache. In this case ->lookup() is not called and the only
|
||||
way to check ptrace permissions is either operation handler or
|
||||
->revalidate(). The easiest solution to prevent any unauthorized access
|
||||
to /proc/PID/fd*/ files is to force the dentry drop on each unauthorized
|
||||
access attempt.
|
||||
|
||||
If an attacker keeps opened fd of /proc/PID/fd/ and dcache contains a
|
||||
specific dentry for some /proc/PID/fd/XXX, any future attempt to use the
|
||||
dentry by the attacker would lead to the dentry drop as a result of a
|
||||
failed ptrace check in ->revalidate(). Then the attacker cannot spawn a
|
||||
dentry for the specific fd number because of ptrace check in ->lookup().
|
||||
|
||||
The dentry drop can be still observed by an attacker by analysing
|
||||
information from /proc/slabinfo, which is addressed in the successive
|
||||
patch.
|
||||
|
||||
Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
|
||||
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
|
||||
Cc: Al Viro <viro@zeniv.linux.org.uk>
|
||||
Cc: Christoph Lameter <cl@linux-foundation.org>
|
||||
Cc: Pekka Enberg <penberg@kernel.org>
|
||||
Cc: Matt Mackall <mpm@selenic.com>
|
||||
Cc: Alexey Dobriyan <adobriyan@gmail.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
|
||||
fs/proc/base.c | 42 ++++++------------------------------------
|
||||
1 file changed, 6 insertions(+), 36 deletions(-)
|
||||
|
||||
Index: linux-2.6.git/fs/proc/base.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/base.c
|
||||
+++ linux-2.6.git/fs/proc/base.c
|
||||
@@ -1665,46 +1665,12 @@ out:
|
||||
return error;
|
||||
}
|
||||
|
||||
-static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
- struct kstat *stat)
|
||||
-{
|
||||
- struct inode *inode = dentry->d_inode;
|
||||
- struct task_struct *task = get_proc_task(inode);
|
||||
- int rc;
|
||||
-
|
||||
- if (task == NULL)
|
||||
- return -ESRCH;
|
||||
-
|
||||
- rc = -EACCES;
|
||||
- if (lock_trace(task))
|
||||
- goto out_task;
|
||||
-
|
||||
- generic_fillattr(inode, stat);
|
||||
- unlock_trace(task);
|
||||
- rc = 0;
|
||||
-out_task:
|
||||
- put_task_struct(task);
|
||||
- return rc;
|
||||
-}
|
||||
-
|
||||
static const struct inode_operations proc_pid_link_inode_operations = {
|
||||
.readlink = proc_pid_readlink,
|
||||
.follow_link = proc_pid_follow_link,
|
||||
.setattr = proc_setattr,
|
||||
};
|
||||
|
||||
-static const struct inode_operations proc_fdinfo_link_inode_operations = {
|
||||
- .setattr = proc_setattr,
|
||||
- .getattr = proc_pid_fd_link_getattr,
|
||||
-};
|
||||
-
|
||||
-static const struct inode_operations proc_fd_link_inode_operations = {
|
||||
- .readlink = proc_pid_readlink,
|
||||
- .follow_link = proc_pid_follow_link,
|
||||
- .setattr = proc_setattr,
|
||||
- .getattr = proc_pid_fd_link_getattr,
|
||||
-};
|
||||
-
|
||||
|
||||
/* building an inode */
|
||||
|
||||
@@ -2013,6 +1979,11 @@ static int tid_fd_revalidate(struct dent
|
||||
task = get_proc_task(inode);
|
||||
fd = proc_fd(inode);
|
||||
|
||||
+ if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
|
||||
+ put_task_struct(task);
|
||||
+ task = NULL;
|
||||
+ }
|
||||
+
|
||||
if (task) {
|
||||
files = get_files_struct(task);
|
||||
if (files) {
|
||||
@@ -2085,7 +2056,7 @@ static struct dentry *proc_fd_instantiat
|
||||
spin_unlock(&files->file_lock);
|
||||
put_files_struct(files);
|
||||
|
||||
- inode->i_op = &proc_fd_link_inode_operations;
|
||||
+ inode->i_op = &proc_pid_link_inode_operations;
|
||||
inode->i_size = 64;
|
||||
ei->op.proc_get_link = proc_fd_link;
|
||||
d_set_d_op(dentry, &tid_fd_dentry_operations);
|
||||
@@ -2267,7 +2238,6 @@ static struct dentry *proc_fdinfo_instan
|
||||
ei->fd = fd;
|
||||
inode->i_mode = S_IFREG | S_IRUSR;
|
||||
inode->i_fop = &proc_fdinfo_file_operations;
|
||||
- inode->i_op = &proc_fdinfo_link_inode_operations;
|
||||
d_set_d_op(dentry, &tid_fd_dentry_operations);
|
||||
d_add(dentry, inode);
|
||||
/* Close the race of the process dying before we return the dentry */
|
@ -0,0 +1,26 @@
|
||||
From: Pavel Emelyanov <xemul@openvz.org>
|
||||
|
||||
On reading sysctl dirs we should return -EISDIR instead of -EINVAL.
|
||||
|
||||
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
Cc: Alexey Dobriyan <adobriyan@gmail.com>
|
||||
Cc: Al Viro <viro@ZenIV.linux.org.uk>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
|
||||
fs/proc/proc_sysctl.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff -puN fs/proc/proc_sysctl.c~procfs-report-eisdir-when-reading-sysctl-dirs-in-proc fs/proc/proc_sysctl.c
|
||||
--- a/fs/proc/proc_sysctl.c~procfs-report-eisdir-when-reading-sysctl-dirs-in-proc
|
||||
+++ a/fs/proc/proc_sysctl.c
|
||||
@@ -360,6 +360,7 @@ static const struct file_operations proc
|
||||
};
|
||||
|
||||
static const struct file_operations proc_sys_dir_file_operations = {
|
||||
+ .read = generic_read_dir,
|
||||
.readdir = proc_sys_readdir,
|
||||
.llseek = generic_file_llseek,
|
||||
};
|
||||
_
|
5
kernel/readme
Normal file
5
kernel/readme
Normal file
@ -0,0 +1,5 @@
|
||||
The kernel patches series. See "series" file to obtain
|
||||
order of application. Not all patches address C/R directly
|
||||
but some of them are needed due to dependencies.
|
||||
|
||||
Has been tested on Linux 3.1-rc3.
|
12
kernel/series
Normal file
12
kernel/series
Normal file
@ -0,0 +1,12 @@
|
||||
cr-proc-add-children
|
||||
procfs-report-eisdir-when-reading-sysctl-dirs-in-proc.patch
|
||||
proc-fix-races-against-execve-of-proc-pid-fd.patch
|
||||
proc-fix-races-against-execve-of-proc-pid-fd-fix.patch
|
||||
proc-force-dcache-drop-on-unauthorized-access.patch
|
||||
cr-statfs-callback-for-pipefs
|
||||
cr-clone-with-pid-support
|
||||
fs-proc-switch-to-dentry
|
||||
cr-proc-map-files-21
|
||||
fs-proc-add-tls
|
||||
fs-add-do-close
|
||||
binfmt-elf-for-cr-4
|
19
parasite-elf.lds.S
Normal file
19
parasite-elf.lds.S
Normal file
@ -0,0 +1,19 @@
|
||||
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
|
||||
OUTPUT_ARCH(i386:x86-64)
|
||||
|
||||
SECTIONS
|
||||
{
|
||||
. = 0;
|
||||
.text : {
|
||||
*(.parasite.head.text)
|
||||
*(.text)
|
||||
. = ALIGN(8);
|
||||
}
|
||||
.data : {
|
||||
*(.data)
|
||||
*(.rodata)
|
||||
*(.bss)
|
||||
*(.parasite.stack)
|
||||
. = ALIGN(8);
|
||||
}
|
||||
}
|
514
parasite-syscall.c
Normal file
514
parasite-syscall.c
Normal file
@ -0,0 +1,514 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
#include <limits.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <sys/ptrace.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/user.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include "compiler.h"
|
||||
#include "syscall.h"
|
||||
#include "types.h"
|
||||
#include "util.h"
|
||||
|
||||
#include "parasite-syscall.h"
|
||||
#include "parasite-blob.h"
|
||||
#include "parasite.h"
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static const char code_syscall[] = {0x0f, 0x05, 0xcc, 0xcc,
|
||||
0xcc, 0xcc, 0xcc, 0xcc};
|
||||
|
||||
#define code_syscall_size (round_up(sizeof(code_syscall), sizeof(long)))
|
||||
#define parasite_size (round_up(sizeof(parasite_blob), sizeof(long)))
|
||||
|
||||
static int syscall_fits_vma_area(struct vma_area *vma_area)
|
||||
{
|
||||
return can_run_syscall((unsigned long)vma_area->vma.start,
|
||||
(unsigned long)vma_area->vma.start,
|
||||
(unsigned long)vma_area->vma.end);
|
||||
}
|
||||
|
||||
int can_run_syscall(unsigned long ip, unsigned long start, unsigned long end)
|
||||
{
|
||||
return ip >= start && ip < (end - code_syscall_size);
|
||||
}
|
||||
|
||||
/*
 * Run mmap() inside the task @pid by means of syscall_seized().
 *
 * @regs is used as the template for the register set and as the
 * location (its ip) where the syscall is executed; the remaining
 * arguments are loaded into the registers as marked below.
 *
 * Returns the value the syscall left in ax, or NULL if
 * syscall_seized() failed or that value is negative when read
 * as a long (an error code from the kernel).
 */
void *mmap_seized(pid_t pid, user_regs_struct_t *regs,
		  void *addr, size_t length, int prot,
		  int flags, int fd, off_t offset)
{
	user_regs_struct_t params = *regs;
	void *mmaped = NULL;
	int ret;

	params.ax	= (unsigned long)__NR_mmap;	/* mmap		*/
	params.di	= (unsigned long)addr;		/* @addr	*/
	params.si	= (unsigned long)length;	/* @length	*/
	params.dx	= (unsigned long)prot;		/* @prot	*/
	params.r10	= (unsigned long)flags;		/* @flags	*/
	params.r8	= (unsigned long)fd;		/* @fd		*/
	params.r9	= (unsigned long)offset;	/* @offset	*/

	/* params serves both as the input and as the place for the result */
	ret = syscall_seized(pid, regs, &params, &params);
	if (ret)
		goto err;
	mmaped = (void *)params.ax;

	/* error code from the kernel space */
	if ((long)mmaped < 0)
		mmaped = NULL;
err:
	return mmaped;
}
|
||||
|
||||
/*
 * Run munmap(@addr, @length) inside the task @pid by means of
 * syscall_seized(), using @regs as the register template and as
 * the location (its ip) where the syscall is executed.
 *
 * Returns the non-zero code of syscall_seized() if it failed,
 * otherwise the value the syscall left in ax, converted to int.
 */
int munmap_seized(pid_t pid, user_regs_struct_t *regs,
		  void *addr, size_t length)
{
	user_regs_struct_t params = *regs;
	int ret;

	params.ax	= (unsigned long)__NR_munmap;	/* munmap	*/
	params.di	= (unsigned long)addr;		/* @addr	*/
	params.si	= (unsigned long)length;	/* @length	*/

	/* params serves both as the input and as the place for the result */
	ret = syscall_seized(pid, regs, &params, &params);
	if (!ret)
		ret = (int)params.ax;

	return ret;
}
|
||||
|
||||
/*
 * Make the task @pid execute the exit syscall with -1 as its argument,
 * by means of syscall_seized(). @where supplies the register template
 * and the location (its ip) where the syscall is executed.
 *
 * Returns whatever syscall_seized() returns.
 */
int kill_seized(pid_t pid, user_regs_struct_t *where)
{
	user_regs_struct_t params = *where;
	int ret;

	params.ax	= (unsigned long)__NR_exit;	/* exit		*/
	params.di	= (unsigned long)-1;		/* @error-code	*/

	ret = syscall_seized(pid, where, &params, &params);

	return ret;
}
|
||||
|
||||
int syscall_seized(pid_t pid,
|
||||
user_regs_struct_t *where,
|
||||
user_regs_struct_t *params,
|
||||
user_regs_struct_t *result)
|
||||
{
|
||||
user_regs_struct_t regs_orig, regs;
|
||||
unsigned long start_ip;
|
||||
char saved[sizeof(code_syscall)];
|
||||
siginfo_t siginfo;
|
||||
int status;
|
||||
int ret = -1;
|
||||
|
||||
BUILD_BUG_ON(sizeof(code_syscall) != BUILTIN_SYSCALL_SIZE);
|
||||
BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
|
||||
|
||||
start_ip = (unsigned long)where->ip;
|
||||
|
||||
jerr(ptrace_peek_area(pid, (void *)saved, (void *)start_ip, code_syscall_size), err);
|
||||
jerr(ptrace_poke_area(pid, (void *)code_syscall, (void *)start_ip, code_syscall_size), err);
|
||||
|
||||
again:
|
||||
jerr(ptrace(PTRACE_GETREGS, pid, NULL, ®s), err);
|
||||
regs_orig = regs;
|
||||
|
||||
regs.ip = start_ip;
|
||||
regs.ax = params->ax;
|
||||
regs.di = params->di;
|
||||
regs.si = params->si;
|
||||
regs.dx = params->dx;
|
||||
regs.r10 = params->r10;
|
||||
regs.r8 = params->r8;
|
||||
regs.r9 = params->r9;
|
||||
regs.orig_ax = -1; /* avoid end-of-syscall processing */
|
||||
|
||||
jerr(ptrace(PTRACE_SETREGS, pid, NULL, ®s), err_restore);
|
||||
|
||||
/*
|
||||
* Most ideas are taken from Tejun Heo's parasite thread
|
||||
* https://code.google.com/p/ptrace-parasite/
|
||||
*/
|
||||
|
||||
/*
|
||||
* Run the parasite code, at the completion it'll trigger
|
||||
* int3 and inform us that all is done.
|
||||
*/
|
||||
|
||||
jerr(ptrace(PTRACE_CONT, pid, NULL, NULL), err_restore_full);
|
||||
jerr(wait4(pid, &status, __WALL, NULL) != pid, err_restore_full);
|
||||
jerr(!WIFSTOPPED(status), err_restore_full);
|
||||
jerr(ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo),err_restore_full);
|
||||
|
||||
jerr(ptrace(PTRACE_GETREGS, pid, NULL, ®s), err_restore_full);
|
||||
|
||||
if (WSTOPSIG(status) != SIGTRAP || siginfo.si_code != SI_KERNEL) {
|
||||
retry_signal:
|
||||
/* pr_debug("** delivering signal %d si_code=%d\n",
|
||||
siginfo.si_signo, siginfo.si_code); */
|
||||
/* FIXME: jerr(siginfo.si_code > 0, err_restore_full); */
|
||||
jerr(ptrace(PTRACE_SETREGS, pid, NULL, (void *)®s_orig), err_restore_full);
|
||||
jerr(ptrace(PTRACE_INTERRUPT, pid, NULL, NULL), err_restore_full);
|
||||
jerr(ptrace(PTRACE_CONT, pid, NULL, (void *)(unsigned long)siginfo.si_signo), err_restore_full);
|
||||
|
||||
jerr(wait4(pid, &status, __WALL, NULL) != pid, err_restore_full);
|
||||
jerr(!WIFSTOPPED(status), err_restore_full);
|
||||
jerr(ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo), err_restore_full);
|
||||
|
||||
if (siginfo.si_code >> 8 != PTRACE_EVENT_STOP)
|
||||
goto retry_signal;
|
||||
|
||||
goto again;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
/*
|
||||
* Our code is done.
|
||||
*/
|
||||
jerr(ptrace(PTRACE_INTERRUPT, pid, NULL, NULL), err_restore_full);
|
||||
jerr(ptrace(PTRACE_CONT, pid, NULL, NULL), err_restore_full);
|
||||
|
||||
jerr(wait4(pid, &status, __WALL, NULL) != pid, err_restore_full);
|
||||
jerr(!WIFSTOPPED(status), err_restore_full);
|
||||
jerr(ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo), err_restore_full);
|
||||
|
||||
jerr((siginfo.si_code >> 8 != PTRACE_EVENT_STOP), err_restore_full);
|
||||
|
||||
jerr(ptrace(PTRACE_GETREGS, pid, NULL, ®s), err_restore_full);
|
||||
|
||||
ret = 0;
|
||||
*result = regs;
|
||||
|
||||
err_restore_full:
|
||||
if (ptrace(PTRACE_SETREGS, pid, NULL, ®s_orig))
|
||||
pr_panic("Can't restore registers (pid: %d)\n", pid);
|
||||
|
||||
err_restore:
|
||||
if (ptrace_poke_area(pid, (void *)saved, (void *)start_ip, code_syscall_size))
|
||||
pr_panic("Crap... Can't restore data (pid: %d)\n", pid);
|
||||
err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct vma_area *get_vma_by_ip(struct list_head *vma_area_list, unsigned long ip)
|
||||
{
|
||||
struct vma_area *vma_area;
|
||||
|
||||
list_for_each_entry(vma_area, vma_area_list, list) {
|
||||
if (in_vma_area(vma_area, ip)) {
|
||||
if (vma_area->vma.prot & PROT_EXEC) {
|
||||
if (syscall_fits_vma_area(vma_area))
|
||||
return vma_area;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_area_list,
|
||||
struct cr_fdset *cr_fdset, int fd_type)
|
||||
{
|
||||
parasite_args_cmd_dumppages_t parasite_dumppages = { };
|
||||
parasite_args_t parasite_arg = { };
|
||||
|
||||
user_regs_struct_t regs, regs_orig;
|
||||
unsigned long nrpages_dumped = 0;
|
||||
struct vma_area *vma_area;
|
||||
siginfo_t siginfo;
|
||||
int status, path_len, ret = -1;
|
||||
|
||||
pr_info("\n");
|
||||
pr_info("Dumping pages (type: %d pid: %d)\n", fd_type, ctl->pid);
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
path_len = strlen(cr_fdset->desc[fd_type].name) + 1;
|
||||
|
||||
if (path_len > sizeof(parasite_dumppages.open_path)) {
|
||||
pr_panic("Dumping pages path is too long (%d while %d allowed)\n",
|
||||
path_len, sizeof(parasite_dumppages.open_path));
|
||||
goto err;
|
||||
}
|
||||
|
||||
jerr(ptrace(PTRACE_GETREGS, ctl->pid, NULL, ®s_orig), err);
|
||||
|
||||
parasite_arg.command = PARASITE_CMD_DUMPPAGES;
|
||||
parasite_arg.args_size = sizeof(parasite_dumppages);
|
||||
parasite_arg.args = ¶site_dumppages;
|
||||
|
||||
strncpy(parasite_dumppages.open_path, cr_fdset->desc[fd_type].name,
|
||||
sizeof(parasite_dumppages.open_path));
|
||||
parasite_dumppages.open_flags = O_WRONLY;
|
||||
parasite_dumppages.open_mode = CR_FD_PERM;
|
||||
parasite_dumppages.fd = -1UL;
|
||||
|
||||
/*
|
||||
* Pass the command first, it's immutable.
|
||||
*/
|
||||
jerr(ptrace_poke_area((long)ctl->pid, (void *)¶site_arg.command,
|
||||
(void *)ctl->addr_cmd, sizeof(parasite_arg.command)),
|
||||
err_restore);
|
||||
|
||||
list_for_each_entry(vma_area, vma_area_list, list) {
|
||||
|
||||
/*
|
||||
* The special areas are not dumped.
|
||||
*/
|
||||
if (!(vma_area->vma.status & VMA_AREA_REGULAR))
|
||||
continue;
|
||||
|
||||
/* No dumps for file-shared mappings */
|
||||
if (vma_area->vma.status & VMA_FILE_SHARED)
|
||||
continue;
|
||||
|
||||
pr_info_vma(vma_area);
|
||||
|
||||
again:
|
||||
jerr(ptrace(PTRACE_GETREGS, ctl->pid, NULL, ®s), err_restore);
|
||||
regs.ip = ctl->parasite_ip;
|
||||
jerr(ptrace(PTRACE_SETREGS, ctl->pid, NULL, ®s), err_restore);
|
||||
|
||||
parasite_dumppages.vma_entry = vma_area->vma;
|
||||
|
||||
if (ptrace_poke_area((long)ctl->pid, (void *)parasite_arg.args,
|
||||
(void *)ctl->addr_args, parasite_arg.args_size)) {
|
||||
pr_error("Can't setup parasite arguments (pid: %d)\n", ctl->pid);
|
||||
goto err_restore;
|
||||
}
|
||||
|
||||
jerr(ptrace(PTRACE_CONT, (long)ctl->pid, NULL, NULL), err_restore);
|
||||
jerr(wait4((long)ctl->pid, &status, __WALL, NULL) != (long)ctl->pid, err_restore);
|
||||
jerr(!WIFSTOPPED(status), err_restore);
|
||||
jerr(ptrace(PTRACE_GETSIGINFO, (long)ctl->pid, NULL, &siginfo), err_restore);
|
||||
|
||||
if (WSTOPSIG(status) != SIGTRAP || siginfo.si_code != SI_KERNEL) {
|
||||
retry_signal:
|
||||
/* pr_debug("** delivering signal %d si_code=%d\n",
|
||||
siginfo.si_signo, siginfo.si_code); */
|
||||
/* FIXME: jerr(siginfo.si_code > 0, err_restore_full); */
|
||||
jerr(ptrace(PTRACE_SETREGS, (long)ctl->pid, NULL, (void *)®s_orig), err_restore);
|
||||
jerr(ptrace(PTRACE_INTERRUPT, (long)ctl->pid, NULL, NULL), err_restore);
|
||||
jerr(ptrace(PTRACE_CONT, (long)ctl->pid, NULL, (void *)(unsigned long)siginfo.si_signo), err_restore);
|
||||
|
||||
jerr(wait4((long)ctl->pid, &status, __WALL, NULL) != (long)ctl->pid, err_restore);
|
||||
jerr(!WIFSTOPPED(status), err_restore);
|
||||
jerr(ptrace(PTRACE_GETSIGINFO, (long)ctl->pid, NULL, &siginfo), err_restore);
|
||||
|
||||
if (siginfo.si_code >> 8 != PTRACE_EVENT_STOP)
|
||||
goto retry_signal;
|
||||
|
||||
goto again;
|
||||
}
|
||||
|
||||
/*
|
||||
* It's a bit tricky, the file get opened inside
|
||||
* parasite but close via explicit syscall. Better would
|
||||
* be to add some 'status' and close inside parasite on
|
||||
* last call.
|
||||
*/
|
||||
if (parasite_dumppages.fd == -1UL) {
|
||||
if (ptrace_peek_area((long)ctl->pid,
|
||||
(void *)¶site_dumppages.fd,
|
||||
(void *)(ctl->addr_args +
|
||||
offsetof(parasite_args_cmd_dumppages_t, fd)),
|
||||
sizeof(parasite_dumppages.fd))) {
|
||||
pr_error("Can't get file descriptor back (pid: %d)\n", ctl->pid);
|
||||
goto err_restore;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Get some statistics.
|
||||
*/
|
||||
if (ptrace_peek_area((long)ctl->pid,
|
||||
(void *)¶site_dumppages.nrpages_dumped,
|
||||
(void *)(ctl->addr_args +
|
||||
offsetof(parasite_args_cmd_dumppages_t, nrpages_dumped)),
|
||||
sizeof(parasite_dumppages.fd))) {
|
||||
pr_error("Can't get statistics (pid: %d)\n", ctl->pid);
|
||||
goto err_restore;
|
||||
}
|
||||
pr_info(" (dumped: %16li pages)\n", parasite_dumppages.nrpages_dumped);
|
||||
nrpages_dumped += parasite_dumppages.nrpages_dumped;
|
||||
}
|
||||
|
||||
/*
|
||||
* Our code is done.
|
||||
*/
|
||||
jerr(ptrace(PTRACE_INTERRUPT, (long)ctl->pid, NULL, NULL), err_restore);
|
||||
jerr(ptrace(PTRACE_CONT, (long)ctl->pid, NULL, NULL), err_restore);
|
||||
|
||||
jerr(wait4((long)ctl->pid, &status, __WALL, NULL) != (long)ctl->pid, err_restore);
|
||||
jerr(!WIFSTOPPED(status), err_restore);
|
||||
jerr(ptrace(PTRACE_GETSIGINFO, (long)ctl->pid, NULL, &siginfo), err_restore);
|
||||
|
||||
jerr((siginfo.si_code >> 8 != PTRACE_EVENT_STOP), err_restore);
|
||||
|
||||
jerr(ptrace(PTRACE_GETREGS, (long)ctl->pid, NULL, ®s), err_restore);
|
||||
|
||||
ret = 0;
|
||||
|
||||
/* Finally close the descriptor the parasite has opened */
|
||||
if (parasite_dumppages.fd != -1UL) {
|
||||
regs = regs_orig;
|
||||
regs.ax = __NR_close; /* close */
|
||||
regs.di = parasite_dumppages.fd; /* @fd */
|
||||
ret = syscall_seized(ctl->pid, ®s_orig, ®s, ®s);
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't know the position in file since it's updated
|
||||
* outside of our process.
|
||||
*/
|
||||
lseek(cr_fdset->desc[CR_FD_PAGES].fd, 0, SEEK_END);
|
||||
|
||||
/* Ending page */
|
||||
write_ptr_safe(cr_fdset->desc[CR_FD_PAGES].fd, &zero_page_entry, err_restore);
|
||||
|
||||
pr_info("\n");
|
||||
pr_info("Summary: %16li pages dumped\n", nrpages_dumped);
|
||||
|
||||
err_restore:
|
||||
if (ptrace(PTRACE_SETREGS, (long)ctl->pid, NULL, ®s_orig))
|
||||
pr_panic("Can't restore registers (pid: %d)\n", ctl->pid);
|
||||
|
||||
err:
|
||||
pr_info("----------------------------------------\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int parasite_cure_seized(struct parasite_ctl **p_ctl,
|
||||
struct list_head *vma_area_list)
|
||||
{
|
||||
user_regs_struct_t regs, regs_orig;
|
||||
struct parasite_ctl *ctl;
|
||||
struct vma_area *vma_area;
|
||||
int ret = -1;
|
||||
|
||||
if (!p_ctl || !*p_ctl)
|
||||
return 0;
|
||||
|
||||
ctl = *p_ctl;
|
||||
|
||||
jerr(ptrace(PTRACE_GETREGS, ctl->pid, NULL, ®s), err);
|
||||
|
||||
regs_orig = regs;
|
||||
|
||||
vma_area = get_vma_by_ip(vma_area_list, regs.ip);
|
||||
if (!vma_area) {
|
||||
pr_error("No suitable VMA found to run cure (pid: %d)\n", ctl->pid);
|
||||
goto err;
|
||||
}
|
||||
|
||||
regs.ip = vma_area->vma.start;
|
||||
|
||||
ret = munmap_seized(ctl->pid, ®s,
|
||||
(void *)ctl->vma_area->vma.start,
|
||||
(size_t)vma_entry_len(&ctl->vma_area->vma));
|
||||
if (ret)
|
||||
pr_error("munmap_seized failed (pid: %d)\n", ctl->pid);
|
||||
|
||||
if (ptrace(PTRACE_SETREGS, ctl->pid, NULL, ®s_orig)) {
|
||||
ret = -1;
|
||||
pr_panic("PTRACE_SETREGS failed (pid: %d)\n", ctl->pid);
|
||||
}
|
||||
|
||||
free(*p_ctl), *p_ctl = NULL;
|
||||
err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct parasite_ctl *parasite_infect_seized(pid_t pid, void *addr_hint, struct list_head *vma_area_list)
|
||||
{
|
||||
user_regs_struct_t regs, regs_orig;
|
||||
struct parasite_ctl *ctl = NULL;
|
||||
struct vma_area *vma_area;
|
||||
void *mmaped;
|
||||
|
||||
ctl = xzalloc(sizeof(*ctl) + sizeof(*vma_area));
|
||||
if (!ctl) {
|
||||
pr_error("Parasite control block allocation failed (pid: %d)\n", pid);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Setup control block */
|
||||
ctl->pid = pid;
|
||||
ctl->vma_area = (struct vma_area *)(char *)&ctl[sizeof(*ctl)];
|
||||
|
||||
if (ptrace(PTRACE_GETREGS, pid, NULL, ®s))
|
||||
pr_error_jmp(err_free);
|
||||
|
||||
vma_area = get_vma_by_ip(vma_area_list, regs.ip);
|
||||
if (!vma_area) {
|
||||
pr_error("No suitable VMA found to run parasite "
|
||||
"bootstrap code (pid: %d)\n", pid);
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
regs_orig = regs;
|
||||
|
||||
/*
|
||||
* Prepare for in-process syscall.
|
||||
*/
|
||||
ctl->vma_area->vma.prot = PROT_READ | PROT_WRITE | PROT_EXEC;
|
||||
ctl->vma_area->vma.flags = MAP_PRIVATE | MAP_ANONYMOUS;
|
||||
|
||||
regs.ip = vma_area->vma.start;
|
||||
|
||||
mmaped = mmap_seized(pid, ®s, addr_hint, (size_t)parasite_size,
|
||||
(int)ctl->vma_area->vma.prot,
|
||||
(int)ctl->vma_area->vma.flags,
|
||||
(int)-1, (off_t)0);
|
||||
|
||||
if (!mmaped || (long)mmaped < 0) {
|
||||
pr_error("Can't allocate memory for parasite blob (pid: %d)\n", pid);
|
||||
goto err_restore_regs;
|
||||
}
|
||||
|
||||
ctl->parasite_ip = PARASITE_HEAD_ADDR((unsigned long)mmaped);
|
||||
ctl->parasite_complete_ip = PARASITE_COMPLETE_ADDR((unsigned long)mmaped);
|
||||
ctl->addr_cmd = PARASITE_CMD_ADDR((unsigned long)mmaped);
|
||||
ctl->addr_args = PARASITE_ARGS_ADDR((unsigned long)mmaped);
|
||||
|
||||
ctl->vma_area->vma.start= (u64)mmaped;
|
||||
ctl->vma_area->vma.end = (u64)(mmaped + parasite_size);
|
||||
|
||||
if (ptrace_poke_area(pid, parasite_blob, mmaped, parasite_size)) {
|
||||
pr_error("Can't inject parasite blob (pid: %d)\n", pid);
|
||||
goto err_munmap_restore;
|
||||
}
|
||||
|
||||
jerr(ptrace(PTRACE_SETREGS, pid, NULL, ®s_orig), err_munmap_restore);
|
||||
|
||||
return ctl;
|
||||
|
||||
err_munmap_restore:
|
||||
regs = regs_orig, regs.ip = vma_area->vma.start;
|
||||
if (munmap_seized(pid, ®s, mmaped, parasite_size))
|
||||
pr_panic("mmap_seized failed (pid: %d)\n", pid);
|
||||
err_restore_regs:
|
||||
if (ptrace(PTRACE_SETREGS, pid, NULL, ®s_orig))
|
||||
pr_panic("PTRACE_SETREGS failed (pid: %d)\n", pid);
|
||||
err_free:
|
||||
if (ctl)
|
||||
free(ctl);
|
||||
err:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#else /* CONFIG_X86_64 */
|
||||
# error x86-32 is not yet implemented
|
||||
#endif /* CONFIG_X86_64 */
|
339
parasite.c
Normal file
339
parasite.c
Normal file
@ -0,0 +1,339 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "compiler.h"
|
||||
#include "types.h"
|
||||
#include "syscall.h"
|
||||
#include "parasite.h"
|
||||
#include "image.h"
|
||||
#include "crtools.h"
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
static void *brk_start, *brk_end, *brk_tail;
|
||||
|
||||
static struct page_entry page;
|
||||
static struct vma_entry vma;
|
||||
|
||||
void *memcpy(void *dest, const void *src, size_t n)
|
||||
{
|
||||
long d0, d1, d2;
|
||||
asm volatile(
|
||||
"rep ; movsq\n\t"
|
||||
"movq %4,%%rcx\n\t"
|
||||
"rep ; movsb\n\t"
|
||||
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
|
||||
: "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src)
|
||||
: "memory");
|
||||
|
||||
return dest;
|
||||
}
|
||||
|
||||
static void brk_init(void *brk)
|
||||
{
|
||||
brk_start = brk_tail = brk;
|
||||
brk_end = brk_start + PARASITE_BRK_SIZE;
|
||||
}
|
||||
|
||||
static void *brk_alloc(unsigned long bytes)
|
||||
{
|
||||
void *addr = NULL;
|
||||
if (brk_end > (brk_tail + bytes)) {
|
||||
addr = brk_tail;
|
||||
brk_tail+= bytes;
|
||||
}
|
||||
return addr;
|
||||
}
|
||||
|
||||
static void brk_free(unsigned long bytes)
|
||||
{
|
||||
if (brk_start >= (brk_tail - bytes))
|
||||
brk_tail -= bytes;
|
||||
}
|
||||
|
||||
/*
 * Length of the NUL-terminated string @str, the terminator excluded.
 */
static unsigned long builtin_strlen(char *str)
{
	char *end = str;

	while (*end)
		end++;

	return (unsigned long)(end - str);
}
|
||||
|
||||
/* Digits for the hexadecimal output */
static const unsigned char hex[] = "0123456789abcdef";

/*
 * Format @v as a fixed-width hex string, two digits per byte of a
 * long, walking the in-memory bytes from the highest index down to
 * index 0. The result lives in a static buffer which is overwritten
 * by the next call.
 */
static char *long2hex(unsigned long v)
{
	static char buf[32];
	const unsigned char *bytes = (const unsigned char *)&v;
	char *out = buf;
	int idx = sizeof(long);

	while (idx-- > 0) {
		unsigned char byte = bytes[idx];

		*out++ = hex[(byte & 0xf0) >> 4];
		*out++ = hex[byte & 0x0f];
	}
	*out = 0;

	return buf;
}
|
||||
|
||||
/*
 * Write the NUL-terminated @msg (without the terminator) to file
 * descriptor 1 with a raw write syscall.
 */
static void sys_write_msg(const char *msg)
{
	int len;

	for (len = 0; msg[len]; len++)
		;

	sys_write(1, msg, len);
}
|
||||
|
||||
static int restore_core(char *corefile)
|
||||
{
|
||||
int ret = PARASITE_ERR_FAIL;
|
||||
int fd_core;
|
||||
|
||||
fd_core = (int)sys_open(corefile, O_RDONLY, 0600);
|
||||
if (fd_core < 0) {
|
||||
ret = PARASITE_ERR_OPEN;
|
||||
goto err_open;
|
||||
}
|
||||
|
||||
/* Skip the header */
|
||||
sys_lseek(fd_core, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
|
||||
|
||||
/* First VMA areas */
|
||||
while (1) {
|
||||
unsigned long addr;
|
||||
|
||||
ret = sys_read(fd_core, &vma, sizeof(vma));
|
||||
if (ret && ret != sizeof(vma)) {
|
||||
ret = PARASITE_ERR_CORE_VMA;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (vma.start == 0 && vma.end == 0)
|
||||
break;
|
||||
|
||||
/* Make sure it's mapped into proper place */
|
||||
addr = sys_mmap((void *)vma.start,
|
||||
vma.end - vma.start,
|
||||
vma.prot,
|
||||
vma.flags | MAP_FIXED,
|
||||
vma.fd,
|
||||
vma.pgoff);
|
||||
if (addr != vma.start) {
|
||||
ret = PARASITE_ERR_MMAP;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now pages */
|
||||
while (1) {
|
||||
unsigned long count;
|
||||
|
||||
ret = sys_read(fd_core, &page.va, sizeof(page.va));
|
||||
if (ret && ret != sizeof(page.va)) {
|
||||
ret = PARASITE_ERR_CORE_PAGE;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (page.va == 0)
|
||||
break;
|
||||
|
||||
ret = sys_read(fd_core, page.data, sizeof(page.data));
|
||||
if (ret && ret != sizeof(page.data)) {
|
||||
ret = PARASITE_ERR_CORE_PAGE;
|
||||
goto err;
|
||||
}
|
||||
|
||||
memcpy((void *)page.va, page.data, sizeof(page.data));
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
err:
|
||||
sys_close(fd_core);
|
||||
|
||||
err_open:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Dump the pages of one VMA (args->vma_entry) into the image file.
 *
 * On the first call (args->fd == -1UL) the file args->open_path is
 * opened and the descriptor is stored in args->fd so it is reused by
 * subsequent calls. Every page mincore() reports as resident (or every
 * page at all if VMA_DUMP_ALL is set) is appended to the file as its
 * virtual address followed by PAGE_SIZE bytes of contents; the amount
 * of pages written is left in args->nrpages_dumped.
 *
 * An unreadable VMA is temporarily made readable and gets its original
 * protection back before return, on the failure paths as well.
 *
 * Returns 0 on success or one of PARASITE_ERR_ codes.
 */
static int dump_pages(parasite_args_cmd_dumppages_t *args)
{
	int ret = PARASITE_ERR_FAIL;
	unsigned long nrpages, pfn, length;
	unsigned long prot_old, prot_new;
	unsigned char *map_brk = NULL;
	unsigned char *map;
	bool dump_all = false;

	args->nrpages_dumped = 0;
	prot_old = prot_new = 0;

	if (args->fd == -1UL) {
		args->fd = sys_open(args->open_path, args->open_flags, args->open_mode);
		/*
		 * The descriptor is compared with -1UL above, so it might be
		 * of unsigned type; test the sign explicitly, otherwise a
		 * failed open is never noticed.
		 */
		if ((long)args->fd < 0) {
			sys_write_msg("sys_open failed\n");
			/* Keep the "not opened" marker so nobody uses a bogus fd */
			args->fd = -1UL;
			ret = PARASITE_ERR_OPEN;
			goto err;
		}
	}

	/* Start from the end of file */
	sys_lseek(args->fd, 0, SEEK_END);

	length	= args->vma_entry.end - args->vma_entry.start;
	nrpages	= length / PAGE_SIZE;

	/*
	 * brk should allow us to handle up to 128M of memory,
	 * otherwise call for mmap.
	 */
	map = brk_alloc(nrpages);
	if (map) {
		map_brk = map;
	} else {
		map = (void *)sys_mmap(NULL, nrpages,
				       PROT_READ | PROT_WRITE,
				       MAP_PRIVATE | MAP_ANONYMOUS,
				       -1, 0);
		if ((long)map < 0) {
			sys_write_msg("sys_mmap failed\n");
			ret = PARASITE_ERR_MMAP;
			goto err;
		}
	}

	dump_all = !!(args->vma_entry.status & VMA_DUMP_ALL);

	/*
	 * Try to change page protection if needed so we would
	 * be able to dump contents.
	 */
	if (!(args->vma_entry.prot & PROT_READ)) {
		prot_old = (unsigned long)args->vma_entry.prot;
		prot_new = prot_old | PROT_READ;
		if (sys_mprotect((unsigned long)args->vma_entry.start,
				 (unsigned long)vma_entry_len(&args->vma_entry),
				 prot_new)) {
			sys_write_msg("sys_mprotect failed\n");
			ret = PARASITE_ERR_MPROTECT;
			/* Nothing was changed, so nothing to put back */
			goto err_free;
		}
	}

	/*
	 * Dumping the whole VMA range is not a common operation
	 * so stick for mincore as a basis.
	 */
	if (sys_mincore((unsigned long)args->vma_entry.start, length, map)) {
		sys_write_msg("sys_mincore failed\n");
		ret = PARASITE_ERR_MINCORE;
		goto err_prot;
	}

	ret = 0;
	for (pfn = 0; pfn < nrpages; pfn++) {
		unsigned long vaddr, written;

		if ((map[pfn] & PAGE_RSS) || dump_all) {
			/*
			 * That's the optimized write of
			 * page_entry structure, see image.h
			 */
			vaddr = (unsigned long)args->vma_entry.start + pfn * PAGE_SIZE;
			written = 0;

			written += sys_write(args->fd, &vaddr, sizeof(vaddr));
			written += sys_write(args->fd, (void *)vaddr, PAGE_SIZE);
			if (written != sizeof(vaddr) + PAGE_SIZE) {
				ret = PARASITE_ERR_WRITE;
				sys_write_msg("sys_write on page failed\n");
				goto err_prot;
			}

			args->nrpages_dumped++;
		}
	}

err_prot:
	/*
	 * Don't left pages readable if they were not. An error which
	 * brought us here is more important than a failure to restore
	 * the protection, so it is not overwritten.
	 */
	if (prot_old != prot_new) {
		if (sys_mprotect((unsigned long)args->vma_entry.start,
				 (unsigned long)vma_entry_len(&args->vma_entry),
				 prot_old)) {
			sys_write_msg("PANIC: Ouch! sys_mprotect failed on resore\n");
			if (!ret)
				ret = PARASITE_ERR_MPROTECT;
		}
	}

err_free:
	if (map_brk)
		brk_free(nrpages);
	else
		sys_munmap(map, nrpages);
err:
	return ret;
}
|
||||
|
||||
/*
 * Command dispatcher of the parasite, called from parasite_head.
 *
 * @cmd selects the operation, @args points to its arguments and @brk
 * to the memory the brk allocator is (re)initialized over on every
 * call. Returns the result of the dump/restore handlers, 0 for any
 * other command including unknown ones.
 */
static int __used parasite_service(unsigned long cmd, void *args, void *brk)
{
	int status = 0;

	brk_init(brk);

	switch (cmd) {
	case PARASITE_CMD_DUMPPAGES:
		status = dump_pages((parasite_args_cmd_dumppages_t *)args);
		break;
	case PARASITE_CMD_RESTORECORE:
		status = restore_core((char *)args);
		break;
	case PARASITE_CMD_KILLME:
		sys_close(0);
		break;
	case PARASITE_CMD_PINGME:
		break;
	default:
		sys_write_msg("Unknown command to parasite\n");
		break;
	}

	return status;
}
|
||||
|
||||
/*
 * Entry point of the parasite blob, written in assembly as a whole.
 *
 * The code points the stack at the parasite_stack label, loads the
 * value stored at parasite_cmd and the addresses of parasite_args and
 * parasite_brk into edi/rsi/rdx (the first three arguments of the
 * x86-64 calling convention), calls parasite_service() and then hits
 * int3. The command word, the argument area, the stack and the brk
 * area are emitted right behind the code in that order.
 *
 * NOTE(review): __parasite_head presumably places this function in
 * .parasite.head.text so the linker script puts it first -- confirm.
 */
static void __parasite_head __used parasite_head(void)
|
||||
{
|
||||
/*
|
||||
* The linker will handle the stack allocation.
|
||||
*/
|
||||
asm volatile("parasite_head_start: \n\t"
|
||||
"leaq parasite_stack(%rip), %rsp \n\t"
|
||||
"pushq $0 \n\t"
|
||||
"movq %rsp, %rbp \n\t"
|
||||
"movl parasite_cmd(%rip), %edi \n\t" /* 1st argument: cmd */
|
||||
"leaq parasite_args(%rip), %rsi \n\t" /* 2nd argument: args */
|
||||
"leaq parasite_brk(%rip), %rdx \n\t" /* 3rd argument: brk */
|
||||
"call parasite_service \n\t"
|
||||
"parasite_service_complete: \n\t"
|
||||
"int $0x03 \n\t" /* breakpoint trap once the service returns */
|
||||
".align 8 \n\t"
|
||||
"parasite_cmd: \n\t"
|
||||
".long 0 \n\t"
|
||||
"parasite_args: \n\t"
|
||||
".long 0 \n\t"
|
||||
".skip "__stringify(PARASITE_ARG_SIZE)",0 \n\t"
|
||||
".skip "__stringify(PARASITE_STACK_SIZE)", 0 \n\t" /* stack body, the label below follows it */
|
||||
"parasite_stack: \n\t"
|
||||
".long 0 \n\t"
|
||||
"parasite_brk: \n\t"
|
||||
".skip "__stringify(PARASITE_BRK_SIZE)", 0 \n\t"
|
||||
".long 0 \n\t");
|
||||
}
|
||||
|
||||
#else /* CONFIG_X86_64 */
|
||||
# error x86-32 bit mode not yet implemented
|
||||
#endif /* CONFIG_X86_64 */
|
19
parasite.lds.S
Normal file
19
parasite.lds.S
Normal file
@ -0,0 +1,19 @@
|
||||
/*
 * Linker script for the parasite blob: a flat binary linked at
 * address 0 with .parasite.head.text placed first.
 *
 * The compiler may emit suffixed subsections (.text.unlikely,
 * .rodata.str1.1 and friends, depending on the optimization level);
 * they are listed explicitly so they do not end up as orphan sections
 * placed outside of this layout.
 */
OUTPUT_FORMAT("binary")
OUTPUT_ARCH(i386:x86-64)

SECTIONS
{
	. = 0;
	.text : {
		*(.parasite.head.text)
		*(.text)
		*(.text.*)
		. = ALIGN(8);
	}
	.data : {
		*(.data)
		*(.data.*)
		*(.rodata)
		*(.rodata.*)
		*(.bss)
		*(.bss.*)
		*(.parasite.stack)
		. = ALIGN(8);
	}
}
|
322
rbtree.c
Normal file
322
rbtree.c
Normal file
@ -0,0 +1,322 @@
|
||||
/*
|
||||
* RBtree implementation adopted from the Linux
|
||||
* kernel sources.
|
||||
*/
|
||||
|
||||
#include "rbtree.h"
|
||||
|
||||
static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
|
||||
{
|
||||
struct rb_node *right = node->rb_right;
|
||||
struct rb_node *parent = rb_parent(node);
|
||||
|
||||
if ((node->rb_right = right->rb_left))
|
||||
rb_set_parent(right->rb_left, node);
|
||||
right->rb_left = node;
|
||||
|
||||
rb_set_parent(right, parent);
|
||||
|
||||
if (parent) {
|
||||
if (node == parent->rb_left)
|
||||
parent->rb_left = right;
|
||||
else
|
||||
parent->rb_right = right;
|
||||
} else
|
||||
root->rb_node = right;
|
||||
rb_set_parent(node, right);
|
||||
}
|
||||
|
||||
static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
|
||||
{
|
||||
struct rb_node *left = node->rb_left;
|
||||
struct rb_node *parent = rb_parent(node);
|
||||
|
||||
if ((node->rb_left = left->rb_right))
|
||||
rb_set_parent(left->rb_right, node);
|
||||
left->rb_right = node;
|
||||
|
||||
rb_set_parent(left, parent);
|
||||
|
||||
if (parent) {
|
||||
if (node == parent->rb_right)
|
||||
parent->rb_right = left;
|
||||
else
|
||||
parent->rb_left = left;
|
||||
} else
|
||||
root->rb_node = left;
|
||||
rb_set_parent(node, left);
|
||||
}
|
||||
|
||||
void rb_insert_color(struct rb_node *node, struct rb_root *root)
|
||||
{
|
||||
struct rb_node *parent, *gparent;
|
||||
|
||||
while ((parent = rb_parent(node)) && rb_is_red(parent)) {
|
||||
gparent = rb_parent(parent);
|
||||
|
||||
if (parent == gparent->rb_left) {
|
||||
{
|
||||
register struct rb_node *uncle = gparent->rb_right;
|
||||
if (uncle && rb_is_red(uncle)) {
|
||||
rb_set_black(uncle);
|
||||
rb_set_black(parent);
|
||||
rb_set_red(gparent);
|
||||
node = gparent;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (parent->rb_right == node) {
|
||||
register struct rb_node *tmp;
|
||||
__rb_rotate_left(parent, root);
|
||||
tmp = parent;
|
||||
parent = node;
|
||||
node = tmp;
|
||||
}
|
||||
|
||||
rb_set_black(parent);
|
||||
rb_set_red(gparent);
|
||||
__rb_rotate_right(gparent, root);
|
||||
} else {
|
||||
{
|
||||
register struct rb_node *uncle = gparent->rb_left;
|
||||
if (uncle && rb_is_red(uncle)) {
|
||||
rb_set_black(uncle);
|
||||
rb_set_black(parent);
|
||||
rb_set_red(gparent);
|
||||
node = gparent;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (parent->rb_left == node) {
|
||||
register struct rb_node *tmp;
|
||||
__rb_rotate_right(parent, root);
|
||||
tmp = parent;
|
||||
parent = node;
|
||||
node = tmp;
|
||||
}
|
||||
|
||||
rb_set_black(parent);
|
||||
rb_set_red(gparent);
|
||||
__rb_rotate_left(gparent, root);
|
||||
}
|
||||
}
|
||||
|
||||
rb_set_black(root->rb_node);
|
||||
}
|
||||
|
||||
static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
|
||||
struct rb_root *root)
|
||||
{
|
||||
struct rb_node *other;
|
||||
|
||||
while ((!node || rb_is_black(node)) && node != root->rb_node) {
|
||||
if (parent->rb_left == node) {
|
||||
other = parent->rb_right;
|
||||
if (rb_is_red(other)) {
|
||||
rb_set_black(other);
|
||||
rb_set_red(parent);
|
||||
__rb_rotate_left(parent, root);
|
||||
other = parent->rb_right;
|
||||
}
|
||||
if ((!other->rb_left || rb_is_black(other->rb_left)) &&
|
||||
(!other->rb_right || rb_is_black(other->rb_right))) {
|
||||
rb_set_red(other);
|
||||
node = parent;
|
||||
parent = rb_parent(node);
|
||||
} else {
|
||||
if (!other->rb_right || rb_is_black(other->rb_right)) {
|
||||
rb_set_black(other->rb_left);
|
||||
rb_set_red(other);
|
||||
__rb_rotate_right(other, root);
|
||||
other = parent->rb_right;
|
||||
}
|
||||
rb_set_color(other, rb_color(parent));
|
||||
rb_set_black(parent);
|
||||
rb_set_black(other->rb_right);
|
||||
__rb_rotate_left(parent, root);
|
||||
node = root->rb_node;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
other = parent->rb_left;
|
||||
if (rb_is_red(other)) {
|
||||
rb_set_black(other);
|
||||
rb_set_red(parent);
|
||||
__rb_rotate_right(parent, root);
|
||||
other = parent->rb_left;
|
||||
}
|
||||
if ((!other->rb_left || rb_is_black(other->rb_left)) &&
|
||||
(!other->rb_right || rb_is_black(other->rb_right))) {
|
||||
rb_set_red(other);
|
||||
node = parent;
|
||||
parent = rb_parent(node);
|
||||
} else {
|
||||
if (!other->rb_left || rb_is_black(other->rb_left)) {
|
||||
rb_set_black(other->rb_right);
|
||||
rb_set_red(other);
|
||||
__rb_rotate_left(other, root);
|
||||
other = parent->rb_left;
|
||||
}
|
||||
rb_set_color(other, rb_color(parent));
|
||||
rb_set_black(parent);
|
||||
rb_set_black(other->rb_left);
|
||||
__rb_rotate_right(parent, root);
|
||||
node = root->rb_node;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (node)
|
||||
rb_set_black(node);
|
||||
}
|
||||
|
||||
void rb_erase(struct rb_node *node, struct rb_root *root)
|
||||
{
|
||||
struct rb_node *child, *parent;
|
||||
int color;
|
||||
|
||||
if (!node->rb_left)
|
||||
child = node->rb_right;
|
||||
else if (!node->rb_right)
|
||||
child = node->rb_left;
|
||||
else {
|
||||
struct rb_node *old = node, *left;
|
||||
|
||||
node = node->rb_right;
|
||||
while ((left = node->rb_left) != NULL)
|
||||
node = left;
|
||||
|
||||
if (rb_parent(old)) {
|
||||
if (rb_parent(old)->rb_left == old)
|
||||
rb_parent(old)->rb_left = node;
|
||||
else
|
||||
rb_parent(old)->rb_right = node;
|
||||
} else
|
||||
root->rb_node = node;
|
||||
|
||||
child = node->rb_right;
|
||||
parent = rb_parent(node);
|
||||
color = rb_color(node);
|
||||
|
||||
if (parent == old) {
|
||||
parent = node;
|
||||
} else {
|
||||
if (child)
|
||||
rb_set_parent(child, parent);
|
||||
parent->rb_left = child;
|
||||
|
||||
node->rb_right = old->rb_right;
|
||||
rb_set_parent(old->rb_right, node);
|
||||
}
|
||||
|
||||
node->rb_parent_color = old->rb_parent_color;
|
||||
node->rb_left = old->rb_left;
|
||||
rb_set_parent(old->rb_left, node);
|
||||
|
||||
goto color;
|
||||
}
|
||||
|
||||
parent = rb_parent(node);
|
||||
color = rb_color(node);
|
||||
|
||||
if (child)
|
||||
rb_set_parent(child, parent);
|
||||
if (parent) {
|
||||
if (parent->rb_left == node)
|
||||
parent->rb_left = child;
|
||||
else
|
||||
parent->rb_right = child;
|
||||
} else
|
||||
root->rb_node = child;
|
||||
|
||||
color:
|
||||
if (color == RB_BLACK)
|
||||
__rb_erase_color(child, parent, root);
|
||||
}
|
||||
|
||||
struct rb_node *rb_first(const struct rb_root *root)
|
||||
{
|
||||
struct rb_node *n;
|
||||
|
||||
n = root->rb_node;
|
||||
if (!n)
|
||||
return NULL;
|
||||
while (n->rb_left)
|
||||
n = n->rb_left;
|
||||
return n;
|
||||
}
|
||||
|
||||
struct rb_node *rb_last(const struct rb_root *root)
|
||||
{
|
||||
struct rb_node *n;
|
||||
|
||||
n = root->rb_node;
|
||||
if (!n)
|
||||
return NULL;
|
||||
while (n->rb_right)
|
||||
n = n->rb_right;
|
||||
return n;
|
||||
}
|
||||
|
||||
struct rb_node *rb_next(const struct rb_node *node)
|
||||
{
|
||||
struct rb_node *parent;
|
||||
|
||||
if (rb_parent(node) == node)
|
||||
return NULL;
|
||||
|
||||
if (node->rb_right) {
|
||||
node = node->rb_right;
|
||||
while (node->rb_left)
|
||||
node=node->rb_left;
|
||||
return (struct rb_node *)node;
|
||||
}
|
||||
|
||||
while ((parent = rb_parent(node)) && node == parent->rb_right)
|
||||
node = parent;
|
||||
|
||||
return parent;
|
||||
}
|
||||
|
||||
struct rb_node *rb_prev(const struct rb_node *node)
|
||||
{
|
||||
struct rb_node *parent;
|
||||
|
||||
if (rb_parent(node) == node)
|
||||
return NULL;
|
||||
|
||||
if (node->rb_left) {
|
||||
node = node->rb_left;
|
||||
while (node->rb_right)
|
||||
node=node->rb_right;
|
||||
return (struct rb_node *)node;
|
||||
}
|
||||
|
||||
while ((parent = rb_parent(node)) && node == parent->rb_left)
|
||||
node = parent;
|
||||
|
||||
return parent;
|
||||
}
|
||||
|
||||
void rb_replace_node(struct rb_node *victim, struct rb_node *new,
|
||||
struct rb_root *root)
|
||||
{
|
||||
struct rb_node *parent = rb_parent(victim);
|
||||
|
||||
if (parent) {
|
||||
if (victim == parent->rb_left)
|
||||
parent->rb_left = new;
|
||||
else
|
||||
parent->rb_right = new;
|
||||
} else {
|
||||
root->rb_node = new;
|
||||
}
|
||||
if (victim->rb_left)
|
||||
rb_set_parent(victim->rb_left, new);
|
||||
if (victim->rb_right)
|
||||
rb_set_parent(victim->rb_right, new);
|
||||
|
||||
*new = *victim;
|
||||
}
|
112
testee-static.c
Normal file
112
testee-static.c
Normal file
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* A simple testee program
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <sys/wait.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <sched.h>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
// int pipefd[2];
|
||||
int fd_shared, fd_private;
|
||||
const char data_mark[] = "This is a data_mark marker";
|
||||
void *mmap_shared, *mmap_private, *mmap_anon, *map_unreadable;
|
||||
void *mmap_anon_shared;
|
||||
const char sep[] = "----------";
|
||||
unsigned long buf;
|
||||
int i;
|
||||
|
||||
(void)data_mark;
|
||||
|
||||
printf("%s pid %d\n", argv[0], getpid());
|
||||
|
||||
// if (pipe(pipefd)) {
|
||||
// perror("Can't create pipe");
|
||||
// goto err;
|
||||
// }
|
||||
|
||||
fd_shared = open("testee-shared.img", O_RDWR | O_CREAT | O_TRUNC, 0600);
|
||||
if (fd_shared < 0) {
|
||||
perror("Can't open fd_shared file");
|
||||
goto err;
|
||||
}
|
||||
|
||||
fd_private = open("testee-private.img", O_RDWR | O_CREAT | O_TRUNC, 0600);
|
||||
if (fd_private < 0) {
|
||||
perror("Can't open fd_private file");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (lseek(fd_shared, 1024, SEEK_SET) == -1 ||
|
||||
lseek(fd_private, 1024, SEEK_SET) == -1) {
|
||||
perror("Can't llsek");
|
||||
goto err;
|
||||
}
|
||||
|
||||
write(fd_shared, "", 1);
|
||||
write(fd_private, "", 1);
|
||||
|
||||
mmap_shared = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd_shared, 0);
|
||||
mmap_private = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_FILE | MAP_PRIVATE, fd_private, 0);
|
||||
mmap_anon = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
map_unreadable = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
mmap_anon_shared= mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
|
||||
|
||||
if (mmap_shared == MAP_FAILED ||
|
||||
mmap_private == MAP_FAILED ||
|
||||
mmap_anon_shared == MAP_FAILED ||
|
||||
mmap_anon == MAP_FAILED ||
|
||||
map_unreadable == MAP_FAILED) {
|
||||
|
||||
perror("mmap failed");
|
||||
goto err;
|
||||
}
|
||||
|
||||
strcpy((char *)mmap_shared, sep);
|
||||
strcpy((char *)mmap_private, sep);
|
||||
strcpy((char *)mmap_anon, sep);
|
||||
strcpy((char *)map_unreadable, sep);
|
||||
strcpy((char *)mmap_anon_shared,sep);
|
||||
|
||||
for (i = 64; i < 128; i++) {
|
||||
((char *)mmap_shared)[i] = 0 + i;
|
||||
((char *)mmap_private)[i] = 64 + i;
|
||||
((char *)mmap_anon)[i] = 128 + i;
|
||||
((char *)map_unreadable)[i] = 190 + i;
|
||||
((char *)mmap_anon_shared)[i] = 0 + i;
|
||||
}
|
||||
|
||||
if (mprotect(map_unreadable, 1024, PROT_NONE)) {
|
||||
perror("mprotect failed");
|
||||
goto err;
|
||||
}
|
||||
|
||||
asm volatile("" ::: "memory");
|
||||
|
||||
fsync(fd_shared);
|
||||
fsync(fd_private);
|
||||
|
||||
sync();
|
||||
asm volatile("" ::: "memory");
|
||||
|
||||
while (1) {
|
||||
printf("ping: %d\n", getpid());
|
||||
// write(pipefd[1], &buf, sizeof(buf));
|
||||
sleep(6);
|
||||
}
|
||||
|
||||
err:
|
||||
/* resources are released by kernel */
|
||||
return 0;
|
||||
}
|
74
testee-threads.c
Normal file
74
testee-threads.c
Normal file
@ -0,0 +1,74 @@
|
||||
/*
|
||||
* A simple testee program with threads
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
#include <pthread.h>
|
||||
|
||||
|
||||
static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
|
||||
static int counter;
|
||||
|
||||
static void *f1(void *arg)
|
||||
{
|
||||
void *map_unreadable = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
(void)map_unreadable;
|
||||
|
||||
while (1) {
|
||||
pthread_mutex_lock(&mtx);
|
||||
|
||||
counter++;
|
||||
/* printf("Counter value: %d\n", counter); */
|
||||
|
||||
pthread_mutex_unlock(&mtx);
|
||||
sleep(2);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void *f2(void *arg)
|
||||
{
|
||||
void *map_unreadable = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
(void)map_unreadable;
|
||||
|
||||
while (1) {
|
||||
pthread_mutex_lock(&mtx);
|
||||
|
||||
counter++;
|
||||
/* printf("Counter value: %d\n", counter); */
|
||||
|
||||
pthread_mutex_unlock(&mtx);
|
||||
sleep(3);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
pthread_t th1, th2;
|
||||
int rc1, rc2;
|
||||
|
||||
printf("%s pid %d\n", argv[0], getpid());
|
||||
|
||||
rc1 = pthread_create(&th1, NULL, &f1, NULL);
|
||||
rc2 = pthread_create(&th2, NULL, &f2, NULL);
|
||||
|
||||
if (rc1 | rc2)
|
||||
exit(1);
|
||||
|
||||
pthread_join(th1, NULL);
|
||||
pthread_join(th2, NULL);
|
||||
|
||||
exit(0);
|
||||
}
|
92
testee-unlinked.c
Normal file
92
testee-unlinked.c
Normal file
@ -0,0 +1,92 @@
|
||||
/*
|
||||
* A simple testee program
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <sys/wait.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <sched.h>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int fd_shared, fd_private;
|
||||
const char data_mark[] = "This is a data_mark marker";
|
||||
void *mmap_shared, *mmap_private, *mmap_anon, *map_unreadable;
|
||||
const char sep[] = "----------";
|
||||
pid_t pid, child;
|
||||
int i;
|
||||
|
||||
printf("%s pid %d\n", argv[0], getpid());
|
||||
|
||||
fd_shared = open("testee-shared.img", O_RDWR | O_CREAT | O_TRUNC, 0600);
|
||||
if (fd_shared < 0) {
|
||||
perror("Can't open fd_shared file");
|
||||
goto err;
|
||||
}
|
||||
|
||||
fd_private = open("testee-private.img", O_RDWR | O_CREAT | O_TRUNC, 0600);
|
||||
if (fd_private < 0) {
|
||||
perror("Can't open fd_private file");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (lseek(fd_shared, 1024, SEEK_SET) == -1 ||
|
||||
lseek(fd_private, 1024, SEEK_SET) == -1) {
|
||||
perror("Can't llsek");
|
||||
goto err;
|
||||
}
|
||||
|
||||
write(fd_shared, "", 1);
|
||||
write(fd_private, "", 1);
|
||||
|
||||
mmap_shared = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd_shared, 0);
|
||||
mmap_private = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_FILE | MAP_PRIVATE, fd_private, 0);
|
||||
|
||||
if (mmap_shared == MAP_FAILED ||
|
||||
mmap_private == MAP_FAILED) {
|
||||
|
||||
perror("mmap failed");
|
||||
goto err;
|
||||
}
|
||||
|
||||
strcpy((char *)mmap_shared, sep);
|
||||
strcpy((char *)mmap_private, sep);
|
||||
|
||||
for (i = 64; i < 128; i++) {
|
||||
((char *)mmap_shared)[i] = 0 + i;
|
||||
((char *)mmap_private)[i] = 64 + i;
|
||||
}
|
||||
|
||||
fsync(fd_shared);
|
||||
fsync(fd_private);
|
||||
|
||||
close(fd_shared);
|
||||
fsync(fd_private);
|
||||
|
||||
unlink("testee-shared.img");
|
||||
unlink("testee-private.img");
|
||||
|
||||
for (i = 64; i < 128; i++) {
|
||||
((char *)mmap_shared)[i] = 0 + i;
|
||||
((char *)mmap_private)[i] = 64 + i;
|
||||
}
|
||||
|
||||
msync(mmap_shared, 1024, MS_SYNC);
|
||||
msync(mmap_private, 1024, MS_SYNC);
|
||||
|
||||
while (1)
|
||||
sleep(1);
|
||||
|
||||
err:
|
||||
/* resources are released by kernel */
|
||||
return 0;
|
||||
}
|
231
testee.c
Normal file
231
testee.c
Normal file
@ -0,0 +1,231 @@
|
||||
/*
|
||||
* A simple testee program
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <sys/wait.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <sched.h>
|
||||
|
||||
static int do_child(void *arg)
|
||||
{
|
||||
printf("do_child pid: %d\n", getpid());
|
||||
|
||||
void *stack, *mmap_anon;
|
||||
|
||||
stack = mmap(0, 4 * 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, 0, 0);
|
||||
if (stack == MAP_FAILED)
|
||||
return -1;
|
||||
|
||||
mmap_anon = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
if (mmap_anon == MAP_FAILED)
|
||||
return -1;
|
||||
|
||||
while (1)
|
||||
sleep(6);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int run_clone(void)
|
||||
{
|
||||
pid_t pid = 0;
|
||||
int ret = 0;
|
||||
void *stack, *mmap_anon;
|
||||
|
||||
stack = mmap(0, 4 * 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, 0, 0);
|
||||
if (stack == MAP_FAILED)
|
||||
return -1;
|
||||
|
||||
mmap_anon = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
if (mmap_anon == MAP_FAILED)
|
||||
return -1;
|
||||
|
||||
stack += 4 * 4096;
|
||||
|
||||
ret = clone(do_child, stack, CLONE_FS, NULL, NULL, NULL, &pid);
|
||||
if (ret < 0)
|
||||
perror("Failed clone");
|
||||
|
||||
printf("run_clone: %d stack: %p mmap_anon: %p ret %d\n",
|
||||
pid, stack, mmap_anon, ret);
|
||||
|
||||
if (stack == MAP_FAILED)
|
||||
return -1;
|
||||
|
||||
mmap_anon = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
if (mmap_anon == MAP_FAILED)
|
||||
return -1;
|
||||
|
||||
stack += 4 * 4096;
|
||||
|
||||
ret = clone(do_child, stack, CLONE_FS | CLONE_FILES | CLONE_VM, NULL, NULL, NULL, &pid);
|
||||
if (ret < 0)
|
||||
perror("Failed clone");
|
||||
|
||||
printf("run_clone: %d stack: %p mmap_anon: %p ret %d\n",
|
||||
pid, stack, mmap_anon, ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
// int pipefd[2];
|
||||
int fd_shared, fd_private;
|
||||
const char data_mark[] = "This is a data_mark marker";
|
||||
void *mmap_shared, *mmap_private, *mmap_anon, *map_unreadable;
|
||||
const char sep[] = "----------";
|
||||
pid_t pid, child;
|
||||
char suided_path[128];
|
||||
int i;
|
||||
|
||||
(void)data_mark;
|
||||
|
||||
printf("%s pid %d\n", argv[0], getpid());
|
||||
|
||||
// if (pipe(pipefd)) {
|
||||
// perror("Can't create pipe");
|
||||
// goto err;
|
||||
// }
|
||||
|
||||
fd_shared = open("testee-shared.img", O_RDWR | O_CREAT | O_TRUNC, 0600);
|
||||
if (fd_shared < 0) {
|
||||
perror("Can't open fd_shared file");
|
||||
goto err;
|
||||
}
|
||||
|
||||
fd_private = open("testee-private.img", O_RDWR | O_CREAT | O_TRUNC, 0600);
|
||||
if (fd_private < 0) {
|
||||
perror("Can't open fd_private file");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (lseek(fd_shared, 1024, SEEK_SET) == -1 ||
|
||||
lseek(fd_private, 1024, SEEK_SET) == -1) {
|
||||
perror("Can't llsek");
|
||||
goto err;
|
||||
}
|
||||
|
||||
write(fd_shared, "", 1);
|
||||
write(fd_private, "", 1);
|
||||
|
||||
mmap_shared = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd_shared, 0);
|
||||
mmap_private = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_FILE | MAP_PRIVATE, fd_private, 0);
|
||||
mmap_anon = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
map_unreadable = mmap(NULL, 1024, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
|
||||
if (mmap_shared == MAP_FAILED ||
|
||||
mmap_private == MAP_FAILED ||
|
||||
mmap_anon == MAP_FAILED ||
|
||||
map_unreadable == MAP_FAILED) {
|
||||
|
||||
perror("mmap failed");
|
||||
goto err;
|
||||
}
|
||||
|
||||
snprintf(suided_path, sizeof(suided_path),
|
||||
"/proc/%d/map_files/%lx-%lx",
|
||||
getpid(), (long)mmap_shared,
|
||||
(long)mmap_shared + 0x1000);
|
||||
|
||||
strcpy((char *)mmap_shared, sep);
|
||||
strcpy((char *)mmap_private, sep);
|
||||
strcpy((char *)mmap_anon, sep);
|
||||
strcpy((char *)map_unreadable, sep);
|
||||
|
||||
for (i = 64; i < 128; i++) {
|
||||
((char *)mmap_shared)[i] = 0 + i;
|
||||
((char *)mmap_private)[i] = 64 + i;
|
||||
((char *)mmap_anon)[i] = 128 + i;
|
||||
((char *)map_unreadable)[i] = 190 + i;
|
||||
}
|
||||
|
||||
if (mprotect(map_unreadable, 1024, PROT_NONE)) {
|
||||
perror("mprotect failed");
|
||||
goto err;
|
||||
}
|
||||
|
||||
asm volatile("" ::: "memory");
|
||||
|
||||
fsync(fd_shared);
|
||||
fsync(fd_private);
|
||||
|
||||
close(fd_shared);
|
||||
|
||||
if (argc > 1) {
|
||||
|
||||
printf("my-uid: %d\n", getuid());
|
||||
setuid(atoi(argv[1]));
|
||||
printf("my-uid: %d\n", getuid());
|
||||
}
|
||||
|
||||
fd_shared = open(suided_path, O_RDWR, 0600);
|
||||
printf("fd_shared for O_RDWR: %d\n", fd_shared);
|
||||
if (fd_shared >= 0) {
|
||||
write(fd_shared, "aaaa", sizeof("aaaa"));
|
||||
close(fd_shared);
|
||||
}
|
||||
|
||||
fd_shared = open(suided_path, O_TRUNC, 0600);
|
||||
printf("fd_shared for O_TRUNC: %d\n", fd_shared);
|
||||
if (fd_shared >= 0) {
|
||||
printf("tunc: %d\n", ftruncate(fd_shared, 512));
|
||||
close(fd_shared);
|
||||
}
|
||||
|
||||
fd_shared = open(suided_path, O_RDONLY, 0600);
|
||||
printf("fd_shared for O_RDONLY: %d\n", fd_shared);
|
||||
if (fd_shared >= 0)
|
||||
close(fd_shared);
|
||||
|
||||
sync();
|
||||
asm volatile("" ::: "memory");
|
||||
|
||||
pid = fork();
|
||||
if (pid == -1)
|
||||
goto err;
|
||||
|
||||
if (pid == 0) {
|
||||
long buf;
|
||||
child = fork();
|
||||
if (child == -1)
|
||||
goto err;
|
||||
if (child == 0) {
|
||||
printf("first child pid: %d\n", getpid());
|
||||
// while (read(pipefd[0], &buf, sizeof(buf)) > 0)
|
||||
// sleep(3);
|
||||
while (1) {
|
||||
printf("ping: %d\n", getpid());
|
||||
sleep(8);
|
||||
}
|
||||
} else {
|
||||
printf("first parent pid: %d\n", getpid());
|
||||
// run_clone();
|
||||
while (1) {
|
||||
printf("ping: %d\n", getpid());
|
||||
sleep(9);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
long buf = 0xdeadbeef;
|
||||
while (1) {
|
||||
printf("ping: %d\n", getpid());
|
||||
// write(pipefd[1], &buf, sizeof(buf));
|
||||
sleep(10);
|
||||
}
|
||||
}
|
||||
|
||||
err:
|
||||
/* resources are released by kernel */
|
||||
return 0;
|
||||
}
|
412
util.c
Normal file
412
util.c
Normal file
@ -0,0 +1,412 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <stdbool.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/ptrace.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/wait.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
#include <limits.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <dirent.h>
|
||||
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <sys/ptrace.h>
|
||||
#include <sys/user.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include "compiler.h"
|
||||
#include "types.h"
|
||||
#include "list.h"
|
||||
#include "util.h"
|
||||
|
||||
#include "crtools.h"
|
||||
|
||||
static char big_buffer[PATH_MAX];
|
||||
|
||||
/*
 * printf-style helper that sends the formatted text to stdout.
 */
void printk(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	vprintf(format, ap);
	va_end(ap);
}
|
||||
|
||||
/*
 * Hex-dumps @bytes bytes of the traced task @pid starting at @addr, one
 * machine word at a time, printing the bytes of every word from the
 * highest address down to the lowest.
 *
 * @bytes must be a non-negative multiple of sizeof(long).
 *
 * Returns 0 on success, -1 on a bad @bytes value and -2 if a word could
 * not be read from the task.
 */
int ptrace_show_area_r(pid_t pid, void *addr, long bytes)
{
	unsigned long *a = addr;
	unsigned long w, i;

	if (bytes < 0 || (bytes & (sizeof(long) - 1)))
		return -1;

	for (w = 0; w < bytes / sizeof(long); w++) {
		unsigned char *c;
		unsigned long v;

		/*
		 * A successful peek may legitimately yield -1, so errno is
		 * the only reliable failure indicator: clear it before the
		 * call and compare against the full-width -1UL (-1U would
		 * never match a failed peek on 64-bit).
		 */
		errno = 0;
		v = ptrace(PTRACE_PEEKDATA, pid, a + w, NULL);
		if (v == -1UL && errno)
			return -2;

		c = (unsigned char *)&v;
		for (i = sizeof(v); i > 0; i--)
			printk("%02x ", c[i - 1]);
		printk(" ");
	}
	printk("\n");

	return 0;
}
|
||||
|
||||
/*
 * Hex-dumps @bytes bytes of the traced task @pid starting at @addr. The
 * line starts with the address, followed by the bytes of each machine
 * word in memory order.
 *
 * @bytes must be a non-negative multiple of sizeof(long).
 *
 * Returns 0 on success, -1 on a bad @bytes value and -2 if a word could
 * not be read from the task.
 */
int ptrace_show_area(pid_t pid, void *addr, long bytes)
{
	unsigned long *a = addr;
	unsigned long w, i;

	if (bytes < 0 || (bytes & (sizeof(long) - 1)))
		return -1;

	printk("%016lx: ", (unsigned long)addr);

	for (w = 0; w < bytes / sizeof(long); w++) {
		unsigned char *c;
		unsigned long v;

		/*
		 * A successful peek may legitimately yield -1, so errno is
		 * the only reliable failure indicator: clear it before the
		 * call and compare against the full-width -1UL (-1U would
		 * never match a failed peek on 64-bit).
		 */
		errno = 0;
		v = ptrace(PTRACE_PEEKDATA, pid, a + w, NULL);
		if (v == -1UL && errno)
			return -2;

		c = (unsigned char *)&v;
		for (i = 0; i < sizeof(v); i++)
			printk("%02x ", c[i]);
		printk(" ");
	}
	printk("\n");

	return 0;
}
|
||||
|
||||
/*
 * Copies @bytes bytes from address @addr of the traced task @pid into
 * the local buffer @dst, one machine word per ptrace request.
 *
 * @bytes must be a non-negative multiple of sizeof(long).
 *
 * Returns 0 on success, -1 on a bad @bytes value and -2 if a word could
 * not be read from the task (@dst may then be partially filled).
 */
int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes)
{
	unsigned long *d = dst, *a = addr;
	unsigned long w;

	if (bytes < 0 || (bytes & (sizeof(long) - 1)))
		return -1;

	for (w = 0; w < bytes / sizeof(long); w++) {
		/*
		 * A successful peek may legitimately yield -1, so errno is
		 * the only reliable failure indicator: clear it before the
		 * call and compare against the full-width -1UL (-1U would
		 * never match a failed peek on 64-bit).
		 */
		errno = 0;
		d[w] = ptrace(PTRACE_PEEKDATA, pid, a + w, NULL);
		if (d[w] == -1UL && errno)
			return -2;
	}

	return 0;
}
|
||||
|
||||
/*
 * Writes @bytes bytes from the local buffer @src to address @addr of the
 * traced task @pid, one machine word per ptrace request.
 *
 * Returns 0 on success, -1 if @bytes is not a multiple of sizeof(long)
 * and -2 as soon as one of the poke requests fails.
 */
int ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes)
{
	unsigned long *from = src;
	unsigned long *to = addr;
	unsigned long idx = 0;

	if (bytes & (sizeof(long) - 1))
		return -1;

	while (idx < bytes / sizeof(long)) {
		if (ptrace(PTRACE_POKEDATA, pid, to + idx, from[idx]))
			return -2;
		idx++;
	}

	return 0;
}
|
||||
|
||||
/*
 * Dumps the register set in @regs as a table of hex values, three
 * registers per row, followed by an empty line.
 */
void printk_registers(user_regs_struct_t *regs)
{
	printk("ip : %16lx cs : %16lx ds : %16lx\n",
	       regs->ip, regs->cs, regs->ds);
	printk("es : %16lx fs : %16lx gs : %16lx\n",
	       regs->es, regs->fs, regs->gs);
	printk("sp : %16lx ss : %16lx flags : %16lx\n",
	       regs->sp, regs->ss, regs->flags);
	printk("ax : %16lx cx : %16lx dx : %16lx\n",
	       regs->ax, regs->cx, regs->dx);
	printk("si : %16lx di : %16lx bp : %16lx\n",
	       regs->si, regs->di, regs->bp);
	printk("bx : %16lx r8 : %16lx r9 : %16lx\n",
	       regs->bx, regs->r8, regs->r9);
	printk("r10 : %16lx r11 : %16lx r12 : %16lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("r13 : %16lx r14 : %16lx r15 : %16lx\n",
	       regs->r13, regs->r14, regs->r15);
	/* The last row carries the extra newline that ends the table. */
	printk("orig_ax: %16lx fs_base: %16lx gs_base: %16lx\n\n",
	       regs->orig_ax, regs->fs_base, regs->gs_base);
}
|
||||
|
||||
/*
 * Prints the signal number, errno value and signal code stored in
 * @siginfo on a single line.
 */
void printk_siginfo(siginfo_t *siginfo)
{
	const int signo = siginfo->si_signo;
	const int err = siginfo->si_errno;
	const int code = siginfo->si_code;

	printk("si_signo %d si_errno %d si_code %d\n", signo, err, code);
}
|
||||
|
||||
void printk_vma(struct vma_area *vma_area)
|
||||
{
|
||||
if (!vma_area)
|
||||
return;
|
||||
|
||||
printk("s: %16lx e: %16lx l: %4liK p: %4x f: %4x fd: %4d pid: %4d dev:%02x:%02x:%04lx vf: %s st: %s spc: %s\n",
|
||||
vma_area->vma.start, vma_area->vma.end,
|
||||
(vma_area->vma.end - vma_area->vma.start) >> 10,
|
||||
vma_area->vma.prot,
|
||||
vma_area->vma.flags,
|
||||
vma_area->vma.fd,
|
||||
vma_area->vma.pid,
|
||||
vma_area->vma.dev_maj,
|
||||
vma_area->vma.dev_min,
|
||||
vma_area->vma.ino,
|
||||
vma_area->vm_file_fd < 0 ? "n" : "y",
|
||||
!vma_area->vma.status ? "--" :
|
||||
((vma_area->vma.status & VMA_FILE_PRIVATE) ? "FP" :
|
||||
((vma_area->vma.status & VMA_FILE_SHARED) ? "FS" :
|
||||
((vma_area->vma.status & VMA_ANON_SHARED) ? "AS" :
|
||||
((vma_area->vma.status & VMA_ANON_PRIVATE) ? "AP" : "--")))),
|
||||
!vma_area->vma.status ? "--" :
|
||||
((vma_area->vma.status & VMA_AREA_STACK) ? "stack" :
|
||||
((vma_area->vma.status & VMA_AREA_VSYSCALL) ? "vsyscall" :
|
||||
((vma_area->vma.status & VMA_AREA_VDSO) ? "vdso" : "n"))));
|
||||
}
|
||||
|
||||
/*
 * Detaches from the traced task @pid and hands back the result of the
 * PTRACE_DETACH request.
 */
int unseize_task(pid_t pid)
{
	long rc;

	rc = ptrace(PTRACE_DETACH, pid, NULL, NULL);

	return rc;
}
|
||||
|
||||
int seize_task(pid_t pid)
|
||||
{
|
||||
siginfo_t si;
|
||||
int status;
|
||||
int ret = 0;
|
||||
|
||||
jerr_rc(ptrace(PTRACE_SEIZE, pid, NULL,
|
||||
(void *)(unsigned long)PTRACE_SEIZE_DEVEL), ret, err);
|
||||
jerr_rc(ptrace(PTRACE_INTERRUPT, pid, NULL, NULL), ret, err);
|
||||
|
||||
ret = -10;
|
||||
if (wait4(pid, &status, __WALL, NULL) != pid)
|
||||
goto err;
|
||||
|
||||
ret = -20;
|
||||
if (!WIFSTOPPED(status))
|
||||
goto err;
|
||||
|
||||
jerr_rc(ptrace(PTRACE_GETSIGINFO, pid, NULL, &si), ret, err_cont);
|
||||
|
||||
ret = -30;
|
||||
if ((si.si_code >> 8) != PTRACE_EVENT_STOP)
|
||||
goto err_cont;
|
||||
|
||||
jerr_rc(ptrace(PTRACE_SETOPTIONS, pid, NULL,
|
||||
(void *)(unsigned long)PTRACE_O_TRACEEXIT), ret, err_cont);
|
||||
|
||||
err:
|
||||
return ret;
|
||||
|
||||
err_cont:
|
||||
continue_task(pid);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
 * Moves descriptor @old_fd to the number @new_fd: duplicates it with
 * dup2() and closes the original. Nothing is done when both numbers are
 * already equal.
 *
 * Returns @new_fd on success, or the negative dup2() result on failure
 * (in which case @old_fd is left open).
 */
int reopen_fd_as(int new_fd, int old_fd)
{
	int rc;

	if (old_fd == new_fd)
		return new_fd;

	rc = dup2(old_fd, new_fd);
	if (rc < 0)
		return rc;

	close(old_fd);
	return new_fd;
}
|
||||
|
||||
int parse_maps(pid_t pid, struct list_head *vma_area_list)
|
||||
{
|
||||
struct vma_area *vma_area = NULL;
|
||||
u64 start, end, pgoff;
|
||||
char map_files_path[64];
|
||||
char maps_path[64];
|
||||
unsigned long ino;
|
||||
char r,w,x,s;
|
||||
int dev_maj, dev_min;
|
||||
int ret = -1;
|
||||
|
||||
DIR *map_files_dir = NULL;
|
||||
FILE *maps = NULL;
|
||||
|
||||
snprintf(maps_path, sizeof(maps_path), "/proc/%d/maps", pid);
|
||||
maps = fopen(maps_path, "r");
|
||||
if (!maps) {
|
||||
pr_perror("Can't open: %s\n", maps_path);
|
||||
goto err;
|
||||
}
|
||||
|
||||
snprintf(map_files_path, sizeof(map_files_path),
|
||||
"/proc/%d/map_files", pid);
|
||||
|
||||
/*
|
||||
* It might be a problem in kernel, either
|
||||
* I'm debugging it on old kernel ;)
|
||||
*/
|
||||
map_files_dir = opendir(map_files_path);
|
||||
if (!map_files_dir)
|
||||
pr_warning("Crap, can't open %s, old kernel?\n",
|
||||
map_files_path);
|
||||
|
||||
while (fgets(big_buffer, sizeof(big_buffer), maps)) {
|
||||
char vma_file_path[16+16+2];
|
||||
struct stat st_buf;
|
||||
|
||||
ret = sscanf(big_buffer, "%lx-%lx %c%c%c%c %lx %02x:%02x %lu",
|
||||
&start, &end, &r, &w, &x, &s, &pgoff, &dev_maj,
|
||||
&dev_min, &ino);
|
||||
if (ret != 10) {
|
||||
pr_error("Can't parse: %s", big_buffer);
|
||||
return -1;
|
||||
}
|
||||
|
||||
vma_area = alloc_vma_area();
|
||||
if (!vma_area)
|
||||
return -1;
|
||||
|
||||
/* Figure out if it's file mapping */
|
||||
snprintf(vma_file_path, sizeof(vma_file_path), "%lx-%lx", start, end);
|
||||
|
||||
if (map_files_dir) {
|
||||
/*
|
||||
* Note that we "open" it in dumper process space
|
||||
* so later we might refer to it via /proc/self/fd/vm_file_fd
|
||||
* if needed.
|
||||
*/
|
||||
vma_area->vm_file_fd = openat(dirfd(map_files_dir),
|
||||
vma_file_path, O_RDONLY);
|
||||
if (vma_area->vm_file_fd < 0) {
|
||||
if (errno != ENOENT) {
|
||||
pr_perror("Failed opening %s/%s\n",
|
||||
map_files_path,
|
||||
vma_file_path);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vma_area->vma.pid = pid;
|
||||
vma_area->vma.start = start;
|
||||
vma_area->vma.end = end;
|
||||
vma_area->vma.pgoff = pgoff;
|
||||
|
||||
vma_area->vma.ino = ino;
|
||||
vma_area->vma.dev_maj = dev_maj;
|
||||
vma_area->vma.dev_min = dev_min;
|
||||
|
||||
vma_area->vma.prot = PROT_NONE;
|
||||
|
||||
if (r == 'r')
|
||||
vma_area->vma.prot |= PROT_READ;
|
||||
if (w == 'w')
|
||||
vma_area->vma.prot |= PROT_WRITE;
|
||||
if (x == 'x')
|
||||
vma_area->vma.prot |= PROT_EXEC;
|
||||
|
||||
if (s == 's')
|
||||
vma_area->vma.flags = MAP_SHARED;
|
||||
else if (s == 'p')
|
||||
vma_area->vma.flags = MAP_PRIVATE;
|
||||
|
||||
vma_area->vma.status = 0;
|
||||
|
||||
if (strstr(big_buffer, "[stack]"))
|
||||
vma_area->vma.status |= VMA_AREA_REGULAR | VMA_AREA_STACK;
|
||||
else if (strstr(big_buffer, "[vsyscall]"))
|
||||
vma_area->vma.status |= VMA_AREA_VSYSCALL;
|
||||
else if (strstr(big_buffer, "[vdso]"))
|
||||
vma_area->vma.status |= VMA_AREA_VDSO;
|
||||
else if (strstr(big_buffer, "[heap]"))
|
||||
vma_area->vma.status |= VMA_AREA_REGULAR | VMA_AREA_HEAP;
|
||||
else
|
||||
vma_area->vma.status = VMA_AREA_REGULAR;
|
||||
|
||||
/*
|
||||
* Some mapping hints for restore, we save this on
|
||||
* disk and restore might need to analyze it.
|
||||
*/
|
||||
if (vma_area->vm_file_fd >= 0) {
|
||||
|
||||
if (fstat(vma_area->vm_file_fd, &st_buf) < 0) {
|
||||
pr_perror("Failed fstat on %s%s\n",
|
||||
map_files_path,
|
||||
vma_file_path);
|
||||
goto err;
|
||||
}
|
||||
if (!S_ISREG(st_buf.st_mode)) {
|
||||
pr_error("Can't handle non-regular "
|
||||
"mapping on %s%s\n",
|
||||
map_files_path,
|
||||
vma_file_path);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* /dev/zero stands for anon-shared mapping
|
||||
* otherwise it's some file mapping.
|
||||
*/
|
||||
if (MAJOR(st_buf.st_dev) == 0) {
|
||||
if (!(vma_area->vma.flags & MAP_SHARED))
|
||||
goto err_bogus_mapping;
|
||||
vma_area->vma.status |= VMA_ANON_SHARED;
|
||||
vma_area->shmid = st_buf.st_ino;
|
||||
} else {
|
||||
if (vma_area->vma.flags & MAP_PRIVATE)
|
||||
vma_area->vma.status |= VMA_FILE_PRIVATE;
|
||||
else
|
||||
vma_area->vma.status |= VMA_FILE_SHARED;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* No file but mapping -- anonymous one.
|
||||
*/
|
||||
if (vma_area->vma.flags & MAP_SHARED)
|
||||
goto err_bogus_mapping;
|
||||
else
|
||||
vma_area->vma.status |= VMA_ANON_PRIVATE;
|
||||
}
|
||||
|
||||
list_add_tail(&vma_area->list, vma_area_list);
|
||||
}
|
||||
|
||||
vma_area = NULL;
|
||||
ret = 0;
|
||||
|
||||
err:
|
||||
if (maps)
|
||||
fclose(maps);
|
||||
|
||||
if (map_files_dir)
|
||||
closedir(map_files_dir);
|
||||
|
||||
xfree(vma_area);
|
||||
return ret;
|
||||
|
||||
err_bogus_mapping:
|
||||
pr_error("Bogus mapping %lx-%lx\n",
|
||||
vma_area->vma.start,
|
||||
vma_area->vma.end);
|
||||
goto err;
|
||||
}
|
562
xemul/0003-Image-dumping-via-proc-file.patch
Normal file
562
xemul/0003-Image-dumping-via-proc-file.patch
Normal file
@ -0,0 +1,562 @@
|
||||
From f7e9d28188e7e2fd0f13f2696f29f20d784cb8fd Mon Sep 17 00:00:00 2001
|
||||
From: root <root@ovzept.sw.ru>
|
||||
Date: Fri, 3 Jun 2011 18:16:10 +0400
|
||||
Subject: [PATCH] Image dumping via proc file
|
||||
|
||||
---
|
||||
fs/proc/Kconfig | 8
|
||||
fs/proc/Makefile | 1
|
||||
fs/proc/base.c | 3
|
||||
fs/proc/img_dump.c | 397 +++++++++++++++++++++++++++++++++++++++++++++
|
||||
include/linux/binfmt_img.h | 87 +++++++++
|
||||
include/linux/proc_fs.h | 2
|
||||
6 files changed, 498 insertions(+)
|
||||
create mode 100644 fs/proc/img_dump.c
|
||||
create mode 100644 include/linux/binfmt_img.h
|
||||
|
||||
Index: linux-2.6.git/fs/proc/Kconfig
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/Kconfig
|
||||
+++ linux-2.6.git/fs/proc/Kconfig
|
||||
@@ -67,3 +67,11 @@ config PROC_PAGE_MONITOR
|
||||
/proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap,
|
||||
/proc/kpagecount, and /proc/kpageflags. Disabling these
|
||||
interfaces will reduce the size of the kernel by approximately 4kb.
|
||||
+
|
||||
+config PROC_IMG
|
||||
+ default y
|
||||
+ depends on PROC_FS
|
||||
+ bool "Enable /proc/<pid>/dump file"
|
||||
+ help
|
||||
+ Say Y here if you want to be able to produce checkpoint-restore images
|
||||
+ for tasks via proc
|
||||
Index: linux-2.6.git/fs/proc/Makefile
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/Makefile
|
||||
+++ linux-2.6.git/fs/proc/Makefile
|
||||
@@ -28,3 +28,4 @@ proc-$(CONFIG_PROC_VMCORE) += vmcore.o
|
||||
proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o
|
||||
proc-$(CONFIG_PRINTK) += kmsg.o
|
||||
proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o
|
||||
+proc-$(CONFIG_PROC_IMG) += img_dump.o
|
||||
Index: linux-2.6.git/fs/proc/base.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/base.c
|
||||
+++ linux-2.6.git/fs/proc/base.c
|
||||
@@ -2983,6 +2983,9 @@ static const struct pid_entry tgid_base_
|
||||
#endif
|
||||
INF("cmdline", S_IRUGO, proc_pid_cmdline),
|
||||
ONE("stat", S_IRUGO, proc_tgid_stat),
|
||||
+#ifdef CONFIG_PROC_IMG
|
||||
+ REG("dump", S_IRUSR|S_IWUSR, proc_pid_dump_operations),
|
||||
+#endif
|
||||
ONE("statm", S_IRUGO, proc_pid_statm),
|
||||
REG("maps", S_IRUGO, proc_maps_operations),
|
||||
#ifdef CONFIG_NUMA
|
||||
Index: linux-2.6.git/fs/proc/img_dump.c
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ linux-2.6.git/fs/proc/img_dump.c
|
||||
@@ -0,0 +1,397 @@
|
||||
+#include <linux/proc_fs.h>
|
||||
+#include <linux/sched.h>
|
||||
+#include <linux/uaccess.h>
|
||||
+#include <linux/binfmt_img.h>
|
||||
+#include <linux/mm.h>
|
||||
+#include <linux/mman.h>
|
||||
+#include <linux/highmem.h>
|
||||
+#include <linux/types.h>
|
||||
+#include "internal.h"
|
||||
+
|
||||
+static int img_dump_buffer(char __user *ubuf, size_t size, void *buf, int len, int pos)
|
||||
+{
|
||||
+ int ret;
|
||||
+ static size_t dumped = 0;
|
||||
+
|
||||
+ len -= pos;
|
||||
+ if (len > size)
|
||||
+ len = size;
|
||||
+
|
||||
+ ret = copy_to_user(ubuf, buf + pos, len);
|
||||
+ if (ret)
|
||||
+ return -EFAULT;
|
||||
+
|
||||
+ dumped += len;
|
||||
+ return len;
|
||||
+}
|
||||
+
|
||||
+static int img_dump_header(char __user *buf, size_t size, int pos)
|
||||
+{
|
||||
+ struct binfmt_img_header hdr;
|
||||
+
|
||||
+ hdr.magic = BINFMT_IMG_MAGIC;
|
||||
+ hdr.version = BINFMT_IMG_VERS_0;
|
||||
+
|
||||
+ return img_dump_buffer(buf, size, &hdr, sizeof(hdr), pos);
|
||||
+}
|
||||
+
|
||||
+static __u16 encode_segment(unsigned short seg)
|
||||
+{
|
||||
+ if (seg == 0)
|
||||
+ return CKPT_X86_SEG_NULL;
|
||||
+ BUG_ON((seg & 3) != 3);
|
||||
+
|
||||
+ if (seg == __USER_CS)
|
||||
+ return CKPT_X86_SEG_USER64_CS;
|
||||
+ if (seg == __USER_DS)
|
||||
+ return CKPT_X86_SEG_USER64_DS;
|
||||
+#ifdef CONFIG_COMPAT
|
||||
+ if (seg == __USER32_CS)
|
||||
+ return CKPT_X86_SEG_USER32_CS;
|
||||
+ if (seg == __USER32_DS)
|
||||
+ return CKPT_X86_SEG_USER32_DS;
|
||||
+#endif
|
||||
+
|
||||
+ if (seg & 4)
|
||||
+ return CKPT_X86_SEG_LDT | (seg >> 3);
|
||||
+
|
||||
+ seg >>= 3;
|
||||
+ if (GDT_ENTRY_TLS_MIN <= seg && seg <= GDT_ENTRY_TLS_MAX)
|
||||
+ return CKPT_X86_SEG_TLS | (seg - GDT_ENTRY_TLS_MIN);
|
||||
+
|
||||
+ printk(KERN_ERR "c/r: (decode) bad segment %#hx\n", seg);
|
||||
+ BUG();
|
||||
+}
|
||||
+
|
||||
+static __u64 encode_tls(struct desc_struct *d)
|
||||
+{
|
||||
+ return ((__u64)d->a << 32) + d->b;
|
||||
+}
|
||||
+
|
||||
+static int img_dump_regs(struct task_struct *p, char __user *buf, size_t size, int pos)
|
||||
+{
|
||||
+ struct binfmt_regs_image regi;
|
||||
+ struct pt_regs *regs;
|
||||
+ int i;
|
||||
+
|
||||
+ regs = task_pt_regs(p);
|
||||
+
|
||||
+ regi.r15 = regs->r15;
|
||||
+ regi.r14 = regs->r14;
|
||||
+ regi.r13 = regs->r13;
|
||||
+ regi.r12 = regs->r12;
|
||||
+ regi.r11 = regs->r11;
|
||||
+ regi.r10 = regs->r10;
|
||||
+ regi.r9 = regs->r9;
|
||||
+ regi.r8 = regs->r8;
|
||||
+ regi.ax = regs->ax;
|
||||
+ regi.orig_ax = regs->orig_ax;
|
||||
+ regi.bx = regs->bx;
|
||||
+ regi.cx = regs->cx;
|
||||
+ regi.dx = regs->dx;
|
||||
+ regi.si = regs->si;
|
||||
+ regi.di = regs->di;
|
||||
+ regi.ip = regs->ip;
|
||||
+ regi.flags = regs->flags;
|
||||
+ regi.bp = regs->bp;
|
||||
+ regi.sp = regs->sp;
|
||||
+
|
||||
+ /* segments */
|
||||
+ regi.gsindex = encode_segment(p->thread.gsindex);
|
||||
+ regi.fsindex = encode_segment(p->thread.fsindex);
|
||||
+ regi.cs = encode_segment(regs->cs);
|
||||
+ regi.ss = encode_segment(regs->ss);
|
||||
+ regi.ds = encode_segment(p->thread.ds);
|
||||
+ regi.es = encode_segment(p->thread.es);
|
||||
+
|
||||
+ BUILD_BUG_ON(GDT_ENTRY_TLS_ENTRIES != CKPT_TLS_ENTRIES);
|
||||
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
|
||||
+ regi.tls[i] = encode_tls(&p->thread.tls_array[i]);
|
||||
+
|
||||
+ if (p->thread.gsindex)
|
||||
+ regi.gs = 0;
|
||||
+ else
|
||||
+ regi.gs = p->thread.gs;
|
||||
+
|
||||
+ if (p->thread.fsindex)
|
||||
+ regi.fs = 0;
|
||||
+ else
|
||||
+ regi.fs = p->thread.fs;
|
||||
+
|
||||
+ return img_dump_buffer(buf, size, &regi, sizeof(regi), pos);
|
||||
+}
|
||||
+
|
||||
+static int img_dump_mm(struct mm_struct *mm, char __user *buf, size_t size, int pos)
|
||||
+{
|
||||
+ struct binfmt_mm_image mmi;
|
||||
+
|
||||
+ mmi.flags = mm->flags;
|
||||
+ mmi.def_flags = mm->def_flags;
|
||||
+ mmi.start_code = mm->start_code;
|
||||
+ mmi.end_code = mm->end_code;
|
||||
+ mmi.start_data = mm->start_data;
|
||||
+ mmi.end_data = mm->end_data;
|
||||
+ mmi.start_brk = mm->start_brk;
|
||||
+ mmi.brk = mm->brk;
|
||||
+ mmi.start_stack = mm->start_stack;
|
||||
+ mmi.arg_start = mm->arg_start;
|
||||
+ mmi.arg_end = mm->arg_end;
|
||||
+ mmi.env_start = mm->env_start;
|
||||
+ mmi.env_end = mm->env_end;
|
||||
+ mmi.exe_fd = 0;
|
||||
+
|
||||
+ return img_dump_buffer(buf, size, &mmi, sizeof(mmi), pos);
|
||||
+}
|
||||
+
|
||||
+static int img_dump_vma(struct vm_area_struct *vma, char __user *buf, size_t size, int pos)
|
||||
+{
|
||||
+ struct binfmt_vma_image vmai;
|
||||
+
|
||||
+ if (vma == NULL) {
|
||||
+ memset(&vmai, 0, sizeof(vmai));
|
||||
+ goto dumpit;
|
||||
+ }
|
||||
+
|
||||
+ printk("Dumping vma %016lx-%016lx %p/%p\n", vma->vm_start, vma->vm_end, vma, vma->vm_mm);
|
||||
+
|
||||
+ vmai.fd = 0;
|
||||
+ vmai.prot = 0;
|
||||
+ if (vma->vm_flags & VM_READ)
|
||||
+ vmai.prot |= PROT_READ;
|
||||
+ if (vma->vm_flags & VM_WRITE)
|
||||
+ vmai.prot |= PROT_WRITE;
|
||||
+ if (vma->vm_flags & VM_EXEC)
|
||||
+ vmai.prot |= PROT_EXEC;
|
||||
+
|
||||
+ vmai.flags = 0;
|
||||
+ if (vma->vm_file == NULL)
|
||||
+ vmai.flags |= MAP_ANONYMOUS;
|
||||
+ if (vma->vm_flags & VM_MAYSHARE)
|
||||
+ vmai.flags |= MAP_SHARED;
|
||||
+ else
|
||||
+ vmai.flags |= MAP_PRIVATE;
|
||||
+
|
||||
+ vmai.start = vma->vm_start;
|
||||
+ vmai.end = vma->vm_end;
|
||||
+ vmai.pgoff = vma->vm_pgoff;
|
||||
+
|
||||
+dumpit:
|
||||
+ return img_dump_buffer(buf, size, &vmai, sizeof(vmai), pos);
|
||||
+}
|
||||
+
|
||||
+static int img_dump_page(unsigned long addr, void *data, char __user *buf, size_t size, int pos)
|
||||
+{
|
||||
+ struct binfmt_page_image pgi;
|
||||
+ int ret = 0, tmp;
|
||||
+
|
||||
+ pgi.vaddr = addr;
|
||||
+
|
||||
+ if (pos < sizeof(pgi)) {
|
||||
+ tmp = img_dump_buffer(buf, size, &pgi, sizeof(pgi), pos);
|
||||
+ if (tmp < 0)
|
||||
+ return tmp;
|
||||
+
|
||||
+ ret = tmp;
|
||||
+ if (size <= ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ buf += ret;
|
||||
+ size -= ret;
|
||||
+ pos = 0;
|
||||
+ } else
|
||||
+ pos -= sizeof(pgi);
|
||||
+
|
||||
+ tmp = img_dump_buffer(buf, size, data, PAGE_SIZE, pos);
|
||||
+ if (tmp < 0)
|
||||
+ return tmp;
|
||||
+
|
||||
+ return ret + tmp;
|
||||
+}
|
||||
+
|
||||
+static inline int is_private_vma(struct vm_area_struct *vma)
|
||||
+{
|
||||
+ if (vma->vm_file == NULL)
|
||||
+ return 1;
|
||||
+ if (!(vma->vm_flags & VM_SHARED))
|
||||
+ return 1;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static ssize_t do_produce_dump(struct task_struct *p, char __user *buf,
|
||||
+ size_t size, loff_t *ppos)
|
||||
+{
|
||||
+ size_t img_pos = 0, img_ppos;
|
||||
+ size_t produced = 0;
|
||||
+ int len;
|
||||
+ loff_t pos = *ppos;
|
||||
+ struct mm_struct *mm;
|
||||
+ struct vm_area_struct *vma;
|
||||
+
|
||||
+#define move_pos(); do { \
|
||||
+ buf += len; \
|
||||
+ produced += len;\
|
||||
+ size -= len; \
|
||||
+ pos += len; \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define seek_pos(__size); do { \
|
||||
+ img_ppos = img_pos; \
|
||||
+ img_pos += (__size); \
|
||||
+ } while (0)
|
||||
+
|
||||
+ /* header */
|
||||
+ seek_pos(sizeof(struct binfmt_img_header));
|
||||
+ if (pos < img_pos) {
|
||||
+ len = img_dump_header(buf, size, pos - img_ppos);
|
||||
+ if (len < 0)
|
||||
+ goto err;
|
||||
+
|
||||
+ move_pos();
|
||||
+ if (size == 0)
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ /* registers */
|
||||
+ seek_pos(sizeof(struct binfmt_regs_image));
|
||||
+ if (pos < img_pos) {
|
||||
+ len = img_dump_regs(p, buf, size, pos - img_ppos);
|
||||
+ if (len < 0)
|
||||
+ goto err;
|
||||
+
|
||||
+ move_pos();
|
||||
+ if (size == 0)
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ /* memory */
|
||||
+ mm = get_task_mm(p);
|
||||
+ if (mm == NULL)
|
||||
+ return -EACCES;
|
||||
+
|
||||
+ down_read(&mm->mmap_sem);
|
||||
+
|
||||
+ seek_pos(sizeof(struct binfmt_mm_image));
|
||||
+ if (pos < img_pos) {
|
||||
+ len = img_dump_mm(mm, buf, size, pos - img_ppos);
|
||||
+ if (len < 0)
|
||||
+ goto err_mm;
|
||||
+
|
||||
+ move_pos();
|
||||
+ if (size == 0)
|
||||
+ goto out_mm;
|
||||
+ }
|
||||
+
|
||||
+ vma = mm->mmap;
|
||||
+ while (1) {
|
||||
+ seek_pos(sizeof(struct binfmt_vma_image));
|
||||
+ if (pos < img_pos) {
|
||||
+ len = img_dump_vma(vma, buf, size, pos - img_ppos);
|
||||
+ if (len < 0)
|
||||
+ goto err_mm;
|
||||
+
|
||||
+ move_pos();
|
||||
+ if (size == 0)
|
||||
+ goto out_mm;
|
||||
+ }
|
||||
+
|
||||
+ if (vma == NULL)
|
||||
+ break;
|
||||
+
|
||||
+ vma = vma->vm_next;
|
||||
+ }
|
||||
+
|
||||
+ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
|
||||
+ /* slow and stupid */
|
||||
+ unsigned long addr;
|
||||
+ struct page *page;
|
||||
+ void *pg_data;
|
||||
+
|
||||
+ if (!is_private_vma(vma))
|
||||
+ continue;
|
||||
+
|
||||
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
|
||||
+ page = follow_page(vma, addr, FOLL_FORCE | FOLL_DUMP | FOLL_GET);
|
||||
+ if (page == NULL)
|
||||
+ continue;
|
||||
+ if (IS_ERR(page)) /* huh? */
|
||||
+ continue;
|
||||
+
|
||||
+ seek_pos(sizeof(struct binfmt_page_image) + PAGE_SIZE);
|
||||
+ if (pos < img_pos) {
|
||||
+ pg_data = kmap(page);
|
||||
+ len = img_dump_page(addr, pg_data, buf, size, pos - img_ppos);
|
||||
+ kunmap(page);
|
||||
+
|
||||
+ if (len < 0) {
|
||||
+ put_page(page);
|
||||
+ goto err_mm;
|
||||
+ }
|
||||
+
|
||||
+ move_pos();
|
||||
+ if (size == 0) {
|
||||
+ put_page(page);
|
||||
+ goto out_mm;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ put_page(page);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ seek_pos(sizeof(struct binfmt_page_image));
|
||||
+ if (pos < img_pos) {
|
||||
+ struct binfmt_page_image zero;
|
||||
+
|
||||
+ memset(&zero, 0, sizeof(zero));
|
||||
+ len = img_dump_buffer(buf, size, &zero, sizeof(zero), pos - img_ppos);
|
||||
+ if (len < 0)
|
||||
+ goto err;
|
||||
+
|
||||
+ move_pos();
|
||||
+ }
|
||||
+
|
||||
+out_mm:
|
||||
+ up_read(&mm->mmap_sem);
|
||||
+ mmput(mm);
|
||||
+out:
|
||||
+ *ppos = pos;
|
||||
+ return produced;
|
||||
+
|
||||
+err_mm:
|
||||
+ up_read(&mm->mmap_sem);
|
||||
+ mmput(mm);
|
||||
+err:
|
||||
+ return len;
|
||||
+}
|
||||
+
|
||||
+static ssize_t img_dump_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
|
||||
+{
|
||||
+ struct task_struct *p;
|
||||
+
|
||||
+ p = get_proc_task(file->f_dentry->d_inode);
|
||||
+ if (p == NULL)
|
||||
+ return -ESRCH;
|
||||
+
|
||||
+ if (!(p->state & TASK_STOPPED)) {
|
||||
+ put_task_struct(p);
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ return do_produce_dump(p, buf, size, ppos);
|
||||
+}
|
||||
+
|
||||
/* open() handler of the dump file: nothing to set up, always succeeds. */
static int img_dump_open(struct inode *inode, struct file *filp)
{
	const int status = 0;

	return status;
}
|
||||
+
|
||||
/* release() handler of the dump file: nothing to tear down, always succeeds. */
static int img_dump_release(struct inode *inode, struct file *filp)
{
	const int status = 0;

	return status;
}
|
||||
+
|
||||
+const struct file_operations proc_pid_dump_operations = {
|
||||
+ .open = img_dump_open,
|
||||
+ .read = img_dump_read,
|
||||
+ .release = img_dump_release,
|
||||
+};
|
||||
Index: linux-2.6.git/include/linux/binfmt_img.h
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ linux-2.6.git/include/linux/binfmt_img.h
|
||||
@@ -0,0 +1,87 @@
|
||||
#ifndef __BINFMT_IMG_H__
#define __BINFMT_IMG_H__

#include <linux/types.h>

/* Leading record of an image: format magic and revision. */
struct binfmt_img_header {
	__u32	magic, version;
};

/* Number of TLS descriptors saved per task. */
#define CKPT_TLS_ENTRIES 3

/* Saved x86-64 register state. */
struct binfmt_regs_image {
	/* general purpose registers */
	__u64	r15, r14, r13, r12;
	__u64	r11, r10, r9, r8;
	__u64	ax, orig_ax;
	__u64	bx, cx, dx;
	__u64	si, di;
	__u64	ip, flags;
	__u64	bp, sp;

	/* gs/fs values, TLS descriptors and encoded segment selectors */
	__u64	gs, fs;
	__u64	tls[CKPT_TLS_ENTRIES];
	__u16	gsindex, fsindex;
	__u16	cs, ss, ds, es;
};

/* Encoded segment selector values stored in the 16-bit selector fields. */
#define CKPT_X86_SEG_NULL	0
#define CKPT_X86_SEG_USER32_CS	1
#define CKPT_X86_SEG_USER32_DS	2
#define CKPT_X86_SEG_USER64_CS	3
#define CKPT_X86_SEG_USER64_DS	4
#define CKPT_X86_SEG_TLS	0x4000
#define CKPT_X86_SEG_LDT	0x8000

/* Saved mm_struct layout fields. */
struct binfmt_mm_image {
	__u64	flags, def_flags;
	__u64	start_code, end_code;
	__u64	start_data, end_data;
	__u64	start_brk, brk;
	__u64	start_stack;
	__u64	arg_start, arg_end;
	__u64	env_start, env_end;
	__u32	exe_fd;
};

/* One memory mapping; an all-zero record ends the list. */
struct binfmt_vma_image {
	__u32	prot, flags;
	__u32	pad, fd;
	__u64	start, end;
	__u64	pgoff;
};

/* Header placed in front of each saved page; vaddr == 0 ends the list. */
struct binfmt_page_image {
	__u64	vaddr;
};

#define BINFMT_IMG_MAGIC	0xa75b8d43
#define BINFMT_IMG_VERS_0	0x00000100

#endif
|
||||
Index: linux-2.6.git/include/linux/proc_fs.h
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/include/linux/proc_fs.h
|
||||
+++ linux-2.6.git/include/linux/proc_fs.h
|
||||
@@ -102,6 +102,8 @@ struct vmcore {
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
|
||||
+extern const struct file_operations proc_pid_dump_operations;
|
||||
+
|
||||
extern void proc_root_init(void);
|
||||
|
||||
void proc_flush_task(struct task_struct *task);
|
371
xemul/0004-Images-execution-binfmt-handler.patch
Normal file
371
xemul/0004-Images-execution-binfmt-handler.patch
Normal file
@ -0,0 +1,371 @@
|
||||
From 0f8e07457aa91e9461665440ca258eb9f93bf2f9 Mon Sep 17 00:00:00 2001
|
||||
From: root <root@ovzept.sw.ru>
|
||||
Date: Fri, 3 Jun 2011 18:16:43 +0400
|
||||
Subject: [PATCH] Images execution binfmt handler
|
||||
|
||||
---
|
||||
fs/Kconfig.binfmt | 6 +
|
||||
fs/Makefile | 1 +
|
||||
fs/binfmt_img.c | 324 +++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
3 files changed, 331 insertions(+), 0 deletions(-)
|
||||
create mode 100644 fs/binfmt_img.c
|
||||
|
||||
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
|
||||
index 79e2ca7..0b2f48e 100644
|
||||
--- a/fs/Kconfig.binfmt
|
||||
+++ b/fs/Kconfig.binfmt
|
||||
@@ -161,3 +161,9 @@ config BINFMT_MISC
|
||||
You may say M here for module support and later load the module when
|
||||
you have use for it; the module is called binfmt_misc. If you
|
||||
don't know what to answer at this point, say Y.
|
||||
+
|
||||
+config BINFMT_IMG
|
||||
+ tristate "Kernel support for IMG binaries"
|
||||
+ depends on X86
|
||||
+ help
|
||||
+ Say M/Y here to enable support for checkpoint-restore images execution
|
||||
diff --git a/fs/Makefile b/fs/Makefile
|
||||
index fb68c2b..8221719 100644
|
||||
--- a/fs/Makefile
|
||||
+++ b/fs/Makefile
|
||||
@@ -33,6 +33,7 @@ obj-$(CONFIG_NFSD_DEPRECATED) += nfsctl.o
|
||||
obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
|
||||
obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o
|
||||
obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o
|
||||
+obj-$(CONFIG_BINFMT_IMG) += binfmt_img.o
|
||||
|
||||
# binfmt_script is always there
|
||||
obj-y += binfmt_script.o
|
||||
diff --git a/fs/binfmt_img.c b/fs/binfmt_img.c
|
||||
new file mode 100644
|
||||
index 0000000..9b09797
|
||||
--- /dev/null
|
||||
+++ b/fs/binfmt_img.c
|
||||
@@ -0,0 +1,324 @@
|
||||
+#include <linux/binfmt_img.h>
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/binfmts.h>
|
||||
+#include <linux/sched.h>
|
||||
+#include <linux/fs.h>
|
||||
+#include <linux/file.h>
|
||||
+#include <linux/mm.h>
|
||||
+#include <linux/mman.h>
|
||||
+#include <linux/highmem.h>
|
||||
+#include <asm/tlbflush.h>
|
||||
+#include <asm/desc.h>
|
||||
+
|
||||
+/*
|
||||
+ * The binary handler to save and restore a single task state
|
||||
+ */
|
||||
+
|
||||
+static int img_check_header(void *buf)
|
||||
+{
|
||||
+ struct binfmt_img_header *hdr = buf;
|
||||
+
|
||||
+ if (hdr->magic != BINFMT_IMG_MAGIC)
|
||||
+ return -ENOEXEC;
|
||||
+
|
||||
+ if (hdr->version != BINFMT_IMG_VERS_0)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ return sizeof(*hdr);
|
||||
+}
|
||||
+
|
||||
+static unsigned short decode_segment(__u16 seg)
|
||||
+{
|
||||
+ if (seg == CKPT_X86_SEG_NULL)
|
||||
+ return 0;
|
||||
+
|
||||
+ if (seg == CKPT_X86_SEG_USER64_CS)
|
||||
+ return __USER_CS;
|
||||
+ if (seg == CKPT_X86_SEG_USER64_DS)
|
||||
+ return __USER_DS;
|
||||
+#ifdef CONFIG_COMPAT
|
||||
+ if (seg == CKPT_X86_SEG_USER32_CS)
|
||||
+ return __USER32_CS;
|
||||
+ if (seg == CKPT_X86_SEG_USER32_DS)
|
||||
+ return __USER32_DS;
|
||||
+#endif
|
||||
+
|
||||
+ if (seg & CKPT_X86_SEG_TLS) {
|
||||
+ seg &= ~CKPT_X86_SEG_TLS;
|
||||
+ return ((GDT_ENTRY_TLS_MIN + seg) << 3) | 3;
|
||||
+ }
|
||||
+ if (seg & CKPT_X86_SEG_LDT) {
|
||||
+ seg &= ~CKPT_X86_SEG_LDT;
|
||||
+ return (seg << 3) | 7;
|
||||
+ }
|
||||
+ BUG();
|
||||
+}
|
||||
+
|
||||
+static void decode_tls(struct desc_struct *d, __u64 val)
|
||||
+{
|
||||
+ d->a = (unsigned int)(val >> 32);
|
||||
+ d->b = (unsigned int)(val & 0xFFFFFFFF);
|
||||
+}
|
||||
+
|
||||
+static int img_restore_regs(struct linux_binprm *bprm, loff_t off, struct pt_regs *regs)
|
||||
+{
|
||||
+ int ret, i;
|
||||
+ struct binfmt_regs_image regi;
|
||||
+ struct thread_struct *th = ¤t->thread;
|
||||
+ unsigned short seg;
|
||||
+
|
||||
+ ret = kernel_read(bprm->file, off, (char *)®i, sizeof(regi));
|
||||
+ if (ret != sizeof(regi))
|
||||
+ return -EIO;
|
||||
+
|
||||
+ regs->r15 = regi.r15;
|
||||
+ regs->r14 = regi.r14;
|
||||
+ regs->r13 = regi.r13;
|
||||
+ regs->r12 = regi.r12;
|
||||
+ regs->r11 = regi.r11;
|
||||
+ regs->r10 = regi.r10;
|
||||
+ regs->r9 = regi.r9;
|
||||
+ regs->r8 = regi.r8;
|
||||
+ regs->ax = regi.ax;
|
||||
+ regs->orig_ax = regi.orig_ax;
|
||||
+ regs->bx = regi.bx;
|
||||
+ regs->cx = regi.cx;
|
||||
+ regs->dx = regi.dx;
|
||||
+ regs->si = regi.si;
|
||||
+ regs->di = regi.di;
|
||||
+ regs->ip = regi.ip;
|
||||
+ regs->flags = regi.flags;
|
||||
+ regs->bp = regi.bp;
|
||||
+ regs->sp = regi.sp;
|
||||
+
|
||||
+ regs->cs = decode_segment(regi.cs);
|
||||
+ regs->ss = decode_segment(regi.ss);
|
||||
+
|
||||
+ th->usersp = regi.sp;
|
||||
+ th->ds = decode_segment(regi.ds);
|
||||
+ th->es = decode_segment(regi.es);
|
||||
+ th->fsindex = decode_segment(regi.fsindex);
|
||||
+ th->gsindex = decode_segment(regi.gsindex);
|
||||
+
|
||||
+ th->fs = regi.fs;
|
||||
+ th->gs = regi.gs;
|
||||
+
|
||||
+ BUILD_BUG_ON(GDT_ENTRY_TLS_ENTRIES != CKPT_TLS_ENTRIES);
|
||||
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
|
||||
+ decode_tls(&th->tls_array[i], regi.tls[i]);
|
||||
+
|
||||
+ load_TLS(th, smp_processor_id());
|
||||
+
|
||||
+ seg = th->fsindex;
|
||||
+ loadsegment(fs, seg);
|
||||
+ savesegment(fs, seg);
|
||||
+ if (seg != th->fsindex) {
|
||||
+ printk("ERROR saving fs selector want %x, has %x\n",
|
||||
+ (unsigned int)th->fsindex, (unsigned int)seg);
|
||||
+ return -EFAULT;
|
||||
+ }
|
||||
+
|
||||
+ if (th->fs)
|
||||
+ wrmsrl(MSR_FS_BASE, th->fs);
|
||||
+ load_gs_index(th->gsindex);
|
||||
+ if (th->gs)
|
||||
+ wrmsrl(MSR_KERNEL_GS_BASE, th->gs);
|
||||
+
|
||||
+ return sizeof(regi);
|
||||
+}
|
||||
+
|
||||
+static int img_restore_mm(struct linux_binprm *bprm, loff_t off)
|
||||
+{
|
||||
+ int ret;
|
||||
+ struct binfmt_mm_image mmi;
|
||||
+ struct mm_struct *mm = current->mm;
|
||||
+
|
||||
+ ret = kernel_read(bprm->file, off, (char *)&mmi, sizeof(mmi));
|
||||
+ if (ret != sizeof(mmi))
|
||||
+ return -EIO;
|
||||
+
|
||||
+ mm->flags = mmi.flags;
|
||||
+ mm->def_flags = mmi.def_flags;
|
||||
+ mm->start_code = mmi.start_code;
|
||||
+ mm->end_code = mmi.end_code;
|
||||
+ mm->start_data = mmi.start_data;
|
||||
+ mm->end_data = mmi.end_data;
|
||||
+ mm->start_brk = mmi.start_brk;
|
||||
+ mm->brk = mmi.brk;
|
||||
+ mm->start_stack = mmi.start_stack;
|
||||
+ mm->arg_start = mmi.arg_start;
|
||||
+ mm->arg_end = mmi.arg_end;
|
||||
+ mm->env_start = mmi.env_start;
|
||||
+ mm->env_end = mmi.env_end;
|
||||
+
|
||||
+ if (mmi.exe_fd != 0) {
|
||||
+ struct file *f;
|
||||
+
|
||||
+ f = fget(mmi.exe_fd);
|
||||
+ if (f == NULL)
|
||||
+ return -EBADF;
|
||||
+
|
||||
+ fput(mm->exe_file);
|
||||
+ mm->exe_file = f;
|
||||
+ }
|
||||
+
|
||||
+ return sizeof(mmi);
|
||||
+}
|
||||
+
|
||||
+static int img_restore_vmas(struct linux_binprm *bprm, loff_t off)
|
||||
+{
|
||||
+ int ret;
|
||||
+ struct mm_struct *mm = current->mm;
|
||||
+ int len = 0;
|
||||
+
|
||||
+ do_munmap(mm, 0, TASK_SIZE);
|
||||
+
|
||||
+ while (1) {
|
||||
+ struct binfmt_vma_image vmai;
|
||||
+ unsigned long addr;
|
||||
+ struct file *file = NULL;
|
||||
+
|
||||
+ len += sizeof(vmai);
|
||||
+
|
||||
+ ret = kernel_read(bprm->file, off, (char *)&vmai, sizeof(vmai));
|
||||
+ if (ret != sizeof(vmai))
|
||||
+ return -EIO;
|
||||
+
|
||||
+ if (vmai.start == 0 && vmai.end == 0)
|
||||
+ break;
|
||||
+
|
||||
+ if (vmai.fd != 0) {
|
||||
+ file = fget(vmai.fd);
|
||||
+ if (file == NULL)
|
||||
+ return -EBADF;
|
||||
+ } else
|
||||
+ vmai.flags |= MAP_ANONYMOUS;
|
||||
+
|
||||
+ if (vmai.start <= mm->start_stack && vmai.end >= mm->start_stack)
|
||||
+ vmai.flags |= MAP_GROWSDOWN;
|
||||
+
|
||||
+ addr = do_mmap_pgoff(file, vmai.start, vmai.end - vmai.start,
|
||||
+ vmai.prot, vmai.flags | MAP_FIXED, vmai.pgoff);
|
||||
+
|
||||
+ if (vmai.fd) {
|
||||
+ fput(file);
|
||||
+ do_close(vmai.fd);
|
||||
+ }
|
||||
+
|
||||
+ if ((long)addr < 0 || (addr != vmai.start))
|
||||
+ return -ENXIO;
|
||||
+
|
||||
+ off += sizeof(vmai);
|
||||
+ }
|
||||
+
|
||||
+ return len;
|
||||
+}
|
||||
+
|
||||
+static int img_restore_pages(struct linux_binprm *bprm, loff_t off)
|
||||
+{
|
||||
+ int ret;
|
||||
+ struct mm_struct *mm = current->mm;
|
||||
+ int len = 0;
|
||||
+
|
||||
+ while (1) {
|
||||
+ struct binfmt_page_image pgi;
|
||||
+ struct vm_area_struct *vma;
|
||||
+ struct page *page;
|
||||
+ void *pg_data;
|
||||
+
|
||||
+ ret = kernel_read(bprm->file, off, (char *)&pgi, sizeof(pgi));
|
||||
+ if (ret != sizeof(pgi))
|
||||
+ return -EIO;
|
||||
+
|
||||
+ len += sizeof(pgi);
|
||||
+ if (pgi.vaddr == 0)
|
||||
+ break;
|
||||
+
|
||||
+ vma = find_vma(mm, pgi.vaddr);
|
||||
+ if (vma == NULL)
|
||||
+ return -ESRCH;
|
||||
+
|
||||
+ ret = get_user_pages(current, current->mm, (unsigned long)pgi.vaddr,
|
||||
+ 1, 1, 1, &page, NULL);
|
||||
+ if (ret != 1)
|
||||
+ return -EFAULT;
|
||||
+
|
||||
+ pg_data = kmap(page);
|
||||
+ ret = kernel_read(bprm->file, off + sizeof(pgi), pg_data, PAGE_SIZE);
|
||||
+ kunmap(page);
|
||||
+ put_page(page);
|
||||
+
|
||||
+ if (ret != PAGE_SIZE)
|
||||
+ return -EFAULT;
|
||||
+
|
||||
+ len += PAGE_SIZE;
|
||||
+ off += sizeof(pgi) + PAGE_SIZE;
|
||||
+ }
|
||||
+
|
||||
+ return len;
|
||||
+}
|
||||
+
|
||||
+static int img_restore_mem(struct linux_binprm *bprm, loff_t off)
|
||||
+{
|
||||
+ int ret;
|
||||
+ loff_t len = off;
|
||||
+
|
||||
+ ret = img_restore_mm(bprm, len);
|
||||
+ if (ret < 0)
|
||||
+ return ret;
|
||||
+
|
||||
+ len += ret;
|
||||
+ ret = img_restore_vmas(bprm, len);
|
||||
+ if (ret < 0)
|
||||
+ return ret;
|
||||
+
|
||||
+ len += ret;
|
||||
+ ret = img_restore_pages(bprm, len);
|
||||
+ if (ret < 0)
|
||||
+ return ret;
|
||||
+
|
||||
+ len += ret;
|
||||
+ return len;
|
||||
+
|
||||
+}
|
||||
+
|
||||
+static int img_load_binary(struct linux_binprm * bprm, struct pt_regs * regs)
|
||||
+{
|
||||
+ int ret;
|
||||
+ loff_t len = 0;
|
||||
+
|
||||
+ ret = img_check_header(bprm->buf);
|
||||
+ if (ret < 0)
|
||||
+ return ret;
|
||||
+
|
||||
+ len += ret;
|
||||
+ ret = img_restore_regs(bprm, len, regs);
|
||||
+ if (ret < 0)
|
||||
+ return ret;
|
||||
+
|
||||
+ len += ret;
|
||||
+ ret = img_restore_mem(bprm, len);
|
||||
+ if (ret < 0)
|
||||
+ return ret;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static struct linux_binfmt img_binfmt = {
|
||||
+ .module = THIS_MODULE,
|
||||
+ .load_binary = img_load_binary,
|
||||
+};
|
||||
+
|
||||
+static __init int img_binfmt_init(void)
|
||||
+{
|
||||
+ return register_binfmt(&img_binfmt);
|
||||
+}
|
||||
+
|
||||
+static __exit void img_binfmt_exit(void)
|
||||
+{
|
||||
+ unregister_binfmt(&img_binfmt);
|
||||
+}
|
||||
+
|
||||
+module_init(img_binfmt_init);
|
||||
+module_exit(img_binfmt_exit);
|
||||
+MODULE_LICENSE("GPL");
|
||||
--
|
||||
1.5.5.6
|
||||
|
96
xemul/binfmt_img.h
Normal file
96
xemul/binfmt_img.h
Normal file
@ -0,0 +1,96 @@
|
||||
#ifndef __BINFMT_IMG_H__
#define __BINFMT_IMG_H__

#include <linux/types.h>

#define __packed __attribute__((packed))

/* Leading record of an image file. */
struct binfmt_img_header {
	__u32	magic, version;
	__u16	arch, flags;
} __packed;

/* Number of TLS descriptors saved per task. */
#define CKPT_TLS_ENTRIES 3

/*
 * Saved register state. The union with dummy[] fixes the record size at
 * 32 64-bit words regardless of how many fields 'r' holds.
 */
struct binfmt_regs_image {
	union {
		struct {
			/* general purpose registers */
			__u64	r15, r14, r13, r12;
			__u64	r11, r10, r9, r8;
			__u64	ax, orig_ax;
			__u64	bx, cx, dx;
			__u64	si, di;
			__u64	ip, flags;
			__u64	bp, sp;

			/* gs/fs values, TLS descriptors, encoded selectors */
			__u64	gs, fs;
			__u64	tls[CKPT_TLS_ENTRIES];
			__u16	gsindex, fsindex;
			__u16	cs, ss, ds, es;
		} r;
		__u64 dummy[32];
	};
} __packed;

/* Encoded segment selector values stored in the 16-bit selector fields. */
#define CKPT_X86_SEG_NULL	0
#define CKPT_X86_SEG_USER32_CS	1
#define CKPT_X86_SEG_USER32_DS	2
#define CKPT_X86_SEG_USER64_CS	3
#define CKPT_X86_SEG_USER64_DS	4
#define CKPT_X86_SEG_TLS	0x4000
#define CKPT_X86_SEG_LDT	0x8000

/* Saved mm layout fields. */
struct binfmt_mm_image {
	__u64	flags, def_flags;
	__u64	start_code, end_code;
	__u64	start_data, end_data;
	__u64	start_brk, brk;
	__u64	start_stack;
	__u64	arg_start, arg_end;
	__u64	env_start, env_end;
	__u32	exe_fd;
} __packed;

/* One memory mapping. */
struct binfmt_vma_image {
	__u32	prot, flags;
	__u32	pad, fd;
	__u64	start, end;
	__u64	pgoff;
} __packed;

/* Header placed in front of each saved page. */
struct binfmt_page_image {
	__u64	vaddr;
} __packed;

#define BINFMT_IMG_MAGIC	0xa75b8d43
#define BINFMT_IMG_VERS_0	0x00000100

#endif
|
781
xemul/cr-dump.c
Normal file
781
xemul/cr-dump.c
Normal file
@ -0,0 +1,781 @@
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
#include <dirent.h>
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <errno.h>
|
||||
#include <linux/kdev_t.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/vfs.h>
|
||||
|
||||
#include <linux/types.h>
|
||||
#include "img_structs.h"
|
||||
|
||||
static int fdinfo_img;
|
||||
static int pages_img;
|
||||
static int core_img;
|
||||
static int shmem_img;
|
||||
static int pipes_img;
|
||||
|
||||
#define PIPEFS_MAGIC 0x50495045
|
||||
|
||||
static int prep_img_files(int pid)
|
||||
{
|
||||
__u32 type;
|
||||
char name[64];
|
||||
|
||||
sprintf(name, "fdinfo-%d.img", pid);
|
||||
fdinfo_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
|
||||
if (fdinfo_img < 0) {
|
||||
perror("Can't open fdinfo");
|
||||
return 1;
|
||||
}
|
||||
|
||||
type = FDINFO_MAGIC;
|
||||
write(fdinfo_img, &type, 4);
|
||||
|
||||
sprintf(name, "pages-%d.img", pid);
|
||||
pages_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
|
||||
if (pages_img < 0) {
|
||||
perror("Can't open shmem");
|
||||
return 1;
|
||||
}
|
||||
|
||||
type = PAGES_MAGIC;
|
||||
write(pages_img, &type, 4);
|
||||
|
||||
sprintf(name, "core-%d.img", pid);
|
||||
core_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
|
||||
if (core_img < 0) {
|
||||
perror("Can't open core");
|
||||
return 1;
|
||||
}
|
||||
|
||||
sprintf(name, "shmem-%d.img", pid);
|
||||
shmem_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
|
||||
if (shmem_img < 0) {
|
||||
perror("Can't open shmem");
|
||||
return 1;
|
||||
}
|
||||
|
||||
type = SHMEM_MAGIC;
|
||||
write(shmem_img, &type, 4);
|
||||
|
||||
sprintf(name, "pipes-%d.img", pid);
|
||||
pipes_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
|
||||
if (pipes_img < 0) {
|
||||
perror("Can't open pipes");
|
||||
return 1;
|
||||
}
|
||||
|
||||
type = PIPES_MAGIC;
|
||||
write(pipes_img, &type, 4);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Placeholder for removing the image files of @pid; does nothing yet. */
static void kill_imgfiles(int pid)
{
	/* FIXME: not implemented */
	(void)pid;
}
|
||||
|
||||
/* Send SIGSTOP to @pid; returns the result of kill(). */
static int stop_task(int pid)
{
	int rc = kill(pid, SIGSTOP);

	return rc;
}
|
||||
|
||||
/* Send SIGCONT to @pid; a failure is reported but not propagated. */
static void continue_task(int pid)
{
	int rc = kill(pid, SIGCONT);

	if (rc != 0)
		perror("Can't cont task");
}
|
||||
|
||||
static char big_tmp_str[PATH_MAX];
|
||||
|
||||
static int read_fd_params(int pid, char *fd, unsigned long *pos, unsigned int *flags)
|
||||
{
|
||||
char fd_str[128];
|
||||
int ifd;
|
||||
|
||||
sprintf(fd_str, "/proc/%d/fdinfo/%s", pid, fd);
|
||||
|
||||
printf("\tGetting fdinfo for fd %s\n", fd);
|
||||
ifd = open(fd_str, O_RDONLY);
|
||||
if (ifd < 0) {
|
||||
perror("Can't open fdinfo");
|
||||
return 1;
|
||||
}
|
||||
|
||||
read(ifd, big_tmp_str, sizeof(big_tmp_str));
|
||||
close(ifd);
|
||||
|
||||
sscanf(big_tmp_str, "pos:\t%lli\nflags:\t%o\n", pos, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dump_one_reg_file(int type, unsigned long fd_name, int lfd,
|
||||
int lclose, unsigned long pos, unsigned int flags)
|
||||
{
|
||||
char fd_str[128];
|
||||
int len;
|
||||
struct fdinfo_entry e;
|
||||
|
||||
sprintf(fd_str, "/proc/self/fd/%d", lfd);
|
||||
len = readlink(fd_str, big_tmp_str, sizeof(big_tmp_str) - 1);
|
||||
if (len < 0) {
|
||||
perror("Can't readlink fd");
|
||||
return 1;
|
||||
}
|
||||
|
||||
big_tmp_str[len] = '\0';
|
||||
printf("\tDumping path for %x fd via self %d [%s]\n", fd_name, lfd, big_tmp_str);
|
||||
|
||||
if (lclose)
|
||||
close(lfd);
|
||||
|
||||
e.type = type;
|
||||
e.addr = fd_name;
|
||||
e.len = len;
|
||||
e.pos = pos;
|
||||
e.flags = flags;
|
||||
|
||||
write(fdinfo_img, &e, sizeof(e));
|
||||
write(fdinfo_img, big_tmp_str, len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define MAX_PIPE_BUF_SIZE 1024 /* FIXME - this is not so */
|
||||
#define SPLICE_F_NONBLOCK 0x2
|
||||
|
||||
static int dump_pipe_and_data(int lfd, struct pipes_entry *e)
|
||||
{
|
||||
int steal_pipe[2];
|
||||
int ret;
|
||||
|
||||
printf("\tDumping data from pipe %x\n", e->pipeid);
|
||||
if (pipe(steal_pipe) < 0) {
|
||||
perror("Can't create pipe for stealing data");
|
||||
return 1;
|
||||
}
|
||||
|
||||
ret = tee(lfd, steal_pipe[1], MAX_PIPE_BUF_SIZE, SPLICE_F_NONBLOCK);
|
||||
if (ret < 0) {
|
||||
if (errno != EAGAIN) {
|
||||
perror("Can't pick pipe data");
|
||||
return 1;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
e->bytes = ret;
|
||||
write(pipes_img, e, sizeof(*e));
|
||||
|
||||
if (ret) {
|
||||
ret = splice(steal_pipe[0], NULL, pipes_img, NULL, ret, 0);
|
||||
if (ret < 0) {
|
||||
perror("Can't push pipe data");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
close(steal_pipe[0]);
|
||||
close(steal_pipe[1]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dump_one_pipe(int fd, int lfd, unsigned int id, unsigned int flags)
|
||||
{
|
||||
struct pipes_entry e;
|
||||
|
||||
printf("\tDumping pipe %d/%x flags %x\n", fd, id, flags);
|
||||
|
||||
e.fd = fd;
|
||||
e.pipeid = id;
|
||||
e.flags = flags;
|
||||
|
||||
if (flags & O_WRONLY) {
|
||||
e.bytes = 0;
|
||||
write(pipes_img, &e, sizeof(e));
|
||||
return 0;
|
||||
}
|
||||
|
||||
return dump_pipe_and_data(lfd, &e);
|
||||
}
|
||||
|
||||
static int dump_one_fd(int dir, char *fd_name, unsigned long pos, unsigned int flags)
|
||||
{
|
||||
int fd;
|
||||
struct stat st_buf;
|
||||
struct statfs stfs_buf;
|
||||
|
||||
printf("\tDumping fd %s\n", fd_name);
|
||||
fd = openat(dir, fd_name, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
printf("Tried to openat %d/%d %s\n", getpid(), dir, fd_name);
|
||||
perror("Can't open fd");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (fstat(fd, &st_buf) < 0) {
|
||||
perror("Can't stat one");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (S_ISREG(st_buf.st_mode))
|
||||
return dump_one_reg_file(FDINFO_FD, atoi(fd_name), fd, 1, pos, flags);
|
||||
|
||||
if (S_ISFIFO(st_buf.st_mode)) {
|
||||
if (fstatfs(fd, &stfs_buf) < 0) {
|
||||
perror("Can't statfs one");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (stfs_buf.f_type == PIPEFS_MAGIC)
|
||||
return dump_one_pipe(atoi(fd_name), fd, st_buf.st_ino, flags);
|
||||
}
|
||||
|
||||
if (!strcmp(fd_name, "0")) {
|
||||
printf("\tSkipping stdin\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!strcmp(fd_name, "1")) {
|
||||
printf("\tSkipping stdout\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!strcmp(fd_name, "2")) {
|
||||
printf("\tSkipping stderr\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!strcmp(fd_name, "3")) {
|
||||
printf("\tSkipping tty\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Can't dump file %s of that type [%x]\n", fd_name, st_buf.st_mode);
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
/*
 * Dump every open descriptor of task @pid by walking /proc/<pid>/fd.
 *
 * Returns 0 on success, -1 when the directory cannot be opened and 1 when a
 * descriptor cannot be inspected or dumped.
 */
static int dump_task_files(int pid)
{
	char pid_fd_dir[64];
	DIR *fd_dir;
	struct dirent *de;
	unsigned long pos = 0;
	unsigned int flags = 0;
	int ret = 0;

	printf("Dumping open files for %d\n", pid);

	snprintf(pid_fd_dir, sizeof(pid_fd_dir), "/proc/%d/fd", pid);
	fd_dir = opendir(pid_fd_dir);
	if (fd_dir == NULL) {
		perror("Can't open fd dir");
		return -1;
	}

	while ((de = readdir(fd_dir)) != NULL) {
		/* skip "." and ".." */
		if (de->d_name[0] == '.')
			continue;

		if (read_fd_params(pid, de->d_name, &pos, &flags) ||
		    dump_one_fd(dirfd(fd_dir), de->d_name, pos, flags)) {
			ret = 1;
			break;
		}
	}

	/* the directory stream used to leak on the error paths */
	closedir(fd_dir);
	return ret;
}
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
#define PAGE_RSS 0x1
|
||||
|
||||
/*
 * Parse an unprefixed hexadecimal number at the start of @str.
 *
 * Digits in either case are accepted; parsing stops at the first character
 * that is not a hex digit. When @end is not NULL it receives a pointer to
 * that character. Returns the parsed value (0 when there are no digits);
 * overflow is not detected.
 */
static unsigned long rawhex(char *str, char **end)
{
	unsigned long value = 0;
	char *p;

	for (p = str; ; p++) {
		unsigned long digit;
		char c = *p;

		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (c >= 'a' && c <= 'f')
			digit = c - 'a' + 0xA;
		else if (c >= 'A' && c <= 'F')
			digit = c - 'A' + 0xA;
		else
			break;

		value = (value << 4) + digit;
	}

	if (end)
		*end = p;
	return value;
}
|
||||
|
||||
static void map_desc_parm(char *desc, unsigned long *pgoff, unsigned long *len)
|
||||
{
|
||||
char *s;
|
||||
unsigned long start, end;
|
||||
|
||||
start = rawhex(desc, &s);
|
||||
if (*s != '-') {
|
||||
goto bug;
|
||||
}
|
||||
|
||||
end = rawhex(s + 1, &s);
|
||||
if (*s != ' ') {
|
||||
goto bug;
|
||||
}
|
||||
|
||||
s = strchr(s + 1, ' ');
|
||||
*pgoff = rawhex(s + 1, &s);
|
||||
if (*s != ' ') {
|
||||
goto bug;
|
||||
}
|
||||
|
||||
if (start > end)
|
||||
goto bug;
|
||||
|
||||
*len = end - start;
|
||||
|
||||
if (*len % PAGE_SIZE) {
|
||||
goto bug;
|
||||
}
|
||||
if (*pgoff % PAGE_SIZE) {
|
||||
goto bug;
|
||||
}
|
||||
|
||||
return;
|
||||
bug:
|
||||
fprintf(stderr, "BUG\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static int dump_map_pages(int lfd, unsigned long start, unsigned long pgoff, unsigned long len)
|
||||
{
|
||||
unsigned int nrpages, pfn;
|
||||
void *mem;
|
||||
unsigned char *mc;
|
||||
|
||||
printf("\t\tDumping pages start %x len %x off %x\n", start, len, pgoff);
|
||||
mem = mmap(NULL, len, PROT_READ, MAP_FILE | MAP_PRIVATE, lfd, pgoff);
|
||||
if (mem == MAP_FAILED) {
|
||||
perror("Can't map");
|
||||
return 1;
|
||||
}
|
||||
|
||||
nrpages = len / PAGE_SIZE;
|
||||
mc = malloc(nrpages);
|
||||
if (mincore(mem, len, mc)) {
|
||||
perror("Can't mincore mapping");
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (pfn = 0; pfn < nrpages; pfn++)
|
||||
if (mc[pfn] & PAGE_RSS) {
|
||||
__u64 vaddr;
|
||||
|
||||
vaddr = start + pfn * PAGE_SIZE;
|
||||
write(pages_img, &vaddr, 8);
|
||||
write(pages_img, mem + pfn * PAGE_SIZE, PAGE_SIZE);
|
||||
}
|
||||
|
||||
munmap(mem, len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Anonymous private mappings are not handled here: log and report success. */
static int dump_anon_private_map(char *start)
{
	const int status = 0;

	printf("\tSkipping anon private mapping at %s\n", start);
	return status;
}
|
||||
|
||||
static int dump_anon_shared_map(char *_start, char *mdesc, int lfd, struct stat *st)
|
||||
{
|
||||
unsigned long pgoff, len;
|
||||
struct shmem_entry e;
|
||||
unsigned long start;
|
||||
struct stat buf;
|
||||
|
||||
map_desc_parm(mdesc, &pgoff, &len);
|
||||
|
||||
start = rawhex(_start, NULL);
|
||||
e.start = start;
|
||||
e.end = start + len;
|
||||
e.shmid = st->st_ino;
|
||||
|
||||
write(shmem_img, &e, sizeof(e));
|
||||
|
||||
if (dump_map_pages(lfd, start, pgoff, len))
|
||||
return 1;
|
||||
|
||||
close(lfd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * File-backed shared mappings are not dumped: log, close the local
 * descriptor and report success.
 */
static int dump_file_shared_map(char *start, char *mdesc, int lfd)
{
	const int status = 0;

	printf("\tSkipping file shared mapping at %s\n", start);
	close(lfd);

	return status;
}
|
||||
|
||||
static int dump_file_private_map(char *_start, char *mdesc, int lfd)
|
||||
{
|
||||
unsigned long pgoff, len;
|
||||
unsigned long start;
|
||||
|
||||
map_desc_parm(mdesc, &pgoff, &len);
|
||||
|
||||
start = rawhex(_start, NULL);
|
||||
if (dump_one_reg_file(FDINFO_MAP, start, lfd, 0, 0, O_RDONLY))
|
||||
return 1;
|
||||
|
||||
close(lfd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Dump the mapping described by one line of /proc/<pid>/maps.
 *
 * @mdesc:   the maps line ("start-end perms ...")
 * @mfd_dir: open /proc/<pid>/mfd directory, whose entries are named after
 *           mapping start addresses
 *
 * The mapping is opened through @mfd_dir and handed to the helper matching
 * its kind. A mapping with no entry there (ENOENT) must be private and is
 * treated as anonymous private.
 *
 * Returns 0 on success, 1 on failure.
 */
static int dump_one_mapping(char *mdesc, DIR *mfd_dir)
{
	char *flags, *tmp;
	char map_start[32];
	int lfd;
	struct stat st_buf;

	/*
	 * Validate the line before slicing it: the start address must fit in
	 * map_start and the permissions column must have all four characters.
	 */
	tmp = strchr(mdesc, '-');
	flags = strchr(mdesc, ' ');
	if (tmp == NULL || flags == NULL || tmp > flags ||
	    (size_t)(tmp - mdesc) >= sizeof(map_start) ||
	    strlen(flags + 1) < 4) {
		fprintf(stderr, "Bogus mapping [%s]\n", mdesc);
		return 1;
	}

	memset(map_start, 0, sizeof(map_start));
	memcpy(map_start, mdesc, tmp - mdesc);
	flags++;

	printf("\tDumping %s\n", map_start);
	lfd = openat(dirfd(mfd_dir), map_start, O_RDONLY);
	if (lfd == -1) {
		if (errno != ENOENT) {
			perror("Can't open mapping");
			return 1;
		}

		if (flags[3] != 'p') {
			fprintf(stderr, "Bogus mapping [%s]\n", mdesc);
			return 1;
		}

		return dump_anon_private_map(map_start);
	}

	if (fstat(lfd, &st_buf) < 0) {
		perror("Can't stat mapping!");
		goto err_close;
	}

	if (!S_ISREG(st_buf.st_mode)) {
		/* errno is not set here, so perror() would print a bogus reason */
		fprintf(stderr, "Can't handle non-regular mapping\n");
		goto err_close;
	}

	if (MAJOR(st_buf.st_dev) == 0) {
		if (flags[3] != 's') {
			fprintf(stderr, "Bogus mapping [%s]\n", mdesc);
			goto err_close;
		}

		/* FIXME - this can be tmpfs visible file mapping */
		return dump_anon_shared_map(map_start, mdesc, lfd, &st_buf);
	}

	if (flags[3] == 'p')
		return dump_file_private_map(map_start, mdesc, lfd);
	else
		return dump_file_shared_map(map_start, mdesc, lfd);

err_close:
	/* the helpers own lfd once called; on these paths nobody did */
	close(lfd);
	return 1;
}
|
||||
|
||||
static int dump_task_ext_mm(int pid)
|
||||
{
|
||||
char path[64];
|
||||
DIR *mfd_dir;
|
||||
FILE *maps;
|
||||
|
||||
printf("Dumping mappings for %d\n", pid);
|
||||
|
||||
sprintf(path, "/proc/%d/mfd", pid);
|
||||
mfd_dir = opendir(path);
|
||||
if (mfd_dir == NULL) {
|
||||
perror("Can't open mfd dir");
|
||||
return -1;
|
||||
}
|
||||
|
||||
sprintf(path, "/proc/%d/maps", pid);
|
||||
maps = fopen(path, "r");
|
||||
if (maps == NULL) {
|
||||
perror("Can't open maps file");
|
||||
return 1;
|
||||
}
|
||||
|
||||
while (fgets(big_tmp_str, sizeof(big_tmp_str), maps) != NULL)
|
||||
if (dump_one_mapping(big_tmp_str, mfd_dir))
|
||||
return 1;
|
||||
|
||||
fclose(maps);
|
||||
closedir(mfd_dir);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dump_task_state(int pid)
|
||||
{
|
||||
char path[64];
|
||||
int dump_fd;
|
||||
void *mem;
|
||||
|
||||
printf("Dumping task image for %d\n", pid);
|
||||
sprintf(path, "/proc/%d/kstate_dump", pid);
|
||||
dump_fd = open(path, O_RDONLY);
|
||||
if (dump_fd < 0) {
|
||||
perror("Can't open dump file");
|
||||
return 1;
|
||||
}
|
||||
|
||||
mem = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||
if (mem == MAP_FAILED) {
|
||||
perror("Can't get mem");
|
||||
return 1;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
int r, w;
|
||||
|
||||
r = read(dump_fd, mem, 4096);
|
||||
if (r == 0)
|
||||
break;
|
||||
if (r < 0) {
|
||||
perror("Can't read dump file");
|
||||
return 1;
|
||||
}
|
||||
|
||||
w = 0;
|
||||
while (w < r) {
|
||||
int ret;
|
||||
|
||||
ret = write(core_img, mem + w, r - w);
|
||||
if (ret <= 0) {
|
||||
perror("Can't write core");
|
||||
return 1;
|
||||
}
|
||||
|
||||
w += ret;
|
||||
}
|
||||
}
|
||||
|
||||
munmap(mem, 4096);
|
||||
close(dump_fd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Dump a single task: prepare its image files, optionally stop it,
 * then dump files, mappings and task state in that order.
 *
 * @pid:  task to dump
 * @stop: when non-zero the task is stopped before and continued after
 *        the dump (also when the dump fails half-way)
 *
 * If anything fails after the image files were prepared they are
 * removed again with kill_imgfiles(). Returns 0 on success, 1 on error.
 */
static int dump_one_task(int pid, int stop)
{
	int failed;

	printf("Dumping task %d\n", pid);

	if (prep_img_files(pid) != 0)
		return 1;

	if (stop && stop_task(pid) != 0) {
		/* the task was never stopped, so there is nothing to resume */
		kill_imgfiles(pid);
		return 1;
	}

	/* short-circuit evaluation keeps the order and stops at first error */
	failed = dump_task_files(pid) ||
		 dump_task_ext_mm(pid) ||
		 dump_task_state(pid);

	if (stop)
		continue_task(pid);

	if (failed) {
		kill_imgfiles(pid);
		return 1;
	}

	printf("Dump is complete\n");
	return 0;
}
|
||||
|
||||
/* Descriptor of the pstree image file; opened and closed by dump_all_tasks(). */
static int pstree_fd;
|
||||
/* Shared scratch buffer, used both for building paths and for reading lines. */
static char big_tmp_str[4096];
|
||||
/* Array of pids collected by dump_pid_and_children() and its current length. */
static int *pids, nr_pids;
|
||||
|
||||
static char *get_children_pids(int pid)
|
||||
{
|
||||
FILE *f;
|
||||
int len;
|
||||
char *ret, *tmp;
|
||||
|
||||
sprintf(big_tmp_str, "/proc/%d/status", pid);
|
||||
f = fopen(big_tmp_str, "r");
|
||||
if (f == NULL)
|
||||
return NULL;
|
||||
|
||||
while ((fgets(big_tmp_str, sizeof(big_tmp_str), f)) != NULL) {
|
||||
if (strncmp(big_tmp_str, "Children:", 9))
|
||||
continue;
|
||||
|
||||
tmp = big_tmp_str + 10;
|
||||
len = strlen(tmp);
|
||||
ret = malloc(len + 1);
|
||||
strcpy(ret, tmp);
|
||||
if (len)
|
||||
ret[len - 1] = ' ';
|
||||
|
||||
fclose(f);
|
||||
return ret;
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int dump_pid_and_children(int pid)
|
||||
{
|
||||
struct pstree_entry e;
|
||||
char *chlist, *tmp, *tmp2;
|
||||
|
||||
printf("\tReading %d children list\n", pid);
|
||||
chlist = get_children_pids(pid);
|
||||
if (chlist == NULL)
|
||||
return 1;
|
||||
|
||||
printf("\t%d has children %s\n", pid, chlist);
|
||||
|
||||
e.pid = pid;
|
||||
e.nr_children = 0;
|
||||
|
||||
pids = realloc(pids, (nr_pids + 1) * sizeof(int));
|
||||
pids[nr_pids++] = e.pid;
|
||||
|
||||
tmp = chlist;
|
||||
while ((tmp = strchr(tmp, ' ')) != NULL) {
|
||||
tmp++;
|
||||
e.nr_children++;
|
||||
}
|
||||
|
||||
write(pstree_fd, &e, sizeof(e));
|
||||
tmp = chlist;
|
||||
while (1) {
|
||||
__u32 cpid;
|
||||
|
||||
cpid = strtol(tmp, &tmp, 10);
|
||||
if (cpid == 0)
|
||||
break;
|
||||
if (*tmp != ' ') {
|
||||
fprintf(stderr, "Error in string with children!\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
write(pstree_fd, &cpid, sizeof(cpid));
|
||||
tmp++;
|
||||
}
|
||||
|
||||
tmp = chlist;
|
||||
while ((tmp2 = strchr(tmp, ' ')) != NULL) {
|
||||
*tmp2 = '\0';
|
||||
if (dump_pid_and_children(atoi(tmp)))
|
||||
return 1;
|
||||
tmp = tmp2 + 1;
|
||||
}
|
||||
|
||||
free(chlist);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __dump_all_tasks(void)
|
||||
{
|
||||
int i, pid;
|
||||
|
||||
printf("Dumping tasks' images for");
|
||||
for (i = 0; i < nr_pids; i++)
|
||||
printf(" %d", pids[i]);
|
||||
printf("\n");
|
||||
|
||||
printf("Stopping tasks\n");
|
||||
for (i = 0; i < nr_pids; i++)
|
||||
if (stop_task(pids[i]))
|
||||
goto err;
|
||||
|
||||
for (i = 0; i < nr_pids; i++) {
|
||||
if (dump_one_task(pids[i], 0))
|
||||
goto err;
|
||||
}
|
||||
|
||||
printf("Resuming tasks\n");
|
||||
for (i = 0; i < nr_pids; i++)
|
||||
continue_task(pids[i]);
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
for (i = 0; i < nr_pids; i++)
|
||||
continue_task(pids[i]);
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
static int dump_all_tasks(int pid)
|
||||
{
|
||||
char *chlist;
|
||||
__u32 type;
|
||||
|
||||
pids = NULL;
|
||||
nr_pids = 0;
|
||||
|
||||
printf("Dumping process tree, start from %d\n", pid);
|
||||
|
||||
sprintf(big_tmp_str, "pstree-%d.img", pid);
|
||||
pstree_fd = open(big_tmp_str, O_WRONLY | O_CREAT | O_EXCL, 0600);
|
||||
if (pstree_fd < 0) {
|
||||
perror("Can't create pstree");
|
||||
return 1;
|
||||
}
|
||||
|
||||
type = PSTREE_MAGIC;
|
||||
write(pstree_fd, &type, sizeof(type));
|
||||
|
||||
if (dump_pid_and_children(pid))
|
||||
return 1;
|
||||
|
||||
close(pstree_fd);
|
||||
|
||||
return __dump_all_tasks();
|
||||
}
|
||||
|
||||
/*
 * Usage: <prog> (-p|-t) <pid>
 *
 *   -p  dump the single task <pid>
 *   -t  dump the process tree rooted at <pid>
 *
 * The pid must be a positive decimal number; anything else prints the
 * usage text. Returns 0 on success and non-zero on any failure.
 */
int main(int argc, char **argv)
{
	char *end;
	long pid;

	if (argc != 3)
		goto usage;
	if (argv[1][0] != '-')
		goto usage;
	if (argv[1][1] != 'p' && argv[1][1] != 't')
		goto usage;

	/* atoi() would silently turn garbage into pid 0 */
	errno = 0;
	pid = strtol(argv[2], &end, 10);
	if (errno || end == argv[2] || *end != '\0' || pid <= 0 || pid != (int)pid)
		goto usage;

	if (argv[1][1] == 'p')
		return dump_one_task((int)pid, 1);

	return dump_all_tasks((int)pid);

usage:
	printf("Usage: %s (-p|-t) <pid>\n", argv[0]);
	return 1;
}
|
1115
xemul/cr-restore.c
Normal file
1115
xemul/cr-restore.c
Normal file
File diff suppressed because it is too large
Load Diff
354
xemul/img-show.c
Normal file
354
xemul/img-show.c
Normal file
@ -0,0 +1,354 @@
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdlib.h>
|
||||
#include <linux/types.h>
|
||||
#include <string.h>
|
||||
#include "img_structs.h"
|
||||
#include "binfmt_img.h"
|
||||
|
||||
static int show_fdinfo(int fd)
|
||||
{
|
||||
char data[1024];
|
||||
struct fdinfo_entry e;
|
||||
|
||||
while (1) {
|
||||
int ret;
|
||||
|
||||
ret = read(fd, &e, sizeof(e));
|
||||
if (ret == 0)
|
||||
break;
|
||||
if (ret != sizeof(e)) {
|
||||
perror("Can't read");
|
||||
return 1;
|
||||
}
|
||||
|
||||
ret = read(fd, data, e.len);
|
||||
if (ret != e.len) {
|
||||
perror("Can't read");
|
||||
return 1;
|
||||
}
|
||||
|
||||
data[e.len] = '\0';
|
||||
switch (e.type) {
|
||||
case FDINFO_FD:
|
||||
printf("fd %d [%s] pos %lx flags %o\n", (int)e.addr, data, e.pos, e.flags);
|
||||
break;
|
||||
case FDINFO_MAP:
|
||||
printf("map %lx [%s] flags %o\n", e.addr, data, e.flags);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Unknown fdinfo entry type %d\n", e.type);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define PAGE_SIZE 4096

/*
 * Print a summary of the pages stored at the current position of @fd.
 *
 * Each record is an 8-byte virtual address followed by PAGE_SIZE bytes
 * of page data; only the first and the last 32-bit words of a page are
 * printed. The list ends at end of file or at a zero address.
 *
 * Returns 0 when the list was read to its end, 1 on a read/seek error
 * or when a record is truncated.
 */
static int show_mem(int fd)
{
	__u64 vaddr;
	unsigned int data[2];
	ssize_t r;

	while (1) {
		r = read(fd, &vaddr, sizeof(vaddr));
		if (r == 0)
			break;
		if (r != (ssize_t)sizeof(vaddr)) {
			fprintf(stderr, "Can't read page address\n");
			return 1;
		}
		if (vaddr == 0)
			break;

		/* first word, skip the middle of the page, last word */
		if (read(fd, &data[0], sizeof(data[0])) != (ssize_t)sizeof(data[0]) ||
		    lseek(fd, PAGE_SIZE - 2 * sizeof(unsigned int), SEEK_CUR) < 0 ||
		    read(fd, &data[1], sizeof(data[1])) != (ssize_t)sizeof(data[1])) {
			fprintf(stderr, "Can't read page at 0x%lx\n", (unsigned long)vaddr);
			return 1;
		}

		printf("\tpage 0x%lx [%x...%x]\n", (unsigned long)vaddr, data[0], data[1]);
	}

	return 0;
}
|
||||
|
||||
/* A pages image is just a page list: hand it to show_mem() and pass on its result. */
static int show_pages(int fd)
{
	int rc = show_mem(fd);

	return rc;
}
|
||||
|
||||
static int show_shmem(int fd)
|
||||
{
|
||||
int r;
|
||||
struct shmem_entry e;
|
||||
|
||||
while (1) {
|
||||
r = read(fd, &e, sizeof(e));
|
||||
if (r == 0)
|
||||
return 0;
|
||||
if (r != sizeof(e)) {
|
||||
perror("Can't read shmem entry");
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("%016lx-%016lx %016x\n", e.start, e.end, e.shmid);
|
||||
}
|
||||
}
|
||||
|
||||
static char *segval(__u16 seg)
|
||||
{
|
||||
switch (seg) {
|
||||
case CKPT_X86_SEG_NULL: return "nul";
|
||||
case CKPT_X86_SEG_USER32_CS: return "cs32";
|
||||
case CKPT_X86_SEG_USER32_DS: return "ds32";
|
||||
case CKPT_X86_SEG_USER64_CS: return "cs64";
|
||||
case CKPT_X86_SEG_USER64_DS: return "ds64";
|
||||
}
|
||||
|
||||
if (seg & CKPT_X86_SEG_TLS)
|
||||
return "tls";
|
||||
if (seg & CKPT_X86_SEG_LDT)
|
||||
return "ldt";
|
||||
|
||||
return "[unknown]";
|
||||
}
|
||||
|
||||
static int show_regs(int fd)
|
||||
{
|
||||
struct binfmt_regs_image ri;
|
||||
|
||||
if (read(fd, &ri, sizeof(ri)) != sizeof(ri)) {
|
||||
perror("Can't read registers from image");
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("Registers:\n");
|
||||
|
||||
printf("\tr15: %016lx\n", ri.r.r15);
|
||||
printf("\tr14: %016lx\n", ri.r.r14);
|
||||
printf("\tr13: %016lx\n", ri.r.r13);
|
||||
printf("\tr12: %016lx\n", ri.r.r12);
|
||||
printf("\tr11: %016lx\n", ri.r.r11);
|
||||
printf("\tr10: %016lx\n", ri.r.r10);
|
||||
printf("\tr9: %016lx\n", ri.r.r9);
|
||||
printf("\tr8: %016lx\n", ri.r.r8);
|
||||
printf("\tax: %016lx\n", ri.r.ax);
|
||||
printf("\torig_ax: %016lx\n", ri.r.orig_ax);
|
||||
printf("\tbx: %016lx\n", ri.r.bx);
|
||||
printf("\tcx: %016lx\n", ri.r.cx);
|
||||
printf("\tdx: %016lx\n", ri.r.dx);
|
||||
printf("\tsi: %016lx\n", ri.r.si);
|
||||
printf("\tdi: %016lx\n", ri.r.di);
|
||||
printf("\tip: %016lx\n", ri.r.ip);
|
||||
printf("\tflags: %016lx\n", ri.r.flags);
|
||||
printf("\tbp: %016lx\n", ri.r.bp);
|
||||
printf("\tsp: %016lx\n", ri.r.sp);
|
||||
printf("\tgs: %016lx\n", ri.r.gs);
|
||||
printf("\tfs: %016lx\n", ri.r.fs);
|
||||
printf("\tgsindex: %s\n", segval(ri.r.gsindex));
|
||||
printf("\tfsindex: %s\n", segval(ri.r.fsindex));
|
||||
printf("\tcs: %s\n", segval(ri.r.cs));
|
||||
printf("\tss: %s\n", segval(ri.r.ss));
|
||||
printf("\tds: %s\n", segval(ri.r.ds));
|
||||
printf("\tes: %s\n", segval(ri.r.es));
|
||||
|
||||
printf("\ttls0 %016lx\n", ri.r.tls[0]);
|
||||
printf("\ttls1 %016lx\n", ri.r.tls[1]);
|
||||
printf("\ttls2 %016lx\n", ri.r.tls[2]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int show_mm(int fd, unsigned long *stack)
|
||||
{
|
||||
struct binfmt_mm_image mi;
|
||||
|
||||
if (read(fd, &mi, sizeof(mi)) != sizeof(mi)) {
|
||||
perror("Can't read mm from image");
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("MM:\n");
|
||||
printf("\tflags: %016lx\n", mi.flags);
|
||||
printf("\tdef_flags: %016lx\n", mi.def_flags);
|
||||
printf("\tstart_code: %016lx\n", mi.start_code);
|
||||
printf("\tend_code: %016lx\n", mi.end_code);
|
||||
printf("\tstart_data: %016lx\n", mi.start_data);
|
||||
printf("\tend_data: %016lx\n", mi.end_data);
|
||||
printf("\tstart_brk: %016lx\n", mi.start_brk);
|
||||
printf("\tbrk: %016lx\n", mi.brk);
|
||||
printf("\tstart_stack: %016lx\n", mi.start_stack);
|
||||
printf("\targ_start: %016lx\n", mi.arg_start);
|
||||
printf("\targ_end: %016lx\n", mi.arg_end);
|
||||
printf("\tenv_start: %016lx\n", mi.env_start);
|
||||
printf("\tenv_end: %016lx\n", mi.env_end);
|
||||
|
||||
*stack = mi.start_stack;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int show_vmas(int fd, unsigned long stack)
|
||||
{
|
||||
struct binfmt_vma_image vi;
|
||||
|
||||
printf("VMAs:\n");
|
||||
while (1) {
|
||||
char *note = "";
|
||||
|
||||
if (read(fd, &vi, sizeof(vi)) != sizeof(vi)) {
|
||||
perror("Can't read vma from image");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (vi.start == 0 && vi.end == 0)
|
||||
return 0;
|
||||
|
||||
if (vi.start <= stack && vi.end >= stack)
|
||||
note = "[stack]";
|
||||
|
||||
printf("\t%016lx-%016lx file %d %016lx prot %x flags %x %s\n",
|
||||
vi.start, vi.end, vi.fd, vi.pgoff,
|
||||
vi.prot, vi.flags, note);
|
||||
}
|
||||
}
|
||||
|
||||
/* Print the "Pages:" header and then the page list that follows in @fd. */
static int show_privmem(int fd)
{
	int rc;

	fputs("Pages:\n", stdout);
	rc = show_mem(fd);

	return rc;
}
|
||||
|
||||
static int show_core(int fd)
|
||||
{
|
||||
__u32 version = 0;
|
||||
unsigned long stack;
|
||||
|
||||
read(fd, &version, 4);
|
||||
if (version != BINFMT_IMG_VERS_0) {
|
||||
printf("Unsupported version %d\n", version);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* the pad */
|
||||
read(fd, &version, 4);
|
||||
|
||||
printf("Showing version 0\n");
|
||||
|
||||
if (show_regs(fd))
|
||||
return 1;
|
||||
|
||||
if (show_mm(fd, &stack))
|
||||
return 1;
|
||||
|
||||
if (show_vmas(fd, stack))
|
||||
return 1;
|
||||
|
||||
if (show_privmem(fd))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int show_pstree(int fd)
|
||||
{
|
||||
int ret;
|
||||
struct pstree_entry e;
|
||||
|
||||
while (1) {
|
||||
int i;
|
||||
__u32 *ch;
|
||||
|
||||
ret = read(fd, &e, sizeof(e));
|
||||
if (ret == 0)
|
||||
return 0;
|
||||
if (ret != sizeof(e)) {
|
||||
perror("Can't read processes entry");
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("%d:", e.pid);
|
||||
i = e.nr_children * sizeof(__u32);
|
||||
ch = malloc(i);
|
||||
ret = read(fd, ch, i);
|
||||
if (ret != i) {
|
||||
perror("Can't read children list");
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < e.nr_children; i++)
|
||||
printf(" %d", ch[i]);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
static int show_pipes(int fd)
|
||||
{
|
||||
struct pipes_entry e;
|
||||
int ret;
|
||||
char buf[17];
|
||||
|
||||
while (1) {
|
||||
ret = read(fd, &e, sizeof(e));
|
||||
if (ret == 0)
|
||||
break;
|
||||
if (ret != sizeof(e)) {
|
||||
perror("Can't read pipe entry");
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("%d: %lx %o %d ", e.fd, e.pipeid, e.flags, e.bytes);
|
||||
if (e.flags & O_WRONLY) {
|
||||
printf("\n");
|
||||
|
||||
if (e.bytes) {
|
||||
printf("Bogus pipe\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
memset(buf, 0, sizeof(buf));
|
||||
ret = e.bytes;
|
||||
if (ret > 16)
|
||||
ret = 16;
|
||||
|
||||
read(fd, buf, ret);
|
||||
printf("\t[%s", buf);
|
||||
if (ret < e.bytes)
|
||||
printf("...");
|
||||
printf("]\n");
|
||||
lseek(fd, e.bytes - ret, SEEK_CUR);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
__u32 type;
|
||||
int fd;
|
||||
|
||||
fd = open(argv[1], O_RDONLY);
|
||||
if (fd < 0) {
|
||||
perror("Can't open");
|
||||
return 1;
|
||||
}
|
||||
|
||||
read(fd, &type, 4);
|
||||
|
||||
if (type == FDINFO_MAGIC)
|
||||
return show_fdinfo(fd);
|
||||
if (type == PAGES_MAGIC)
|
||||
return show_pages(fd);
|
||||
if (type == SHMEM_MAGIC)
|
||||
return show_shmem(fd);
|
||||
if (type == PSTREE_MAGIC)
|
||||
return show_pstree(fd);
|
||||
if (type == PIPES_MAGIC)
|
||||
return show_pipes(fd);
|
||||
if (type == BINFMT_IMG_MAGIC)
|
||||
return show_core(fd);
|
||||
|
||||
printf("Unknown file type 0x%x\n", type);
|
||||
return 1;
|
||||
}
|
39
xemul/img_structs.h
Normal file
39
xemul/img_structs.h
Normal file
@ -0,0 +1,39 @@
|
||||
|
||||
#define FDINFO_MAGIC 0x01010101
|
||||
|
||||
/*
 * One record of an fdinfo image. Each record is followed in the image
 * by 'len' bytes of data, which the image viewer prints as a string.
 */
struct fdinfo_entry {
|
||||
/* FDINFO_FD or FDINFO_MAP */
__u8 type;
|
||||
/* number of data bytes that follow this record */
__u8 len;
|
||||
/* printed in octal by the viewer; presumably open(2) flags -- TODO confirm */
__u16 flags;
|
||||
/* printed for FDINFO_FD records only; presumably the file position -- TODO confirm */
__u32 pos;
|
||||
/* FDINFO_FD: shown as an int fd number; FDINFO_MAP: shown as a hex address */
__u64 addr;
|
||||
};
|
||||
|
||||
#define FDINFO_FD 1
|
||||
#define FDINFO_MAP 2
|
||||
|
||||
#define PAGES_MAGIC 0x20202020
|
||||
|
||||
#define SHMEM_MAGIC 0x03300330
|
||||
|
||||
/* One record of a shmem image, describing one anonymous shared mapping. */
struct shmem_entry {
|
||||
/* start address of the mapping */
__u64 start;
|
||||
/* end address of the mapping (the dumper stores start + length) */
__u64 end;
|
||||
/* the dumper stores the inode number (st_ino) of the file behind the mapping */
__u64 shmid;
|
||||
};
|
||||
|
||||
#define PSTREE_MAGIC 0x40044004
|
||||
|
||||
/*
 * One record of a pstree image. Each record is followed in the image
 * by nr_children child pids, stored as one __u32 each.
 */
struct pstree_entry {
|
||||
/* pid of the task this record describes */
__u32 pid;
|
||||
/* number of child pids that follow this record */
__u32 nr_children;
|
||||
};
|
||||
|
||||
#define PIPES_MAGIC 0x05055050
|
||||
|
||||
/*
 * One record of a pipes image. Unless O_WRONLY is set in 'flags', the
 * record is followed in the image by 'bytes' bytes of pipe data.
 */
struct pipes_entry {
|
||||
/* printed first on each line by the viewer; presumably the fd number in the task -- TODO confirm */
__u32 fd;
|
||||
/* printed in hex by the viewer; presumably identifies the pipe -- TODO confirm */
__u32 pipeid;
|
||||
/* printed in octal; the viewer tests O_WRONLY here to detect entries without data */
__u32 flags;
|
||||
/* number of data bytes that follow; the viewer requires 0 when O_WRONLY is set */
__u32 bytes;
|
||||
};
|
2
xemul/readme
Normal file
2
xemul/readme
Normal file
@ -0,0 +1,2 @@
|
||||
Previous version of C/R -- uses an in-kernel dumper/restorer.
|
||||
It's kept here for reference and is not used by crtools itself.
|
Loading…
x
Reference in New Issue
Block a user