mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-30 13:58:34 +00:00
563 lines
12 KiB
Diff
563 lines
12 KiB
Diff
From f7e9d28188e7e2fd0f13f2696f29f20d784cb8fd Mon Sep 17 00:00:00 2001
|
|
From: root <root@ovzept.sw.ru>
|
|
Date: Fri, 3 Jun 2011 18:16:10 +0400
|
|
Subject: [PATCH] Image dumping via proc file
|
|
|
|
---
|
|
fs/proc/Kconfig | 8
|
|
fs/proc/Makefile | 1
|
|
fs/proc/base.c | 3
|
|
fs/proc/img_dump.c | 397 +++++++++++++++++++++++++++++++++++++++++++++
|
|
include/linux/binfmt_img.h | 87 +++++++++
|
|
include/linux/proc_fs.h | 2
|
|
6 files changed, 498 insertions(+)
|
|
create mode 100644 fs/proc/img_dump.c
|
|
create mode 100644 include/linux/binfmt_img.h
|
|
|
|
Index: linux-2.6.git/fs/proc/Kconfig
|
|
===================================================================
|
|
--- linux-2.6.git.orig/fs/proc/Kconfig
|
|
+++ linux-2.6.git/fs/proc/Kconfig
|
|
@@ -67,3 +67,11 @@ config PROC_PAGE_MONITOR
|
|
/proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap,
|
|
/proc/kpagecount, and /proc/kpageflags. Disabling these
|
|
interfaces will reduce the size of the kernel by approximately 4kb.
|
|
+
|
|
+config PROC_IMG
|
|
+ default y
|
|
+ depends on PROC_FS
|
|
+ bool "Enable /proc/<pid>/dump file"
|
|
+ help
|
|
+	  Say Y here if you want to be able to produce checkpoint-restore
+	  images of stopped tasks by reading /proc/<pid>/dump.
|
|
Index: linux-2.6.git/fs/proc/Makefile
|
|
===================================================================
|
|
--- linux-2.6.git.orig/fs/proc/Makefile
|
|
+++ linux-2.6.git/fs/proc/Makefile
|
|
@@ -28,3 +28,4 @@ proc-$(CONFIG_PROC_VMCORE) += vmcore.o
|
|
proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o
|
|
proc-$(CONFIG_PRINTK) += kmsg.o
|
|
proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o
|
|
+proc-$(CONFIG_PROC_IMG) += img_dump.o
|
|
Index: linux-2.6.git/fs/proc/base.c
|
|
===================================================================
|
|
--- linux-2.6.git.orig/fs/proc/base.c
|
|
+++ linux-2.6.git/fs/proc/base.c
|
|
@@ -2983,6 +2983,9 @@ static const struct pid_entry tgid_base_
|
|
#endif
|
|
INF("cmdline", S_IRUGO, proc_pid_cmdline),
|
|
ONE("stat", S_IRUGO, proc_tgid_stat),
|
|
+#ifdef CONFIG_PROC_IMG
|
|
+ REG("dump", S_IRUSR|S_IWUSR, proc_pid_dump_operations),
|
|
+#endif
|
|
ONE("statm", S_IRUGO, proc_pid_statm),
|
|
REG("maps", S_IRUGO, proc_maps_operations),
|
|
#ifdef CONFIG_NUMA
|
|
Index: linux-2.6.git/fs/proc/img_dump.c
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ linux-2.6.git/fs/proc/img_dump.c
|
|
@@ -0,0 +1,397 @@
|
|
+#include <linux/proc_fs.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/uaccess.h>
|
|
+#include <linux/binfmt_img.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/mman.h>
|
|
+#include <linux/highmem.h>
|
|
+#include <linux/types.h>
|
|
+#include "internal.h"
|
|
+
|
|
+/*
+ * Copy part of an in-kernel image chunk to user space.
+ *
+ * @ubuf: destination user buffer
+ * @size: room left in @ubuf
+ * @buf:  start of the chunk in kernel memory
+ * @len:  total length of the chunk in bytes
+ * @pos:  offset inside the chunk to start copying from
+ *
+ * Returns the number of bytes copied (never more than @size or than
+ * @len - @pos), -EFAULT if the user buffer cannot be written, or
+ * -EINVAL if @pos does not lie inside the chunk.
+ */
+static int img_dump_buffer(char __user *ubuf, size_t size, void *buf, int len, int pos)
+{
+	/*
+	 * A negative remainder would be converted to a huge unsigned value
+	 * by the comparison against @size and the copy would then run past
+	 * the end of the chunk, so refuse such offsets up front.
+	 */
+	if (pos < 0 || pos > len)
+		return -EINVAL;
+
+	len -= pos;
+	if ((size_t)len > size)
+		len = size;
+
+	if (copy_to_user(ubuf, buf + pos, len))
+		return -EFAULT;
+
+	return len;
+}
|
|
+
|
|
+/*
+ * Emit the image header (magic number and format version).  @pos is the
+ * offset inside the header at which the copy resumes.
+ */
+static int img_dump_header(char __user *buf, size_t size, int pos)
+{
+	struct binfmt_img_header img_hdr = {
+		.magic		= BINFMT_IMG_MAGIC,
+		.version	= BINFMT_IMG_VERS_0,
+	};
+
+	return img_dump_buffer(buf, size, &img_hdr, sizeof(img_hdr), pos);
+}
|
|
+
|
|
+/*
+ * Translate a segment selector into the CKPT_X86_SEG_* encoding used in
+ * the image:
+ *  - 0 becomes CKPT_X86_SEG_NULL;
+ *  - the well-known user code/data selectors map to fixed constants;
+ *  - selectors with bit 2 set are stored as CKPT_X86_SEG_LDT | index;
+ *  - selectors in the GDT TLS range are stored as CKPT_X86_SEG_TLS | slot.
+ *
+ * NOTE(review): anything else, and any non-zero selector whose low two
+ * bits are not both set, ends in BUG().  The selectors come from the
+ * dumped task's saved state -- confirm that no value a task can load
+ * reaches these paths.
+ */
+static __u16 encode_segment(unsigned short seg)
+{
+	unsigned short idx;
+
+	if (seg == 0)
+		return CKPT_X86_SEG_NULL;
+	BUG_ON((seg & 3) != 3);
+
+	if (seg == __USER_CS)
+		return CKPT_X86_SEG_USER64_CS;
+	if (seg == __USER_DS)
+		return CKPT_X86_SEG_USER64_DS;
+#ifdef CONFIG_COMPAT
+	if (seg == __USER32_CS)
+		return CKPT_X86_SEG_USER32_CS;
+	if (seg == __USER32_DS)
+		return CKPT_X86_SEG_USER32_DS;
+#endif
+
+	if (seg & 4)
+		return CKPT_X86_SEG_LDT | (seg >> 3);
+
+	/* Keep the selector intact so the diagnostic below can report it. */
+	idx = seg >> 3;
+	if (GDT_ENTRY_TLS_MIN <= idx && idx <= GDT_ENTRY_TLS_MAX)
+		return CKPT_X86_SEG_TLS | (idx - GDT_ENTRY_TLS_MIN);
+
+	printk(KERN_ERR "c/r: (encode) bad segment %#hx\n", seg);
+	BUG();
+}
|
|
+
|
|
+/* Pack a descriptor into one 64-bit word: ->a in the upper half, ->b added below. */
+static __u64 encode_tls(struct desc_struct *d)
+{
+	__u64 packed = (__u64)d->a;
+
+	packed <<= 32;
+	packed += d->b;
+
+	return packed;
+}
|
|
+
|
|
+/*
+ * Emit the register chunk of task @p: general purpose registers from its
+ * pt_regs, encoded segment selectors, the TLS descriptors and the fs/gs
+ * values kept in p->thread.
+ *
+ * @buf/@size describe the user buffer, @pos is the offset inside the
+ * chunk to resume from.  Returns whatever img_dump_buffer() returns.
+ */
+static int img_dump_regs(struct task_struct *p, char __user *buf, size_t size, int pos)
+{
+	struct binfmt_regs_image regi;
+	struct pt_regs *regs;
+	int i;
+
+	/*
+	 * The whole structure is copied to user space.  It ends in six
+	 * __u16 fields after a run of __u64 ones, so the compiler may add
+	 * tail padding; zero everything so that no stale kernel stack
+	 * bytes escape through it.
+	 */
+	memset(&regi, 0, sizeof(regi));
+
+	regs = task_pt_regs(p);
+
+	regi.r15	= regs->r15;
+	regi.r14	= regs->r14;
+	regi.r13	= regs->r13;
+	regi.r12	= regs->r12;
+	regi.r11	= regs->r11;
+	regi.r10	= regs->r10;
+	regi.r9		= regs->r9;
+	regi.r8		= regs->r8;
+	regi.ax		= regs->ax;
+	regi.orig_ax	= regs->orig_ax;
+	regi.bx		= regs->bx;
+	regi.cx		= regs->cx;
+	regi.dx		= regs->dx;
+	regi.si		= regs->si;
+	regi.di		= regs->di;
+	regi.ip		= regs->ip;
+	regi.flags	= regs->flags;
+	regi.bp		= regs->bp;
+	regi.sp		= regs->sp;
+
+	/* segments */
+	regi.gsindex	= encode_segment(p->thread.gsindex);
+	regi.fsindex	= encode_segment(p->thread.fsindex);
+	regi.cs		= encode_segment(regs->cs);
+	regi.ss		= encode_segment(regs->ss);
+	regi.ds		= encode_segment(p->thread.ds);
+	regi.es		= encode_segment(p->thread.es);
+
+	BUILD_BUG_ON(GDT_ENTRY_TLS_ENTRIES != CKPT_TLS_ENTRIES);
+	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+		regi.tls[i] = encode_tls(&p->thread.tls_array[i]);
+
+	/* thread.gs / thread.fs are stored only when the matching index is 0. */
+	if (p->thread.gsindex)
+		regi.gs = 0;
+	else
+		regi.gs = p->thread.gs;
+
+	if (p->thread.fsindex)
+		regi.fs = 0;
+	else
+		regi.fs = p->thread.fs;
+
+	return img_dump_buffer(buf, size, &regi, sizeof(regi), pos);
+}
|
|
+
|
|
+/*
+ * Emit the mm chunk: a copy of the address-space layout fields of @mm.
+ * exe_fd is always written as 0.
+ *
+ * @buf/@size describe the user buffer, @pos is the offset inside the
+ * chunk to resume from.  Returns whatever img_dump_buffer() returns.
+ */
+static int img_dump_mm(struct mm_struct *mm, char __user *buf, size_t size, int pos)
+{
+	struct binfmt_mm_image mmi;
+
+	/*
+	 * The structure ends in a lone __u32 after thirteen __u64 fields,
+	 * so it may carry tail padding.  Clear it before filling it in so
+	 * the copy to user space cannot expose stale stack contents.
+	 */
+	memset(&mmi, 0, sizeof(mmi));
+
+	mmi.flags	= mm->flags;
+	mmi.def_flags	= mm->def_flags;
+	mmi.start_code	= mm->start_code;
+	mmi.end_code	= mm->end_code;
+	mmi.start_data	= mm->start_data;
+	mmi.end_data	= mm->end_data;
+	mmi.start_brk	= mm->start_brk;
+	mmi.brk		= mm->brk;
+	mmi.start_stack	= mm->start_stack;
+	mmi.arg_start	= mm->arg_start;
+	mmi.arg_end	= mm->arg_end;
+	mmi.env_start	= mm->env_start;
+	mmi.env_end	= mm->env_end;
+	mmi.exe_fd	= 0;
+
+	return img_dump_buffer(buf, size, &mmi, sizeof(mmi), pos);
+}
|
|
+
|
|
+/*
+ * Emit one VMA record.  A NULL @vma produces an all-zero record, which
+ * the caller writes after the last real VMA.
+ *
+ * prot is rebuilt from VM_READ/VM_WRITE/VM_EXEC; flags get MAP_ANONYMOUS
+ * when the VMA has no backing file and MAP_SHARED or MAP_PRIVATE
+ * depending on VM_MAYSHARE.  fd is always written as 0.
+ *
+ * @buf/@size describe the user buffer, @pos is the offset inside the
+ * record to resume from.  Returns whatever img_dump_buffer() returns.
+ */
+static int img_dump_vma(struct vm_area_struct *vma, char __user *buf, size_t size, int pos)
+{
+	struct binfmt_vma_image vmai;
+
+	/*
+	 * Start from an all-zero record in every case: the pad member is
+	 * never assigned below and would otherwise hand uninitialised
+	 * stack bytes to user space.
+	 */
+	memset(&vmai, 0, sizeof(vmai));
+
+	if (vma != NULL) {
+		/* Debug aid only; keep it out of the default log level. */
+		printk(KERN_DEBUG "Dumping vma %016lx-%016lx %p/%p\n",
+		       vma->vm_start, vma->vm_end, vma, vma->vm_mm);
+
+		if (vma->vm_flags & VM_READ)
+			vmai.prot |= PROT_READ;
+		if (vma->vm_flags & VM_WRITE)
+			vmai.prot |= PROT_WRITE;
+		if (vma->vm_flags & VM_EXEC)
+			vmai.prot |= PROT_EXEC;
+
+		if (vma->vm_file == NULL)
+			vmai.flags |= MAP_ANONYMOUS;
+		if (vma->vm_flags & VM_MAYSHARE)
+			vmai.flags |= MAP_SHARED;
+		else
+			vmai.flags |= MAP_PRIVATE;
+
+		vmai.start = vma->vm_start;
+		vmai.end = vma->vm_end;
+		vmai.pgoff = vma->vm_pgoff;
+	}
+
+	return img_dump_buffer(buf, size, &vmai, sizeof(vmai), pos);
+}
|
|
+
|
|
+/*
+ * Emit one page record: a binfmt_page_image carrying @addr followed by
+ * PAGE_SIZE bytes taken from @data.
+ *
+ * @pos is the offset inside the whole record (header plus data) to
+ * resume from.  Returns the number of bytes copied in this call or a
+ * negative error from img_dump_buffer().
+ */
+static int img_dump_page(unsigned long addr, void *data, char __user *buf, size_t size, int pos)
+{
+	struct binfmt_page_image pgi = { .vaddr = addr };
+	int hdr_len = 0;
+	int data_len;
+
+	if (pos >= sizeof(pgi)) {
+		/* Record header already delivered: resume inside the page data. */
+		pos -= sizeof(pgi);
+	} else {
+		hdr_len = img_dump_buffer(buf, size, &pgi, sizeof(pgi), pos);
+		if (hdr_len < 0)
+			return hdr_len;
+
+		/* User buffer exhausted by the header alone. */
+		if (size <= hdr_len)
+			return hdr_len;
+
+		buf += hdr_len;
+		size -= hdr_len;
+		pos = 0;
+	}
+
+	data_len = img_dump_buffer(buf, size, data, PAGE_SIZE, pos);
+	if (data_len < 0)
+		return data_len;
+
+	return hdr_len + data_len;
+}
|
|
+
|
|
+/* A VMA counts as private when it has no backing file or lacks VM_SHARED. */
+static inline int is_private_vma(struct vm_area_struct *vma)
+{
+	return vma->vm_file == NULL || !(vma->vm_flags & VM_SHARED);
+}
|
|
+
|
|
+/*
+ * Produce the part of task @p's image that starts at file offset *@ppos
+ * and fits into @size bytes of @buf.
+ *
+ * The image is generated on the fly as a sequence of chunks:
+ *   header, registers, mm, one record per VMA followed by an all-zero
+ *   VMA record, one record per page found by follow_page() in private
+ *   VMAs, and finally an all-zero page record.
+ * img_pos walks over the chunk boundaries; a chunk is emitted only if
+ * the requested offset falls before its end.
+ *
+ * Returns the number of bytes produced (0 once the offset is past the
+ * end of the image) and advances *@ppos by that amount, or a negative
+ * errno: -EACCES if the task has no mm, or the error of a failed copy.
+ *
+ * NOTE(review): page records exist only for pages follow_page() returns
+ * at the time of the call, so chunk offsets could differ between two
+ * read() calls if that set changes -- confirm this cannot happen for a
+ * stopped task.
+ */
+static ssize_t do_produce_dump(struct task_struct *p, char __user *buf,
+		size_t size, loff_t *ppos)
+{
+	size_t img_pos = 0, img_ppos;
+	size_t produced = 0;
+	int len;
+	loff_t pos = *ppos;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+
+/* Account for the @len bytes just copied to the user buffer. */
+#define move_pos() do {			\
+		buf += len;		\
+		produced += len;	\
+		size -= len;		\
+		pos += len;		\
+	} while (0)
+
+/* Step the image cursor over the next chunk, remembering where it began. */
+#define seek_pos(__size) do {		\
+		img_ppos = img_pos;	\
+		img_pos += (__size);	\
+	} while (0)
+
+	/* header */
+	seek_pos(sizeof(struct binfmt_img_header));
+	if (pos < img_pos) {
+		len = img_dump_header(buf, size, pos - img_ppos);
+		if (len < 0)
+			goto err;
+
+		move_pos();
+		if (size == 0)
+			goto out;
+	}
+
+	/* registers */
+	seek_pos(sizeof(struct binfmt_regs_image));
+	if (pos < img_pos) {
+		len = img_dump_regs(p, buf, size, pos - img_ppos);
+		if (len < 0)
+			goto err;
+
+		move_pos();
+		if (size == 0)
+			goto out;
+	}
+
+	/* memory */
+	mm = get_task_mm(p);
+	if (mm == NULL)
+		return -EACCES;
+
+	down_read(&mm->mmap_sem);
+
+	seek_pos(sizeof(struct binfmt_mm_image));
+	if (pos < img_pos) {
+		len = img_dump_mm(mm, buf, size, pos - img_ppos);
+		if (len < 0)
+			goto err_mm;
+
+		move_pos();
+		if (size == 0)
+			goto out_mm;
+	}
+
+	/* VMA records; the final pass with vma == NULL emits the terminator. */
+	vma = mm->mmap;
+	while (1) {
+		seek_pos(sizeof(struct binfmt_vma_image));
+		if (pos < img_pos) {
+			len = img_dump_vma(vma, buf, size, pos - img_ppos);
+			if (len < 0)
+				goto err_mm;
+
+			move_pos();
+			if (size == 0)
+				goto out_mm;
+		}
+
+		if (vma == NULL)
+			break;
+
+		vma = vma->vm_next;
+	}
+
+	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+		/* slow and stupid */
+		unsigned long addr;
+		struct page *page;
+		void *pg_data;
+
+		if (!is_private_vma(vma))
+			continue;
+
+		for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+			page = follow_page(vma, addr, FOLL_FORCE | FOLL_DUMP | FOLL_GET);
+			if (page == NULL)
+				continue;
+			if (IS_ERR(page)) /* huh? */
+				continue;
+
+			seek_pos(sizeof(struct binfmt_page_image) + PAGE_SIZE);
+			if (pos < img_pos) {
+				pg_data = kmap(page);
+				len = img_dump_page(addr, pg_data, buf, size, pos - img_ppos);
+				kunmap(page);
+
+				if (len < 0) {
+					put_page(page);
+					goto err_mm;
+				}
+
+				move_pos();
+				if (size == 0) {
+					put_page(page);
+					goto out_mm;
+				}
+			}
+
+			/* Drop the reference taken by FOLL_GET. */
+			put_page(page);
+		}
+	}
+
+	/* All-zero page record terminates the page list. */
+	seek_pos(sizeof(struct binfmt_page_image));
+	if (pos < img_pos) {
+		struct binfmt_page_image zero;
+
+		memset(&zero, 0, sizeof(zero));
+		len = img_dump_buffer(buf, size, &zero, sizeof(zero), pos - img_ppos);
+		/* mmap_sem and the mm reference are still held here. */
+		if (len < 0)
+			goto err_mm;
+
+		move_pos();
+	}
+
+out_mm:
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+out:
+	*ppos = pos;
+	return produced;
+
+err_mm:
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+err:
+	return len;
+
+#undef seek_pos
+#undef move_pos
+}
|
|
+
|
|
+/*
+ * read() handler for /proc/<pid>/dump.
+ *
+ * Looks up the task behind the proc inode and, if its state has
+ * TASK_STOPPED set, produces the requested part of its image.
+ *
+ * Returns the number of bytes produced, -ESRCH if the task is gone,
+ * -EINVAL if it is not stopped, or an error from do_produce_dump().
+ *
+ * NOTE(review): the state is sampled once and nothing visible here keeps
+ * the task stopped while it is being dumped -- confirm whether callers
+ * are expected to guarantee that.
+ */
+static ssize_t img_dump_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
+{
+	struct task_struct *p;
+	ssize_t ret;
+
+	p = get_proc_task(file->f_dentry->d_inode);
+	if (p == NULL)
+		return -ESRCH;
+
+	if (p->state & TASK_STOPPED)
+		ret = do_produce_dump(p, buf, size, ppos);
+	else
+		ret = -EINVAL;
+
+	/* Drop the reference taken by get_proc_task() on every path. */
+	put_task_struct(p);
+	return ret;
+}
|
|
+
|
|
+/* open() handler: nothing to set up, always succeeds. */
+static int img_dump_open(struct inode *inode, struct file *filp)
+{
+	return 0;
+}
|
|
+
|
|
+/* release() handler: nothing to tear down, always succeeds. */
+static int img_dump_release(struct inode *inode, struct file *filp)
+{
+	return 0;
+}
|
|
+
|
|
+/* File operations for the per-task "dump" proc entry; no write handler. */
+const struct file_operations proc_pid_dump_operations = {
+	.open		= img_dump_open,
+	.read		= img_dump_read,
+	.release	= img_dump_release,
+};
|
|
Index: linux-2.6.git/include/linux/binfmt_img.h
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ linux-2.6.git/include/linux/binfmt_img.h
|
|
@@ -0,0 +1,87 @@
|
|
+#ifndef __BINFMT_IMG_H__
|
|
+#define __BINFMT_IMG_H__
|
|
+
|
|
+#include <linux/types.h>
|
|
+
|
|
+/* First chunk of an image: identifies the format and its version. */
+struct binfmt_img_header {
+	__u32	magic;		/* BINFMT_IMG_MAGIC */
+	__u32	version;	/* BINFMT_IMG_VERS_* */
+};
|
|
+
|
|
+#define CKPT_TLS_ENTRIES 3
|
|
+
|
|
+/*
+ * Register chunk.  Field order is the on-image layout and must not be
+ * changed.
+ *
+ * The six trailing __u16 selectors leave the structure 4 bytes short of
+ * a multiple of 8, so an ABI that aligns __u64 to 8 bytes adds tail
+ * padding; writers should zero the whole structure before filling it.
+ */
+struct binfmt_regs_image {
+	/* general purpose registers */
+	__u64	r15, r14, r13, r12;
+	__u64	r11, r10, r9, r8;
+	__u64	ax, orig_ax;
+	__u64	bx, cx, dx;
+	__u64	si, di;
+	__u64	ip, flags;
+	__u64	bp, sp;
+
+	__u64	gs, fs;
+	__u64	tls[CKPT_TLS_ENTRIES];	/* packed TLS descriptors */
+
+	/* segment selectors in CKPT_X86_SEG_* encoding */
+	__u16	gsindex, fsindex;
+	__u16	cs, ss;
+	__u16	ds, es;
+};
|
|
+
|
|
+#define CKPT_X86_SEG_NULL 0
|
|
+#define CKPT_X86_SEG_USER32_CS 1
|
|
+#define CKPT_X86_SEG_USER32_DS 2
|
|
+#define CKPT_X86_SEG_USER64_CS 3
|
|
+#define CKPT_X86_SEG_USER64_DS 4
|
|
+#define CKPT_X86_SEG_TLS 0x4000
|
|
+#define CKPT_X86_SEG_LDT 0x8000
|
|
+
|
|
+/*
+ * Address-space chunk.  Field order is the on-image layout and must not
+ * be changed.
+ *
+ * The lone trailing __u32 means an ABI that aligns __u64 to 8 bytes adds
+ * 4 bytes of tail padding; writers should zero the whole structure
+ * before filling it.
+ */
+struct binfmt_mm_image {
+	__u64	flags, def_flags;
+	__u64	start_code, end_code;
+	__u64	start_data, end_data;
+	__u64	start_brk, brk;
+	__u64	start_stack;
+	__u64	arg_start, arg_end;
+	__u64	env_start, env_end;
+	__u32	exe_fd;
+};
|
|
+
|
|
+/*
+ * One record per VMA.  Field order is the on-image layout and must not
+ * be changed.
+ */
+struct binfmt_vma_image {
+	__u32	prot;		/* PROT_* bits */
+	__u32	flags;		/* MAP_* bits */
+	__u32	pad;		/* filler ahead of fd */
+	__u32	fd;
+	__u64	start, end;	/* address range of the mapping */
+	__u64	pgoff;
+};
|
|
+
|
|
+/* Header placed in front of each page's contents in the image. */
+struct binfmt_page_image {
+	__u64	vaddr;		/* virtual address of the page */
+};
|
|
+
|
|
+#define BINFMT_IMG_MAGIC 0xa75b8d43
|
|
+#define BINFMT_IMG_VERS_0 0x00000100
|
|
+
|
|
+#endif
|
|
Index: linux-2.6.git/include/linux/proc_fs.h
|
|
===================================================================
|
|
--- linux-2.6.git.orig/include/linux/proc_fs.h
|
|
+++ linux-2.6.git/include/linux/proc_fs.h
|
|
@@ -102,6 +102,8 @@ struct vmcore {
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
|
|
+extern const struct file_operations proc_pid_dump_operations;
|
|
+
|
|
extern void proc_root_init(void);
|
|
|
|
void proc_flush_task(struct task_struct *task);
|