mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-29 13:28:27 +00:00
Update kernel area
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
This commit is contained in:
parent
83c209ef07
commit
b50ee4a175
19
README
19
README
@ -1,7 +1,7 @@
|
||||
crtools
|
||||
=======
|
||||
|
||||
An utility to to checkpoint/restore tasks.
|
||||
A utility to checkpoint/restore tasks.
|
||||
|
||||
Some code snippets are borrowed from
|
||||
|
||||
@ -13,3 +13,20 @@ Some code snippets are borrowed from
|
||||
Many thanks to these projects.
|
||||
|
||||
Licensed under GPLv2 (http://www.gnu.org/licenses/gpl-2.0.txt)
|
||||
|
||||
Kernel patching
|
||||
===============
|
||||
|
||||
To have crtools up and running either
|
||||
|
||||
1) use patches from kernel/ directory
|
||||
2) or clone git://github.com/cyrillos/linux-2.6.git
|
||||
and switch to branch "crtools".
|
||||
|
||||
It's based on Linux
|
||||
|
||||
| commit 1ea6b8f48918282bdca0b32a34095504ee65bab5
|
||||
| Author: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
| Date: Mon Nov 7 16:16:02 2011 -0800
|
||||
|
|
||||
| Linux 3.2-rc1
|
||||
|
@ -1,4 +1,8 @@
|
||||
fs, proc: Make proc_get_link to use dentry instead of inode
|
||||
From fc4504ee8f471ac1ac8162ec68e98f2c09d53411 Mon Sep 17 00:00:00 2001
|
||||
From: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
Date: Tue, 8 Nov 2011 14:57:10 +0400
|
||||
Subject: [PATCH 1/4] fs, proc: Make proc_get_link to use dentry instead of
|
||||
inode
|
||||
|
||||
This patch prepares the ground for the next "map_files"
|
||||
patch which needs a name of a link file to analyse.
|
||||
@ -16,11 +20,11 @@ CC: Andrew Morton <akpm@linux-foundation.org>
|
||||
include/linux/proc_fs.h | 2 +-
|
||||
2 files changed, 11 insertions(+), 11 deletions(-)
|
||||
|
||||
Index: linux-2.6.git/fs/proc/base.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/base.c
|
||||
+++ linux-2.6.git/fs/proc/base.c
|
||||
@@ -165,9 +165,9 @@ static int get_task_root(struct task_str
|
||||
diff --git a/fs/proc/base.c b/fs/proc/base.c
|
||||
index 2db1bd3..93c81aa 100644
|
||||
--- a/fs/proc/base.c
|
||||
+++ b/fs/proc/base.c
|
||||
@@ -165,9 +165,9 @@ static int get_task_root(struct task_struct *task, struct path *root)
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -32,7 +36,7 @@ Index: linux-2.6.git/fs/proc/base.c
|
||||
int result = -ENOENT;
|
||||
|
||||
if (task) {
|
||||
@@ -182,9 +182,9 @@ static int proc_cwd_link(struct inode *i
|
||||
@@ -182,9 +182,9 @@ static int proc_cwd_link(struct inode *inode, struct path *path)
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -44,7 +48,7 @@ Index: linux-2.6.git/fs/proc/base.c
|
||||
int result = -ENOENT;
|
||||
|
||||
if (task) {
|
||||
@@ -1580,13 +1580,13 @@ static const struct file_operations proc
|
||||
@@ -1567,13 +1567,13 @@ static const struct file_operations proc_pid_set_comm_operations = {
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
@ -60,7 +64,7 @@ Index: linux-2.6.git/fs/proc/base.c
|
||||
if (!task)
|
||||
return -ENOENT;
|
||||
mm = get_task_mm(task);
|
||||
@@ -1616,7 +1616,7 @@ static void *proc_pid_follow_link(struct
|
||||
@@ -1603,7 +1603,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
|
||||
if (!proc_fd_access_allowed(inode))
|
||||
goto out;
|
||||
|
||||
@ -69,7 +73,7 @@ Index: linux-2.6.git/fs/proc/base.c
|
||||
out:
|
||||
return ERR_PTR(error);
|
||||
}
|
||||
@@ -1655,7 +1655,7 @@ static int proc_pid_readlink(struct dent
|
||||
@@ -1642,7 +1642,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
|
||||
if (!proc_fd_access_allowed(inode))
|
||||
goto out;
|
||||
|
||||
@ -78,7 +82,7 @@ Index: linux-2.6.git/fs/proc/base.c
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
@@ -1959,9 +1959,9 @@ out_task:
|
||||
@@ -1980,9 +1980,9 @@ out_task:
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -90,11 +94,11 @@ Index: linux-2.6.git/fs/proc/base.c
|
||||
}
|
||||
|
||||
static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
|
||||
Index: linux-2.6.git/include/linux/proc_fs.h
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/include/linux/proc_fs.h
|
||||
+++ linux-2.6.git/include/linux/proc_fs.h
|
||||
@@ -253,7 +253,7 @@ extern const struct proc_ns_operations u
|
||||
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
|
||||
index 643b96c..c3d11ff 100644
|
||||
--- a/include/linux/proc_fs.h
|
||||
+++ b/include/linux/proc_fs.h
|
||||
@@ -253,7 +253,7 @@ extern const struct proc_ns_operations utsns_operations;
|
||||
extern const struct proc_ns_operations ipcns_operations;
|
||||
|
||||
union proc_op {
|
||||
@ -103,3 +107,6 @@ Index: linux-2.6.git/include/linux/proc_fs.h
|
||||
int (*proc_read)(struct task_struct *task, char *page);
|
||||
int (*proc_show)(struct seq_file *m,
|
||||
struct pid_namespace *ns, struct pid *pid,
|
||||
--
|
||||
1.7.6.4
|
||||
|
@ -1,6 +1,8 @@
|
||||
fs, proc: Introduce the /proc/<pid>/map_files/ directory v14
|
||||
|
||||
From d23bde31590a7679aa2be7960848b0fedd0ce032 Mon Sep 17 00:00:00 2001
|
||||
From: Pavel Emelyanov <xemul@parallels.com>
|
||||
Date: Tue, 8 Nov 2011 14:58:01 +0400
|
||||
Subject: [PATCH 2/4] fs, proc: Introduce the /proc/<pid>/map_files/ directory
|
||||
v14
|
||||
|
||||
This one behaves similarly to the /proc/<pid>/fd/ one - it contains symlinks
|
||||
one for each mapping with file, the name of a symlink is "vma->vm_start-vma->vm_end",
|
||||
@ -115,14 +117,14 @@ CC: Al Viro <viro@ZenIV.linux.org.uk>
|
||||
CC: Andrew Morton <akpm@linux-foundation.org>
|
||||
CC: Pavel Machek <pavel@ucw.cz>
|
||||
---
|
||||
fs/proc/base.c | 345 +++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
include/linux/mm.h | 12 +
|
||||
2 files changed, 357 insertions(+)
|
||||
fs/proc/base.c | 345 ++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
include/linux/mm.h | 12 ++
|
||||
2 files changed, 357 insertions(+), 0 deletions(-)
|
||||
|
||||
Index: linux-2.6.git/fs/proc/base.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/base.c
|
||||
+++ linux-2.6.git/fs/proc/base.c
|
||||
diff --git a/fs/proc/base.c b/fs/proc/base.c
|
||||
index 93c81aa..9b7a9cd 100644
|
||||
--- a/fs/proc/base.c
|
||||
+++ b/fs/proc/base.c
|
||||
@@ -83,6 +83,7 @@
|
||||
#include <linux/pid_namespace.h>
|
||||
#include <linux/fs_struct.h>
|
||||
@ -140,7 +142,7 @@ Index: linux-2.6.git/fs/proc/base.c
|
||||
/*
|
||||
* Count the number of hardlinks for the pid_entry table, excluding the .
|
||||
* and .. links.
|
||||
@@ -2201,6 +2204,347 @@ static const struct file_operations proc
|
||||
@@ -2217,6 +2220,347 @@ static const struct file_operations proc_fd_operations = {
|
||||
};
|
||||
|
||||
/*
|
||||
@ -488,7 +490,7 @@ Index: linux-2.6.git/fs/proc/base.c
|
||||
* /proc/pid/fd needs a special permission handler so that a process can still
|
||||
* access /proc/self/fd after it has executed a setuid().
|
||||
*/
|
||||
@@ -2815,6 +3159,7 @@ static const struct inode_operations pro
|
||||
@@ -2832,6 +3176,7 @@ static const struct inode_operations proc_task_inode_operations;
|
||||
static const struct pid_entry tgid_base_stuff[] = {
|
||||
DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
|
||||
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
|
||||
@ -496,11 +498,11 @@ Index: linux-2.6.git/fs/proc/base.c
|
||||
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
|
||||
DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
|
||||
#ifdef CONFIG_NET
|
||||
Index: linux-2.6.git/include/linux/mm.h
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/include/linux/mm.h
|
||||
+++ linux-2.6.git/include/linux/mm.h
|
||||
@@ -1491,6 +1491,18 @@ static inline unsigned long vma_pages(st
|
||||
diff --git a/include/linux/mm.h b/include/linux/mm.h
|
||||
index 3dc3a8c..14159d3 100644
|
||||
--- a/include/linux/mm.h
|
||||
+++ b/include/linux/mm.h
|
||||
@@ -1491,6 +1491,18 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
|
||||
return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
@ -519,3 +521,6 @@ Index: linux-2.6.git/include/linux/mm.h
|
||||
#ifdef CONFIG_MMU
|
||||
pgprot_t vm_get_page_prot(unsigned long vm_flags);
|
||||
#else
|
||||
--
|
||||
1.7.6.4
|
||||
|
@ -1,6 +1,8 @@
|
||||
proc: Introduce the Children: line in /proc/<pid>/status
|
||||
|
||||
From 9e489dbc4f796b76adb4440ccf4888d934ede61d Mon Sep 17 00:00:00 2001
|
||||
From: Pavel Emelyanov <xemul@parallels.com>
|
||||
Date: Tue, 8 Nov 2011 14:59:40 +0400
|
||||
Subject: [PATCH 3/4] fs, proc: Introduce the Children: line in
|
||||
/proc/<pid>/status
|
||||
|
||||
Although we can get the pids of some task's issue, this is just
|
||||
more convenient to have them this way.
|
||||
@ -10,13 +12,13 @@ Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
---
|
||||
fs/proc/array.c | 14 ++++++++++++++
|
||||
1 file changed, 14 insertions(+)
|
||||
1 files changed, 14 insertions(+), 0 deletions(-)
|
||||
|
||||
Index: linux-2.6.git/fs/proc/array.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/array.c
|
||||
+++ linux-2.6.git/fs/proc/array.c
|
||||
@@ -158,6 +158,18 @@ static inline const char *get_task_state
|
||||
diff --git a/fs/proc/array.c b/fs/proc/array.c
|
||||
index 3a1dafd..8f33329 100644
|
||||
--- a/fs/proc/array.c
|
||||
+++ b/fs/proc/array.c
|
||||
@@ -158,6 +158,18 @@ static inline const char *get_task_state(struct task_struct *tsk)
|
||||
return *p;
|
||||
}
|
||||
|
||||
@ -35,7 +37,7 @@ Index: linux-2.6.git/fs/proc/array.c
|
||||
static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
|
||||
struct pid *pid, struct task_struct *p)
|
||||
{
|
||||
@@ -192,6 +204,8 @@ static inline void task_state(struct seq
|
||||
@@ -192,6 +204,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
|
||||
cred->uid, cred->euid, cred->suid, cred->fsuid,
|
||||
cred->gid, cred->egid, cred->sgid, cred->fsgid);
|
||||
|
||||
@ -44,3 +46,6 @@ Index: linux-2.6.git/fs/proc/array.c
|
||||
task_lock(p);
|
||||
if (p->files)
|
||||
fdt = files_fdtable(p->files);
|
||||
--
|
||||
1.7.6.4
|
||||
|
@ -1,17 +1,21 @@
|
||||
fs, proc: Add start_data, end_data, start_brk members to /proc/$pid/stat
|
||||
From e46fc1fa01faea36ad4c5608436f5900e66c9529 Mon Sep 17 00:00:00 2001
|
||||
From: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
Date: Tue, 8 Nov 2011 15:00:56 +0400
|
||||
Subject: [PATCH 4/4] fs, proc: Add start_data, end_data, start_brk members to
|
||||
/proc/$pid/stat
|
||||
|
||||
It helps to dump and restore these mm_struct members at checkpoint/restore time.
|
||||
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
---
|
||||
fs/proc/array.c | 7 +++++--
|
||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||
1 files changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
Index: linux-2.6.git/fs/proc/array.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/array.c
|
||||
+++ linux-2.6.git/fs/proc/array.c
|
||||
@@ -478,7 +478,7 @@ static int do_task_stat(struct seq_file
|
||||
diff --git a/fs/proc/array.c b/fs/proc/array.c
|
||||
index 8f33329..8248682 100644
|
||||
--- a/fs/proc/array.c
|
||||
+++ b/fs/proc/array.c
|
||||
@@ -478,7 +478,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
|
||||
|
||||
seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
|
||||
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
|
||||
@ -20,7 +24,7 @@ Index: linux-2.6.git/fs/proc/array.c
|
||||
pid_nr_ns(pid, ns),
|
||||
tcomm,
|
||||
state,
|
||||
@@ -525,7 +525,10 @@ static int do_task_stat(struct seq_file
|
||||
@@ -525,7 +525,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
|
||||
task->policy,
|
||||
(unsigned long long)delayacct_blkio_ticks(task),
|
||||
cputime_to_clock_t(gtime),
|
||||
@ -32,3 +36,6 @@ Index: linux-2.6.git/fs/proc/array.c
|
||||
if (mm)
|
||||
mmput(mm);
|
||||
return 0;
|
||||
--
|
||||
1.7.6.4
|
||||
|
@ -1,976 +0,0 @@
|
||||
elf: Add support for loading ET_CKPT files
|
||||
|
||||
This patch adds the ability to run so-called "checkpoint" files by
|
||||
enhancing Elf file format, which includes
|
||||
|
||||
- new Elf file type ET_CKPT
|
||||
|
||||
- three additional program header types PT_CKPT_VMA, PT_CKPT_CORE
|
||||
and PT_CKPT_PAGES.
|
||||
|
||||
PT_CKPT_VMA -- holds 'vma_entry' structure, which describes the
|
||||
memory area the kernel should map. It also might contain a file descriptor
|
||||
so the kernel will map the provided file. Usually such a file gets
|
||||
opened by user-space helper tool which prepares 'vma_entry' structure
|
||||
for the kernel.
|
||||
|
||||
PT_CKPT_CORE -- 'core_entry' structure (registers, tls, tasks specific
|
||||
settings). The structure is defined as a 16K container which should be
|
||||
enough for most cases. 8K of it is reserved for arch specific settings.
|
||||
|
||||
PT_CKPT_PAGES -- a set of all pages whose contents should be restored.
|
||||
|
||||
Apart from the Elf extension, flush_old_exec() has been split into two
|
||||
functions -- the former flush_old_exec() and flush_exec_keep_thread().
|
||||
The latter doesn't call de_thread(), which preserves the thread
|
||||
relationship. Also arch_setup_additional_pages_at() helper added
|
||||
to setup vdso at predefined address.
|
||||
|
||||
At the moment only the pure x86-64 architecture is supported.
|
||||
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
CC: Andrew Vagin <avagin@parallels.com>
|
||||
CC: Pavel Emelyanov <xemul@parallels.com>
|
||||
CC: James Bottomley <jbottomley@parallels.com>
|
||||
CC: Glauber Costa <glommer@parallels.com>
|
||||
CC: H. Peter Anvin <hpa@zytor.com>
|
||||
CC: Ingo Molnar <mingo@elte.hu>
|
||||
CC: Tejun Heo <tj@kernel.org>
|
||||
CC: Dave Hansen <dave@linux.vnet.ibm.com>
|
||||
CC: Eric W. Biederman <ebiederm@xmission.com>
|
||||
CC: Daniel Lezcano <dlezcano@fr.ibm.com>
|
||||
CC: Alexey Dobriyan <adobriyan@gmail.com>
|
||||
---
|
||||
arch/x86/include/asm/elf.h | 3
|
||||
arch/x86/include/asm/elf_ckpt.h | 80 ++++++++
|
||||
arch/x86/kernel/Makefile | 2
|
||||
arch/x86/kernel/elf_ckpt.c | 161 ++++++++++++++++++
|
||||
arch/x86/vdso/vma.c | 22 ++
|
||||
fs/Kconfig.binfmt | 11 +
|
||||
fs/Makefile | 1
|
||||
fs/binfmt_elf.c | 17 +
|
||||
fs/binfmt_elf_ckpt.c | 356 ++++++++++++++++++++++++++++++++++++++++
|
||||
fs/exec.c | 27 +--
|
||||
include/linux/binfmts.h | 1
|
||||
include/linux/elf_ckpt.h | 103 +++++++++++
|
||||
12 files changed, 772 insertions(+), 12 deletions(-)
|
||||
|
||||
Index: linux-2.6.git/arch/x86/include/asm/elf.h
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/arch/x86/include/asm/elf.h
|
||||
+++ linux-2.6.git/arch/x86/include/asm/elf.h
|
||||
@@ -314,7 +314,8 @@ struct linux_binprm;
|
||||
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
|
||||
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
|
||||
int uses_interp);
|
||||
-
|
||||
+extern int arch_setup_additional_pages_at(struct linux_binprm *bprm,
|
||||
+ void *addr, int uses_interp);
|
||||
extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
|
||||
#define compat_arch_setup_additional_pages syscall32_setup_pages
|
||||
|
||||
Index: linux-2.6.git/arch/x86/include/asm/elf_ckpt.h
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ linux-2.6.git/arch/x86/include/asm/elf_ckpt.h
|
||||
@@ -0,0 +1,80 @@
|
||||
+#ifndef _LINUX_ELF_X86_CHECKPOINT_H
|
||||
+#define _LINUX_ELF_X86_CHECKPOINT_H
|
||||
+
|
||||
+#include <linux/errno.h>
|
||||
+
|
||||
+#include <asm/types.h>
|
||||
+#include <asm/ptrace.h>
|
||||
+
|
||||
+#define CKPT_GDT_ENTRY_TLS_ENTRIES 3
|
||||
+
|
||||
+struct user_regs_entry {
|
||||
+ __u64 r15;
|
||||
+ __u64 r14;
|
||||
+ __u64 r13;
|
||||
+ __u64 r12;
|
||||
+ __u64 bp;
|
||||
+ __u64 bx;
|
||||
+ __u64 r11;
|
||||
+ __u64 r10;
|
||||
+ __u64 r9;
|
||||
+ __u64 r8;
|
||||
+ __u64 ax;
|
||||
+ __u64 cx;
|
||||
+ __u64 dx;
|
||||
+ __u64 si;
|
||||
+ __u64 di;
|
||||
+ __u64 orig_ax;
|
||||
+ __u64 ip;
|
||||
+ __u64 cs;
|
||||
+ __u64 flags;
|
||||
+ __u64 sp;
|
||||
+ __u64 ss;
|
||||
+ __u64 fs_base;
|
||||
+ __u64 gs_base;
|
||||
+ __u64 ds;
|
||||
+ __u64 es;
|
||||
+ __u64 fs;
|
||||
+ __u64 gs;
|
||||
+} __packed;
|
||||
+
|
||||
+struct desc_struct_entry {
|
||||
+ __u32 a;
|
||||
+ __u32 b;
|
||||
+} __packed;
|
||||
+
|
||||
+struct user_fpregs_entry {
|
||||
+ __u16 cwd;
|
||||
+ __u16 swd;
|
||||
+ __u16 twd;
|
||||
+ __u16 fop;
|
||||
+ __u64 rip;
|
||||
+ __u64 rdp;
|
||||
+ __u32 mxcsr;
|
||||
+ __u32 mxcsr_mask;
|
||||
+ __u32 st_space[32];
|
||||
+ __u32 xmm_space[64];
|
||||
+ __u32 padding[24];
|
||||
+} __packed;
|
||||
+
|
||||
+struct ckpt_arch_entry {
|
||||
+ struct user_regs_entry gpregs;
|
||||
+ struct user_fpregs_entry fpregs;
|
||||
+ struct desc_struct tls_array[CKPT_GDT_ENTRY_TLS_ENTRIES];
|
||||
+};
|
||||
+
|
||||
+struct core_entry;
|
||||
+
|
||||
+#ifdef CONFIG_X86_64
|
||||
+extern int load_elf_ckpt_arch(struct task_struct *tsk, struct pt_regs *regs,
|
||||
+ struct core_entry *core_entry);
|
||||
+#else
|
||||
+static inline int
|
||||
+load_elf_ckpt_arch(struct task_struct *tsk, struct pt_regs *regs,
|
||||
+ struct core_entry *core_entry)
|
||||
+{
|
||||
+ return -ENOEXEC;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#endif /* _LINUX_ELF_X86_CHECKPOINT_H */
|
||||
Index: linux-2.6.git/arch/x86/kernel/Makefile
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/arch/x86/kernel/Makefile
|
||||
+++ linux-2.6.git/arch/x86/kernel/Makefile
|
||||
@@ -99,6 +99,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION)
|
||||
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
|
||||
obj-$(CONFIG_OF) += devicetree.o
|
||||
|
||||
+obj-$(CONFIG_BINFMT_ELF_CKPT) += elf_ckpt.o
|
||||
+
|
||||
###
|
||||
# 64 bit specific files
|
||||
ifeq ($(CONFIG_X86_64),y)
|
||||
Index: linux-2.6.git/arch/x86/kernel/elf_ckpt.c
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ linux-2.6.git/arch/x86/kernel/elf_ckpt.c
|
||||
@@ -0,0 +1,161 @@
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/fs.h>
|
||||
+#include <linux/mm.h>
|
||||
+#include <linux/mman.h>
|
||||
+#include <linux/errno.h>
|
||||
+#include <linux/signal.h>
|
||||
+#include <linux/binfmts.h>
|
||||
+#include <linux/string.h>
|
||||
+#include <linux/file.h>
|
||||
+#include <linux/slab.h>
|
||||
+#include <linux/personality.h>
|
||||
+#include <linux/elfcore.h>
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/highuid.h>
|
||||
+#include <linux/compiler.h>
|
||||
+#include <linux/highmem.h>
|
||||
+#include <linux/pagemap.h>
|
||||
+#include <linux/security.h>
|
||||
+#include <linux/random.h>
|
||||
+#include <linux/elf.h>
|
||||
+#include <linux/utsname.h>
|
||||
+#include <linux/coredump.h>
|
||||
+#include <linux/regset.h>
|
||||
+
|
||||
+#include <asm/uaccess.h>
|
||||
+#include <asm/param.h>
|
||||
+#include <asm/page.h>
|
||||
+#include <asm/prctl.h>
|
||||
+#include <asm/proto.h>
|
||||
+#include <asm/i387.h>
|
||||
+
|
||||
+#include <linux/elf_ckpt.h>
|
||||
+#include <linux/flex_array.h>
|
||||
+#include <asm/tlbflush.h>
|
||||
+#include <asm/desc.h>
|
||||
+
|
||||
+#ifdef CONFIG_X86_64
|
||||
+
|
||||
+#define cp_reg(d, s, r) d.r = s.r
|
||||
+
|
||||
+int load_elf_ckpt_arch(struct task_struct *tsk, struct pt_regs *regs,
|
||||
+ struct core_entry *core_entry)
|
||||
+{
|
||||
+ struct ckpt_arch_entry *arch = (struct ckpt_arch_entry *)core_entry->arch;
|
||||
+ struct thread_struct *thread = &current->thread;
|
||||
+
|
||||
+ struct user_regs_struct gpregs;
|
||||
+ struct user_i387_struct fpregs;
|
||||
+
|
||||
+ mm_segment_t old_fs;
|
||||
+ int i, ret;
|
||||
+
|
||||
+ if (core_entry->header.arch != CKPT_HEADER_ARCH_X86_64) {
|
||||
+ pr_err("elf-ckpt-x86: Unsupported or corrupted header\n");
|
||||
+ return -ENOEXEC;
|
||||
+ }
|
||||
+
|
||||
+ BUILD_BUG_ON(CKPT_GDT_ENTRY_TLS_ENTRIES != GDT_ENTRY_TLS_ENTRIES);
|
||||
+ BUILD_BUG_ON(sizeof(struct ckpt_arch_entry) > CKPT_ARCH_SIZE);
|
||||
+
|
||||
+ memset(&gpregs, 0, sizeof(gpregs));
|
||||
+ memset(&fpregs, 0, sizeof(fpregs));
|
||||
+
|
||||
+ /*
|
||||
+ * General purpose registers
|
||||
+ */
|
||||
+ cp_reg(gpregs, arch->gpregs, r15);
|
||||
+ cp_reg(gpregs, arch->gpregs, r14);
|
||||
+ cp_reg(gpregs, arch->gpregs, r13);
|
||||
+ cp_reg(gpregs, arch->gpregs, r12);
|
||||
+ cp_reg(gpregs, arch->gpregs, bp);
|
||||
+ cp_reg(gpregs, arch->gpregs, bx);
|
||||
+ cp_reg(gpregs, arch->gpregs, r11);
|
||||
+ cp_reg(gpregs, arch->gpregs, r10);
|
||||
+ cp_reg(gpregs, arch->gpregs, r9);
|
||||
+ cp_reg(gpregs, arch->gpregs, r8);
|
||||
+ cp_reg(gpregs, arch->gpregs, ax);
|
||||
+ cp_reg(gpregs, arch->gpregs, cx);
|
||||
+ cp_reg(gpregs, arch->gpregs, dx);
|
||||
+ cp_reg(gpregs, arch->gpregs, si);
|
||||
+ cp_reg(gpregs, arch->gpregs, di);
|
||||
+ cp_reg(gpregs, arch->gpregs, orig_ax);
|
||||
+ cp_reg(gpregs, arch->gpregs, ip);
|
||||
+ cp_reg(gpregs, arch->gpregs, cs);
|
||||
+ cp_reg(gpregs, arch->gpregs, flags);
|
||||
+ cp_reg(gpregs, arch->gpregs, sp);
|
||||
+ cp_reg(gpregs, arch->gpregs, ss);
|
||||
+ cp_reg(gpregs, arch->gpregs, fs_base);
|
||||
+ cp_reg(gpregs, arch->gpregs, gs_base);
|
||||
+ cp_reg(gpregs, arch->gpregs, ds);
|
||||
+ cp_reg(gpregs, arch->gpregs, es);
|
||||
+ cp_reg(gpregs, arch->gpregs, fs);
|
||||
+ cp_reg(gpregs, arch->gpregs, gs);
|
||||
+
|
||||
+ old_fs = get_fs();
|
||||
+ set_fs(KERNEL_DS);
|
||||
+ ret = arch_ptrace(current, PTRACE_SETREGS, 0, (unsigned long)&gpregs);
|
||||
+ set_fs(old_fs);
|
||||
+ if (ret)
|
||||
+ goto out;
|
||||
+
|
||||
+ *regs = *task_pt_regs(current);
|
||||
+
|
||||
+ thread->usersp = arch->gpregs.sp;
|
||||
+ thread->ds = arch->gpregs.ds;
|
||||
+ thread->es = arch->gpregs.es;
|
||||
+ thread->fs = arch->gpregs.fs;
|
||||
+ thread->gs = arch->gpregs.gs;
|
||||
+
|
||||
+ thread->fsindex = thread->fs;
|
||||
+ thread->gsindex = thread->gs;
|
||||
+
|
||||
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) {
|
||||
+ thread->tls_array[i].a = arch->tls_array[i].a;
|
||||
+ thread->tls_array[i].b = arch->tls_array[i].b;
|
||||
+ }
|
||||
+
|
||||
+ if (arch->gpregs.fs_base) {
|
||||
+ ret = do_arch_prctl(current, ARCH_SET_FS, arch->gpregs.fs_base);
|
||||
+ if (ret)
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ if (arch->gpregs.gs_base) {
|
||||
+ ret = do_arch_prctl(current, ARCH_SET_GS, arch->gpregs.gs_base);
|
||||
+ if (ret)
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ /* Restoring FPU */
|
||||
+ if (core_entry->task_flags & PF_USED_MATH) {
|
||||
+
|
||||
+ cp_reg(fpregs, arch->fpregs, cwd);
|
||||
+ cp_reg(fpregs, arch->fpregs, swd);
|
||||
+ cp_reg(fpregs, arch->fpregs, twd);
|
||||
+ cp_reg(fpregs, arch->fpregs, fop);
|
||||
+ cp_reg(fpregs, arch->fpregs, rip);
|
||||
+ cp_reg(fpregs, arch->fpregs, rdp);
|
||||
+ cp_reg(fpregs, arch->fpregs, mxcsr);
|
||||
+ cp_reg(fpregs, arch->fpregs, mxcsr_mask);
|
||||
+
|
||||
+ for (i = 0; i < ARRAY_SIZE(arch->fpregs.st_space); i++)
|
||||
+ cp_reg(fpregs, arch->fpregs, st_space[i]);
|
||||
+
|
||||
+ for (i = 0; i < ARRAY_SIZE(arch->fpregs.xmm_space); i++)
|
||||
+ cp_reg(fpregs, arch->fpregs, xmm_space[i]);
|
||||
+
|
||||
+ old_fs = get_fs();
|
||||
+ set_fs(KERNEL_DS);
|
||||
+ ret = arch_ptrace(current, PTRACE_SETFPREGS, 0, (unsigned long)&fpregs);
|
||||
+ set_fs(old_fs);
|
||||
+ if (ret)
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+out:
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+#endif /* CONFIG_X86_64 */
|
||||
Index: linux-2.6.git/arch/x86/vdso/vma.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/arch/x86/vdso/vma.c
|
||||
+++ linux-2.6.git/arch/x86/vdso/vma.c
|
||||
@@ -137,6 +137,28 @@ up_fail:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+int arch_setup_additional_pages_at(struct linux_binprm *bprm, void *addr, int uses_interp)
|
||||
+{
|
||||
+ struct mm_struct *mm = current->mm;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (!vdso_enabled)
|
||||
+ return 0;
|
||||
+
|
||||
+ down_write(&mm->mmap_sem);
|
||||
+ current->mm->context.vdso = addr;
|
||||
+ ret = install_special_mapping(mm, (unsigned long)addr, vdso_size,
|
||||
+ VM_READ | VM_EXEC |
|
||||
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC |
|
||||
+ VM_ALWAYSDUMP,
|
||||
+ vdso_pages);
|
||||
+ if (ret)
|
||||
+ current->mm->context.vdso = NULL;
|
||||
+
|
||||
+ up_write(&mm->mmap_sem);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static __init int vdso_setup(char *s)
|
||||
{
|
||||
vdso_enabled = simple_strtoul(s, NULL, 0);
|
||||
Index: linux-2.6.git/fs/Kconfig.binfmt
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/Kconfig.binfmt
|
||||
+++ linux-2.6.git/fs/Kconfig.binfmt
|
||||
@@ -23,6 +23,17 @@ config BINFMT_ELF
|
||||
ld.so (check the file <file:Documentation/Changes> for location and
|
||||
latest version).
|
||||
|
||||
+config BINFMT_ELF_CKPT
|
||||
+ tristate "Kernel support for CKPT ELF binaries"
|
||||
+ default n
|
||||
+ depends on BINFMT_ELF && X86_64
|
||||
+ help
|
||||
+ ELF CKPT (checkpoint) is an extension to ELF format to restore
|
||||
+ checkpointed processes. It's not confirmed yet and highly
|
||||
+ experimental.
|
||||
+
|
||||
+ If unsure, say N.
|
||||
+
|
||||
config COMPAT_BINFMT_ELF
|
||||
bool
|
||||
depends on COMPAT && BINFMT_ELF
|
||||
Index: linux-2.6.git/fs/Makefile
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/Makefile
|
||||
+++ linux-2.6.git/fs/Makefile
|
||||
@@ -37,6 +37,7 @@ obj-$(CONFIG_BINFMT_MISC) += binfmt_misc
|
||||
obj-y += binfmt_script.o
|
||||
|
||||
obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o
|
||||
+obj-$(CONFIG_BINFMT_ELF_CKPT) += binfmt_elf_ckpt.o
|
||||
obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o
|
||||
obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o
|
||||
obj-$(CONFIG_BINFMT_SOM) += binfmt_som.o
|
||||
Index: linux-2.6.git/fs/binfmt_elf.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/binfmt_elf.c
|
||||
+++ linux-2.6.git/fs/binfmt_elf.c
|
||||
@@ -30,6 +30,7 @@
|
||||
#include <linux/security.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/elf.h>
|
||||
+#include <linux/elf_ckpt.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/coredump.h>
|
||||
#include <asm/uaccess.h>
|
||||
@@ -592,7 +593,11 @@ static int load_elf_binary(struct linux_
|
||||
if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
|
||||
goto out;
|
||||
|
||||
- if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
|
||||
+ if (loc->elf_ex.e_type != ET_EXEC &&
|
||||
+#ifdef CONFIG_BINFMT_ELF_CKPT
|
||||
+ loc->elf_ex.e_type != ET_CKPT &&
|
||||
+#endif
|
||||
+ loc->elf_ex.e_type != ET_DYN)
|
||||
goto out;
|
||||
if (!elf_check_arch(&loc->elf_ex))
|
||||
goto out;
|
||||
@@ -619,6 +624,16 @@ static int load_elf_binary(struct linux_
|
||||
goto out_free_ph;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_BINFMT_ELF_CKPT
|
||||
+ if (loc->elf_ex.e_type == ET_CKPT) {
|
||||
+ retval = load_elf_ckpt(bprm, regs, &loc->elf_ex,
|
||||
+ (struct elf_phdr *)elf_phdata);
|
||||
+ if (!retval)
|
||||
+ set_binfmt(&elf_format);
|
||||
+ goto out_free_ph;
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
elf_ppnt = elf_phdata;
|
||||
elf_bss = 0;
|
||||
elf_brk = 0;
|
||||
Index: linux-2.6.git/fs/binfmt_elf_ckpt.c
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ linux-2.6.git/fs/binfmt_elf_ckpt.c
|
||||
@@ -0,0 +1,356 @@
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/fs.h>
|
||||
+#include <linux/mm.h>
|
||||
+#include <linux/mman.h>
|
||||
+#include <linux/errno.h>
|
||||
+#include <linux/signal.h>
|
||||
+#include <linux/binfmts.h>
|
||||
+#include <linux/string.h>
|
||||
+#include <linux/file.h>
|
||||
+#include <linux/slab.h>
|
||||
+#include <linux/personality.h>
|
||||
+#include <linux/elfcore.h>
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/highuid.h>
|
||||
+#include <linux/compiler.h>
|
||||
+#include <linux/highmem.h>
|
||||
+#include <linux/pagemap.h>
|
||||
+#include <linux/security.h>
|
||||
+#include <linux/random.h>
|
||||
+#include <linux/elf.h>
|
||||
+#include <linux/utsname.h>
|
||||
+#include <linux/coredump.h>
|
||||
+#include <linux/regset.h>
|
||||
+
|
||||
+#include <asm/uaccess.h>
|
||||
+#include <asm/param.h>
|
||||
+#include <asm/page.h>
|
||||
+#include <asm/prctl.h>
|
||||
+#include <asm/proto.h>
|
||||
+#include <asm/i387.h>
|
||||
+
|
||||
+#include <linux/elf_ckpt.h>
|
||||
+#include <asm/elf_ckpt.h>
|
||||
+
|
||||
+#include <linux/flex_array.h>
|
||||
+#include <asm/tlbflush.h>
|
||||
+#include <asm/desc.h>
|
||||
+
|
||||
+/*
+ * load_elf_ckpt - restore a checkpointed task from an ET_CKPT ELF image
+ * @bprm:	binprm of the image being executed; all data is read from bprm->file
+ * @regs:	user registers, passed on to load_elf_ckpt_arch()
+ * @elf_ex:	ELF header of the image
+ * @elf_phdr:	program header table holding elf_ex->e_phnum entries
+ *
+ * The image must carry a PT_CKPT_CORE header of a supported version, at
+ * least one PT_CKPT_VMA entry and a PT_CKPT_PAGES segment.  The current
+ * address space is dropped, every VMA_AREA_REGULAR entry is mapped back
+ * at its recorded address, page contents are copied in from the image
+ * and finally the arch-specific state is restored.
+ *
+ * Returns 0 on success or a negative errno.  Once the old address space
+ * has been unmapped there is nothing to return to, so every later
+ * failure also sends SIGKILL to current.
+ */
+int load_elf_ckpt(struct linux_binprm *bprm, struct pt_regs *regs,
+		  struct elfhdr *elf_ex, struct elf_phdr *elf_phdr)
+{
+	struct elf_phdr *elf_phdr_pages;
+	struct flex_array *fa = NULL;
+	struct vma_entry *vma_entry_ptr;
+	int nr_vma_found, nr_vma_mapped;
+	struct vma_entry vma_entry;
+	struct file *file = NULL;
+	unsigned long map_addr;
+
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+	unsigned long vdso = -1UL;
+#endif
+
+	struct core_entry *core_entry = NULL;
+	unsigned long start_stack = -1UL;
+
+	int i, ret = -ENOEXEC;
+	loff_t off;
+
+	BUILD_BUG_ON(CKPT_TASK_COMM_LEN != TASK_COMM_LEN);
+	BUILD_BUG_ON(CKPT_PAGE_SIZE != PAGE_SIZE);
+	BUILD_BUG_ON(CKPT_CORE_SIZE != sizeof(*core_entry));
+
+	elf_phdr_pages = NULL;
+	nr_vma_found = 0;
+	nr_vma_mapped = 0;
+
+	/*
+	 * An early check for header version so if we fail here
+	 * we would not need to use flex array at all.
+	 */
+	for (i = 0; i < elf_ex->e_phnum; i++) {
+		if (elf_phdr[i].p_type != PT_CKPT_CORE)
+			continue;
+
+		core_entry = vmalloc(sizeof(*core_entry));
+		if (!core_entry) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		ret = kernel_read(bprm->file, elf_phdr[i].p_offset,
+				  (char *)core_entry, sizeof(*core_entry));
+		if (ret != sizeof(*core_entry)) {
+			pr_err("elf-ckpt: Can't read core_entry\n");
+			ret = -EIO;
+			goto out;
+		}
+
+		if (core_entry->header.version != CKPT_HEADER_VERSION) {
+			pr_err("elf-ckpt: Unsupported or corrupted header\n");
+			ret = -ENOEXEC;
+			goto out;
+		}
+
+		break;
+	}
+
+	if (i == elf_ex->e_phnum) {
+		pr_err("elf-ckpt: No header found\n");
+		ret = -ENOEXEC;
+		goto out;
+	}
+
+	/*
+	 * One slot per program header is an upper bound for the number
+	 * of VMA entries.  Bail out on *either* failure; the "out" path
+	 * releases a partially set up array.
+	 */
+	fa = flex_array_alloc(sizeof(vma_entry), elf_ex->e_phnum, GFP_KERNEL);
+	if (!fa) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	if (flex_array_prealloc(fa, 0, elf_ex->e_phnum, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = flush_exec_keep_thread(bprm);
+	if (ret)
+		goto out;
+
+	current->flags &= ~PF_FORKNOEXEC;
+	current->mm->def_flags = 0;
+
+	/*
+	 * We don't care about parameters passed (such as argc, argv, env)
+	 * when execute checkpoint file because we're to substitute
+	 * all things anyway.
+	 *
+	 * This is the point of no return: from here on errors must go
+	 * through out_kill/out_unmap.
+	 */
+	down_write(&current->mm->mmap_sem);
+	do_munmap(current->mm, 0, TASK_SIZE);
+	up_write(&current->mm->mmap_sem);
+
+	SET_PERSONALITY(*elf_ex);
+
+	for (i = 0; i < elf_ex->e_phnum; i++) {
+
+		switch (elf_phdr[i].p_type) {
+		case PT_CKPT_VMA:
+			ret = kernel_read(bprm->file, elf_phdr[i].p_offset,
+					  (char *)&vma_entry, sizeof(vma_entry));
+			if (ret != sizeof(vma_entry)) {
+				pr_err("elf-ckpt: Can't read vma_entry\n");
+				ret = -EIO;
+				goto out_kill;
+			}
+
+			/*
+			 * Store entries densely: the loops below walk
+			 * slots 0..nr_vma_found-1, not phdr indices.
+			 */
+			if (flex_array_put(fa, nr_vma_found, &vma_entry, GFP_KERNEL))
+				BUG();
+
+			/* We need to know if there is executable stack */
+			if (vma_entry.status & VMA_AREA_STACK) {
+				/* PROT_* bits live in ->prot, ->flags holds MAP_* */
+				if (vma_entry.prot & PROT_EXEC)
+					current->personality |= READ_IMPLIES_EXEC;
+			}
+
+			nr_vma_found++;
+			continue;
+		case PT_CKPT_PAGES:
+			elf_phdr_pages = &elf_phdr[i];
+			continue;
+		default:
+			continue;
+		}
+	}
+
+	/* Be sure it has the file structure we expected to see. */
+	if (!elf_phdr_pages || !nr_vma_found) {
+		ret = -ENOEXEC;
+		goto out_kill;
+	}
+
+	/*
+	 * VMA randomization still needs to be set (just in case if
+	 * the program we restore will exec() something else later).
+	 */
+	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+		current->flags |= PF_RANDOMIZE;
+
+	/*
+	 * FIXME: Note it flushes signal handlers as well,
+	 * so we need to dump queued signals and restore
+	 * them here.
+	 */
+	setup_new_exec(bprm);
+
+	current->mm->free_area_cache = current->mm->mmap_base;
+	current->mm->cached_hole_size = 0;
+
+	for (i = 0; i < nr_vma_found; i++) {
+		vma_entry_ptr = flex_array_get(fa, i);
+
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+		if (vma_entry_ptr->status & VMA_AREA_VDSO)
+			vdso = vma_entry_ptr->start;
+#endif
+
+		if (vma_entry_ptr->status & VMA_AREA_STACK) {
+			/* Note if stack is VM_GROWSUP -- it should be reversed */
+			start_stack = vma_entry_ptr->start;
+		}
+
+		/* Anything special should be ignored */
+		if (!(vma_entry_ptr->status & VMA_AREA_REGULAR))
+			continue;
+
+		/* It's a file mmap'ed */
+		if (vma_entry_ptr->fd != -1) {
+			file = fget((unsigned int)vma_entry_ptr->fd);
+			if (!file) {
+				ret = -EBADF;
+				goto out_unmap;
+			}
+		} else
+			file = NULL;
+
+		down_write(&current->mm->mmap_sem);
+		map_addr = do_mmap(file,
+				   vma_entry_ptr->start,
+				   vma_entry_ptr->end - vma_entry_ptr->start,
+				   vma_entry_ptr->prot,
+				   vma_entry_ptr->flags | MAP_FIXED,
+				   vma_entry_ptr->pgoff);
+		up_write(&current->mm->mmap_sem);
+
+		if (file) {
+			/*
+			 * Drop our fget() reference and close the
+			 * descriptor the image referred to; ->fd is left
+			 * untouched so it is still a descriptor here.
+			 */
+			fput(file);
+			do_close((unsigned int)vma_entry_ptr->fd);
+		}
+
+		if ((unsigned long)(map_addr) >= TASK_SIZE) {
+			ret = IS_ERR((void *)map_addr) ? PTR_ERR((void*)map_addr) : -EINVAL;
+			goto out_unmap;
+		}
+
+		nr_vma_mapped++;
+	}
+
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+	if (vdso == -1UL) {
+		pr_err("elf-ckpt: Can't find VDSO address\n");
+		ret = -ENOEXEC;
+		goto out_unmap;
+	}
+#endif
+
+	if (start_stack == -1UL) {
+		pr_err("elf-ckpt: Can't find stack VMA\n");
+		ret = -ENOEXEC;
+		goto out_unmap;
+	}
+
+	/* The name it has before */
+	set_task_comm(current, core_entry->task_comm);
+
+	bprm->p = core_entry->mm_start_stack;
+
+	current->mm->start_code		= core_entry->mm_start_code;
+	current->mm->end_code		= core_entry->mm_end_code;
+	current->mm->start_data		= core_entry->mm_start_data;
+	current->mm->end_data		= core_entry->mm_end_data;
+	current->mm->start_stack	= core_entry->mm_start_stack;
+	current->mm->start_brk		= core_entry->mm_start_brk;
+	current->mm->brk		= core_entry->mm_brk;
+
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+	ret = arch_setup_additional_pages_at(bprm, (void *)vdso, 0);
+	if (ret) {
+		pr_err("elf-ckpt: Can't setup additional pages at %lx with %d\n",
+		       vdso, ret);
+		goto out_unmap;
+	}
+#endif
+
+	/*
+	 * Restore pages.  The segment is a sequence of
+	 * { __u64 va; u8 data[PAGE_SIZE]; } records terminated by a
+	 * record whose va is zero.
+	 */
+	off = elf_phdr_pages->p_offset;
+	while (1) {
+		struct vm_area_struct *vma;
+		struct page *page;
+		void *page_data;
+		__u64 va;
+
+		ret = kernel_read(bprm->file, off, (char *)&va, sizeof(va));
+		if (ret != sizeof(va)) {
+			pr_err("elf-ckpt: Can't read page virtual address: "
+			       "ret = %d off = %lx\n", ret, (unsigned long)off);
+			ret = -EIO;
+			goto out_unmap;
+		}
+
+		/* End of pages reached */
+		if (!va)
+			break;
+
+		/*
+		 * NOTE(review): find_vma() only guarantees vm_end > va,
+		 * not that va lies inside the VMA; an address in a hole
+		 * is left for get_user_pages() below to reject.
+		 */
+		vma = find_vma(current->mm, (unsigned long)va);
+		if (!vma) {
+			pr_err("elf-ckpt: No VMA for page: %16lx\n", (unsigned long)va);
+			ret = -ESRCH;
+			goto out_unmap;
+		}
+
+		ret = get_user_pages(current, current->mm, (unsigned long)va,
+				     1, 1, 1, &page, NULL);
+		if (ret != 1) {
+			pr_err("elf-ckpt: Can't get user page: %16lx\n", (unsigned long)va);
+			ret = -EFAULT;
+			goto out_unmap;
+		}
+
+		page_data = kmap(page);
+		ret = kernel_read(bprm->file, off + sizeof(va), page_data, PAGE_SIZE);
+		kunmap(page);
+		put_page(page);
+
+		if (ret != PAGE_SIZE) {
+			pr_err("elf-ckpt: Can't read data on page: %16lx\n", (unsigned long)va);
+			ret = -EFAULT;
+			goto out_unmap;
+		}
+
+		off += sizeof(va) + PAGE_SIZE;
+	}
+
+	/*
+	 * Architecture specific setup for registers and friends.
+	 * It is done last: if an error happened before, there is
+	 * not much point in setting up arch-specific things at all.
+	 */
+	ret = load_elf_ckpt_arch(current, regs, core_entry);
+	if (ret)
+		goto out_unmap;
+
+	/* We're done */
+	ret = 0;
+out:
+	vfree(core_entry);
+
+	if (fa)
+		flex_array_free(fa);
+	return ret;
+
+out_unmap:
+	/*
+	 * Mapping proceeds in slot order and stops at the first failure,
+	 * so the areas we created are exactly the first nr_vma_mapped
+	 * VMA_AREA_REGULAR entries (special entries were skipped).
+	 */
+	for (i = 0; i < nr_vma_found && nr_vma_mapped > 0; i++) {
+		vma_entry_ptr = flex_array_get(fa, i);
+		if (!(vma_entry_ptr->status & VMA_AREA_REGULAR))
+			continue;
+
+		down_write(&current->mm->mmap_sem);
+		do_munmap(current->mm, vma_entry_ptr->start,
+			  vma_entry_ptr->end - vma_entry_ptr->start);
+		up_write(&current->mm->mmap_sem);
+		nr_vma_mapped--;
+	}
+
+out_kill:
+	/* The old image is gone, the task can't be resumed. */
+	send_sig(SIGKILL, current, 0);
+	goto out;
+}
|
||||
Index: linux-2.6.git/fs/exec.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/exec.c
|
||||
+++ linux-2.6.git/fs/exec.c
|
||||
@@ -1071,18 +1071,10 @@ void set_task_comm(struct task_struct *t
|
||||
perf_event_comm(tsk);
|
||||
}
|
||||
|
||||
-int flush_old_exec(struct linux_binprm * bprm)
|
||||
+int flush_exec_keep_thread(struct linux_binprm * bprm)
|
||||
{
|
||||
int retval;
|
||||
|
||||
- /*
|
||||
- * Make sure we have a private signal table and that
|
||||
- * we are unassociated from the previous thread group.
|
||||
- */
|
||||
- retval = de_thread(current);
|
||||
- if (retval)
|
||||
- goto out;
|
||||
-
|
||||
set_mm_exe_file(bprm->mm, bprm->file);
|
||||
|
||||
/*
|
||||
@@ -1101,10 +1093,25 @@ int flush_old_exec(struct linux_binprm *
|
||||
current->personality &= ~bprm->per_clear;
|
||||
|
||||
return 0;
|
||||
-
|
||||
out:
|
||||
return retval;
|
||||
}
|
||||
+EXPORT_SYMBOL(flush_exec_keep_thread);
|
||||
+
|
||||
+/*
|
||||
+ * flush_old_exec - detach from the old thread group, then flush the
|
||||
+ * old executable state via flush_exec_keep_thread().
|
||||
+ *
|
||||
+ * Returns the de_thread() error if that step fails, otherwise whatever
|
||||
+ * flush_exec_keep_thread() returns.
|
||||
+ */
|
||||
+int flush_old_exec(struct linux_binprm * bprm)
|
||||
+{
|
||||
+ int retval;
|
||||
+
|
||||
+ /*
|
||||
+ * Make sure we have a private signal table and that
|
||||
+ * we are unassociated from the previous thread group.
|
||||
+ */
|
||||
+ retval = de_thread(current);
|
||||
+ if (retval)
|
||||
+ return retval;
|
||||
+
|
||||
+ return flush_exec_keep_thread(bprm);
|
||||
+}
|
||||
EXPORT_SYMBOL(flush_old_exec);
|
||||
|
||||
void would_dump(struct linux_binprm *bprm, struct file *file)
|
||||
Index: linux-2.6.git/include/linux/binfmts.h
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/include/linux/binfmts.h
|
||||
+++ linux-2.6.git/include/linux/binfmts.h
|
||||
@@ -110,6 +110,7 @@ extern int prepare_binprm(struct linux_b
|
||||
extern int __must_check remove_arg_zero(struct linux_binprm *);
|
||||
extern int search_binary_handler(struct linux_binprm *, struct pt_regs *);
|
||||
extern int flush_old_exec(struct linux_binprm * bprm);
|
||||
+extern int flush_exec_keep_thread(struct linux_binprm * bprm);
|
||||
extern void setup_new_exec(struct linux_binprm * bprm);
|
||||
extern void would_dump(struct linux_binprm *, struct file *);
|
||||
|
||||
Index: linux-2.6.git/include/linux/elf_ckpt.h
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ linux-2.6.git/include/linux/elf_ckpt.h
|
||||
@@ -0,0 +1,103 @@
|
||||
+#ifndef _LINUX_ELF_CHECKPOINT_H
|
||||
+#define _LINUX_ELF_CHECKPOINT_H
|
||||
+
|
||||
+#ifdef __KERNEL__
|
||||
+
|
||||
+#include <linux/types.h>
|
||||
+#include <linux/elf-em.h>
|
||||
+
|
||||
+#include <asm/elf.h>
|
||||
+#include <asm/elf_ckpt.h>
|
||||
+
|
||||
+/*
|
||||
+ * Elf extension includes new Elf file type
|
||||
+ * and program header types as well.
|
||||
+ */
|
||||
+#define ET_CKPT 5
|
||||
+
|
||||
+#define PT_CKPT_OFFSET 0x01010101
|
||||
+
|
||||
+#define PT_CKPT_VMA (PT_LOOS + PT_CKPT_OFFSET + 1)
|
||||
+#define PT_CKPT_CORE (PT_LOOS + PT_CKPT_OFFSET + 2)
|
||||
+#define PT_CKPT_PAGES (PT_LOOS + PT_CKPT_OFFSET + 3)
|
||||
+
|
||||
+#define CKPT_PAGE_SIZE 4096
|
||||
+#define CKPT_TASK_COMM_LEN 16
|
||||
+
|
||||
+#define CKPT_HEADER_VERSION 1
|
||||
+#define CKPT_HEADER_ARCH_X86_64 1
|
||||
+
|
||||
+#define VMA_AREA_REGULAR (1 << 0)
|
||||
+#define VMA_AREA_STACK (1 << 1)
|
||||
+#define VMA_AREA_VSYSCALL (1 << 2)
|
||||
+#define VMA_AREA_VDSO (1 << 3)
|
||||
+#define VMA_FORCE_READ (1 << 4)
|
||||
+#define VMA_AREA_HEAP (1 << 5)
|
||||
+#define VMA_FILE_PRIVATE (1 << 6)
|
||||
+#define VMA_FILE_SHARED (1 << 7)
|
||||
+#define VMA_ANON_SHARED (1 << 8)
|
||||
+#define VMA_ANON_PRIVATE (1 << 9)
|
||||
+#define VMA_FORCE_WRITE (1 << 10)
|
||||
+
|
||||
+/*
|
||||
+ * On-disk description of one memory area, stored in a PT_CKPT_VMA
|
||||
+ * segment. load_elf_ckpt() passes start, end - start, prot, flags
|
||||
+ * and pgoff to do_mmap() for entries marked VMA_AREA_REGULAR.
|
||||
+ */
|
||||
+struct vma_entry {
|
||||
+ __u64 start;
|
||||
+ __u64 end;
|
||||
+ __u64 pgoff;
|
||||
+ __u32 prot;
|
||||
+ __u32 flags;
|
||||
+ __u32 status; /* from VMA_x above */
|
||||
+ __u32 pid; /* pid VMA belongs to */
|
||||
+ __s64 fd; /* -1 for no backing file, else a descriptor load_elf_ckpt() fget()s */
|
||||
+ __u64 ino;
|
||||
+ __u32 dev_maj;
|
||||
+ __u32 dev_min;
|
||||
+} __packed;
|
||||
+
|
||||
+struct page_entry {
|
||||
+ __u64 va; /* page virtual address */
|
||||
+ __u8 data[CKPT_PAGE_SIZE]; /* page contents */
|
||||
+} __packed;
|
||||
+
|
||||
+struct image_header {
|
||||
+ __u16 version;
|
||||
+ __u16 arch;
|
||||
+ __u32 flags;
|
||||
+} __packed;
|
||||
+
|
||||
+#define CKPT_ARCH_SIZE (2 * 4096)
|
||||
+#define CKPT_CORE_SIZE (4 * 4096)
|
||||
+
|
||||
+/*
|
||||
+ * On-disk task description, stored in the PT_CKPT_CORE segment.
|
||||
+ * The union with __core_pad fixes the size at CKPT_CORE_SIZE bytes;
|
||||
+ * load_elf_ckpt() enforces this with a BUILD_BUG_ON().
|
||||
+ */
|
||||
+struct core_entry {
|
||||
+ union {
|
||||
+ struct {
|
||||
+ struct image_header header;
|
||||
+ __u8 arch[CKPT_ARCH_SIZE]; /* should be enough for all archs */
|
||||
+ __u32 task_personality;
|
||||
+ __u8 task_comm[CKPT_TASK_COMM_LEN];
|
||||
+ __u32 task_flags;
|
||||
+ __u64 mm_start_code;
|
||||
+ __u64 mm_end_code;
|
||||
+ __u64 mm_start_data;
|
||||
+ __u64 mm_end_data;
|
||||
+ __u64 mm_start_stack;
|
||||
+ __u64 mm_start_brk;
|
||||
+ __u64 mm_brk;
|
||||
+ };
|
||||
+ __u8 __core_pad[CKPT_CORE_SIZE]; /* pads the entry to its fixed size */
|
||||
+ };
|
||||
+} __packed;
|
||||
+
|
||||
+#ifdef CONFIG_BINFMT_ELF_CKPT
|
||||
+extern int load_elf_ckpt(struct linux_binprm *bprm, struct pt_regs *regs,
|
||||
+ struct elfhdr *elf_ex, struct elf_phdr *elf_phdr);
|
||||
+#else
|
||||
+static inline int load_elf_ckpt(struct linux_binprm *bprm, struct pt_regs *regs,
|
||||
+ struct elfhdr *elf_ex, struct elf_phdr *elf_phdr)
|
||||
+{
|
||||
+ return -ENOEXEC;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#endif /* __KERNEL__ */
|
||||
+
|
||||
+#endif /* _LINUX_ELF_CHECKPOINT_H */
|
@ -1,183 +0,0 @@
|
||||
clone: Introduce the CLONE_CHILD_USEPID functionality
|
||||
|
||||
From: Pavel Emelyanov <xemul@openvz.org>
|
||||
|
||||
When restoring a task (or a set of tasks) we need to recreate them with
|
||||
exactly the same pid as they had before. Thus we need the ability to create
|
||||
a task with specified pid.
|
||||
|
||||
The proposal is to reuse the already free CLONE_STOPPED clone flag.
|
||||
|
||||
About the security implication - this can create some problems with pids
|
||||
wraparound and similar, so this approach can be restricted with the "don't
|
||||
allow for CLONE_CHILD_USEPID when the current pid namespace has ever done
|
||||
real pid allocation". This will work perfectly for checkpoint-restore and
|
||||
will not give anyone chances for screwing pids up on a living system.
|
||||
|
||||
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
|
||||
---
|
||||
include/linux/pid.h | 2 -
|
||||
include/linux/sched.h | 1
|
||||
kernel/fork.c | 10 ++++++-
|
||||
kernel/pid.c | 70 ++++++++++++++++++++++++++++++++++++--------------
|
||||
4 files changed, 62 insertions(+), 21 deletions(-)
|
||||
|
||||
Index: linux-2.6.git/include/linux/pid.h
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/include/linux/pid.h
|
||||
+++ linux-2.6.git/include/linux/pid.h
|
||||
@@ -119,7 +119,7 @@ extern struct pid *find_get_pid(int nr);
|
||||
extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
|
||||
int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
|
||||
|
||||
-extern struct pid *alloc_pid(struct pid_namespace *ns);
|
||||
+extern struct pid *alloc_pid(struct pid_namespace *ns, int pid);
|
||||
extern void free_pid(struct pid *pid);
|
||||
|
||||
/*
|
||||
Index: linux-2.6.git/include/linux/sched.h
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/include/linux/sched.h
|
||||
+++ linux-2.6.git/include/linux/sched.h
|
||||
@@ -23,6 +23,7 @@
|
||||
#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
|
||||
/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
|
||||
and is now available for re-use. */
|
||||
+#define CLONE_CHILD_USEPID 0x02000000 /* use the given pid */
|
||||
#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
|
||||
#define CLONE_NEWIPC 0x08000000 /* New ipcs */
|
||||
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
|
||||
Index: linux-2.6.git/kernel/fork.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/kernel/fork.c
|
||||
+++ linux-2.6.git/kernel/fork.c
|
||||
@@ -1253,8 +1253,16 @@ static struct task_struct *copy_process(
|
||||
goto bad_fork_cleanup_io;
|
||||
|
||||
if (pid != &init_struct_pid) {
|
||||
+ int want_pid = 0;
|
||||
+
|
||||
+ if (clone_flags & CLONE_CHILD_USEPID) {
|
||||
+ retval = get_user(want_pid, child_tidptr);
|
||||
+ if (retval)
|
||||
+ goto bad_fork_cleanup_io;
|
||||
+ }
|
||||
+
|
||||
retval = -ENOMEM;
|
||||
- pid = alloc_pid(p->nsproxy->pid_ns);
|
||||
+ pid = alloc_pid(p->nsproxy->pid_ns, want_pid);
|
||||
if (!pid)
|
||||
goto bad_fork_cleanup_io;
|
||||
}
|
||||
Index: linux-2.6.git/kernel/pid.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/kernel/pid.c
|
||||
+++ linux-2.6.git/kernel/pid.c
|
||||
@@ -159,11 +159,55 @@ static void set_last_pid(struct pid_name
|
||||
} while ((prev != last_write) && (pid_before(base, last_write, pid)));
|
||||
}
|
||||
|
||||
-static int alloc_pidmap(struct pid_namespace *pid_ns)
|
||||
+/*
|
||||
+ * Make sure @map has a bitmap page attached.
|
||||
+ *
|
||||
+ * The page is allocated before pidmap_lock is taken and installed
|
||||
+ * under the lock only if map->page is still empty.
|
||||
+ *
|
||||
+ * Returns 0 when map->page is set on exit, -ENOMEM otherwise.
|
||||
+ */
|
||||
+static int alloc_pidmap_page(struct pidmap *map)
|
||||
+{
|
||||
+ if (unlikely(!map->page)) {
|
||||
+ void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
|
||||
+ /*
|
||||
+ * Free the page if someone raced with us
|
||||
+ * installing it:
|
||||
+ */
|
||||
+ spin_lock_irq(&pidmap_lock);
|
||||
+ if (!map->page) {
|
||||
+ map->page = page;
|
||||
+ page = NULL;
|
||||
+ }
|
||||
+ spin_unlock_irq(&pidmap_lock);
|
||||
+ kfree(page);
|
||||
+ if (unlikely(!map->page))
|
||||
+ return -ENOMEM;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/*
+ * Reserve the specific @pid in @pid_ns.
+ *
+ * Returns @pid on success, -EINVAL if @pid is outside (0, pid_max),
+ * -ENOMEM if the bitmap page can't be allocated and -EBUSY if the
+ * pid is already taken.
+ */
+static int set_pidmap(struct pid_namespace *pid_ns, int pid)
+{
+	int offset;
+	struct pidmap *map;
+
+	/*
+	 * The value comes from user space (CLONE_CHILD_USEPID), so it
+	 * must be validated before it is used to index pidmap[].
+	 */
+	if (pid <= 0 || pid >= pid_max)
+		return -EINVAL;
+
+	offset = pid & BITS_PER_PAGE_MASK;
+	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
+
+	if (alloc_pidmap_page(map) < 0)
+		return -ENOMEM;
+
+	if (!test_and_set_bit(offset, map->page)) {
+		atomic_dec(&map->nr_free);
+		return pid;
+	}
+
+	return -EBUSY;
+}
|
||||
+
|
||||
+static int alloc_pidmap(struct pid_namespace *pid_ns, int desired_pid)
|
||||
{
|
||||
int i, offset, max_scan, pid, last = pid_ns->last_pid;
|
||||
struct pidmap *map;
|
||||
|
||||
+ if (desired_pid)
|
||||
+ return set_pidmap(pid_ns, desired_pid);
|
||||
+
|
||||
pid = last + 1;
|
||||
if (pid >= pid_max)
|
||||
pid = RESERVED_PIDS;
|
||||
@@ -176,22 +220,9 @@ static int alloc_pidmap(struct pid_names
|
||||
*/
|
||||
max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset;
|
||||
for (i = 0; i <= max_scan; ++i) {
|
||||
- if (unlikely(!map->page)) {
|
||||
- void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
|
||||
- /*
|
||||
- * Free the page if someone raced with us
|
||||
- * installing it:
|
||||
- */
|
||||
- spin_lock_irq(&pidmap_lock);
|
||||
- if (!map->page) {
|
||||
- map->page = page;
|
||||
- page = NULL;
|
||||
- }
|
||||
- spin_unlock_irq(&pidmap_lock);
|
||||
- kfree(page);
|
||||
- if (unlikely(!map->page))
|
||||
- break;
|
||||
- }
|
||||
+ if (alloc_pidmap_page(map) < 0)
|
||||
+ break;
|
||||
+
|
||||
if (likely(atomic_read(&map->nr_free))) {
|
||||
do {
|
||||
if (!test_and_set_bit(offset, map->page)) {
|
||||
@@ -277,7 +308,7 @@ void free_pid(struct pid *pid)
|
||||
call_rcu(&pid->rcu, delayed_put_pid);
|
||||
}
|
||||
|
||||
-struct pid *alloc_pid(struct pid_namespace *ns)
|
||||
+struct pid *alloc_pid(struct pid_namespace *ns, int this_ns_pid)
|
||||
{
|
||||
struct pid *pid;
|
||||
enum pid_type type;
|
||||
@@ -291,13 +322,14 @@ struct pid *alloc_pid(struct pid_namespa
|
||||
|
||||
tmp = ns;
|
||||
for (i = ns->level; i >= 0; i--) {
|
||||
- nr = alloc_pidmap(tmp);
|
||||
+ nr = alloc_pidmap(tmp, this_ns_pid);
|
||||
if (nr < 0)
|
||||
goto out_free;
|
||||
|
||||
pid->numbers[i].nr = nr;
|
||||
pid->numbers[i].ns = tmp;
|
||||
tmp = tmp->parent;
|
||||
+ this_ns_pid = 0;
|
||||
}
|
||||
|
||||
get_pid_ns(ns);
|
@ -1,27 +0,0 @@
|
||||
vfs: Add ->statfs callback for pipefs
|
||||
|
||||
From: Pavel Emelyanov <xemul@parallels.com>
|
||||
|
||||
This is done to make it possible to distinguish pipes
|
||||
from fifos when opening one via /proc/<pid>/fd/ link.
|
||||
|
||||
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
|
||||
Reviewed-by: Tejun Heo <tj@kernel.org>
|
||||
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
---
|
||||
fs/pipe.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
Index: linux-2.6.git/fs/pipe.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/pipe.c
|
||||
+++ linux-2.6.git/fs/pipe.c
|
||||
@@ -1254,6 +1254,7 @@ out:
|
||||
|
||||
static const struct super_operations pipefs_ops = {
|
||||
.destroy_inode = free_inode_nonrcu,
|
||||
+ .statfs = simple_statfs,
|
||||
};
|
||||
|
||||
/*
|
@ -1,85 +0,0 @@
|
||||
fs: Add do_close helper
|
||||
|
||||
To be able to close file descriptors right from inside
|
||||
kernel space do_close() helper is added.
|
||||
|
||||
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
---
|
||||
fs/open.c | 32 ++++++++++++++++++++------------
|
||||
include/linux/fs.h | 1 +
|
||||
2 files changed, 21 insertions(+), 12 deletions(-)
|
||||
|
||||
Index: linux-2.6.git/fs/open.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/open.c
|
||||
+++ linux-2.6.git/fs/open.c
|
||||
@@ -1056,17 +1056,11 @@ int filp_close(struct file *filp, fl_own
|
||||
|
||||
EXPORT_SYMBOL(filp_close);
|
||||
|
||||
-/*
|
||||
- * Careful here! We test whether the file pointer is NULL before
|
||||
- * releasing the fd. This ensures that one clone task can't release
|
||||
- * an fd while another clone is opening it.
|
||||
- */
|
||||
-SYSCALL_DEFINE1(close, unsigned int, fd)
|
||||
+int do_close(unsigned int fd)
|
||||
{
|
||||
struct file * filp;
|
||||
struct files_struct *files = current->files;
|
||||
struct fdtable *fdt;
|
||||
- int retval;
|
||||
|
||||
spin_lock(&files->file_lock);
|
||||
fdt = files_fdtable(files);
|
||||
@@ -1079,7 +1073,25 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
|
||||
FD_CLR(fd, fdt->close_on_exec);
|
||||
__put_unused_fd(files, fd);
|
||||
spin_unlock(&files->file_lock);
|
||||
- retval = filp_close(filp, files);
|
||||
+
|
||||
+ return filp_close(filp, files);
|
||||
+
|
||||
+out_unlock:
|
||||
+ spin_unlock(&files->file_lock);
|
||||
+ return -EBADF;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(do_close);
|
||||
+
|
||||
+/*
|
||||
+ * Careful here! We test whether the file pointer is NULL before
|
||||
+ * releasing the fd. This ensures that one clone task can't release
|
||||
+ * an fd while another clone is opening it.
|
||||
+ */
|
||||
+SYSCALL_DEFINE1(close, unsigned int, fd)
|
||||
+{
|
||||
+ int retval;
|
||||
+
|
||||
+ retval = do_close(fd);
|
||||
|
||||
/* can't restart close syscall because file table entry was cleared */
|
||||
if (unlikely(retval == -ERESTARTSYS ||
|
||||
@@ -1089,10 +1101,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
|
||||
retval = -EINTR;
|
||||
|
||||
return retval;
|
||||
-
|
||||
-out_unlock:
|
||||
- spin_unlock(&files->file_lock);
|
||||
- return -EBADF;
|
||||
}
|
||||
EXPORT_SYMBOL(sys_close);
|
||||
|
||||
Index: linux-2.6.git/include/linux/fs.h
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/include/linux/fs.h
|
||||
+++ linux-2.6.git/include/linux/fs.h
|
||||
@@ -2025,6 +2025,7 @@ extern struct file *file_open_root(struc
|
||||
extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
|
||||
const struct cred *);
|
||||
extern int filp_close(struct file *, fl_owner_t id);
|
||||
+extern int do_close(unsigned int fd);
|
||||
extern char * getname(const char __user *);
|
||||
|
||||
/* fs/ioctl.c */
|
@ -1,49 +0,0 @@
|
||||
fs, proc: Add /proc/$pid/tls entry
|
||||
|
||||
To be able to restart checkpointed tasks we need
|
||||
to know TLS status at dumping time. Export this
|
||||
information by /proc/$pid/tls entry.
|
||||
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
---
|
||||
fs/proc/base.c | 20 ++++++++++++++++++++
|
||||
1 file changed, 20 insertions(+)
|
||||
|
||||
Index: linux-2.6.git/fs/proc/base.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/base.c
|
||||
+++ linux-2.6.git/fs/proc/base.c
|
||||
@@ -3150,6 +3150,23 @@ static int proc_pid_personality(struct s
|
||||
return err;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_X86
|
||||
+/*
|
||||
+ * Show handler for /proc/$pid/tls: prints the "a" and "b" words of
|
||||
+ * each of the task's GDT_ENTRY_TLS_ENTRIES TLS descriptors in hex,
|
||||
+ * one descriptor per line.
|
||||
+ *
|
||||
+ * Returns 0 on success or the lock_trace() error, in which case
|
||||
+ * nothing is printed.
|
||||
+ */
|
||||
+static int proc_pid_tls(struct seq_file *m, struct pid_namespace *ns,
|
||||
+ struct pid *pid, struct task_struct *task)
|
||||
+{
|
||||
+ int err = lock_trace(task);
|
||||
+ if (!err) {
|
||||
+ int i;
|
||||
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
|
||||
+ seq_printf(m, "%x %x\n",
|
||||
+ task->thread.tls_array[i].a,
|
||||
+ task->thread.tls_array[i].b);
|
||||
+ unlock_trace(task);
|
||||
+ }
|
||||
+ return err;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
/*
|
||||
* Thread groups
|
||||
*/
|
||||
@@ -3169,6 +3186,9 @@ static const struct pid_entry tgid_base_
|
||||
INF("auxv", S_IRUSR, proc_pid_auxv),
|
||||
ONE("status", S_IRUGO, proc_pid_status),
|
||||
ONE("personality", S_IRUGO, proc_pid_personality),
|
||||
+#ifdef CONFIG_X86
|
||||
+ ONE("tls", S_IRUGO, proc_pid_tls),
|
||||
+#endif
|
||||
INF("limits", S_IRUGO, proc_pid_limits),
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
|
@ -1,28 +0,0 @@
|
||||
From: Vasiliy Kulikov <segooon@gmail.com>
|
||||
|
||||
In the patch "proc: fix races against execve() of /proc/PID/fd**"
|
||||
proc_pid_fd_link_getattr() leaked task_struct if ptrace check fails.
|
||||
|
||||
Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
|
||||
Reported-by: Cyrill Gorcunov <gorcunov@gmail.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
|
||||
fs/proc/base.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff -puN fs/proc/base.c~proc-fix-races-against-execve-of-proc-pid-fd-fix fs/proc/base.c
|
||||
--- a/fs/proc/base.c~proc-fix-races-against-execve-of-proc-pid-fd-fix
|
||||
+++ a/fs/proc/base.c
|
||||
@@ -1681,9 +1681,9 @@ static int proc_pid_fd_link_getattr(stru
|
||||
|
||||
generic_fillattr(inode, stat);
|
||||
unlock_trace(task);
|
||||
- put_task_struct(task);
|
||||
rc = 0;
|
||||
out_task:
|
||||
+ put_task_struct(task);
|
||||
return rc;
|
||||
}
|
||||
|
||||
_
|
@ -1,255 +0,0 @@
|
||||
From: Vasiliy Kulikov <segoon@openwall.com>
|
||||
|
||||
fd* files are restricted to the task's owner, and other users may not get
|
||||
direct access to them. But one may open any of these files and run any
|
||||
setuid program, keeping opened file descriptors. As there are permission
|
||||
checks on open(), but not on readdir() and read(), operations on the kept
|
||||
file descriptors will not be checked. It makes it possible to violate
|
||||
procfs permission model.
|
||||
|
||||
Reading fdinfo/* may disclose current fds' position and flags, reading
|
||||
directory contents of fdinfo/ and fd/ may disclose the number of opened
|
||||
files by the target task. This information is not sensitive per se, but it
|
||||
can reveal some private information (like length of a password stored in a
|
||||
file) under certain conditions.
|
||||
|
||||
Used existing (un)lock_trace functions to check for ptrace_may_access(),
|
||||
but instead of using EPERM return code from it use EACCES to be consistent
|
||||
with existing proc_pid_follow_link()/proc_pid_readlink() return code. If
|
||||
they differ, attacker can guess what fds exist by analyzing stat() return
|
||||
code. Patched handlers: stat() for fd/*, stat() and read() for fdinfo/*,
|
||||
readdir() and lookup() for fd/ and fdinfo/.
|
||||
|
||||
Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
|
||||
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
|
||||
Cc: <stable@kernel.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
|
||||
fs/proc/base.c | 146 ++++++++++++++++++++++++++++++++++++++++-----------------
|
||||
1 file changed, 103 insertions(+), 43 deletions(-)
|
||||
|
||||
Index: linux-2.6.git/fs/proc/base.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/base.c
|
||||
+++ linux-2.6.git/fs/proc/base.c
|
||||
@@ -1665,12 +1665,46 @@ out:
|
||||
return error;
|
||||
}
|
||||
|
||||
+static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
+ struct kstat *stat)
|
||||
+{
|
||||
+ struct inode *inode = dentry->d_inode;
|
||||
+ struct task_struct *task = get_proc_task(inode);
|
||||
+ int rc;
|
||||
+
|
||||
+ if (task == NULL)
|
||||
+ return -ESRCH;
|
||||
+
|
||||
+ rc = -EACCES;
|
||||
+ if (lock_trace(task))
|
||||
+ goto out_task;
|
||||
+
|
||||
+ generic_fillattr(inode, stat);
|
||||
+ unlock_trace(task);
|
||||
+ put_task_struct(task);
|
||||
+ rc = 0;
|
||||
+out_task:
|
||||
+ return rc;
|
||||
+}
|
||||
+
|
||||
static const struct inode_operations proc_pid_link_inode_operations = {
|
||||
.readlink = proc_pid_readlink,
|
||||
.follow_link = proc_pid_follow_link,
|
||||
.setattr = proc_setattr,
|
||||
};
|
||||
|
||||
+static const struct inode_operations proc_fdinfo_link_inode_operations = {
|
||||
+ .setattr = proc_setattr,
|
||||
+ .getattr = proc_pid_fd_link_getattr,
|
||||
+};
|
||||
+
|
||||
+static const struct inode_operations proc_fd_link_inode_operations = {
|
||||
+ .readlink = proc_pid_readlink,
|
||||
+ .follow_link = proc_pid_follow_link,
|
||||
+ .setattr = proc_setattr,
|
||||
+ .getattr = proc_pid_fd_link_getattr,
|
||||
+};
|
||||
+
|
||||
|
||||
/* building an inode */
|
||||
|
||||
@@ -1902,49 +1936,61 @@ out:
|
||||
|
||||
static int proc_fd_info(struct inode *inode, struct path *path, char *info)
|
||||
{
|
||||
- struct task_struct *task = get_proc_task(inode);
|
||||
- struct files_struct *files = NULL;
|
||||
+ struct task_struct *task;
|
||||
+ struct files_struct *files;
|
||||
struct file *file;
|
||||
int fd = proc_fd(inode);
|
||||
+ int rc;
|
||||
|
||||
- if (task) {
|
||||
- files = get_files_struct(task);
|
||||
- put_task_struct(task);
|
||||
- }
|
||||
- if (files) {
|
||||
- /*
|
||||
- * We are not taking a ref to the file structure, so we must
|
||||
- * hold ->file_lock.
|
||||
- */
|
||||
- spin_lock(&files->file_lock);
|
||||
- file = fcheck_files(files, fd);
|
||||
- if (file) {
|
||||
- unsigned int f_flags;
|
||||
- struct fdtable *fdt;
|
||||
-
|
||||
- fdt = files_fdtable(files);
|
||||
- f_flags = file->f_flags & ~O_CLOEXEC;
|
||||
- if (FD_ISSET(fd, fdt->close_on_exec))
|
||||
- f_flags |= O_CLOEXEC;
|
||||
-
|
||||
- if (path) {
|
||||
- *path = file->f_path;
|
||||
- path_get(&file->f_path);
|
||||
- }
|
||||
- if (info)
|
||||
- snprintf(info, PROC_FDINFO_MAX,
|
||||
- "pos:\t%lli\n"
|
||||
- "flags:\t0%o\n",
|
||||
- (long long) file->f_pos,
|
||||
- f_flags);
|
||||
- spin_unlock(&files->file_lock);
|
||||
- put_files_struct(files);
|
||||
- return 0;
|
||||
+ task = get_proc_task(inode);
|
||||
+ if (!task)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ rc = -EACCES;
|
||||
+ if (lock_trace(task))
|
||||
+ goto out_task;
|
||||
+
|
||||
+ rc = -ENOENT;
|
||||
+ files = get_files_struct(task);
|
||||
+ if (files == NULL)
|
||||
+ goto out_unlock;
|
||||
+
|
||||
+ /*
|
||||
+ * We are not taking a ref to the file structure, so we must
|
||||
+ * hold ->file_lock.
|
||||
+ */
|
||||
+ spin_lock(&files->file_lock);
|
||||
+ file = fcheck_files(files, fd);
|
||||
+ if (file) {
|
||||
+ unsigned int f_flags;
|
||||
+ struct fdtable *fdt;
|
||||
+
|
||||
+ fdt = files_fdtable(files);
|
||||
+ f_flags = file->f_flags & ~O_CLOEXEC;
|
||||
+ if (FD_ISSET(fd, fdt->close_on_exec))
|
||||
+ f_flags |= O_CLOEXEC;
|
||||
+
|
||||
+ if (path) {
|
||||
+ *path = file->f_path;
|
||||
+ path_get(&file->f_path);
|
||||
}
|
||||
- spin_unlock(&files->file_lock);
|
||||
- put_files_struct(files);
|
||||
- }
|
||||
- return -ENOENT;
|
||||
+ if (info)
|
||||
+ snprintf(info, PROC_FDINFO_MAX,
|
||||
+ "pos:\t%lli\n"
|
||||
+ "flags:\t0%o\n",
|
||||
+ (long long) file->f_pos,
|
||||
+ f_flags);
|
||||
+ rc = 0;
|
||||
+ } else
|
||||
+ rc = -ENOENT;
|
||||
+ spin_unlock(&files->file_lock);
|
||||
+ put_files_struct(files);
|
||||
+
|
||||
+out_unlock:
|
||||
+ unlock_trace(task);
|
||||
+out_task:
|
||||
+ put_task_struct(task);
|
||||
+ return rc;
|
||||
}
|
||||
|
||||
static int proc_fd_link(struct inode *inode, struct path *path)
|
||||
@@ -2039,7 +2085,7 @@ static struct dentry *proc_fd_instantiat
|
||||
spin_unlock(&files->file_lock);
|
||||
put_files_struct(files);
|
||||
|
||||
- inode->i_op = &proc_pid_link_inode_operations;
|
||||
+ inode->i_op = &proc_fd_link_inode_operations;
|
||||
inode->i_size = 64;
|
||||
ei->op.proc_get_link = proc_fd_link;
|
||||
d_set_d_op(dentry, &tid_fd_dentry_operations);
|
||||
@@ -2071,7 +2117,12 @@ static struct dentry *proc_lookupfd_comm
|
||||
if (fd == ~0U)
|
||||
goto out;
|
||||
|
||||
+ result = ERR_PTR(-EACCES);
|
||||
+ if (lock_trace(task))
|
||||
+ goto out;
|
||||
+
|
||||
result = instantiate(dir, dentry, task, &fd);
|
||||
+ unlock_trace(task);
|
||||
out:
|
||||
put_task_struct(task);
|
||||
out_no_task:
|
||||
@@ -2091,23 +2142,28 @@ static int proc_readfd_common(struct fil
|
||||
retval = -ENOENT;
|
||||
if (!p)
|
||||
goto out_no_task;
|
||||
+
|
||||
+ retval = -EACCES;
|
||||
+ if (lock_trace(p))
|
||||
+ goto out;
|
||||
+
|
||||
retval = 0;
|
||||
|
||||
fd = filp->f_pos;
|
||||
switch (fd) {
|
||||
case 0:
|
||||
if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
|
||||
- goto out;
|
||||
+ goto out_unlock;
|
||||
filp->f_pos++;
|
||||
case 1:
|
||||
ino = parent_ino(dentry);
|
||||
if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
|
||||
- goto out;
|
||||
+ goto out_unlock;
|
||||
filp->f_pos++;
|
||||
default:
|
||||
files = get_files_struct(p);
|
||||
if (!files)
|
||||
- goto out;
|
||||
+ goto out_unlock;
|
||||
rcu_read_lock();
|
||||
for (fd = filp->f_pos-2;
|
||||
fd < files_fdtable(files)->max_fds;
|
||||
@@ -2131,6 +2187,9 @@ static int proc_readfd_common(struct fil
|
||||
rcu_read_unlock();
|
||||
put_files_struct(files);
|
||||
}
|
||||
+
|
||||
+out_unlock:
|
||||
+ unlock_trace(p);
|
||||
out:
|
||||
put_task_struct(p);
|
||||
out_no_task:
|
||||
@@ -2208,6 +2267,7 @@ static struct dentry *proc_fdinfo_instan
|
||||
ei->fd = fd;
|
||||
inode->i_mode = S_IFREG | S_IRUSR;
|
||||
inode->i_fop = &proc_fdinfo_file_operations;
|
||||
+ inode->i_op = &proc_fdinfo_link_inode_operations;
|
||||
d_set_d_op(dentry, &tid_fd_dentry_operations);
|
||||
d_add(dentry, inode);
|
||||
/* Close the race of the process dying before we return the dentry */
|
@ -1,118 +0,0 @@
|
||||
From: Vasiliy Kulikov <segoon@openwall.com>
|
||||
|
||||
The patch "proc: fix races against execve() of /proc/PID/fd**" is still a
|
||||
partial fix for a setxid problem. link(2) is yet another way to
|
||||
identify whether a specific fd is opened by a privileged process. By
|
||||
calling link(2) against /proc/PID/fd/* an attacker may identify whether
|
||||
the fd number is valid for PID by analysing link(2) return code.
|
||||
|
||||
Both getattr() and link() can be used by the attacker iff the dentry is
|
||||
present in the dcache. In this case ->lookup() is not called and the only
|
||||
way to check ptrace permissions is either operation handler or
|
||||
->revalidate(). The easiest solution to prevent any unauthorized access
|
||||
to /proc/PID/fd*/ files is to force the dentry drop on each unauthorized
|
||||
access attempt.
|
||||
|
||||
If an attacker keeps opened fd of /proc/PID/fd/ and dcache contains a
|
||||
specific dentry for some /proc/PID/fd/XXX, any future attempt to use the
|
||||
dentry by the attacker would lead to the dentry drop as a result of a
|
||||
failed ptrace check in ->revalidate(). Then the attacker cannot spawn a
|
||||
dentry for the specific fd number because of ptrace check in ->lookup().
|
||||
|
||||
The dentry drop can still be observed by an attacker by analysing
|
||||
information from /proc/slabinfo, which is addressed in the successive
|
||||
patch.
|
||||
|
||||
Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
|
||||
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
|
||||
Cc: Al Viro <viro@zeniv.linux.org.uk>
|
||||
Cc: Christoph Lameter <cl@linux-foundation.org>
|
||||
Cc: Pekka Enberg <penberg@kernel.org>
|
||||
Cc: Matt Mackall <mpm@selenic.com>
|
||||
Cc: Alexey Dobriyan <adobriyan@gmail.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
|
||||
fs/proc/base.c | 42 ++++++------------------------------------
|
||||
1 file changed, 6 insertions(+), 36 deletions(-)
|
||||
|
||||
Index: linux-2.6.git/fs/proc/base.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/base.c
|
||||
+++ linux-2.6.git/fs/proc/base.c
|
||||
@@ -1665,46 +1665,12 @@ out:
|
||||
return error;
|
||||
}
|
||||
|
||||
-static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
- struct kstat *stat)
|
||||
-{
|
||||
- struct inode *inode = dentry->d_inode;
|
||||
- struct task_struct *task = get_proc_task(inode);
|
||||
- int rc;
|
||||
-
|
||||
- if (task == NULL)
|
||||
- return -ESRCH;
|
||||
-
|
||||
- rc = -EACCES;
|
||||
- if (lock_trace(task))
|
||||
- goto out_task;
|
||||
-
|
||||
- generic_fillattr(inode, stat);
|
||||
- unlock_trace(task);
|
||||
- rc = 0;
|
||||
-out_task:
|
||||
- put_task_struct(task);
|
||||
- return rc;
|
||||
-}
|
||||
-
|
||||
static const struct inode_operations proc_pid_link_inode_operations = {
|
||||
.readlink = proc_pid_readlink,
|
||||
.follow_link = proc_pid_follow_link,
|
||||
.setattr = proc_setattr,
|
||||
};
|
||||
|
||||
-static const struct inode_operations proc_fdinfo_link_inode_operations = {
|
||||
- .setattr = proc_setattr,
|
||||
- .getattr = proc_pid_fd_link_getattr,
|
||||
-};
|
||||
-
|
||||
-static const struct inode_operations proc_fd_link_inode_operations = {
|
||||
- .readlink = proc_pid_readlink,
|
||||
- .follow_link = proc_pid_follow_link,
|
||||
- .setattr = proc_setattr,
|
||||
- .getattr = proc_pid_fd_link_getattr,
|
||||
-};
|
||||
-
|
||||
|
||||
/* building an inode */
|
||||
|
||||
@@ -2013,6 +1979,11 @@ static int tid_fd_revalidate(struct dent
|
||||
task = get_proc_task(inode);
|
||||
fd = proc_fd(inode);
|
||||
|
||||
+ if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
|
||||
+ put_task_struct(task);
|
||||
+ task = NULL;
|
||||
+ }
|
||||
+
|
||||
if (task) {
|
||||
files = get_files_struct(task);
|
||||
if (files) {
|
||||
@@ -2085,7 +2056,7 @@ static struct dentry *proc_fd_instantiat
|
||||
spin_unlock(&files->file_lock);
|
||||
put_files_struct(files);
|
||||
|
||||
- inode->i_op = &proc_fd_link_inode_operations;
|
||||
+ inode->i_op = &proc_pid_link_inode_operations;
|
||||
inode->i_size = 64;
|
||||
ei->op.proc_get_link = proc_fd_link;
|
||||
d_set_d_op(dentry, &tid_fd_dentry_operations);
|
||||
@@ -2267,7 +2238,6 @@ static struct dentry *proc_fdinfo_instan
|
||||
ei->fd = fd;
|
||||
inode->i_mode = S_IFREG | S_IRUSR;
|
||||
inode->i_fop = &proc_fdinfo_file_operations;
|
||||
- inode->i_op = &proc_fdinfo_link_inode_operations;
|
||||
d_set_d_op(dentry, &tid_fd_dentry_operations);
|
||||
d_add(dentry, inode);
|
||||
/* Close the race of the process dying before we return the dentry */
|
@ -1,26 +0,0 @@
|
||||
From: Pavel Emelyanov <xemul@openvz.org>
|
||||
|
||||
On reading sysctl dirs we should return -EISDIR instead of -EINVAL.
|
||||
|
||||
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
|
||||
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
|
||||
Cc: Alexey Dobriyan <adobriyan@gmail.com>
|
||||
Cc: Al Viro <viro@ZenIV.linux.org.uk>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
|
||||
fs/proc/proc_sysctl.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
Index: linux-2.6.git/fs/proc/proc_sysctl.c
|
||||
===================================================================
|
||||
--- linux-2.6.git.orig/fs/proc/proc_sysctl.c
|
||||
+++ linux-2.6.git/fs/proc/proc_sysctl.c
|
||||
@@ -370,6 +370,7 @@ static const struct file_operations proc
|
||||
};
|
||||
|
||||
static const struct file_operations proc_sys_dir_file_operations = {
|
||||
+ .read = generic_read_dir,
|
||||
.readdir = proc_sys_readdir,
|
||||
.llseek = generic_file_llseek,
|
||||
};
|
@ -1,13 +0,0 @@
|
||||
The kernel patches series. See "series" file to obtain
|
||||
order of application. Not all patches address C/R directly
|
||||
but some of them are needed due to dependencies.
|
||||
|
||||
The following patches are known to be in -mm tree already
|
||||
|
||||
procfs-report-eisdir-when-reading-sysctl-dirs-in-proc.patch
|
||||
proc-fix-races-against-execve-of-proc-pid-fd.patch
|
||||
proc-fix-races-against-execve-of-proc-pid-fd-fix.patch
|
||||
proc-force-dcache-drop-on-unauthorized-access.patch
|
||||
cr-statfs-callback-for-pipefs
|
||||
|
||||
Has been tested on Linux 3.1-rc3.
|
@ -1,13 +0,0 @@
|
||||
procfs-report-eisdir-when-reading-sysctl-dirs-in-proc.patch
|
||||
proc-fix-races-against-execve-of-proc-pid-fd.patch
|
||||
proc-fix-races-against-execve-of-proc-pid-fd-fix.patch
|
||||
proc-force-dcache-drop-on-unauthorized-access.patch
|
||||
cr-statfs-callback-for-pipefs
|
||||
fs-proc-switch-to-dentry
|
||||
cr-proc-map-files-21
|
||||
cr-clone-with-pid-support
|
||||
cr-proc-add-children
|
||||
fs-add-do-close
|
||||
fs-proc-add-tls
|
||||
fs-proc-add-mm-task-stat
|
||||
binfmt-elf-for-cr-5
|
Loading…
x
Reference in New Issue
Block a user