2
0
mirror of https://github.com/checkpoint-restore/criu synced 2025-08-30 05:48:05 +00:00

rseq: fail dump if rseq is used but host doesn't support get_rseq_conf feature

A lot of kernel versions lack support for ptrace(PTRACE_GET_RSEQ_CONFIGURATION).
But the userspace may be fresh (for instance, containers with a fresh Fedora
running on a CentOS 7 host). Consider two scenarios:

- kernel has no ptrace(PTRACE_GET_RSEQ_CONFIGURATION) support

1. there is a process which uses rseq => fail the dump
2. there is no process which uses rseq => we can dump without any problems

But how do we determine whether a process uses rseq without the get_rseq_conf
feature? Let's just try to do an rseq registration from the parasite. If rseq is
already registered, the registration attempt fails with an error (-EINVAL for a
different rseq area). If not, the registration succeeds.

Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
This commit is contained in:
Alexander Mikhalitsyn 2022-04-08 23:04:08 +03:00 committed by Andrei Vagin
parent f81e3062ca
commit c5162cef52
4 changed files with 146 additions and 0 deletions

View File

@ -188,6 +188,25 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc)
return 0;
}
/*
 * Decide whether the dump has to be aborted because of rseq().
 *
 * @tid:     thread id, used for log messages only
 * @ti_rseq: result of the rseq probe reported by the parasite
 *
 * Returns 0 when the thread can be dumped and -1 when the thread has
 * rseq() registered while the kernel gives us no way to read its
 * configuration.
 */
static int check_thread_rseq(pid_t tid, const struct parasite_check_rseq *ti_rseq)
{
	/*
	 * The probe result only matters when the kernel knows about rseq()
	 * but can't report its configuration through ptrace().
	 */
	if (kdat.has_rseq && !kdat.has_ptrace_get_rseq_conf) {
		pr_debug("%d has rseq_inited = %d\n", tid, ti_rseq->rseq_inited);

		/* rseq() is in use and we can't dump it: refuse to go on. */
		if (ti_rseq->rseq_inited) {
			pr_err("%d has rseq but kernel lacks get_rseq_conf feature\n", tid);
			return -1;
		}
	}

	return 0;
}
struct cr_imgset *glob_imgset;
static int collect_fds(pid_t pid, struct parasite_drain_fd **dfds)
@ -718,6 +737,17 @@ int dump_thread_core(int pid, CoreEntry *core, const struct parasite_dump_thread
if (!ret)
ret = seccomp_dump_thread(pid, tc);
/*
* We are dumping rseq() in the dump_thread_rseq() function,
* *before* the processes get infected (because of a ptrace
* request API restriction). At this point, if the kernel lacks
* kdat.has_ptrace_get_rseq_conf support, we have to ensure
* that the dumped processes haven't initialized rseq(), or
* fail the dump if rseq() was used.
*/
if (!ret)
ret = check_thread_rseq(pid, &ti->rseq);
return ret;
}

View File

@ -164,10 +164,17 @@ struct parasite_dump_creds {
unsigned int groups[0];
};
/*
 * Input and output of the parasite-side rseq() probe; embedded in
 * struct parasite_dump_thread.
 *
 * has_rseq and has_ptrace_get_rseq_conf are copied from kdat by
 * init_parasite_rseq_arg() before the parasite command is run, and
 * rseq_inited is reset to false there. The parasite's check_rseq()
 * skips the probe when has_rseq is false or has_ptrace_get_rseq_conf
 * is true; otherwise it sets rseq_inited to true when its probing
 * sys_rseq() call returns -EINVAL.
 */
struct parasite_check_rseq {
bool has_rseq;
bool has_ptrace_get_rseq_conf; /* no need to check if supported */
bool rseq_inited;
};
struct parasite_dump_thread {
unsigned int *tid_addr;
pid_t tid;
tls_t tls;
struct parasite_check_rseq rseq;
stack_t sas;
int pdeath_sig;
char comm[TASK_COMM_LEN];

View File

@ -132,6 +132,13 @@ static int alloc_groups_copy_creds(CredsEntry *ce, struct parasite_dump_creds *c
return ce->groups ? 0 : -ENOMEM;
}
static void init_parasite_rseq_arg(struct parasite_check_rseq *rseq)
{
rseq->has_rseq = kdat.has_rseq;
rseq->has_ptrace_get_rseq_conf = kdat.has_ptrace_get_rseq_conf;
rseq->rseq_inited = false;
}
int parasite_dump_thread_leader_seized(struct parasite_ctl *ctl, int pid, CoreEntry *core)
{
ThreadCoreEntry *tc = core->thread_core;
@ -144,6 +151,8 @@ int parasite_dump_thread_leader_seized(struct parasite_ctl *ctl, int pid, CoreEn
pc = args->creds;
pc->cap_last_cap = kdat.last_cap;
init_parasite_rseq_arg(&args->rseq);
ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_THREAD, ctl);
if (ret < 0)
return ret;
@ -197,6 +206,8 @@ int parasite_dump_thread_seized(struct parasite_thread_ctl *tctl, struct parasit
compel_arch_get_tls_thread(tctl, &args->tls);
init_parasite_rseq_arg(&args->rseq);
ret = compel_run_in_thread(tctl, PARASITE_CMD_DUMP_THREAD);
if (ret) {
pr_err("Can't init thread in parasite %d\n", pid);

View File

@ -169,6 +169,7 @@ static int dump_posix_timers(struct parasite_dump_posix_timers_args *args)
}
static int dump_creds(struct parasite_dump_creds *args);
static int check_rseq(struct parasite_check_rseq *rseq);
static int dump_thread_common(struct parasite_dump_thread *ti)
{
@ -199,6 +200,12 @@ static int dump_thread_common(struct parasite_dump_thread *ti)
goto out;
}
ret = check_rseq(&ti->rseq);
if (ret) {
pr_err("Unable to check if rseq() is initialized: %d\n", ret);
goto out;
}
ret = dump_creds(ti->creds);
out:
return ret;
@ -315,6 +322,97 @@ grps_err:
return -1;
}
/*
 * Probe whether the current (victim) thread already has rseq() registered.
 *
 * @rseq: in:  has_rseq / has_ptrace_get_rseq_conf feature flags,
 *        out: rseq_inited is set to true if a registration was detected
 *             and to false if the probe registration succeeded.
 *
 * Returns 0 when the probe gave a definite answer (or was not needed)
 * and -1 on any unexpected failure.
 */
static int check_rseq(struct parasite_check_rseq *rseq)
{
	/*
	 * Raw syscalls report failure as -errno, i.e. as a value in the
	 * range [-4095, -1] when looked at as an unsigned long.
	 */
	const unsigned long max_errno = 4095;
	int ret;
	unsigned long rseq_abi_pointer;
	unsigned long rseq_abi_size;
	uint32_t rseq_signature;
	void *addr;

	/* no need to do hacky check if we can get all info from ptrace() */
	if (!rseq->has_rseq || rseq->has_ptrace_get_rseq_conf)
		return 0;

	/*
	 * We need to determine if the victim process has rseq()
	 * initialized, but there is no proper kernel interface
	 * for that at this point.
	 *
	 * If we call the rseq() syscall while the task already has
	 * current->rseq set, the kernel answers with:
	 *   -EINVAL if current->rseq != rseq || rseq_len != sizeof(*rseq),
	 *   -EPERM  if current->rseq_sig != sig,
	 *   -EBUSY  if current->rseq == rseq && rseq_len == sizeof(*rseq) &&
	 *              current->rseq_sig == sig.
	 *
	 * If current->rseq == NULL (rseq() wasn't used), the kernel goes on
	 * to the IS_ALIGNED(rseq ...) check. Failing that one gives -EINVAL
	 * too, which would make it impossible to tell whether rseq() was
	 * initialized or not.
	 *
	 * So we construct the arguments with:
	 *   1. a correct rseq_abi_size
	 *   2. an aligned and valid rseq_abi_pointer
	 * and look at what rseq() returns:
	 *   0: rseq *wasn't* used and we have just registered it ourselves
	 *      (so we unregister it again right away),
	 *   -EINVAL: rseq is already initialized, so we *have* to dump it.
	 *      But as has_ptrace_get_rseq_conf is false, the caller should
	 *      fail the dump, because it's unsafe to skip the rseq() dump
	 *      for processes with rseq() initialized.
	 *   -EPERM or -EBUSY: should not happen as we take a fresh memory
	 *      area for rseq.
	 */
	addr = (void *)sys_mmap(NULL, sizeof(struct criu_rseq), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1,
				0);
	/*
	 * sys_mmap() is a raw syscall wrapper: on failure it hands back
	 * -errno (e.g. -ENOMEM), which is not equal to MAP_FAILED, so test
	 * for the whole error range instead.
	 */
	if ((unsigned long)addr >= -max_errno) {
		pr_err("mmap() failed for struct rseq ret = %lx\n", (unsigned long)addr);
		return -1;
	}

	memset(addr, 0, sizeof(struct criu_rseq));

	/* sys_mmap returns page aligned addresses */
	rseq_abi_pointer = (unsigned long)addr;
	rseq_abi_size = (unsigned long)sizeof(struct criu_rseq);
	/*
	 * It's not so important to have a unique signature for us,
	 * because rseq_abi_pointer is guaranteed to be unique.
	 */
	rseq_signature = 0x12345612;

	pr_info("\ttrying sys_rseq(%lx, %lx, %x, %x)\n", rseq_abi_pointer, rseq_abi_size, 0, rseq_signature);
	ret = sys_rseq((void *)rseq_abi_pointer, rseq_abi_size, 0, rseq_signature);
	if (ret) {
		if (ret == -EINVAL) {
			pr_info("\trseq is initialized in the victim\n");
			rseq->rseq_inited = true;
			ret = 0;
		} else {
			pr_err("\tunexpected failure of sys_rseq(%lx, %lx, %x, %x) = %d\n", rseq_abi_pointer,
			       rseq_abi_size, 0, rseq_signature, ret);
			ret = -1;
		}
	} else {
		/* The probe registered our area: undo that before leaving. */
		ret = sys_rseq((void *)rseq_abi_pointer, rseq_abi_size, RSEQ_FLAG_UNREGISTER, rseq_signature);
		if (ret) {
			pr_err("\tfailed to unregister sys_rseq(%lx, %lx, %x, %x) = %d\n", rseq_abi_pointer,
			       rseq_abi_size, RSEQ_FLAG_UNREGISTER, rseq_signature, ret);
			ret = -1;
			/* we can't do munmap() because rseq is registered and we failed to unregister it */
			goto out_nounmap;
		}

		rseq->rseq_inited = false;
		ret = 0;
	}

	sys_munmap(addr, sizeof(struct criu_rseq));
out_nounmap:
	return ret;
}
static int fill_fds_fown(int fd, struct fd_opts *p)
{
int flags, ret;