2
0
mirror of https://github.com/checkpoint-restore/criu synced 2025-08-22 01:51:51 +00:00

criu: shstk: prepare shadow stack parameters for restorer blob

Shadow stacks must be populated using the special WRSS instruction. This
instruction is only available when shadow stack is enabled; executing it
with shadow stack disabled causes #UD.

Moreover, shadow stack VMAs cannot be mremap()ed and they must be
created using map_shadow_stack() system call. This requires delaying the
restore of shadow stacks to restorer blob after the CRIU mappings are
cleared.

Introduce the rst_shstk_info structure to hold shadow stack parameters
required in the restorer blob and populate this structure in the
arch_shstk_prepare() function.

Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
This commit is contained in:
Mike Rapoport (IBM) 2022-05-25 10:30:06 +03:00 committed by Andrei Vagin
parent 4b6dda7ec0
commit 2ebd1a4f0b
7 changed files with 185 additions and 0 deletions

View File

@ -9,6 +9,7 @@ obj-y += cpu.o
obj-y += crtools.o
obj-y += kerndat.o
obj-y += sigframe.o
obj-y += shstk.o
ifeq ($(CONFIG_COMPAT),y)
obj-y += sigaction_compat.o
endif

View File

@ -8,6 +8,7 @@
#include <compel/plugins/std/syscall-codes.h>
#include <compel/asm/sigframe.h>
#include "asm/compat.h"
#include "asm/shstk.h"
#ifdef CONFIG_COMPAT
extern void restore_tls(tls_t *ptls);

View File

@ -0,0 +1,69 @@
#ifndef __CR_ASM_SHSTK_H__
#define __CR_ASM_SHSTK_H__
/*
* Shadow stack constants from Linux
*/
/* arch/x86/include/uapi/asm/mman.h */
#ifndef SHADOW_STACK_SET_TOKEN
#define SHADOW_STACK_SET_TOKEN 0x1 /* Set up a restore token in the shadow stack */
#endif
/* arch/x86/include/uapi/asm/prctl.h */
/*
 * Shadow stack request codes.
 * NOTE(review): presumably these are arch_prctl() operations -- confirm
 * against the kernel header named above.
 */
#define ARCH_SHSTK_ENABLE 0x5001
#define ARCH_SHSTK_DISABLE 0x5002
#define ARCH_SHSTK_LOCK 0x5003
#define ARCH_SHSTK_UNLOCK 0x5004
#define ARCH_SHSTK_STATUS 0x5005
/* Shadow stack feature bits */
#define ARCH_SHSTK_SHSTK (1ULL << 0)
#define ARCH_SHSTK_WRSS (1ULL << 1)
/*
 * NOTE(review): looks like a CRIU-side marker that this architecture
 * supports shadow stacks rather than a kernel constant -- confirm.
 */
#define ARCH_HAS_SHSTK
/* from arch/x86/kernel/shstk.c */
#define SHSTK_DATA_BIT (1UL << 63) /* BIT(63) */
/*
* Shadow stack memory cannot be restored with memcpy/pread but only using
* a special instruction that can write to shadow stack.
* That instruction is only available when shadow stack is enabled,
* otherwise it causes #UD.
*
* Also, shadow stack VMAs cannot be mmap()ed or mremap()ed; they must be
* created using the map_shadow_stack() system call. This pushes creation of
* shadow stack VMAs to the restorer blob after CRIU mappings are freed.
*
* There is also some additional juggling with shadow stacks to ensure that
* we don't unmap an active shadow stack.
*
* The overall sequence of restoring shadow stack is
* - Enable shadow stack early after clone()ing the task
* - Unlock shadow stack features using ptrace
* - In the restorer blob:
* - switch to a temporary shadow stack to be able to unmap shadow stack
* with the CRIU mappings
* - after memory mappings are restored, recreate shadow stack VMAs,
* populate them using the wrss instruction and switch to the task's
* shadow stack
* - lock shadow stack features
*/
/*
 * Shadow stack parameters that are embedded in the task and thread
 * restore arguments and consumed by the restorer blob.
 */
struct rst_shstk_info {
unsigned long vma_start; /* start of shadow stack VMA */
unsigned long vma_size; /* size of shadow stack VMA */
unsigned long premmaped_addr; /* address of shadow stack copy in
the premmaped area */
unsigned long tmp_shstk; /* address of temporary shadow stack */
u64 ssp; /* shadow stack pointer */
u64 cet; /* CET control state */
};
/*
 * Self-referencing define: lets other headers detect with #ifndef that
 * this architecture already provides struct rst_shstk_info.
 */
#define rst_shstk_info rst_shstk_info
struct task_restore_args;
struct pstree_item;
/*
 * Fill the shadow stack parameters for the restorer blob.
 *
 * @item: task being restored
 * @core: core image entry of the task
 * @ta:   task restore arguments; the per-thread restore arguments are
 *        expected to follow @ta in memory
 *
 * Returns 0 on success (including when the task does not need a shadow
 * stack) and -1 on failure.
 */
int arch_shstk_prepare(struct pstree_item *item, CoreEntry *core,
struct task_restore_args *ta);
/*
 * Self-referencing define: lets other headers detect with #ifndef that
 * an arch-specific arch_shstk_prepare() is available.
 */
#define arch_shstk_prepare arch_shstk_prepare
#endif /* __CR_ASM_SHSTK_H__ */

90
criu/arch/x86/shstk.c Normal file
View File

@ -0,0 +1,90 @@
#include <common/list.h>
#include <compel/cpu.h>
#include "pstree.h"
#include "restorer.h"
#include "rst-malloc.h"
#include "vma.h"
/*
 * Tell whether shadow stack state has to be restored for @item.
 *
 * This is the case only for an alive task whose dumped xsave area
 * carries a CET entry with the ARCH_SHSTK_SHSTK bit set, and only when
 * the current CPU supports shadow stacks. A task dumped with CET state
 * but restored on a CPU without X86_FEATURE_SHSTK gets a one-time
 * warning and is treated as not needing a shadow stack.
 */
static bool task_needs_shstk(struct pstree_item *item, CoreEntry *core)
{
	UserX86FpregsEntry *fp;

	if (!task_alive(item))
		return false;

	fp = core->thread_info->fpregs;

	/* No CET state in the image - nothing to restore */
	if (!fp->xsave || !fp->xsave->cet)
		return false;

	if (!compel_cpu_has_feature(X86_FEATURE_SHSTK)) {
		pr_warn_once("Restoring task with shadow stack on non-CET machine\n");
		return false;
	}

	return (fp->xsave->cet->cet & ARCH_SHSTK_SHSTK) != 0;
}
/*
 * Locate the shadow stack VMA that contains shstk->ssp and record its
 * location in @shstk: the original start and size, the address of its
 * copy in the premapped area, and the address right past that copy,
 * which is stored as the temporary shadow stack address.
 *
 * Only the first matching VMA is used. Always returns 0; when no VMA
 * matches, @shstk is left untouched.
 */
static int shstk_prepare_task(struct vm_area_list *vmas,
			      struct rst_shstk_info *shstk)
{
	struct vma_area *area;

	list_for_each_entry(area, &vmas->h, list) {
		if (!vma_area_is(area, VMA_AREA_SHSTK))
			continue;
		if (!in_vma_area(area, shstk->ssp))
			continue;

		shstk->vma_start = area->e->start;
		shstk->vma_size = vma_area_len(area);
		shstk->premmaped_addr = area->premmaped_addr;
		/* The temporary shadow stack address is the end of the premapped copy */
		shstk->tmp_shstk = shstk->premmaped_addr + shstk->vma_size;
		break;
	}

	return 0;
}
int arch_shstk_prepare(struct pstree_item *item, CoreEntry *core,
struct task_restore_args *ta)
{
struct thread_restore_args *args_array = (struct thread_restore_args *)(&ta[1]);
UserX86FpregsEntry *fpregs = core->thread_info->fpregs;
struct vm_area_list *vmas = &rsti(item)->vmas;
struct rst_shstk_info *shstk = &ta->shstk;
int i;
if (!task_needs_shstk(item, core))
return 0;
shstk->cet = fpregs->xsave->cet->cet;
shstk->ssp = fpregs->xsave->cet->ssp;
if (shstk_prepare_task(vmas, shstk)) {
pr_err("Failed to prepare shadow stack memory\n");
return -1;
}
for (i = 0; i < item->nr_threads; i++) {
struct thread_restore_args *thread_args = &args_array[i];
core = item->core[i];
fpregs = core->thread_info->fpregs;
shstk = &thread_args->shstk;
shstk->cet = fpregs->xsave->cet->cet;
shstk->ssp = fpregs->xsave->cet->ssp;
if (shstk_prepare_task(vmas, shstk)) {
pr_err("Failed to prepare shadow stack memory\n");
return -1;
}
}
return 0;
}

View File

@ -975,6 +975,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
if (setup_uffd(pid, ta))
return -1;
if (arch_shstk_prepare(current, core, ta))
return -1;
return sigreturn_restore(pid, ta, args_len, core);
}

View File

@ -7,4 +7,17 @@
extern int arch_set_thread_regs_nosigrt(struct pid *pid);
struct task_restore_args;
struct pstree_item;
#ifndef arch_shstk_prepare
/*
 * Fallback used when no arch_shstk_prepare macro has been defined before
 * this point (i.e. the architecture did not supply its own version):
 * there is nothing to prepare, so it ignores its arguments and always
 * returns 0.
 */
static inline int arch_shstk_prepare(struct pstree_item *item,
CoreEntry *core,
struct task_restore_args *ta)
{
return 0;
}
#define arch_shstk_prepare arch_shstk_prepare
#endif
#endif

View File

@ -56,6 +56,10 @@ struct restore_posix_timer {
int overrun;
};
/*
 * Empty placeholder used when no rst_shstk_info macro has been defined
 * before this point (i.e. the architecture did not supply its own
 * structure), so the restore argument structures can embed a member of
 * this type unconditionally.
 */
#ifndef rst_shstk_info
struct rst_shstk_info {};
#endif
/*
* We should be able to construct fpu sigframe in sigreturn_prep_fpu_frame,
* so the mem_zone.rt_sigframe should be 64-bytes aligned. To make things
@ -119,6 +123,8 @@ struct thread_restore_args {
unsigned int seccomp_filters_n;
bool seccomp_force_tsync;
struct rst_shstk_info shstk;
char comm[TASK_COMM_LEN];
int cg_set;
int cgroupd_sk;
@ -240,6 +246,8 @@ struct task_restore_args {
uid_t uid;
u32 cap_eff[CR_CAP_SIZE];
struct rst_shstk_info shstk;
} __aligned(64);
/*