mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-22 01:51:51 +00:00
restorer: shstk: implement shadow stack restore
The restore of a task with shadow stack enabled adds these steps: * switch from the default shadow stack to a temporary shadow stack allocated in the premapped area * unmap CRIU mappings; nothing changes here, but it is important that CRIU mappings are removed only after switching to the temporary shadow stack * create the shadow stack VMA with map_shadow_stack() * restore the shadow stack contents with wrss * switch to the "real" shadow stack * lock shadow stack features Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org>
This commit is contained in:
parent
7dd5830023
commit
a48aa33eaa
@ -118,3 +118,4 @@ __NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how
|
||||
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
|
||||
__NR_rseq 334 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
__NR_membarrier 324 sys_membarrier (int cmd, unsigned int flags, int cpu_id)
|
||||
__NR_map_shadow_stack 453 sys_map_shadow_stack (unsigned long addr, unsigned long size, unsigned int flags)
|
||||
|
@ -177,6 +177,24 @@ static inline void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe)
|
||||
#define USER32_CS 0x23
|
||||
|
||||
/* clang-format off */
|
||||
/*
 * rst_sigreturn in the restorer is a non-inlined call, so it leaves one
 * extra entry on the shadow stack above the sigframe token.
 *
 * rax is cleared and then loaded with the shadow stack pointer; a non-zero
 * value means the shadow stack is enabled, and in that case the shadow
 * stack pointer is advanced by one entry to drop the extra one. With a
 * zero value nothing is popped.
 */
#define ARCH_SHSTK_POP()					\
	asm volatile(						\
		     "xor %%rax, %%rax\n"			\
		     "rdsspq %%rax\n"				\
		     "testq %%rax, %%rax\n"			\
		     "jz 1f\n"					\
		     "movq $1, %%rax\n"				\
		     "incsspq %%rax\n"				\
		     "1:\n"					\
		     : /* no outputs */				\
		     : /* no inputs */				\
		     : "rax")
|
||||
|
||||
#define ARCH_RT_SIGRETURN_NATIVE(new_sp) \
|
||||
asm volatile( \
|
||||
"movq %0, %%rax \n" \
|
||||
@ -205,9 +223,10 @@ static inline void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe)
|
||||
|
||||
#define ARCH_RT_SIGRETURN_RST(new_sp, rt_sigframe) \
|
||||
do { \
|
||||
if ((rt_sigframe)->is_native) \
|
||||
if ((rt_sigframe)->is_native) { \
|
||||
ARCH_SHSTK_POP(); \
|
||||
ARCH_RT_SIGRETURN_NATIVE(new_sp); \
|
||||
else \
|
||||
} else \
|
||||
ARCH_RT_SIGRETURN_COMPAT(new_sp); \
|
||||
} while (0)
|
||||
|
||||
|
@ -10,11 +10,11 @@
|
||||
#endif
|
||||
|
||||
/* arch/x86/include/uapi/asm/prctl.h */
|
||||
#define ARCH_SHSTK_ENABLE 0x5001
|
||||
#define ARCH_SHSTK_ENABLE 0x5001
|
||||
#define ARCH_SHSTK_DISABLE 0x5002
|
||||
#define ARCH_SHSTK_LOCK 0x5003
|
||||
#define ARCH_SHSTK_UNLOCK 0x5004
|
||||
#define ARCH_SHSTK_STATUS 0x5005
|
||||
#define ARCH_SHSTK_UNLOCK 0x5004
|
||||
#define ARCH_SHSTK_STATUS 0x5005
|
||||
|
||||
#define ARCH_SHSTK_SHSTK (1ULL << 0)
|
||||
#define ARCH_SHSTK_WRSS (1ULL << 1)
|
||||
@ -66,13 +66,207 @@ int arch_shstk_prepare(struct pstree_item *item, CoreEntry *core,
|
||||
struct task_restore_args *ta);
|
||||
#define arch_shstk_prepare arch_shstk_prepare
|
||||
|
||||
#if 0
|
||||
int arch_shstk_unlock(struct pstree_item *item, CoreEntry *core, pid_t pid);
|
||||
#define arch_shstk_unlock arch_shstk_unlock
|
||||
|
||||
int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
|
||||
int (*func)(void *arg), void *arg);
|
||||
#define arch_shstk_trampoline arch_shstk_trampoline
|
||||
#endif
|
||||
|
||||
#ifdef CR_NOGLIBC
|
||||
|
||||
#include <compel/plugins/std/syscall.h>
|
||||
#include <compel/cpu.h>
|
||||
#include "vma.h"
|
||||
|
||||
#define SHSTK_BUSY_BIT (1UL << 0) /* BIT(0) */
|
||||
|
||||
static inline int shstk_map(unsigned long addr, unsigned long size)
|
||||
{
|
||||
long shstk = sys_map_shadow_stack(addr, size, SHADOW_STACK_SET_TOKEN);
|
||||
|
||||
if (shstk < 0) {
|
||||
pr_err("Failed to map shadow stack at %lx: %ld\n", addr, shstk);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (shstk != addr) {
|
||||
pr_err("Shadow stack address mismatch: need %lx, got %lx\n", addr, shstk);
|
||||
return -1;
|
||||
}
|
||||
|
||||
pr_info("Created shadow stack at %lx\n", shstk);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* clang-format off */
|
||||
/*
 * Read the current shadow stack pointer.
 *
 * rdsspq behaves as a NOP when shadow stacks are not enabled and then
 * leaves its operand untouched, so the register is preloaded with 0 and
 * passed as a read-write operand. Without that the function would return
 * whatever happened to be in the chosen register.
 *
 * Returns the shadow stack pointer, or 0 if the shadow stack is disabled.
 */
static inline unsigned long get_ssp(void)
{
	unsigned long ssp = 0;

	__asm__ __volatile__("rdsspq %0" : "+r"(ssp));

	return ssp;
}
|
||||
|
||||
/*
 * Store the 64-bit value @val at shadow stack address @addr using the
 * wrssq instruction.
 */
static inline void wrssq(unsigned long addr, unsigned long val)
{
	__asm__ __volatile__("wrssq %[val], (%[addr])"
			     : /* no outputs */
			     : [addr] "r"(addr), [val] "r"(val)
			     : "memory");
}
|
||||
/* clang-format on */
|
||||
|
||||
/*
 * Switch the shadow stack pointer to @new_ssp.
 *
 * @new_ssp is handed to rstorssp as the memory operand, and saveprevssp
 * is issued right after it. The previous and the new pointer are logged
 * at debug level.
 */
static always_inline void shstk_switch_ssp(unsigned long new_ssp)
{
	unsigned long prev_ssp;

	/* remember where we came from, for the debug message only */
	prev_ssp = get_ssp();

	__asm__ __volatile__("rstorssp (%[token])\n" :: [token] "r"(new_ssp));
	__asm__ __volatile__("saveprevssp");

	pr_debug("changed ssp from %lx to %lx\n", prev_ssp, new_ssp);
}
|
||||
|
||||
/*
|
||||
* Disable writes to the shadow stack and lock it's disable/enable control
|
||||
*/
|
||||
static inline int shstk_finalize(void)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
ret = sys_arch_prctl(ARCH_SHSTK_DISABLE, ARCH_SHSTK_WRSS);
|
||||
if (ret) {
|
||||
pr_err("Failed to disable writes to shadow stack\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = sys_arch_prctl(ARCH_SHSTK_LOCK, ARCH_SHSTK_SHSTK);
|
||||
if (ret)
|
||||
pr_err("Failed to lock shadow stack controls\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Restore contents of the shadow stack and set shadow stack pointer
|
||||
*/
|
||||
static always_inline int shstk_restore(struct rst_shstk_info *cet)
|
||||
{
|
||||
unsigned long *shstk_data = (unsigned long *)cet->premmaped_addr;
|
||||
unsigned long ssp = cet->vma_start + cet->vma_size - 8;
|
||||
unsigned long shstk_top = cet->vma_size / 8 - 1;
|
||||
unsigned long val;
|
||||
long ret;
|
||||
|
||||
if (!(cet->cet & ARCH_SHSTK_SHSTK))
|
||||
return 0;
|
||||
|
||||
if (shstk_map(cet->vma_start, cet->vma_size))
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Switch shadow stack from temporary location to the actual task's
|
||||
* shadow stack VMA
|
||||
*/
|
||||
shstk_switch_ssp(ssp);
|
||||
|
||||
/* restore shadow stack contents */
|
||||
for (; ssp >= cet->ssp; ssp -= 8, shstk_top--)
|
||||
wrssq(ssp, shstk_data[shstk_top]);
|
||||
|
||||
/*
|
||||
* Add tokens for sigreturn frame and for switch of the shadow stack.
|
||||
* The sigreturn token will be checked by the kernel during
|
||||
* processing of sigreturn
|
||||
* The token for stack switch is required by rstorssp and
|
||||
* saveprevssp semantics
|
||||
*/
|
||||
|
||||
/* token for sigreturn frame */
|
||||
val = ALIGN_DOWN(cet->ssp, 8) | SHSTK_DATA_BIT;
|
||||
wrssq(ssp, val);
|
||||
|
||||
/* shadow stack switch token */
|
||||
val = ssp | SHSTK_BUSY_BIT;
|
||||
ssp -= 8;
|
||||
wrssq(ssp, val);
|
||||
|
||||
/* reset shadow stack pointer to the proper location */
|
||||
shstk_switch_ssp(ssp);
|
||||
|
||||
ret = sys_munmap(shstk_data, cet->vma_size + PAGE_SIZE);
|
||||
if (ret < 0) {
|
||||
pr_err("Failed to unmap premmaped shadow stack\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return shstk_finalize();
|
||||
}
|
||||
#define arch_shstk_restore shstk_restore
|
||||
|
||||
/*
|
||||
* Disable shadow stack
|
||||
*/
|
||||
static inline int shstk_disable(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = sys_arch_prctl(ARCH_SHSTK_DISABLE, ARCH_SHSTK_WRSS);
|
||||
if (ret) {
|
||||
pr_err("Failed to disable writes to shadow stack\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = sys_arch_prctl(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
|
||||
if (ret) {
|
||||
pr_err("Failed to disable shadow stack\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = sys_arch_prctl(ARCH_SHSTK_LOCK, ARCH_SHSTK_SHSTK);
|
||||
if (ret)
|
||||
pr_err("Failed to lock shadow stack controls\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Switch to temporary shadow stack
|
||||
*/
|
||||
static always_inline int shstk_switch_to_restorer(struct rst_shstk_info *cet)
|
||||
{
|
||||
unsigned long ssp;
|
||||
long ret;
|
||||
|
||||
if (!(cet->cet & ARCH_SHSTK_SHSTK))
|
||||
return 0;
|
||||
|
||||
ret = sys_munmap((void *)cet->tmp_shstk, PAGE_SIZE);
|
||||
if (ret < 0) {
|
||||
pr_err("Failed to unmap area for temporary shadow stack\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = shstk_map(cet->tmp_shstk, PAGE_SIZE);
|
||||
if (ret < 0)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Switch shadow stack from the default created by the kernel to a
|
||||
* temporary shadow stack allocated in the premmaped area
|
||||
*/
|
||||
ssp = cet->tmp_shstk + PAGE_SIZE - 8;
|
||||
shstk_switch_ssp(ssp);
|
||||
|
||||
ret = sys_arch_prctl(ARCH_SHSTK_ENABLE, ARCH_SHSTK_WRSS);
|
||||
if (ret) {
|
||||
pr_err("Failed to enable writes to shadow stack\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#define arch_shstk_switch_to_restorer shstk_switch_to_restorer
|
||||
|
||||
#endif /* CR_NOGLIBC */
|
||||
|
||||
#endif /* __CR_ASM_SHSTK_H__ */
|
||||
|
@ -339,4 +339,20 @@ enum {
|
||||
#define __r_sym(name) restorer_sym##name
|
||||
#define restorer_sym(rblob, name) (void *)(rblob + __r_sym(name))
|
||||
|
||||
#ifndef arch_shstk_switch_to_restorer
/*
 * Fallback used when no arch_shstk_switch_to_restorer macro has been
 * defined before this point: there is nothing to switch, report success.
 */
static inline int arch_shstk_switch_to_restorer(struct rst_shstk_info *shstk)
{
	(void)shstk;

	return 0;
}
#define arch_shstk_switch_to_restorer arch_shstk_switch_to_restorer
#endif
|
||||
|
||||
#ifndef arch_shstk_restore
/*
 * Fallback used when no arch_shstk_restore macro has been defined before
 * this point: there is nothing to restore, report success.
 */
static inline int arch_shstk_restore(struct rst_shstk_info *shstk)
{
	(void)shstk;

	return 0;
}
#define arch_shstk_restore arch_shstk_restore
#endif
|
||||
|
||||
#endif /* __CR_RESTORER_H__ */
|
||||
|
@ -18,6 +18,11 @@ ifeq ($(ARCH),mips)
|
||||
ccflags-y += -mno-abicalls -fno-pic
|
||||
endif
|
||||
|
||||
# -mshstk required for CET instructions
|
||||
ifeq ($(ARCH),x86)
|
||||
ccflags-y += -mshstk
|
||||
endif
|
||||
|
||||
LDS := compel/arch/$(ARCH)/scripts/compel-pack.lds.S
|
||||
|
||||
restorer-obj-y += parasite-vdso.o ./$(ARCH_DIR)/vdso-pie.o
|
||||
|
@ -752,6 +752,10 @@ __visible long __export_restore_thread(struct thread_restore_args *args)
|
||||
goto core_restore_end;
|
||||
}
|
||||
|
||||
/* restore original shadow stack */
|
||||
if (arch_shstk_restore(&args->shstk))
|
||||
goto core_restore_end;
|
||||
|
||||
/* All signals must be handled by thread leader */
|
||||
ksigfillset(&to_block);
|
||||
ret = sys_sigprocmask(SIG_SETMASK, &to_block, NULL, sizeof(k_rtsigset_t));
|
||||
@ -1672,6 +1676,9 @@ __visible long __export_restore_task(struct task_restore_args *args)
|
||||
pr_debug("lazy-pages: uffd %d\n", args->uffd);
|
||||
}
|
||||
|
||||
if (arch_shstk_switch_to_restorer(&args->shstk))
|
||||
goto core_restore_end;
|
||||
|
||||
/*
|
||||
* Park vdso/vvar in a safe place if architecture doesn't support
|
||||
* mapping them with arch_prctl().
|
||||
@ -1723,6 +1730,13 @@ __visible long __export_restore_task(struct task_restore_args *args)
|
||||
if (vma_entry->start > vma_entry->shmid)
|
||||
break;
|
||||
|
||||
/*
|
||||
* shadow stack VMAs cannot be remapped, they must be
|
||||
* recreated with map_shadow_stack system call
|
||||
*/
|
||||
if (vma_entry_is(vma_entry, VMA_AREA_SHSTK))
|
||||
continue;
|
||||
|
||||
if (vma_remap(vma_entry, args->uffd))
|
||||
goto core_restore_end;
|
||||
}
|
||||
@ -1740,6 +1754,13 @@ __visible long __export_restore_task(struct task_restore_args *args)
|
||||
if (vma_entry->start < vma_entry->shmid)
|
||||
break;
|
||||
|
||||
/*
|
||||
* shadow stack VMAs cannot be remapped, they must be
|
||||
* recreated with map_shadow_stack system call
|
||||
*/
|
||||
if (vma_entry_is(vma_entry, VMA_AREA_SHSTK))
|
||||
continue;
|
||||
|
||||
if (vma_remap(vma_entry, args->uffd))
|
||||
goto core_restore_end;
|
||||
}
|
||||
@ -2166,6 +2187,14 @@ __visible long __export_restore_task(struct task_restore_args *args)
|
||||
|
||||
futex_set_and_wake(&thread_inprogress, args->nr_threads);
|
||||
|
||||
/*
|
||||
* Shadow stack of the leader can be locked only after all other
|
||||
* threads were cloned, otherwise they may start with read-only
|
||||
* shadow stack.
|
||||
*/
|
||||
if (arch_shstk_restore(&args->shstk))
|
||||
goto core_restore_end;
|
||||
|
||||
restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS);
|
||||
|
||||
if (ret)
|
||||
|
Loading…
x
Reference in New Issue
Block a user