
restorer: shstk: implement shadow stack restore

The restore of a task with shadow stack enabled adds these steps:

* switch from the default shadow stack to a temporary shadow stack
  allocated in the premmapped area
* unmap CRIU mappings; nothing changed here, but it's important that
  CRIU mappings can be removed only after switching to a temporary
  shadow stack
* create shadow stack VMA with map_shadow_stack()
* restore shadow stack contents with wrss
* switch to "real" shadow stack
* lock shadow stack features
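
Expressed as plain user-space calls, the sequence above boils down to roughly the following. This is a simplified sketch, not the restorer code itself: the pivot to and from the temporary stack (rstorssp/saveprevssp) and the wrss writes are elided, and the constants are taken from the x86 uapi headers.

#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_map_shadow_stack
#define __NR_map_shadow_stack 453
#endif
#define SHADOW_STACK_SET_TOKEN 0x1	/* asm/mman.h */
#define ARCH_SHSTK_ENABLE	0x5001	/* asm/prctl.h */
#define ARCH_SHSTK_DISABLE	0x5002
#define ARCH_SHSTK_LOCK		0x5003
#define ARCH_SHSTK_SHSTK	0x1
#define ARCH_SHSTK_WRSS		0x2

static long shstk_prctl(int op, unsigned long arg)
{
	return syscall(SYS_arch_prctl, op, arg);
}

static int restore_shstk_outline(unsigned long addr, unsigned long size)
{
	/* create the shadow stack VMA with a restore token on top */
	long shstk = syscall(__NR_map_shadow_stack, addr, size,
			     SHADOW_STACK_SET_TOKEN);

	if (shstk < 0)
		return -1;
	/* allow wrss so that the dumped contents can be written back */
	if (shstk_prctl(ARCH_SHSTK_ENABLE, ARCH_SHSTK_WRSS))
		return -1;
	/* ... wrssq the saved contents, switch to the new stack ... */
	/* drop write access again and lock the feature set */
	if (shstk_prctl(ARCH_SHSTK_DISABLE, ARCH_SHSTK_WRSS))
		return -1;
	return shstk_prctl(ARCH_SHSTK_LOCK, ARCH_SHSTK_SHSTK);
}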

Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org>
Mike Rapoport (IBM) authored 2022-05-25 12:30:04 +03:00; committed by Andrei Vagin
parent 7dd5830023
commit a48aa33eaa
6 changed files with 271 additions and 7 deletions


@@ -118,3 +118,4 @@ __NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
__NR_rseq 334 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
__NR_membarrier 324 sys_membarrier (int cmd, unsigned int flags, int cpu_id)
__NR_map_shadow_stack 453 sys_map_shadow_stack (unsigned long addr, unsigned long size, unsigned int flags)
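
For reference, the new syscall can be invoked directly; a minimal user-space sketch, assuming a CET-capable CPU and kernel:

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_map_shadow_stack
#define __NR_map_shadow_stack 453
#endif
#define SHADOW_STACK_SET_TOKEN 0x1	/* put a restore token on top */

int main(void)
{
	/* addr == 0 lets the kernel choose the placement */
	long shstk = syscall(__NR_map_shadow_stack, 0UL, 4096UL,
			     SHADOW_STACK_SET_TOKEN);

	if (shstk < 0) {
		perror("map_shadow_stack");
		return 1;
	}
	printf("shadow stack VMA at %#lx\n", shstk);
	return 0;
}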


@@ -177,6 +177,24 @@ static inline void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe)
#define USER32_CS 0x23
/* clang-format off */
/*
* rst_sigreturn in the restorer is a non-inlined call, which adds an entry
* to the shadow stack above the sigframe token;
* if the shadow stack is enabled, increment the shadow stack pointer to
* remove that entry
*/
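/*
* The shadow stack then looks like this (higher addresses up, the stack
* grows down):
*
*   sigframe token   <- SSP that sys_rt_sigreturn() expects
*   return address   <- SSP right after the call
*
* incsspq with a count of 1 discards the return address, leaving the
* shadow stack pointer at the token. When shadow stack is disabled,
* rdsspq is a NOP, %rax remains zero and the pop is skipped.
*/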
#define ARCH_SHSTK_POP() \
asm volatile( \
"xor %%rax, %%rax\n" \
"rdsspq %%rax\n" \
"cmpq $0, %%rax\n" \
"jz 1f\n" \
"movq $1, %%rax\n" \
"incsspq %%rax\n" \
"1:\n" \
: : \
: "rax")
#define ARCH_RT_SIGRETURN_NATIVE(new_sp) \
asm volatile( \
"movq %0, %%rax \n" \
@@ -205,9 +223,10 @@ static inline void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe)
#define ARCH_RT_SIGRETURN_RST(new_sp, rt_sigframe) \
do { \
if ((rt_sigframe)->is_native) { \
ARCH_SHSTK_POP(); \
ARCH_RT_SIGRETURN_NATIVE(new_sp); \
} else \
ARCH_RT_SIGRETURN_COMPAT(new_sp); \
} while (0)


@@ -66,13 +66,207 @@ int arch_shstk_prepare(struct pstree_item *item, CoreEntry *core,
struct task_restore_args *ta);
#define arch_shstk_prepare arch_shstk_prepare
int arch_shstk_unlock(struct pstree_item *item, CoreEntry *core, pid_t pid);
#define arch_shstk_unlock arch_shstk_unlock
int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
int (*func)(void *arg), void *arg);
#define arch_shstk_trampoline arch_shstk_trampoline
#ifdef CR_NOGLIBC
#include <compel/plugins/std/syscall.h>
#include <compel/cpu.h>
#include "vma.h"
#define SHSTK_BUSY_BIT (1UL << 0) /* BIT(0) */
static inline int shstk_map(unsigned long addr, unsigned long size)
{
long shstk = sys_map_shadow_stack(addr, size, SHADOW_STACK_SET_TOKEN);
if (shstk < 0) {
pr_err("Failed to map shadow stack at %lx: %ld\n", addr, shstk);
return -1;
}
if (shstk != addr) {
pr_err("Shadow stack address mismatch: need %lx, got %lx\n", addr, shstk);
return -1;
}
pr_info("Created shadow stack at %lx\n", shstk);
return 0;
}
/* clang-format off */
static inline unsigned long get_ssp(void)
{
unsigned long ssp;
asm volatile("rdsspq %0" : "=r"(ssp) :: );
return ssp;
}
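/* write a value into shadow stack memory; requires writes enabled with ARCH_SHSTK_WRSS */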
static inline void wrssq(unsigned long addr, unsigned long val)
{
asm volatile("wrssq %1, (%0)" :: "r"(addr), "r"(val) : "memory");
}
/* clang-format on */
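/*
* rstorssp verifies the restore token at new_ssp and pivots the shadow
* stack pointer to it; saveprevssp then leaves a restore token on the
* previous stack so that it can be switched back to later
*/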
static always_inline void shstk_switch_ssp(unsigned long new_ssp)
{
unsigned long old_ssp = get_ssp();
asm volatile("rstorssp (%0)\n" :: "r"(new_ssp));
asm volatile("saveprevssp");
pr_debug("changed ssp from %lx to %lx\n", old_ssp, new_ssp);
}
/*
* Disable writes to the shadow stack and lock its disable/enable controls
*/
static inline int shstk_finalize(void)
{
int ret = 0;
ret = sys_arch_prctl(ARCH_SHSTK_DISABLE, ARCH_SHSTK_WRSS);
if (ret) {
pr_err("Failed to disable writes to shadow stack\n");
return ret;
}
ret = sys_arch_prctl(ARCH_SHSTK_LOCK, ARCH_SHSTK_SHSTK);
if (ret)
pr_err("Failed to lock shadow stack controls\n");
return ret;
}
/*
* Restore contents of the shadow stack and set shadow stack pointer
*/
static always_inline int shstk_restore(struct rst_shstk_info *cet)
{
unsigned long *shstk_data = (unsigned long *)cet->premmaped_addr;
unsigned long ssp = cet->vma_start + cet->vma_size - 8;
unsigned long shstk_top = cet->vma_size / 8 - 1;
unsigned long val;
long ret;
if (!(cet->cet & ARCH_SHSTK_SHSTK))
return 0;
if (shstk_map(cet->vma_start, cet->vma_size))
return -1;
/*
* Switch shadow stack from temporary location to the actual task's
* shadow stack VMA
*/
shstk_switch_ssp(ssp);
/* restore shadow stack contents */
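/*
* e.g. with a 4096-byte shadow stack VMA the first write puts
* shstk_data[511] at vma_start + 4088 (the topmost 8-byte slot) and the
* loop then walks down towards cet->ssp
*/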
for (; ssp >= cet->ssp; ssp -= 8, shstk_top--)
wrssq(ssp, shstk_data[shstk_top]);
/*
* Add tokens for the sigreturn frame and for the shadow stack switch.
* The sigreturn token will be checked by the kernel while processing
* sigreturn.
* The stack switch token is required by the rstorssp and saveprevssp
* semantics.
*/
/* token for sigreturn frame */
val = ALIGN_DOWN(cet->ssp, 8) | SHSTK_DATA_BIT;
wrssq(ssp, val);
/* shadow stack switch token */
val = ssp | SHSTK_BUSY_BIT;
ssp -= 8;
wrssq(ssp, val);
/* reset shadow stack pointer to the proper location */
shstk_switch_ssp(ssp);
ret = sys_munmap(shstk_data, cet->vma_size + PAGE_SIZE);
if (ret < 0) {
pr_err("Failed to unmap premmaped shadow stack\n");
return ret;
}
return shstk_finalize();
}
#define arch_shstk_restore shstk_restore
/*
* Disable shadow stack
*/
static inline int shstk_disable(void)
{
int ret;
ret = sys_arch_prctl(ARCH_SHSTK_DISABLE, ARCH_SHSTK_WRSS);
if (ret) {
pr_err("Failed to disable writes to shadow stack\n");
return ret;
}
ret = sys_arch_prctl(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
if (ret) {
pr_err("Failed to disable shadow stack\n");
return ret;
}
ret = sys_arch_prctl(ARCH_SHSTK_LOCK, ARCH_SHSTK_SHSTK);
if (ret)
pr_err("Failed to lock shadow stack controls\n");
return 0;
}
/*
* Switch to temporary shadow stack
*/
static always_inline int shstk_switch_to_restorer(struct rst_shstk_info *cet)
{
unsigned long ssp;
long ret;
if (!(cet->cet & ARCH_SHSTK_SHSTK))
return 0;
ret = sys_munmap((void *)cet->tmp_shstk, PAGE_SIZE);
if (ret < 0) {
pr_err("Failed to unmap area for temporary shadow stack\n");
return -1;
}
ret = shstk_map(cet->tmp_shstk, PAGE_SIZE);
if (ret < 0)
return -1;
/*
* Switch shadow stack from the default created by the kernel to a
* temporary shadow stack allocated in the premmapped area
*/
ssp = cet->tmp_shstk + PAGE_SIZE - 8;
shstk_switch_ssp(ssp);
ret = sys_arch_prctl(ARCH_SHSTK_ENABLE, ARCH_SHSTK_WRSS);
if (ret) {
pr_err("Failed to enable writes to shadow stack\n");
return ret;
}
return 0;
}
#define arch_shstk_switch_to_restorer shstk_switch_to_restorer
#endif /* CR_NOGLIBC */
#endif /* __CR_ASM_SHSTK_H__ */


@@ -339,4 +339,20 @@ enum {
#define __r_sym(name) restorer_sym##name
#define restorer_sym(rblob, name) (void *)(rblob + __r_sym(name))
#ifndef arch_shstk_switch_to_restorer
static inline int arch_shstk_switch_to_restorer(struct rst_shstk_info *shstk)
{
return 0;
}
#define arch_shstk_switch_to_restorer arch_shstk_switch_to_restorer
#endif
#ifndef arch_shstk_restore
static inline int arch_shstk_restore(struct rst_shstk_info *shstk)
{
return 0;
}
#define arch_shstk_restore arch_shstk_restore
#endif
#endif /* __CR_RESTORER_H__ */


@@ -18,6 +18,11 @@ ifeq ($(ARCH),mips)
ccflags-y += -mno-abicalls -fno-pic
endif
# -mshstk required for CET instructions
ifeq ($(ARCH),x86)
ccflags-y += -mshstk
endif
LDS := compel/arch/$(ARCH)/scripts/compel-pack.lds.S
restorer-obj-y += parasite-vdso.o ./$(ARCH_DIR)/vdso-pie.o
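
As an aside, -mshstk also enables the CET intrinsics from <immintrin.h>; a minimal sketch (illustrative, not part of this commit), built with gcc -mshstk:

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
	/* _get_ssp() compiles to rdsspq and yields 0 when shadow stack is disabled */
	unsigned long long ssp = _get_ssp();

	printf("ssp = %#llx\n", ssp);
	return 0;
}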


@@ -752,6 +752,10 @@ __visible long __export_restore_thread(struct thread_restore_args *args)
goto core_restore_end;
}
/* restore original shadow stack */
if (arch_shstk_restore(&args->shstk))
goto core_restore_end;
/* All signals must be handled by thread leader */
ksigfillset(&to_block);
ret = sys_sigprocmask(SIG_SETMASK, &to_block, NULL, sizeof(k_rtsigset_t));
@@ -1672,6 +1676,9 @@ __visible long __export_restore_task(struct task_restore_args *args)
pr_debug("lazy-pages: uffd %d\n", args->uffd);
}
if (arch_shstk_switch_to_restorer(&args->shstk))
goto core_restore_end;
/*
* Park vdso/vvar in a safe place if architecture doesn't support
* mapping them with arch_prctl().
@@ -1723,6 +1730,13 @@ __visible long __export_restore_task(struct task_restore_args *args)
if (vma_entry->start > vma_entry->shmid)
break;
/*
* shadow stack VMAs cannot be remapped; they must be
* recreated with the map_shadow_stack() system call
*/
if (vma_entry_is(vma_entry, VMA_AREA_SHSTK))
continue;
if (vma_remap(vma_entry, args->uffd))
goto core_restore_end;
}
@@ -1740,6 +1754,13 @@ __visible long __export_restore_task(struct task_restore_args *args)
if (vma_entry->start < vma_entry->shmid)
break;
/*
* shadow stack VMAs cannot be remapped; they must be
* recreated with the map_shadow_stack() system call
*/
if (vma_entry_is(vma_entry, VMA_AREA_SHSTK))
continue;
if (vma_remap(vma_entry, args->uffd))
goto core_restore_end;
}
@@ -2166,6 +2187,14 @@ __visible long __export_restore_task(struct task_restore_args *args)
futex_set_and_wake(&thread_inprogress, args->nr_threads);
/*
* The shadow stack of the leader can be locked only after all other
* threads have been cloned; otherwise they may start with a read-only
* shadow stack.
*/
if (arch_shstk_restore(&args->shstk))
goto core_restore_end;
restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS);
if (ret)