diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def index 8bcc3cc50..7489ee0c1 100644 --- a/compel/arch/arm/plugins/std/syscalls/syscall.def +++ b/compel/arch/arm/plugins/std/syscalls/syscall.def @@ -122,3 +122,4 @@ pidfd_open 434 434 (pid_t pid, unsigned int flags) openat2 437 437 (int dirfd, char *pathname, struct open_how *how, size_t size) pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags) rseq 293 398 (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) +membarrier 283 389 (int cmd, unsigned int flags, int cpu_id) diff --git a/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl b/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl index a0ad0cef4..f844d898d 100644 --- a/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl @@ -102,6 +102,7 @@ __NR_kcmp 2 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, u __NR_seccomp 2 sys_seccomp (unsigned int op, unsigned int flags, const char *uargs) __NR_memfd_create 2 sys_memfd_create (const char *name, unsigned int flags) __NR_userfaultfd 2 sys_userfaultfd (int flags) +__NR_membarrier 3 sys_membarrier (int cmd, unsigned int flags, int cpu_id) __NR_rseq 2 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) __NR_open_tree 4 sys_open_tree (int dirfd, const char *pathname, unsigned int flags) __NR_move_mount 4 sys_move_mount (int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, int flags) diff --git a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl index 505ec849d..9f50d5e8a 100644 --- a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl @@ -119,3 +119,4 @@ __NR_pidfd_open 5434 sys_pidfd_open (pid_t pid, unsigned int flags) __NR_openat2 5437 sys_openat2 (int dirfd, char *pathname, struct 
open_how *how, size_t size) __NR_pidfd_getfd 5438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) __NR_rseq 5327 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) +__NR_membarrier 5318 sys_membarrier (int cmd, unsigned int flags, int cpu_id) diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl index af40d7104..4c9b75cf1 100644 --- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl @@ -118,3 +118,4 @@ __NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) __NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size) __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) __NR_rseq 387 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) +__NR_membarrier 365 sys_membarrier (int cmd, unsigned int flags, int cpu_id) diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl index 6a349e1cb..af7d550e2 100644 --- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl @@ -118,3 +118,4 @@ __NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) __NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size) __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) __NR_rseq 383 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) +__NR_membarrier 356 sys_membarrier (int cmd, unsigned int flags, int cpu_id) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl index a119a59b2..ab36a5cd6 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl @@ -106,3 +106,4 @@ __NR_pidfd_open 434 
sys_pidfd_open (pid_t pid, unsigned int flags) __NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size) __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) __NR_rseq 386 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) +__NR_membarrier 375 sys_membarrier (int cmd, unsigned int flags, int cpu_id) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl index 16dd86e79..57681b79a 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl @@ -117,3 +117,4 @@ __NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) __NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size) __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) __NR_rseq 334 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) +__NR_membarrier 324 sys_membarrier (int cmd, unsigned int flags, int cpu_id) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 340fb96ec..ee5974acc 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -770,6 +770,11 @@ static int dump_task_core_all(struct parasite_ctl *ctl, struct pstree_item *item core->tc->child_subreaper = misc->child_subreaper; core->tc->has_child_subreaper = true; + if (misc->membarrier_registration_mask) { + core->tc->membarrier_registration_mask = misc->membarrier_registration_mask; + core->tc->has_membarrier_registration_mask = true; + } + ret = get_task_personality(pid, &core->tc->personality); if (ret < 0) goto err; diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 9107a2322..270049721 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -863,6 +863,9 @@ static int prepare_proc_misc(pid_t pid, TaskCoreEntry *tc, struct task_restore_a if (tc->has_child_subreaper) args->child_subreaper = tc->child_subreaper; + if (tc->has_membarrier_registration_mask) + 
/*
 * Probe whether the current process is registered for one membarrier()
 * command.
 *
 * @cmd_bit: bit index of the barrier-issuing command to try; the command
 *           passed to the kernel is 1 << cmd_bit.
 *
 * Returns the matching registration command (the issued command shifted left
 * by one) if the barrier succeeded, 0 if it failed with EPERM, EINVAL or
 * ENOSYS, and -1 on any other error.
 */
static int get_membarrier_registration_mask(int cmd_bit)
{
	unsigned barrier_cmd = 1 << cmd_bit;
	int rc = sys_membarrier(barrier_cmd, 0, 0);

	/*
	 * The barrier only succeeds when the process was registered for this
	 * type of membarrier. EPERM means "not registered"; EINVAL and ENOSYS
	 * mean the kernel does not support the command. Anything else is
	 * unexpected.
	 */
	switch (rc) {
	case 0:
	case -EPERM:
	case -EINVAL:
	case -ENOSYS:
		break;
	default:
		pr_err("membarrier(1 << %d) returned %d\n", cmd_bit, rc);
		return -1;
	}

	pr_debug("membarrier(1 << %d) returned %d\n", cmd_bit, rc);

	if (rc)
		return 0;

	/*
	 * For supported registrations, MEMBARRIER_CMD_REGISTER_xxx is
	 * MEMBARRIER_CMD_xxx << 1. See enum membarrier_cmd in
	 * include/uapi/linux/membarrier.h in the kernel sources.
	 */
	return barrier_cmd << 1;
}

/*
 * Bit indexes of the barrier-issuing commands that get probed. They are
 * spelled out here instead of being checked with BUILD_BUG_ON because the
 * installed linux/membarrier.h may be too old to have the enum values.
 */
#define MEMBARRIER_CMDBIT_PRIVATE_EXPEDITED	      3
#define MEMBARRIER_CMDBIT_PRIVATE_EXPEDITED_SYNC_CORE 5
#define MEMBARRIER_CMDBIT_PRIVATE_EXPEDITED_RSEQ      7
/*
 * Restore membarrier() registrations.
 *
 * @mask: bitmask of membarrier() commands to re-issue; every set bit N
 *        results in one sys_membarrier(1 << N, 0, 0) call, lowest bit first.
 *
 * All set bits are attempted even when an earlier one fails, so every failure
 * gets logged. Returns 0 if the mask is empty or every call succeeded, -1 if
 * at least one call failed.
 */
static int restore_membarrier_registrations(int mask)
{
	/*
	 * Walk an unsigned copy of the mask. Widening a negative int (bit 31
	 * set) into an unsigned long sign-extends it on LP64, which would
	 * produce bit indexes past the width of int and make the shift below
	 * undefined.
	 */
	unsigned int bits = mask;
	int i, err, ret = 0;

	if (!mask)
		return 0;

	pr_info("Restoring membarrier() registrations %x\n", mask);

	for (i = 0; bits; i++, bits >>= 1) {
		if (!(bits & 1))
			continue;

		/* 1U keeps the shift well-defined for every bit of the mask. */
		err = sys_membarrier(1U << i, 0, 0);
		if (!err)
			continue;
		pr_err("Can't restore membarrier(1 << %d) registration: %d\n", i, err);
		ret = -1;
	}

	return ret;
}