2
0
mirror of https://github.com/checkpoint-restore/criu synced 2025-08-22 09:58:09 +00:00

non-root: enable non-root checkpoint/restore

This commit enables checkpointing and restoring of applications as
non-root.

The first goal was to enable checkpoint and restore of the env00 and
pthread00 test cases.

This uses the information from opts.unprivileged and opts.cap_eff to
skip certain code paths which do not work as non-root.

Co-authored-by: Adrian Reber <areber@redhat.com>
Signed-off-by: Younes Manton <ymanton@ca.ibm.com>
This commit is contained in:
Younes Manton 2022-08-12 11:56:53 -07:00 committed by Andrei Vagin
parent ce01f70d94
commit 6a30c7d1ed
18 changed files with 194 additions and 53 deletions

View File

@ -734,6 +734,9 @@ int dump_task_cgroup(struct pstree_item *item, u32 *cg_id, struct parasite_dump_
unsigned int n_ctls = 0; unsigned int n_ctls = 0;
struct cg_set *cs; struct cg_set *cs;
if (opts.unprivileged)
return 0;
if (item) if (item)
pid = item->pid->real; pid = item->pid->real;
else else
@ -989,6 +992,9 @@ int dump_cgroups(void)
CgroupEntry cg = CGROUP_ENTRY__INIT; CgroupEntry cg = CGROUP_ENTRY__INIT;
int ret = -1; int ret = -1;
if (opts.unprivileged)
return 0;
BUG_ON(!criu_cgset || !root_cgset); BUG_ON(!criu_cgset || !root_cgset);
/* /*

View File

@ -700,6 +700,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
{ "lsm-mount-context", required_argument, 0, 1099 }, { "lsm-mount-context", required_argument, 0, 1099 },
{ "network-lock", required_argument, 0, 1100 }, { "network-lock", required_argument, 0, 1100 },
BOOL_OPT("mntns-compat-mode", &opts.mntns_compat_mode), BOOL_OPT("mntns-compat-mode", &opts.mntns_compat_mode),
BOOL_OPT("unprivileged", &opts.unprivileged),
{}, {},
}; };

View File

@ -21,6 +21,7 @@
#include <sys/prctl.h> #include <sys/prctl.h>
#include <sched.h> #include <sched.h>
#include <sys/mount.h> #include <sys/mount.h>
#include <sys/utsname.h>
#include "../soccr/soccr.h" #include "../soccr/soccr.h"
@ -515,6 +516,14 @@ static int check_ipc(void)
{ {
int ret; int ret;
/*
* Since kernel 5.16 sem_next_id can be accessed via CAP_CHECKPOINT_RESTORE, however
* for non-root users access() runs with an empty set of caps and will therefore always
* fail.
*/
if (opts.uid)
return 0;
ret = access("/proc/sys/kernel/sem_next_id", R_OK | W_OK); ret = access("/proc/sys/kernel/sem_next_id", R_OK | W_OK);
if (!ret) if (!ret)
return 0; return 0;
@ -1039,10 +1048,14 @@ static int check_tcp(void)
} }
val = 1; val = 1;
ret = setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val)); if (!opts.unprivileged || has_cap_net_admin(opts.cap_eff)) {
if (ret < 0) { ret = setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val));
pr_perror("Can't turn TCP repair mode ON"); if (ret < 0) {
goto out; pr_perror("Can't turn TCP repair mode ON");
goto out;
}
} else {
pr_info("Not checking for TCP repair mode. Please set CAP_NET_ADMIN\n");
} }
optlen = sizeof(val); optlen = sizeof(val);
@ -1394,9 +1407,6 @@ int cr_check(void)
struct ns_id *ns; struct ns_id *ns;
int ret = 0; int ret = 0;
if (!is_root_user())
return -1;
root_item = alloc_pstree_item(); root_item = alloc_pstree_item();
if (root_item == NULL) if (root_item == NULL)
return -1; return -1;
@ -1666,36 +1676,43 @@ static int pr_set_dumpable(int value)
int check_caps(void) int check_caps(void)
{ {
struct proc_status_creds creds; /* Read out effective capabilities and store in opts.cap_eff. */
int exit_code = -1; if (set_opts_cap_eff())
if (parse_pid_status(PROC_SELF, &creds.s, NULL))
goto out; goto out;
memcpy(&opts.cap_eff, &creds.cap_eff, sizeof(u32) * PROC_CAP_SIZE); /*
* Whether running as root or not, CRIU always needs
* at least these capabilities.
*/
if (!has_cap_checkpoint_restore(opts.cap_eff)) if (!has_cap_checkpoint_restore(opts.cap_eff))
goto out; goto out;
/* For some things we need to know if we are running as root. */ /* For some things we need to know if we are running as root. */
opts.uid = geteuid(); opts.uid = geteuid();
if (opts.uid) { if (!opts.uid) {
/* /* CRIU is running as root. No further checks are necessary. */
* At his point we know we are running as non-root with the necessary return 0;
* capabilities available. Now we have to make the process dumpable
* so that /proc/self is not owned by root.
*/
if (pr_set_dumpable(1))
return -1;
} }
exit_code = 0; if (!opts.unprivileged) {
pr_msg("Running as non-root requires '--unprivileged'\n");
pr_msg("Please consult the documentation for limitations when running as non-root\n");
return -1;
}
/*
* At this point we know we are running as non-root with the necessary
* capabilities available. Now we have to make the process dumpable
* so that /proc/self is not owned by root.
*/
if (pr_set_dumpable(1))
return -1;
return 0;
out: out:
if (exit_code) { pr_msg("CRIU needs to have the CAP_SYS_ADMIN or the CAP_CHECKPOINT_RESTORE capability: \n");
pr_msg("CRIU needs to have the CAP_SYS_ADMIN or the CAP_CHECKPOINT_RESTORE capability: \n"); pr_msg("setcap cap_checkpoint_restore+eip %s\n", opts.argv_0);
pr_msg("setcap cap_checkpoint_restore+eip %s\n", opts.argv_0);
}
return exit_code; return -1;
} }

View File

@ -1809,6 +1809,9 @@ static int restore_task_with_children(void *_arg)
goto err; goto err;
} }
if (set_opts_cap_eff())
goto err;
/* Wait prepare_userns */ /* Wait prepare_userns */
if (restore_finish_ns_stage(CR_STATE_ROOT_TASK, CR_STATE_PREPARE_NAMESPACES) < 0) if (restore_finish_ns_stage(CR_STATE_ROOT_TASK, CR_STATE_PREPARE_NAMESPACES) < 0)
goto err; goto err;

View File

@ -14,6 +14,7 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <arpa/inet.h> #include <arpa/inet.h>
#include <sched.h> #include <sched.h>
#include <sys/prctl.h>
#include "version.h" #include "version.h"
#include "crtools.h" #include "crtools.h"
@ -409,6 +410,12 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
pr_debug("Would overwrite RPC settings with values from %s\n", req->config_file); pr_debug("Would overwrite RPC settings with values from %s\n", req->config_file);
} }
if (req->has_unprivileged)
opts.unprivileged = req->unprivileged;
if (check_caps())
return 1;
if (kerndat_init()) if (kerndat_init())
return 1; return 1;

View File

@ -185,6 +185,9 @@ int main(int argc, char *argv[], char *envp[])
return cr_service_work(atoi(argv[optind + 1])); return cr_service_work(atoi(argv[optind + 1]));
} }
if (check_caps())
return 1;
if (opts.imgs_dir == NULL) if (opts.imgs_dir == NULL)
SET_CHAR_OPTS(imgs_dir, "."); SET_CHAR_OPTS(imgs_dir, ".");
@ -414,6 +417,8 @@ usage:
" --network-lock METHOD\n" " --network-lock METHOD\n"
" network locking/unlocking method; argument\n" " network locking/unlocking method; argument\n"
" can be 'nftables' or 'iptables' (default).\n" " can be 'nftables' or 'iptables' (default).\n"
" --unprivileged accept limitations when running as non-root\n"
" consult documentation for further details\n"
"\n" "\n"
"* External resources support:\n" "* External resources support:\n"
" --external RES dump objects from this list as external resources:\n" " --external RES dump objects from this list as external resources:\n"

View File

@ -13,6 +13,8 @@
#include "rst-malloc.h" #include "rst-malloc.h"
#include "log.h" #include "log.h"
#include "util.h" #include "util.h"
#include "cr_options.h"
#include "util-caps.h"
/* clang-format off */ /* clang-format off */
static struct fdstore_desc { static struct fdstore_desc {
@ -27,6 +29,8 @@ int fdstore_init(void)
uint32_t buf[2] = { INT_MAX / 2, INT_MAX / 2 }; uint32_t buf[2] = { INT_MAX / 2, INT_MAX / 2 };
struct sockaddr_un addr; struct sockaddr_un addr;
unsigned int addrlen; unsigned int addrlen;
int rcv_opt_name;
int snd_opt_name;
struct stat st; struct stat st;
int sk, ret; int sk, ret;
@ -49,8 +53,16 @@ int fdstore_init(void)
return -1; return -1;
} }
if (setsockopt(sk, SOL_SOCKET, SO_SNDBUFFORCE, &buf[0], sizeof(buf[0])) < 0 || if (!opts.unprivileged || has_cap_net_admin(opts.cap_eff)) {
setsockopt(sk, SOL_SOCKET, SO_RCVBUFFORCE, &buf[1], sizeof(buf[1])) < 0) { rcv_opt_name = SO_RCVBUFFORCE;
snd_opt_name = SO_SNDBUFFORCE;
} else {
rcv_opt_name = SO_RCVBUF;
snd_opt_name = SO_SNDBUF;
}
if (setsockopt(sk, SOL_SOCKET, snd_opt_name, &buf[0], sizeof(buf[0])) < 0 ||
setsockopt(sk, SOL_SOCKET, rcv_opt_name, &buf[1], sizeof(buf[1])) < 0) {
pr_perror("Unable to set SO_SNDBUFFORCE/SO_RCVBUFFORCE"); pr_perror("Unable to set SO_SNDBUFFORCE/SO_RCVBUFFORCE");
close(sk); close(sk);
return -1; return -1;

View File

@ -21,7 +21,7 @@
#include "image.h" #include "image.h"
#include "common/list.h" #include "common/list.h"
#include "rst-malloc.h" #include "rst-malloc.h"
#include "util-pie.h" #include "util-caps.h"
#include "common/lock.h" #include "common/lock.h"
#include "sockets.h" #include "sockets.h"
#include "pstree.h" #include "pstree.h"
@ -1346,10 +1346,35 @@ static int fchroot(int fd)
return chroot("."); return chroot(".");
} }
/*
 * Decide whether the current root directory differs from the directory
 * referred to by @saved_root.
 *
 * The saved root is stat()ed through its descriptor, while the current
 * root is stat()ed through the "root" entry below the PROC_SELF proc
 * directory (presumably /proc/self/root -- confirm against open_pid_proc()).
 *
 * Returns 1 when device or inode numbers differ, 0 when both match and
 * -1 when any of the open/stat steps fails.
 */
static int need_chroot(int saved_root)
{
	struct stat wanted, current;
	int proc_self_fd;

	if (fstat(saved_root, &wanted) == -1) {
		pr_perror("Failed to stat saved root dir");
		return -1;
	}

	proc_self_fd = open_pid_proc(PROC_SELF);
	if (proc_self_fd < 0) {
		pr_perror("Failed to open PROC_SELF");
		return -1;
	}

	if (fstatat(proc_self_fd, "root", &current, 0) == -1) {
		pr_perror("Failed to stat current root dir");
		return -1;
	}

	/* Two directories are the same only if both dev and ino agree. */
	if (wanted.st_dev != current.st_dev)
		return 1;

	return wanted.st_ino != current.st_ino;
}
int restore_fs(struct pstree_item *me) int restore_fs(struct pstree_item *me)
{ {
int dd_root = -1, dd_cwd = -1, ret, err = -1; int dd_root = -1, dd_cwd = -1, ret, err = -1;
struct rst_info *ri = rsti(me); struct rst_info *ri = rsti(me);
bool do_chroot = true;
/* /*
* First -- open both descriptors. We will not * First -- open both descriptors. We will not
@ -1368,15 +1393,24 @@ int restore_fs(struct pstree_item *me)
goto out; goto out;
} }
/*
* In unprivileged mode chroot() may fail if we don't have
* sufficient privileges, therefore only do it if the process
* is actually chrooted.
*/
if (opts.unprivileged)
do_chroot = need_chroot(dd_root);
/* /*
* Now do chroot/chdir. Chroot goes first as it calls chdir into * Now do chroot/chdir. Chroot goes first as it calls chdir into
* dd_root so we'd need to fix chdir after it anyway. * dd_root so we'd need to fix chdir after it anyway.
*/ */
if (do_chroot) {
ret = fchroot(dd_root); ret = fchroot(dd_root);
if (ret < 0) { if (ret < 0) {
pr_perror("Can't change root"); pr_perror("Can't change root");
goto out; goto out;
}
} }
ret = fchdir(dd_cwd); ret = fchdir(dd_cwd);

View File

@ -226,7 +226,8 @@ int prepare_inventory(InventoryEntry *he)
if (get_task_ids(&crt.i)) if (get_task_ids(&crt.i))
return -1; return -1;
he->has_root_cg_set = true; if (!opts.unprivileged)
he->has_root_cg_set = true;
if (dump_task_cgroup(NULL, &he->root_cg_set, NULL)) if (dump_task_cgroup(NULL, &he->root_cg_set, NULL))
return -1; return -1;

View File

@ -2,6 +2,7 @@
#define __CR_OPTIONS_H__ #define __CR_OPTIONS_H__
#include <stdbool.h> #include <stdbool.h>
#include <sys/capability.h>
#include "common/config.h" #include "common/config.h"
#include "common/list.h" #include "common/list.h"
#include "int.h" #include "int.h"
@ -223,8 +224,14 @@ struct cr_options {
* CAP_CHECKPOINT_RESTORE or CAP_SYS_ADMIN * CAP_CHECKPOINT_RESTORE or CAP_SYS_ADMIN
*/ */
uid_t uid; uid_t uid;
/* This contains the value from /proc/pid/status: CapEff */ /* This contains the value from capget()->effective */
u32 cap_eff[CR_CAP_SIZE]; u32 cap_eff[_LINUX_CAPABILITY_U32S_3];
/*
* If CRIU should be running as non-root with the help of
* CAP_CHECKPOINT_RESTORE or CAP_SYS_ADMIN the user should
* explicitly request it as it comes with many limitations.
*/
int unprivileged;
}; };
extern struct cr_options opts; extern struct cr_options opts;

View File

@ -386,6 +386,8 @@ extern int mount_detached_fs(const char *fsname);
extern char *get_legacy_iptables_bin(bool ipv6); extern char *get_legacy_iptables_bin(bool ipv6);
extern int set_opts_cap_eff(void);
extern ssize_t read_all(int fd, void *buf, size_t size); extern ssize_t read_all(int fd, void *buf, size_t size);
extern ssize_t write_all(int fd, const void *buf, size_t size); extern ssize_t write_all(int fd, const void *buf, size_t size);

View File

@ -28,6 +28,7 @@
#include "cgroup.h" #include "cgroup.h"
#include "fdstore.h" #include "fdstore.h"
#include "kerndat.h" #include "kerndat.h"
#include "util-caps.h"
#include "protobuf.h" #include "protobuf.h"
#include "util.h" #include "util.h"
@ -1623,10 +1624,12 @@ int collect_namespaces(bool for_dump)
int prepare_userns_creds(void) int prepare_userns_creds(void)
{ {
/* UID and GID must be set after restoring /proc/PID/{uid,gid}_maps */ if (!opts.unprivileged || has_cap_setuid(opts.cap_eff)) {
if (setuid(0) || setgid(0) || setgroups(0, NULL)) { /* UID and GID must be set after restoring /proc/PID/{uid,gid}_maps */
pr_perror("Unable to initialize id-s"); if (setuid(0) || setgid(0) || setgroups(0, NULL)) {
return -1; pr_perror("Unable to initialize id-s");
return -1;
}
} }
/* /*

View File

@ -184,7 +184,7 @@ static int lsm_set_label(char *label, char *type, int procfd)
return 0; return 0;
} }
static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_type) static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_type, uid_t uid)
{ {
CredsEntry *ce = &args->creds; CredsEntry *ce = &args->creds;
int b, i, ret; int b, i, ret;
@ -211,10 +211,12 @@ static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_typ
* lose caps bits when changing xids. * lose caps bits when changing xids.
*/ */
ret = sys_prctl(PR_SET_SECUREBITS, 1 << SECURE_NO_SETUID_FIXUP, 0, 0, 0); if (!uid) {
if (ret) { ret = sys_prctl(PR_SET_SECUREBITS, 1 << SECURE_NO_SETUID_FIXUP, 0, 0, 0);
pr_err("Unable to set SECURE_NO_SETUID_FIXUP: %d\n", ret); if (ret) {
return -1; pr_err("Unable to set SECURE_NO_SETUID_FIXUP: %d\n", ret);
return -1;
}
} }
/* /*
@ -252,10 +254,12 @@ static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_typ
* special state any longer. * special state any longer.
*/ */
ret = sys_prctl(PR_SET_SECUREBITS, ce->secbits, 0, 0, 0); if (!uid) {
if (ret) { ret = sys_prctl(PR_SET_SECUREBITS, ce->secbits, 0, 0, 0);
pr_err("Unable to set PR_SET_SECUREBITS: %d\n", ret); if (ret) {
return -1; pr_err("Unable to set PR_SET_SECUREBITS: %d\n", ret);
return -1;
}
} }
/* /*
@ -634,7 +638,7 @@ long __export_restore_thread(struct thread_restore_args *args)
if (restore_seccomp(args)) if (restore_seccomp(args))
BUG(); BUG();
ret = restore_creds(args->creds_args, args->ta->proc_fd, args->ta->lsm_type); ret = restore_creds(args->creds_args, args->ta->proc_fd, args->ta->lsm_type, args->ta->uid);
ret = ret || restore_dumpable_flag(&args->ta->mm); ret = ret || restore_dumpable_flag(&args->ta->mm);
ret = ret || restore_pdeath_sig(args); ret = ret || restore_pdeath_sig(args);
if (ret) if (ret)
@ -1915,7 +1919,7 @@ long __export_restore_task(struct task_restore_args *args)
* turning off TCP repair is CAP_NET_ADMIN protected, * turning off TCP repair is CAP_NET_ADMIN protected,
* thus restore* creds _after_ all of the above. * thus restore* creds _after_ all of the above.
*/ */
ret = restore_creds(args->t->creds_args, args->proc_fd, args->lsm_type); ret = restore_creds(args->t->creds_args, args->proc_fd, args->lsm_type, args->uid);
ret = ret || restore_dumpable_flag(&args->mm); ret = ret || restore_dumpable_flag(&args->mm);
ret = ret || restore_pdeath_sig(args->t); ret = ret || restore_pdeath_sig(args->t);
ret = ret || restore_child_subreaper(args->child_subreaper); ret = ret || restore_child_subreaper(args->child_subreaper);

View File

@ -5,6 +5,7 @@
#include "proc_parse.h" #include "proc_parse.h"
#include "namespaces.h" #include "namespaces.h"
#include "timens.h" #include "timens.h"
#include "cr_options.h"
#include "protobuf.h" #include "protobuf.h"
#include "images/timens.pb-c.h" #include "images/timens.pb-c.h"
@ -57,6 +58,9 @@ int prepare_timens(int id)
struct timespec ts; struct timespec ts;
struct timespec prev_moff = {}, prev_boff = {}; struct timespec prev_moff = {}, prev_boff = {};
if (opts.unprivileged)
return 0;
img = open_image(CR_FD_TIMENS, O_RSTR, id); img = open_image(CR_FD_TIMENS, O_RSTR, id);
if (!img) if (!img)
return -1; return -1;

View File

@ -41,6 +41,7 @@
#include "namespaces.h" #include "namespaces.h"
#include "criu-log.h" #include "criu-log.h"
#include "syscall.h" #include "syscall.h"
#include "util-caps.h"
#include "clone-noasan.h" #include "clone-noasan.h"
#include "cr_options.h" #include "cr_options.h"
@ -1426,6 +1427,9 @@ void rlimit_unlimit_nofile(void)
{ {
struct rlimit new; struct rlimit new;
if (opts.unprivileged && !has_cap_sys_resource(opts.cap_eff))
return;
new.rlim_cur = kdat.sysctl_nr_open; new.rlim_cur = kdat.sysctl_nr_open;
new.rlim_max = kdat.sysctl_nr_open; new.rlim_max = kdat.sysctl_nr_open;
@ -2064,3 +2068,21 @@ out:
xfree(free_path); xfree(free_path);
return mp_path; return mp_path;
} }
/*
 * Read the effective capability set of the calling process with capget()
 * and copy it, one 32-bit word at a time, into opts.cap_eff.
 *
 * Returns 0 on success and -1 if capget() fails.
 */
int set_opts_cap_eff(void)
{
	struct __user_cap_data_struct caps[_LINUX_CAPABILITY_U32S_3];
	struct __user_cap_header_struct hdr;
	int word = 0;

	hdr.pid = getpid();
	hdr.version = _LINUX_CAPABILITY_VERSION_3;

	if (capget(&hdr, caps) != 0)
		return -1;

	/* Only the "effective" member of each word is kept. */
	while (word < _LINUX_CAPABILITY_U32S_3) {
		opts.cap_eff[word] = caps[word].effective;
		word++;
	}

	return 0;
}

View File

@ -139,6 +139,7 @@ message criu_opts {
optional criu_network_lock_method network_lock = 64 [default = IPTABLES]; optional criu_network_lock_method network_lock = 64 [default = IPTABLES];
optional bool mntns_compat_mode = 65; optional bool mntns_compat_mode = 65;
optional bool skip_file_rwx_check = 66; optional bool skip_file_rwx_check = 66;
optional bool unprivileged = 67;
/* optional bool check_mounts = 128; */ /* optional bool check_mounts = 128; */
} }

View File

@ -566,6 +566,17 @@ void criu_set_skip_file_rwx_check(bool skip_file_rwx_check)
criu_local_set_skip_file_rwx_check(global_opts, skip_file_rwx_check); criu_local_set_skip_file_rwx_check(global_opts, skip_file_rwx_check);
} }
/*
 * Store the requested unprivileged setting in the RPC options attached
 * to @opts and flag the field as present.
 */
void criu_local_set_unprivileged(criu_opts *opts, bool unprivileged)
{
	opts->rpc->unprivileged = unprivileged;
	opts->rpc->has_unprivileged = true;
}
/*
 * Convenience wrapper: applies the unprivileged setting to global_opts
 * by forwarding to criu_local_set_unprivileged().
 */
void criu_set_unprivileged(bool unprivileged)
{
criu_local_set_unprivileged(global_opts, unprivileged);
}
void criu_local_set_orphan_pts_master(criu_opts *opts, bool orphan_pts_master) void criu_local_set_orphan_pts_master(criu_opts *opts, bool orphan_pts_master)
{ {
opts->rpc->has_orphan_pts_master = true; opts->rpc->has_orphan_pts_master = true;

View File

@ -79,6 +79,7 @@ void criu_set_weak_sysctls(bool val);
void criu_set_evasive_devices(bool evasive_devices); void criu_set_evasive_devices(bool evasive_devices);
void criu_set_shell_job(bool shell_job); void criu_set_shell_job(bool shell_job);
void criu_set_skip_file_rwx_check(bool skip_file_rwx_check); void criu_set_skip_file_rwx_check(bool skip_file_rwx_check);
void criu_set_unprivileged(bool unprivileged);
void criu_set_orphan_pts_master(bool orphan_pts_master); void criu_set_orphan_pts_master(bool orphan_pts_master);
void criu_set_file_locks(bool file_locks); void criu_set_file_locks(bool file_locks);
void criu_set_track_mem(bool track_mem); void criu_set_track_mem(bool track_mem);