mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-22 01:51:51 +00:00
non-root: enable non-root checkpoint/restore
This commit enables checkpointing and restoring of applications as non-root. The first goal was to enable checkpoint and restore of the env00 and pthread00 test cases. This uses the information from opts.unprivileged and opts.cap_eff to skip certain code paths which do not work as non-root. Co-authored-by: Adrian Reber <areber@redhat.com> Signed-off-by: Younes Manton <ymanton@ca.ibm.com>
This commit is contained in:
parent
ce01f70d94
commit
6a30c7d1ed
@ -734,6 +734,9 @@ int dump_task_cgroup(struct pstree_item *item, u32 *cg_id, struct parasite_dump_
|
||||
unsigned int n_ctls = 0;
|
||||
struct cg_set *cs;
|
||||
|
||||
if (opts.unprivileged)
|
||||
return 0;
|
||||
|
||||
if (item)
|
||||
pid = item->pid->real;
|
||||
else
|
||||
@ -989,6 +992,9 @@ int dump_cgroups(void)
|
||||
CgroupEntry cg = CGROUP_ENTRY__INIT;
|
||||
int ret = -1;
|
||||
|
||||
if (opts.unprivileged)
|
||||
return 0;
|
||||
|
||||
BUG_ON(!criu_cgset || !root_cgset);
|
||||
|
||||
/*
|
||||
|
@ -700,6 +700,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
{ "lsm-mount-context", required_argument, 0, 1099 },
|
||||
{ "network-lock", required_argument, 0, 1100 },
|
||||
BOOL_OPT("mntns-compat-mode", &opts.mntns_compat_mode),
|
||||
BOOL_OPT("unprivileged", &opts.unprivileged),
|
||||
{},
|
||||
};
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include <sys/prctl.h>
|
||||
#include <sched.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/utsname.h>
|
||||
|
||||
#include "../soccr/soccr.h"
|
||||
|
||||
@ -515,6 +516,14 @@ static int check_ipc(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Since kernel 5.16 sem_next_id can be accessed via CAP_CHECKPOINT_RESTORE, however
|
||||
* for non-root users access() runs with an empty set of caps and will therefore always
|
||||
* fail.
|
||||
*/
|
||||
if (opts.uid)
|
||||
return 0;
|
||||
|
||||
ret = access("/proc/sys/kernel/sem_next_id", R_OK | W_OK);
|
||||
if (!ret)
|
||||
return 0;
|
||||
@ -1039,10 +1048,14 @@ static int check_tcp(void)
|
||||
}
|
||||
|
||||
val = 1;
|
||||
ret = setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val));
|
||||
if (ret < 0) {
|
||||
pr_perror("Can't turn TCP repair mode ON");
|
||||
goto out;
|
||||
if (!opts.unprivileged || has_cap_net_admin(opts.cap_eff)) {
|
||||
ret = setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val));
|
||||
if (ret < 0) {
|
||||
pr_perror("Can't turn TCP repair mode ON");
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
pr_info("Not checking for TCP repair mode. Please set CAP_NET_ADMIN\n");
|
||||
}
|
||||
|
||||
optlen = sizeof(val);
|
||||
@ -1394,9 +1407,6 @@ int cr_check(void)
|
||||
struct ns_id *ns;
|
||||
int ret = 0;
|
||||
|
||||
if (!is_root_user())
|
||||
return -1;
|
||||
|
||||
root_item = alloc_pstree_item();
|
||||
if (root_item == NULL)
|
||||
return -1;
|
||||
@ -1666,36 +1676,43 @@ static int pr_set_dumpable(int value)
|
||||
|
||||
int check_caps(void)
|
||||
{
|
||||
struct proc_status_creds creds;
|
||||
int exit_code = -1;
|
||||
|
||||
if (parse_pid_status(PROC_SELF, &creds.s, NULL))
|
||||
/* Read out effective capabilities and store in opts.cap_eff. */
|
||||
if (set_opts_cap_eff())
|
||||
goto out;
|
||||
|
||||
memcpy(&opts.cap_eff, &creds.cap_eff, sizeof(u32) * PROC_CAP_SIZE);
|
||||
|
||||
/*
|
||||
* No matter if running as root or not. CRIU always needs
|
||||
* at least these capabilities.
|
||||
*/
|
||||
if (!has_cap_checkpoint_restore(opts.cap_eff))
|
||||
goto out;
|
||||
|
||||
/* For some things we need to know if we are running as root. */
|
||||
opts.uid = geteuid();
|
||||
|
||||
if (opts.uid) {
|
||||
/*
|
||||
* At this point we know we are running as non-root with the necessary
|
||||
* capabilities available. Now we have to make the process dumpable
|
||||
* so that /proc/self is not owned by root.
|
||||
*/
|
||||
if (pr_set_dumpable(1))
|
||||
return -1;
|
||||
if (!opts.uid) {
|
||||
/* CRIU is running as root. No further checks are necessary. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
exit_code = 0;
|
||||
if (!opts.unprivileged) {
|
||||
pr_msg("Running as non-root requires '--unprivileged'\n");
|
||||
pr_msg("Please consult the documentation for limitations when running as non-root\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* At this point we know we are running as non-root with the necessary
|
||||
* capabilities available. Now we have to make the process dumpable
|
||||
* so that /proc/self is not owned by root.
|
||||
*/
|
||||
if (pr_set_dumpable(1))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
out:
|
||||
if (exit_code) {
|
||||
pr_msg("CRIU needs to have the CAP_SYS_ADMIN or the CAP_CHECKPOINT_RESTORE capability: \n");
|
||||
pr_msg("setcap cap_checkpoint_restore+eip %s\n", opts.argv_0);
|
||||
}
|
||||
pr_msg("CRIU needs to have the CAP_SYS_ADMIN or the CAP_CHECKPOINT_RESTORE capability: \n");
|
||||
pr_msg("setcap cap_checkpoint_restore+eip %s\n", opts.argv_0);
|
||||
|
||||
return exit_code;
|
||||
return -1;
|
||||
}
|
||||
|
@ -1809,6 +1809,9 @@ static int restore_task_with_children(void *_arg)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (set_opts_cap_eff())
|
||||
goto err;
|
||||
|
||||
/* Wait prepare_userns */
|
||||
if (restore_finish_ns_stage(CR_STATE_ROOT_TASK, CR_STATE_PREPARE_NAMESPACES) < 0)
|
||||
goto err;
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <sched.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
#include "version.h"
|
||||
#include "crtools.h"
|
||||
@ -409,6 +410,12 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
|
||||
pr_debug("Would overwrite RPC settings with values from %s\n", req->config_file);
|
||||
}
|
||||
|
||||
if (req->has_unprivileged)
|
||||
opts.unprivileged = req->unprivileged;
|
||||
|
||||
if (check_caps())
|
||||
return 1;
|
||||
|
||||
if (kerndat_init())
|
||||
return 1;
|
||||
|
||||
|
@ -185,6 +185,9 @@ int main(int argc, char *argv[], char *envp[])
|
||||
return cr_service_work(atoi(argv[optind + 1]));
|
||||
}
|
||||
|
||||
if (check_caps())
|
||||
return 1;
|
||||
|
||||
if (opts.imgs_dir == NULL)
|
||||
SET_CHAR_OPTS(imgs_dir, ".");
|
||||
|
||||
@ -414,6 +417,8 @@ usage:
|
||||
" --network-lock METHOD\n"
|
||||
" network locking/unlocking method; argument\n"
|
||||
" can be 'nftables' or 'iptables' (default).\n"
|
||||
" --unprivileged accept limitations when running as non-root\n"
|
||||
" consult documentation for further details\n"
|
||||
"\n"
|
||||
"* External resources support:\n"
|
||||
" --external RES dump objects from this list as external resources:\n"
|
||||
|
@ -13,6 +13,8 @@
|
||||
#include "rst-malloc.h"
|
||||
#include "log.h"
|
||||
#include "util.h"
|
||||
#include "cr_options.h"
|
||||
#include "util-caps.h"
|
||||
|
||||
/* clang-format off */
|
||||
static struct fdstore_desc {
|
||||
@ -27,6 +29,8 @@ int fdstore_init(void)
|
||||
uint32_t buf[2] = { INT_MAX / 2, INT_MAX / 2 };
|
||||
struct sockaddr_un addr;
|
||||
unsigned int addrlen;
|
||||
int rcv_opt_name;
|
||||
int snd_opt_name;
|
||||
struct stat st;
|
||||
int sk, ret;
|
||||
|
||||
@ -49,8 +53,16 @@ int fdstore_init(void)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (setsockopt(sk, SOL_SOCKET, SO_SNDBUFFORCE, &buf[0], sizeof(buf[0])) < 0 ||
|
||||
setsockopt(sk, SOL_SOCKET, SO_RCVBUFFORCE, &buf[1], sizeof(buf[1])) < 0) {
|
||||
if (!opts.unprivileged || has_cap_net_admin(opts.cap_eff)) {
|
||||
rcv_opt_name = SO_RCVBUFFORCE;
|
||||
snd_opt_name = SO_SNDBUFFORCE;
|
||||
} else {
|
||||
rcv_opt_name = SO_RCVBUF;
|
||||
snd_opt_name = SO_SNDBUF;
|
||||
}
|
||||
|
||||
if (setsockopt(sk, SOL_SOCKET, snd_opt_name, &buf[0], sizeof(buf[0])) < 0 ||
|
||||
setsockopt(sk, SOL_SOCKET, rcv_opt_name, &buf[1], sizeof(buf[1])) < 0) {
|
||||
pr_perror("Unable to set SO_SNDBUFFORCE/SO_RCVBUFFORCE");
|
||||
close(sk);
|
||||
return -1;
|
||||
|
46
criu/files.c
46
criu/files.c
@ -21,7 +21,7 @@
|
||||
#include "image.h"
|
||||
#include "common/list.h"
|
||||
#include "rst-malloc.h"
|
||||
#include "util-pie.h"
|
||||
#include "util-caps.h"
|
||||
#include "common/lock.h"
|
||||
#include "sockets.h"
|
||||
#include "pstree.h"
|
||||
@ -1346,10 +1346,35 @@ static int fchroot(int fd)
|
||||
return chroot(".");
|
||||
}
|
||||
|
||||
static int need_chroot(int saved_root)
|
||||
{
|
||||
struct stat saved_root_stat, cur_root_stat;
|
||||
int psd;
|
||||
|
||||
if (fstat(saved_root, &saved_root_stat) == -1) {
|
||||
pr_perror("Failed to stat saved root dir");
|
||||
return -1;
|
||||
}
|
||||
|
||||
psd = open_pid_proc(PROC_SELF);
|
||||
if (psd < 0) {
|
||||
pr_perror("Failed to open PROC_SELF");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (fstatat(psd, "root", &cur_root_stat, 0) == -1) {
|
||||
pr_perror("Failed to stat current root dir");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return saved_root_stat.st_ino != cur_root_stat.st_ino || saved_root_stat.st_dev != cur_root_stat.st_dev;
|
||||
}
|
||||
|
||||
int restore_fs(struct pstree_item *me)
|
||||
{
|
||||
int dd_root = -1, dd_cwd = -1, ret, err = -1;
|
||||
struct rst_info *ri = rsti(me);
|
||||
bool do_chroot = true;
|
||||
|
||||
/*
|
||||
* First -- open both descriptors. We will not
|
||||
@ -1368,15 +1393,24 @@ int restore_fs(struct pstree_item *me)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* In unprivileged mode chroot() may fail if we don't have
|
||||
* sufficient privileges, therefore only do it if the process
|
||||
* is actually chrooted.
|
||||
*/
|
||||
if (opts.unprivileged)
|
||||
do_chroot = need_chroot(dd_root);
|
||||
|
||||
/*
|
||||
* Now do chroot/chdir. Chroot goes first as it calls chdir into
|
||||
* dd_root so we'd need to fix chdir after it anyway.
|
||||
*/
|
||||
|
||||
ret = fchroot(dd_root);
|
||||
if (ret < 0) {
|
||||
pr_perror("Can't change root");
|
||||
goto out;
|
||||
if (do_chroot) {
|
||||
ret = fchroot(dd_root);
|
||||
if (ret < 0) {
|
||||
pr_perror("Can't change root");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = fchdir(dd_cwd);
|
||||
|
@ -226,7 +226,8 @@ int prepare_inventory(InventoryEntry *he)
|
||||
if (get_task_ids(&crt.i))
|
||||
return -1;
|
||||
|
||||
he->has_root_cg_set = true;
|
||||
if (!opts.unprivileged)
|
||||
he->has_root_cg_set = true;
|
||||
if (dump_task_cgroup(NULL, &he->root_cg_set, NULL))
|
||||
return -1;
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
#define __CR_OPTIONS_H__
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <sys/capability.h>
|
||||
#include "common/config.h"
|
||||
#include "common/list.h"
|
||||
#include "int.h"
|
||||
@ -223,8 +224,14 @@ struct cr_options {
|
||||
* CAP_CHECKPOINT_RESTORE or CAP_SYS_ADMIN
|
||||
*/
|
||||
uid_t uid;
|
||||
/* This contains the value from /proc/pid/status: CapEff */
|
||||
u32 cap_eff[CR_CAP_SIZE];
|
||||
/* This contains the value from capget()->effective */
|
||||
u32 cap_eff[_LINUX_CAPABILITY_U32S_3];
|
||||
/*
|
||||
* If CRIU should be running as non-root with the help of
|
||||
* CAP_CHECKPOINT_RESTORE or CAP_SYS_ADMIN the user should
|
||||
* explicitly request it as it comes with many limitations.
|
||||
*/
|
||||
int unprivileged;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
|
@ -386,6 +386,8 @@ extern int mount_detached_fs(const char *fsname);
|
||||
|
||||
extern char *get_legacy_iptables_bin(bool ipv6);
|
||||
|
||||
extern int set_opts_cap_eff(void);
|
||||
|
||||
extern ssize_t read_all(int fd, void *buf, size_t size);
|
||||
extern ssize_t write_all(int fd, const void *buf, size_t size);
|
||||
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "cgroup.h"
|
||||
#include "fdstore.h"
|
||||
#include "kerndat.h"
|
||||
#include "util-caps.h"
|
||||
|
||||
#include "protobuf.h"
|
||||
#include "util.h"
|
||||
@ -1623,10 +1624,12 @@ int collect_namespaces(bool for_dump)
|
||||
|
||||
int prepare_userns_creds(void)
|
||||
{
|
||||
/* UID and GID must be set after restoring /proc/PID/{uid,gid}_maps */
|
||||
if (setuid(0) || setgid(0) || setgroups(0, NULL)) {
|
||||
pr_perror("Unable to initialize id-s");
|
||||
return -1;
|
||||
if (!opts.unprivileged || has_cap_setuid(opts.cap_eff)) {
|
||||
/* UID and GID must be set after restoring /proc/PID/{uid,gid}_maps */
|
||||
if (setuid(0) || setgid(0) || setgroups(0, NULL)) {
|
||||
pr_perror("Unable to initialize id-s");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -184,7 +184,7 @@ static int lsm_set_label(char *label, char *type, int procfd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_type)
|
||||
static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_type, uid_t uid)
|
||||
{
|
||||
CredsEntry *ce = &args->creds;
|
||||
int b, i, ret;
|
||||
@ -211,10 +211,12 @@ static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_typ
|
||||
* lose caps bits when changing xids.
|
||||
*/
|
||||
|
||||
ret = sys_prctl(PR_SET_SECUREBITS, 1 << SECURE_NO_SETUID_FIXUP, 0, 0, 0);
|
||||
if (ret) {
|
||||
pr_err("Unable to set SECURE_NO_SETUID_FIXUP: %d\n", ret);
|
||||
return -1;
|
||||
if (!uid) {
|
||||
ret = sys_prctl(PR_SET_SECUREBITS, 1 << SECURE_NO_SETUID_FIXUP, 0, 0, 0);
|
||||
if (ret) {
|
||||
pr_err("Unable to set SECURE_NO_SETUID_FIXUP: %d\n", ret);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -252,10 +254,12 @@ static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_typ
|
||||
* special state any longer.
|
||||
*/
|
||||
|
||||
ret = sys_prctl(PR_SET_SECUREBITS, ce->secbits, 0, 0, 0);
|
||||
if (ret) {
|
||||
pr_err("Unable to set PR_SET_SECUREBITS: %d\n", ret);
|
||||
return -1;
|
||||
if (!uid) {
|
||||
ret = sys_prctl(PR_SET_SECUREBITS, ce->secbits, 0, 0, 0);
|
||||
if (ret) {
|
||||
pr_err("Unable to set PR_SET_SECUREBITS: %d\n", ret);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -634,7 +638,7 @@ long __export_restore_thread(struct thread_restore_args *args)
|
||||
if (restore_seccomp(args))
|
||||
BUG();
|
||||
|
||||
ret = restore_creds(args->creds_args, args->ta->proc_fd, args->ta->lsm_type);
|
||||
ret = restore_creds(args->creds_args, args->ta->proc_fd, args->ta->lsm_type, args->ta->uid);
|
||||
ret = ret || restore_dumpable_flag(&args->ta->mm);
|
||||
ret = ret || restore_pdeath_sig(args);
|
||||
if (ret)
|
||||
@ -1915,7 +1919,7 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
* turning off TCP repair is CAP_NET_ADMIN protected,
|
||||
* thus restore* creds _after_ all of the above.
|
||||
*/
|
||||
ret = restore_creds(args->t->creds_args, args->proc_fd, args->lsm_type);
|
||||
ret = restore_creds(args->t->creds_args, args->proc_fd, args->lsm_type, args->uid);
|
||||
ret = ret || restore_dumpable_flag(&args->mm);
|
||||
ret = ret || restore_pdeath_sig(args->t);
|
||||
ret = ret || restore_child_subreaper(args->child_subreaper);
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "proc_parse.h"
|
||||
#include "namespaces.h"
|
||||
#include "timens.h"
|
||||
#include "cr_options.h"
|
||||
|
||||
#include "protobuf.h"
|
||||
#include "images/timens.pb-c.h"
|
||||
@ -57,6 +58,9 @@ int prepare_timens(int id)
|
||||
struct timespec ts;
|
||||
struct timespec prev_moff = {}, prev_boff = {};
|
||||
|
||||
if (opts.unprivileged)
|
||||
return 0;
|
||||
|
||||
img = open_image(CR_FD_TIMENS, O_RSTR, id);
|
||||
if (!img)
|
||||
return -1;
|
||||
|
22
criu/util.c
22
criu/util.c
@ -41,6 +41,7 @@
|
||||
#include "namespaces.h"
|
||||
#include "criu-log.h"
|
||||
#include "syscall.h"
|
||||
#include "util-caps.h"
|
||||
|
||||
#include "clone-noasan.h"
|
||||
#include "cr_options.h"
|
||||
@ -1426,6 +1427,9 @@ void rlimit_unlimit_nofile(void)
|
||||
{
|
||||
struct rlimit new;
|
||||
|
||||
if (opts.unprivileged && !has_cap_sys_resource(opts.cap_eff))
|
||||
return;
|
||||
|
||||
new.rlim_cur = kdat.sysctl_nr_open;
|
||||
new.rlim_max = kdat.sysctl_nr_open;
|
||||
|
||||
@ -2064,3 +2068,21 @@ out:
|
||||
xfree(free_path);
|
||||
return mp_path;
|
||||
}
|
||||
|
||||
int set_opts_cap_eff(void)
|
||||
{
|
||||
struct __user_cap_header_struct cap_header;
|
||||
struct __user_cap_data_struct cap_data[_LINUX_CAPABILITY_U32S_3];
|
||||
int i;
|
||||
|
||||
cap_header.version = _LINUX_CAPABILITY_VERSION_3;
|
||||
cap_header.pid = getpid();
|
||||
|
||||
if (capget(&cap_header, &cap_data[0]))
|
||||
return -1;
|
||||
|
||||
for (i = 0; i < _LINUX_CAPABILITY_U32S_3; i++)
|
||||
memcpy(&opts.cap_eff[i], &cap_data[i].effective, sizeof(u32));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -139,6 +139,7 @@ message criu_opts {
|
||||
optional criu_network_lock_method network_lock = 64 [default = IPTABLES];
|
||||
optional bool mntns_compat_mode = 65;
|
||||
optional bool skip_file_rwx_check = 66;
|
||||
optional bool unprivileged = 67;
|
||||
/* optional bool check_mounts = 128; */
|
||||
}
|
||||
|
||||
|
11
lib/c/criu.c
11
lib/c/criu.c
@ -566,6 +566,17 @@ void criu_set_skip_file_rwx_check(bool skip_file_rwx_check)
|
||||
criu_local_set_skip_file_rwx_check(global_opts, skip_file_rwx_check);
|
||||
}
|
||||
|
||||
/*
 * Store the "unprivileged" setting in opts->rpc and mark the field as
 * present.
 */
void criu_local_set_unprivileged(criu_opts *opts, bool unprivileged)
{
	opts->rpc->unprivileged = unprivileged;
	opts->rpc->has_unprivileged = true;
}
|
||||
|
||||
void criu_set_unprivileged(bool unprivileged)
|
||||
{
|
||||
criu_local_set_unprivileged(global_opts, unprivileged);
|
||||
}
|
||||
|
||||
void criu_local_set_orphan_pts_master(criu_opts *opts, bool orphan_pts_master)
|
||||
{
|
||||
opts->rpc->has_orphan_pts_master = true;
|
||||
|
@ -79,6 +79,7 @@ void criu_set_weak_sysctls(bool val);
|
||||
void criu_set_evasive_devices(bool evasive_devices);
|
||||
void criu_set_shell_job(bool shell_job);
|
||||
void criu_set_skip_file_rwx_check(bool skip_file_rwx_check);
|
||||
void criu_set_unprivileged(bool unprivileged);
|
||||
void criu_set_orphan_pts_master(bool orphan_pts_master);
|
||||
void criu_set_file_locks(bool file_locks);
|
||||
void criu_set_track_mem(bool track_mem);
|
||||
|
Loading…
x
Reference in New Issue
Block a user