diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 13d1001c9..f50448cd2 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -2232,6 +2232,10 @@ skip_ns_bouncing:
 	if (ret < 0)
 		goto out_kill;
 
+	ret = apply_memfd_seals();
+	if (ret < 0)
+		goto out_kill;
+
 	/*
 	 * Zombies die after CR_STATE_RESTORE which is switched
 	 * by root task, not by us. See comment before CR_STATE_FORKING
diff --git a/criu/include/fcntl.h b/criu/include/fcntl.h
index d9c5c5e7b..ea9d48c72 100644
--- a/criu/include/fcntl.h
+++ b/criu/include/fcntl.h
@@ -34,6 +34,14 @@ struct f_owner_ex {
 # define F_GETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 8)
 #endif
 
+#ifndef F_ADD_SEALS
+# define F_ADD_SEALS	(F_LINUX_SPECIFIC_BASE + 9)
+#endif
+
+#ifndef F_GET_SEALS
+# define F_GET_SEALS	(F_LINUX_SPECIFIC_BASE + 10)
+#endif
+
 #ifndef O_PATH
 # define O_PATH		010000000
 #endif
diff --git a/criu/include/memfd.h b/criu/include/memfd.h
index 0a9aeff2f..2d8eda545 100644
--- a/criu/include/memfd.h
+++ b/criu/include/memfd.h
@@ -15,6 +15,7 @@ extern const struct fdtype_ops memfd_dump_ops;
 extern int memfd_open(struct file_desc *d, u32 *fdflags);
 extern struct collect_image_info memfd_cinfo;
 extern struct file_desc *collect_memfd(u32 id);
+extern int apply_memfd_seals(void);
 
 #ifdef CONFIG_HAS_MEMFD_CREATE
 # include <sys/mman.h>
diff --git a/criu/memfd.c b/criu/memfd.c
index 1cca96a32..d17c10fb7 100644
--- a/criu/memfd.c
+++ b/criu/memfd.c
@@ -1,4 +1,5 @@
 #include <unistd.h>
+#include <linux/memfd.h>
 
 #include "common/compiler.h"
 #include "common/lock.h"
@@ -24,6 +25,13 @@
 #define MEMFD_PREFIX "/memfd:"
 #define MEMFD_PREFIX_LEN (sizeof(MEMFD_PREFIX)-1)
 
+#define F_SEAL_SEAL	0x0001	/* prevent further seals from being set */
+#define F_SEAL_SHRINK	0x0002	/* prevent file from shrinking */
+#define F_SEAL_GROW	0x0004	/* prevent file from growing */
+#define F_SEAL_WRITE	0x0008	/* prevent writes */
+/* Linux 5.1+ */
+#define F_SEAL_FUTURE_WRITE	0x0010	/* prevent future writes while mapped */
+
 struct memfd_inode {
 	struct list_head	list;
 	u32			id;
@@ -37,6 +45,7 @@ struct memfd_inode {
 		struct {
 			mutex_t		lock;
 			int		fdstore_id;
+			unsigned int	pending_seals;	/* seals still to be applied by apply_memfd_seals() */
 		};
 	};
 };
@@ -92,6 +101,13 @@ static int dump_memfd_inode(int fd, struct memfd_inode *inode,
 	mie.size = st->st_size;
 	mie.shmid = shmid;
 
+	/* Record the current seals in the image so that restore can re-apply them */
+	mie.seals = fcntl(fd, F_GET_SEALS);
+	if (mie.seals == -1) {
+		pr_perror("Can't get seals of memfd");
+		goto out;
+	}
+
 	if (pb_write_one(img, &mie, PB_MEMFD_INODE))
 		goto out;
 
@@ -187,6 +203,8 @@ struct memfd_info {
 	struct memfd_inode *inode;
 };
 
+static int memfd_open_inode(struct memfd_inode *inode);
+
 static struct memfd_inode *memfd_alloc_inode(int id)
 {
 	struct memfd_inode *inode;
@@ -202,6 +220,7 @@ static struct memfd_inode *memfd_alloc_inode(int id)
 	inode->id = id;
 	mutex_init(&inode->lock);
 	inode->fdstore_id = -1;
+	inode->pending_seals = 0;
 
 	list_add_tail(&inode->list, &memfd_inodes);
 	return inode;
@@ -223,7 +242,16 @@ static int memfd_open_inode_nocache(struct memfd_inode *inode)
 	if (pb_read_one(img, &mie, PB_MEMFD_INODE) < 0)
 		goto out;
 
-	fd = memfd_create(mie->name, 0);
+	if (mie->seals == F_SEAL_SEAL) {
+		inode->pending_seals = 0;
+		flags = 0;
+	} else {
+		/* Seals are applied later due to F_SEAL_FUTURE_WRITE */
+		inode->pending_seals = mie->seals;
+		flags = MFD_ALLOW_SEALING;
+	}
+
+	fd = memfd_create(mie->name, flags);
 	if (fd < 0) {
 		pr_perror("Can't create memfd:%s", mie->name);
 		goto out;
@@ -401,3 +429,39 @@ struct file_desc *collect_memfd(u32 id) {
 
 	return fdesc;
 }
+
+/*
+ * Apply the pending seals of every memfd inode on the memfd_inodes list.
+ *
+ * Sealing is deferred until all the mappings are done because
+ * F_SEAL_FUTURE_WRITE (added in Linux 5.1) prevents future write access.
+ * Thus all writable mappings must be opened before this seal is
+ * applied.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int apply_memfd_seals(void)
+{
+	struct memfd_inode *inode;
+	int fd;
+
+	list_for_each_entry(inode, &memfd_inodes, list) {
+		if (!inode->pending_seals)
+			continue;
+
+		fd = memfd_open_inode(inode);
+		if (fd < 0)
+			return -1;
+
+		if (fcntl(fd, F_ADD_SEALS, inode->pending_seals) < 0) {
+			/* Report before close() gets a chance to clobber errno */
+			pr_perror("Cannot apply seals on memfd");
+			close(fd);
+			return -1;
+		}
+
+		close(fd);
+	}
+
+	return 0;
+}
diff --git a/images/memfd.proto b/images/memfd.proto
index 8eccd6f4f..546ffc2ab 100644
--- a/images/memfd.proto
+++ b/images/memfd.proto
@@ -17,4 +17,5 @@ message memfd_inode_entry {
 	required uint32		gid		= 3;
 	required uint64		size		= 4;
 	required uint32		shmid		= 5;
+	required uint32		seals		= 6 [(criu).flags = "seals.flags"];
 };
diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py
index a89850a1d..40a6036cf 100644
--- a/lib/py/images/pb2dict.py
+++ b/lib/py/images/pb2dict.py
@@ -124,6 +124,14 @@ rfile_flags_map = [
     ('O_CLOEXEC', 0o02000000),
 ]
 
+seals_flags_map = [
+    ('F_SEAL_SEAL', 0x0001),
+    ('F_SEAL_SHRINK', 0x0002),
+    ('F_SEAL_GROW', 0x0004),
+    ('F_SEAL_WRITE', 0x0008),
+    ('F_SEAL_FUTURE_WRITE', 0x0010),
+]
+
 pmap_flags_map = [
     ('PE_PARENT', 1 << 0),
     ('PE_LAZY', 1 << 1),
@@ -136,6 +144,7 @@ flags_maps = {
     'mmap.status': mmap_status_map,
     'rfile.flags': rfile_flags_map,
     'pmap.flags': pmap_flags_map,
+    'seals.flags': seals_flags_map,
 }
 
 gen_maps = {