2
0
mirror of https://github.com/checkpoint-restore/criu synced 2025-08-22 18:07:57 +00:00
criu/files-reg.c
Kir Kolyshkin 8ac865f3ee Fix check for open_image() ret
When open_image() was modified to return a pointer rather than an int
in commit 295090c1, these two checks were overlooked and never fixed.

Signed-off-by: Kir Kolyshkin <kir@openvz.org>
Acked-by: Andrew Vagin <avagin@odin.com>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
2015-05-05 13:37:16 +03:00

1271 lines
29 KiB
C

#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/vfs.h>
#include <ctype.h>
/*
 * Stolen from kernel/fs/nfs/unlink.c
 *
 * An NFS silly-renamed file is recognized below as SILLYNAME_PREF followed
 * by SILLYNAME_SUFF_LEN hex digits (two digits per byte of a u64 plus two
 * digits per byte of an unsigned int).
 */
#define SILLYNAME_PREF ".nfs"
#define SILLYNAME_SUFF_LEN (((unsigned)sizeof(u64) << 1) + ((unsigned)sizeof(unsigned int) << 1))
#include "cr_options.h"
#include "imgset.h"
#include "file-ids.h"
#include "mount.h"
#include "files.h"
#include "image.h"
#include "list.h"
#include "util.h"
#include "fs-magic.h"
#include "asm/atomic.h"
#include "namespaces.h"
#include "proc_parse.h"
#include "pstree.h"
#include "protobuf.h"
#include "protobuf/regfile.pb-c.h"
#include "protobuf/remap-file-path.pb-c.h"
#include "files-reg.h"
#include "plugin.h"
/* Local prototype for setfsuid(); used by linkat_hard() on restore. */
int setfsuid(uid_t fsuid);
/*
 * Ghost files are those not visible from the FS. Dumping them is
 * nasty and the only way we have -- just carry its contents with
 * us. Any brave soul to implement link unlinked file back?
 *
 * One ghost_file describes one such file; all of them are chained
 * on the ghost_files list.
 */
struct ghost_file {
/* Linkage in the ghost_files list */
struct list_head list;
/* Ghost id; matched against the remap_id of a remap entry */
u32 id;
/* Device and inode of the original file (zero when absent in old images) */
u32 dev;
u32 ino;
/* Path, owner and user counter of the file created for this ghost */
struct file_remap remap;
};
/* Next ghost id to hand out while dumping (see dump_ghost_remap) */
static u32 ghost_file_ids = 1;
/* All ghost files known so far, both on dump and on restore */
static LIST_HEAD(ghost_files);
/*
 * Guards the remap users counters and the ghost list on restore;
 * allocated with shmalloc() in prepare_shared_reg_files().
 */
static mutex_t *ghost_file_mutex;
/*
 * To rollback link remaps.
 *
 * One entry is recorded per hard link created by create_link_remap()
 * so that the link can be removed (or merely forgotten) later.
 */
struct link_remap_rlb {
/* Linkage in the link_remaps list */
struct list_head list;
/* Mount namespace whose root the path is relative to */
struct ns_id *mnt_ns;
/* Heap-allocated name of the created link; freed on rollback */
char *path;
};
/* Every link remap created during this dump, newest first */
static LIST_HEAD(link_remaps);
/*
 * This constant is selected without any calculations. Just do not
 * want to pick up too big files with us in the image.
 *
 * dump_ghost_remap() refuses to dump ghost files larger than this.
 */
#define MAX_GHOST_FILE_SIZE (1 * 1024 * 1024)
/*
 * Materialize a ghost file on disk under @root.
 *
 * The node is created according to the file type recorded in @gfe
 * (fifo, char/block device, directory or regular file), its ownership
 * is set from @gfe and, for regular files, the contents are copied from
 * the ghost image @img.
 *
 * Returns 0 on success, -1 on any failure.
 */
static int create_ghost(struct ghost_file *gf, GhostFileEntry *gfe, char *root, struct cr_img *img)
{
	char ghost_path[PATH_MAX];
	int open_flags, fd, exit_code = -1;

	snprintf(ghost_path, sizeof(ghost_path), "%s/%s", root, gf->remap.path);

	switch (gfe->mode & S_IFMT) {
	case S_IFIFO:
		if (mknod(ghost_path, gfe->mode, 0)) {
			pr_perror("Can't create node for ghost file");
			return -1;
		}
		open_flags = O_RDWR; /* To not block */
		break;
	case S_IFCHR:
	case S_IFBLK:
		if (!gfe->has_rdev) {
			pr_err("No rdev for ghost device\n");
			return -1;
		}
		if (mknod(ghost_path, gfe->mode, gfe->rdev)) {
			pr_perror("Can't create node for ghost dev");
			return -1;
		}
		open_flags = O_WRONLY;
		break;
	case S_IFDIR:
		if (mkdir(ghost_path, gfe->mode)) {
			pr_perror("Can't make ghost dir");
			return -1;
		}
		open_flags = O_DIRECTORY;
		break;
	default:
		/* Anything else is created by open() itself */
		open_flags = O_WRONLY | O_CREAT | O_EXCL;
		break;
	}

	fd = open(ghost_path, open_flags, gfe->mode);
	if (fd < 0) {
		pr_perror("Can't open ghost file %s", ghost_path);
		return -1;
	}

	if (fchown(fd, gfe->uid, gfe->gid) < 0)
		pr_perror("Can't reset user/group on ghost %s", ghost_path);
	else if (!S_ISREG(gfe->mode) || copy_file(img_raw_fd(img), fd, 0) >= 0)
		exit_code = 0;

	close(fd);
	return exit_code;
}
/*
 * Attach a ghost remap to @rfi.
 *
 * If a ghost with id @rfe->remap_id was already created it is reused.
 * Otherwise the ghost image is read and the ghost file is created next
 * to @rfi's path, on the mount @rfi belongs to.
 *
 * Returns 0 on success, -1 on failure.
 */
static int open_remap_ghost(struct reg_file_info *rfi,
					RemapFilePathEntry *rfe)
{
	struct ghost_file *gf;
	GhostFileEntry *gfe = NULL;
	struct cr_img *img;
	char *root;

	list_for_each_entry(gf, &ghost_files, list)
		if (gf->id == rfe->remap_id)
			goto gf_found;

	/*
	 * Ghost not found. We will create one in the same dir
	 * as the very first client of it thus resolving any
	 * issues with cross-device links.
	 */

	pr_info("Opening ghost file %#x for %s\n", rfe->remap_id, rfi->path);

	root = rst_get_mnt_root(rfi->rfe->mnt_id);
	if (root == NULL) {
		pr_err("The %d mount is not found\n", rfi->rfe->mnt_id);
		return -1;
	}

	gf = shmalloc(sizeof(*gf));
	if (!gf)
		return -1;

	gf->remap.path = xmalloc(PATH_MAX);
	gf->remap.mnt_id = rfi->rfe->mnt_id;
	if (!gf->remap.path)
		goto err;

	img = open_image(CR_FD_GHOST_FILE, O_RSTR, rfe->remap_id);
	if (!img)
		goto err;

	if (pb_read_one(img, &gfe, PB_GHOST_FILE) < 0)
		goto close_ifd;

	/*
	 * For old formats where optional has_[dev|ino] is
	 * not present we will have zeros here which is quite
	 * a sign for "absent" fields.
	 */
	gf->dev = gfe->dev;
	gf->ino = gfe->ino;

	/* snprintf() always terminates the string, strncpy() may not */
	if (S_ISDIR(gfe->mode))
		snprintf(gf->remap.path, PATH_MAX, "%s", rfi->path);
	else
		snprintf(gf->remap.path, PATH_MAX, "%s.cr.%x.ghost", rfi->path, rfe->remap_id);

	if (create_ghost(gf, gfe, root, img))
		goto close_ifd;

	/* Take everything we need out of gfe before it gets freed */
	gf->id = rfe->remap_id;
	gf->remap.users = 0;
	gf->remap.is_dir = S_ISDIR(gfe->mode);
	gf->remap.owner = gfe->uid;
	list_add_tail(&gf->list, &ghost_files);

	ghost_file_entry__free_unpacked(gfe, NULL);
	close_image(img);

gf_found:
	rfi->remap = &gf->remap;
	return 0;

close_ifd:
	close_image(img);
err:
	if (gfe)
		ghost_file_entry__free_unpacked(gfe, NULL);
	xfree(gf->remap.path);
	shfree_last(gf);
	return -1;
}
/*
 * Attach a link remap to @rfi: the remap target is another regular
 * file descriptor, looked up by @rfe->remap_id, whose path is used as
 * the remap path.
 *
 * When restoring into a user namespace the owner of the target is
 * recorded as well, it is needed by linkat_hard() later.
 *
 * Returns 0 on success, -1 on failure.
 */
static int open_remap_linked(struct reg_file_info *rfi,
				RemapFilePathEntry *rfe)
{
	struct file_remap *rm;
	struct file_desc *rdesc;
	struct reg_file_info *rrfi;
	uid_t owner = -1;

	rdesc = find_file_desc_raw(FD_TYPES__REG, rfe->remap_id);
	if (!rdesc) {
		pr_err("Can't find target file %x\n", rfe->remap_id);
		return -1;
	}

	rm = xmalloc(sizeof(*rm));
	if (!rm)
		return -1;

	rrfi = container_of(rdesc, struct reg_file_info, d);
	pr_info("Remapped %s -> %s\n", rfi->path, rrfi->path);

	if (root_ns_mask & CLONE_NEWUSER) {
		int rfd;
		struct stat st;

		rfd = mntns_get_root_by_mnt_id(rfi->rfe->mnt_id);
		if (rfd < 0)
			goto err;

		if (fstatat(rfd, rrfi->path, &st, AT_SYMLINK_NOFOLLOW)) {
			pr_perror("Can't get owner of link remap %s", rrfi->path);
			goto err;
		}

		owner = st.st_uid;
	}

	rm->path = rrfi->path;
	rm->users = 0;
	rm->is_dir = false;
	rm->owner = owner;
	rm->mnt_id = rfi->rfe->mnt_id;
	rfi->remap = rm;
	return 0;

err:
	/* rm is not published anywhere yet, don't leak it */
	xfree(rm);
	return -1;
}
/*
 * Handle a procfs remap: make sure a task with pid @rfe->remap_id will
 * exist on restore. If no item of the process tree has this pid, a
 * helper item is allocated and linked in as a child of the root item.
 *
 * Returns 0 on success, -1 if the helper could not be allocated.
 */
static int open_remap_dead_process(struct reg_file_info *rfi,
				RemapFilePathEntry *rfe)
{
	struct pstree_item *item;

	/* don't need to add multiple tasks */
	for_each_pstree_item(item) {
		if (item->pid.virt == rfe->remap_id) {
			pr_info("Skipping helper for restoring /proc/%d; pid exists\n", rfe->remap_id);
			return 0;
		}
	}

	item = alloc_pstree_helper();
	if (item == NULL)
		return -1;

	item->pid.virt = rfe->remap_id;
	item->pgid = root_item->pgid;
	item->sid = root_item->sid;
	item->parent = root_item;
	list_add_tail(&item->sibling, &root_item->children);

	pr_info("Added a helper for restoring /proc/%d\n", item->pid.virt);
	return 0;
}
/*
 * Collect callback for one remap entry from the image.
 *
 * Finds the regular file the remap belongs to, fills in the remap type
 * for images that lack it and dispatches to the type-specific handler.
 *
 * Returns the handler's result, or -1 for an unknown file or type.
 */
static int collect_one_remap(void *obj, ProtobufCMessage *msg)
{
	RemapFilePathEntry *rfe = pb_msg(msg, RemapFilePathEntry);
	struct reg_file_info *rfi;
	struct file_desc *fdesc;

	fdesc = find_file_desc_raw(FD_TYPES__REG, rfe->orig_id);
	if (fdesc == NULL) {
		pr_err("Remap for non existing file %#x\n",
				rfe->orig_id);
		return -1;
	}

	rfi = container_of(fdesc, struct reg_file_info, d);
	pr_info("Configuring remap %#x -> %#x\n", rfi->rfe->id, rfe->remap_id);

	if (!rfe->has_remap_type) {
		/* backward compatibility with images */
		rfe->has_remap_type = true;
		if (!(rfe->remap_id & REMAP_GHOST)) {
			rfe->remap_type = REMAP_TYPE__LINKED;
		} else {
			rfe->remap_id &= ~REMAP_GHOST;
			rfe->remap_type = REMAP_TYPE__GHOST;
		}
	}

	switch (rfe->remap_type) {
	case REMAP_TYPE__LINKED:
		return open_remap_linked(rfi, rfe);
	case REMAP_TYPE__GHOST:
		return open_remap_ghost(rfi, rfe);
	case REMAP_TYPE__PROCFS:
		return open_remap_dead_process(rfi, rfe);
	default:
		pr_err("unknown remap type %u\n", rfe->remap_type);
		return -1;
	}
}
/*
 * Collect descriptor for remap entries: messages of type PB_REMAP_FPATH
 * from the CR_FD_REMAP_FPATH image are handed to collect_one_remap().
 */
struct collect_image_info remap_cinfo = {
.fd_type = CR_FD_REMAP_FPATH,
.pb_type = PB_REMAP_FPATH,
.collect = collect_one_remap,
};
/*
 * Write the ghost image for the file behind @_fd.
 *
 * The image gets a GhostFileEntry (owner, mode, device/inode and, for
 * device nodes, rdev) followed by the file contents if it is a regular
 * file. @id names the image, @phys_dev is the device stored in the entry.
 *
 * Returns 0 on success, -1 on failure. The image is closed either way.
 */
static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_dev)
{
	struct cr_img *img;
	GhostFileEntry gfe = GHOST_FILE_ENTRY__INIT;
	int exit_code = -1;

	pr_info("Dumping ghost file contents (id %#x)\n", id);

	img = open_image(CR_FD_GHOST_FILE, O_DUMP, id);
	if (!img)
		return -1;

	gfe.uid = userns_uid(st->st_uid);
	gfe.gid = userns_gid(st->st_gid);
	gfe.mode = st->st_mode;

	gfe.has_dev = gfe.has_ino = true;
	gfe.dev = phys_dev;
	gfe.ino = st->st_ino;

	if (S_ISCHR(st->st_mode) || S_ISBLK(st->st_mode)) {
		gfe.has_rdev = true;
		gfe.rdev = st->st_rdev;
	}

	if (pb_write_one(img, &gfe, PB_GHOST_FILE))
		goto out;

	if (S_ISREG(st->st_mode)) {
		int fd, ret;
		char lpath[PSFDS];

		/*
		 * Reopen file locally since it may have no read
		 * permissions when drained
		 */
		snprintf(lpath, sizeof(lpath), "/proc/self/fd/%d", _fd);
		fd = open(lpath, O_RDONLY);
		if (fd < 0) {
			pr_perror("Can't open ghost original file");
			goto out;
		}

		ret = copy_file(fd, img_raw_fd(img), st->st_size);
		close(fd);
		if (ret)
			goto out;
	}

	exit_code = 0;
out:
	/* Single exit so that the image is never leaked on errors */
	close_image(img);
	return exit_code;
}
void remap_put(struct file_remap *remap)
{
mutex_lock(ghost_file_mutex);
if (--remap->users == 0) {
int mntns_root;
pr_info("Unlink the ghost %s\n", remap->path);
mntns_root = mntns_get_root_by_mnt_id(remap->mnt_id);
unlinkat(mntns_root, remap->path, 0);
}
mutex_unlock(ghost_file_mutex);
}
/*
 * Find the ghost file with the given device and inode and take one
 * more reference on its remap.
 *
 * Returns the remap, or NULL if there is no such ghost.
 */
struct file_remap *lookup_ghost_remap(u32 dev, u32 ino)
{
	struct file_remap *found = NULL;
	struct ghost_file *cur;

	mutex_lock(ghost_file_mutex);
	list_for_each_entry(cur, &ghost_files, list) {
		if (cur->ino != ino || cur->dev != dev)
			continue;

		cur->remap.users++;
		found = &cur->remap;
		break;
	}
	mutex_unlock(ghost_file_mutex);

	return found;
}
/*
 * Dump an unlinked file as a ghost and write the remap entry that
 * binds regular file @id to it.
 *
 * A file (identified by physical device and inode) is dumped only
 * once; further descriptors on it only get a remap entry pointing to
 * the already assigned ghost id. Files bigger than MAX_GHOST_FILE_SIZE
 * are refused.
 *
 * Returns 0 on success, non-zero on failure.
 */
static int dump_ghost_remap(char *path, const struct stat *st,
				int lfd, u32 id, struct ns_id *nsid)
{
	RemapFilePathEntry rpe = REMAP_FILE_PATH_ENTRY__INIT;
	struct ghost_file *iter, *ghost = NULL;
	dev_t phys_dev;

	pr_info("Dumping ghost file for fd %d id %#x\n", lfd, id);

	if (st->st_size > MAX_GHOST_FILE_SIZE) {
		pr_err("Can't dump ghost file %s of %"PRIu64" size\n",
				path, st->st_size);
		return -1;
	}

	phys_dev = phys_stat_resolve_dev(nsid, st->st_dev, path);

	list_for_each_entry(iter, &ghost_files, list) {
		if ((iter->dev == phys_dev) && (iter->ino == st->st_ino)) {
			ghost = iter;
			break;
		}
	}

	if (ghost == NULL) {
		/* First time we see this file, dump its contents too */
		ghost = xmalloc(sizeof(*ghost));
		if (ghost == NULL)
			return -1;

		ghost->dev = phys_dev;
		ghost->ino = st->st_ino;
		ghost->id = ghost_file_ids++;
		list_add_tail(&ghost->list, &ghost_files);

		if (dump_ghost_file(lfd, ghost->id, st, phys_dev))
			return -1;
	}

	rpe.orig_id = id;
	rpe.remap_id = ghost->id;
	rpe.has_remap_type = true;
	rpe.remap_type = REMAP_TYPE__GHOST;

	return pb_write_one(img_from_set(glob_imgset, CR_FD_REMAP_FPATH),
			&rpe, PB_REMAP_FPATH);
}
/*
 * Walk the list of recorded link remaps and release every entry,
 * optionally removing the link itself from the filesystem first.
 *
 * Does nothing when link remaps are not allowed by the options. Stops
 * at the first entry whose mount namespace root can't be obtained.
 */
static void __rollback_link_remaps(bool do_unlink)
{
	struct link_remap_rlb *entry, *next;

	if (!opts.link_remap_ok)
		return;

	list_for_each_entry_safe(entry, next, &link_remaps, list) {
		int root_fd = mntns_get_root_fd(entry->mnt_ns);

		if (root_fd < 0)
			return;

		list_del(&entry->list);

		if (do_unlink)
			unlinkat(root_fd, entry->path, 0);

		xfree(entry->path);
		xfree(entry);
	}
}
/* Forget all recorded link remaps and remove the links from disk. */
void delete_link_remaps(void)
{
	const bool remove_links = true;

	__rollback_link_remaps(remove_links);
}
/* Forget all recorded link remaps but leave the links on disk. */
void free_link_remaps(void)
{
	const bool remove_links = false;

	__rollback_link_remaps(remove_links);
}
/*
 * Create a hard link to the file behind @lfd in the directory @path
 * used to sit in, and dump a regular file entry for the new name.
 *
 * @path:  absolute path of the original file
 * @len:   length of @path, without the terminating NUL
 * @idp:   receives the id generated for the new regular file entry
 * @nsid:  mount namespace the path belongs to
 *
 * The created link is remembered on the link_remaps list so that it
 * can be rolled back later.
 *
 * Returns 0 on success, non-zero on failure.
 */
static int create_link_remap(char *path, int len, int lfd,
				u32 *idp, struct ns_id *nsid)
{
	char link_name[PATH_MAX], *tmp;
	RegFileEntry rfe = REG_FILE_ENTRY__INIT;
	FownEntry fwn = FOWN_ENTRY__INIT;
	struct link_remap_rlb *rlb;
	int mntns_root, n;
	size_t room;

	if (!opts.link_remap_ok) {
		pr_err("Can't create link remap for %s. "
				"Use " LREMAP_PARAM " option.\n", path);
		return -1;
	}

	/* The leading dot, the path itself and a NUL must fit */
	if (len <= 0 || (size_t)len + 2 > sizeof(link_name)) {
		pr_err("Bad path length %d for link remap\n", len);
		return -1;
	}

	/*
	 * Linked remapping -- we create a hard link on a removed file
	 * in the directory original file used to sit.
	 *
	 * Bad news is than we can't easily open lfd's parent dir. Thus
	 * we have to just generate an absolute path and use it. The linkat
	 * will fail if we chose the bad one.
	 */

	link_name[0] = '.';
	memcpy(link_name + 1, path, len);
	link_name[len + 1] = '\0';

	/* Find the last slash, the new name goes right after it */
	tmp = link_name + len;
	while (*tmp != '/') {
		BUG_ON(tmp == link_name);
		tmp--;
	}

	fd_id_generate_special(NULL, idp);
	rfe.id		= *idp;
	rfe.flags	= 0;
	rfe.pos		= 0;
	rfe.fown	= &fwn;
	rfe.name	= link_name + 1;

	/* Any 'unique' name works here actually. Remap works by reg-file ids. */
	room = sizeof(link_name) - (size_t)(tmp + 1 - link_name);
	n = snprintf(tmp + 1, room, "link_remap.%d", rfe.id);
	if (n < 0 || (size_t)n >= room) {
		pr_err("Link remap name for %s is too long\n", path);
		return -1;
	}

	mntns_root = mntns_get_root_fd(nsid);
	if (mntns_root < 0)
		return -1;

	if (linkat(lfd, "", mntns_root, link_name, AT_EMPTY_PATH) < 0) {
		pr_perror("Can't link remap to %s", path);
		return -1;
	}

	/*
	 * Remember the name to delete it if needed on error or
	 * rollback action. Note we don't expect that there will
	 * be a HUGE number of link remaps, so in a sake of speed
	 * we keep all data in memory.
	 */
	rlb = xmalloc(sizeof(*rlb));
	if (!rlb)
		goto err1;

	rlb->path = strdup(link_name);
	if (!rlb->path)
		goto err2;

	rlb->mnt_ns = nsid;
	list_add(&rlb->list, &link_remaps);

	return pb_write_one(img_from_set(glob_imgset, CR_FD_REG_FILES), &rfe, PB_REG_FILE);

err2:
	xfree(rlb);
err1:
	pr_perror("Can't register rollback for %s", path);
	/* Nobody will roll this link back for us, drop it right away */
	unlinkat(mntns_root, link_name, 0);
	return -1;
}
/*
 * Create a link remap for @path and write the remap entry binding
 * regular file @id to the newly created link.
 *
 * The entry carries no explicit remap type.
 *
 * Returns 0 on success, non-zero on failure.
 */
static int dump_linked_remap(char *path, int len, const struct stat *ost,
				int lfd, u32 id, struct ns_id *nsid)
{
	RemapFilePathEntry rpe = REMAP_FILE_PATH_ENTRY__INIT;
	struct cr_img *remap_img;
	u32 link_id;

	if (create_link_remap(path, len, lfd, &link_id, nsid) != 0)
		return -1;

	rpe.remap_id = link_id;
	rpe.orig_id = id;

	remap_img = img_from_set(glob_imgset, CR_FD_REMAP_FPATH);
	return pb_write_one(remap_img, &rpe, PB_REMAP_FPATH);
}
/*
 * Remember dead pids met so far in a growing static array.
 *
 * Returns 1 if @pid was already recorded, 0 if it has just been
 * added, -1 if the array could not be grown.
 */
static int have_seen_dead_pid(pid_t pid)
{
	static pid_t *dead_pids = NULL;
	static int n_dead_pids = 0;
	int idx = 0;

	while (idx < n_dead_pids) {
		if (dead_pids[idx] == pid)
			return 1;
		idx++;
	}

	if (xrealloc_safe(&dead_pids, sizeof(*dead_pids) * (n_dead_pids + 1)))
		return -1;

	dead_pids[n_dead_pids] = pid;
	n_dead_pids++;

	return 0;
}
/*
 * Write a procfs remap entry for dead process @pid, bound to regular
 * file @id. Only the first file met for a given pid produces an entry.
 *
 * Returns 0 on success (including the "already seen" case), non-zero
 * on failure.
 */
static int dump_dead_process_remap(pid_t pid, char *path, int len, const struct stat *ost,
				int lfd, u32 id, struct ns_id *nsid)
{
	RemapFilePathEntry rpe = REMAP_FILE_PATH_ENTRY__INIT;
	int seen = have_seen_dead_pid(pid);

	if (seen < 0)
		return -1;

	if (seen != 0) {
		pr_info("Found dead pid %d already, skipping remap\n", pid);
		return 0;
	}

	rpe.remap_type = REMAP_TYPE__PROCFS;
	rpe.has_remap_type = true;
	rpe.remap_id = pid;
	rpe.orig_id = id;

	return pb_write_one(img_from_set(glob_imgset, CR_FD_REMAP_FPATH),
			&rpe, PB_REMAP_FPATH);
}
/*
 * Check whether the last component of path @name looks like an NFS
 * silly-rename name: SILLYNAME_PREF followed by SILLYNAME_SUFF_LEN
 * hex digits.
 *
 * @name must contain at least one slash.
 */
static bool is_sillyrename_name(char *name)
{
	int i;

	name = strrchr(name, '/');
	BUG_ON(name == NULL); /* see check in dump_one_reg_file */
	name++;

	/*
	 * Strictly speaking this check is not bullet-proof. User
	 * can create file with this name by hands and we have no
	 * API to distinguish really-silly-renamed files from those
	 * fake names :(
	 *
	 * But since NFS people expect .nfsXXX files to be unstable,
	 * we treat them as such too.
	 */

	if (strncmp(name, SILLYNAME_PREF, sizeof(SILLYNAME_PREF) - 1))
		return false;

	name += sizeof(SILLYNAME_PREF) - 1;
	for (i = 0; i < SILLYNAME_SUFF_LEN; i++) {
		/*
		 * ctype functions are only defined for unsigned char
		 * values (and EOF), a plain char may be negative.
		 */
		if (!isxdigit((unsigned char)name[i]))
			return false;
	}

	return true;
}
/* True if the file lives on NFS and its name looks silly-renamed. */
static inline bool nfs_silly_rename(char *rpath, const struct fd_parms *parms)
{
	if (parms->fs_type != NFS_SUPER_MAGIC)
		return false;

	return is_sillyrename_name(rpath);
}
/*
 * Cut a trailing " (deleted)" tag off the link name, if there is one,
 * and adjust the recorded length accordingly. A name that consists of
 * the tag alone is left as is.
 */
static void strip_deleted(struct fd_link *link)
{
	const char postfix[] = " (deleted)";
	const size_t plen = strlen(postfix);

	if (link->len > plen) {
		size_t at = link->len - plen;

		if (!strcmp(&link->name[at], postfix)) {
			pr_debug("Strip '%s' tag from '%s'\n",
					postfix, link->name);
			link->name[at] = '\0';
			link->len -= plen;
		}
	}
}
/*
 * Decide whether the file behind @lfd needs a remap and dump one if so.
 *
 * @link:  path of the file, starting with a dot ("./foo/bar")
 * @parms: descriptor parameters (stat data and filesystem type)
 * @id:    id of the regular file entry being dumped
 * @nsid:  mount namespace the file belongs to
 *
 * Handled cases: procfs files of dead processes, devpts files,
 * completely unlinked files (ghosts), NFS silly-renamed files and
 * files whose name is no longer accessible (link remaps).
 *
 * Returns 0 if the file can be dumped by name (possibly with a remap
 * written), non-zero on error.
 */
static int check_path_remap(struct fd_link *link, const struct fd_parms *parms,
				int lfd, u32 id, struct ns_id *nsid)
{
	char *rpath = link->name;
	int plen = link->len;
	int ret, mntns_root;
	struct stat pst;
	const struct stat *ost = &parms->stat;

	if (parms->fs_type == PROC_SUPER_MAGIC) {
		/* The file points to /proc/pid/<foo> where pid is a dead
		 * process. We remap this file by adding this pid to be
		 * fork()ed into a TASK_HELPER state so that we can point to it
		 * on restore.
		 */
		pid_t pid;
		char *start, *end;

		/*
		 * skip "./proc/"
		 *
		 * Check for NULL before stepping over the slash, once
		 * 1 is added the pointer is never NULL.
		 */
		start = strstr(rpath, "/");
		if (!start)
			return -1;
		start = strstr(start + 1, "/");
		if (!start)
			return -1;
		start++;
		pid = strtol(start, &end, 10);

		/* if we didn't find another /, this path something
		 * like ./proc/kmsg, which we shouldn't mess with. */
		if (*end == '/') {
			*end = 0;
			ret = access(rpath, F_OK);
			*end = '/';

			if (ret) {
				pr_info("Dumping dead process remap of %d\n", pid);
				return dump_dead_process_remap(pid, rpath + 1, plen - 1, ost, lfd, id, nsid);
			}
		}

		return 0;
	} else if (parms->fs_type == DEVPTS_SUPER_MAGIC) {
		/*
		 * It's safe to call stripping here because
		 * file paths are having predefined format for
		 * this FS and can't have a valid " (deleted)"
		 * postfix as a part of not deleted filename.
		 */
		strip_deleted(link);
		/*
		 * Devpts devices/files are generated by the
		 * kernel itself so we should not try to generate
		 * any kind of ghost files here even if file is
		 * no longer exist.
		 */
		return 0;
	}

	if (ost->st_nlink == 0) {
		/*
		 * Unpleasant, but easy case. File is completely invisible
		 * from the FS. Just dump its contents and that's it. But
		 * be careful whether anybody still has any of its hardlinks
		 * also open.
		 */
		strip_deleted(link);
		return dump_ghost_remap(rpath + 1, ost, lfd, id, nsid);
	}

	if (nfs_silly_rename(rpath, parms)) {
		/*
		 * If this is NFS silly-rename file the path we have at hands
		 * will be accessible by fstat(), but once we kill the dumping
		 * tasks it will disappear. So we just go ahead an dump it as
		 * linked-remap file (NFS will allow us to create more hard
		 * links on it) to have some persistent name at hands.
		 */
		pr_debug("Dump silly-rename linked remap for %x\n", id);
		return dump_linked_remap(rpath + 1, plen - 1, ost, lfd, id, nsid);
	}

	mntns_root = mntns_get_root_fd(nsid);
	if (mntns_root < 0)
		return -1;

	ret = fstatat(mntns_root, rpath, &pst, 0);
	if (ret < 0) {
		/*
		 * Linked file, but path is not accessible (unless any
		 * other error occurred). We can create a temporary link to it
		 * using linkat with AT_EMPTY_PATH flag and remap it to this
		 * name.
		 */

		if (errno == ENOENT)
			return dump_linked_remap(rpath + 1, plen - 1,
							ost, lfd, id, nsid);

		pr_perror("Can't stat path");
		return -1;
	}

	if ((pst.st_ino != ost->st_ino) || (pst.st_dev != ost->st_dev)) {
		if (opts.evasive_devices &&
		    (S_ISCHR(ost->st_mode) || S_ISBLK(ost->st_mode)) &&
		    pst.st_rdev == ost->st_rdev)
			return 0;
		/*
		 * FIXME linked file, but the name we see it by is reused
		 * by somebody else. We can dump it with linked remaps, but
		 * we'll have difficulties on restore -- we will have to
		 * move the existing file aside, then restore this one,
		 * unlink, then move the original file back. It's fairly
		 * easy to do, but we don't do it now, since unlinked files
		 * have the "(deleted)" suffix in proc and name conflict
		 * is unlikely :)
		 */
		pr_err("Unaccessible path opened %u:%u, need %u:%u\n",
				(int)pst.st_dev, (int)pst.st_ino,
				(int)ost->st_dev, (int)ost->st_ino);
		return -1;
	}

	/*
	 * File is linked and visible by the name it is opened by
	 * this task. Go ahead and dump it.
	 */
	return 0;
}
/*
 * Tell whether the size of a file opened with @flags should be
 * recorded. Write-only files opened for appending (e.g. log files)
 * are skipped.
 */
static bool should_check_size(int flags)
{
	const bool write_only = (flags & O_ACCMODE) == O_WRONLY;
	const bool appending = (flags & O_APPEND) != 0;

	return !(write_only && appending);
}
/*
 * Dump one regular file descriptor.
 *
 * @lfd: local descriptor referring to the file
 * @id:  id to store in the regular file entry
 * @p:   descriptor parameters; p->link is used if set, otherwise the
 *       link is read with fill_fdlink()
 *
 * Writes remap information if the path needs it and then the regular
 * file entry itself. Returns 0 on success, non-zero on failure.
 */
int dump_one_reg_file(int lfd, u32 id, const struct fd_parms *p)
{
	RegFileEntry rfe = REG_FILE_ENTRY__INIT;
	struct fd_link local_link, *link = p->link;
	struct ns_id *nsid;

	if (!link) {
		if (fill_fdlink(lfd, p, &local_link))
			return -1;
		link = &local_link;
	}

	nsid = lookup_nsid_by_mnt_id(p->mnt_id);
	if (nsid == NULL) {
		pr_err("Can't lookup mount=%d for fd=%d path=%s\n",
			p->mnt_id, p->fd, link->name + 1);
		return -1;
	}

	if ((root_ns_mask & CLONE_NEWNS) && p->mnt_id >= 0) {
		rfe.has_mnt_id = true;
		rfe.mnt_id = p->mnt_id;
	}

	pr_info("Dumping path for %d fd via self %d [%s]\n",
			p->fd, lfd, &link->name[1]);

	/*
	 * The regular path we can handle should start with slash.
	 */
	if (link->name[1] != '/') {
		pr_err("The path [%s] is not supported\n", &link->name[1]);
		return -1;
	}

	if (check_path_remap(link, p, lfd, id, nsid))
		return -1;

	rfe.id = id;
	rfe.flags = p->flags;
	rfe.pos = p->pos;
	rfe.fown = (FownEntry *)&p->fown;
	rfe.name = &link->name[1];

	/* The size is recorded for regular files only, see should_check_size() */
	if (S_ISREG(p->stat.st_mode) && should_check_size(rfe.flags)) {
		rfe.size = p->stat.st_size;
		rfe.has_size = true;
	}

	return pb_write_one(img_from_set(glob_imgset, CR_FD_REG_FILES), &rfe, PB_REG_FILE);
}
/* Dump operations for regular files: type tag plus the dump callback. */
const struct fdtype_ops regfile_dump_ops = {
.type = FD_TYPES__REG,
.dump = dump_one_reg_file,
};
/*
 * Rewrite @src, a path seen via mount @smi, into @dst (at most @dlen
 * bytes) as a path seen via mount @dmi.
 *
 *	mi->mountpoint		./foo/bar
 *	mi->ns_mountpoint	/foo/bar
 *	rfi->path		foo/bar/baz
 *
 * The root of @smi must not be shorter than the root of @dmi.
 */
static void convert_path_from_another_mp(char *src, char *dst, int dlen,
					struct mount_info *smi,
					struct mount_info *dmi)
{
	const char *dst_mountpoint, *root_diff, *tail;

	BUG_ON(strlen(smi->root) < strlen(dmi->root));

	/* Absolute path to the destination mount point, leading slash dropped */
	dst_mountpoint = dmi->ns_mountpoint + 1;
	/* Difference between source and destination roots */
	root_diff = smi->root + strlen(dmi->root);
	/* Path relative to the source mount point */
	tail = src + strlen(smi->ns_mountpoint + 1);

	snprintf(dst, dlen, "%s/%s/%s", dst_mountpoint, root_diff, tail);
}
/*
 * linkat() wrapper that works inside a user namespace.
 *
 * @odir/@opath: existing file, @ndir/@npath: name to create,
 * @owner: uid the fsuid is switched to for the duration of the call
 * when a user namespace is being restored.
 *
 * Returns what linkat() returned. On failure errno is the one set by
 * linkat(): callers look at it (e.g. for EEXIST), so it is preserved
 * across the logging and setfsuid() calls done here.
 */
static int linkat_hard(int odir, char *opath, int ndir, char *npath, uid_t owner)
{
	int ret, saved_errno, old_fsuid = -1;

	if (root_ns_mask & CLONE_NEWUSER)
		/*
		 * Kernel has strange security restrictions about
		 * linkat. If the fsuid of the caller doesn't equals
		 * the uid of the file and the file is not "safe"
		 * one, then only global CAP_CHOWN will be allowed
		 * to link().
		 *
		 * Next, when we're in user namespace we're ns root,
		 * but not global CAP_CHOWN. Thus, even though we
		 * ARE ns root, we will not be allowed to link() at
		 * files that belong to regular users %)
		 *
		 * Fortunately, the setfsuid() requires ns-level
		 * CAP_SETUID which we have.
		 */

		old_fsuid = setfsuid(owner);

	ret = linkat(odir, opath, ndir, npath, 0);
	saved_errno = errno;
	if (ret < 0)
		pr_perror("Can't link %s -> %s", opath, npath);

	if (root_ns_mask & CLONE_NEWUSER) {
		setfsuid(old_fsuid);
		if (setfsuid(-1) != old_fsuid)
			pr_warn("Failed to restore old fsuid!\n");
		/*
		 * Don't fail here. We still have chances to run till
		 * the pie/restorer, and if _this_ guy fails to set
		 * the proper fsuid, then we'll abort the restore.
		 */
	}

	if (ret < 0)
		errno = saved_errno;

	return ret;
}
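/*
 * Restore-time path resolution with ghost/link-remaps handling.
 *
 * open_path() below resolves a descriptor's path and calls open_cb,
 * the routine that does the actual open; it differs for files,
 * directories, fifos, etc. rfi_remap() is its helper that links the
 * remap file under the name the descriptor is going to be opened by.
 */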
/*
 * Hard-link the remap file of @rfi under @rfi's own path.
 *
 * If the file and its remap sit on different mounts, both paths are
 * first converted to paths on the common lowest bind-mount. With an
 * unknown mount id (-1) the paths are used as they are.
 *
 * Returns the result of linkat_hard().
 */
static int rfi_remap(struct reg_file_info *rfi)
{
	struct mount_info *mi, *rmi, *tmi;
	char _path[PATH_MAX], _rpath[PATH_MAX];
	char *path = rfi->path;
	char *rpath = rfi->remap->path;
	int mntns_root;

	if (rfi->rfe->mnt_id == -1) {
		/* Know nothing about mountpoints */
		mntns_root = mntns_get_root_by_mnt_id(-1);
		return linkat_hard(mntns_root, rpath, mntns_root, path, rfi->remap->owner);
	}

	mi = lookup_mnt_id(rfi->rfe->mnt_id);

	if (rfi->rfe->mnt_id == rfi->remap->mnt_id) {
		/* Both links on the same mount point */
		tmi = mi;
	} else {
		rmi = lookup_mnt_id(rfi->remap->mnt_id);

		/*
		 * Find the common bind-mount. We know that one mount point was
		 * really mounted and all other were bind-mounted from it, so the
		 * lowest mount must contains all bind-mounts.
		 */
		tmi = mi;
		while (tmi->bind)
			tmi = tmi->bind;

		BUG_ON(tmi->s_dev != rmi->s_dev);
		BUG_ON(tmi->s_dev != mi->s_dev);

		/* Calculate paths on the device (root mount) */
		convert_path_from_another_mp(rfi->path, _path, sizeof(_path), mi, tmi);
		convert_path_from_another_mp(rfi->remap->path, _rpath, sizeof(_rpath), rmi, tmi);
		path = _path;
		rpath = _rpath;
	}

	pr_debug("%d: Link %s -> %s\n", tmi->mnt_id, rpath, path);
	mntns_root = mntns_get_root_fd(tmi->nsid);

	return linkat_hard(mntns_root, rpath, mntns_root, path, rfi->remap->owner);
}
/*
 * Open the file described by @d, handling ghost/link-remaps.
 *
 * @open_cb does the actual open relative to the mount namespace root
 * and gets @arg passed through; it differs for files, directories,
 * fifos, etc.
 *
 * For a remapped file the original name is temporarily re-created as
 * a hard link to the remap (or the remap path is used directly for
 * directories), the file is opened and the temporary name is removed.
 * The remap itself is removed when its last user is done.
 *
 * Returns the opened descriptor, or -1 on failure. On failure the
 * ghost mutex is released, rfi->path is restored and the descriptor,
 * if already opened, is closed.
 */
int open_path(struct file_desc *d,
		int(*open_cb)(int mntns_root, struct reg_file_info *, void *), void *arg)
{
	struct reg_file_info *rfi;
	int tmp, mntns_root;
	char *orig_path = NULL;

	if (inherited_fd(d, &tmp))
		return tmp;

	rfi = container_of(d, struct reg_file_info, d);
	if (rfi->remap) {
		mutex_lock(ghost_file_mutex);
		if (rfi->remap->is_dir) {
			/*
			 * FIXME Can't make directory under new name.
			 * Will have to open it under the ghost one :(
			 */
			orig_path = rfi->path;
			rfi->path = rfi->remap->path;
		} else if (rfi_remap(rfi) < 0) {
			static char tmp_path[PATH_MAX];

			if (errno != EEXIST) {
				pr_err("Can't link %s -> %s\n", rfi->path,
						rfi->remap->path);
				goto err;
			}

			/*
			 * The file whose name we're trying to create
			 * exists. Need to pick some other one, we're
			 * going to remove it anyway.
			 *
			 * Strictly speaking, this is cheating, file
			 * name shouldn't change. But since NFS with
			 * its silly-rename doesn't care, why should we?
			 */

			orig_path = rfi->path;
			rfi->path = tmp_path;
			snprintf(tmp_path, sizeof(tmp_path), "%s.cr_link", orig_path);
			pr_debug("Fake %s -> %s link\n", rfi->path, rfi->remap->path);

			if (rfi_remap(rfi) < 0) {
				pr_perror("Can't create even fake link!");
				goto err;
			}
		}
	}

	mntns_root = mntns_get_root_by_mnt_id(rfi->rfe->mnt_id);
	tmp = open_cb(mntns_root, rfi, arg);
	if (tmp < 0) {
		pr_perror("Can't open file %s", rfi->path);
		goto err;
	}

	if (rfi->rfe->has_size && !rfi->size_checked) {
		struct stat st;

		if (fstat(tmp, &st) < 0) {
			pr_perror("Can't fstat opened file");
			goto err_close;
		}

		if (st.st_size != rfi->rfe->size) {
			pr_err("File %s has bad size %"PRIu64" (expect %"PRIu64")\n",
					rfi->path, st.st_size,
					rfi->rfe->size);
			goto err_close;
		}

		/*
		 * This is only visible in the current process, so
		 * change w/o locks. Other tasks sharing the same
		 * file will get one via unix sockets.
		 */
		rfi->size_checked = true;
	}

	if (rfi->remap) {
		if (!rfi->remap->is_dir) {
			unlinkat(mntns_root, rfi->path, 0);
		}

		BUG_ON(!rfi->remap->users);
		if (--rfi->remap->users == 0) {
			pr_info("Unlink the ghost %s\n", rfi->remap->path);
			mntns_root = mntns_get_root_by_mnt_id(rfi->remap->mnt_id);
			unlinkat(mntns_root, rfi->remap->path, rfi->remap->is_dir ? AT_REMOVEDIR : 0);
		}

		if (orig_path)
			rfi->path = orig_path;
		mutex_unlock(ghost_file_mutex);
	}

	if (restore_fown(tmp, rfi->rfe->fown)) {
		close(tmp);
		return -1;
	}

	return tmp;

err_close:
	close(tmp);
err:
	/* Undo the path swap and drop the lock taken for remapped files */
	if (orig_path)
		rfi->path = orig_path;
	if (rfi->remap)
		mutex_unlock(ghost_file_mutex);
	return -1;
}
/*
 * Open @rfi's path relative to @ns_root_fd with the open flags @arg
 * points to (a u32). The file position is left untouched.
 *
 * Returns the descriptor or the negative openat() result.
 */
int do_open_reg_noseek_flags(int ns_root_fd, struct reg_file_info *rfi, void *arg)
{
	const u32 *flagsp = arg;
	int fd = openat(ns_root_fd, rfi->path, *flagsp);

	if (fd < 0)
		pr_perror("Can't open file %s on restore", rfi->path);

	return fd;
}
/*
 * Open @rfi with the flags stored in its image entry; @arg is ignored.
 * The file position is left untouched.
 */
static int do_open_reg_noseek(int ns_root_fd, struct reg_file_info *rfi, void *arg)
{
	void *saved_flags = &rfi->rfe->flags;

	return do_open_reg_noseek_flags(ns_root_fd, rfi, saved_flags);
}
/*
 * Open @rfi with its saved flags and restore the saved file position,
 * unless the position is recorded as -1.
 *
 * Returns the descriptor, the negative open result, or -1 if seeking
 * failed (the descriptor is closed in that case).
 */
static int do_open_reg(int ns_root_fd, struct reg_file_info *rfi, void *arg)
{
	int fd = do_open_reg_noseek(ns_root_fd, rfi, arg);

	if (fd < 0)
		return fd;

	/* Nothing to restore */
	if (rfi->rfe->pos == -1ULL)
		return fd;

	if (lseek(fd, rfi->rfe->pos, SEEK_SET) >= 0)
		return fd;

	pr_perror("Can't restore file pos");
	close(fd);
	return -1;
}
/* Open the regular file behind @fd without restoring its position. */
int open_reg_fd(struct file_desc *fd)
{
	void *no_arg = NULL;

	return open_path(fd, do_open_reg_noseek, no_arg);
}
/*
 * Open the regular file with the given @id.
 *
 * This one gets called by exe link, chroot and cwd
 * restoring code. No need in calling lseek on either
 * of them.
 *
 * Returns the descriptor or -1 if the file is unknown or can't be
 * opened.
 */
int open_reg_by_id(u32 id)
{
	struct file_desc *desc = find_file_desc_raw(FD_TYPES__REG, id);

	if (desc != NULL)
		return open_reg_fd(desc);

	pr_err("Can't find regfile for %#x\n", id);
	return -1;
}
/*
 * Open the file backing the mapping @vma.
 *
 * The vma->vmfd should have been assigned in collect_filemap.
 * We open file w/o lseek, as mappings don't care about it.
 *
 * Open flags are taken from the image when present; otherwise a
 * writable shared file mapping is opened read-write and everything
 * else read-only.
 */
int get_filemap_fd(struct vma_area *vma)
{
	u32 flags = O_RDONLY;

	BUG_ON(vma->vmfd == NULL);

	if (vma->e->has_fdflags) {
		flags = vma->e->fdflags;
	} else if ((vma->e->prot & PROT_WRITE) &&
			vma_area_is(vma, VMA_FILE_SHARED)) {
		flags = O_RDWR;
	}

	return open_path(vma->vmfd, do_open_reg_noseek_flags, &flags);
}
/*
 * Account one more user of the remap attached to @fdesc, if any.
 * @typ is a one-letter tag that only shows up in the debug message.
 */
static void remap_get(struct file_desc *fdesc, char typ)
{
	struct reg_file_info *rfi = container_of(fdesc, struct reg_file_info, d);

	if (!rfi->remap)
		return;

	pr_debug("One more remap user (%c) for %s\n",
			typ, rfi->remap->path);
	/* No lock, we're still single-process here */
	rfi->remap->users++;
}
/*
 * Collect one descriptor referring to a regular file. The remap user
 * counter is bumped only while the file's fd_info_head list is still
 * empty.
 */
static void collect_reg_fd(struct file_desc *fdesc,
		struct fdinfo_list_entry *fle, struct rst_info *ri)
{
	const int first_fd = list_empty(&fdesc->fd_info_head);

	if (first_fd)
		remap_get(fdesc, 'f');

	collect_gen_fd(fle, ri);
}
/* Open the regular file behind @fd and restore its file position. */
static int open_fe_fd(struct file_desc *fd)
{
	void *no_arg = NULL;

	return open_path(fd, do_open_reg, no_arg);
}
/*
 * Name callback for regular files: returns the stored path.
 * The @buf/@s scratch buffer is not used.
 */
static char *reg_file_path(struct file_desc *d, char *buf, size_t s)
{
	return container_of(d, struct reg_file_info, d)->path;
}
/*
 * Restore-side operations of a regular file descriptor: how to open
 * it, how to collect one more fd on it and how to get its name.
 */
static struct file_desc_ops reg_desc_ops = {
.type = FD_TYPES__REG,
.open = open_fe_fd,
.collect_fd = collect_reg_fd,
.name = reg_file_path,
};
/*
 * Look up the regular file @id for a special user.
 *
 * Files dumped for vmas/exe links can have remaps
 * configured. Need to bump-up users for them, otherwise
 * the open_path() would unlink the remap file after
 * the very first open.
 *
 * Returns the descriptor, or NULL if there is none; the absence is
 * reported as an error only when @optional is zero.
 */
struct file_desc *try_collect_special_file(u32 id, int optional)
{
	struct file_desc *desc = find_file_desc_raw(FD_TYPES__REG, id);

	if (desc != NULL) {
		remap_get(desc, 's');
		return desc;
	}

	if (!optional)
		pr_err("No entry for reg-file-ID %#x\n", id);

	return NULL;
}
/*
 * Collect callback for one regular file entry: binds the image entry
 * to the reg_file_info @o, derives the relative path and registers
 * the file descriptor.
 */
static int collect_one_regfile(void *o, ProtobufCMessage *base)
{
	static char dot[] = ".";
	struct reg_file_info *rfi = o;
	char *name;

	rfi->rfe = pb_msg(base, RegFileEntry);
	name = rfi->rfe->name;

	/* change "/foo" into "foo" and "/" into "." */
	rfi->path = (name[1] == '\0') ? dot : name + 1;

	rfi->size_checked = false;
	rfi->remap = NULL;

	pr_info("Collected [%s] ID %#x\n", rfi->path, rfi->rfe->id);
	return file_desc_add(&rfi->d, rfi->rfe->id, &reg_desc_ops);
}
/*
 * Collect descriptor for regular files: messages of type PB_REG_FILE
 * from the CR_FD_REG_FILES image are handed to collect_one_regfile()
 * together with a struct reg_file_info sized private area.
 */
struct collect_image_info reg_file_cinfo = {
.fd_type = CR_FD_REG_FILES,
.pb_type = PB_REG_FILE,
.priv_size = sizeof(struct reg_file_info),
.collect = collect_one_regfile,
};
int prepare_shared_reg_files(void)
{
ghost_file_mutex = shmalloc(sizeof(*ghost_file_mutex));
if (!ghost_file_mutex)
return -1;
mutex_init(ghost_file_mutex);
return 0;
}