2
0
mirror of https://github.com/checkpoint-restore/criu synced 2025-08-22 18:07:57 +00:00
criu/files-reg.c

1283 lines
30 KiB
C
Raw Normal View History

#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/vfs.h>
#include <ctype.h>
/*
 * Stolen from kernel/fs/nfs/unlink.c
 *
 * A "silly-renamed" NFS file is recognised in this file as a name that
 * starts with SILLYNAME_PREF and is followed by SILLYNAME_SUFF_LEN hex
 * digits (see is_sillyrename_name()).
 */
#define SILLYNAME_PREF ".nfs"
/* Two hex digits per byte of a u64 plus two per byte of an unsigned int. */
#define SILLYNAME_SUFF_LEN (((unsigned)sizeof(u64) << 1) + ((unsigned)sizeof(unsigned int) << 1))
#include "cr_options.h"
#include "imgset.h"
#include "file-ids.h"
#include "mount.h"
#include "files.h"
#include "image.h"
#include "list.h"
#include "util.h"
#include "fs-magic.h"
#include "asm/atomic.h"
#include "namespaces.h"
#include "proc_parse.h"
#include "pstree.h"
#include "protobuf.h"
#include "protobuf/regfile.pb-c.h"
#include "protobuf/remap-file-path.pb-c.h"
#include "files-reg.h"
#include "plugin.h"
/*
 * Prototype for setfsuid(), declared here directly because none of the
 * headers included above is <sys/fsuid.h>. Used by linkat_hard().
 */
int setfsuid(uid_t fsuid);
/*
 * Ghost files are those not visible from the FS. Dumping them is
 * nasty and the only way we have -- just carry its contents with
 * us. Any brave soul to implement link unlinked file back?
 */
struct ghost_file {
/* Entry in the ghost_files list. */
struct list_head list;
/* Ghost id: taken from ghost_file_ids on dump, from the remap entry on restore. */
u32 id;
/* Device and inode the ghost was dumped from; used to find an existing ghost. */
u32 dev;
u32 ino;
/* Path, mount id and user count of the restored ghost (restore side only). */
struct file_remap remap;
};
/* Next ghost id handed out by dump_ghost_remap(). */
static u32 ghost_file_ids = 1;
/* All ghost files known so far. */
static LIST_HEAD(ghost_files);
/* Allocated with shmalloc() in prepare_shared_reg_files(); taken around remap user accounting. */
static mutex_t *ghost_file_mutex;
/*
 * To rollback link remaps.
 *
 * One entry is queued by create_link_remap() for every hard link it
 * creates; __rollback_link_remaps() walks the list to free the entries
 * and, optionally, unlink the names.
 */
struct link_remap_rlb {
/* Entry in the link_remaps list. */
struct list_head list;
/* Mount namespace whose root the path is resolved against. */
struct ns_id *mnt_ns;
/* strdup()-ed name of the created link; freed on rollback. */
char *path;
};
/* All link remaps created during this dump. */
static LIST_HEAD(link_remaps);
/*
 * Re-create a ghost file on disk.
 *
 * @gf:   ghost descriptor; gf->remap.path is the path relative to @root
 * @gfe:  ghost image entry carrying mode, owner and (for devices) rdev
 * @root: path of the mount root the ghost lives under
 * @img:  ghost image; regular file contents are copied from it
 *
 * FIFOs, device nodes and directories are created first and then opened;
 * anything else is created by the open() itself. Ownership is reset with
 * fchown() and, for regular files, the contents are copied from @img.
 *
 * Returns 0 on success, -1 on error.
 */
static int create_ghost(struct ghost_file *gf, GhostFileEntry *gfe, char *root, struct cr_img *img)
{
	int gfd, ghost_flags, ret = -1;
	char path[PATH_MAX];
	int plen;

	/* Refuse a truncated name: it would create the ghost in the wrong place. */
	plen = snprintf(path, sizeof(path), "%s/%s", root, gf->remap.path);
	if (plen < 0 || (size_t)plen >= sizeof(path)) {
		pr_err("Ghost file path %s/%s is too long\n", root, gf->remap.path);
		goto err;
	}

	if (S_ISFIFO(gfe->mode)) {
		if (mknod(path, gfe->mode, 0)) {
			pr_perror("Can't create node for ghost file");
			goto err;
		}
		ghost_flags = O_RDWR; /* To not block */
	} else if (S_ISCHR(gfe->mode) || S_ISBLK(gfe->mode)) {
		if (!gfe->has_rdev) {
			pr_err("No rdev for ghost device\n");
			goto err;
		}
		if (mknod(path, gfe->mode, gfe->rdev)) {
			pr_perror("Can't create node for ghost dev");
			goto err;
		}
		ghost_flags = O_WRONLY;
	} else if (S_ISDIR(gfe->mode)) {
		if (mkdir(path, gfe->mode)) {
			pr_perror("Can't make ghost dir");
			goto err;
		}
		ghost_flags = O_DIRECTORY;
	} else
		ghost_flags = O_WRONLY | O_CREAT | O_EXCL;

	gfd = open(path, ghost_flags, gfe->mode);
	if (gfd < 0) {
		pr_perror("Can't open ghost file %s", path);
		goto err;
	}

	if (fchown(gfd, gfe->uid, gfe->gid) < 0) {
		pr_perror("Can't reset user/group on ghost %s", path);
		goto err_c;
	}

	if (S_ISREG(gfe->mode)) {
		if (copy_file(img_raw_fd(img), gfd, 0) < 0)
			goto err_c;
	}

	ret = 0;
err_c:
	close(gfd);
err:
	return ret;
}
/*
 * Attach a ghost remap to @rfi, creating the ghost file on first use.
 *
 * If a ghost with id @rfe->remap_id is already on the ghost_files list it
 * is reused. Otherwise the ghost image is read, the file is re-created
 * next to @rfi->path (see create_ghost()) and a new entry is appended to
 * the list.
 *
 * Returns 0 on success, -1 on error.
 */
static int open_remap_ghost(struct reg_file_info *rfi,
		RemapFilePathEntry *rfe)
{
	struct ghost_file *gf;
	GhostFileEntry *gfe = NULL;
	struct cr_img *img;
	char *root;

	list_for_each_entry(gf, &ghost_files, list)
		if (gf->id == rfe->remap_id)
			goto gf_found;

	/*
	 * Ghost not found. We will create one in the same dir
	 * as the very first client of it thus resolving any
	 * issues with cross-device links.
	 */

	pr_info("Opening ghost file %#x for %s\n", rfe->remap_id, rfi->path);

	root = rst_get_mnt_root(rfi->rfe->mnt_id);
	if (root == NULL) {
		pr_err("The %d mount is not found\n", rfi->rfe->mnt_id);
		return -1;
	}

	gf = shmalloc(sizeof(*gf));
	if (!gf)
		return -1;

	gf->remap.path = xmalloc(PATH_MAX);
	if (!gf->remap.path)
		goto err;
	gf->remap.mnt_id = rfi->rfe->mnt_id;

	img = open_image(CR_FD_GHOST_FILE, O_RSTR, rfe->remap_id);
	if (!img)
		goto err;

	if (pb_read_one(img, &gfe, PB_GHOST_FILE) < 0)
		goto close_ifd;

	/*
	 * For old formats where optional has_[dev|ino] is
	 * not present we will have zeros here which is quite
	 * a sign for "absent" fields.
	 */
	gf->dev = gfe->dev;
	gf->ino = gfe->ino;

	/* snprintf() always terminates the string, strncpy() may not. */
	if (S_ISDIR(gfe->mode))
		snprintf(gf->remap.path, PATH_MAX, "%s", rfi->path);
	else
		snprintf(gf->remap.path, PATH_MAX, "%s.cr.%x.ghost", rfi->path, rfe->remap_id);

	if (create_ghost(gf, gfe, root, img))
		goto close_ifd;

	/* Take everything we need out of gfe before it is freed. */
	gf->id = rfe->remap_id;
	gf->remap.users = 0;
	gf->remap.is_dir = S_ISDIR(gfe->mode);
	gf->remap.owner = gfe->uid;

	ghost_file_entry__free_unpacked(gfe, NULL);
	close_image(img);

	list_add_tail(&gf->list, &ghost_files);
gf_found:
	rfi->remap = &gf->remap;
	return 0;

close_ifd:
	close_image(img);
err:
	if (gfe)
		ghost_file_entry__free_unpacked(gfe, NULL);
	xfree(gf->remap.path);
	shfree_last(gf);
	return -1;
}
/*
 * Attach a link remap to @rfi.
 *
 * The remap target is the regular file with id @rfe->remap_id; its path
 * is shared (not copied) with the new file_remap. When restoring into a
 * user namespace the current owner of the target is recorded so that
 * linkat_hard() can switch fsuid to it.
 *
 * Returns 0 on success, -1 on error.
 */
static int open_remap_linked(struct reg_file_info *rfi,
		RemapFilePathEntry *rfe)
{
	struct file_remap *rm;
	struct file_desc *rdesc;
	struct reg_file_info *rrfi;
	uid_t owner = -1;

	rdesc = find_file_desc_raw(FD_TYPES__REG, rfe->remap_id);
	if (!rdesc) {
		pr_err("Can't find target file %x\n", rfe->remap_id);
		return -1;
	}

	rm = xmalloc(sizeof(*rm));
	if (!rm)
		return -1;

	rrfi = container_of(rdesc, struct reg_file_info, d);
	pr_info("Remapped %s -> %s\n", rfi->path, rrfi->path);

	if (root_ns_mask & CLONE_NEWUSER) {
		int rfd;
		struct stat st;

		rfd = mntns_get_root_by_mnt_id(rfi->rfe->mnt_id);
		if (fstatat(rfd, rrfi->path, &st, AT_SYMLINK_NOFOLLOW)) {
			pr_perror("Can't get owner of link remap %s", rrfi->path);
			/* rm is not published anywhere yet, don't leak it */
			xfree(rm);
			return -1;
		}

		owner = st.st_uid;
	}

	rm->path = rrfi->path;
	rm->users = 0;
	rm->is_dir = false;
	rm->owner = owner;
	rm->mnt_id = rfi->rfe->mnt_id;
	rfi->remap = rm;
	return 0;
}
/*
 * Make sure a task with pid @rfe->remap_id exists in the process tree.
 *
 * If some tree item already carries that virtual pid nothing is done;
 * otherwise a helper item is allocated and hooked under root_item.
 *
 * Returns 0 on success, -1 if the helper can't be allocated.
 */
static int open_remap_dead_process(struct reg_file_info *rfi,
		RemapFilePathEntry *rfe)
{
	struct pstree_item *item;
	struct pstree_item *helper;

	for_each_pstree_item(item) {
		if (item->pid.virt != rfe->remap_id)
			continue;

		/* don't need to add multiple tasks */
		pr_info("Skipping helper for restoring /proc/%d; pid exists\n", rfe->remap_id);
		return 0;
	}

	helper = alloc_pstree_helper();
	if (helper == NULL)
		return -1;

	helper->pid.virt = rfe->remap_id;
	helper->sid = root_item->sid;
	helper->pgid = root_item->pgid;
	helper->parent = root_item;
	list_add_tail(&helper->sibling, &root_item->children);

	pr_info("Added a helper for restoring /proc/%d\n", helper->pid.virt);
	return 0;
}
/*
 * Collect callback for one remap image entry.
 *
 * Looks up the regular file the remap belongs to, fills in the remap
 * type for images that predate the remap_type field and dispatches to
 * the matching open_remap_*() routine.
 *
 * Returns what the dispatched routine returns, or -1 if the original
 * file or the remap type is unknown.
 */
static int collect_one_remap(void *obj, ProtobufCMessage *msg)
{
	RemapFilePathEntry *rfe = pb_msg(msg, RemapFilePathEntry);
	struct reg_file_info *rfi;
	struct file_desc *fdesc;

	fdesc = find_file_desc_raw(FD_TYPES__REG, rfe->orig_id);
	if (fdesc == NULL) {
		pr_err("Remap for non existing file %#x\n",
				rfe->orig_id);
		return -1;
	}

	rfi = container_of(fdesc, struct reg_file_info, d);
	pr_info("Configuring remap %#x -> %#x\n", rfi->rfe->id, rfe->remap_id);

	if (!rfe->has_remap_type) {
		/* backward compatibility with images */
		rfe->has_remap_type = true;
		if (rfe->remap_id & REMAP_GHOST) {
			rfe->remap_id &= ~REMAP_GHOST;
			rfe->remap_type = REMAP_TYPE__GHOST;
		} else {
			rfe->remap_type = REMAP_TYPE__LINKED;
		}
	}

	switch (rfe->remap_type) {
	case REMAP_TYPE__LINKED:
		return open_remap_linked(rfi, rfe);
	case REMAP_TYPE__GHOST:
		return open_remap_ghost(rfi, rfe);
	case REMAP_TYPE__PROCFS:
		return open_remap_dead_process(rfi, rfe);
	default:
		pr_err("unknown remap type %u\n", rfe->remap_type);
		return -1;
	}
}
/*
 * Collect descriptor for the remap image: every PB_REMAP_FPATH entry
 * read from CR_FD_REMAP_FPATH is handed to collect_one_remap().
 */
struct collect_image_info remap_cinfo = {
.fd_type = CR_FD_REMAP_FPATH,
.pb_type = PB_REMAP_FPATH,
.collect = collect_one_remap,
};
/*
 * Write the ghost image for one invisible file.
 *
 * @_fd:      descriptor of the file being dumped
 * @id:       ghost id, selects the image to write
 * @st:       stat of the file
 * @phys_dev: device number stored in the image
 *
 * The image holds a GhostFileEntry (owner, mode, dev/ino and, for
 * devices, rdev) followed, for regular files, by the file contents.
 *
 * Returns 0 on success, -1 on error. The image is closed on every path.
 */
static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_dev)
{
	struct cr_img *img;
	GhostFileEntry gfe = GHOST_FILE_ENTRY__INIT;
	int ret = -1;

	pr_info("Dumping ghost file contents (id %#x)\n", id);

	img = open_image(CR_FD_GHOST_FILE, O_DUMP, id);
	if (!img)
		return -1;

	gfe.uid = userns_uid(st->st_uid);
	gfe.gid = userns_gid(st->st_gid);
	gfe.mode = st->st_mode;

	gfe.has_dev = gfe.has_ino = true;
	gfe.dev = phys_dev;
	gfe.ino = st->st_ino;

	if (S_ISCHR(st->st_mode) || S_ISBLK(st->st_mode)) {
		gfe.has_rdev = true;
		gfe.rdev = st->st_rdev;
	}

	if (pb_write_one(img, &gfe, PB_GHOST_FILE))
		goto out;

	if (S_ISREG(st->st_mode)) {
		int fd, cret;
		char lpath[PSFDS];

		/*
		 * Reopen file locally since it may have no read
		 * permissions when drained
		 */
		snprintf(lpath, sizeof(lpath), "/proc/self/fd/%d", _fd);
		fd = open(lpath, O_RDONLY);
		if (fd < 0) {
			pr_perror("Can't open ghost original file");
			goto out;
		}

		cret = copy_file(fd, img_raw_fd(img), st->st_size);
		close(fd);
		if (cret)
			goto out;
	}

	ret = 0;
out:
	/* Single exit so the image is never leaked on failure. */
	close_image(img);
	return ret;
}
void remap_put(struct file_remap *remap)
{
mutex_lock(ghost_file_mutex);
if (--remap->users == 0) {
int mntns_root;
pr_info("Unlink the ghost %s\n", remap->path);
mntns_root = mntns_get_root_by_mnt_id(remap->mnt_id);
unlinkat(mntns_root, remap->path, 0);
}
mutex_unlock(ghost_file_mutex);
}
/*
 * Find the ghost remap created for the file identified by @dev / @ino.
 *
 * On a hit the remap's user count is bumped before it is returned, all
 * under ghost_file_mutex. Returns NULL when no ghost matches.
 */
struct file_remap *lookup_ghost_remap(u32 dev, u32 ino)
{
	struct file_remap *found = NULL;
	struct ghost_file *gf;

	mutex_lock(ghost_file_mutex);
	list_for_each_entry(gf, &ghost_files, list) {
		if (gf->ino != ino || gf->dev != dev)
			continue;

		gf->remap.users++;
		found = &gf->remap;
		break;
	}
	mutex_unlock(ghost_file_mutex);

	return found;
}
/*
 * Dump @path as a ghost file and record the remap for file @id.
 *
 * Files bigger than opts.ghost_limit are refused. A file (dev:ino pair)
 * that was already dumped as a ghost is not dumped again, only a new
 * remap entry pointing at the existing ghost id is written.
 *
 * Returns 0 on success, non-zero on error.
 */
static int dump_ghost_remap(char *path, const struct stat *st,
		int lfd, u32 id, struct ns_id *nsid)
{
	RemapFilePathEntry rpe = REMAP_FILE_PATH_ENTRY__INIT;
	struct ghost_file *pos, *gf = NULL;
	dev_t phys_dev;

	pr_info("Dumping ghost file for fd %d id %#x\n", lfd, id);

	if (st->st_size > opts.ghost_limit) {
		pr_err("Can't dump ghost file %s of %"PRIu64" size, increase limit\n",
				path, st->st_size);
		return -1;
	}

	phys_dev = phys_stat_resolve_dev(nsid, st->st_dev, path);

	/* Was this very file already dumped as a ghost? */
	list_for_each_entry(pos, &ghost_files, list) {
		if ((pos->dev == phys_dev) && (pos->ino == st->st_ino)) {
			gf = pos;
			break;
		}
	}

	if (gf == NULL) {
		gf = xmalloc(sizeof(*gf));
		if (gf == NULL)
			return -1;

		gf->dev = phys_dev;
		gf->ino = st->st_ino;
		gf->id = ghost_file_ids++;
		list_add_tail(&gf->list, &ghost_files);

		if (dump_ghost_file(lfd, gf->id, st, phys_dev))
			return -1;
	}

	rpe.orig_id = id;
	rpe.remap_id = gf->id;
	rpe.has_remap_type = true;
	rpe.remap_type = REMAP_TYPE__GHOST;

	return pb_write_one(img_from_set(glob_imgset, CR_FD_REMAP_FPATH),
			&rpe, PB_REMAP_FPATH);
}
/*
 * Walk the list of created link remaps and release every entry,
 * unlinking the link itself first when @do_unlink is set.
 *
 * Does nothing unless link remaps are enabled. The walk stops at the
 * first entry whose mount namespace root can't be obtained.
 */
static void __rollback_link_remaps(bool do_unlink)
{
	struct link_remap_rlb *cur, *next;

	if (!opts.link_remap_ok)
		return;

	list_for_each_entry_safe(cur, next, &link_remaps, list) {
		int root_fd = mntns_get_root_fd(cur->mnt_ns);

		if (root_fd < 0)
			return;

		list_del(&cur->list);

		if (do_unlink)
			unlinkat(root_fd, cur->path, 0);

		xfree(cur->path);
		xfree(cur);
	}
}
/* Remove every link remap from the filesystem and forget about it. */
void delete_link_remaps(void)
{
	__rollback_link_remaps(true);
}

/* Forget about every link remap, leaving the links in place. */
void free_link_remaps(void)
{
	__rollback_link_remaps(false);
}
/*
 * Create a temporary hard link to the file behind @lfd and dump a
 * reg-file entry describing it.
 *
 * @path: absolute path the file was opened by
 * @len:  number of bytes of @path to use
 * @lfd:  descriptor of the file to link
 * @idp:  receives the id generated for the link's reg-file entry
 * @nsid: mount namespace the path is resolved in
 *
 * The link is named "link_remap.<id>" and placed in the directory of
 * @path. It is also queued on link_remaps so it can be rolled back.
 *
 * Returns 0 on success, non-zero on error.
 */
static int create_link_remap(char *path, int len, int lfd,
		u32 *idp, struct ns_id *nsid)
{
	char link_name[PATH_MAX], *tmp;
	RegFileEntry rfe = REG_FILE_ENTRY__INIT;
	FownEntry fwn = FOWN_ENTRY__INIT;
	struct link_remap_rlb *rlb;
	int mntns_root, n;
	size_t room;

	if (!opts.link_remap_ok) {
		pr_err("Can't create link remap for %s. "
				"Use " LREMAP_PARAM " option.\n", path);
		return -1;
	}

	/* The leading '.', @len bytes of path and a terminator must fit. */
	if (len < 0 || (size_t)len + 1 >= sizeof(link_name)) {
		pr_err("Path %s is too long for link remap\n", path);
		return -1;
	}

	/*
	 * Linked remapping -- we create a hard link on a removed file
	 * in the directory original file used to sit.
	 *
	 * Bad news is than we can't easily open lfd's parent dir. Thus
	 * we have to just generate an absolute path and use it. The linkat
	 * will fail if we chose the bad one.
	 */

	link_name[0] = '.';
	memcpy(link_name + 1, path, len);
	/* Step back from the last copied byte to the last '/' */
	tmp = link_name + len;
	while (*tmp != '/') {
		BUG_ON(tmp == link_name);
		tmp--;
	}

	fd_id_generate_special(NULL, idp);
	rfe.id = *idp;
	rfe.flags = 0;
	rfe.pos = 0;
	rfe.fown = &fwn;
	rfe.name = link_name + 1;

	/*
	 * Any 'unique' name works here actually. Remap works by reg-file ids.
	 * The name goes right after the slash, so that's where the room
	 * left in the buffer is counted from.
	 */
	room = sizeof(link_name) - (size_t)(tmp + 1 - link_name);
	n = snprintf(tmp + 1, room, "link_remap.%d", rfe.id);
	if (n < 0 || (size_t)n >= room) {
		pr_err("Link remap name for %s is too long\n", path);
		return -1;
	}

	mntns_root = mntns_get_root_fd(nsid);
	if (mntns_root < 0)
		return -1;

	if (linkat(lfd, "", mntns_root, link_name, AT_EMPTY_PATH) < 0) {
		pr_perror("Can't link remap to %s", path);
		return -1;
	}

	/*
	 * Remember the name to delete it if needed on error or
	 * rollback action. Note we don't expect that there will
	 * be a HUGE number of link remaps, so in a sake of speed
	 * we keep all data in memory.
	 */
	rlb = xmalloc(sizeof(*rlb));
	if (!rlb)
		goto err1;

	rlb->path = strdup(link_name);
	if (!rlb->path)
		goto err2;

	rlb->mnt_ns = nsid;
	list_add(&rlb->list, &link_remaps);

	return pb_write_one(img_from_set(glob_imgset, CR_FD_REG_FILES), &rfe, PB_REG_FILE);

err2:
	xfree(rlb);
err1:
	pr_perror("Can't register rollback for %s", path);
	return -1;
}
/*
 * Dump file @id as a link remap: create the temporary hard link, then
 * write the remap entry pointing at the link's reg-file id. The entry
 * is written without a remap type.
 *
 * Returns 0 on success, non-zero on error.
 */
static int dump_linked_remap(char *path, int len, const struct stat *ost,
		int lfd, u32 id, struct ns_id *nsid)
{
	RemapFilePathEntry rpe = REMAP_FILE_PATH_ENTRY__INIT;
	struct cr_img *img;
	u32 link_id;

	if (create_link_remap(path, len, lfd, &link_id, nsid) != 0)
		return -1;

	rpe.orig_id = id;
	rpe.remap_id = link_id;

	img = img_from_set(glob_imgset, CR_FD_REMAP_FPATH);
	return pb_write_one(img, &rpe, PB_REMAP_FPATH);
}
/*
 * Remember @pid in a growing static array of dead pids.
 *
 * Returns 1 if the pid was already recorded, 0 if it has just been
 * added, -1 if the array can't be grown.
 */
static int have_seen_dead_pid(pid_t pid)
{
	static pid_t *dead_pids = NULL;
	static int n_dead_pids = 0;
	size_t idx = 0;

	while (idx < n_dead_pids) {
		if (dead_pids[idx] == pid)
			return 1;
		idx++;
	}

	if (xrealloc_safe(&dead_pids, sizeof(*dead_pids) * (n_dead_pids + 1)))
		return -1;

	dead_pids[n_dead_pids] = pid;
	n_dead_pids++;

	return 0;
}
/*
 * Write a PROCFS remap entry for file @id that points into /proc of the
 * dead process @pid. Only the first file seen for a given pid writes an
 * entry.
 *
 * Returns 0 on success (including the "already seen" case), non-zero on
 * error.
 */
static int dump_dead_process_remap(pid_t pid, char *path, int len, const struct stat *ost,
		int lfd, u32 id, struct ns_id *nsid)
{
	RemapFilePathEntry rpe = REMAP_FILE_PATH_ENTRY__INIT;
	int seen = have_seen_dead_pid(pid);

	if (seen < 0)
		return -1;

	if (seen != 0) {
		pr_info("Found dead pid %d already, skipping remap\n", pid);
		return 0;
	}

	rpe.has_remap_type = true;
	rpe.remap_type = REMAP_TYPE__PROCFS;
	rpe.remap_id = pid;
	rpe.orig_id = id;

	return pb_write_one(img_from_set(glob_imgset, CR_FD_REMAP_FPATH),
			&rpe, PB_REMAP_FPATH);
}
/*
 * Tell whether the last component of @name looks like an NFS
 * silly-rename name: SILLYNAME_PREF followed by SILLYNAME_SUFF_LEN hex
 * digits.
 *
 * @name must contain at least one '/'.
 */
static bool is_sillyrename_name(char *name)
{
	int i;

	name = strrchr(name, '/');
	BUG_ON(name == NULL); /* see check in dump_one_reg_file */
	name++;

	/*
	 * Strictly speaking this check is not bullet-proof. User
	 * can create file with this name by hands and we have no
	 * API to distinguish really-silly-renamed files from those
	 * fake names :(
	 *
	 * But since NFS people expect .nfsXXX files to be unstable,
	 * we treat them as such too.
	 */

	if (strncmp(name, SILLYNAME_PREF, sizeof(SILLYNAME_PREF) - 1))
		return false;

	name += sizeof(SILLYNAME_PREF) - 1;
	for (i = 0; i < SILLYNAME_SUFF_LEN; i++)
		/*
		 * The <ctype.h> classifiers are only defined for EOF and
		 * unsigned char values, and file names may hold bytes
		 * that are negative as plain char.
		 */
		if (!isxdigit((unsigned char)name[i]))
			return false;

	return true;
}
/* True when the file sits on NFS and @rpath has a silly-rename name. */
static inline bool nfs_silly_rename(char *rpath, const struct fd_parms *parms)
{
	if (parms->fs_type != NFS_SUPER_MAGIC)
		return false;

	return is_sillyrename_name(rpath);
}
/*
 * Cut a trailing " (deleted)" or "//deleted" tag off @link->name and
 * adjust @link->len accordingly. A name that consists of the tag alone
 * is left as is.
 *
 * Returns 1 if a tag was removed, 0 otherwise.
 */
int strip_deleted(struct fd_link *link)
{
	static const struct dcache_prepends {
		const char *str;
		size_t len;
	} prepends[] = {
		{ .str = " (deleted)",	.len = sizeof(" (deleted)") - 1, },
		{ .str = "//deleted",	.len = sizeof("//deleted") - 1, },
	};
	size_t idx;

	for (idx = 0; idx < ARRAY_SIZE(prepends); idx++) {
		const struct dcache_prepends *tag = &prepends[idx];
		size_t cut;

		/* The name must be strictly longer than the tag */
		if (link->len <= tag->len)
			continue;

		cut = link->len - tag->len;
		if (strcmp(&link->name[cut], tag->str) != 0)
			continue;

		pr_debug("Stip %s' tag from '%s'\n",
				tag->str, link->name);
		link->name[cut] = '\0';
		link->len -= tag->len;
		return 1;
	}

	return 0;
}
/*
 * Decide whether the file behind @lfd needs a remap and dump one if so.
 *
 * @link:  path of the file as seen via /proc, with a leading '.'
 * @parms: descriptor parameters (stat, fs type)
 * @lfd:   local descriptor of the file
 * @id:    reg-file id of the file being dumped
 * @nsid:  mount namespace the path lives in
 *
 * Cases handled, in order:
 *  - procfs file of a dead process: dead-process remap;
 *  - devpts: only the "deleted" tag is stripped, no remap;
 *  - unlinked file (nlink == 0): ghost remap;
 *  - NFS silly-renamed file: link remap;
 *  - path no longer exists (ENOENT): link remap;
 *  - path resolves to another file: error, unless it's the same device
 *    node and opts.evasive_devices is set.
 *
 * Returns 0 on success, non-zero on error.
 */
static int check_path_remap(struct fd_link *link, const struct fd_parms *parms,
		int lfd, u32 id, struct ns_id *nsid)
{
	char *rpath = link->name;
	int plen = link->len;
	int ret, mntns_root;
	struct stat pst;
	const struct stat *ost = &parms->stat;

	if (parms->fs_type == PROC_SUPER_MAGIC) {
		/* The file points to /proc/pid/<foo> where pid is a dead
		 * process. We remap this file by adding this pid to be
		 * fork()ed into a TASK_HELPER state so that we can point to it
		 * on restore.
		 */
		pid_t pid;
		char *start, *end;

		/*
		 * skip "./proc/"
		 *
		 * Check the search result before stepping over the slash:
		 * NULL + 1 is not NULL, so checking afterwards never fires.
		 */
		start = strstr(rpath, "/");
		if (!start)
			return -1;
		start = strstr(start + 1, "/");
		if (!start)
			return -1;
		start++;
		pid = strtol(start, &end, 10);

		/* if we didn't find another /, this path something
		 * like ./proc/kmsg, which we shouldn't mess with. */
		if (*end == '/') {
			*end = 0;
			ret = access(rpath, F_OK);
			*end = '/';

			if (ret) {
				pr_info("Dumping dead process remap of %d\n", pid);
				return dump_dead_process_remap(pid, rpath + 1, plen - 1, ost, lfd, id, nsid);
			}
		}

		return 0;
	} else if (parms->fs_type == DEVPTS_SUPER_MAGIC) {
		/*
		 * It's safe to call stripping here because
		 * file paths are having predefined format for
		 * this FS and can't have a valid " (deleted)"
		 * postfix as a part of not deleted filename.
		 */
		strip_deleted(link);
		/*
		 * Devpts devices/files are generated by the
		 * kernel itself so we should not try to generate
		 * any kind of ghost files here even if file is
		 * no longer exist.
		 */
		return 0;
	}

	if (ost->st_nlink == 0) {
		/*
		 * Unpleasant, but easy case. File is completely invisible
		 * from the FS. Just dump its contents and that's it. But
		 * be careful whether anybody still has any of its hardlinks
		 * also open.
		 */
		strip_deleted(link);
		return dump_ghost_remap(rpath + 1, ost, lfd, id, nsid);
	}

	if (nfs_silly_rename(rpath, parms)) {
		/*
		 * If this is NFS silly-rename file the path we have at hands
		 * will be accessible by fstat(), but once we kill the dumping
		 * tasks it will disappear. So we just go ahead an dump it as
		 * linked-remap file (NFS will allow us to create more hard
		 * links on it) to have some persistent name at hands.
		 */
		pr_debug("Dump silly-rename linked remap for %x\n", id);
		return dump_linked_remap(rpath + 1, plen - 1, ost, lfd, id, nsid);
	}

	mntns_root = mntns_get_root_fd(nsid);
	if (mntns_root < 0)
		return -1;

	ret = fstatat(mntns_root, rpath, &pst, 0);
	if (ret < 0) {
		/*
		 * Linked file, but path is not accessible (unless any
		 * other error occurred). We can create a temporary link to it
		 * uning linkat with AT_EMPTY_PATH flag and remap it to this
		 * name.
		 */

		if (errno == ENOENT)
			return dump_linked_remap(rpath + 1, plen - 1,
							ost, lfd, id, nsid);

		pr_perror("Can't stat path");
		return -1;
	}

	if ((pst.st_ino != ost->st_ino) || (pst.st_dev != ost->st_dev)) {
		if (opts.evasive_devices &&
		    (S_ISCHR(ost->st_mode) || S_ISBLK(ost->st_mode)) &&
		    pst.st_rdev == ost->st_rdev)
			return 0;
		/*
		 * FIXME linked file, but the name we see it by is reused
		 * by somebody else. We can dump it with linked remaps, but
		 * we'll have difficulties on restore -- we will have to
		 * move the exisint file aside, then restore this one,
		 * unlink, then move the original file back. It's fairly
		 * easy to do, but we don't do it now, since unlinked files
		 * have the "(deleted)" suffix in proc and name conflict
		 * is unlikely :)
		 */
		pr_err("Unaccessible path opened %u:%u, need %u:%u\n",
				(int)pst.st_dev, (int)pst.st_ino,
				(int)ost->st_dev, (int)ost->st_ino);
		return -1;
	}

	/*
	 * File is linked and visible by the name it is opened by
	 * this task. Go ahead and dump it.
	 */
	return 0;
}
/*
 * Tell whether the size of a file opened with @flags should be recorded.
 *
 * Files opened write-only in append mode (e.g. log files) are skipped.
 */
static bool should_check_size(int flags)
{
	const bool write_only = (flags & O_ACCMODE) == O_WRONLY;
	const bool appending = (flags & O_APPEND) != 0;

	return !(write_only && appending);
}
/*
 * Dump one regular file descriptor.
 *
 * @lfd: local descriptor of the file
 * @id:  reg-file id to dump it under
 * @p:   descriptor parameters; p->link, if set, is the already
 *       resolved path, otherwise it is read via fill_fdlink()
 *
 * Checks that the path is one we can handle, dumps a remap for it when
 * needed and writes the reg-file entry. The size is recorded only for
 * regular files that pass should_check_size().
 *
 * Returns 0 on success, non-zero on error.
 */
int dump_one_reg_file(int lfd, u32 id, const struct fd_parms *p)
{
	RegFileEntry rfe = REG_FILE_ENTRY__INIT;
	struct fd_link own_link, *link = p->link;
	struct ns_id *nsid;

	if (link == NULL) {
		if (fill_fdlink(lfd, p, &own_link))
			return -1;
		link = &own_link;
	}

	nsid = lookup_nsid_by_mnt_id(p->mnt_id);
	if (nsid == NULL) {
		pr_err("Can't lookup mount=%d for fd=%d path=%s\n",
			p->mnt_id, p->fd, link->name + 1);
		return -1;
	}

	if ((root_ns_mask & CLONE_NEWNS) && p->mnt_id >= 0) {
		rfe.has_mnt_id = true;
		rfe.mnt_id = p->mnt_id;
	}

	pr_info("Dumping path for %d fd via self %d [%s]\n",
			p->fd, lfd, &link->name[1]);

	/*
	 * The regular path we can handle should start with slash.
	 */
	if (link->name[1] != '/') {
		pr_err("The path [%s] is not supported\n", &link->name[1]);
		return -1;
	}

	if (check_path_remap(link, p, lfd, id, nsid))
		return -1;

	rfe.id = id;
	rfe.name = &link->name[1];
	rfe.flags = p->flags;
	rfe.pos = p->pos;
	rfe.fown = (FownEntry *)&p->fown;

	if (S_ISREG(p->stat.st_mode) && should_check_size(rfe.flags)) {
		rfe.size = p->stat.st_size;
		rfe.has_size = true;
	}

	return pb_write_one(img_from_set(glob_imgset, CR_FD_REG_FILES), &rfe, PB_REG_FILE);
}
/* Dump operations for FD_TYPES__REG descriptors: dumped by dump_one_reg_file(). */
const struct fdtype_ops regfile_dump_ops = {
.type = FD_TYPES__REG,
.dump = dump_one_reg_file,
};
/*
 * Rewrite @src, a path under mount @smi, as a path under mount @dmi.
 *
 * The result, written to @dst (at most @dlen bytes), is the destination
 * mount point, then the part of the source root that lies below the
 * destination root, then @src with the source mount point cut off.
 *
 *	mi->mountpoint		./foo/bar
 *	mi->ns_mountpoint	/foo/bar
 *	rfi->path		foo/bar/baz
 */
static void convert_path_from_another_mp(char *src, char *dst, int dlen,
		struct mount_info *smi,
		struct mount_info *dmi)
{
	size_t src_mp_len = strlen(smi->ns_mountpoint + 1);
	size_t dst_root_len = strlen(dmi->root);

	BUG_ON(strlen(smi->root) < dst_root_len);

	/*
	 * Create paths relative to this mount.
	 * Absolute path to the mount point + difference between source
	 * and destination roots + path relative to the mountpoint.
	 */
	snprintf(dst, dlen, "%s/%s/%s",
			dmi->ns_mountpoint + 1,
			smi->root + dst_root_len,
			src + src_mp_len);
}
/*
 * linkat() that also works from inside a user namespace.
 *
 * @odir/@opath: existing file
 * @ndir/@npath: name of the link to create
 * @owner:       uid to take as fsuid for the time of the call when
 *               restoring into a user namespace
 *
 * Returns what linkat() returned. On failure errno is the one set by
 * linkat(): callers inspect it (e.g. for EEXIST), so it is saved before
 * and restored after the logging and fsuid calls that follow.
 */
static int linkat_hard(int odir, char *opath, int ndir, char *npath, uid_t owner)
{
	int ret, saved_errno, old_fsuid = -1;

	if (root_ns_mask & CLONE_NEWUSER)
		/*
		 * Kernel has strange secutiry restrictions about
		 * linkat. If the fsuid of the caller doesn't equals
		 * the uid of the file and the file is not "safe"
		 * one, then only global CAP_CHOWN will be allowed
		 * to link().
		 *
		 * Next, when we're in user namespace we're ns root,
		 * but not global CAP_CHOWN. Thus, even though we
		 * ARE ns root, we will not be allowed to link() at
		 * files that belong to regular users %)
		 *
		 * Fortunately, the setfsuid() requires ns-level
		 * CAP_SETUID which we have.
		 */
		old_fsuid = setfsuid(owner);

	ret = linkat(odir, opath, ndir, npath, 0);
	saved_errno = errno;
	if (ret < 0)
		pr_perror("Can't link %s -> %s", opath, npath);

	if (root_ns_mask & CLONE_NEWUSER) {
		setfsuid(old_fsuid);
		if (setfsuid(-1) != old_fsuid)
			pr_warn("Failed to restore old fsuid!\n");
		/*
		 * Don't fail here. We still have chances to run till
		 * the pie/restorer, and if _this_ guy fails to set
		 * the proper fsuid, then we'll abort the restore.
		 */
	}

	if (ret < 0)
		errno = saved_errno;

	return ret;
}
/*
 * Hard-link the remap file of @rfi to the path @rfi is going to be
 * opened by.
 *
 * Without mount information, or when both names live on the same
 * mount, the paths are used as they are. Otherwise both are converted
 * to paths on the mount found by walking ->bind from the file's mount,
 * and the link is made there.
 *
 * Returns what linkat_hard() returns.
 */
static int rfi_remap(struct reg_file_info *rfi)
{
	char new_buf[PATH_MAX], old_buf[PATH_MAX];
	char *new_path, *old_path;
	struct mount_info *mi, *rmi, *tmi;
	int root_fd;

	if (rfi->rfe->mnt_id == -1) {
		/* Know nothing about mountpoints */
		root_fd = mntns_get_root_by_mnt_id(-1);
		return linkat_hard(root_fd, rfi->remap->path,
				root_fd, rfi->path, rfi->remap->owner);
	}

	mi = lookup_mnt_id(rfi->rfe->mnt_id);

	if (rfi->rfe->mnt_id == rfi->remap->mnt_id) {
		/* Both links on the same mount point */
		tmi = mi;
		new_path = rfi->path;
		old_path = rfi->remap->path;
	} else {
		rmi = lookup_mnt_id(rfi->remap->mnt_id);

		/*
		 * Find the common bind-mount. We know that one mount point was
		 * really mounted and all other were bind-mounted from it, so the
		 * lowest mount must contains all bind-mounts.
		 */
		tmi = mi;
		while (tmi->bind)
			tmi = tmi->bind;

		BUG_ON(tmi->s_dev != rmi->s_dev);
		BUG_ON(tmi->s_dev != mi->s_dev);

		/* Calcalate paths on the device (root mount) */
		new_path = new_buf;
		old_path = old_buf;
		convert_path_from_another_mp(rfi->path, new_path, sizeof(new_buf), mi, tmi);
		convert_path_from_another_mp(rfi->remap->path, old_path, sizeof(old_buf), rmi, tmi);
	}

	pr_debug("%d: Link %s -> %s\n", tmi->mnt_id, old_path, new_path);
	root_fd = mntns_get_root_fd(tmi->nsid);

	return linkat_hard(root_fd, old_path, root_fd, new_path, rfi->remap->owner);
}
int open_path(struct file_desc *d,
int(*open_cb)(int mntns_root, struct reg_file_info *, void *), void *arg)
{
struct reg_file_info *rfi;
int tmp, mntns_root;
char *orig_path = NULL;
if (inherited_fd(d, &tmp))
return tmp;
Add inherit fd support There are cases where a process's file descriptor cannot be restored from the checkpoint images. For example, a pipe file descriptor with one end in the checkpointed process and the other end in a separate process (that was not part of the checkpointed process tree) cannot be restored because after checkpoint the pipe will be broken. There are also cases where the user wants to use a new file during restore instead of the original file at checkpoint time. For example, the user wants to change the log file of a process from /path/to/oldlog to /path/to/newlog. In these cases, criu's caller should set up a new file descriptor to be inherited by the restored process and specify the file descriptor with the --inherit-fd command line option. The argument of --inherit-fd has the format fd[%d]:%s, where %d tells criu which of its own file descriptors to use for restoring the file identified by %s. As a debugging aid, if the argument has the format debug[%d]:%s, it tells criu to write out the string after colon to the file descriptor %d. This can be used, for example, as an easy way to leave a "restore marker" in the output stream of the process. It's important to note that inherit fd support breaks applications that depend on the state of the file descriptor being inherited. So, consider inherit fd only for specific use cases that you know for sure won't break the application. For examples please visit http://criu.org/Category:HOWTO. v2: Added a check in send_fd_to_self() to avoid closing an inherit fd. Also, as an extra measure of caution, added checks in the inherit fd look up functions to make sure that the inherit fd hasn't been reused. The patch also includes minor cosmetic changes. Signed-off-by: Saied Kazemi <saied@google.com> Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
2014-12-09 23:20:00 +03:00
rfi = container_of(d, struct reg_file_info, d);
if (rfi->remap) {
mutex_lock(ghost_file_mutex);
if (rfi->remap->is_dir) {
/*
* FIXME Can't make directory under new name.
* Will have to open it under the ghost one :(
*/
orig_path = rfi->path;
rfi->path = rfi->remap->path;
} else if (rfi_remap(rfi) < 0) {
static char tmp_path[PATH_MAX];
if (errno != EEXIST) {
pr_err("Can't link %s -> %s", rfi->path,
rfi->remap->path);
return -1;
}
/*
* The file whose name we're trying to create
* exists. Need to pick some other one, we're
* going to remove it anyway.
*
* Strictly speaking, this is cheating, file
* name shouldn't change. But since NFS with
* its silly-rename doesn't care, why should we?
*/
orig_path = rfi->path;
rfi->path = tmp_path;
snprintf(tmp_path, sizeof(tmp_path), "%s.cr_link", orig_path);
pr_debug("Fake %s -> %s link\n", rfi->path, rfi->remap->path);
if (rfi_remap(rfi) < 0) {
pr_perror("Can't create even fake link!");
return -1;
}
}
}
mntns_root = mntns_get_root_by_mnt_id(rfi->rfe->mnt_id);
tmp = open_cb(mntns_root, rfi, arg);
if (tmp < 0) {
pr_perror("Can't open file %s", rfi->path);
return -1;
}
if (rfi->rfe->has_size && !rfi->size_checked) {
struct stat st;
if (fstat(tmp, &st) < 0) {
pr_perror("Can't fstat opened file");
return -1;
}
if (st.st_size != rfi->rfe->size) {
pr_err("File %s has bad size %"PRIu64" (expect %"PRIu64")\n",
rfi->path, st.st_size,
rfi->rfe->size);
return -1;
}
/*
* This is only visible in the current process, so
* change w/o locks. Other tasks sharing the same
* file will get one via unix sockets.
*/
rfi->size_checked = true;
}
if (rfi->remap) {
if (!rfi->remap->is_dir) {
unlinkat(mntns_root, rfi->path, 0);
}
BUG_ON(!rfi->remap->users);
if (--rfi->remap->users == 0) {
pr_info("Unlink the ghost %s\n", rfi->remap->path);
mntns_root = mntns_get_root_by_mnt_id(rfi->remap->mnt_id);
unlinkat(mntns_root, rfi->remap->path, rfi->remap->is_dir ? AT_REMOVEDIR : 0);
}
if (orig_path)
rfi->path = orig_path;
mutex_unlock(ghost_file_mutex);
}
if (restore_fown(tmp, rfi->rfe->fown))
return -1;
return tmp;
}
/*
 * Open @rfi->path relative to @ns_root_fd with the open flags that
 * @arg (a pointer to u32) points to. The file position is left alone.
 *
 * Returns the new descriptor, or the negative openat() result.
 */
int do_open_reg_noseek_flags(int ns_root_fd, struct reg_file_info *rfi, void *arg)
{
	u32 flags = *(u32 *)arg;
	int fd = openat(ns_root_fd, rfi->path, flags);

	if (fd < 0)
		pr_perror("Can't open file %s on restore", rfi->path);

	return fd;
}
/*
 * Open @rfi->path with the flags saved in its image entry, without
 * touching the file position. @arg is not used.
 */
static int do_open_reg_noseek(int ns_root_fd, struct reg_file_info *rfi, void *arg)
{
	void *saved_flags = &rfi->rfe->flags;

	return do_open_reg_noseek_flags(ns_root_fd, rfi, saved_flags);
}
/*
 * Open @rfi->path with its saved flags and restore the saved file
 * position. A saved position of -1ULL means there is nothing to seek to.
 *
 * Returns the new descriptor, a negative value if the open failed, or
 * -1 (with the descriptor closed) if the seek failed.
 */
static int do_open_reg(int ns_root_fd, struct reg_file_info *rfi, void *arg)
{
	int fd = do_open_reg_noseek(ns_root_fd, rfi, arg);

	if (fd < 0)
		return fd;

	if (rfi->rfe->pos == -1ULL)
		return fd;

	if (lseek(fd, rfi->rfe->pos, SEEK_SET) >= 0)
		return fd;

	pr_perror("Can't restore file pos");
	close(fd);
	return -1;
}
/* Open the regular file behind @fd without restoring its position. */
int open_reg_fd(struct file_desc *fd)
{
	void *no_arg = NULL;

	return open_path(fd, do_open_reg_noseek, no_arg);
}
/*
 * Open the regular file with reg-file id @id.
 *
 * This one gets called by exe link, chroot and cwd restoring code. No
 * need in calling lseek on either of them.
 *
 * Returns the descriptor, or -1 if there is no such file or the open
 * failed.
 */
int open_reg_by_id(u32 id)
{
	struct file_desc *fd = find_file_desc_raw(FD_TYPES__REG, id);

	if (fd != NULL)
		return open_reg_fd(fd);

	pr_err("Can't find regfile for %#x\n", id);
	return -1;
}
/*
 * Open the file backing @vma.
 *
 * vma->vmfd must already be set. The open flags are the saved fdflags
 * if the image has them; otherwise O_RDWR for a writable shared file
 * mapping and O_RDONLY for anything else. The file is opened w/o lseek,
 * as mappings don't care about the position.
 */
int get_filemap_fd(struct vma_area *vma)
{
	u32 flags = O_RDONLY;

	BUG_ON(vma->vmfd == NULL);

	if (vma->e->has_fdflags) {
		flags = vma->e->fdflags;
	} else if ((vma->e->prot & PROT_WRITE) &&
			vma_area_is(vma, VMA_FILE_SHARED)) {
		flags = O_RDWR;
	}

	return open_path(vma->vmfd, do_open_reg_noseek_flags, &flags);
}
/*
 * Account one more user of the remap attached to @fdesc, if there is
 * one. @typ is a one-letter tag that only shows up in the debug log.
 */
static void remap_get(struct file_desc *fdesc, char typ)
{
	struct reg_file_info *rfi = container_of(fdesc, struct reg_file_info, d);

	if (!rfi->remap)
		return;

	pr_debug("One more remap user (%c) for %s\n",
			typ, rfi->remap->path);
	/* No lock, we're still sngle-process here */
	rfi->remap->users++;
}
/*
 * collect_fd callback for regular files: while the descriptor's
 * fd_info_head list is still empty, take a remap reference; then do the
 * generic collection.
 */
static void collect_reg_fd(struct file_desc *fdesc,
		struct fdinfo_list_entry *fle, struct rst_info *ri)
{
	bool no_fds_yet = list_empty(&fdesc->fd_info_head);

	if (no_fds_yet)
		remap_get(fdesc, 'f');

	collect_gen_fd(fle, ri);
}
/* open callback for regular files: open and restore the file position. */
static int open_fe_fd(struct file_desc *fd)
{
	void *no_arg = NULL;

	return open_path(fd, do_open_reg, no_arg);
}
/*
 * name callback for regular files: returns the path stored in the
 * descriptor's reg_file_info. @buf and @s are not used.
 */
static char *reg_file_path(struct file_desc *d, char *buf, size_t s)
{
	struct reg_file_info *info = container_of(d, struct reg_file_info, d);

	return info->path;
}
/*
 * Restore-side operations for FD_TYPES__REG descriptors; registered for
 * every collected regular file by collect_one_regfile().
 */
static struct file_desc_ops reg_desc_ops = {
.type = FD_TYPES__REG,
.open = open_fe_fd,
.collect_fd = collect_reg_fd,
.name = reg_file_path,
};
/*
 * Look up the regular file with reg-file id @id and take a remap
 * reference for it.
 *
 * Files dumped for vmas/exe links can have remaps configured. Need to
 * bump-up users for them, otherwise the open_path() would unlink the
 * remap file after the very first open.
 *
 * Returns the descriptor, or NULL if there is none; the miss is logged
 * as an error unless @optional is set.
 */
struct file_desc *try_collect_special_file(u32 id, int optional)
{
	struct file_desc *fdesc = find_file_desc_raw(FD_TYPES__REG, id);

	if (fdesc != NULL) {
		remap_get(fdesc, 's');
		return fdesc;
	}

	if (!optional)
		pr_err("No entry for reg-file-ID %#x\n", id);

	return NULL;
}
/*
 * Collect callback for one reg-file image entry.
 *
 * Sets up the reg_file_info in @o from the entry: the path is the
 * entry's name without its first character ("/foo" becomes "foo"),
 * and a one-character name ("/") becomes ".". The file starts with no
 * remap and with its size not yet checked.
 *
 * Returns what file_desc_add() returns.
 */
static int collect_one_regfile(void *o, ProtobufCMessage *base)
{
	static char dot[] = ".";
	struct reg_file_info *rfi = o;
	RegFileEntry *rfe = pb_msg(base, RegFileEntry);

	rfi->rfe = rfe;
	rfi->path = (rfe->name[1] == '\0') ? dot : rfe->name + 1;
	rfi->remap = NULL;
	rfi->size_checked = false;

	pr_info("Collected [%s] ID %#x\n", rfi->path, rfi->rfe->id);

	return file_desc_add(&rfi->d, rfi->rfe->id, &reg_desc_ops);
}
/*
 * Collect descriptor for the reg-files image: every PB_REG_FILE entry
 * read from CR_FD_REG_FILES gets a struct reg_file_info of private data
 * and is handed to collect_one_regfile().
 */
struct collect_image_info reg_file_cinfo = {
.fd_type = CR_FD_REG_FILES,
.pb_type = PB_REG_FILE,
.priv_size = sizeof(struct reg_file_info),
.collect = collect_one_regfile,
};
int prepare_shared_reg_files(void)
{
ghost_file_mutex = shmalloc(sizeof(*ghost_file_mutex));
if (!ghost_file_mutex)
return -1;
mutex_init(ghost_file_mutex);
return 0;
}