2
0
mirror of https://github.com/checkpoint-restore/criu synced 2025-08-22 01:51:51 +00:00
criu/irmap.c
Pavel Emelyanov f7f76d6ba6 img: Introduce empty images
When an image of a certain type is not found, CRIU sometimes
fails, sometimes ignores this fact. I propose to ignore this
fact always and treat absent images as those containing no
objects inside (i.e. -- empty). If the later code flow will
_need_ objects, then criu will fail later.

Why would an object be explicitly required? For example, due to
restoring code reading the image with pb_read_one, w/o the
_eof suffix, thus requiring the object to be in the image.

Another example is objects dependencies. E.g. fdinfo objects
require various files objects. So missing image files will
result in non-resolved searches later.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
2015-03-13 14:42:54 +03:00

456 lines
8.6 KiB
C

/*
* IRMAP -- inode reverse mapping.
*
* Helps us to map inode number (and device) back to path
* so that we can restore inotify/fanotify-s.
*
* Scanning _is_ slow, so we limit it with hints, which are
* heuristically known places where notifies are typically put.
*/
#include <stdbool.h>
#include <fcntl.h>
#include <dirent.h>
#include <string.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>
#include "xmalloc.h"
#include "irmap.h"
#include "mount.h"
#include "log.h"
#include "util.h"
#include "image.h"
#include "stats.h"
#include "pstree.h"
#include "protobuf.h"
#include "protobuf/fsnotify.pb-c.h"
#include "protobuf/fh.pb-c.h"
#undef LOG_PREFIX
#define LOG_PREFIX "irmap: "
#define IRMAP_CACHE_BITS 5
#define IRMAP_CACHE_SIZE (1 << IRMAP_CACHE_BITS)
#define IRMAP_CACHE_MASK (IRMAP_CACHE_SIZE - 1)

/*
 * Pick the cache bucket for a (device, inode) pair: the sum of
 * the two numbers reduced to the low IRMAP_CACHE_BITS bits.
 */
static inline int irmap_hashfn(unsigned int s_dev, unsigned long i_ino)
{
	unsigned long sum = i_ino + s_dev;

	return sum & IRMAP_CACHE_MASK;
}
/*
 * One node of the inode reverse map: a path together with the
 * device/inode it was last seen at, plus (for directories) the
 * lazily filled list of its children.
 */
struct irmap {
/* device number, filled from stat or from a loaded cache entry */
unsigned int dev;
/* inode number; 0 means the entry has not been stat-ed yet */
unsigned long ino;
/* path of the entry; looked up relative to the root service fd with the first character skipped */
char *path;
/* next entry in the same cache[] hash bucket */
struct irmap *next;
/* set for entries loaded from the cache image; such entries are re-stat-ed before being trusted */
bool revalidate;
/* number of entries in kids; negative means the directory has not been listed yet */
int nr_kids;
/* array of child entries, grown by irmap_update_dir */
struct irmap *kids;
};
/* Hash table of known entries; buckets are chains linked via ->next and indexed by irmap_hashfn() */
static struct irmap *cache[IRMAP_CACHE_SIZE];
/*
 * Starting points for the scan, tried in order by irmap_lookup.
 * The list ends at the first entry with a NULL path.
 * nr_kids = -1 lets irmap_update_dir list the directory on first
 * use; nr_kids = 0 makes the scan check only the entry itself.
 * NOTE(review): the purpose of the "/no-such-path" entry is not
 * evident from this file -- confirm before relying on it.
 */
static struct irmap hints[] = {
{ .path = "/etc", .nr_kids = -1, },
{ .path = "/var/spool", .nr_kids = -1, },
{ .path = "/lib/udev", .nr_kids = -1, },
{ .path = "/.", .nr_kids = 0, },
{ .path = "/no-such-path", .nr_kids = -1, },
{ },
};
/*
 * Fill in the device and inode numbers of an entry that has not
 * been stat-ed yet (ino == 0) and link it into the cache.
 *
 * Entries that already have an inode number are left untouched.
 * Non-directories get nr_kids = 0 so that they are never listed.
 *
 * Returns 0 on success, -1 if the path cannot be stat-ed.
 */
static int irmap_update_stat(struct irmap *i)
{
	struct stat sb;
	unsigned bucket;
	int root_fd;

	if (i->ino != 0)
		return 0;

	root_fd = get_service_fd(ROOT_FD_OFF);

	pr_debug("Refresh stat for %s\n", i->path);
	/* path + 1 skips the first character so the lookup is relative to root_fd */
	if (fstatat(root_fd, i->path + 1, &sb, AT_SYMLINK_NOFOLLOW) != 0) {
		pr_perror("Can't stat %s", i->path);
		return -1;
	}

	i->dev = sb.st_dev;
	i->ino = sb.st_ino;
	i->revalidate = false;
	if (!S_ISDIR(sb.st_mode))
		i->nr_kids = 0; /* don't irmap_update_dir */

	/* push the entry onto the head of its hash chain */
	bucket = irmap_hashfn(i->dev, i->ino);
	i->next = cache[bucket];
	cache[bucket] = i;

	return 0;
}
/*
 * Update list of children, but don't cache any. Later
 * we'll scan them one-by-one and cache.
 *
 * Does nothing when the children were already listed
 * (nr_kids >= 0). On success t->kids holds one not-yet-stat-ed
 * entry per directory member and t->nr_kids is their count.
 * On failure everything allocated here is released, t->kids is
 * reset to NULL and t->nr_kids stays negative, so a later call
 * starts from scratch.
 *
 * Returns 0 on success, -1 on error.
 */
static int irmap_update_dir(struct irmap *t)
{
	int fd, nr = 0, i, dlen, mntns_root;
	DIR *dfd;
	struct dirent *de;

	if (t->nr_kids >= 0)
		return 0;

	mntns_root = get_service_fd(ROOT_FD_OFF);

	pr_debug("Refilling %s dir\n", t->path);
	fd = openat(mntns_root, t->path + 1, O_RDONLY);
	if (fd < 0) {
		pr_perror("Can't open %s", t->path);
		return -1;
	}

	dlen = strlen(t->path);
	dfd = fdopendir(fd);
	if (!dfd) {
		pr_perror("Can't opendir %s", t->path);
		/* fdopendir failed, so the descriptor is still ours */
		close(fd);
		return -1;
	}

	for (;;) {
		struct irmap *k;

		/*
		 * readdir reports both end-of-directory and errors
		 * with NULL; only errno tells them apart, so clear it
		 * right before every call.
		 */
		errno = 0;
		de = readdir(dfd);
		if (!de)
			break;

		if (dir_dots(de))
			continue;

		if (xrealloc_safe(&t->kids, (nr + 1) * sizeof(struct irmap)))
			goto out_err;

		k = &t->kids[nr];

		k->kids = NULL;	 /* for xrealloc above */
		k->ino = 0;	 /* for irmap_update_stat */
		k->nr_kids = -1; /* for irmap_update_dir */
		k->path = xmalloc(dlen + strlen(de->d_name) + 2);
		if (!k->path)
			goto out_err;

		sprintf(k->path, "%s/%s", t->path, de->d_name);
		/* count the kid only once it is fully set up */
		nr++;
	}

	if (errno) {
		pr_perror("Readdir failed");
		goto out_err;
	}

	/* closedir also closes fd, it must not be closed again */
	closedir(dfd);
	t->nr_kids = nr;
	return 0;

out_err:
	/* only the first nr kids own a path */
	for (i = 0; i < nr; i++)
		xfree(t->kids[i].path);
	xfree(t->kids);
	t->kids = NULL;
	closedir(dfd);
	return -1;
}
/*
 * Depth-first search of the tree rooted at t for the entry whose
 * device and inode match. Entries are stat-ed and directories are
 * listed on the way as needed.
 *
 * Returns the matching entry, or NULL when nothing matched or
 * when t could not be stat-ed or listed.
 */
static struct irmap *irmap_scan(struct irmap *t, unsigned int dev, unsigned long ino)
{
	int idx;

	if (irmap_update_stat(t) != 0)
		return NULL;

	if (t->dev == dev && t->ino == ino)
		return t;

	if (irmap_update_dir(t) != 0)
		return NULL;

	for (idx = 0; idx < t->nr_kids; idx++) {
		struct irmap *hit = irmap_scan(&t->kids[idx], dev, ino);

		if (hit != NULL)
			return hit;
	}

	return NULL;
}
/*
 * Check that a cached entry still points at the same device and
 * inode on disk.
 *
 * c is the entry to check and p is the link that currently
 * points at it. Returns 0 if the entry is still valid (its
 * revalidate flag is cleared). Returns 1 if it is stale: the
 * entry is then unlinked by storing its successor in *p, and
 * both its path and the entry itself are freed.
 */
static int irmap_revalidate(struct irmap *c, struct irmap **p)
{
	struct stat cur;
	bool still_valid = false;
	int root_fd = get_service_fd(ROOT_FD_OFF);

	pr_debug("Revalidate stat for %s\n", c->path);
	if (fstatat(root_fd, c->path + 1, &cur, AT_SYMLINK_NOFOLLOW)) {
		/* File can be (re)moved, so just treat it as invalid */
		pr_perror("Can't stat %s", c->path);
	} else {
		still_valid = (c->dev == cur.st_dev) && (c->ino == cur.st_ino);
	}

	if (still_valid) {
		c->revalidate = false;
		return 0;
	}

	pr_debug("\t%x:%lx is invalid\n", c->dev, c->ino);
	*p = c->next;
	xfree(c->path);
	xfree(c);
	return 1;
}
/* Set by irmap_predump_prep; tells irmap_lookup the root service fd is already in place */
static bool doing_predump = false;

/*
 * Map a (device, inode) pair back to a path.
 *
 * The cache bucket is searched first; entries loaded from the
 * cache image are re-checked against the filesystem and dropped
 * when stale. If the cache has no answer, the hints are scanned
 * in order.
 *
 * Returns the path stored in the matching entry (not a copy, the
 * caller must not free it), or NULL if nothing was found or the
 * root fd could not be set up.
 */
char *irmap_lookup(unsigned int s_dev, unsigned long i_ino)
{
	struct irmap *c, *h, **p;
	char *path = NULL;
	int hv;

	s_dev = kdev_to_odev(s_dev);

	pr_debug("Resolving %x:%lx path\n", s_dev, i_ino);

	/*
	 * If we're in predump, then processes already run
	 * and the root_item is already freed by that time.
	 * But the root service fd is already set by the
	 * irmap_predump_prep, so we just go ahead and scan.
	 *
	 * Return directly on failure: the timer has not been
	 * started yet, so it must not be stopped.
	 */
	if (!doing_predump &&
	    __mntns_get_root_fd(root_item->pid.real) < 0)
		return NULL;

	timing_start(TIME_IRMAP_RESOLVE);

	hv = irmap_hashfn(s_dev, i_ino);
	for (p = &cache[hv]; *p; ) {
		c = *p;
		if (!(c->dev == s_dev && c->ino == i_ino)) {
			p = &c->next;
			continue;
		}

		/*
		 * A stale entry is unlinked and freed by
		 * irmap_revalidate, which stores its successor in *p.
		 * Do not advance p here, otherwise the successor is
		 * skipped or, at the end of the chain, a NULL pointer
		 * is dereferenced.
		 */
		if (c->revalidate && irmap_revalidate(c, p))
			continue;

		pr_debug("\tFound %s in cache\n", c->path);
		path = c->path;
		goto out;
	}

	for (h = hints; h->path; h++) {
		pr_debug("Scanning %s hint\n", h->path);
		c = irmap_scan(h, s_dev, i_ino);
		if (c) {
			pr_debug("\tScanned %s\n", c->path);
			path = c->path;
			goto out;
		}
	}

out:
	timing_stop(TIME_IRMAP_RESOLVE);
	return path;
}
/*
 * IRMAP pre-cache -- do early irmap scan on pre-dump to reduce
 * the freeze time on dump
 *
 * Requests are collected with irmap_queue_cache and resolved
 * later, in one go, by irmap_predump_run.
 */
struct irmap_predump {
/* device and inode numbers of the object to resolve */
unsigned int dev;
unsigned long ino;
/* file handle entry copied from the caller of irmap_queue_cache */
FhEntry fh;
/* next queued request */
struct irmap_predump *next;
};
/* Head of the request list; irmap_queue_cache pushes new requests at the front */
static struct irmap_predump *predump_queue;
/*
 * Remember a (device, inode, file handle) triple so that
 * irmap_predump_run can resolve it later.
 *
 * The FhEntry is copied by value into the queued request and
 * the caller's handle pointer is cleared, so the queued copy is
 * the only one left referring to the handle buffer.
 *
 * Returns 0 on success, -1 if the request cannot be allocated.
 */
int irmap_queue_cache(unsigned int dev, unsigned long ino,
		FhEntry *fh)
{
	struct irmap_predump *req = xmalloc(sizeof(*req));

	if (req == NULL)
		return -1;

	req->dev = dev;
	req->ino = ino;
	req->fh = *fh;
	fh->handle = NULL; /* don't free in free_fhandle */

	pr_debug("Queue %x:%lx for pre-dump\n", dev, ino);

	/* push at the head of the queue */
	req->next = predump_queue;
	predump_queue = req;

	return 0;
}
/*
 * Switch irmap into pre-dump mode and set up the root service fd.
 *
 * Returns 0 on success, -1 if the root fd cannot be obtained.
 */
int irmap_predump_prep(void)
{
	/*
	 * Tasks are about to get released soon, but
	 * we'll need to do FS scan for irmaps. In this
	 * scan we will need to know the root dir tasks
	 * live in. Need to make sure the respective fd
	 * (service) is set to that root, so that the
	 * scan works and doesn't race with the tasks
	 * dying or changing root.
	 */
	doing_predump = true;

	if (__mntns_get_root_fd(root_item->pid.real) < 0)
		return -1;

	return 0;
}
/*
 * Resolve every queued pre-dump request and write the ones that
 * got a path into the irmap cache image.
 *
 * Processing stops at the first request that fails to resolve
 * or fails to be written.
 *
 * Returns 0 on success, -1 if the image cannot be opened,
 * otherwise the non-zero status of the step that failed.
 */
int irmap_predump_run(void)
{
	struct irmap_predump *ip;
	struct cr_img *img;
	int ret = 0;

	img = open_image_at(AT_FDCWD, CR_FD_IRMAP_CACHE, O_DUMP);
	if (img == NULL)
		return -1;

	pr_info("Running irmap pre-dump\n");

	for (ip = predump_queue; ip != NULL; ip = ip->next) {
		IrmapCacheEntry entry = IRMAP_CACHE_ENTRY__INIT;

		pr_debug("\tchecking %x:%lx\n", ip->dev, ip->ino);
		ret = check_open_handle(ip->dev, ip->ino, &ip->fh);
		if (ret) {
			pr_err("Failed to resolve %x:%lx\n", ip->dev, ip->ino);
			break;
		}

		/* nothing to record when no path was attached to the handle */
		if (!ip->fh.path)
			continue;

		pr_info("Irmap cache %x:%lx -> %s\n", ip->dev, ip->ino, ip->fh.path);
		entry.dev = ip->dev;
		entry.inode = ip->ino;
		entry.path = ip->fh.path;

		ret = pb_write_one(img, &entry, PB_IRMAP_CACHE);
		if (ret)
			break;
	}

	close_image(img);
	return ret;
}
/*
 * Turn one entry read from the irmap cache image into an irmap
 * node and link it into the in-memory cache.
 *
 * The path is duplicated, so the caller stays the owner of ie
 * and may free it afterwards.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
static int irmap_cache_one(IrmapCacheEntry *ie)
{
	struct irmap *ic;
	unsigned hv;

	ic = xmalloc(sizeof(*ic));
	if (!ic)
		return -1;

	/* check the copy, not the source, to catch a failed xstrdup */
	ic->path = xstrdup(ie->path);
	if (!ic->path) {
		xfree(ic);
		return -1;
	}

	ic->dev = ie->dev;
	ic->ino = ie->inode;
	/* no children: the entry is never listed as a directory */
	ic->nr_kids = 0;
	ic->kids = NULL;

	/*
	 * We've loaded entry from cache, thus we'll need to check
	 * whether it's still valid when find it in cache.
	 */
	ic->revalidate = true;

	pr_debug("Pre-cache %x:%lx -> %s\n", ic->dev, ic->ino, ic->path);

	hv = irmap_hashfn(ic->dev, ic->ino);
	ic->next = cache[hv];
	cache[hv] = ic;

	return 0;
}
/*
 * Open the irmap cache image, looking first in the work dir and,
 * if the image there is empty, in the parent images directory.
 *
 * Returns 1 with *img set when an image was opened, 0 when there
 * is no cache to load, -1 on error.
 */
static int open_irmap_cache(struct cr_img **img)
{
	int dir = AT_FDCWD;

	pr_info("Searching irmap cache in work dir\n");

	for (;;) {
		*img = open_image_at(dir, CR_FD_IRMAP_CACHE, O_RSTR);
		if (dir != AT_FDCWD)
			close(dir);

		if (!empty_image(*img))
			break;

		close_image(*img);

		/* the parent dir was already tried, give up */
		if (dir != AT_FDCWD) {
			pr_info("No irmap cache\n");
			return 0;
		}

		pr_info("Searching irmap cache in parent\n");
		dir = openat(get_service_fd(IMG_FD_OFF),
				CR_PARENT_LINK, O_RDONLY);
		if (dir >= 0)
			continue;

		/* a missing parent link just means there is no cache */
		if (errno != ENOENT)
			return -1;

		pr_info("No irmap cache\n");
		return 0;
	}

	if (!*img)
		return -1;

	pr_info("... done\n");
	return 1;
}
/*
 * Load all entries of the irmap cache image, if one exists,
 * into the in-memory cache.
 *
 * Returns the result of open_irmap_cache when it is not
 * positive (0 for no cache, -1 for error). Otherwise returns
 * the non-positive status that ended the read loop, either from
 * pb_read_one_eof or from irmap_cache_one.
 */
int irmap_load_cache(void)
{
	int ret;
	struct cr_img *img;

	ret = open_irmap_cache(&img);
	if (ret <= 0)
		return ret;

	pr_info("Loading irmap cache\n");
	while (1) {
		IrmapCacheEntry *ic;

		ret = pb_read_one_eof(img, &ic, PB_IRMAP_CACHE);
		if (ret <= 0)
			break;

		ret = irmap_cache_one(ic);
		/*
		 * irmap_cache_one duplicates what it keeps, so the
		 * unpacked entry is released on failure as well.
		 */
		irmap_cache_entry__free_unpacked(ic, NULL);
		if (ret < 0)
			break;
	}

	close_image(img);
	return ret;
}