mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-22 09:58:09 +00:00
img-streamer: introduction of criu-image-streamer
This adds the ability to stream images with criu-image-streamer. The workflow is the following: 1) criu-image-streamer is started, and starts listening on a UNIX socket. 2) CRIU is started. img_streamer_init() is invoked, which connects to the socket. During dump/restore operations, instead of using local disk to open an image file, img_streamer_open() is called to provide a UNIX pipe that is sent over the UNIX socket. 3) Once the operation is done, img_streamer_finish() is called, and the UNIX socket is disconnected. criu-image-streamer can be found at: https://github.com/checkpoint-restore/criu-image-streamer Signed-off-by: Nicolas Viennot <Nicolas.Viennot@twosigma.com>
This commit is contained in:
parent
51c3f8a908
commit
7d79a58f4d
@ -76,6 +76,11 @@ The following levels are available:
|
||||
*-D*, *--images-dir* 'path'::
|
||||
Use 'path' as a base directory where to look for sets of image files.
|
||||
|
||||
*--stream*::
|
||||
dump/restore images using criu-image-streamer.
|
||||
See https://github.com/checkpoint-restore/criu-image-streamer for detailed
|
||||
usage.
|
||||
|
||||
*--prev-images-dir* 'path'::
|
||||
Use 'path' as a parent directory where to look for sets of image files.
|
||||
This option makes sense in case of incremental dumps.
|
||||
|
@ -29,6 +29,7 @@ obj-y += files-reg.o
|
||||
obj-y += fsnotify.o
|
||||
obj-y += image-desc.o
|
||||
obj-y += image.o
|
||||
obj-y += img-streamer.o
|
||||
obj-y += ipc_ns.o
|
||||
obj-y += irmap.o
|
||||
obj-y += kcmp-ids.o
|
||||
|
@ -510,6 +510,7 @@ int parse_options(int argc, char **argv, bool *usage_error,
|
||||
BOOL_OPT(SK_CLOSE_PARAM, &opts.tcp_close),
|
||||
{ "verbosity", optional_argument, 0, 'v' },
|
||||
{ "ps-socket", required_argument, 0, 1091},
|
||||
BOOL_OPT("stream", &opts.stream),
|
||||
{ "config", required_argument, 0, 1089},
|
||||
{ "no-default-config", no_argument, 0, 1090},
|
||||
{ "tls-cacert", required_argument, 0, 1092},
|
||||
|
@ -82,6 +82,7 @@
|
||||
#include "eventpoll.h"
|
||||
#include "memfd.h"
|
||||
#include "timens.h"
|
||||
#include "img-streamer.h"
|
||||
|
||||
/*
|
||||
* Architectures can overwrite this function to restore register sets that
|
||||
@ -1759,6 +1760,7 @@ static int cr_dump_finish(int ret)
|
||||
free_userns_maps();
|
||||
|
||||
close_service_fd(CR_PROC_FD_OFF);
|
||||
close_image_dir();
|
||||
|
||||
if (ret) {
|
||||
pr_err("Dumping FAILED.\n");
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "cr_options.h"
|
||||
#include "servicefd.h"
|
||||
#include "image.h"
|
||||
#include "img-streamer.h"
|
||||
#include "util.h"
|
||||
#include "util-pie.h"
|
||||
#include "criu-log.h"
|
||||
@ -2355,6 +2356,9 @@ skip_ns_bouncing:
|
||||
pr_info("Restore finished successfully. Tasks resumed.\n");
|
||||
write_stats(RESTORE_STATS);
|
||||
|
||||
/* This has the effect of dismissing the image streamer */
|
||||
close_image_dir();
|
||||
|
||||
ret = run_scripts(ACT_POST_RESUME);
|
||||
if (ret != 0)
|
||||
pr_err("Post-resume script ret code %d\n", ret);
|
||||
|
@ -343,7 +343,14 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
|
||||
if (req->parent_img)
|
||||
SET_CHAR_OPTS(img_parent, req->parent_img);
|
||||
|
||||
if (open_image_dir(images_dir_path) < 0) {
|
||||
/*
|
||||
* Image streaming is not supported with CRIU's service feature as
|
||||
* the streamer must be started for each dump/restore operation.
|
||||
* It is unclear how to do that with RPC, so we punt for now.
|
||||
* This explains why we provide the argument mode=-1 instead of
|
||||
* O_RSTR or O_DUMP.
|
||||
*/
|
||||
if (open_image_dir(images_dir_path, -1) < 0) {
|
||||
pr_perror("Can't open images directory");
|
||||
goto err;
|
||||
}
|
||||
|
@ -54,6 +54,20 @@ void flush_early_log_to_stderr(void)
|
||||
flush_early_log_buffer(STDERR_FILENO);
|
||||
}
|
||||
|
||||
static int image_dir_mode(char *argv[], int optind)
|
||||
{
|
||||
if (!strcmp(argv[optind], "dump") ||
|
||||
!strcmp(argv[optind], "pre-dump") ||
|
||||
(!strcmp(argv[optind], "cpuinfo") && !strcmp(argv[optind + 1], "dump")))
|
||||
return O_DUMP;
|
||||
|
||||
if (!strcmp(argv[optind], "restore") ||
|
||||
(!strcmp(argv[optind], "cpuinfo") && !strcmp(argv[optind + 1], "restore")))
|
||||
return O_RSTR;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[], char *envp[])
|
||||
{
|
||||
int ret = -1;
|
||||
@ -148,13 +162,30 @@ int main(int argc, char *argv[], char *envp[])
|
||||
}
|
||||
}
|
||||
|
||||
if (opts.stream && image_dir_mode(argv, optind) == -1) {
|
||||
pr_err("--stream cannot be used with the %s command\n", argv[optind]);
|
||||
goto usage;
|
||||
}
|
||||
|
||||
/* We must not open imgs dir, if service is called */
|
||||
if (strcmp(argv[optind], "service")) {
|
||||
ret = open_image_dir(opts.imgs_dir);
|
||||
ret = open_image_dir(opts.imgs_dir, image_dir_mode(argv, optind));
|
||||
if (ret < 0)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The kernel might send us lethal signals when writing to a pipe
|
||||
* which reader has disappeared. We deal with write() failures on our
|
||||
* own, and prefer not to get killed. So we ignore SIGPIPEs.
|
||||
*
|
||||
* Pipes are used in various places:
|
||||
* 1) Receiving application page data
|
||||
* 2) Transmitting data to the image streamer
|
||||
* 3) Emitting logs (potentially to a pipe).
|
||||
*/
|
||||
signal(SIGPIPE, SIG_IGN);
|
||||
|
||||
/*
|
||||
* When a process group becomes an orphan,
|
||||
* its processes are sent a SIGHUP signal
|
||||
@ -322,6 +353,7 @@ usage:
|
||||
" this requires running a second instance of criu\n"
|
||||
" in lazy-pages mode: 'criu lazy-pages -D DIR'\n"
|
||||
" --lazy-pages and lazy-pages mode require userfaultfd\n"
|
||||
" --stream dump/restore images using criu-image-streamer\n"
|
||||
"\n"
|
||||
"* External resources support:\n"
|
||||
" --external RES dump objects from this list as external resources:\n"
|
||||
|
@ -154,7 +154,6 @@ static int trim_last_parent(char *path)
|
||||
|
||||
static int copy_chunk_from_file(int fd, int img, off_t off, size_t len)
|
||||
{
|
||||
char *buf = NULL;
|
||||
int ret;
|
||||
|
||||
while (len > 0) {
|
||||
@ -167,7 +166,6 @@ static int copy_chunk_from_file(int fd, int img, off_t off, size_t len)
|
||||
len -= ret;
|
||||
}
|
||||
|
||||
xfree(buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -213,7 +211,6 @@ static int copy_file_to_chunks(int fd, struct cr_img *img, size_t file_size)
|
||||
|
||||
static int copy_chunk_to_file(int img, int fd, off_t off, size_t len)
|
||||
{
|
||||
char *buf = NULL;
|
||||
int ret;
|
||||
|
||||
while (len > 0) {
|
||||
@ -221,7 +218,11 @@ static int copy_chunk_to_file(int img, int fd, off_t off, size_t len)
|
||||
pr_perror("Can't seek file");
|
||||
return -1;
|
||||
}
|
||||
ret = sendfile(fd, img, NULL, len);
|
||||
|
||||
if (opts.stream)
|
||||
ret = splice(img, NULL, fd, NULL, len, SPLICE_F_MOVE);
|
||||
else
|
||||
ret = sendfile(fd, img, NULL, len);
|
||||
if (ret < 0) {
|
||||
pr_perror("Can't send data");
|
||||
return -1;
|
||||
@ -231,7 +232,6 @@ static int copy_chunk_to_file(int img, int fd, off_t off, size_t len)
|
||||
len -= ret;
|
||||
}
|
||||
|
||||
xfree(buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
32
criu/image.c
32
criu/image.c
@ -17,6 +17,7 @@
|
||||
#include "images/inventory.pb-c.h"
|
||||
#include "images/pagemap.pb-c.h"
|
||||
#include "proc_parse.h"
|
||||
#include "img-streamer.h"
|
||||
#include "namespaces.h"
|
||||
|
||||
bool ns_per_id = false;
|
||||
@ -415,13 +416,16 @@ static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long of
|
||||
|
||||
flags = oflags & ~(O_NOBUF | O_SERVICE | O_FORCE_LOCAL);
|
||||
|
||||
/*
|
||||
* For pages images dedup we need to open images read-write on
|
||||
* restore, that may require proper capabilities, so we ask
|
||||
* usernsd to do it for us
|
||||
*/
|
||||
if (root_ns_mask & CLONE_NEWUSER &&
|
||||
type == CR_FD_PAGES && oflags & O_RDWR) {
|
||||
if (opts.stream && !(oflags & O_FORCE_LOCAL)) {
|
||||
ret = img_streamer_open(path, flags);
|
||||
errno = EIO; /* errno value is meaningless, only the ret value is meaningful */
|
||||
} else if (root_ns_mask & CLONE_NEWUSER &&
|
||||
type == CR_FD_PAGES && oflags & O_RDWR) {
|
||||
/*
|
||||
* For pages images dedup we need to open images read-write on
|
||||
* restore, that may require proper capabilities, so we ask
|
||||
* usernsd to do it for us
|
||||
*/
|
||||
struct openat_args pa = {
|
||||
.flags = flags,
|
||||
.err = 0,
|
||||
@ -520,7 +524,12 @@ struct cr_img *img_from_fd(int fd)
|
||||
return img;
|
||||
}
|
||||
|
||||
int open_image_dir(char *dir)
|
||||
/*
|
||||
* `mode` should be O_RSTR or O_DUMP depending on the intent.
|
||||
* This is used when opts.stream is enabled for picking the right streamer
|
||||
* socket name. `mode` is ignored when opts.stream is not enabled.
|
||||
*/
|
||||
int open_image_dir(char *dir, int mode)
|
||||
{
|
||||
int fd, ret;
|
||||
|
||||
@ -535,7 +544,10 @@ int open_image_dir(char *dir)
|
||||
return -1;
|
||||
fd = ret;
|
||||
|
||||
if (opts.img_parent) {
|
||||
if (opts.stream) {
|
||||
if (img_streamer_init(dir, mode) < 0)
|
||||
goto err;
|
||||
} else if (opts.img_parent) {
|
||||
ret = symlinkat(opts.img_parent, fd, CR_PARENT_LINK);
|
||||
if (ret < 0 && errno != EEXIST) {
|
||||
pr_perror("Can't link parent snapshot");
|
||||
@ -556,6 +568,8 @@ err:
|
||||
|
||||
void close_image_dir(void)
|
||||
{
|
||||
if (opts.stream)
|
||||
img_streamer_finish();
|
||||
close_service_fd(IMG_FD_OFF);
|
||||
}
|
||||
|
||||
|
232
criu/img-streamer.c
Normal file
232
criu/img-streamer.c
Normal file
@ -0,0 +1,232 @@
|
||||
#include <sys/socket.h>
|
||||
#include <sys/un.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "cr_options.h"
|
||||
#include "img-streamer.h"
|
||||
#include "image.h"
|
||||
#include "images/img-streamer.pb-c.h"
|
||||
#include "protobuf.h"
|
||||
#include "servicefd.h"
|
||||
#include "rst-malloc.h"
|
||||
#include "common/scm.h"
|
||||
#include "common/lock.h"
|
||||
|
||||
/*
|
||||
* We use different path names for the dump and restore sockets because:
|
||||
* 1) The user may want to perform both at the same time (akin to live
|
||||
* migration). Specifying the same images-dir is convenient.
|
||||
* 2) It fails quickly when the user mixes up the streamer and CRIU operations.
|
||||
* (e.g., streamer is in capture mode, while CRIU is in restore mode).
|
||||
*/
|
||||
#define IMG_STREAMER_CAPTURE_SOCKET_NAME "streamer-capture.sock"
|
||||
#define IMG_STREAMER_SERVE_SOCKET_NAME "streamer-serve.sock"
|
||||
|
||||
/* All requests go through the same socket connection. We must synchronize */
|
||||
static mutex_t *img_streamer_fd_lock;
|
||||
|
||||
/* Either O_DUMP or O_RSTR */
|
||||
static int img_streamer_mode;
|
||||
|
||||
static const char *socket_name_for_mode(int mode)
|
||||
{
|
||||
switch (mode) {
|
||||
case O_DUMP: return IMG_STREAMER_CAPTURE_SOCKET_NAME;
|
||||
case O_RSTR: return IMG_STREAMER_SERVE_SOCKET_NAME;
|
||||
default: BUG(); return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* img_streamer_init() connects to the image streamer socket.
|
||||
* mode should be either O_DUMP or O_RSTR.
|
||||
*/
|
||||
int img_streamer_init(const char *image_dir, int mode)
|
||||
{
|
||||
struct sockaddr_un addr;
|
||||
int sockfd;
|
||||
|
||||
img_streamer_mode = mode;
|
||||
|
||||
sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
|
||||
if (sockfd < 0) {
|
||||
pr_perror("Unable to instantiate UNIX socket");
|
||||
return -1;
|
||||
}
|
||||
|
||||
memset(&addr, 0, sizeof(addr));
|
||||
addr.sun_family = AF_UNIX;
|
||||
snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s",
|
||||
image_dir, socket_name_for_mode(mode));
|
||||
|
||||
if (connect(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
|
||||
pr_perror("Unable to connect to image streamer socket: %s", addr.sun_path);
|
||||
goto err;
|
||||
}
|
||||
|
||||
img_streamer_fd_lock = shmalloc(sizeof(*img_streamer_fd_lock));
|
||||
if (!img_streamer_fd_lock) {
|
||||
pr_err("Failed to allocate memory\n");
|
||||
goto err;
|
||||
}
|
||||
mutex_init(img_streamer_fd_lock);
|
||||
|
||||
if (install_service_fd(IMG_STREAMER_FD_OFF, sockfd) < 0)
|
||||
goto err;
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
close(sockfd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* img_streamer_finish() indicates that no more files will be opened.
|
||||
* In other words, img_streamer_open() will no longer be called.
|
||||
*/
|
||||
void img_streamer_finish(void)
|
||||
{
|
||||
if (get_service_fd(IMG_STREAMER_FD_OFF) >= 0) {
|
||||
pr_info("Dismissing the image streamer\n");
|
||||
close_service_fd(IMG_STREAMER_FD_OFF);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The regular protobuf APIs pb_write_one() and pb_read_one() operate over a
|
||||
* `struct cr_img` object. Sadly, we don't have such object. We just have a
|
||||
* file descriptor. The following pb_write_one_fd() and pb_read_one_fd()
|
||||
* provide a protobuf API over a file descriptor. The implementation is a bit
|
||||
* of a hack, but should be fine. At some point we can revisit to have a
|
||||
* proper protobuf API over fds.
|
||||
*/
|
||||
static int pb_write_one_fd(int fd, void *obj, int type)
|
||||
{
|
||||
int ret;
|
||||
struct cr_img img;
|
||||
memset(&img, 0, sizeof(img));
|
||||
|
||||
img._x.fd = fd;
|
||||
ret = pb_write_one(&img, obj, type);
|
||||
if (ret < 0)
|
||||
pr_perror("Failed to communicate with the image streamer");
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int pb_read_one_fd(int fd, void **pobj, int type)
|
||||
{
|
||||
int ret;
|
||||
struct cr_img img;
|
||||
memset(&img, 0, sizeof(img));
|
||||
|
||||
img._x.fd = fd;
|
||||
ret = pb_read_one(&img, pobj, type);
|
||||
if (ret < 0)
|
||||
pr_perror("Failed to communicate with the image streamer");
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int send_file_request(char *filename)
|
||||
{
|
||||
ImgStreamerRequestEntry req = IMG_STREAMER_REQUEST_ENTRY__INIT;
|
||||
req.filename = filename;
|
||||
return pb_write_one_fd(get_service_fd(IMG_STREAMER_FD_OFF),
|
||||
&req, PB_IMG_STREAMER_REQUEST);
|
||||
}
|
||||
|
||||
static int recv_file_reply(bool *exists)
|
||||
{
|
||||
ImgStreamerReplyEntry *reply;
|
||||
int ret = pb_read_one_fd(get_service_fd(IMG_STREAMER_FD_OFF),
|
||||
(void **)&reply, PB_IMG_STREAMER_REPLY);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
*exists = reply->exists;
|
||||
free(reply);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Using a pipe for image file transfers allows the data to be spliced by the
|
||||
* image streamer, greatly improving performance.
|
||||
* Transfer rates of up to 15GB/s can be seen with this technique.
|
||||
*/
|
||||
#define READ_PIPE 0 /* index of the read pipe returned by pipe() */
|
||||
#define WRITE_PIPE 1
|
||||
static int establish_streamer_file_pipe(void)
|
||||
{
|
||||
/*
|
||||
* If the other end of the pipe closes, the kernel will want to kill
|
||||
* us with a SIGPIPE. These signal must be ignored, which we do in
|
||||
* crtools.c:main() with signal(SIGPIPE, SIG_IGN).
|
||||
*/
|
||||
int ret = -1;
|
||||
int criu_pipe_direction = img_streamer_mode == O_DUMP ? WRITE_PIPE : READ_PIPE;
|
||||
int streamer_pipe_direction = 1 - criu_pipe_direction;
|
||||
int fds[2];
|
||||
|
||||
if (pipe(fds) < 0) {
|
||||
pr_perror("Unable to create pipe");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (send_fd(get_service_fd(IMG_STREAMER_FD_OFF),
|
||||
NULL, 0, fds[streamer_pipe_direction]) < 0)
|
||||
close(fds[criu_pipe_direction]);
|
||||
else
|
||||
ret = fds[criu_pipe_direction];
|
||||
|
||||
close(fds[streamer_pipe_direction]);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int _img_streamer_open(char *filename)
|
||||
{
|
||||
if (send_file_request(filename) < 0)
|
||||
return -1;
|
||||
|
||||
if (img_streamer_mode == O_RSTR) {
|
||||
/* The streamer replies whether the file exists */
|
||||
bool exists;
|
||||
if (recv_file_reply(&exists) < 0)
|
||||
return -1;
|
||||
|
||||
if (!exists)
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
/*
|
||||
* When the image streamer encounters a fatal error, it won't report
|
||||
* errors via protobufs. Instead, CRIU will get a broken pipe error
|
||||
* when trying to access a streaming pipe. This behavior is similar to
|
||||
* what would happen if we were connecting criu and * criu-image-streamer
|
||||
* via a shell pipe.
|
||||
*/
|
||||
|
||||
return establish_streamer_file_pipe();
|
||||
}
|
||||
|
||||
/*
|
||||
* Opens an image file via a UNIX pipe with the image streamer.
|
||||
*
|
||||
* Return:
|
||||
* A file descriptor on success
|
||||
* -ENOENT when the file was not found.
|
||||
* -1 on any other error.
|
||||
*/
|
||||
int img_streamer_open(char *filename, int flags)
|
||||
{
|
||||
int ret;
|
||||
|
||||
BUG_ON(flags != img_streamer_mode);
|
||||
|
||||
mutex_lock(img_streamer_fd_lock);
|
||||
ret = _img_streamer_open(filename);
|
||||
mutex_unlock(img_streamer_fd_lock);
|
||||
return ret;
|
||||
}
|
@ -143,6 +143,7 @@ struct cr_options {
|
||||
int weak_sysctls;
|
||||
int status_fd;
|
||||
bool orphan_pts_master;
|
||||
int stream;
|
||||
pid_t tree_id;
|
||||
int log_level;
|
||||
char *imgs_dir;
|
||||
|
@ -145,7 +145,7 @@ static inline int img_raw_fd(struct cr_img *img)
|
||||
|
||||
extern off_t img_raw_size(struct cr_img *img);
|
||||
|
||||
extern int open_image_dir(char *dir);
|
||||
extern int open_image_dir(char *dir, int mode);
|
||||
extern void close_image_dir(void);
|
||||
|
||||
extern struct cr_img *open_image_at(int dfd, int type, unsigned long flags, ...);
|
||||
|
8
criu/include/img-streamer.h
Normal file
8
criu/include/img-streamer.h
Normal file
@ -0,0 +1,8 @@
|
||||
#ifndef IMAGE_STREAMER_H
|
||||
#define IMAGE_STREAMER_H
|
||||
|
||||
extern int img_streamer_init(const char *image_dir, int mode);
|
||||
extern void img_streamer_finish(void);
|
||||
extern int img_streamer_open(char *filename, int flags);
|
||||
|
||||
#endif /* IMAGE_STREAMER_H */
|
@ -62,8 +62,10 @@ enum {
|
||||
PB_GHOST_CHUNK,
|
||||
PB_FILE,
|
||||
PB_MEMFD_FILE,
|
||||
PB_MEMFD_INODE, /* 60 */
|
||||
PB_MEMFD_INODE,
|
||||
PB_TIMENS,
|
||||
PB_IMG_STREAMER_REQUEST,
|
||||
PB_IMG_STREAMER_REPLY,
|
||||
|
||||
/* PB_AUTOGEN_STOP */
|
||||
|
||||
|
@ -14,6 +14,7 @@ enum sfd_type {
|
||||
|
||||
LOG_FD_OFF,
|
||||
IMG_FD_OFF,
|
||||
IMG_STREAMER_FD_OFF,
|
||||
PROC_FD_OFF, /* fd with /proc for all proc_ calls */
|
||||
PROC_PID_FD_OFF,
|
||||
CR_PROC_FD_OFF, /* some other's proc fd:
|
||||
|
@ -1406,9 +1406,9 @@ static int prepare_vma_ios(struct pstree_item *t, struct task_restore_args *ta)
|
||||
/*
|
||||
* We optimize the case when rsti(t)->vma_io is empty.
|
||||
*
|
||||
* This is useful for for remote images, where all VMAs are premapped
|
||||
* (pr->pieok is false). This avoids re-opening the CR_FD_PAGES file,
|
||||
* which could be no longer be available.
|
||||
* This is useful when using the image streamer, where all VMAs are
|
||||
* premapped (pr->pieok is false). This avoids re-opening the
|
||||
* CR_FD_PAGES file, which may be readable only once.
|
||||
*/
|
||||
if (list_empty(&rsti(t)->vma_io)) {
|
||||
ta->vma_ios = NULL;
|
||||
|
@ -382,6 +382,10 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, unsigned lo
|
||||
int pfd;
|
||||
int pr_flags = (fd_type == CR_FD_PAGEMAP) ? PR_TASK : PR_SHMEM;
|
||||
|
||||
/* Image streaming lacks support for incremental images */
|
||||
if (opts.stream)
|
||||
goto out;
|
||||
|
||||
pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
|
||||
if (pfd < 0 && errno == ENOENT)
|
||||
goto out;
|
||||
@ -928,6 +932,10 @@ int check_parent_local_xfer(int fd_type, unsigned long img_id)
|
||||
struct stat st;
|
||||
int ret, pfd;
|
||||
|
||||
/* Image streaming lacks support for incremental images */
|
||||
if (opts.stream)
|
||||
return 0;
|
||||
|
||||
pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
|
||||
if (pfd < 0 && errno == ENOENT)
|
||||
return 0;
|
||||
|
@ -406,6 +406,49 @@ static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* We cannot use maybe_read_page_local() for streaming images as it uses
|
||||
* pread(), seeking in the file. Instead, we use this custom page reader.
|
||||
*/
|
||||
static int maybe_read_page_img_streamer(struct page_read *pr, unsigned long vaddr,
|
||||
int nr, void *buf, unsigned flags)
|
||||
{
|
||||
unsigned long len = nr * PAGE_SIZE;
|
||||
int fd = img_raw_fd(pr->pi);
|
||||
int ret;
|
||||
size_t curr = 0;
|
||||
|
||||
pr_debug("\tpr%lu-%u Read page from self %lx/%"PRIx64"\n",
|
||||
pr->img_id, pr->id, pr->cvaddr, pr->pi_off);
|
||||
|
||||
/* We can't seek. The requested address better match */
|
||||
BUG_ON(pr->cvaddr != vaddr);
|
||||
|
||||
while (1) {
|
||||
ret = read(fd, buf + curr, len - curr);
|
||||
if (ret == 0) {
|
||||
pr_err("Reached EOF unexpectedly while reading page from image\n");
|
||||
return -1;
|
||||
} else if (ret < 0) {
|
||||
pr_perror("Can't read mapping page %d", ret);
|
||||
return -1;
|
||||
}
|
||||
curr += ret;
|
||||
if (curr == len)
|
||||
break;
|
||||
}
|
||||
|
||||
if (opts.auto_dedup)
|
||||
pr_warn_once("Can't dedup when streaming images\n");
|
||||
|
||||
if (ret == 0 && pr->io_complete)
|
||||
ret = pr->io_complete(pr, vaddr, nr);
|
||||
|
||||
pr->pi_off += len;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int read_page_complete(unsigned long img_id, unsigned long vaddr, int nr_pages, void *priv)
|
||||
{
|
||||
int ret = 0;
|
||||
@ -601,6 +644,10 @@ static int try_open_parent(int dfd, unsigned long id, struct page_read *pr, int
|
||||
int pfd, ret;
|
||||
struct page_read *parent = NULL;
|
||||
|
||||
/* Image streaming lacks support for incremental images */
|
||||
if (opts.stream)
|
||||
goto out;
|
||||
|
||||
pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
|
||||
if (pfd < 0 && errno == ENOENT)
|
||||
goto out;
|
||||
@ -657,7 +704,19 @@ static int init_pagemaps(struct page_read *pr)
|
||||
off_t fsize;
|
||||
int nr_pmes, nr_realloc;
|
||||
|
||||
fsize = img_raw_size(pr->pmi);
|
||||
if (opts.stream) {
|
||||
/*
|
||||
* TODO - There is no easy way to estimate the size of the
|
||||
* pagemap that is still to be read from the pipe. Possible
|
||||
* solution is to ask the image streamer for the size of the
|
||||
* image. 1024 is a wild guess (more space is allocated if
|
||||
* needed).
|
||||
*/
|
||||
fsize = 1024;
|
||||
} else {
|
||||
fsize = img_raw_size(pr->pmi);
|
||||
}
|
||||
|
||||
if (fsize < 0)
|
||||
return -1;
|
||||
|
||||
@ -781,6 +840,8 @@ int open_page_read_at(int dfd, unsigned long img_id, struct page_read *pr, int p
|
||||
|
||||
if (remote)
|
||||
pr->maybe_read_page = maybe_read_page_remote;
|
||||
else if (opts.stream)
|
||||
pr->maybe_read_page = maybe_read_page_img_streamer;
|
||||
else {
|
||||
pr->maybe_read_page = maybe_read_page_local;
|
||||
if (!pr->parent && !opts.lazy_pages)
|
||||
|
@ -63,6 +63,7 @@
|
||||
#include "images/seccomp.pb-c.h"
|
||||
#include "images/binfmt-misc.pb-c.h"
|
||||
#include "images/autofs.pb-c.h"
|
||||
#include "images/img-streamer.pb-c.h"
|
||||
|
||||
struct cr_pb_message_desc cr_pb_descs[PB_MAX];
|
||||
|
||||
|
14
criu/util.c
14
criu/util.c
@ -423,13 +423,19 @@ int copy_file(int fd_in, int fd_out, size_t bytes)
|
||||
{
|
||||
ssize_t written = 0;
|
||||
size_t chunk = bytes ? bytes : 4096;
|
||||
ssize_t ret;
|
||||
|
||||
while (1) {
|
||||
ssize_t ret;
|
||||
|
||||
ret = sendfile(fd_out, fd_in, NULL, chunk);
|
||||
/*
|
||||
* When fd_out is a pipe, sendfile() returns -EINVAL, so we
|
||||
* fallback to splice(). Not sure why.
|
||||
*/
|
||||
if (opts.stream)
|
||||
ret = splice(fd_in, NULL, fd_out, NULL, chunk, SPLICE_F_MOVE);
|
||||
else
|
||||
ret = sendfile(fd_out, fd_in, NULL, chunk);
|
||||
if (ret < 0) {
|
||||
pr_perror("Can't send data to ghost file");
|
||||
pr_perror("Can't transfer data to ghost file from image");
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -65,6 +65,7 @@ proto-obj-y += macvlan.o
|
||||
proto-obj-y += sit.o
|
||||
proto-obj-y += memfd.o
|
||||
proto-obj-y += timens.o
|
||||
proto-obj-y += img-streamer.o
|
||||
|
||||
CFLAGS += -iquote $(obj)/
|
||||
|
||||
|
16
images/img-streamer.proto
Normal file
16
images/img-streamer.proto
Normal file
@ -0,0 +1,16 @@
|
||||
syntax = "proto2";
|
||||
|
||||
// This message is sent from CRIU to the streamer.
|
||||
// * During dump, it communicates the name of the file that is about to be sent
|
||||
// to the streamer.
|
||||
// * During restore, CRIU requests image files from the streamer. The message is
|
||||
// used to communicate the name of the desired file.
|
||||
message img_streamer_request_entry {
|
||||
required string filename = 1;
|
||||
}
|
||||
|
||||
// This message is sent from the streamer to CRIU. It is only used during
|
||||
// restore to report whether the requested file exists.
|
||||
message img_streamer_reply_entry {
|
||||
required bool exists = 1;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user