diff --git a/Documentation/criu.txt b/Documentation/criu.txt
index 28913a7fb..2729bc95a 100644
--- a/Documentation/criu.txt
+++ b/Documentation/criu.txt
@@ -156,6 +156,12 @@ In addition, *page-server* options may be specified.
     Turn on memory changes tracker in the kernel. If the option is
     not passed the memory tracker get turned on implicitly.
 
+*--pre-dump-mode*='mode'::
+    Select the pre-dump algorithm. Two values of 'mode' are supported:
+    'splice' is parasite based, whereas 'read' is based on the
+    process_vm_readv syscall. The 'read' mode reduces frozen time and
+    memory pressure compared to 'splice' mode. Default is 'splice'.
+
 *dump*
 ~~~~~~
 Performs a checkpoint procedure.
diff --git a/criu/config.c b/criu/config.c
index 2ad2fd43c..e5d42efe4 100644
--- a/criu/config.c
+++ b/criu/config.c
@@ -276,6 +276,7 @@ void init_opts(void)
 	opts.empty_ns = 0;
 	opts.status_fd = -1;
 	opts.log_level = DEFAULT_LOGLEVEL;
+	opts.pre_dump_mode = PRE_DUMP_SPLICE;
 }
 
 bool deprecated_ok(char *what)
@@ -517,6 +518,7 @@ int parse_options(int argc, char **argv, bool *usage_error,
 		BOOL_OPT("tls", &opts.tls),
 		{"tls-no-cn-verify", no_argument, &opts.tls_no_cn_verify, true},
 		{ "cgroup-yard", required_argument, 0, 1096 },
+		{ "pre-dump-mode", required_argument, 0, 1097},
 		{ },
 	};
 
@@ -818,6 +820,17 @@ int parse_options(int argc, char **argv, bool *usage_error,
 		case 1096:
 			SET_CHAR_OPTS(cgroup_yard, optarg);
 			break;
+		case 1097:
+			/* Set both modes explicitly so that the last option given wins */
+			if (!strcmp("read", optarg)) {
+				opts.pre_dump_mode = PRE_DUMP_READ;
+			} else if (!strcmp("splice", optarg)) {
+				opts.pre_dump_mode = PRE_DUMP_SPLICE;
+			} else {
+				pr_err("Unable to parse value of --pre-dump-mode\n");
+				return 1;
+			}
+			break;
 		case 'V':
 			pr_msg("Version: %s\n", CRIU_VERSION);
 			if (strcmp(CRIU_GITID, "0"))
diff --git a/criu/cr-service.c b/criu/cr-service.c
index 95ba2e5ce..392e9ac50 100644
--- a/criu/cr-service.c
+++ b/criu/cr-service.c
@@ -473,6 +473,19 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
 		opts.lazy_pages = req->lazy_pages;
 	}
 
+	if (req->has_pre_dump_mode) {
+		switch (req->pre_dump_mode) {
+		case CRIU_PRE_DUMP_MODE__SPLICE:
+			opts.pre_dump_mode = PRE_DUMP_SPLICE;
+			break;
+		case CRIU_PRE_DUMP_MODE__READ:
+			opts.pre_dump_mode = PRE_DUMP_READ;
+			break;
+		default:
+			goto err;
+		}
+	}
+
 	if (req->ps) {
 		opts.port = (short)req->ps->port;
 
diff --git a/criu/crtools.c b/criu/crtools.c
index 5740b806d..700fad994 100644
--- a/criu/crtools.c
+++ b/criu/crtools.c
@@ -428,6 +428,8 @@ usage:
 " pages images of previous dump\n"
 " when used on restore, as soon as page is restored, it\n"
 " will be punched from the image\n"
+" --pre-dump-mode splice - parasite based pre-dumping (default)\n"
+" read - process_vm_readv syscall based pre-dumping\n"
 "\n"
 "Page/Service server options:\n"
 " --address ADDR address of server or service\n"
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
index da7c10d69..2c1451e86 100644
--- a/criu/include/cr_options.h
+++ b/criu/include/cr_options.h
@@ -38,6 +38,12 @@ struct cg_root_opt {
 	char *newroot;
 };
 
+/*
+ * Pre-dump variants
+ */
+#define PRE_DUMP_SPLICE	1	/* Pre-dump using parasite */
+#define PRE_DUMP_READ	2	/* Pre-dump using process_vm_readv syscall */
+
 /*
  * Cgroup management options.
  */
@@ -81,6 +87,7 @@ struct cr_options {
 	int evasive_devices;
 	int link_remap_ok;
 	int log_file_per_pid;
+	int pre_dump_mode;
 	bool swrk_restore;
 	char *output;
 	char *root;
diff --git a/criu/mem.c b/criu/mem.c
index de66a6210..911b9d21c 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -482,7 +482,18 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
 	if (mdc->lazy)
 		memcpy(pargs_iovs(args), pp->iovs,
 		       sizeof(struct iovec) * pp->nr_iovs);
-	ret = drain_pages(pp, ctl, args);
+
+	/*
+	 * Faking drain_pages for pre-dump here. Actual drain_pages for pre-dump
+	 * will happen after task unfreezing in cr_pre_dump_finish(). This is
+	 * the actual optimization: it reduces the time for which the process
+	 * is frozen during pre-dump.
+	 */
+	if (mdc->pre_dump && opts.pre_dump_mode == PRE_DUMP_READ)
+		ret = 0;
+	else
+		ret = drain_pages(pp, ctl, args);
+
 	if (!ret && !mdc->pre_dump)
 		ret = xfer_pages(pp, &xfer);
 	if (ret)
diff --git a/images/rpc.proto b/images/rpc.proto
index c402259ac..fc2f1bce2 100644
--- a/images/rpc.proto
+++ b/images/rpc.proto
@@ -47,6 +47,11 @@ enum criu_cg_mode {
 	DEFAULT = 6;
 };
 
+enum criu_pre_dump_mode {
+	SPLICE = 1;
+	READ = 2;
+};
+
 message criu_opts {
 	required int32 images_dir_fd = 1;
 	optional int32 pid = 2; /* if not set on dump, will dump requesting process */
@@ -121,6 +126,7 @@ message criu_opts {
 	optional bool tls = 58;
 	optional bool tls_no_cn_verify = 59;
 	optional string cgroup_yard = 60;
+	optional criu_pre_dump_mode pre_dump_mode = 61 [default = SPLICE];
 /* optional bool check_mounts = 128; */
 }
 
diff --git a/lib/c/criu.c b/lib/c/criu.c
index 14ddff26d..fffb9fd9c 100644
--- a/lib/c/criu.c
+++ b/lib/c/criu.c
@@ -336,6 +336,26 @@ int criu_set_parent_images(const char *path)
 	return criu_local_set_parent_images(global_opts, path);
 }
 
+/*
+ * Select the pre-dump mode sent in the RPC request. Returns 0 on success
+ * and -1 if 'mode' is not a known criu_pre_dump_mode value; in that case
+ * the request is left untouched.
+ */
+int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode)
+{
+	if (mode != CRIU_PRE_DUMP_SPLICE && mode != CRIU_PRE_DUMP_READ)
+		return -1;
+
+	opts->rpc->has_pre_dump_mode = true;
+	opts->rpc->pre_dump_mode = mode;
+	return 0;
+}
+
+int criu_set_pre_dump_mode(enum criu_pre_dump_mode mode)
+{
+	return criu_local_set_pre_dump_mode(global_opts, mode);
+}
+
 void criu_local_set_track_mem(criu_opts *opts, bool track_mem)
 {
 	opts->rpc->has_track_mem = true;
diff --git a/lib/c/criu.h b/lib/c/criu.h
index cb37c5291..22db0fdcf 100644
--- a/lib/c/criu.h
+++ b/lib/c/criu.h
@@ -43,6 +43,11 @@ enum criu_cg_mode {
 	CRIU_CG_MODE_DEFAULT,
 };
 
+enum criu_pre_dump_mode {
+	CRIU_PRE_DUMP_SPLICE = 1,
+	CRIU_PRE_DUMP_READ = 2
+};
+
 int criu_set_service_address(const char *path);
 void criu_set_service_fd(int fd);
 int criu_set_service_binary(const char *path);
@@ -95,6 +100,7 @@ int criu_add_irmap_path(const char *path);
 int criu_add_inherit_fd(int fd, const char *key);
 int criu_add_external(const char *key);
 int criu_set_page_server_address_port(const char *address, int port);
+int criu_set_pre_dump_mode(enum criu_pre_dump_mode mode);
 
 /*
  * The criu_notify_arg_t na argument is an opaque
@@ -211,6 +217,7 @@ int criu_local_add_cg_yard(criu_opts *opts, const char *path);
 int criu_local_add_inherit_fd(criu_opts *opts, int fd, const char *key);
 int criu_local_add_external(criu_opts *opts, const char *key);
 int criu_local_set_page_server_address_port(criu_opts *opts, const char *address, int port);
+int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode);
 
 void criu_local_set_notify_cb(criu_opts *opts, int (*cb)(char *action, criu_notify_arg_t na));
 
diff --git a/test/zdtm.py b/test/zdtm.py
index f0a102413..3c0cee667 100755
--- a/test/zdtm.py
+++ b/test/zdtm.py
@@ -1019,6 +1019,7 @@ class criu:
         self.__tls = self.__tls_options() if opts['tls'] else []
         self.__criu_bin = opts['criu_bin']
         self.__crit_bin = opts['crit_bin']
+        self.__pre_dump_mode = opts['pre_dump_mode']
 
     def fini(self):
         if self.__lazy_migrate:
@@ -1249,6 +1250,8 @@ class criu:
             a_opts += ['--leave-stopped']
         if self.__empty_ns:
             a_opts += ['--empty-ns', 'net']
+        if self.__pre_dump_mode:
+            a_opts += ["--pre-dump-mode", "%s" % self.__pre_dump_mode]
 
         nowait = False
         if self.__lazy_migrate and action == "dump":
@@ -1835,7 +1838,7 @@ class Launcher:
               'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs',
               'freezecg', 'user', 'dry_run', 'noauto_dedup',
               'remote_lazy_pages', 'show_stats', 'lazy_migrate',
-              'tls', 'criu_bin', 'crit_bin')
+              'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode')
         arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd}))
 
         if self.__use_log:
@@ -2482,6 +2485,10 @@ rp.add_argument("--criu-bin",
 rp.add_argument("--crit-bin",
                 help="Path to crit binary",
                 default='../crit/crit')
+rp.add_argument("--pre-dump-mode",
+                help="Use splice or read mode of pre-dumping",
+                choices=['splice', 'read'],
+                default='splice')
 
 lp = sp.add_parser("list", help="List tests")
 lp.set_defaults(action=list_tests)