mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-31 22:35:33 +00:00
seize: Take --timeout option into account when freezing processes
When we're freezing processes we don't count on anything but rather do 5 attempts with constantly increasing delay. Lets rather do the following: - take --timeout option into account (which is 5 seconds by default) and split it into 100 ms steps; - when frezing processes check freezer status every 100 ms. Same time it looks that 5 seconds by default is too small for high loaded containers. Lets increase it to 10 seconds by default. [ skinsbursky@: Frankly speaking, in this particular case increasing intervals are not nice. This is not a network issue or something. Usually freezing takes less than a second, but more, that, say 200ms. Otherwise it takes quite o lot of time. If step size is growing all the time, in most of the cases criu will waste hundreds of milliseconds between iterX (say, third) and (iterX+1) because of the growing step size. 100ms step looks solid enough: not to small to produce a lot of syscalls and not to large to waste a lot of time. With previous scheme freezing was usually taking half a second more that it should because of this growing step. [ gorcunov@: You won't belive, but been able to sepcify --timeout 0 here allowed me and Stas to catch serieous problem in freezer code. https://lkml.org/lkml/2016/8/3/317 Without this feature we would have to patch criu instead. So you know, this would be great to keep it for catching more kernel bugs ;) Reported-by: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com> Signed-off-by: Cyrill Gorcunov <gorcunov@virtuozzo.com> Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
This commit is contained in:
committed by
Pavel Emelyanov
parent
5a43e55e24
commit
9fae23fbe2
@@ -38,7 +38,7 @@ struct cg_root_opt {
|
||||
*/
|
||||
#define DEFAULT_GHOST_LIMIT (1 << 20)
|
||||
|
||||
#define DEFAULT_TIMEOUT 5
|
||||
#define DEFAULT_TIMEOUT 10
|
||||
|
||||
struct irmap;
|
||||
|
||||
|
33
criu/seize.c
33
criu/seize.c
@@ -319,10 +319,30 @@ static int log_unfrozen_stacks(char *root)
|
||||
|
||||
static int freeze_processes(void)
|
||||
{
|
||||
int i, fd, exit_code = -1;
|
||||
int fd, exit_code = -1;
|
||||
char path[PATH_MAX];
|
||||
const char *state = thawed;
|
||||
|
||||
static const unsigned long step_ms = 100;
|
||||
unsigned long nr_attempts = (opts.timeout * 1000000) / step_ms;
|
||||
unsigned long i;
|
||||
|
||||
const struct timespec req = {
|
||||
.tv_nsec = step_ms * 1000000,
|
||||
.tv_sec = 0,
|
||||
};
|
||||
|
||||
if (unlikely(!nr_attempts)) {
|
||||
/*
|
||||
* If timeout is turned off, lets
|
||||
* wait for at least 10 seconds.
|
||||
*/
|
||||
nr_attempts = (10 * 1000000) / step_ms;
|
||||
}
|
||||
|
||||
pr_debug("freezing processes: %lu attempst with %lu ms steps\n",
|
||||
nr_attempts, step_ms);
|
||||
|
||||
snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup);
|
||||
fd = open(path, O_RDWR);
|
||||
if (fd < 0) {
|
||||
@@ -350,10 +370,7 @@ static int freeze_processes(void)
|
||||
* freezer.state.
|
||||
* Here is one extra attempt to check that everything are frozen.
|
||||
*/
|
||||
for (i = 0; i <= NR_ATTEMPTS; i++) {
|
||||
struct timespec req = {};
|
||||
u64 timeout;
|
||||
|
||||
for (i = 0; i <= nr_attempts; i++) {
|
||||
if (seize_cgroup_tree(opts.freeze_cgroup, state) < 0)
|
||||
goto err;
|
||||
|
||||
@@ -376,13 +393,10 @@ static int freeze_processes(void)
|
||||
if (alarm_timeouted())
|
||||
goto err;
|
||||
|
||||
timeout = 100000000 * (i + 1); /* 100 msec */
|
||||
req.tv_nsec = timeout % 1000000000;
|
||||
req.tv_sec = timeout / 1000000000;
|
||||
nanosleep(&req, NULL);
|
||||
}
|
||||
|
||||
if (i > NR_ATTEMPTS) {
|
||||
if (i > nr_attempts) {
|
||||
pr_err("Unable to freeze cgroup %s\n", opts.freeze_cgroup);
|
||||
|
||||
if (!pr_quelled(LOG_DEBUG))
|
||||
@@ -391,6 +405,7 @@ static int freeze_processes(void)
|
||||
goto err;
|
||||
}
|
||||
|
||||
pr_debug("freezing processes: %lu attempts done\n", i);
|
||||
exit_code = 0;
|
||||
err:
|
||||
if (exit_code == 0 || freezer_thawed) {
|
||||
|
Reference in New Issue
Block a user