mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-22 01:51:51 +00:00
seize: use separate checkpoint_devices function
Move `run_plugins(CHECKPOINT_DEVICES)` out of `collect_pstree()` into a separate `checkpoint_devices()` function, so that the sole responsibility of `collect_pstree()` is to use the cgroup freezer for the process tree. This allows us to avoid a timeout error when checkpointing applications with large GPU state.

v2: This patch calls `checkpoint_devices()` only for `criu dump`. Support for GPU checkpointing with `pre-dump` will be introduced in a separate patch.

Suggested-by: Andrei Vagin <avagin@google.com>
Suggested-by: Jesus Ramos <jeramos@nvidia.com>
Signed-off-by: Radostin Stoyanov <rstoyanov@fedoraproject.org>
This commit is contained in:
parent
59b022db35
commit
dcd8808db0
@ -2192,6 +2192,9 @@ int cr_dump_tasks(pid_t pid)
|
||||
if (collect_pstree())
|
||||
goto err;
|
||||
|
||||
if (checkpoint_devices())
|
||||
goto err;
|
||||
|
||||
if (collect_pstree_ids())
|
||||
goto err;
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
#define __CR_SEIZE_H__
|
||||
|
||||
extern int collect_pstree(void);
|
||||
extern int checkpoint_devices(void);
|
||||
struct pstree_item;
|
||||
extern void pstree_switch_state(struct pstree_item *root_item, int st);
|
||||
extern const char *get_real_freezer_state(void);
|
||||
|
27
criu/seize.c
27
criu/seize.c
@ -1050,7 +1050,6 @@ int collect_pstree(void)
|
||||
pid_t pid = root_item->pid->real;
|
||||
int ret, exit_code = -1;
|
||||
struct proc_status_creds creds;
|
||||
struct pstree_item *iter;
|
||||
|
||||
timing_start(TIME_FREEZING);
|
||||
|
||||
@ -1111,14 +1110,6 @@ int collect_pstree(void)
|
||||
goto err;
|
||||
}
|
||||
|
||||
for_each_pstree_item(iter) {
|
||||
if (!task_alive(iter))
|
||||
continue;
|
||||
ret = run_plugins(CHECKPOINT_DEVICES, iter->pid->real);
|
||||
if (ret < 0 && ret != -ENOTSUP)
|
||||
goto err;
|
||||
}
|
||||
|
||||
exit_code = 0;
|
||||
timing_stop(TIME_FREEZING);
|
||||
timing_start(TIME_FROZEN);
|
||||
@ -1128,3 +1119,21 @@ err:
|
||||
alarm(0);
|
||||
return exit_code;
|
||||
}
|
||||
|
||||
int checkpoint_devices(void)
|
||||
{
|
||||
struct pstree_item *iter;
|
||||
int ret, exit_code = -1;
|
||||
|
||||
for_each_pstree_item(iter) {
|
||||
if (!task_alive(iter))
|
||||
continue;
|
||||
ret = run_plugins(CHECKPOINT_DEVICES, iter->pid->real);
|
||||
if (ret < 0 && ret != -ENOTSUP)
|
||||
goto err;
|
||||
}
|
||||
|
||||
exit_code = 0;
|
||||
err:
|
||||
return exit_code;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user