mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-22 18:07:57 +00:00
seize: use separate checkpoint_devices function
Move `run_plugins(CHECKPOINT_DEVICES)` out of `collect_pstree()` to ensure that the function's sole responsibility is to use the cgroup freezer for the process tree. This allows us to avoid a time-out error when checkpointing applications with large GPU state. v2: This patch calls `checkpoint_devices()` only for `criu dump`. Support for GPU checkpointing with `pre-dump` will be introduced in a separate patch. Suggested-by: Andrei Vagin <avagin@google.com> Suggested-by: Jesus Ramos <jeramos@nvidia.com> Signed-off-by: Radostin Stoyanov <rstoyanov@fedoraproject.org>
This commit is contained in:
parent
59b022db35
commit
dcd8808db0
@ -2192,6 +2192,9 @@ int cr_dump_tasks(pid_t pid)
|
|||||||
if (collect_pstree())
|
if (collect_pstree())
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
|
if (checkpoint_devices())
|
||||||
|
goto err;
|
||||||
|
|
||||||
if (collect_pstree_ids())
|
if (collect_pstree_ids())
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
#define __CR_SEIZE_H__
|
#define __CR_SEIZE_H__
|
||||||
|
|
||||||
extern int collect_pstree(void);
|
extern int collect_pstree(void);
|
||||||
|
extern int checkpoint_devices(void);
|
||||||
struct pstree_item;
|
struct pstree_item;
|
||||||
extern void pstree_switch_state(struct pstree_item *root_item, int st);
|
extern void pstree_switch_state(struct pstree_item *root_item, int st);
|
||||||
extern const char *get_real_freezer_state(void);
|
extern const char *get_real_freezer_state(void);
|
||||||
|
27
criu/seize.c
27
criu/seize.c
@ -1050,7 +1050,6 @@ int collect_pstree(void)
|
|||||||
pid_t pid = root_item->pid->real;
|
pid_t pid = root_item->pid->real;
|
||||||
int ret, exit_code = -1;
|
int ret, exit_code = -1;
|
||||||
struct proc_status_creds creds;
|
struct proc_status_creds creds;
|
||||||
struct pstree_item *iter;
|
|
||||||
|
|
||||||
timing_start(TIME_FREEZING);
|
timing_start(TIME_FREEZING);
|
||||||
|
|
||||||
@ -1111,14 +1110,6 @@ int collect_pstree(void)
|
|||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
for_each_pstree_item(iter) {
|
|
||||||
if (!task_alive(iter))
|
|
||||||
continue;
|
|
||||||
ret = run_plugins(CHECKPOINT_DEVICES, iter->pid->real);
|
|
||||||
if (ret < 0 && ret != -ENOTSUP)
|
|
||||||
goto err;
|
|
||||||
}
|
|
||||||
|
|
||||||
exit_code = 0;
|
exit_code = 0;
|
||||||
timing_stop(TIME_FREEZING);
|
timing_stop(TIME_FREEZING);
|
||||||
timing_start(TIME_FROZEN);
|
timing_start(TIME_FROZEN);
|
||||||
@ -1128,3 +1119,21 @@ err:
|
|||||||
alarm(0);
|
alarm(0);
|
||||||
return exit_code;
|
return exit_code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int checkpoint_devices(void)
|
||||||
|
{
|
||||||
|
struct pstree_item *iter;
|
||||||
|
int ret, exit_code = -1;
|
||||||
|
|
||||||
|
for_each_pstree_item(iter) {
|
||||||
|
if (!task_alive(iter))
|
||||||
|
continue;
|
||||||
|
ret = run_plugins(CHECKPOINT_DEVICES, iter->pid->real);
|
||||||
|
if (ret < 0 && ret != -ENOTSUP)
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
exit_code = 0;
|
||||||
|
err:
|
||||||
|
return exit_code;
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user