mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-22 01:51:51 +00:00
criu/plugin: Introduce new plugin hooks PAUSE_DEVICES and CHECKPOINT_DEVICES to be used during pstree collection
PAUSE_DEVICES is called before a process is frozen and is used by the CUDA plugin to place the process in a state that's ready to be checkpointed and to quiesce any pending work.
CHECKPOINT_DEVICES is called after all processes in the tree have been frozen and PAUSE'd, and performs the actual checkpointing operation for CUDA applications.
Signed-off-by: Jesus Ramos <jeramos@nvidia.com>
This commit is contained in:
parent
1012e542e5
commit
5f486d5aee
@ -56,6 +56,10 @@ enum {
|
|||||||
|
|
||||||
CR_PLUGIN_HOOK__RESUME_DEVICES_LATE = 9,
|
CR_PLUGIN_HOOK__RESUME_DEVICES_LATE = 9,
|
||||||
|
|
||||||
|
CR_PLUGIN_HOOK__PAUSE_DEVICES = 10,
|
||||||
|
|
||||||
|
CR_PLUGIN_HOOK__CHECKPOINT_DEVICES = 11,
|
||||||
|
|
||||||
CR_PLUGIN_HOOK__MAX
|
CR_PLUGIN_HOOK__MAX
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -72,6 +76,8 @@ DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA, int fd, const struct
|
|||||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__UPDATE_VMA_MAP, const char *path, const uint64_t addr,
|
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__UPDATE_VMA_MAP, const char *path, const uint64_t addr,
|
||||||
const uint64_t old_pgoff, uint64_t *new_pgoff, int *plugin_fd);
|
const uint64_t old_pgoff, uint64_t *new_pgoff, int *plugin_fd);
|
||||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, int pid);
|
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, int pid);
|
||||||
|
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__PAUSE_DEVICES, int pid);
|
||||||
|
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, int pid);
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
CR_PLUGIN_STAGE__DUMP,
|
CR_PLUGIN_STAGE__DUMP,
|
||||||
|
@ -57,6 +57,8 @@ static cr_plugin_desc_t *cr_gen_plugin_desc(void *h, char *path)
|
|||||||
__assign_hook(HANDLE_DEVICE_VMA, "cr_plugin_handle_device_vma");
|
__assign_hook(HANDLE_DEVICE_VMA, "cr_plugin_handle_device_vma");
|
||||||
__assign_hook(UPDATE_VMA_MAP, "cr_plugin_update_vma_map");
|
__assign_hook(UPDATE_VMA_MAP, "cr_plugin_update_vma_map");
|
||||||
__assign_hook(RESUME_DEVICES_LATE, "cr_plugin_resume_devices_late");
|
__assign_hook(RESUME_DEVICES_LATE, "cr_plugin_resume_devices_late");
|
||||||
|
__assign_hook(PAUSE_DEVICES, "cr_plugin_pause_devices");
|
||||||
|
__assign_hook(CHECKPOINT_DEVICES, "cr_plugin_checkpoint_devices");
|
||||||
|
|
||||||
#undef __assign_hook
|
#undef __assign_hook
|
||||||
|
|
||||||
|
18
criu/seize.c
18
criu/seize.c
@ -16,6 +16,7 @@
|
|||||||
#include "pstree.h"
|
#include "pstree.h"
|
||||||
#include "criu-log.h"
|
#include "criu-log.h"
|
||||||
#include <compel/ptrace.h>
|
#include <compel/ptrace.h>
|
||||||
|
#include "plugin.h"
|
||||||
#include "proc_parse.h"
|
#include "proc_parse.h"
|
||||||
#include "seccomp.h"
|
#include "seccomp.h"
|
||||||
#include "seize.h"
|
#include "seize.h"
|
||||||
@ -637,6 +638,11 @@ static int collect_children(struct pstree_item *item)
|
|||||||
goto free;
|
goto free;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ret = run_plugins(PAUSE_DEVICES, pid);
|
||||||
|
if (ret < 0 && ret != -ENOTSUP) {
|
||||||
|
goto free;
|
||||||
|
}
|
||||||
|
|
||||||
if (!opts.freeze_cgroup)
|
if (!opts.freeze_cgroup)
|
||||||
/* fails when meets a zombie */
|
/* fails when meets a zombie */
|
||||||
__ignore_value(compel_interrupt_task(pid));
|
__ignore_value(compel_interrupt_task(pid));
|
||||||
@ -966,6 +972,7 @@ int collect_pstree(void)
|
|||||||
pid_t pid = root_item->pid->real;
|
pid_t pid = root_item->pid->real;
|
||||||
int ret = -1;
|
int ret = -1;
|
||||||
struct proc_status_creds creds;
|
struct proc_status_creds creds;
|
||||||
|
struct pstree_item *iter;
|
||||||
|
|
||||||
timing_start(TIME_FREEZING);
|
timing_start(TIME_FREEZING);
|
||||||
|
|
||||||
@ -984,6 +991,11 @@ int collect_pstree(void)
|
|||||||
if (opts.freeze_cgroup && freeze_processes())
|
if (opts.freeze_cgroup && freeze_processes())
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
|
ret = run_plugins(PAUSE_DEVICES, pid);
|
||||||
|
if (ret < 0 && ret != -ENOTSUP) {
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
if (!opts.freeze_cgroup && compel_interrupt_task(pid)) {
|
if (!opts.freeze_cgroup && compel_interrupt_task(pid)) {
|
||||||
set_cr_errno(ESRCH);
|
set_cr_errno(ESRCH);
|
||||||
goto err;
|
goto err;
|
||||||
@ -1017,6 +1029,12 @@ int collect_pstree(void)
|
|||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for_each_pstree_item(iter) {
|
||||||
|
ret = run_plugins(CHECKPOINT_DEVICES, iter->pid->real);
|
||||||
|
if (ret < 0 && ret != -ENOTSUP)
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
ret = 0;
|
ret = 0;
|
||||||
timing_stop(TIME_FREEZING);
|
timing_stop(TIME_FREEZING);
|
||||||
timing_start(TIME_FROZEN);
|
timing_start(TIME_FROZEN);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user