mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-29 05:18:00 +00:00
criu: Allow disabling freeze cgroups
Some plugins (e.g., CUDA) may not function correctly when processes are frozen using cgroups. This change introduces a mechanism to disable the use of freeze cgroups during process seizing, even if explicitly requested via the --freeze-cgroup option. The CUDA plugin is updated to utilize this new mechanism to ensure compatibility.

Signed-off-by: Andrei Vagin <avagin@google.com>
This commit is contained in:
parent
59f49c6276
commit
651df375bd
@ -8,5 +8,6 @@ extern bool alarm_timeouted(void);
|
|||||||
|
|
||||||
extern char *task_comm_info(pid_t pid, char *comm, size_t size);
|
extern char *task_comm_info(pid_t pid, char *comm, size_t size);
|
||||||
extern char *__task_comm_info(pid_t pid);
|
extern char *__task_comm_info(pid_t pid);
|
||||||
|
extern void dont_use_freeze_cgroup(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
66
criu/seize.c
66
criu/seize.c
@ -25,6 +25,19 @@
|
|||||||
#include "xmalloc.h"
|
#include "xmalloc.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
|
||||||
|
/* Set once something has asked for freezer-cgroup based seizing to be skipped. */
static bool freeze_cgroup_disabled;

/*
 * Opt out of seizing tasks through the freezer cgroup.
 *
 * Once called, the flag stays set and the seize code in this file stops
 * freezing tasks via the cgroup, even when --freeze-cgroup was given on the
 * command line. Plugins that cannot cope with cgroup-frozen processes
 * (e.g., CUDA) call this during their initialization.
 */
void __attribute__((used)) dont_use_freeze_cgroup(void)
{
	freeze_cgroup_disabled = true;
}
|
||||||
|
|
||||||
char *task_comm_info(pid_t pid, char *comm, size_t size)
|
char *task_comm_info(pid_t pid, char *comm, size_t size)
|
||||||
{
|
{
|
||||||
bool is_read = false;
|
bool is_read = false;
|
||||||
@ -397,7 +410,7 @@ static int freezer_detach(void)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (!opts.freeze_cgroup)
|
if (!opts.freeze_cgroup || freeze_cgroup_disabled)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
for (i = 0; i < processes_to_wait && processes_to_wait_pids; i++) {
|
for (i = 0; i < processes_to_wait && processes_to_wait_pids; i++) {
|
||||||
@ -492,6 +505,31 @@ static int log_unfrozen_stacks(char *root)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int check_freezer_cgroup(void)
|
||||||
|
{
|
||||||
|
enum freezer_state state = THAWED;
|
||||||
|
int fd;
|
||||||
|
|
||||||
|
BUG_ON(!freeze_cgroup_disabled);
|
||||||
|
|
||||||
|
fd = freezer_open();
|
||||||
|
if (fd < 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
state = get_freezer_state(fd);
|
||||||
|
close(fd);
|
||||||
|
if (state == FREEZER_ERROR) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state != THAWED) {
|
||||||
|
pr_err("One or more plugins are incompatible with the freezer cgroup in the FROZEN state.\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int freeze_processes(void)
|
static int freeze_processes(void)
|
||||||
{
|
{
|
||||||
int fd, exit_code = -1;
|
int fd, exit_code = -1;
|
||||||
@ -643,7 +681,7 @@ static int collect_children(struct pstree_item *item)
|
|||||||
goto free;
|
goto free;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!opts.freeze_cgroup)
|
if (!opts.freeze_cgroup || freeze_cgroup_disabled)
|
||||||
/* fails when meets a zombie */
|
/* fails when meets a zombie */
|
||||||
__ignore_value(compel_interrupt_task(pid));
|
__ignore_value(compel_interrupt_task(pid));
|
||||||
|
|
||||||
@ -831,7 +869,8 @@ static int collect_threads(struct pstree_item *item)
|
|||||||
|
|
||||||
pr_info("\tSeizing %d's %d thread\n", item->pid->real, pid);
|
pr_info("\tSeizing %d's %d thread\n", item->pid->real, pid);
|
||||||
|
|
||||||
if (!opts.freeze_cgroup && compel_interrupt_task(pid))
|
if ((!opts.freeze_cgroup || freeze_cgroup_disabled) &&
|
||||||
|
compel_interrupt_task(pid))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
ret = compel_wait_task(pid, item_ppid(item), parse_pid_status, NULL, &t_creds.s, NULL);
|
ret = compel_wait_task(pid, item_ppid(item), parse_pid_status, NULL, &t_creds.s, NULL);
|
||||||
@ -887,7 +926,7 @@ static int collect_loop(struct pstree_item *item, int (*collect)(struct pstree_i
|
|||||||
{
|
{
|
||||||
int attempts = NR_ATTEMPTS, nr_inprogress = 1;
|
int attempts = NR_ATTEMPTS, nr_inprogress = 1;
|
||||||
|
|
||||||
if (opts.freeze_cgroup)
|
if (opts.freeze_cgroup && !freeze_cgroup_disabled)
|
||||||
attempts = 1;
|
attempts = 1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -993,12 +1032,16 @@ int collect_pstree(void)
|
|||||||
|
|
||||||
pr_debug("Detected cgroup V%d freezer\n", cgroup_v2 ? 2 : 1);
|
pr_debug("Detected cgroup V%d freezer\n", cgroup_v2 ? 2 : 1);
|
||||||
|
|
||||||
if (opts.freeze_cgroup && freeze_processes())
|
if (opts.freeze_cgroup && !freeze_cgroup_disabled) {
|
||||||
goto err;
|
if (freeze_processes())
|
||||||
|
goto err;
|
||||||
if (!opts.freeze_cgroup && compel_interrupt_task(pid)) {
|
} else {
|
||||||
set_cr_errno(ESRCH);
|
if (opts.freeze_cgroup && check_freezer_cgroup())
|
||||||
goto err;
|
goto err;
|
||||||
|
if (compel_interrupt_task(pid)) {
|
||||||
|
set_cr_errno(ESRCH);
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = compel_wait_task(pid, -1, parse_pid_status, NULL, &creds.s, NULL);
|
ret = compel_wait_task(pid, -1, parse_pid_status, NULL, &creds.s, NULL);
|
||||||
@ -1024,7 +1067,8 @@ int collect_pstree(void)
|
|||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
if (opts.freeze_cgroup && freezer_wait_processes()) {
|
if (opts.freeze_cgroup && !freeze_cgroup_disabled &&
|
||||||
|
freezer_wait_processes()) {
|
||||||
ret = -1;
|
ret = -1;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
@ -483,6 +483,8 @@ int cuda_plugin_init(int stage)
|
|||||||
INIT_LIST_HEAD(&cuda_pids);
|
INIT_LIST_HEAD(&cuda_pids);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dont_use_freeze_cgroup();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user