2
0
mirror of https://github.com/checkpoint-restore/criu synced 2025-09-05 08:45:49 +00:00

images/inventory: add field for enabled plugins

This patch extends the inventory image with a `plugins` field that
contains an array of plugins which were used during checkpoint,
for example, to save GPU state. In particular, the CUDA and AMDGPU
plugins are added to this field only when the checkpoint contains
GPU state. This allows us to disable unnecessary plugins during restore,
show appropriate error messages if required CRIU plugins are missing,
and migrate a process that does not use a GPU from a GPU-enabled system
to a CPU-only environment.

We use the `optional plugins_entry` for backwards compatibility. This
entry allows us to distinguish between an *empty* and a *missing* field:

- When the field is missing, it indicates that the checkpoint was
  created with a previous version of CRIU, and all plugins should be
  *enabled* during restore.

- When the field is empty, it indicates that no plugins were used during
  checkpointing. Thus, all plugins can be *disabled* during restore.

Signed-off-by: Radostin Stoyanov <rstoyanov@fedoraproject.org>
This commit is contained in:
Radostin Stoyanov
2024-10-04 12:14:29 +01:00
committed by Andrei Vagin
parent 87b5ac9d9f
commit adf2c5be96
7 changed files with 193 additions and 5 deletions

View File

@@ -60,6 +60,10 @@ static LIST_HEAD(update_vma_info_list);
size_t kfd_max_buffer_size;
bool plugin_added_to_inventory = false;
bool plugin_disabled = false;
/**************************************************************************************************/
/* Call ioctl, restarting if it is interrupted */
@@ -332,6 +336,13 @@ void getenv_size_t(const char *var, size_t *value)
int amdgpu_plugin_init(int stage)
{
if (stage == CR_PLUGIN_STAGE__RESTORE) {
if (!check_and_remove_inventory_plugin(CR_PLUGIN_DESC.name, strlen(CR_PLUGIN_DESC.name))) {
plugin_disabled = true;
return 0;
}
}
pr_info("initialized: %s (AMDGPU/KFD)\n", CR_PLUGIN_DESC.name);
topology_init(&src_topology);
@@ -365,6 +376,9 @@ int amdgpu_plugin_init(int stage)
void amdgpu_plugin_fini(int stage, int ret)
{
if (plugin_disabled)
return;
pr_info("finished %s (AMDGPU/KFD)\n", CR_PLUGIN_DESC.name);
if (stage == CR_PLUGIN_STAGE__RESTORE)
@@ -414,6 +428,14 @@ int amdgpu_plugin_handle_device_vma(int fd, const struct stat *st_buf)
if (ret)
pr_perror("%s(), Can't handle VMAs of input device", __func__);
if (!ret && !plugin_added_to_inventory) {
ret = add_inventory_plugin(CR_PLUGIN_DESC.name);
if (ret)
pr_err("Failed to add AMDGPU plugin to inventory image\n");
else
plugin_added_to_inventory = true;
}
return ret;
}
CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA, amdgpu_plugin_handle_device_vma)
@@ -1540,6 +1562,9 @@ int amdgpu_plugin_restore_file(int id)
size_t img_size;
FILE *img_fp = NULL;
if (plugin_disabled)
return -ENOTSUP;
pr_info("Initialized kfd plugin restorer with ID = %d\n", id);
snprintf(img_path, sizeof(img_path), IMG_KFD_FILE, id);
@@ -1746,6 +1771,9 @@ int amdgpu_plugin_update_vmamap(const char *in_path, const uint64_t addr, const
char *p_end;
bool is_kfd = false, is_renderD = false;
if (plugin_disabled)
return -ENOTSUP;
plugin_log_msg("Enter %s\n", __func__);
strncpy(path, in_path, sizeof(path));
@@ -1805,6 +1833,9 @@ int amdgpu_plugin_resume_devices_late(int target_pid)
struct kfd_ioctl_criu_args args = { 0 };
int fd, exit_code = 0;
if (plugin_disabled)
return -ENOTSUP;
pr_info("Inside %s for target pid = %d\n", __func__, target_pid);
fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC);