criu/plugin: Add NVIDIA CUDA plugin

Add support for the NVIDIA cuda-checkpoint utility. This requires an r555 or higher driver along with the cuda-checkpoint binary. Signed-off-by: Jesus Ramos <jeramos@nvidia.com>
2025-08-22 01:51:51 +00:00 · 2024-05-31 13:38:54 -07:00 · 2024-05-31 13:38:54 -07:00 · bf417dd050
commit bf417dd050
parent 5f486d5aee
5 changed files with 578 additions and 4 deletions
--- a/15
+++ b/15
@ -165,7 +165,7 @@ HOSTCFLAGS		+= $(WARNINGS) $(DEFINES) -iquote include/
 export AFLAGS CFLAGS USERCLFAGS HOSTCFLAGS

 # Default target
-all: criu lib crit
+all: criu lib crit cuda_plugin
 .PHONY: all

 #
@ -298,15 +298,19 @@ clean-amdgpu_plugin:
 	$(Q) $(MAKE) -C plugins/amdgpu clean
 .PHONY: clean-amdgpu_plugin

+clean-cuda_plugin:
+	$(Q) $(MAKE) -C plugins/cuda clean
+.PHONY: clean-cuda_plugin
+
 clean-top:
 	$(Q) $(MAKE) -C Documentation clean
 	$(Q) $(MAKE) $(build)=test/compel clean
 	$(Q) $(RM) .gitid
 .PHONY: clean-top

-clean: clean-top clean-amdgpu_plugin
+clean: clean-top clean-amdgpu_plugin clean-cuda_plugin

-mrproper-top: clean-top clean-amdgpu_plugin
+mrproper-top: clean-top clean-amdgpu_plugin clean-cuda_plugin
 	$(Q) $(RM) $(CONFIG_HEADER)
 	$(Q) $(RM) $(VERSION_HEADER)
 	$(Q) $(RM) $(COMPEL_VERSION_HEADER)
@ -338,6 +342,10 @@ amdgpu_plugin: criu
 	$(Q) $(MAKE) -C plugins/amdgpu all
 .PHONY: amdgpu_plugin

+cuda_plugin: criu
+	$(Q) $(MAKE) -C plugins/cuda all
+.PHONY: cuda_plugin
+
 crit: lib
 	$(Q) $(MAKE) -C crit
 .PHONY: crit
@ -424,6 +432,7 @@ help:
 	@echo '      lint            - Run code linters'
 	@echo '      indent          - Indent C code'
 	@echo '      amdgpu_plugin   - Make AMD GPU plugin'
+	@echo '      cuda_plugin     - Make NVIDIA CUDA plugin'
 .PHONY: help

 ruff:
--- a/Makefile.install
+++ b/Makefile.install
@ -72,12 +72,16 @@ install-amdgpu_plugin: amdgpu_plugin
 	$(Q) $(MAKE) -C plugins/amdgpu install
 .PHONY: install-amdgpu_plugin

+install-cuda_plugin: cuda_plugin
+	$(Q) $(MAKE) -C plugins/cuda install
+.PHONY: install-cuda_plugin
+
 install-compel: $(compel-install-targets)
 	$(Q) $(MAKE) $(build)=compel install
 	$(Q) $(MAKE) $(build)=compel/plugins install
 .PHONY: install-compel

-install: install-man install-lib install-crit install-criu install-compel install-amdgpu_plugin ;
+install: install-man install-lib install-crit install-criu install-compel install-amdgpu_plugin install-cuda_plugin ;
 .PHONY: install

 uninstall:
@ -88,4 +92,5 @@ uninstall:
 	$(Q) $(MAKE) $(build)=compel $@
 	$(Q) $(MAKE) $(build)=compel/plugins $@
 	$(Q) $(MAKE) -C plugins/amdgpu $@
+	$(Q) $(MAKE) -C plugins/cuda $@
 .PHONY: uninstall
--- a/plugins/cuda/Makefile
+++ b/plugins/cuda/Makefile
@ -0,0 +1,42 @@
+# Build, install and clean rules for the CRIU NVIDIA CUDA plugin
+# (cuda_plugin.so). Q, E, RM, ARCH, DESTDIR and PLUGINDIR are not defined in
+# this file and are expected to come from the included msg.mk, make's
+# built-ins or the invoking make.
+PLUGIN_NAME := cuda_plugin
+PLUGIN_SOBJ := cuda_plugin.so
+
+DEPS_CUDA := $(PLUGIN_SOBJ)
+
+PLUGIN_INCLUDE  	:= -iquote../../include
+PLUGIN_INCLUDE  	+= -iquote../../criu/include
+PLUGIN_INCLUDE  	+= -iquote../../criu/arch/$(ARCH)/include/
+PLUGIN_INCLUDE  	+= -iquote../../
+
+COMPEL := ../../compel/compel-host
+
+# Honour a compiler supplied through the environment or the command line
+# (for example one exported by a parent make); only replace make's built-in
+# default with gcc. An unconditional "CC := gcc" would silently override an
+# exported cross compiler.
+ifeq ($(origin CC),default)
+CC := gcc
+endif
+PLUGIN_CFLAGS := -g -Wall -Werror -shared -nostartfiles -fPIC
+# Extra linker flags; empty unless provided by the caller.
+PLUGIN_LDFLAGS ?=
+
+__nmk_dir ?= ../../scripts/nmk/scripts/
+include $(__nmk_dir)msg.mk
+
+all: $(DEPS_CUDA)
+.PHONY: all
+
+cuda_plugin.so: cuda_plugin.c
+	$(call msg-gen, $@)
+	$(Q) $(CC) $(PLUGIN_CFLAGS) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS)
+
+clean:
+	$(call msg-clean, $@)
+	$(Q) $(RM) $(PLUGIN_SOBJ)
+.PHONY: clean
+
+# Nothing beyond the regular build products to remove.
+mrproper: clean
+.PHONY: mrproper
+
+install:
+	$(Q) mkdir -p $(DESTDIR)$(PLUGINDIR)
+	$(E) "  INSTALL " $(PLUGIN_NAME)
+	$(Q) install -m 644 $(PLUGIN_SOBJ) $(DESTDIR)$(PLUGINDIR)
+.PHONY: install
+
+uninstall:
+	$(E) " UNINSTALL" $(PLUGIN_NAME)
+	$(Q) $(RM) $(DESTDIR)$(PLUGINDIR)/$(PLUGIN_SOBJ)
+.PHONY: uninstall
+
--- a/plugins/cuda/README.md
+++ b/plugins/cuda/README.md
@ -0,0 +1,59 @@
+Checkpoint and Restore for CUDA applications with CRIU
+======================================================
+
+# Requirements
+The cuda-checkpoint utility should be placed somewhere in your $PATH and an r555
+or higher GPU driver is required for CUDA CRIU integration support.
+
+## cuda-checkpoint
+The cuda-checkpoint utility can be found at:
+https://github.com/NVIDIA/cuda-checkpoint
+
+cuda-checkpoint is a binary utility used to issue checkpointing commands to CUDA
+applications. Updating the cuda-checkpoint utility between driver releases
+should not be necessary as the utility simply exposes some extra driver behavior
+so driver updates are all that's needed to get access to newer features.
+
+# Checkpointing Procedure
+cuda-checkpoint exposes 4 actions used in the checkpointing process: lock,
+checkpoint, restore, unlock.
+
+* lock - Used with the PAUSE_DEVICES hook while a process is still running to
+  quiesce the application into a state where it can be checkpointed
+* checkpoint - Used with the CHECKPOINT_DEVICES hook once a process has been
+  seized/frozen to perform the actual checkpointing operation
+* restore/unlock - Used with the RESUME_DEVICES_LATE hook to restore the CUDA
+  state and release the process back to its running state
+
+These actions are facilitated by a CUDA checkpoint+restore thread that the CUDA
+plugin will re-wake when needed.
+
+# Known Limitations
+* Currently GPU memory contents are brought into main system memory and CRIU
+  then checkpoints that as part of the normal procedure. On systems with many
+  GPUs with high GPU memory usage this can cause memory thrashing. A future
+  CUDA release will add support for dumping the memory contents to files to
+  alleviate this, along with corresponding support in the CRIU plugin.
+* There's currently a small race when a process calls cuInit() and finishes
+  initializing CUDA after the PAUSE_DEVICES hook has been issued on it but
+  before the process is frozen for checkpointing. This will cause
+  cuda-checkpoint to report that the process is in an illegal state for
+  checkpointing. This should be very rare; if it happens, it's recommended to
+  simply attempt the CRIU procedure again.
+* Applications that use NVML will leave some leftover device references as NVML
+  is not currently supported for checkpointing. There will be support for this
+  in later drivers. A possible temporary workaround is to have the
+  {DUMP,RESTORE}_EXT_FILE hook just ignore /dev/nvidiactl and /dev/nvidia{0..N}
+  remaining references for these applications as in most cases NVML is used to
+  get info such as gpu count and some capabilities and these values are never
+  accessed again and unlikely to change.
+* CUDA applications that fork() but don't call exec() but also don't issue any
+  CUDA API calls will have some leftover references to /dev/nvidia* and fail to
+  checkpoint as a result. This can be worked around in a similar fashion to the
+  NVML case where the leftover references can be ignored as CUDA is not fork()
+  safe anyway.
+* Restore currently requires that you restore on a system with similar GPUs and
+  the same GPU count.
+* NVIDIA UVM Managed Memory, MIG (Multi Instance GPU), and MPS (Multi-Process
+  Service) are currently not supported for checkpointing. Future CUDA releases
+  will add support for these.
--- a/plugins/cuda/cuda_plugin.c
+++ b/plugins/cuda/cuda_plugin.c
@ -0,0 +1,459 @@
+#include "criu-log.h"
+#include "plugin.h"
+#include "util.h"
+#include "cr_options.h"
+#include "pid.h"
+#include "proc_parse.h"
+
+#include <common/list.h>
+#include <compel/infect.h>
+
+#include <ctype.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+
+/* cuda-checkpoint binary should live in your PATH */
+#define CUDA_CHECKPOINT "cuda-checkpoint"
+
+/* cuda-checkpoint --action flags */
+#define ACTION_LOCK	  "lock"
+#define ACTION_CHECKPOINT "checkpoint"
+#define ACTION_RESTORE	  "restore"
+#define ACTION_UNLOCK	  "unlock"
+
+/* Size of the buffers used to capture cuda-checkpoint output */
+#define CUDA_CKPT_BUF_SIZE (128)
+
+#ifdef LOG_PREFIX
+#undef LOG_PREFIX
+#endif
+#define LOG_PREFIX "cuda_plugin: "
+
+/* Disable plugin functionality if cuda-checkpoint is not in $PATH or driver
+ * version doesn't support --action flag
+ */
+bool plugin_disabled = false;
+
+/* One tracking entry per pid that was successfully locked by PAUSE_DEVICES */
+struct pid_info {
+	int pid; /* pid the PAUSE_DEVICES hook was called with */
+	char checkpointed; /* 0 when added, set to 1 by update_checkpointed_pid() */
+	struct list_head list; /* linkage into the cuda_pids list */
+};
+
+/* Used to track which PID's we've paused CUDA operations on so far so we can
+ * release them after we're done with the DUMP
+ */
+struct list_head cuda_pids;
+
+/* Unlink and free every pid_info entry queued on @pid_buf */
+static void dealloc_pid_buffer(struct list_head *pid_buf)
+{
+	struct pid_info *cur, *next;
+
+	list_for_each_entry_safe(cur, next, pid_buf, list) {
+		list_del(&cur->list);
+		xfree(cur);
+	}
+}
+
+/*
+ * Allocate a tracking entry for @pid, marked as not yet checkpointed, and
+ * append it to @pid_buf. Returns 0 on success, -1 if the allocation failed.
+ */
+static int add_pid_to_buf(struct list_head *pid_buf, int pid)
+{
+	struct pid_info *entry;
+
+	entry = xmalloc(sizeof(*entry));
+	if (!entry)
+		return -1;
+
+	entry->pid = pid;
+	entry->checkpointed = 0;
+	list_add_tail(&entry->list, pid_buf);
+
+	return 0;
+}
+
+/*
+ * Mark the first entry for @pid on @pid_buf as checkpointed.
+ * Returns 0 when an entry was found, -1 otherwise.
+ */
+static int update_checkpointed_pid(struct list_head *pid_buf, int pid)
+{
+	struct pid_info *entry;
+
+	list_for_each_entry(entry, pid_buf, list) {
+		if (entry->pid != pid)
+			continue;
+
+		entry->checkpointed = 1;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Run the program named by args[0] (looked up in $PATH by execvp()) with the
+ * NULL-terminated argument vector @args and capture its combined
+ * stdout/stderr into @buf as a NUL-terminated string of at most
+ * @buf_size - 1 characters, with trailing newlines removed. Output that does
+ * not fit into @buf is read and discarded.
+ *
+ * Returns the exit status of the program (127 when the child could not set
+ * up its output or exec the program), or -1 if the pipe/fork failed or the
+ * program did not exit normally.
+ */
+static int launch_cuda_checkpoint(const char **args, char *buf, int buf_size)
+{
+#define READ  0
+#define WRITE 1
+	int fd[2];
+	int child_pid;
+	int captured = 0;
+	int status;
+	char scratch[64];
+
+	if (pipe(fd) != 0) {
+		pr_err("Couldn't create pipes for reading cuda-checkpoint output\n");
+		return -1;
+	}
+
+	buf[0] = '\0';
+
+	child_pid = fork();
+	if (child_pid == -1) {
+		pr_err("Failed to fork to exec cuda-checkpoint\n");
+		close(fd[READ]);
+		close(fd[WRITE]);
+		return -1;
+	}
+
+	if (child_pid == 0) { // child
+		/* The child must never return into the caller: that would leave a
+		 * second copy of the calling process running the caller's code.
+		 * Every failure path terminates with _exit().
+		 */
+		if (dup2(fd[WRITE], STDOUT_FILENO) == -1 || dup2(fd[WRITE], STDERR_FILENO) == -1) {
+			pr_err("Failed to redirect cuda-checkpoint output\n");
+			_exit(127);
+		}
+		close(fd[READ]);
+		execvp(args[0], (char **)args);
+		/* Only reached when the exec itself failed */
+		pr_err("Failed to exec %s\n", args[0]);
+		_exit(127);
+	}
+
+	// parent
+	close(fd[WRITE]);
+
+	/* A single read() may return only part of the output, so keep reading
+	 * until the buffer is full or the pipe reports EOF/error
+	 */
+	while (captured < buf_size - 1) {
+		int bytes_read = read(fd[READ], buf + captured, buf_size - 1 - captured);
+		if (bytes_read <= 0)
+			break;
+		captured += bytes_read;
+	}
+	buf[captured] = '\0';
+	while (captured > 0 && buf[captured - 1] == '\n')
+		buf[--captured] = '\0';
+
+	// Clear out any of the remaining output in the pipe in case the buffer wasn't large enough
+	while (read(fd[READ], scratch, sizeof(scratch)) > 0)
+		;
+	close(fd[READ]);
+
+	if (waitpid(child_pid, &status, 0) == -1 || !WIFEXITED(status)) {
+		pr_err("cuda-checkpoint exited improperly, couldn't complete operation\n");
+		return -1;
+	}
+
+	return WEXITSTATUS(status);
+#undef READ
+#undef WRITE
+}
+
+/*
+ * Check whether the output of "cuda-checkpoint -h" mentions @flag.
+ * Also returns false when the utility did not run successfully.
+ */
+static bool cuda_checkpoint_supports_flag(const char *flag)
+{
+	const char *help_args[] = { CUDA_CHECKPOINT, "-h", NULL };
+	char help_text[2048];
+
+	if (launch_cuda_checkpoint(help_args, help_text, sizeof(help_text)) != 0) {
+		pr_err("Failed to launch cuda-checkpoint utility, check that the utility is present in your $PATH\n");
+		return false;
+	}
+
+	return strstr(help_text, flag) != NULL;
+}
+
+/*
+ * Retrieve the cuda restore thread TID from the root pid.
+ * Returns the utility's output converted with atoi(), or -1 when
+ * cuda-checkpoint reports a non-zero status.
+ */
+static int get_cuda_restore_tid(int root_pid)
+{
+	char tid_out[CUDA_CKPT_BUF_SIZE];
+	char pid_str[16];
+	const char *args[] = { CUDA_CHECKPOINT, "--get-restore-tid", "--pid", pid_str, NULL };
+
+	snprintf(pid_str, sizeof(pid_str), "%d", root_pid);
+
+	if (launch_cuda_checkpoint(args, tid_out, sizeof(tid_out)) != 0) {
+		pr_err("Failed to launch cuda-checkpoint to retrieve restore tid: %s\n", tid_out);
+		return -1;
+	}
+
+	return atoi(tid_out);
+}
+
+/*
+ * Run "cuda-checkpoint --action @action --pid @pid", appending
+ * "--timeout @timeout" when @timeout is non-zero. The utility's output is
+ * captured into @msg_buf (of @buf_size bytes).
+ *
+ * Returns whatever launch_cuda_checkpoint() returns for the command.
+ */
+static int cuda_process_checkpoint_action(int pid, const char *action, unsigned int timeout, char *msg_buf,
+					  int buf_size)
+{
+	char pid_buf[16];
+	char timeout_buf[16];
+
+	snprintf(pid_buf, sizeof(pid_buf), "%d", pid);
+
+	const char *args[] = { CUDA_CHECKPOINT, "--action", action, "--pid", pid_buf, NULL /* --timeout */,
+			       NULL /* timeout_val */, NULL };
+	if (timeout > 0) {
+		/* timeout is unsigned, so it must be formatted with %u */
+		snprintf(timeout_buf, sizeof(timeout_buf), "%u", timeout);
+		args[5] = "--timeout";
+		args[6] = timeout_buf;
+	}
+
+	return launch_cuda_checkpoint(args, msg_buf, buf_size);
+}
+
+/*
+ * Stop the CUDA restore thread @restore_tid again after it has been resumed,
+ * re-apply the ptrace options and install the signal mask stored in
+ * @restore_sigset. Returns 0 on success, -1 on the first failing step.
+ */
+static int interrupt_restore_thread(int restore_tid, k_rtsigset_t *restore_sigset)
+{
+	struct proc_status_creds status_creds;
+	int wait_ret;
+
+	/* Since we resumed a thread that CRIU previously already froze we need to
+	 * INTERRUPT it once again, task was already SEIZE'd so we don't need to do
+	 * a compel_interrupt_task()
+	 */
+	if (ptrace(PTRACE_INTERRUPT, restore_tid, NULL, 0) != 0) {
+		pr_err("Could not interrupt cuda restore tid %d after checkpoint, process may be in strange state\n",
+		       restore_tid);
+		return -1;
+	}
+
+	wait_ret = compel_wait_task(restore_tid, -1, parse_pid_status, NULL, &status_creds.s, NULL);
+	if (wait_ret != COMPEL_TASK_ALIVE) {
+		pr_err("compel_wait_task failed after interrupt\n");
+		return -1;
+	}
+
+	if (ptrace(PTRACE_SETOPTIONS, restore_tid, NULL, PTRACE_O_SUSPEND_SECCOMP | PTRACE_O_TRACESYSGOOD) != 0) {
+		pr_err("Failed to set ptrace options on interrupt for restore tid %d\n", restore_tid);
+		return -1;
+	}
+
+	if (ptrace(PTRACE_SETSIGMASK, restore_tid, sizeof(*restore_sigset), restore_sigset) != 0) {
+		pr_err("Unable to restore original sigmask to restore tid %d\n", restore_tid);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Let the CUDA restore thread @restore_tid run: store its current signal
+ * mask in @save_sigset, install a mask that blocks every signal except
+ * SIGTRAP, clear the ptrace options and continue the thread.
+ * Returns 0 on success, -1 on the first failing step.
+ */
+static int resume_restore_thread(int restore_tid, k_rtsigset_t *save_sigset)
+{
+	k_rtsigset_t all_but_trap;
+
+	if (ptrace(PTRACE_GETSIGMASK, restore_tid, sizeof(*save_sigset), save_sigset) != 0) {
+		pr_err("Failed to get current sigmask for restore tid %d\n", restore_tid);
+		return -1;
+	}
+
+	ksigfillset(&all_but_trap);
+	ksigdelset(&all_but_trap, SIGTRAP);
+
+	if (ptrace(PTRACE_SETSIGMASK, restore_tid, sizeof(all_but_trap), &all_but_trap) != 0) {
+		pr_err("Failed to block signals on restore tid %d\n", restore_tid);
+		return -1;
+	}
+
+	// Clear out PTRACE_O_SUSPEND_SECCOMP when we resume the restore thread
+	if (ptrace(PTRACE_SETOPTIONS, restore_tid, NULL, 0) != 0) {
+		pr_err("Could not clear ptrace options on restore tid %d\n", restore_tid);
+		return -1;
+	}
+
+	if (ptrace(PTRACE_CONT, restore_tid, NULL, 0) != 0) {
+		pr_err("Could not resume cuda restore tid %d\n", restore_tid);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * CHECKPOINT_DEVICES hook: run the cuda-checkpoint "checkpoint" action on
+ * @pid and record the pid as checkpointed in cuda_pids.
+ *
+ * The CUDA restore thread is resumed for the duration of the action and
+ * interrupted again afterwards. Returns 0 when the plugin is disabled, when
+ * no restore thread is reported for @pid, or on success; non-zero otherwise.
+ */
+int cuda_plugin_checkpoint_devices(int pid)
+{
+	int restore_tid;
+	char msg_buf[CUDA_CKPT_BUF_SIZE];
+	int int_ret;
+	int status;
+	k_rtsigset_t save_sigset;
+
+	if (plugin_disabled) {
+		return 0;
+	}
+
+	restore_tid = get_cuda_restore_tid(pid);
+
+	/* We can possibly hit a race with cuInit() where we are past the point of
+	 * locking the process but at lock time cuInit() hadn't completed in which
+	 * case cuda-checkpoint will report that we're in an invalid state to
+	 * checkpoint
+	 */
+	if (restore_tid == -1) {
+		pr_info("No need to checkpoint devices on pid %d\n", pid);
+		return 0;
+	}
+
+	pr_info("Checkpointing CUDA devices on pid %d restore_tid %d\n", pid, restore_tid);
+	/* We need to resume the checkpoint thread to prepare the mappings for
+	 * checkpointing
+	 */
+	if (resume_restore_thread(restore_tid, &save_sigset)) {
+		return -1;
+	}
+	status = cuda_process_checkpoint_action(pid, ACTION_CHECKPOINT, 0, msg_buf, sizeof(msg_buf));
+	if (status) {
+		pr_err("CHECKPOINT_DEVICES failed with %s\n", msg_buf);
+		goto interrupt;
+	}
+	if (update_checkpointed_pid(&cuda_pids, pid)) {
+		pr_err("Failed to track checkpointed pid %d\n", pid);
+		/* Roll the checkpoint back. The hook must still report failure
+		 * when the rollback succeeds, so the rollback result must not
+		 * replace the error status.
+		 */
+		if (cuda_process_checkpoint_action(pid, ACTION_RESTORE, 0, msg_buf, sizeof(msg_buf))) {
+			pr_err("Failed to restore process after error %s on pid %d\n", msg_buf, pid);
+		}
+		status = -1;
+	}
+interrupt:
+	int_ret = interrupt_restore_thread(restore_tid, &save_sigset);
+
+	return status != 0 ? status : int_ret;
+}
+CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, cuda_plugin_checkpoint_devices);
+
+/*
+ * PAUSE_DEVICES hook: run the cuda-checkpoint "lock" action on @pid, with
+ * opts.timeout * 1000 passed as the timeout, and add the pid to cuda_pids.
+ * If the pid cannot be tracked the process is unlocked again.
+ *
+ * Returns 0 when the plugin is disabled, when no restore thread is reported
+ * for @pid, or on success; -1 otherwise.
+ */
+int cuda_plugin_pause_devices(int pid)
+{
+	char msg_buf[CUDA_CKPT_BUF_SIZE];
+	int restore_tid;
+	int status;
+
+	if (plugin_disabled)
+		return 0;
+
+	restore_tid = get_cuda_restore_tid(pid);
+	if (restore_tid == -1) {
+		pr_info("no need to pause devices on pid %d\n", pid);
+		return 0;
+	}
+
+	pr_info("pausing devices on pid %d\n", pid);
+	status = cuda_process_checkpoint_action(pid, ACTION_LOCK, opts.timeout * 1000, msg_buf, sizeof(msg_buf));
+	if (status != 0) {
+		pr_err("PAUSE_DEVICES failed with %s\n", msg_buf);
+		return -1;
+	}
+
+	if (add_pid_to_buf(&cuda_pids, pid) == 0)
+		return 0;
+
+	pr_err("unable to track paused pid %d\n", pid);
+	status = cuda_process_checkpoint_action(pid, ACTION_UNLOCK, 0, msg_buf, sizeof(msg_buf));
+	if (status != 0)
+		pr_err("Failed to unlock process status %s, pid %d may hang\n", msg_buf, pid);
+
+	return -1;
+}
+CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__PAUSE_DEVICES, cuda_plugin_pause_devices)
+
+/*
+ * Release CUDA on @pid: run the "restore" action first when @checkpointed is
+ * non-zero, then the "unlock" action. The CUDA restore thread is resumed
+ * while the actions run and interrupted again afterwards.
+ *
+ * Returns 0 when no restore thread is reported for @pid or on success,
+ * -1 when resuming the thread or an action failed, otherwise the result of
+ * interrupt_restore_thread().
+ */
+int resume_device(int pid, int checkpointed)
+{
+	char msg_buf[CUDA_CKPT_BUF_SIZE];
+	k_rtsigset_t save_sigset;
+	int restore_tid;
+	int int_ret;
+	int ret = -1;
+
+	restore_tid = get_cuda_restore_tid(pid);
+	if (restore_tid == -1) {
+		pr_info("No need to resume devices on pid %d\n", pid);
+		return 0;
+	}
+
+	pr_info("resuming devices on pid %d\n", pid);
+	/* The resuming process has to stay frozen during this time otherwise
+	 * attempting to access a UVM pointer will crash if we haven't restored the
+	 * underlying mappings yet
+	 */
+	pr_debug("Restore thread pid %d found for real pid %d\n", restore_tid, pid);
+	/* wakeup the restore thread so we can handle the restore for this pid,
+	 * rseq_cs has to be restored before execution
+	 */
+	if (resume_restore_thread(restore_tid, &save_sigset))
+		return -1;
+
+	if (checkpointed &&
+	    cuda_process_checkpoint_action(pid, ACTION_RESTORE, 0, msg_buf, sizeof(msg_buf)) != 0) {
+		pr_err("RESUME_DEVICES RESTORE failed with %s\n", msg_buf);
+		goto interrupt;
+	}
+
+	if (cuda_process_checkpoint_action(pid, ACTION_UNLOCK, 0, msg_buf, sizeof(msg_buf)) != 0) {
+		pr_err("RESUME_DEVICES UNLOCK failed with %s\n", msg_buf);
+		goto interrupt;
+	}
+
+	ret = 0;
+
+interrupt:
+	/* The restore thread is stopped again on both the success and error paths */
+	int_ret = interrupt_restore_thread(restore_tid, &save_sigset);
+
+	return ret ? ret : int_ret;
+}
+
+/*
+ * RESUME_DEVICES_LATE hook: restore and unlock CUDA on @pid through
+ * resume_device(). Does nothing when the plugin is disabled.
+ */
+int cuda_plugin_resume_devices_late(int pid)
+{
+	return plugin_disabled ? 0 : resume_device(pid, 1);
+}
+CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, cuda_plugin_resume_devices_late)
+
+/*
+ * Plugin init callback. Disables the plugin when cuda-checkpoint does not
+ * advertise the --action flag; otherwise prepares the cuda_pids list for the
+ * DUMP stage. Always returns 0.
+ */
+int cuda_plugin_init(int stage)
+{
+	bool has_action_flag = cuda_checkpoint_supports_flag("--action");
+
+	if (!has_action_flag) {
+		pr_warn("cuda-checkpoint --action flag not supported, an r555 or higher version driver is required. Disabling CUDA plugin\n");
+		plugin_disabled = true;
+		return 0;
+	}
+
+	pr_info("initialized: %s stage %d\n", CR_PLUGIN_DESC.name, stage);
+
+	/* In the DUMP stage track all the PID's we've paused CUDA operations on to
+	 * release them when we're done if the user requested the leave-running option
+	 */
+	if (stage == CR_PLUGIN_STAGE__DUMP)
+		INIT_LIST_HEAD(&cuda_pids);
+
+	return 0;
+}
+
+/*
+ * Plugin fini callback. At the end of the DUMP stage, resume every tracked
+ * pid when the tasks are left running or the dump failed, then free the
+ * tracking list. Does nothing when the plugin is disabled.
+ */
+void cuda_plugin_fini(int stage, int ret)
+{
+	struct pid_info *entry;
+
+	if (plugin_disabled)
+		return;
+
+	pr_info("finished %s stage %d err %d\n", CR_PLUGIN_DESC.name, stage, ret);
+
+	if (stage != CR_PLUGIN_STAGE__DUMP)
+		return;
+
+	/* Release all the paused PID's at the end of the DUMP stage in case the
+	 * user provides the -R (leave-running) flag or an error occurred
+	 */
+	if (opts.final_state == TASK_ALIVE || ret != 0) {
+		list_for_each_entry(entry, &cuda_pids, list)
+			resume_device(entry->pid, entry->checkpointed);
+	}
+
+	dealloc_pid_buffer(&cuda_pids);
+}
+CR_PLUGIN_REGISTER("cuda_plugin", cuda_plugin_init, cuda_plugin_fini)