mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-22 01:51:51 +00:00
This patch implements the logic to offload buffer object (BO) content restoration to the main CRIU process, so that it can run in parallel with other restore work (mainly the restoration of memory state in the restore blob, which is time-consuming) and speed up the restore phase. BO content restoration usually takes a significant amount of time for GPU applications, so overlapping it with other operations reduces the overall restore time. The change has three parts: the first replaces the restoration of buffer objects in the target process with a parallel-restore command sent to the main CRIU process; the second implements the POST_FORKING hook in the amdgpu plugin to perform buffer object content restoration in the main CRIU process; the third stops the parallel thread in the RESUME_DEVICES_LATE hook. This optimization only covers the single-process situation (the common case); in other scenarios it falls back to the original method, controlled by the new `parallel_disabled` flag. Signed-off-by: Yanning Yang <yangyanning@sjtu.edu.cn>
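For context, a minimal, self-contained sketch of the fallback decision this message describes; the helper names (restore_bo_contents_serial, send_parallel_restore_cmd) are illustrative assumptions, not the patch's actual API:

/* Hypothetical sketch of the commit's control flow; names are illustrative. */
#include <stdbool.h>

static bool parallel_disabled;	/* set when more than one process is restored */

static int restore_bo_contents_serial(void) { return 0; }	/* stub: original in-process path */
static int send_parallel_restore_cmd(void) { return 0; }	/* stub: offload to main CRIU process */

int restore_bo_contents(void)
{
	/*
	 * Single-process case: the main CRIU process restores BO contents in a
	 * thread started from its POST_FORKING hook, overlapping the restorer
	 * blob's memory replay; the RESUME_DEVICES_LATE hook stops the thread.
	 */
	if (parallel_disabled)
		return restore_bo_contents_serial();
	return send_parallel_restore_cmd();
}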
1458 lines
44 KiB
C
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <linux/limits.h>

#include <dirent.h>
#include "common/list.h"

#include "xmalloc.h"
#include "kfd_ioctl.h"
#include "amdgpu_plugin_util.h"
#include "amdgpu_plugin_topology.h"

#define TOPOLOGY_PATH "/sys/class/kfd/kfd/topology/nodes/"
#define MAX_PARAMETER_LEN 64

/* User override options */
/* Skip firmware version check */
bool kfd_fw_version_check = true;
/* Skip SDMA firmware version check */
bool kfd_sdma_fw_version_check = true;
/* Skip caches count check */
bool kfd_caches_count_check = true;
/* Skip num gws check */
bool kfd_num_gws_check = true;
/* Skip vram size check */
bool kfd_vram_size_check = true;
/* Preserve NUMA regions */
bool kfd_numa_check = true;
/* Skip capability check */
bool kfd_capability_check = true;

/*
 * During dump, we can use any fd value so fd_next is always -1.
 * During restore, we have to use a fd value that does not conflict with fd
 * values in use by the target restore process. fd_next is initialized as 1
 * greater than the highest-numbered file descriptor used by the target
 * restore process.
 */
int fd_next = -1;

int open_drm_render_device(int minor)
{
	char path[128];
	int fd, ret_fd;

	if (minor < DRM_FIRST_RENDER_NODE || minor > DRM_LAST_RENDER_NODE) {
		pr_perror("DRM render minor %d out of range [%d, %d]", minor, DRM_FIRST_RENDER_NODE,
			  DRM_LAST_RENDER_NODE);
		return -EINVAL;
	}

	snprintf(path, sizeof(path), "/dev/dri/renderD%d", minor);
	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0) {
		if (errno != ENOENT && errno != EPERM) {
			pr_err("Failed to open %s: %s\n", path, strerror(errno));
			if (errno == EACCES)
				pr_err("Check user is in \"video\" group\n");
		}
		return -EBADFD;
	}

	if (fd_next < 0)
		return fd;

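	/* Duplicate onto the next fd number known to be free in the target
	 * process (see the fd_next comment above), then drop the original fd.
	 */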
	ret_fd = fcntl(fd, F_DUPFD, fd_next++);
	close(fd);

	if (ret_fd < 0)
		pr_perror("Failed to duplicate fd for minor:%d (fd_next:%d)", minor, fd_next);

	return ret_fd;
}

static const char *link_type(uint32_t type)
{
	switch (type) {
	case TOPO_IOLINK_TYPE_PCIE:
		return "PCIe";
	case TOPO_IOLINK_TYPE_XGMI:
		return "XGMI";
	}
	return "Unsupported";
}

static struct tp_node *p2pgroup_get_node_by_gpu_id(const struct tp_p2pgroup *group, const uint32_t gpu_id)
{
	struct tp_node *node;

	list_for_each_entry(node, &group->nodes, listm_p2pgroup) {
		if (node->gpu_id == gpu_id)
			return node;
	}
	return NULL;
}

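/* Lazily open and cache the DRM render-node fd for this GPU. */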
int node_get_drm_render_device(struct tp_node *node)
{
	if (node->drm_fd < 0)
		node->drm_fd = open_drm_render_device(node->drm_render_minor);

	return node->drm_fd;
}

void sys_close_drm_render_devices(struct tp_system *sys)
{
	struct tp_node *node;

	list_for_each_entry(node, &sys->nodes, listm_system) {
		if (node->drm_fd >= 0) {
			close(node->drm_fd);
			node->drm_fd = -1;
		}
	}
}

static struct tp_iolink *node_get_iolink_to_node_id(const struct tp_node *node, const uint32_t type,
						    const uint32_t node_id)
{
	struct tp_iolink *iolink;

	list_for_each_entry(iolink, &node->iolinks, listm) {
		if (iolink->node_to_id == node_id && iolink->type == type)
			return iolink;
	}
	return NULL;
}

struct tp_node *sys_get_node_by_render_minor(const struct tp_system *sys, const int drm_render_minor)
{
	struct tp_node *node;

	list_for_each_entry(node, &sys->nodes, listm_system) {
		if (node->drm_render_minor == drm_render_minor)
			return node;
	}
	return NULL;
}

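/* Return the index'th GPU node; CPU nodes are skipped and index is consumed
 * while walking the list.
 */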
struct tp_node *sys_get_node_by_index(const struct tp_system *sys, uint32_t index)
{
	struct tp_node *node;

	list_for_each_entry(node, &sys->nodes, listm_system) {
		if (NODE_IS_GPU(node) && index-- == 0)
			return node;
	}
	return NULL;
}

struct tp_node *sys_get_node_by_gpu_id(const struct tp_system *sys, const uint32_t gpu_id)
{
	struct tp_node *node;

	list_for_each_entry(node, &sys->nodes, listm_system) {
		if (node->gpu_id == gpu_id)
			return node;
	}
	return NULL;
}

static struct tp_node *sys_get_node_by_node_id(const struct tp_system *sys, const uint32_t node_id)
{
	struct tp_node *node;

	list_for_each_entry(node, &sys->nodes, listm_system) {
		if (node->id == node_id)
			return node;
	}
	return NULL;
}

static struct tp_p2pgroup *sys_get_p2pgroup_with_gpu_id(const struct tp_system *sys, const int type,
							const uint32_t gpu_id)
{
	struct tp_p2pgroup *p2pgroup;

	list_for_each_entry(p2pgroup, &sys->xgmi_groups, listm_system) {
		if (p2pgroup->type != type)
			continue;

		if (p2pgroup_get_node_by_gpu_id(p2pgroup, gpu_id))
			return p2pgroup;
	}
	return NULL;
}

static struct tp_iolink *get_tp_peer_iolink(const struct tp_node *from_node, const struct tp_node *to_node,
					    const uint8_t type)
{
	struct tp_iolink *iolink;

	list_for_each_entry(iolink, &from_node->iolinks, listm) {
		if (iolink->node_to_id == to_node->id && iolink->type == type)
			return iolink;
	}
	return NULL;
}

static bool maps_dest_cpu_mapped(const struct device_maps *maps, const uint32_t dest_id)
{
	struct id_map *id_map;

	list_for_each_entry(id_map, &maps->cpu_maps, listm) {
		if (id_map->dest == dest_id)
			return true;
	}
	return false;
}

static uint32_t maps_get_dest_cpu(const struct device_maps *maps, const uint32_t src_id)
{
	struct id_map *id_map;

	list_for_each_entry(id_map, &maps->cpu_maps, listm) {
		if (id_map->src == src_id)
			return id_map->dest;
	}
	return INVALID_CPU_ID;
}

bool maps_dest_gpu_mapped(const struct device_maps *maps, const uint32_t dest_id)
{
	struct id_map *id_map;

	list_for_each_entry(id_map, &maps->gpu_maps, listm) {
		if (id_map->dest == dest_id)
			return true;
	}
	return false;
}

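/* Returns the mapped destination gpu_id, or 0 if src_id is not mapped yet
 * (gpu_id 0 denotes a CPU node here, so it can double as a "no mapping" value).
 */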
uint32_t maps_get_dest_gpu(const struct device_maps *maps, const uint32_t src_id)
{
	struct id_map *id_map;

	list_for_each_entry(id_map, &maps->gpu_maps, listm) {
		if (id_map->src == src_id)
			return id_map->dest;
	}
	return 0;
}

static struct id_map *maps_add_cpu_entry(struct device_maps *maps, const uint32_t src_id, const uint32_t dest_id)
{
	struct id_map *id_map = xzalloc(sizeof(*id_map));

	if (!id_map) {
		pr_err("Failed to allocate memory for id_map\n");
		return NULL;
	}

	id_map->src = src_id;
	id_map->dest = dest_id;

	list_add_tail(&id_map->listm, &maps->cpu_maps);

	maps->tail_cpu = &id_map->listm;

	pr_debug("Added CPU mapping [%02d -> %02d]\n", src_id, dest_id);
	return id_map;
}

struct id_map *maps_add_gpu_entry(struct device_maps *maps, const uint32_t src_id, const uint32_t dest_id)
{
	struct id_map *id_map = xzalloc(sizeof(*id_map));

	if (!id_map) {
		pr_err("Failed to allocate memory for id_map\n");
		return NULL;
	}

	id_map->src = src_id;
	id_map->dest = dest_id;

	list_add_tail(&id_map->listm, &maps->gpu_maps);

	maps->tail_gpu = &id_map->listm;

	pr_debug("Added GPU mapping [0x%04X -> 0x%04X]\n", src_id, dest_id);
	return id_map;
}

static void maps_print(struct device_maps *maps)
{
	struct id_map *id_map;

	pr_info("===Maps===============\n");
	list_for_each_entry(id_map, &maps->gpu_maps, listm)
		pr_info("GPU: 0x%04X -> 0x%04X\n", id_map->src, id_map->dest);

	list_for_each_entry(id_map, &maps->cpu_maps, listm)
		pr_info("CPU: %02d -> %02d\n", id_map->src, id_map->dest);
	pr_info("======================\n");
}

void maps_init(struct device_maps *maps)
{
	INIT_LIST_HEAD(&maps->cpu_maps);
	INIT_LIST_HEAD(&maps->gpu_maps);
	maps->tail_cpu = NULL;
	maps->tail_gpu = NULL;
}

void maps_free(struct device_maps *maps)
{
	while (!list_empty(&maps->cpu_maps)) {
		struct id_map *map = list_first_entry(&maps->cpu_maps, struct id_map, listm);

		list_del(&map->listm);
		xfree(map);
	}
	while (!list_empty(&maps->gpu_maps)) {
		struct id_map *map = list_first_entry(&maps->gpu_maps, struct id_map, listm);

		list_del(&map->listm);
		xfree(map);
	}
}

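/*
 * maps_pop()/maps_push() are the undo/commit primitives of the backtracking
 * search in map_devices(): maps_push() splices a candidate sub-mapping into
 * the accepted set (rejecting duplicate entries), and maps_pop() cuts the
 * same entries back out when the search has to backtrack.
 */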
static void maps_pop(struct device_maps *maps, struct device_maps *remove)
{
	if (remove->tail_cpu)
		list_cut_position(&remove->cpu_maps, &maps->cpu_maps, remove->tail_cpu);

	if (remove->tail_gpu)
		list_cut_position(&remove->gpu_maps, &maps->gpu_maps, remove->tail_gpu);

	maps_free(remove);
}

static int maps_push(struct device_maps *maps, struct device_maps *new)
{
	struct id_map *src_id_map, *dest_id_map;

	list_for_each_entry(src_id_map, &new->cpu_maps, listm) {
		list_for_each_entry(dest_id_map, &maps->cpu_maps, listm) {
			if (src_id_map->src == dest_id_map->src || src_id_map->dest == dest_id_map->dest) {
				pr_err("CPU mapping already exists src [%02d->%02d] new [%02d->%02d]\n",
				       src_id_map->src, src_id_map->dest, dest_id_map->src, dest_id_map->dest);
				return -EINVAL;
			}
		}
	}
	list_for_each_entry(src_id_map, &new->gpu_maps, listm) {
		list_for_each_entry(dest_id_map, &maps->gpu_maps, listm) {
			if (src_id_map->src == dest_id_map->src || src_id_map->dest == dest_id_map->dest) {
				pr_err("GPU mapping already exists src [0x%04X -> 0x%04X] new [0x%04X -> 0x%04X]\n",
				       src_id_map->src, src_id_map->dest, dest_id_map->src, dest_id_map->dest);
				return -EINVAL;
			}
		}
	}

	list_splice(&new->cpu_maps, &maps->cpu_maps);
	list_splice(&new->gpu_maps, &maps->gpu_maps);

	return 0;
}

struct tp_iolink *node_add_iolink(struct tp_node *node, uint32_t type, uint32_t node_to_id)
{
	struct tp_iolink *iolink = xzalloc(sizeof(*iolink));

	if (!iolink)
		return NULL;

	iolink->type = type;
	/* iolink->node_to will be filled in topology_determine_iolinks */
	iolink->node_to_id = node_to_id;
	iolink->node_from = node;

	list_add_tail(&iolink->listm, &node->iolinks);
	return iolink;
}

struct tp_p2pgroup *sys_add_group(struct tp_system *sys, uint32_t type)
{
	struct tp_p2pgroup *group;

	group = xzalloc(sizeof(*group));
	if (!group)
		return NULL;

	INIT_LIST_HEAD(&group->nodes);
	group->type = type;
	list_add_tail(&group->listm_system, &sys->xgmi_groups);
	if (type == TOPO_IOLINK_TYPE_XGMI)
		sys->num_xgmi_groups++;

	return group;
}

struct tp_node *sys_add_node(struct tp_system *sys, uint32_t id, uint32_t gpu_id)
{
	struct tp_node *node = NULL;

	node = xzalloc(sizeof(*node));
	if (!node)
		return NULL;

	node->id = id;
	node->gpu_id = gpu_id;
	node->drm_fd = -1;
	INIT_LIST_HEAD(&node->iolinks);
	list_add_tail(&node->listm_system, &sys->nodes);
	sys->num_nodes++;

	return node;
}

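/* With MAX_PARAMETER_LEN == 64 the generated format is " %64s %lu": a
 * width-limited property name followed by an unsigned decimal value.
 */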
static bool get_prop(char *line, char *name, uint64_t *value)
{
	char format[16];

	sprintf(format, " %%%ds %%lu", MAX_PARAMETER_LEN);
	if (sscanf(line, format, name, value) != 2)
		return false;
	return true;
}

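/* Each properties line is a "<name> <value>" pair, e.g. "simd_count 256". */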
/* Parse node properties in /sys/class/kfd/kfd/topology/nodes/N/properties */
static int parse_topo_node_properties(struct tp_node *dev, const char *dir_path)
{
	FILE *file;
	char path[300];
	char line[300];

	sprintf(path, "%s/properties", dir_path);
	file = fopen(path, "r");
	if (!file) {
		pr_perror("Failed to access %s", path);
		return -EFAULT;
	}

	while (fgets(line, sizeof(line), file)) {
		char name[MAX_PARAMETER_LEN + 1];
		uint64_t value;

		memset(name, 0, sizeof(name));
		if (!get_prop(line, name, &value))
			goto fail;

		if (!strcmp(name, "cpu_cores_count"))
			dev->cpu_cores_count = (uint32_t)value;
		else if (!strcmp(name, "simd_count"))
			dev->simd_count = (uint32_t)value;
		else if (!strcmp(name, "mem_banks_count"))
			dev->mem_banks_count = (uint32_t)value;
		else if (!strcmp(name, "caches_count"))
			dev->caches_count = (uint32_t)value;
		else if (!strcmp(name, "io_links_count"))
			dev->io_links_count = (uint32_t)value;
		else if (!strcmp(name, "max_waves_per_simd"))
			dev->max_waves_per_simd = (uint32_t)value;
		else if (!strcmp(name, "lds_size_in_kb"))
			dev->lds_size_in_kb = (uint32_t)value;
		else if (!strcmp(name, "num_gws"))
			dev->num_gws = (uint32_t)value;
		else if (!strcmp(name, "wave_front_size"))
			dev->wave_front_size = (uint32_t)value;
		else if (!strcmp(name, "array_count"))
			dev->array_count = (uint32_t)value;
		else if (!strcmp(name, "simd_arrays_per_engine"))
			dev->simd_arrays_per_engine = (uint32_t)value;
		else if (!strcmp(name, "cu_per_simd_array"))
			dev->cu_per_simd_array = (uint32_t)value;
		else if (!strcmp(name, "simd_per_cu"))
			dev->simd_per_cu = (uint32_t)value;
		else if (!strcmp(name, "max_slots_scratch_cu"))
			dev->max_slots_scratch_cu = (uint32_t)value;
		else if (!strcmp(name, "vendor_id"))
			dev->vendor_id = (uint32_t)value;
		else if (!strcmp(name, "device_id"))
			dev->device_id = (uint32_t)value;
		else if (!strcmp(name, "domain"))
			dev->domain = (uint32_t)value;
		else if (!strcmp(name, "drm_render_minor"))
			dev->drm_render_minor = (uint32_t)value;
		else if (!strcmp(name, "hive_id"))
			dev->hive_id = value;
		else if (!strcmp(name, "num_sdma_engines"))
			dev->num_sdma_engines = (uint32_t)value;
		else if (!strcmp(name, "num_sdma_xgmi_engines"))
			dev->num_sdma_xgmi_engines = (uint32_t)value;
		else if (!strcmp(name, "num_sdma_queues_per_engine"))
			dev->num_sdma_queues_per_engine = (uint32_t)value;
		else if (!strcmp(name, "num_cp_queues"))
			dev->num_cp_queues = (uint32_t)value;
		else if (!strcmp(name, "fw_version"))
			dev->fw_version = (uint32_t)value;
		else if (!strcmp(name, "capability"))
			dev->capability = (uint32_t)value;
		else if (!strcmp(name, "sdma_fw_version"))
			dev->sdma_fw_version = (uint32_t)value;

		if (!dev->gpu_id && dev->cpu_cores_count >= 1) {
			/* This is a CPU - we do not need to parse the other information */
			break;
		}
	}

	fclose(file);
	return 0;
fail:
	pr_err("Failed to parse line = %s\n", line);
	fclose(file);
	return -EINVAL;
}

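/* TOPO_HEAP_TYPE_PUBLIC is host-visible VRAM and TOPO_HEAP_TYPE_PRIVATE is
 * device-local VRAM; either one identifies the bank the VRAM size is read from.
 */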
/* Parse node memory properties in /sys/class/kfd/kfd/topology/nodes/N/mem_banks */
static int parse_topo_node_mem_banks(struct tp_node *node, const char *dir_path)
{
	struct dirent *dirent_node;
	DIR *d_node;
	char path[300];
	FILE *file = NULL;
	uint32_t heap_type = 0;
	uint64_t mem_size = 0;
	int ret;

	if (!NODE_IS_GPU(node))
		return 0;

	sprintf(path, "%s/mem_banks", dir_path);

	d_node = opendir(path);
	if (!d_node) {
		pr_perror("Can't open %s", path);
		return -EACCES;
	}

	while ((dirent_node = readdir(d_node)) != NULL) {
		char line[300];
		char bank_path[1024];
		struct stat st;
		int id;

		heap_type = 0;
		mem_size = 0;

		/* Only parse numeric directories */
		if (sscanf(dirent_node->d_name, "%d", &id) != 1)
			continue;

		snprintf(bank_path, sizeof(bank_path), "%s/%s", path, dirent_node->d_name);
		if (stat(bank_path, &st)) {
			pr_err("Cannot access %s\n", bank_path);
			ret = -EACCES;
			goto fail;
		}
		if ((st.st_mode & S_IFMT) == S_IFDIR) {
			char properties_path[PATH_MAX];

			snprintf(properties_path, sizeof(properties_path), "%s/properties", bank_path);

			file = fopen(properties_path, "r");
			if (!file) {
				pr_perror("Failed to access %s", properties_path);
				ret = -EACCES;
				goto fail;
			}

			while (fgets(line, sizeof(line), file)) {
				char name[MAX_PARAMETER_LEN + 1];
				uint64_t value;

				memset(name, 0, sizeof(name));
				if (!get_prop(line, name, &value)) {
					ret = -EINVAL;
					goto fail;
				}

				if (!strcmp(name, "heap_type"))
					heap_type = (uint32_t)value;
				if (!strcmp(name, "size_in_bytes"))
					mem_size = value;
			}

			fclose(file);
			file = NULL;
		}

		if (heap_type == TOPO_HEAP_TYPE_PUBLIC || heap_type == TOPO_HEAP_TYPE_PRIVATE)
			break;
	}

	if ((heap_type != TOPO_HEAP_TYPE_PUBLIC && heap_type != TOPO_HEAP_TYPE_PRIVATE) || !mem_size) {
		pr_err("Failed to determine memory type and size for device in %s\n", dir_path);
		ret = -EINVAL;
		goto fail;
	}

	node->vram_public = (heap_type == TOPO_HEAP_TYPE_PUBLIC);
	node->vram_size = mem_size;
	closedir(d_node);
	return 0;
fail:
	if (file)
		fclose(file);
	closedir(d_node);
	return ret;
}

/* Parse node iolinks properties in /sys/class/kfd/kfd/topology/nodes/N/io_links */
static int parse_topo_node_iolinks(struct tp_node *node, const char *dir_path)
{
	struct dirent *dirent_node;
	DIR *d_node;
	char path[300];
	FILE *file = NULL;
	int ret = 0;

	snprintf(path, sizeof(path), "%s/io_links", dir_path);

	d_node = opendir(path);
	if (!d_node) {
		pr_perror("Can't open %s", path);
		return -EACCES;
	}

	while ((dirent_node = readdir(d_node)) != NULL) {
		char line[300];
		char iolink_path[1024];
		struct stat st;
		int id;

		uint32_t iolink_type = 0;
		uint32_t node_to_id = 0;

		/* Only parse numeric directories */
		if (sscanf(dirent_node->d_name, "%d", &id) != 1)
			continue;

		snprintf(iolink_path, sizeof(iolink_path), "%s/%s", path, dirent_node->d_name);
		if (stat(iolink_path, &st)) {
			pr_err("Cannot access %s\n", iolink_path);
			ret = -EACCES;
			goto fail;
		}
		if ((st.st_mode & S_IFMT) == S_IFDIR) {
			char properties_path[PATH_MAX];

			snprintf(properties_path, sizeof(properties_path), "%s/properties", iolink_path);

			file = fopen(properties_path, "r");
			if (!file) {
				pr_perror("Failed to access %s", properties_path);
				ret = -EACCES;
				goto fail;
			}

			while (fgets(line, sizeof(line), file)) {
				char name[MAX_PARAMETER_LEN + 1];
				uint64_t value;

				memset(name, 0, sizeof(name));
				if (!get_prop(line, name, &value)) {
					ret = -EINVAL;
					goto fail;
				}

				if (!strcmp(name, "type"))
					iolink_type = (uint32_t)value;
				if (!strcmp(name, "node_to"))
					node_to_id = (uint32_t)value;
			}
			fclose(file);
			file = NULL;
		}

		/* We only store the link information for now, then once all topology parsing is
		 * finished we will confirm iolinks
		 */
		if (iolink_type == TOPO_IOLINK_TYPE_PCIE || iolink_type == TOPO_IOLINK_TYPE_XGMI) {
			if (!node_add_iolink(node, iolink_type, node_to_id)) {
				ret = -ENOMEM;
				goto fail;
			}
		}
	}
	closedir(d_node);
	return 0;
fail:
	if (file)
		fclose(file);

	closedir(d_node);
	return ret;
}

/* Parse a node (CPU or GPU) in /sys/class/kfd/kfd/topology/nodes/N */
static int parse_topo_node(struct tp_node *node, const char *dir_path)
{
	if (parse_topo_node_properties(node, dir_path)) {
		pr_err("Failed to parse node properties\n");
		return -EINVAL;
	}
	if (parse_topo_node_mem_banks(node, dir_path)) {
		pr_err("Failed to parse node mem_banks\n");
		return -EINVAL;
	}
	if (parse_topo_node_iolinks(node, dir_path)) {
		pr_err("Failed to parse node iolinks\n");
		return -EINVAL;
	}
	return 0;
}

static const char *p2pgroup_to_str(struct tp_p2pgroup *group)
{
	static char topology_printstr[200];
	struct tp_node *node;
	size_t str_len = 0;

	topology_printstr[0] = '\0';
	str_len += sprintf(&topology_printstr[str_len], "type:%s:", link_type(group->type));

	list_for_each_entry(node, &group->nodes, listm_p2pgroup) {
		str_len += sprintf(&topology_printstr[str_len], "0x%04X ", node->gpu_id);
	}
	return topology_printstr;
}

static const char *mapping_list_to_str(struct list_head *node_list)
{
	static char topology_printstr[200];
	struct tp_node *node;
	size_t str_len = 0;

	topology_printstr[0] = '\0';
	list_for_each_entry(node, node_list, listm_mapping)
		str_len += sprintf(&topology_printstr[str_len], "0x%04X ", node->gpu_id);

	return topology_printstr;
}

void topology_print(const struct tp_system *sys, const char *message)
{
	struct tp_node *node;
	struct tp_p2pgroup *xgmi_group;

	pr_info("===System Topology=[%12s]==================================\n", message);
	list_for_each_entry(node, &sys->nodes, listm_system) {
		struct tp_iolink *iolink;

		if (!NODE_IS_GPU(node)) {
			pr_info("[%d] CPU\n", node->id);
			pr_info(" cpu_cores_count:%u\n", node->cpu_cores_count);
		} else {
			pr_info("[%d] GPU gpu_id:0x%04X\n", node->id, node->gpu_id);
			pr_info(" vendor_id:%u device_id:%u\n", node->vendor_id, node->device_id);
			pr_info(" vram_public:%c vram_size:%lu\n", node->vram_public ? 'Y' : 'N', node->vram_size);
			pr_info(" io_links_count:%u capability:%u\n", node->io_links_count, node->capability);
			pr_info(" mem_banks_count:%u caches_count:%d lds_size_in_kb:%u\n", node->mem_banks_count,
				node->caches_count, node->lds_size_in_kb);
			pr_info(" simd_count:%u max_waves_per_simd:%u\n", node->simd_count,
				node->max_waves_per_simd);
			pr_info(" num_gws:%u wave_front_size:%u array_count:%u\n", node->num_gws,
				node->wave_front_size, node->array_count);
			pr_info(" simd_arrays_per_engine:%u simd_per_cu:%u\n", node->simd_arrays_per_engine,
				node->simd_per_cu);
			pr_info(" max_slots_scratch_cu:%u cu_per_simd_array:%u\n", node->max_slots_scratch_cu,
				node->cu_per_simd_array);
			pr_info(" num_sdma_engines:%u\n", node->num_sdma_engines);
			pr_info(" num_sdma_xgmi_engines:%u num_sdma_queues_per_engine:%u\n",
				node->num_sdma_xgmi_engines, node->num_sdma_queues_per_engine);
			pr_info(" num_cp_queues:%u fw_version:%u sdma_fw_version:%u\n", node->num_cp_queues,
				node->fw_version, node->sdma_fw_version);
		}
		list_for_each_entry(iolink, &node->iolinks, listm) {
			if (!iolink->valid)
				continue;

			pr_info(" iolink type:%s node-to:%d (0x%04X) node-from:%d bi-dir:%s\n",
				link_type(iolink->type), iolink->node_to_id, iolink->node_to->gpu_id,
				iolink->node_from->id, iolink->peer ? "Y" : "N");
		}
	}

	pr_info("===Groups==========================================================\n");
	list_for_each_entry(xgmi_group, &sys->xgmi_groups, listm_system)
		pr_info("%s\n", p2pgroup_to_str(xgmi_group));
	pr_info("===================================================================\n");
}

void topology_init(struct tp_system *sys)
{
	memset(sys, 0, sizeof(*sys));
	INIT_LIST_HEAD(&sys->nodes);
	INIT_LIST_HEAD(&sys->xgmi_groups);
}

void topology_free(struct tp_system *sys)
{
	while (!list_empty(&sys->nodes)) {
		struct tp_node *node = list_first_entry(&sys->nodes, struct tp_node, listm_system);

		list_del(&node->listm_system);

		while (!list_empty(&node->iolinks)) {
			struct tp_iolink *iolink = list_first_entry(&node->iolinks, struct tp_iolink, listm);

			list_del(&iolink->listm);
			xfree(iolink);
		}
		xfree(node);
	}

	while (!list_empty(&sys->xgmi_groups)) {
		struct tp_p2pgroup *p2pgroup = list_first_entry(&sys->xgmi_groups, struct tp_p2pgroup, listm_system);

		list_del(&p2pgroup->listm_system);
		xfree(p2pgroup);
	}

	/* Mark the topology as freed */
	sys->parsed = false;
}

/**
 * @brief Validates iolinks and determines XGMI hives in a system topology
 *
 * On some systems, some GPUs may not be accessible because they are masked by cgroups, but the
 * iolinks to these GPUs are still visible. If the peer GPU is not accessible, we consider that link
 * invalid.
 * In an XGMI hive, each GPU will have a bi-directional iolink to every other GPU. So we create an
 * XGMI group (hive) and add all the GPUs in that hive to the group when iterating over the first
 * GPU in that group.
 *
 * @param sys system topology
 * @return 0 if successful, errno if failed.
 */
int topology_determine_iolinks(struct tp_system *sys)
{
	int ret = 0;
	struct tp_node *node;

	list_for_each_entry(node, &sys->nodes, listm_system) {
		struct tp_iolink *iolink;

		list_for_each_entry(iolink, &node->iolinks, listm) {
			struct tp_p2pgroup *group = NULL;
			struct tp_node *peer_node = NULL;
			struct tp_iolink *peer_iolink = NULL;

			peer_node = sys_get_node_by_node_id(sys, iolink->node_to_id);
			if (!peer_node) {
				/* node not accessible, usually because it is masked by cgroups */
				iolink->valid = false;
				continue;
			}
			iolink->valid = true;
			node->num_valid_iolinks++;

			iolink->node_to = peer_node;
			peer_iolink = get_tp_peer_iolink(peer_node, node, iolink->type);
			if (!peer_iolink)
				continue; /* This is a one-dir link */

			/* We confirmed both sides have same type of iolink */
			iolink->peer = peer_iolink;
			peer_iolink->peer = iolink;

			if (iolink->type == TOPO_IOLINK_TYPE_XGMI) {
				group = sys_get_p2pgroup_with_gpu_id(sys, iolink->type, node->gpu_id);
				if (!group) {
					/* This GPU does not already belong to a group so we create
					 * a new group
					 */
					group = sys_add_group(sys, iolink->type);
					if (!group) {
						ret = -ENOMEM;
						goto fail;
					}
					list_add_tail(&node->listm_p2pgroup, &group->nodes);
				}

				/* Also add peer GPU to this group */
				if (!p2pgroup_get_node_by_gpu_id(group, peer_node->gpu_id))
					list_add_tail(&peer_node->listm_p2pgroup, &group->nodes);
			}
		}
	}

fail:
	/* In case of failure, caller function will call topology_free which will free groups that
	 * were successfully allocated
	 */
	return ret;
}

/**
 * @brief Parse system topology
 *
 * Parse system topology exposed by the drivers in /sys/class/kfd/kfd/topology and fill in the
 * system topology structure.
 *
 * @param sys system topology structure to be filled by this function
 * @param message print this message when printing the topology to logs
 * @return 0 if successful, errno if failed.
 */
int topology_parse(struct tp_system *sys, const char *message)
{
	struct dirent *dirent_system;
	DIR *d_system;
	char path[300];
	int ret;

	if (sys->parsed)
		return 0;

	sys->parsed = true;
	INIT_LIST_HEAD(&sys->nodes);
	INIT_LIST_HEAD(&sys->xgmi_groups);

	d_system = opendir(TOPOLOGY_PATH);
	if (!d_system) {
		pr_perror("Can't open %s", TOPOLOGY_PATH);
		return -EACCES;
	}

	while ((dirent_system = readdir(d_system)) != NULL) {
		struct stat stbuf;
		int id, fd;

		/* Only parse numeric directories */
		if (sscanf(dirent_system->d_name, "%d", &id) != 1)
			continue;

		sprintf(path, "%s%s", TOPOLOGY_PATH, dirent_system->d_name);
		if (stat(path, &stbuf)) {
			/* When cgroup is masking some devices, the path exists, but it is not
			 * accessible, this is not an error
			 */
			pr_info("Cannot access %s\n", path);
			continue;
		}

		if ((stbuf.st_mode & S_IFMT) == S_IFDIR) {
			struct tp_node *node;
			int len;
			char gpu_id_path[300];
			char read_buf[7]; /* Max gpu_id len is 6 chars */
			unsigned int gpu_id;

			sprintf(gpu_id_path, "%s/%s/gpu_id", TOPOLOGY_PATH, dirent_system->d_name);
			fd = open(gpu_id_path, O_RDONLY);
			if (fd < 0) {
				pr_perror("Failed to access %s", gpu_id_path);
				continue;
			}

			len = read(fd, read_buf, sizeof(read_buf) - 1);
			close(fd);
			if (len < 0)
				continue;

			read_buf[len] = '\0';

			if (sscanf(read_buf, "%u", &gpu_id) != 1)
				continue;

			node = sys_add_node(sys, id, gpu_id);
			if (!node) {
				ret = -ENOMEM;
				goto fail;
			}

			if (parse_topo_node(node, path)) {
				pr_err("Failed to parse node %s\n", path);
				ret = -EINVAL;
				goto fail;
			}
		}
	}
	closedir(d_system);
	return 0;

fail:
	closedir(d_system);
	topology_free(sys);
	return ret;
}

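/*
 * Matching policy: properties that describe the GPU itself must be equal,
 * while for resources (VRAM size, GWS, caches) and firmware versions the
 * destination only has to be at least as large/new as the source; each of
 * those relaxed checks can be disabled via the kfd_*_check overrides above.
 */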
static bool device_properties_match(struct tp_node *src, struct tp_node *dest)
{
	if (src->simd_count == dest->simd_count && src->mem_banks_count == dest->mem_banks_count &&
	    src->io_links_count == dest->io_links_count && src->max_waves_per_simd == dest->max_waves_per_simd &&
	    src->lds_size_in_kb == dest->lds_size_in_kb && src->wave_front_size == dest->wave_front_size &&
	    src->array_count == dest->array_count && src->simd_arrays_per_engine == dest->simd_arrays_per_engine &&
	    src->cu_per_simd_array == dest->cu_per_simd_array && src->simd_per_cu == dest->simd_per_cu &&
	    src->max_slots_scratch_cu == dest->max_slots_scratch_cu && src->vendor_id == dest->vendor_id &&
	    src->device_id == dest->device_id && src->num_sdma_engines == dest->num_sdma_engines &&
	    src->num_sdma_xgmi_engines == dest->num_sdma_xgmi_engines &&
	    src->num_sdma_queues_per_engine == dest->num_sdma_queues_per_engine &&
	    src->num_cp_queues == dest->num_cp_queues && src->vram_public == dest->vram_public &&
	    (!kfd_capability_check || (src->capability == dest->capability)) &&
	    (!kfd_vram_size_check || (src->vram_size <= dest->vram_size)) &&
	    (!kfd_num_gws_check || (src->num_gws <= dest->num_gws)) &&
	    (!kfd_caches_count_check || (src->caches_count <= dest->caches_count)) &&
	    (!kfd_fw_version_check || (src->fw_version <= dest->fw_version)) &&
	    (!kfd_sdma_fw_version_check || (src->sdma_fw_version <= dest->sdma_fw_version))) {
		return true;
	}
	return false;
}

/**
 * @brief Determines whether iolink dest can be used to replace src
 *
 * @param src source iolink
 * @param dest destination iolink
 * @return true if dest can replace src
 */
static bool iolink_match(struct tp_iolink *src, struct tp_iolink *dest)
{
	if (!src->valid)
		return true;

	if (!dest->valid)
		return false;

	if (NODE_IS_GPU(src->node_to) != NODE_IS_GPU(dest->node_to))
		return false;

	/* An XGMI link can replace a PCIe link, but a PCIe link cannot replace an XGMI link */
	if (src->type == TOPO_IOLINK_TYPE_XGMI && dest->type == TOPO_IOLINK_TYPE_PCIE)
		return false;

	/* A bi-directional link can replace a uni-directional link, but not vice versa */
	if (src->peer != NULL && dest->peer == NULL)
		return false;

	return true;
}

/**
 * @brief Determines whether src_node can be mapped to dest_node
 *
 * Node compatibility is determined by:
 * 1. Comparing the node properties
 * 2. Making sure iolink mappings to CPUs would be compatible with existing iolink mappings in maps
 *
 * If src_node and dest_node are mappable, then map_device will push the new mapping
 * for src_node -> dest_node into new_maps.
 *
 * @param src_sys system topology information on source system
 * @param dest_sys system topology information on destination system
 * @param src_node source GPU
 * @param dest_node destination GPU
 * @param maps list of existing device maps
 * @param new_maps if nodes are mappable, then GPU and CPU mappings will be added to this list
 * @return true if src_node and dest_node are mappable
 */
static bool map_device(struct tp_system *src_sys, struct tp_system *dest_sys, struct tp_node *src_node,
		       struct tp_node *dest_node, struct device_maps *maps, struct device_maps *new_maps)
{
	struct tp_iolink *src_iolink;

	pr_debug("Evaluating mapping nodes [0x%04X -> 0x%04X]\n", src_node->gpu_id, dest_node->gpu_id);

	/* Compare GPU properties from /sys/class/kfd/kfd/topology/nodes/N/properties */
	if (!device_properties_match(src_node, dest_node)) {
		pr_debug("[0x%04X -> 0x%04X] Device properties do not match\n", src_node->gpu_id, dest_node->gpu_id);
		return false;
	}

	if (src_node->num_valid_iolinks > dest_node->num_valid_iolinks) {
		pr_debug("[0x%04X -> 0x%04X] Mismatch between number of iolinks\n", src_node->gpu_id,
			 dest_node->gpu_id);
		return false;
	}

	list_for_each_entry(src_iolink, &src_node->iolinks, listm) {
		/* Go through list of iolinks to CPU and compare them */

		if (!NODE_IS_GPU(src_iolink->node_to)) {
			bool matched_iolink = false;
			/* This is an iolink to a CPU */
			pr_debug("Found link to CPU node:%02d\n", src_iolink->node_to->id);

			if (!kfd_numa_check) {
				struct tp_iolink *dest_iolink;

				list_for_each_entry(dest_iolink, &dest_node->iolinks, listm) {
					if (iolink_match(src_iolink, dest_iolink))
						matched_iolink = true;
				}
			} else {
				uint32_t dest_cpu_node_id;

				dest_cpu_node_id = maps_get_dest_cpu(maps, src_iolink->node_to->id);
				if (dest_cpu_node_id == INVALID_CPU_ID)
					dest_cpu_node_id = maps_get_dest_cpu(new_maps, src_iolink->node_to->id);

				if (dest_cpu_node_id == INVALID_CPU_ID) {
					struct tp_iolink *dest_iolink;

					list_for_each_entry(dest_iolink, &dest_node->iolinks, listm) {
						if (iolink_match(src_iolink, dest_iolink) &&
						    !maps_dest_cpu_mapped(maps, dest_iolink->node_to->id) &&
						    !maps_dest_cpu_mapped(new_maps, dest_iolink->node_to->id)) {
							if (!maps_add_cpu_entry(new_maps, src_iolink->node_to->id,
										dest_iolink->node_to->id))
								/* This is a critical error because
								 * we are out of memory
								 */
								return false;

							matched_iolink = true;
							break;
						}
					}
				} else {
					pr_debug("Existing CPU mapping found [%02d-%02d]\n", src_iolink->node_to->id,
						 dest_cpu_node_id);
					/* Confirm that the link to this CPU is same or better */

					struct tp_iolink *dest_iolink = node_get_iolink_to_node_id(
						dest_node, src_iolink->type, dest_cpu_node_id);

					if (dest_iolink && iolink_match(src_iolink, dest_iolink))
						matched_iolink = true;
				}
			}
			if (!matched_iolink) {
				pr_debug("[0x%04X -> 0x%04X] Mismatch between iolink to CPU\n", src_node->gpu_id,
					 dest_node->gpu_id);

				return false;
			}
		} else {
			/* If GPUs have P2P-PCIe iolinks to this GPU, then at least one CPU will
			 * also have a P2P-PCIe iolink to this GPU, so it seems that we do not need
			 * to consider P2P-PCIe iolinks from GPU to GPU for now. Once P2P-PCIe
			 * iolinks are exposed via p2p_links we may have to add additional code here
			 * to validate P2P-PCIe links between GPUs.
			 */
		}
	}
	pr_debug("[0x%04X -> 0x%04X] Map is possible\n", src_node->gpu_id, dest_node->gpu_id);

	if (!maps_add_gpu_entry(new_maps, src_node->gpu_id, dest_node->gpu_id)) {
		/* This is a critical error because we are out of memory */
		return false;
	}
	maps_print(new_maps);
	return true;
}

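/*
 * Worked example: src GPUs {A, B}, dest GPUs {X, Y}. Try A->X first; if the
 * recursive call cannot then place B, pop A->X and try A->Y, then B->X. The
 * first combination in which every src GPU gets a distinct, compatible dest
 * GPU wins; if no combination works, the mapping fails.
 */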
/**
 * @brief Determines whether list of GPUs in src_nodes are mappable to dest_nodes
 *
 * This function will pick the first node from src_nodes and iterate through all the nodes in
 * dest_nodes and call map_device to determine whether the node is mappable.
 * If a node from dest_nodes is mappable to the first node from src_nodes:
 * 1. This function will remove the first node from src_nodes and the node from dest_nodes
 * 2. Push sub-mappings (new_maps) generated by map_device into existing mappings (maps)
 * 3. Recursively check whether remaining nodes in src_nodes and dest_nodes are mappable.
 *
 * Once src_nodes is empty then we have successfully mapped all the nodes and maps contains a full
 * list of GPU mappings.
 *
 * If there are no nodes in dest_nodes that can be mapped to the first node in src_nodes, then this
 * means we cannot build a full mapping list with the current list of mappings. We backtrack by
 * popping the newly generated sub-mappings (new_maps) from existing mappings (maps) and add the two
 * nodes back to src_nodes and dest_nodes and return false. When this function returns false, the
 * caller function will try a different path by trying to map the first node from src_nodes to the
 * next node in dest_nodes.
 *
 * @param src_sys system topology information on source system
 * @param dest_sys system topology information on destination system
 * @param src_nodes list of source GPUs that need to be mapped
 * @param dest_nodes list of destination GPUs that need to be mapped
 * @param maps list of device maps based on current map path
 * @return true if all nodes from src_nodes and dest_nodes are mappable
 */
static bool map_devices(struct tp_system *src_sys, struct tp_system *dest_sys, struct list_head *src_nodes,
			struct list_head *dest_nodes, struct device_maps *maps)
{
	struct tp_node *src_node, *dest_node, *dest_node_tmp;
	struct device_maps new_maps;

	/* Pick the first src node from the list of nodes and look for a dest node that is mappable.
	 * If we find a mappable destination node, then we add src node and dest node mapping to
	 * device_maps and recursively try to map the remaining nodes in the list.
	 * If there are no more src nodes in the list, then we have found a successful combination
	 * of src to dest nodes that are mappable.
	 */
	if (list_empty(src_nodes)) {
		pr_debug("All nodes mapped successfully\n");
		return true;
	}

	pr_debug("Mapping list src nodes [%s]\n", mapping_list_to_str(src_nodes));
	pr_debug("Mapping list dest nodes [%s]\n", mapping_list_to_str(dest_nodes));

	src_node = list_first_entry(src_nodes, struct tp_node, listm_mapping);
	pr_debug("Looking for match for node 0x%04X\n", src_node->gpu_id);

	list_del(&src_node->listm_mapping);

	list_for_each_entry_safe(dest_node, dest_node_tmp, dest_nodes, listm_mapping) {
		maps_init(&new_maps);
		if (map_device(src_sys, dest_sys, src_node, dest_node, maps, &new_maps)) {
			pr_debug("Matched destination node 0x%04X\n", dest_node->gpu_id);

			/* src node and dest node are mappable, add device_maps generated by
			 * map_device to list of current valid device_maps, and recursively try to
			 * map remaining nodes in the list.
			 */

			list_del(&dest_node->listm_mapping);
			if (maps_push(maps, &new_maps))
				return false;

			if (map_devices(src_sys, dest_sys, src_nodes, dest_nodes, maps)) {
				pr_debug("Matched nodes 0x%04X and after\n", dest_node->gpu_id);
				return true;
			} else {
				/* We could not map remaining nodes in the list. Add dest node back
				 * to list and try to map next dest node in list to current src
				 * node.
				 */
				pr_debug("Nodes after [0x%04X -> 0x%04X] did not match, adding list back\n",
					 src_node->gpu_id, dest_node->gpu_id);

				list_add(&dest_node->listm_mapping, dest_nodes);
				maps_pop(maps, &new_maps);
			}
		}
	}
	pr_debug("Failed to map nodes 0x%04X and after\n", src_node->gpu_id);

	/* Either: We could not find a mappable dest node for current node, or we could not build a
	 * combination from the remaining nodes in the lists. Add src node back to the list and
	 * caller function will try next possible combination.
	 */
	list_add(&src_node->listm_mapping, src_nodes);

	return false;
}

/**
 * @brief Determines whether list of GPUs in src_xgmi_groups are mappable to list of GPUs in
 * dest_xgmi_groups
 *
 * This function will pick the first XGMI group (hive) from src_xgmi_groups and iterate through the
 * XGMI groups in dest_xgmi_groups. If the group in dest_xgmi_groups is mappable then this function
 * will remove the hives from src_xgmi_groups and dest_xgmi_groups and recursively try to map the
 * remaining hives in src_xgmi_groups and dest_xgmi_groups.
 *
 * If src_xgmi_groups is empty, then this means that we have successfully mapped all the XGMI hives
 * and we have a full list of GPU mappings in maps.
 *
 * If we cannot find a hive inside dest_xgmi_groups that is mappable to the first hive from
 * src_xgmi_groups, then this means that this path is not valid and we need to backtrack. We
 * backtrack by adding the hives back into src_xgmi_groups and dest_xgmi_groups and returning false.
 * The caller function will then try a different path by trying to map the first hive in
 * src_xgmi_groups to the next hive in dest_xgmi_groups.
 *
 * @param src_sys system topology information on source system
 * @param dest_sys system topology information on destination system
 * @param src_xgmi_groups list of source XGMI hives that need to be mapped
 * @param dest_xgmi_groups list of destination XGMI hives that need to be mapped
 * @param maps list of device maps based on current map path
 * @return true if all hives from src_xgmi_groups and dest_xgmi_groups are mappable
 */
bool match_xgmi_groups(struct tp_system *src_sys, struct tp_system *dest_sys, struct list_head *src_xgmi_groups,
		       struct list_head *dest_xgmi_groups, struct device_maps *maps)
{
	struct tp_p2pgroup *src_group;
	struct tp_p2pgroup *dest_group;
	struct tp_p2pgroup *dest_group_tmp;

	if (list_empty(src_xgmi_groups)) {
		pr_debug("All groups matched successfully\n");
		return true;
	}

	/* Pick the first src XGMI group from the list. Then try to match src XGMI group with a
	 * dest XGMI group. If we have a dest XGMI group that is mappable, then we try to
	 * recursively map the next src XGMI group in the list, with remaining dest XGMI groups.
	 * If there are no more src XGMI groups in the list, then this means we have successfully
	 * mapped all the groups and we have a valid device_maps
	 */
	src_group = list_first_entry(src_xgmi_groups, struct tp_p2pgroup, listm_system);
	pr_debug("Looking for match for group [%s]\n", p2pgroup_to_str(src_group));

	list_del(&src_group->listm_system);

	list_for_each_entry_safe(dest_group, dest_group_tmp, dest_xgmi_groups, listm_system) {
		struct tp_node *node;

		LIST_HEAD(src_nodes);
		LIST_HEAD(dest_nodes);

		if (src_group->num_nodes > dest_group->num_nodes)
			continue;

		pr_debug("Trying destination group [%s]\n", p2pgroup_to_str(dest_group));

		list_for_each_entry(node, &src_group->nodes, listm_p2pgroup)
			list_add_tail(&node->listm_mapping, &src_nodes);

		list_for_each_entry(node, &dest_group->nodes, listm_p2pgroup)
			list_add_tail(&node->listm_mapping, &dest_nodes);

		/* map_devices will populate maps if successful */
		if (map_devices(src_sys, dest_sys, &src_nodes, &dest_nodes, maps)) {
			/* All the nodes in current src XGMI group are mappable with nodes in
			 * current dest XGMI group. Remove the current groups from the lists
			 * and recursively try to match remaining groups
			 */
			list_del(&dest_group->listm_system);
			pr_debug("Matched destination group [%s]\n", p2pgroup_to_str(dest_group));
			if (match_xgmi_groups(src_sys, dest_sys, src_xgmi_groups, dest_xgmi_groups, maps)) {
				pr_debug("Matched subgroups of [%s]\n", p2pgroup_to_str(dest_group));

				xfree(src_group);
				xfree(dest_group);
				return true;
			} else {
				/* We were not able to map the remaining XGMI groups so we add the
				 * current dest XGMI group back to the list of unmapped groups, and
				 * try to map current src XGMI group with the next dest XGMI in the
				 * list of XGMI groups
				 */
				list_add(&dest_group->listm_system, dest_xgmi_groups);
			}
		}
	}

	/* We have not found a mappable dest XGMI group. We discard this combination. If this is
	 * the first src XGMI group in the list, then it is not possible to match the XGMI groups.
	 * If this was a recursive call, then the calling instance of function will try the next
	 * combination of XGMI groups
	 */

	pr_debug("Failed to match groups [%s]\n", p2pgroup_to_str(src_group));
	list_add_tail(&src_group->listm_system, src_xgmi_groups);

	return false;
}

/**
 * @brief Builds a list of GPU mappings from source topology to destination topology
 *
 * The topology on the destination system may not be identical to the topology on the source
 * system, e.g. there can be GPUs with different device IDs and they may be enumerated in a
 * different order. This function builds a list of GPU mappings from the source topology to the
 * destination topology and stores it in maps.
 *
 * The function will first validate all the iolinks and determine XGMI groups (hives) by calling
 * topology_determine_iolinks(). It will then try to match the GPUs that belong to XGMI hives and
 * after that, match the remaining GPUs.
 *
 * @param src_sys system topology information on source system
 * @param dest_sys system topology information on destination system
 * @param maps list of device maps generated by this function
 * @return 0 if we were able to build a full list of GPU mappings, errno otherwise.
 */
int set_restore_gpu_maps(struct tp_system *src_sys, struct tp_system *dest_sys, struct device_maps *maps)
{
	struct tp_node *node;
	int ret = 0;
	int src_num_gpus = 0;
	int dest_num_gpus = 0;

	maps_init(maps);

	ret = topology_determine_iolinks(src_sys);
	if (ret) {
		pr_err("Failed to determine iolinks from source (checkpointed) topology\n");
		return ret;
	}
	topology_print(src_sys, "Source ");

	ret = topology_determine_iolinks(dest_sys);
	if (ret) {
		pr_err("Failed to determine iolinks from destination (local) topology\n");
		return ret;
	}
	topology_print(dest_sys, "Destination");

	/* Make sure we have same number of GPUs in src and dest */
	list_for_each_entry(node, &src_sys->nodes, listm_system) {
		if (NODE_IS_GPU(node))
			src_num_gpus++;
	}
	list_for_each_entry(node, &dest_sys->nodes, listm_system) {
		if (NODE_IS_GPU(node))
			dest_num_gpus++;
	}

	if (src_num_gpus != dest_num_gpus) {
		pr_err("Number of devices mismatch (checkpointed:%d local:%d)\n", src_num_gpus, dest_num_gpus);
		return -EINVAL;
	}

	if (src_sys->num_xgmi_groups > dest_sys->num_xgmi_groups) {
		pr_err("Number of xgmi groups mismatch (checkpointed:%d local:%d)\n", src_sys->num_xgmi_groups,
		       dest_sys->num_xgmi_groups);
		return -EINVAL;
	}

	/* First try to match the XGMI hives */
	if (src_sys->num_xgmi_groups) {
		if (!match_xgmi_groups(src_sys, dest_sys, &src_sys->xgmi_groups, &dest_sys->xgmi_groups, maps)) {
			pr_err("Failed to match all GPU groups\n");
			return -EINVAL;
		}
		pr_info("Current maps after XGMI groups matched\n");
		maps_print(maps);
	}

	/* We matched all the XGMI hives, now match remaining GPUs */
	LIST_HEAD(src_nodes);
	LIST_HEAD(dest_nodes);

	list_for_each_entry(node, &src_sys->nodes, listm_system) {
		if (NODE_IS_GPU(node) && !maps_get_dest_gpu(maps, node->gpu_id))
			list_add(&node->listm_mapping, &src_nodes);
	}

	list_for_each_entry(node, &dest_sys->nodes, listm_system) {
		if (NODE_IS_GPU(node) && !maps_dest_gpu_mapped(maps, node->gpu_id))
			list_add(&node->listm_mapping, &dest_nodes);
	}

	if (!map_devices(src_sys, dest_sys, &src_nodes, &dest_nodes, maps)) {
		pr_err("Failed to match remaining nodes\n");
		return -EINVAL;
	}

	pr_info("Maps after all nodes matched\n");
	maps_print(maps);

	return ret;
}

int topology_gpu_count(struct tp_system *sys)
{
	struct tp_node *node;
	int count = 0;

	list_for_each_entry(node, &sys->nodes, listm_system)
		if (NODE_IS_GPU(node))
			count++;
	return count;
}
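For orientation, a minimal sketch of how a caller might drive this API during restore, using only functions defined in this file. The wrapper name (example_build_maps) is illustrative and error handling is simplified; in the actual plugin the source topology is deserialized from the checkpoint images rather than parsed from sysfs.

static struct tp_system src_topo, dest_topo;
static struct device_maps restore_maps;

static int example_build_maps(void)
{
	int ret;

	topology_init(&src_topo);	/* in practice, filled from checkpoint images */
	topology_init(&dest_topo);

	/* Parse the local /sys/class/kfd/kfd/topology tree */
	ret = topology_parse(&dest_topo, "Destination");
	if (ret)
		return ret;

	/* Map every checkpointed GPU onto a compatible local GPU */
	ret = set_restore_gpu_maps(&src_topo, &dest_topo, &restore_maps);

	maps_free(&restore_maps);
	topology_free(&dest_topo);
	topology_free(&src_topo);
	return ret;
}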