mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-31 06:15:24 +00:00
criu/plugin: Implement system topology parsing
Parse the local system topology in /sys/class/kfd/kfd/topology/nodes/ and store the properties of each GPU in the CRIU image files. The GPU properties can then be used later during restore to make sure the process is restored on GPUs with similar properties. Signed-off-by: David Yat Sin <david.yatsin@amd.com>
This commit is contained in:
committed by
Andrei Vagin
parent
c4e3ac7fef
commit
6e99fea2fa
@@ -22,7 +22,7 @@ endif
|
||||
criu-amdgpu.pb-c.c: criu-amdgpu.proto
|
||||
protoc-c --proto_path=. --c_out=. criu-amdgpu.proto
|
||||
|
||||
amdgpu_plugin.so: amdgpu_plugin.c criu-amdgpu.pb-c.c
|
||||
amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_topology.c criu-amdgpu.pb-c.c
|
||||
$(CC) $(PLUGIN_CFLAGS) $^ -o $@ $(PLUGIN_INCLUDE)
|
||||
|
||||
amdgpu_plugin_clean:
|
||||
|
@@ -23,9 +23,7 @@
|
||||
#include "criu-log.h"
|
||||
|
||||
#include "common/list.h"
|
||||
|
||||
#define DRM_FIRST_RENDER_NODE 128
|
||||
#define DRM_LAST_RENDER_NODE 255
|
||||
#include "amdgpu_plugin_topology.h"
|
||||
|
||||
#define AMDGPU_KFD_DEVICE "/dev/kfd"
|
||||
#define PROCPIDMEM "/proc/%d/mem"
|
||||
@@ -57,7 +55,15 @@ struct vma_metadata {
|
||||
uint64_t vma_entry;
|
||||
};
|
||||
|
||||
/************************************ Global Variables ********************************************/
|
||||
struct tp_system src_topology;
|
||||
struct tp_system dest_topology;
|
||||
|
||||
struct device_maps checkpoint_maps;
|
||||
struct device_maps restore_maps;
|
||||
|
||||
static LIST_HEAD(update_vma_info_list);
|
||||
/**************************************************************************************************/
|
||||
|
||||
int open_drm_render_device(int minor)
|
||||
{
|
||||
@@ -70,7 +76,7 @@ int open_drm_render_device(int minor)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
sprintf(path, "/dev/dri/renderD%d", minor);
|
||||
snprintf(path, sizeof(path), "/dev/dri/renderD%d", minor);
|
||||
fd = open(path, O_RDWR | O_CLOEXEC);
|
||||
if (fd < 0) {
|
||||
if (errno != ENOENT && errno != EPERM) {
|
||||
@@ -176,8 +182,12 @@ static void free_e(CriuKfd *e)
|
||||
}
|
||||
|
||||
for (int i = 0; i < e->n_device_entries; i++) {
|
||||
if (e->device_entries[i])
|
||||
if (e->device_entries[i]) {
|
||||
for (int j = 0; j < e->device_entries[i]->n_iolinks; j++)
|
||||
xfree(e->device_entries[i]->iolinks[j]);
|
||||
|
||||
xfree(e->device_entries[i]);
|
||||
}
|
||||
}
|
||||
xfree(e);
|
||||
}
|
||||
@@ -236,16 +246,148 @@ static int allocate_bo_entries(CriuKfd *e, int num_bos, struct kfd_criu_bo_bucke
|
||||
return 0;
|
||||
}
|
||||
|
||||
int topology_to_devinfo(struct tp_system *sys, struct device_maps *maps, DeviceEntry **deviceEntries)
|
||||
{
|
||||
uint32_t devinfo_index = 0;
|
||||
struct tp_node *node;
|
||||
|
||||
list_for_each_entry(node, &sys->nodes, listm_system) {
|
||||
DeviceEntry *devinfo = deviceEntries[devinfo_index++];
|
||||
|
||||
devinfo->node_id = node->id;
|
||||
|
||||
if (NODE_IS_GPU(node)) {
|
||||
devinfo->gpu_id = node->gpu_id;
|
||||
|
||||
devinfo->simd_count = node->simd_count;
|
||||
devinfo->mem_banks_count = node->mem_banks_count;
|
||||
devinfo->caches_count = node->caches_count;
|
||||
devinfo->io_links_count = node->io_links_count;
|
||||
devinfo->max_waves_per_simd = node->max_waves_per_simd;
|
||||
devinfo->lds_size_in_kb = node->lds_size_in_kb;
|
||||
devinfo->num_gws = node->num_gws;
|
||||
devinfo->wave_front_size = node->wave_front_size;
|
||||
devinfo->array_count = node->array_count;
|
||||
devinfo->simd_arrays_per_engine = node->simd_arrays_per_engine;
|
||||
devinfo->cu_per_simd_array = node->cu_per_simd_array;
|
||||
devinfo->simd_per_cu = node->simd_per_cu;
|
||||
devinfo->max_slots_scratch_cu = node->max_slots_scratch_cu;
|
||||
devinfo->vendor_id = node->vendor_id;
|
||||
devinfo->device_id = node->device_id;
|
||||
devinfo->domain = node->domain;
|
||||
devinfo->drm_render_minor = node->drm_render_minor;
|
||||
devinfo->hive_id = node->hive_id;
|
||||
devinfo->num_sdma_engines = node->num_sdma_engines;
|
||||
devinfo->num_sdma_xgmi_engines = node->num_sdma_xgmi_engines;
|
||||
devinfo->num_sdma_queues_per_engine = node->num_sdma_queues_per_engine;
|
||||
devinfo->num_cp_queues = node->num_cp_queues;
|
||||
devinfo->fw_version = node->fw_version;
|
||||
devinfo->capability = node->capability;
|
||||
devinfo->sdma_fw_version = node->sdma_fw_version;
|
||||
devinfo->vram_public = node->vram_public;
|
||||
devinfo->vram_size = node->vram_size;
|
||||
} else {
|
||||
devinfo->cpu_cores_count = node->cpu_cores_count;
|
||||
}
|
||||
|
||||
if (node->num_valid_iolinks) {
|
||||
struct tp_iolink *iolink;
|
||||
uint32_t iolink_index = 0;
|
||||
|
||||
devinfo->iolinks = xmalloc(sizeof(DevIolink *) * node->num_valid_iolinks);
|
||||
if (!devinfo->iolinks)
|
||||
return -ENOMEM;
|
||||
|
||||
list_for_each_entry(iolink, &node->iolinks, listm) {
|
||||
if (!iolink->valid)
|
||||
continue;
|
||||
|
||||
devinfo->iolinks[iolink_index] = xmalloc(sizeof(DevIolink));
|
||||
if (!devinfo->iolinks[iolink_index])
|
||||
return -ENOMEM;
|
||||
|
||||
dev_iolink__init(devinfo->iolinks[iolink_index]);
|
||||
|
||||
devinfo->iolinks[iolink_index]->type = iolink->type;
|
||||
devinfo->iolinks[iolink_index]->node_to_id = iolink->node_to_id;
|
||||
iolink_index++;
|
||||
}
|
||||
devinfo->n_iolinks = iolink_index;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int devinfo_to_topology(DeviceEntry *devinfos[], uint32_t num_devices, struct tp_system *sys)
|
||||
{
|
||||
for (int i = 0; i < num_devices; i++) {
|
||||
struct tp_node *node;
|
||||
DeviceEntry *devinfo = devinfos[i];
|
||||
|
||||
node = sys_add_node(sys, devinfo->node_id, devinfo->gpu_id);
|
||||
if (!node)
|
||||
return -ENOMEM;
|
||||
|
||||
if (devinfo->cpu_cores_count) {
|
||||
node->cpu_cores_count = devinfo->cpu_cores_count;
|
||||
} else {
|
||||
node->simd_count = devinfo->simd_count;
|
||||
node->mem_banks_count = devinfo->mem_banks_count;
|
||||
node->caches_count = devinfo->caches_count;
|
||||
node->io_links_count = devinfo->io_links_count;
|
||||
node->max_waves_per_simd = devinfo->max_waves_per_simd;
|
||||
node->lds_size_in_kb = devinfo->lds_size_in_kb;
|
||||
node->num_gws = devinfo->num_gws;
|
||||
node->wave_front_size = devinfo->wave_front_size;
|
||||
node->array_count = devinfo->array_count;
|
||||
node->simd_arrays_per_engine = devinfo->simd_arrays_per_engine;
|
||||
node->cu_per_simd_array = devinfo->cu_per_simd_array;
|
||||
node->simd_per_cu = devinfo->simd_per_cu;
|
||||
node->max_slots_scratch_cu = devinfo->max_slots_scratch_cu;
|
||||
node->vendor_id = devinfo->vendor_id;
|
||||
node->device_id = devinfo->device_id;
|
||||
node->domain = devinfo->domain;
|
||||
node->drm_render_minor = devinfo->drm_render_minor;
|
||||
node->hive_id = devinfo->hive_id;
|
||||
node->num_sdma_engines = devinfo->num_sdma_engines;
|
||||
node->num_sdma_xgmi_engines = devinfo->num_sdma_xgmi_engines;
|
||||
node->num_sdma_queues_per_engine = devinfo->num_sdma_queues_per_engine;
|
||||
node->num_cp_queues = devinfo->num_cp_queues;
|
||||
node->fw_version = devinfo->fw_version;
|
||||
node->capability = devinfo->capability;
|
||||
node->sdma_fw_version = devinfo->sdma_fw_version;
|
||||
node->vram_public = devinfo->vram_public;
|
||||
node->vram_size = devinfo->vram_size;
|
||||
}
|
||||
|
||||
for (int j = 0; j < devinfo->n_iolinks; j++) {
|
||||
struct tp_iolink *iolink;
|
||||
DevIolink *devlink = (devinfo->iolinks[j]);
|
||||
|
||||
iolink = node_add_iolink(node, devlink->type, devlink->node_to_id);
|
||||
if (!iolink)
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_plugin_init(int stage)
|
||||
{
|
||||
pr_info("amdgpu_plugin: initialized: %s (AMDGPU/KFD)\n", CR_PLUGIN_DESC.name);
|
||||
|
||||
topology_init(&src_topology);
|
||||
topology_init(&dest_topology);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_plugin_fini(int stage, int ret)
|
||||
{
|
||||
pr_info("amdgpu_plugin: finished %s (AMDGPU/KFD)\n", CR_PLUGIN_DESC.name);
|
||||
|
||||
topology_free(&src_topology);
|
||||
topology_free(&dest_topology);
|
||||
}
|
||||
|
||||
CR_PLUGIN_REGISTER("amdgpu_plugin", amdgpu_plugin_init, amdgpu_plugin_fini)
|
||||
@@ -314,14 +456,21 @@ static int save_devices(int fd, struct kfd_ioctl_criu_args *args, struct kfd_cri
|
||||
pr_debug("Dumping %d devices\n", args->num_devices);
|
||||
|
||||
e->num_of_gpus = args->num_devices;
|
||||
e->num_of_cpus = src_topology.num_nodes - args->num_devices;
|
||||
|
||||
ret = allocate_device_entries(e, e->num_of_gpus);
|
||||
if (ret) {
|
||||
ret = -ENOMEM;
|
||||
/* The ioctl will only return entries for GPUs, but we also store entries for CPUs and the
|
||||
* information for CPUs is obtained from parsing system topology
|
||||
*/
|
||||
ret = allocate_device_entries(e, src_topology.num_nodes);
|
||||
if (ret)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
plugin_log_msg("Number of GPUs:%d\n", e->num_of_gpus);
|
||||
pr_debug("Number of CPUs:%d GPUs:%d\n", e->num_of_cpus, e->num_of_gpus);
|
||||
|
||||
/* Store topology information that was obtained from parsing /sys/class/kfd/kfd/topology/ */
|
||||
ret = topology_to_devinfo(&src_topology, &checkpoint_maps, e->device_entries);
|
||||
if (ret)
|
||||
goto exit;
|
||||
|
||||
exit:
|
||||
pr_info("Dumped devices %s (ret:%d)\n", ret ? "Failed" : "Ok", ret);
|
||||
@@ -470,6 +619,17 @@ int amdgpu_plugin_dump_file(int fd, int id)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (topology_parse(&src_topology, "Checkpoint"))
|
||||
return -1;
|
||||
|
||||
/* We call topology_determine_iolinks to validate io_links. If io_links are not valid
|
||||
* we do not store them inside the checkpointed images
|
||||
*/
|
||||
if (topology_determine_iolinks(&src_topology)) {
|
||||
pr_err("Failed to determine iolinks from topology\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Check whether this plugin was called for kfd or render nodes */
|
||||
if (major(st.st_rdev) != major(st_kfd.st_rdev) || minor(st.st_rdev) != 0) {
|
||||
/* This is RenderD dumper plugin, for now just save renderD
|
||||
@@ -624,15 +784,18 @@ static int restore_devices(struct kfd_ioctl_criu_args *args, CriuKfd *e)
|
||||
|
||||
args->devices = (uintptr_t)device_buckets;
|
||||
|
||||
for (int i = 0; i < e->num_of_gpus; i++) {
|
||||
for (int entries_i = 0; entries_i < e->num_of_cpus + e->num_of_gpus; entries_i++) {
|
||||
struct kfd_criu_device_bucket *device_bucket;
|
||||
DeviceEntry *devinfo = e->device_entries[i];
|
||||
DeviceEntry *devinfo = e->device_entries[entries_i];
|
||||
|
||||
if (!devinfo->gpu_id)
|
||||
continue;
|
||||
|
||||
device_bucket = &device_buckets[bucket_index++];
|
||||
|
||||
device_bucket->user_gpu_id = devinfo->gpu_id;
|
||||
|
||||
device_bucket->drm_fd = open_drm_render_device(i + DRM_FIRST_RENDER_NODE);
|
||||
device_bucket->drm_fd = open_drm_render_device(bucket_index + DRM_FIRST_RENDER_NODE);
|
||||
if (device_bucket->drm_fd < 0) {
|
||||
pr_perror("amdgpu_plugin: Can't pass NULL drm render fd to driver");
|
||||
goto exit;
|
||||
@@ -878,6 +1041,19 @@ int amdgpu_plugin_restore_file(int id)
|
||||
|
||||
plugin_log_msg("amdgpu_plugin: read image file data\n");
|
||||
|
||||
ret = devinfo_to_topology(e->device_entries, e->num_of_gpus + e->num_of_cpus, &src_topology);
|
||||
if (ret) {
|
||||
pr_err("Failed to convert stored device information to topology\n");
|
||||
ret = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
ret = topology_parse(&dest_topology, "Local");
|
||||
if (ret) {
|
||||
pr_err("Failed to parse local system topology\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
ret = restore_devices(&args, e);
|
||||
if (ret)
|
||||
goto exit;
|
||||
|
720
plugins/amdgpu/amdgpu_plugin_topology.c
Normal file
720
plugins/amdgpu/amdgpu_plugin_topology.c
Normal file
@@ -0,0 +1,720 @@
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <linux/limits.h>
|
||||
|
||||
#include <dirent.h>
|
||||
#include "common/list.h"
|
||||
|
||||
#include "xmalloc.h"
|
||||
#include "kfd_ioctl.h"
|
||||
#include "amdgpu_plugin_topology.h"
|
||||
|
||||
#define TOPOLOGY_PATH "/sys/class/kfd/kfd/topology/nodes/"
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE 1
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
#define plugin_log_msg(fmt, ...) pr_debug(fmt, ##__VA_ARGS__)
|
||||
#else
|
||||
#define plugin_log_msg(fmt, ...) \
|
||||
{ \
|
||||
}
|
||||
#endif
|
||||
|
||||
static const char *link_type(uint32_t type)
|
||||
{
|
||||
switch (type) {
|
||||
case TOPO_IOLINK_TYPE_PCIE:
|
||||
return "PCIe";
|
||||
case TOPO_IOLINK_TYPE_XGMI:
|
||||
return "XGMI";
|
||||
}
|
||||
return "Unsupported";
|
||||
}
|
||||
|
||||
static struct tp_node *p2pgroup_get_node_by_gpu_id(const struct tp_p2pgroup *group, const uint32_t gpu_id)
|
||||
{
|
||||
struct tp_node *node;
|
||||
|
||||
list_for_each_entry(node, &group->nodes, listm_p2pgroup) {
|
||||
if (node->gpu_id == gpu_id)
|
||||
return node;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct tp_node *sys_get_node_by_render_minor(const struct tp_system *sys, const int drm_render_minor)
|
||||
{
|
||||
struct tp_node *node;
|
||||
|
||||
list_for_each_entry(node, &sys->nodes, listm_system) {
|
||||
if (node->drm_render_minor == drm_render_minor)
|
||||
return node;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct tp_node *sys_get_node_by_gpu_id(const struct tp_system *sys, const uint32_t gpu_id)
|
||||
{
|
||||
struct tp_node *node;
|
||||
|
||||
list_for_each_entry(node, &sys->nodes, listm_system) {
|
||||
if (node->gpu_id == gpu_id)
|
||||
return node;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct tp_node *sys_get_node_by_node_id(const struct tp_system *sys, const uint32_t node_id)
|
||||
{
|
||||
struct tp_node *node;
|
||||
|
||||
list_for_each_entry(node, &sys->nodes, listm_system) {
|
||||
if (node->id == node_id)
|
||||
return node;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct tp_p2pgroup *sys_get_p2pgroup_with_gpu_id(const struct tp_system *sys, const int type,
|
||||
const uint32_t gpu_id)
|
||||
{
|
||||
struct tp_p2pgroup *p2pgroup;
|
||||
|
||||
list_for_each_entry(p2pgroup, &sys->xgmi_groups, listm_system) {
|
||||
if (p2pgroup->type != type)
|
||||
continue;
|
||||
|
||||
if (p2pgroup_get_node_by_gpu_id(p2pgroup, gpu_id))
|
||||
return p2pgroup;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct tp_iolink *get_tp_peer_iolink(const struct tp_node *from_node, const struct tp_node *to_node,
|
||||
const uint8_t type)
|
||||
{
|
||||
struct tp_iolink *iolink;
|
||||
|
||||
list_for_each_entry(iolink, &from_node->iolinks, listm) {
|
||||
if (iolink->node_to_id == to_node->id && iolink->type == type)
|
||||
return iolink;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct tp_iolink *node_add_iolink(struct tp_node *node, uint32_t type, uint32_t node_to_id)
|
||||
{
|
||||
struct tp_iolink *iolink = xzalloc(sizeof(*iolink));
|
||||
|
||||
if (!iolink)
|
||||
return NULL;
|
||||
|
||||
iolink->type = type;
|
||||
/* iolink->node_to will be filled in topology_determine_iolinks */
|
||||
iolink->node_to_id = node_to_id;
|
||||
iolink->node_from = node;
|
||||
|
||||
list_add_tail(&iolink->listm, &node->iolinks);
|
||||
return iolink;
|
||||
}
|
||||
|
||||
struct tp_p2pgroup *sys_add_group(struct tp_system *sys, uint32_t type)
|
||||
{
|
||||
struct tp_p2pgroup *group;
|
||||
|
||||
group = xzalloc(sizeof(*group));
|
||||
if (!group)
|
||||
return NULL;
|
||||
|
||||
INIT_LIST_HEAD(&group->nodes);
|
||||
group->type = type;
|
||||
list_add_tail(&group->listm_system, &sys->xgmi_groups);
|
||||
if (type == TOPO_IOLINK_TYPE_XGMI)
|
||||
sys->num_xgmi_groups++;
|
||||
|
||||
return group;
|
||||
}
|
||||
|
||||
struct tp_node *sys_add_node(struct tp_system *sys, uint32_t id, uint32_t gpu_id)
|
||||
{
|
||||
struct tp_node *node = NULL;
|
||||
|
||||
node = xzalloc(sizeof(*node));
|
||||
if (!node)
|
||||
return NULL;
|
||||
|
||||
node->id = id;
|
||||
node->gpu_id = gpu_id;
|
||||
node->drm_fd = -1;
|
||||
INIT_LIST_HEAD(&node->iolinks);
|
||||
list_add_tail(&node->listm_system, &sys->nodes);
|
||||
sys->num_nodes++;
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
static bool get_prop(char *line, char *name, uint64_t *value)
|
||||
{
|
||||
if (sscanf(line, " %29s %lu", name, value) != 2)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Parse node properties in /sys/class/kfd/kfd/topology/nodes/N/properties */
|
||||
static int parse_topo_node_properties(struct tp_node *dev, const char *dir_path)
|
||||
{
|
||||
FILE *file;
|
||||
char path[300];
|
||||
char line[300];
|
||||
|
||||
sprintf(path, "%s/properties", dir_path);
|
||||
file = fopen(path, "r");
|
||||
if (!file) {
|
||||
pr_perror("Failed to access %s", path);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
while (fgets(line, sizeof(line), file)) {
|
||||
char name[30];
|
||||
uint64_t value;
|
||||
|
||||
memset(name, 0, sizeof(name));
|
||||
if (!get_prop(line, name, &value))
|
||||
goto fail;
|
||||
|
||||
if (!strcmp(name, "cpu_cores_count"))
|
||||
dev->cpu_cores_count = (uint32_t)value;
|
||||
else if (!strcmp(name, "simd_count"))
|
||||
dev->simd_count = (uint32_t)value;
|
||||
else if (!strcmp(name, "mem_banks_count"))
|
||||
dev->mem_banks_count = (uint32_t)value;
|
||||
else if (!strcmp(name, "caches_count"))
|
||||
dev->caches_count = (uint32_t)value;
|
||||
else if (!strcmp(name, "io_links_count"))
|
||||
dev->io_links_count = (uint32_t)value;
|
||||
else if (!strcmp(name, "max_waves_per_simd"))
|
||||
dev->max_waves_per_simd = (uint32_t)value;
|
||||
else if (!strcmp(name, "lds_size_in_kb"))
|
||||
dev->lds_size_in_kb = (uint32_t)value;
|
||||
else if (!strcmp(name, "num_gws"))
|
||||
dev->num_gws = (uint32_t)value;
|
||||
else if (!strcmp(name, "wave_front_size"))
|
||||
dev->wave_front_size = (uint32_t)value;
|
||||
else if (!strcmp(name, "array_count"))
|
||||
dev->array_count = (uint32_t)value;
|
||||
else if (!strcmp(name, "simd_arrays_per_engine"))
|
||||
dev->simd_arrays_per_engine = (uint32_t)value;
|
||||
else if (!strcmp(name, "cu_per_simd_array"))
|
||||
dev->cu_per_simd_array = (uint32_t)value;
|
||||
else if (!strcmp(name, "simd_per_cu"))
|
||||
dev->simd_per_cu = (uint32_t)value;
|
||||
else if (!strcmp(name, "max_slots_scratch_cu"))
|
||||
dev->max_slots_scratch_cu = (uint32_t)value;
|
||||
else if (!strcmp(name, "vendor_id"))
|
||||
dev->vendor_id = (uint32_t)value;
|
||||
else if (!strcmp(name, "device_id"))
|
||||
dev->device_id = (uint32_t)value;
|
||||
else if (!strcmp(name, "domain"))
|
||||
dev->domain = (uint32_t)value;
|
||||
else if (!strcmp(name, "drm_render_minor"))
|
||||
dev->drm_render_minor = (uint32_t)value;
|
||||
else if (!strcmp(name, "hive_id"))
|
||||
dev->hive_id = value;
|
||||
else if (!strcmp(name, "num_sdma_engines"))
|
||||
dev->num_sdma_engines = (uint32_t)value;
|
||||
else if (!strcmp(name, "num_sdma_xgmi_engines"))
|
||||
dev->num_sdma_xgmi_engines = (uint32_t)value;
|
||||
else if (!strcmp(name, "num_sdma_queues_per_engine"))
|
||||
dev->num_sdma_queues_per_engine = (uint32_t)value;
|
||||
else if (!strcmp(name, "num_cp_queues"))
|
||||
dev->num_cp_queues = (uint32_t)value;
|
||||
else if (!strcmp(name, "fw_version"))
|
||||
dev->fw_version = (uint32_t)value;
|
||||
else if (!strcmp(name, "capability"))
|
||||
dev->capability = (uint32_t)value;
|
||||
else if (!strcmp(name, "sdma_fw_version"))
|
||||
dev->sdma_fw_version = (uint32_t)value;
|
||||
|
||||
if (!dev->gpu_id && dev->cpu_cores_count >= 1) {
|
||||
/* This is a CPU - we do not need to parse the other information */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
return 0;
|
||||
fail:
|
||||
pr_err("Failed to parse line = %s\n", line);
|
||||
fclose(file);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Parse node memory properties in /sys/class/kfd/kfd/topology/nodes/N/mem_banks */
|
||||
static int parse_topo_node_mem_banks(struct tp_node *node, const char *dir_path)
|
||||
{
|
||||
struct dirent *dirent_node;
|
||||
DIR *d_node;
|
||||
char path[300];
|
||||
FILE *file = NULL;
|
||||
uint32_t heap_type = 0;
|
||||
uint64_t mem_size = 0;
|
||||
int ret;
|
||||
|
||||
if (!NODE_IS_GPU(node))
|
||||
return 0;
|
||||
|
||||
sprintf(path, "%s/mem_banks", dir_path);
|
||||
|
||||
d_node = opendir(path);
|
||||
if (!d_node) {
|
||||
pr_perror("Can't open %s", path);
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
while ((dirent_node = readdir(d_node)) != NULL) {
|
||||
char line[300];
|
||||
char bank_path[1024];
|
||||
struct stat st;
|
||||
int id;
|
||||
|
||||
heap_type = 0;
|
||||
mem_size = 0;
|
||||
|
||||
/* Only parse numeric directories */
|
||||
if (sscanf(dirent_node->d_name, "%d", &id) != 1)
|
||||
continue;
|
||||
|
||||
snprintf(bank_path, sizeof(bank_path), "%s/%s", path, dirent_node->d_name);
|
||||
if (stat(bank_path, &st)) {
|
||||
pr_err("Cannot to access %s\n", path);
|
||||
ret = -EACCES;
|
||||
goto fail;
|
||||
}
|
||||
if ((st.st_mode & S_IFMT) == S_IFDIR) {
|
||||
char properties_path[PATH_MAX];
|
||||
|
||||
snprintf(properties_path, sizeof(properties_path), "%s/properties", bank_path);
|
||||
|
||||
file = fopen(properties_path, "r");
|
||||
if (!file) {
|
||||
pr_perror("Failed to access %s", properties_path);
|
||||
ret = -EACCES;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
while (fgets(line, sizeof(line), file)) {
|
||||
char name[30];
|
||||
uint64_t value;
|
||||
|
||||
memset(name, 0, sizeof(name));
|
||||
if (!get_prop(line, name, &value)) {
|
||||
ret = -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (!strcmp(name, "heap_type"))
|
||||
heap_type = (uint32_t)value;
|
||||
if (!strcmp(name, "size_in_bytes"))
|
||||
mem_size = value;
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
file = NULL;
|
||||
}
|
||||
|
||||
if (heap_type == TOPO_HEAP_TYPE_PUBLIC || heap_type == TOPO_HEAP_TYPE_PRIVATE)
|
||||
break;
|
||||
}
|
||||
|
||||
if ((heap_type != TOPO_HEAP_TYPE_PUBLIC && heap_type != TOPO_HEAP_TYPE_PRIVATE) || !mem_size) {
|
||||
pr_err("Failed to determine memory type and size for device in %s\n", dir_path);
|
||||
ret = -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
node->vram_public = (heap_type == TOPO_HEAP_TYPE_PUBLIC);
|
||||
node->vram_size = mem_size;
|
||||
closedir(d_node);
|
||||
return 0;
|
||||
fail:
|
||||
if (file)
|
||||
fclose(file);
|
||||
closedir(d_node);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Parse node iolinks properties in /sys/class/kfd/kfd/topology/nodes/N/io_links */
|
||||
static int parse_topo_node_iolinks(struct tp_node *node, const char *dir_path)
|
||||
{
|
||||
struct dirent *dirent_node;
|
||||
DIR *d_node;
|
||||
char path[300];
|
||||
FILE *file = NULL;
|
||||
int ret = 0;
|
||||
|
||||
snprintf(path, sizeof(path), "%s/io_links", dir_path);
|
||||
|
||||
d_node = opendir(path);
|
||||
if (!d_node) {
|
||||
pr_perror("Can't open %s", path);
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
while ((dirent_node = readdir(d_node)) != NULL) {
|
||||
char line[300];
|
||||
char iolink_path[1024];
|
||||
struct stat st;
|
||||
int id;
|
||||
|
||||
uint32_t iolink_type = 0;
|
||||
uint32_t node_to_id = 0;
|
||||
|
||||
/* Only parse numeric directories */
|
||||
if (sscanf(dirent_node->d_name, "%d", &id) != 1)
|
||||
continue;
|
||||
|
||||
snprintf(iolink_path, sizeof(iolink_path), "%s/%s", path, dirent_node->d_name);
|
||||
if (stat(iolink_path, &st)) {
|
||||
pr_err("Cannot to access %s\n", path);
|
||||
ret = -EACCES;
|
||||
goto fail;
|
||||
}
|
||||
if ((st.st_mode & S_IFMT) == S_IFDIR) {
|
||||
char properties_path[PATH_MAX];
|
||||
|
||||
snprintf(properties_path, sizeof(properties_path), "%s/properties", iolink_path);
|
||||
|
||||
file = fopen(properties_path, "r");
|
||||
if (!file) {
|
||||
pr_perror("Failed to access %s", properties_path);
|
||||
ret = -EACCES;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
while (fgets(line, sizeof(line), file)) {
|
||||
char name[30];
|
||||
uint64_t value;
|
||||
|
||||
memset(name, 0, sizeof(name));
|
||||
if (!get_prop(line, name, &value)) {
|
||||
ret = -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (!strcmp(name, "type"))
|
||||
iolink_type = (uint32_t)value;
|
||||
if (!strcmp(name, "node_to"))
|
||||
node_to_id = (uint32_t)value;
|
||||
}
|
||||
fclose(file);
|
||||
file = NULL;
|
||||
}
|
||||
|
||||
/* We only store the link information for now, then once all topology parsing is
|
||||
* finished we will confirm iolinks
|
||||
*/
|
||||
if (iolink_type == TOPO_IOLINK_TYPE_PCIE || iolink_type == TOPO_IOLINK_TYPE_XGMI) {
|
||||
if (!node_add_iolink(node, iolink_type, node_to_id)) {
|
||||
ret = -ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
closedir(d_node);
|
||||
return 0;
|
||||
fail:
|
||||
if (file)
|
||||
fclose(file);
|
||||
|
||||
closedir(d_node);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Parse a node (CPU or GPU) in /sys/class/kfd/kfd/topology/nodes/N */
|
||||
static int parse_topo_node(struct tp_node *node, const char *dir_path)
|
||||
{
|
||||
if (parse_topo_node_properties(node, dir_path)) {
|
||||
pr_err("Failed to parse node properties\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (parse_topo_node_mem_banks(node, dir_path)) {
|
||||
pr_err("Failed to parse node mem_banks\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (parse_topo_node_iolinks(node, dir_path)) {
|
||||
pr_err("Failed to parse node iolinks\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char *p2pgroup_to_str(struct tp_p2pgroup *group)
|
||||
{
|
||||
static char topology_printstr[200];
|
||||
struct tp_node *node;
|
||||
size_t str_len = 0;
|
||||
|
||||
topology_printstr[0] = '\0';
|
||||
str_len += sprintf(&topology_printstr[str_len], "type:%s:", link_type(group->type));
|
||||
|
||||
list_for_each_entry(node, &group->nodes, listm_p2pgroup) {
|
||||
str_len += sprintf(&topology_printstr[str_len], "0x%04X ", node->gpu_id);
|
||||
}
|
||||
return topology_printstr;
|
||||
}
|
||||
|
||||
void topology_print(const struct tp_system *sys, const char *message)
|
||||
{
|
||||
struct tp_node *node;
|
||||
struct tp_p2pgroup *xgmi_group;
|
||||
|
||||
pr_info("===System Topology=[%12s]==================================\n", message);
|
||||
list_for_each_entry(node, &sys->nodes, listm_system) {
|
||||
struct tp_iolink *iolink;
|
||||
|
||||
if (!NODE_IS_GPU(node)) {
|
||||
pr_info("[%d] CPU\n", node->id);
|
||||
pr_info(" cpu_cores_count:%u\n", node->cpu_cores_count);
|
||||
} else {
|
||||
pr_info("[%d] GPU gpu_id:0x%04X\n", node->id, node->gpu_id);
|
||||
pr_info(" vendor_id:%u device_id:%u\n", node->vendor_id, node->device_id);
|
||||
pr_info(" vram_public:%c vram_size:%lu\n", node->vram_public ? 'Y' : 'N', node->vram_size);
|
||||
pr_info(" io_links_count:%u capability:%u\n", node->io_links_count, node->capability);
|
||||
pr_info(" mem_banks_count:%u caches_count:%d lds_size_in_kb:%u\n", node->mem_banks_count,
|
||||
node->caches_count, node->lds_size_in_kb);
|
||||
pr_info(" simd_count:%u max_waves_per_simd:%u\n", node->simd_count,
|
||||
node->max_waves_per_simd);
|
||||
pr_info(" num_gws:%u wave_front_size:%u array_count:%u\n", node->num_gws,
|
||||
node->wave_front_size, node->array_count);
|
||||
pr_info(" simd_arrays_per_engine:%u simd_per_cu:%u\n", node->simd_arrays_per_engine,
|
||||
node->simd_per_cu);
|
||||
pr_info(" max_slots_scratch_cu:%u cu_per_simd_array:%u\n", node->max_slots_scratch_cu,
|
||||
node->cu_per_simd_array);
|
||||
pr_info(" num_sdma_engines:%u\n", node->num_sdma_engines);
|
||||
pr_info(" num_sdma_xgmi_engines:%u num_sdma_queues_per_engine:%u\n",
|
||||
node->num_sdma_xgmi_engines, node->num_sdma_queues_per_engine);
|
||||
pr_info(" num_cp_queues:%u fw_version:%u sdma_fw_version:%u\n", node->num_cp_queues,
|
||||
node->fw_version, node->sdma_fw_version);
|
||||
}
|
||||
list_for_each_entry(iolink, &node->iolinks, listm) {
|
||||
if (!iolink->valid)
|
||||
continue;
|
||||
|
||||
pr_info(" iolink type:%s node-to:%d (0x%04X) node-from:%d bi-dir:%s\n",
|
||||
link_type(iolink->type), iolink->node_to_id, iolink->node_to->gpu_id,
|
||||
iolink->node_from->id, iolink->peer ? "Y" : "N");
|
||||
}
|
||||
}
|
||||
|
||||
pr_info("===Groups==========================================================\n");
|
||||
list_for_each_entry(xgmi_group, &sys->xgmi_groups, listm_system)
|
||||
pr_info("%s\n", p2pgroup_to_str(xgmi_group));
|
||||
pr_info("===================================================================\n");
|
||||
}
|
||||
|
||||
void topology_init(struct tp_system *sys)
|
||||
{
|
||||
memset(sys, 0, sizeof(*sys));
|
||||
INIT_LIST_HEAD(&sys->nodes);
|
||||
INIT_LIST_HEAD(&sys->xgmi_groups);
|
||||
}
|
||||
|
||||
void topology_free(struct tp_system *sys)
|
||||
{
|
||||
while (!list_empty(&sys->nodes)) {
|
||||
struct tp_node *node = list_first_entry(&sys->nodes, struct tp_node, listm_system);
|
||||
|
||||
list_del(&node->listm_system);
|
||||
|
||||
while (!list_empty(&node->iolinks)) {
|
||||
struct tp_iolink *iolink = list_first_entry(&node->iolinks, struct tp_iolink, listm);
|
||||
|
||||
list_del(&iolink->listm);
|
||||
xfree(iolink);
|
||||
}
|
||||
xfree(node);
|
||||
}
|
||||
|
||||
while (!list_empty(&sys->xgmi_groups)) {
|
||||
struct tp_p2pgroup *p2pgroup = list_first_entry(&sys->xgmi_groups, struct tp_p2pgroup, listm_system);
|
||||
|
||||
list_del(&p2pgroup->listm_system);
|
||||
xfree(p2pgroup);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Validates iolinks and determine XGMI hives in a system topology
|
||||
*
|
||||
* On some systems, some GPUs may not be accessible because they are masked by cgroups, but the
|
||||
* iolinks to these GPUs are still visible. If the peer GPU is not accessible, we consider that link
|
||||
* invalid.
|
||||
* In a XGMI hive, each GPU will have a bi-directional iolink to every other GPU. So we create a
|
||||
* XGMI group (hive) and add all the GPUs in that hive to the group when iterating over the first
|
||||
* GPU in that group.
|
||||
*
|
||||
* @param sys system topology
|
||||
* @return 0 if successful, errno if failed.
|
||||
*/
|
||||
int topology_determine_iolinks(struct tp_system *sys)
|
||||
{
|
||||
int ret = 0;
|
||||
struct tp_node *node;
|
||||
|
||||
list_for_each_entry(node, &sys->nodes, listm_system) {
|
||||
struct tp_iolink *iolink;
|
||||
|
||||
list_for_each_entry(iolink, &node->iolinks, listm) {
|
||||
struct tp_p2pgroup *group = NULL;
|
||||
struct tp_node *peer_node = NULL;
|
||||
struct tp_iolink *peer_iolink = NULL;
|
||||
|
||||
peer_node = sys_get_node_by_node_id(sys, iolink->node_to_id);
|
||||
if (!peer_node) {
|
||||
/* node not accessible, usually because it is masked by cgroups */
|
||||
iolink->valid = false;
|
||||
continue;
|
||||
}
|
||||
iolink->valid = true;
|
||||
node->num_valid_iolinks++;
|
||||
|
||||
iolink->node_to = peer_node;
|
||||
peer_iolink = get_tp_peer_iolink(peer_node, node, iolink->type);
|
||||
if (!peer_iolink)
|
||||
continue; /* This is a one-dir link */
|
||||
|
||||
/* We confirmed both sides have same type of iolink */
|
||||
iolink->peer = peer_iolink;
|
||||
peer_iolink->peer = iolink;
|
||||
|
||||
if (iolink->type == TOPO_IOLINK_TYPE_XGMI) {
|
||||
group = sys_get_p2pgroup_with_gpu_id(sys, iolink->type, node->gpu_id);
|
||||
if (!group) {
|
||||
/* This GPU does not already belong to a group so we create
|
||||
* a new group
|
||||
*/
|
||||
group = sys_add_group(sys, iolink->type);
|
||||
if (!group) {
|
||||
ret = -ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
list_add_tail(&node->listm_p2pgroup, &group->nodes);
|
||||
}
|
||||
|
||||
/* Also add peer GPU to this group */
|
||||
if (!p2pgroup_get_node_by_gpu_id(group, peer_node->gpu_id))
|
||||
list_add_tail(&peer_node->listm_p2pgroup, &group->nodes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fail:
|
||||
/* In case of failure, caller function will call topology_free which will free groups that
|
||||
* were successfully allocated
|
||||
*/
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Parse system topology
|
||||
*
|
||||
* Parse system topology exposed by the drivers in /sys/class/kfd/kfd/topology and fill in the
|
||||
* system topology structure.
|
||||
*
|
||||
* @param sys system topology structure to be filled by this function
|
||||
* @param message print this message when printing the topology to logs
|
||||
* @return 0 if successful, errno if failed.
|
||||
*/
|
||||
int topology_parse(struct tp_system *sys, const char *message)
|
||||
{
|
||||
struct dirent *dirent_system;
|
||||
DIR *d_system;
|
||||
char path[300];
|
||||
int ret;
|
||||
|
||||
if (sys->parsed)
|
||||
return 0;
|
||||
|
||||
sys->parsed = true;
|
||||
INIT_LIST_HEAD(&sys->nodes);
|
||||
INIT_LIST_HEAD(&sys->xgmi_groups);
|
||||
|
||||
d_system = opendir(TOPOLOGY_PATH);
|
||||
if (!d_system) {
|
||||
pr_perror("Can't open %s", TOPOLOGY_PATH);
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
while ((dirent_system = readdir(d_system)) != NULL) {
|
||||
struct stat stbuf;
|
||||
int id, fd;
|
||||
|
||||
/* Only parse numeric directories */
|
||||
if (sscanf(dirent_system->d_name, "%d", &id) != 1)
|
||||
continue;
|
||||
|
||||
sprintf(path, "%s%s", TOPOLOGY_PATH, dirent_system->d_name);
|
||||
if (stat(path, &stbuf)) {
|
||||
/* When cgroup is masking some devices, the path exists, but it is not
|
||||
* accessible, this is not an error
|
||||
*/
|
||||
pr_info("Cannot to access %s\n", path);
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((stbuf.st_mode & S_IFMT) == S_IFDIR) {
|
||||
struct tp_node *node;
|
||||
int len;
|
||||
char gpu_id_path[300];
|
||||
char read_buf[7]; /* Max gpu_id len is 6 chars */
|
||||
unsigned int gpu_id;
|
||||
|
||||
sprintf(gpu_id_path, "%s/%s/gpu_id", TOPOLOGY_PATH, dirent_system->d_name);
|
||||
fd = open(gpu_id_path, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
pr_perror("Failed to access %s", gpu_id_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
len = read(fd, read_buf, sizeof(read_buf) - 1);
|
||||
close(fd);
|
||||
if (len < 0)
|
||||
continue;
|
||||
|
||||
read_buf[len] = '\0';
|
||||
|
||||
if (sscanf(read_buf, "%d", &gpu_id) != 1)
|
||||
continue;
|
||||
|
||||
node = sys_add_node(sys, id, gpu_id);
|
||||
if (!node) {
|
||||
ret = -ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (parse_topo_node(node, path)) {
|
||||
pr_err("Failed to parse node %s\n", path);
|
||||
ret = -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
closedir(d_system);
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
topology_free(sys);
|
||||
return ret;
|
||||
}
|
119
plugins/amdgpu/amdgpu_plugin_topology.h
Normal file
119
plugins/amdgpu/amdgpu_plugin_topology.h
Normal file
@@ -0,0 +1,119 @@
|
||||
#ifndef __KFD_PLUGIN_TOPOLOGY_H__
#define __KFD_PLUGIN_TOPOLOGY_H__

/* Minor-number range reserved for DRM render nodes (/dev/dri/renderD<minor>) */
#define DRM_FIRST_RENDER_NODE 128
#define DRM_LAST_RENDER_NODE 255

#define TOPO_HEAP_TYPE_PUBLIC 1 /* HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC */
#define TOPO_HEAP_TYPE_PRIVATE 2 /* HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE */

#define TOPO_IOLINK_TYPE_ANY 0 /* HSA_IOLINKTYPE_UNDEFINED */
#define TOPO_IOLINK_TYPE_PCIE 2 /* HSA_IOLINKTYPE_PCIEXPRESS */
#define TOPO_IOLINK_TYPE_XGMI 11 /* HSA_IOLINK_TYPE_XGMI */

/* A topology node with a non-zero gpu_id is a GPU; CPU-only nodes have gpu_id == 0 */
#define NODE_IS_GPU(node) ((node)->gpu_id != 0)
#define INVALID_CPU_ID 0xFFFF

/*************************************** Structures ***********************************************/
struct tp_node;

/* One directional I/O link (PCIe, XGMI, ...) from node_from to node_to */
struct tp_iolink {
	struct list_head listm;
	uint32_t type; /* TOPO_IOLINK_TYPE_* */
	uint32_t node_to_id;
	struct tp_node *node_to;
	struct tp_node *node_from;
	bool valid; /* Set to false if target node is not accessible */
	struct tp_iolink *peer; /* If link is bi-directional, peer link */
};

/*
 * One node from /sys/class/kfd/kfd/topology/nodes/<id>.
 * The property fields mirror the node's sysfs "properties" file.
 */
struct tp_node {
	uint32_t id; /* node_id (sysfs directory name) */
	uint32_t gpu_id; /* 0 for CPU-only nodes */
	uint32_t cpu_cores_count;
	uint32_t simd_count;
	uint32_t mem_banks_count;
	uint32_t caches_count;
	uint32_t io_links_count;
	uint32_t max_waves_per_simd;
	uint32_t lds_size_in_kb;
	uint32_t num_gws;
	uint32_t wave_front_size;
	uint32_t array_count;
	uint32_t simd_arrays_per_engine;
	uint32_t cu_per_simd_array;
	uint32_t simd_per_cu;
	uint32_t max_slots_scratch_cu;
	uint32_t vendor_id;
	uint32_t device_id;
	uint32_t domain;
	uint32_t drm_render_minor;
	uint64_t hive_id; /* XGMI hive the GPU belongs to, 0 if none */
	uint32_t num_sdma_engines;
	uint32_t num_sdma_xgmi_engines;
	uint32_t num_sdma_queues_per_engine;
	uint32_t num_cp_queues;
	uint32_t fw_version;
	uint32_t capability;
	uint32_t sdma_fw_version;
	bool vram_public; /* true if VRAM heap is TOPO_HEAP_TYPE_PUBLIC */
	uint64_t vram_size;

	struct list_head listm_system; /* Membership in tp_system.nodes */
	struct list_head listm_p2pgroup; /* Membership in a tp_p2pgroup.nodes */
	struct list_head listm_mapping; /* Used only during device mapping */

	uint32_t num_valid_iolinks;
	struct list_head iolinks; /* List of tp_iolink */

	int drm_fd; /* Open fd for /dev/dri/renderD<drm_render_minor>, or -1 */
};

/* Group of GPU nodes connected peer-to-peer, e.g. one XGMI hive */
struct tp_p2pgroup {
	uint32_t type; /* TOPO_IOLINK_TYPE_* of the links forming the group */
	uint32_t num_nodes;
	struct list_head listm_system; /* Membership in tp_system.xgmi_groups */
	struct list_head nodes; /* tp_node.listm_p2pgroup entries */
};

/* Full system topology as parsed from sysfs or restored from the image */
struct tp_system {
	bool parsed; /* Set once topology_parse() has run */
	uint32_t num_nodes;
	struct list_head nodes; /* tp_node.listm_system entries */
	uint32_t num_xgmi_groups;
	struct list_head xgmi_groups; /* tp_p2pgroup.listm_system entries */
};

/* Single checkpoint-id -> restore-id mapping entry */
struct id_map {
	uint32_t src;
	uint32_t dest;

	struct list_head listm;
};

/* Checkpoint-to-restore device mappings */
struct device_maps {
	struct list_head cpu_maps; /* CPUs are mapped using node_id */
	struct list_head gpu_maps; /* GPUs are mapped using gpu_id */

	struct list_head *tail_cpu;
	struct list_head *tail_gpu;
};

/**************************************** Functions ***********************************************/
void topology_init(struct tp_system *sys);
void topology_free(struct tp_system *topology);

int topology_parse(struct tp_system *topology, const char *msg);
int topology_determine_iolinks(struct tp_system *sys);
void topology_print(const struct tp_system *sys, const char *msg);

struct tp_node *sys_add_node(struct tp_system *sys, uint32_t id, uint32_t gpu_id);
struct tp_iolink *node_add_iolink(struct tp_node *node, uint32_t type, uint32_t node_to_id);

struct tp_node *sys_get_node_by_gpu_id(const struct tp_system *sys, const uint32_t gpu_id);
struct tp_node *sys_get_node_by_render_minor(const struct tp_system *sys, const int drm_render_minor);

void maps_init(struct device_maps *maps);
void maps_free(struct device_maps *maps);

#endif /* __KFD_PLUGIN_TOPOLOGY_H__ */
|
@@ -1,7 +1,43 @@
|
||||
syntax = "proto2";
|
||||
|
||||
// One I/O link of a device; mirrors struct tp_iolink in the plugin.
message dev_iolink {
	required uint32 type = 1; // TOPO_IOLINK_TYPE_* value
	required uint32 node_to_id = 2; // node_id of the link target
}
|
||||
|
||||
// Properties of one KFD topology node captured at checkpoint time so that
// restore can match the process to GPUs with similar properties.
// Field numbers must be unique within the message: node_id takes tag 1 and
// gpu_id tag 2 (the rendered diff showed the stale "gpu_id = 1" line from the
// previous revision alongside the new fields, which is invalid proto2).
message device_entry {
	required uint32 node_id = 1;
	required uint32 gpu_id = 2;
	required uint32 cpu_cores_count = 3;
	required uint32 simd_count = 4;
	required uint32 mem_banks_count = 5;
	required uint32 caches_count = 6;
	required uint32 io_links_count = 7;
	required uint32 max_waves_per_simd = 8;
	required uint32 lds_size_in_kb = 9;
	required uint32 gds_size_in_kb = 10;
	required uint32 num_gws = 11;
	required uint32 wave_front_size = 12;
	required uint32 array_count = 13;
	required uint32 simd_arrays_per_engine = 14;
	required uint32 cu_per_simd_array = 15;
	required uint32 simd_per_cu = 16;
	required uint32 max_slots_scratch_cu = 17;
	required uint32 vendor_id = 18;
	required uint32 device_id = 19;
	required uint32 domain = 20;
	required uint32 drm_render_minor = 21;
	required uint64 hive_id = 22;
	required uint32 num_sdma_engines = 23;
	required uint32 num_sdma_xgmi_engines = 24;
	required uint32 num_sdma_queues_per_engine = 25;
	required uint32 num_cp_queues = 26;
	required uint32 fw_version = 27;
	required uint32 capability = 28;
	required uint32 sdma_fw_version = 29;
	required uint32 vram_public = 30;
	required uint64 vram_size = 31;
	repeated dev_iolink iolinks = 32;
}
|
||||
|
||||
message bo_entry {
|
||||
@@ -16,11 +52,12 @@ message bo_entry {
|
||||
// Top-level KFD checkpoint image for one process.
// Field numbers must be unique: the rendered diff showed both the previous
// revision's tags (device_entries=3 .. priv_data=7) and the new set; only the
// new numbering with num_of_cpus inserted at tag 3 is valid proto2.
message criu_kfd {
	required uint32 pid = 1;
	required uint32 num_of_gpus = 2;
	required uint32 num_of_cpus = 3;
	repeated device_entry device_entries = 4;
	required uint64 num_of_bos = 5;
	repeated bo_entry bo_entries = 6;
	required uint32 num_of_objects = 7;
	required bytes priv_data = 8;
}
|
||||
|
||||
message criu_render_node {
|
||||
|
Reference in New Issue
Block a user