2
0
mirror of https://github.com/Dr-Noob/cpufetch synced 2025-08-31 06:15:08 +00:00

Get number of cores (Intel only) with cpuid. This removes sysconf dependency. This implementation breaks cpufetch in NUMA environments

This commit is contained in:
Dr-Noob
2020-06-20 20:54:38 +02:00
parent 6ad5f65c34
commit 525cf1a76f
3 changed files with 95 additions and 40 deletions

View File

@@ -24,7 +24,7 @@ Peak FLOPS: 512 GFLOP/s(in simple precision)
***/
static const char* VERSION = "0.42";
static const char* VERSION = "0.43";
void print_help(int argc, char *argv[]) {
printf("Usage: %s [--version] [--help] [--style STYLE]\n\
@@ -69,13 +69,17 @@ int main(int argc, char* argv[]) {
if(freq == NULL)
return EXIT_FAILURE;
struct topology* topo = get_topology_info(cpu);
if(topo == NULL)
return EXIT_FAILURE;
struct ascii* art = set_ascii(get_cpu_vendor(cpu),getStyle());
if(art == NULL)
return EXIT_FAILURE;
char* cpuName = get_str_cpu_name();
char* maxFrequency = get_str_freq(freq);
char* nCores = get_str_ncores(cpu);
char* nCores = get_str_topology(topo);
char* avx = get_str_avx(cpu);
char* sse = get_str_sse(cpu);
char* fma = get_str_fma(cpu);
@@ -84,7 +88,7 @@ int main(int argc, char* argv[]) {
char* l1 = get_str_l1(cach);
char* l2 = get_str_l2(cach);
char* l3 = get_str_l3(cach);
char* pp = get_str_peak_performance(cpu,get_freq(freq));
char* pp = get_str_peak_performance(cpu,topo,get_freq(freq));
setAttribute(art,ATTRIBUTE_NAME,cpuName);
setAttribute(art,ATTRIBUTE_FREQUENCY,maxFrequency);

View File

@@ -44,9 +44,6 @@ struct cpuInfo {
VENDOR cpu_vendor;
int nThreads;
// Threads per core
int HT;
// Max cpuids levels
unsigned int maxLevels;
// Max cpuids extended levels
@@ -65,6 +62,13 @@ struct frequency {
long max;
};
struct topology {
int physical_cores;
int logical_cores;
int smt;
bool ht;
};
void init_cpu_info(struct cpuInfo* cpu) {
cpu->AVX = false;
cpu->AVX2 = false;
@@ -129,29 +133,12 @@ struct cpuInfo* get_cpu_info() {
}
//Get max extended level
eax = 0x8000000;
eax = 0x80000000;
ebx = 0;
ecx = 0;
edx = 0;
cpuid(&eax, &ebx, &ecx, &edx);
cpu->maxExtendedLevels = eax;
//Fill cores and threads
cpu->nThreads = sysconf(_SC_NPROCESSORS_ONLN);
//Always check we can fetch data
if (cpu->maxLevels >= 0x0000000B) {
eax = 0x0000000B;
ecx = 0x00000000;
cpuid(&eax, &ebx, &ecx, &edx);
cpu->HT = ebx & 0xF;
if(cpu->HT == 0) {
//AMD should not work with this, returning 0
//Suppose we have 1
cpu->HT = 1;
}
}
else {
printWarn("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X). Assuming HT is disabled", 0x0000000B, cpu->maxLevels);
cpu->HT = 1;
}
cpu->maxExtendedLevels = eax;
//Fill instructions support
if (cpu->maxLevels >= 0x00000001){
@@ -173,6 +160,7 @@ struct cpuInfo* get_cpu_info() {
else {
printWarn("Can't read features information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels);
}
if (cpu->maxLevels >= 0x00000007){
eax = 0x00000007;
ecx = 0x00000000;
@@ -191,6 +179,7 @@ struct cpuInfo* get_cpu_info() {
else {
printWarn("Can't read features information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000007, cpu->maxLevels);
}
if (cpu->maxExtendedLevels >= 0x80000001){
eax = 0x80000001;
cpuid(&eax, &ebx, &ecx, &edx);
@@ -204,6 +193,65 @@ struct cpuInfo* get_cpu_info() {
return cpu;
}
struct topology* get_topology_info(struct cpuInfo* cpu) {
struct topology* topo = malloc(sizeof(struct cache));
unsigned int eax, ebx, ecx, edx;
int type;
if (cpu->maxLevels >= 0x00000001) {
eax = 0x00000001;
cpuid(&eax, &ebx, &ecx, &edx);
topo->ht = edx & (1 << 28);
}
else {
printWarn("Can't read HT information from cpuid (needed level is 0x%.8X, max is 0x%.8X). Assuming HT is disabled", 0x00000001, cpu->maxLevels);
topo->ht = false;
}
switch(cpu->cpu_vendor) {
case VENDOR_INTEL:
if (cpu->maxLevels >= 0x0000000B) {
//TODO: This idea only works with no NUMA systems
eax = 0x0000000B;
ecx = 0x00000000;
cpuid(&eax, &ebx, &ecx, &edx);
type = (ecx >> 8) & 0xFF;
if (type != 1) {
printBug("Unexpected type in cpuid 0x0000000B (expected 1, got %d)", type);
return NULL;
}
topo->smt = ebx & 0xFFFF;
eax = 0x0000000B;
ecx = 0x00000001;
cpuid(&eax, &ebx, &ecx, &edx);
type = (ecx >> 8) & 0xFF;
if (type < 2) {
printBug("Unexpected type in cpuid 0x0000000B (expected < 2, got %d)", type);
return NULL;
}
topo->logical_cores = ebx & 0xFFFF;
topo->physical_cores = topo->logical_cores / topo->smt;
}
else {
printWarn("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x0000000B, cpu->maxLevels);
topo->physical_cores = 1;
topo->logical_cores = 1;
topo->smt = 1;
}
break;
case VENDOR_AMD:
printBug("Unimplemented!");
break;
default:
printBug("Cant get topology because VENDOR is empty");
return NULL;
}
return topo;
}
// see https://stackoverflow.com/questions/12594208/c-program-to-determine-levels-size-of-cache
struct cache* get_cache_info(struct cpuInfo* cpu) {
struct cache* cach = malloc(sizeof(struct cache));
@@ -333,7 +381,7 @@ void debug_frequency(struct frequency* freq) {
/*** STRING FUNCTIONS ***/
char* get_str_peak_performance(struct cpuInfo* cpu, long freq) {
char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, long freq) {
/***
PP = PeakPerformance
SP = SinglePrecision
@@ -358,7 +406,7 @@ char* get_str_peak_performance(struct cpuInfo* cpu, long freq) {
return string;
}
float flops = (cpu->nThreads/cpu->HT)*(freq*1000000)*2;
float flops = topo->physical_cores*(freq*1000000)*2;
if(cpu->FMA3 || cpu->FMA4)
flops = flops*2;
@@ -379,20 +427,19 @@ char* get_str_peak_performance(struct cpuInfo* cpu, long freq) {
return string;
}
char* get_str_ncores(struct cpuInfo* cpu) {
if(cpu->HT > 1) {
char* get_str_topology(struct topology* topo) {
char* string;
if(topo->smt > 1) {
//2(N.Cores)7(' cores(')3(N.Threads)9(' threads)')
int size = 2+7+3+9+1;
char* string = malloc(sizeof(char)*size);
snprintf(string,size,"%d cores(%d threads)",cpu->nThreads/cpu->HT,cpu->nThreads);
return string;
string = malloc(sizeof(char)*size);
snprintf(string,size,"%d cores (%d threads)",topo->physical_cores,topo->logical_cores);
}
else {
char* string = malloc(sizeof(char)*2+7+1);
snprintf(string,2+7+1,"%d cores",cpu->nThreads);
return string;
string = malloc(sizeof(char)*2+7+1);
snprintf(string,2+7+1,"%d cores",topo->physical_cores);
}
return string;
}
char* get_str_avx(struct cpuInfo* cpu) {

View File

@@ -9,6 +9,7 @@
struct cpuInfo;
struct frequency;
struct cache;
struct topology;
typedef int VENDOR;
@@ -17,6 +18,7 @@ VENDOR get_cpu_vendor(struct cpuInfo* cpu);
long get_freq(struct frequency* freq);
struct cache* get_cache_info(struct cpuInfo* cpu);
struct frequency* get_frequency_info(struct cpuInfo* cpu);
struct topology* get_topology_info(struct cpuInfo* cpu);
char* get_str_ncores(struct cpuInfo* cpu);
char* get_str_avx(struct cpuInfo* cpu);
@@ -31,7 +33,9 @@ char* get_str_l3(struct cache* cach);
char* get_str_freq(struct frequency* freq);
char* get_str_peak_performance(struct cpuInfo* cpu, long freq);
char* get_str_topology(struct topology* topo);
char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, long freq);
void free_cpuinfo_struct(struct cpuInfo* cpu);
void free_cache_struct(struct cache* cach);