diff --git a/src/main.c b/src/main.c index d650ed8..fb3f682 100644 --- a/src/main.c +++ b/src/main.c @@ -24,7 +24,7 @@ Peak FLOPS: 512 GFLOP/s(in simple precision) ***/ -static const char* VERSION = "0.42"; +static const char* VERSION = "0.43"; void print_help(int argc, char *argv[]) { printf("Usage: %s [--version] [--help] [--style STYLE]\n\ @@ -69,13 +69,17 @@ int main(int argc, char* argv[]) { if(freq == NULL) return EXIT_FAILURE; + struct topology* topo = get_topology_info(cpu); + if(topo == NULL) + return EXIT_FAILURE; + struct ascii* art = set_ascii(get_cpu_vendor(cpu),getStyle()); if(art == NULL) return EXIT_FAILURE; char* cpuName = get_str_cpu_name(); char* maxFrequency = get_str_freq(freq); - char* nCores = get_str_ncores(cpu); + char* nCores = get_str_topology(topo); char* avx = get_str_avx(cpu); char* sse = get_str_sse(cpu); char* fma = get_str_fma(cpu); @@ -84,7 +88,7 @@ int main(int argc, char* argv[]) { char* l1 = get_str_l1(cach); char* l2 = get_str_l2(cach); char* l3 = get_str_l3(cach); - char* pp = get_str_peak_performance(cpu,get_freq(freq)); + char* pp = get_str_peak_performance(cpu,topo,get_freq(freq)); setAttribute(art,ATTRIBUTE_NAME,cpuName); setAttribute(art,ATTRIBUTE_FREQUENCY,maxFrequency); diff --git a/src/standart.c b/src/standart.c index ad0c03a..e3a9dc5 100644 --- a/src/standart.c +++ b/src/standart.c @@ -44,9 +44,6 @@ struct cpuInfo { VENDOR cpu_vendor; - int nThreads; - // Threads per core - int HT; // Max cpuids levels unsigned int maxLevels; // Max cpuids extended levels @@ -65,6 +62,13 @@ struct frequency { long max; }; +struct topology { + int physical_cores; + int logical_cores; + int smt; + bool ht; +}; + void init_cpu_info(struct cpuInfo* cpu) { cpu->AVX = false; cpu->AVX2 = false; @@ -129,29 +133,12 @@ struct cpuInfo* get_cpu_info() { } //Get max extended level - eax = 0x8000000; + eax = 0x80000000; + ebx = 0; + ecx = 0; + edx = 0; cpuid(&eax, &ebx, &ecx, &edx); - cpu->maxExtendedLevels = eax; - - //Fill cores and threads - cpu->nThreads = sysconf(_SC_NPROCESSORS_ONLN); - - //Always check we can fetch data - if (cpu->maxLevels >= 0x0000000B) { - eax = 0x0000000B; - ecx = 0x00000000; - cpuid(&eax, &ebx, &ecx, &edx); - cpu->HT = ebx & 0xF; - if(cpu->HT == 0) { - //AMD should not work with this, returning 0 - //Suppose we have 1 - cpu->HT = 1; - } - } - else { - printWarn("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X). Assuming HT is disabled", 0x0000000B, cpu->maxLevels); - cpu->HT = 1; - } + cpu->maxExtendedLevels = eax; //Fill instructions support if (cpu->maxLevels >= 0x00000001){ @@ -173,6 +160,7 @@ struct cpuInfo* get_cpu_info() { else { printWarn("Can't read features information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels); } + if (cpu->maxLevels >= 0x00000007){ eax = 0x00000007; ecx = 0x00000000; @@ -191,6 +179,7 @@ struct cpuInfo* get_cpu_info() { else { printWarn("Can't read features information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000007, cpu->maxLevels); } + if (cpu->maxExtendedLevels >= 0x80000001){ eax = 0x80000001; cpuid(&eax, &ebx, &ecx, &edx); @@ -204,6 +193,65 @@ struct cpuInfo* get_cpu_info() { return cpu; } +struct topology* get_topology_info(struct cpuInfo* cpu) { + struct topology* topo = malloc(sizeof(struct cache)); + unsigned int eax, ebx, ecx, edx; + int type; + + if (cpu->maxLevels >= 0x00000001) { + eax = 0x00000001; + cpuid(&eax, &ebx, &ecx, &edx); + topo->ht = edx & (1 << 28); + } + else { + printWarn("Can't read HT information from cpuid (needed level is 0x%.8X, max is 0x%.8X). Assuming HT is disabled", 0x00000001, cpu->maxLevels); + topo->ht = false; + } + + switch(cpu->cpu_vendor) { + case VENDOR_INTEL: + if (cpu->maxLevels >= 0x0000000B) { + //TODO: This idea only works with no NUMA systems + eax = 0x0000000B; + ecx = 0x00000000; + cpuid(&eax, &ebx, &ecx, &edx); + type = (ecx >> 8) & 0xFF; + if (type != 1) { + printBug("Unexpected type in cpuid 0x0000000B (expected 1, got %d)", type); + return NULL; + } + topo->smt = ebx & 0xFFFF; + + + eax = 0x0000000B; + ecx = 0x00000001; + cpuid(&eax, &ebx, &ecx, &edx); + type = (ecx >> 8) & 0xFF; + if (type < 2) { + printBug("Unexpected type in cpuid 0x0000000B (expected < 2, got %d)", type); + return NULL; + } + topo->logical_cores = ebx & 0xFFFF; + topo->physical_cores = topo->logical_cores / topo->smt; + } + else { + printWarn("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x0000000B, cpu->maxLevels); + topo->physical_cores = 1; + topo->logical_cores = 1; + topo->smt = 1; + } + break; + case VENDOR_AMD: + printBug("Unimplemented!"); + break; + default: + printBug("Cant get topology because VENDOR is empty"); + return NULL; + } + + return topo; +} + // see https://stackoverflow.com/questions/12594208/c-program-to-determine-levels-size-of-cache struct cache* get_cache_info(struct cpuInfo* cpu) { struct cache* cach = malloc(sizeof(struct cache)); @@ -333,7 +381,7 @@ void debug_frequency(struct frequency* freq) { /*** STRING FUNCTIONS ***/ -char* get_str_peak_performance(struct cpuInfo* cpu, long freq) { +char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, long freq) { /*** PP = PeakPerformance SP = SinglePrecision @@ -358,7 +406,7 @@ char* get_str_peak_performance(struct cpuInfo* cpu, long freq) { return string; } - float flops = (cpu->nThreads/cpu->HT)*(freq*1000000)*2; + float flops = topo->physical_cores*(freq*1000000)*2; if(cpu->FMA3 || cpu->FMA4) flops = flops*2; @@ -379,20 +427,19 @@ char* get_str_peak_performance(struct cpuInfo* cpu, long freq) { return string; } -char* get_str_ncores(struct cpuInfo* cpu) { - if(cpu->HT > 1) { +char* get_str_topology(struct topology* topo) { + char* string; + if(topo->smt > 1) { //2(N.Cores)7(' cores(')3(N.Threads)9(' threads)') int size = 2+7+3+9+1; - char* string = malloc(sizeof(char)*size); - snprintf(string,size,"%d cores(%d threads)",cpu->nThreads/cpu->HT,cpu->nThreads); - return string; + string = malloc(sizeof(char)*size); + snprintf(string,size,"%d cores (%d threads)",topo->physical_cores,topo->logical_cores); } else { - char* string = malloc(sizeof(char)*2+7+1); - snprintf(string,2+7+1,"%d cores",cpu->nThreads); - return string; + string = malloc(sizeof(char)*2+7+1); + snprintf(string,2+7+1,"%d cores",topo->physical_cores); } - + return string; } char* get_str_avx(struct cpuInfo* cpu) { diff --git a/src/standart.h b/src/standart.h index 4086d9c..2eb131d 100644 --- a/src/standart.h +++ b/src/standart.h @@ -9,6 +9,7 @@ struct cpuInfo; struct frequency; struct cache; +struct topology; typedef int VENDOR; @@ -17,6 +18,7 @@ VENDOR get_cpu_vendor(struct cpuInfo* cpu); long get_freq(struct frequency* freq); struct cache* get_cache_info(struct cpuInfo* cpu); struct frequency* get_frequency_info(struct cpuInfo* cpu); +struct topology* get_topology_info(struct cpuInfo* cpu); char* get_str_ncores(struct cpuInfo* cpu); char* get_str_avx(struct cpuInfo* cpu); @@ -31,7 +33,9 @@ char* get_str_l3(struct cache* cach); char* get_str_freq(struct frequency* freq); -char* get_str_peak_performance(struct cpuInfo* cpu, long freq); +char* get_str_topology(struct topology* topo); + +char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, long freq); void free_cpuinfo_struct(struct cpuInfo* cpu); void free_cache_struct(struct cache* cach);