mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-31 14:25:49 +00:00
x86: cpu -- Detect and save fpu status in info
- extend compel_cpuinfo_t to keep all fpu information needed for xsaves mode - fetch xsaves data in compel_cpuid All this will allow us to extend criu to support avx-512 instructions. Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com> Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
This commit is contained in:
committed by
Andrei Vagin
parent
f0fb0bc964
commit
3c4dfb6ef4
@@ -6,6 +6,7 @@
|
||||
#include "common/compiler.h"
|
||||
|
||||
#include "log.h"
|
||||
#include "common/bug.h"
|
||||
|
||||
#undef LOG_PREFIX
|
||||
#define LOG_PREFIX "cpu: "
|
||||
@@ -13,6 +14,40 @@
|
||||
static compel_cpuinfo_t rt_info;
|
||||
static bool rt_info_done = false;
|
||||
|
||||
/*
|
||||
* Although we spell it out in here, the Processor Trace
|
||||
* xfeature is completely unused. We use other mechanisms
|
||||
* to save/restore PT state in Linux.
|
||||
*/
|
||||
|
||||
static const char * const xfeature_names[] = {
|
||||
"x87 floating point registers" ,
|
||||
"SSE registers" ,
|
||||
"AVX registers" ,
|
||||
"MPX bounds registers" ,
|
||||
"MPX CSR" ,
|
||||
"AVX-512 opmask" ,
|
||||
"AVX-512 Hi256" ,
|
||||
"AVX-512 ZMM_Hi256" ,
|
||||
"Processor Trace" ,
|
||||
"Protection Keys User registers",
|
||||
"Hardware Duty Cycling" ,
|
||||
};
|
||||
|
||||
static short xsave_cpuid_features[] = {
|
||||
X86_FEATURE_FPU,
|
||||
X86_FEATURE_XMM,
|
||||
X86_FEATURE_AVX,
|
||||
X86_FEATURE_MPX,
|
||||
X86_FEATURE_MPX,
|
||||
X86_FEATURE_AVX512F,
|
||||
X86_FEATURE_AVX512F,
|
||||
X86_FEATURE_AVX512F,
|
||||
X86_FEATURE_INTEL_PT,
|
||||
X86_FEATURE_PKU,
|
||||
X86_FEATURE_HDC,
|
||||
};
|
||||
|
||||
void compel_set_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
|
||||
{
|
||||
if (likely(feature < NCAPINTS_BITS))
|
||||
@@ -32,6 +67,172 @@ int compel_test_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * compel_fpuid - detect the FPU save format and the xsave area layout.
 * @c: cpu info; its capability bits must already be filled in, the fpu
 *     context fields (xfeatures_mask, xsave sizes, xstate offsets and
 *     sizes for both standard and compacted formats) are written here.
 *
 * Returns 0 on success (including the case where the CPU has no XSAVE
 * and only FXSAVE/FSAVE is reported), -1 if no FPU is present or the
 * FP/SSE components are missing from the enumerated xstate features.
 */
static int compel_fpuid(compel_cpuinfo_t *c)
{
	unsigned int last_good_offset;
	uint32_t eax, ebx, ecx, edx;
	size_t i;

	BUILD_BUG_ON(ARRAY_SIZE(xsave_cpuid_features) !=
		     ARRAY_SIZE(xfeature_names));
	/* The debug dump below indexes xfeature_names[] by xstate slot. */
	BUILD_BUG_ON(ARRAY_SIZE(xfeature_names) !=
		     ARRAY_SIZE(c->xstate_offsets));

	if (!compel_test_cpu_cap(c, X86_FEATURE_FPU)) {
		pr_err("fpu: No FPU detected\n");
		return -1;
	}

	if (!compel_test_cpu_cap(c, X86_FEATURE_XSAVE)) {
		pr_info("fpu: x87 FPU will use %s\n",
			compel_test_cpu_cap(c, X86_FEATURE_FXSR) ?
			"FXSAVE" : "FSAVE");
		return 0;
	}

	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	c->xfeatures_mask = (uint64_t)eax | ((uint64_t)edx << 32);

	if ((c->xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		/*
		 * This indicates that something really unexpected happened
		 * with the enumeration.
		 */
		pr_err("fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx\n",
		       (unsigned long long)c->xfeatures_mask);
		return -1;
	}

	/*
	 * Clear XSAVE features that are disabled in the normal CPUID.
	 * The shift is done in 64 bits so the mask width always matches
	 * xfeatures_mask.
	 */
	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
		if (!compel_test_cpu_cap(c, xsave_cpuid_features[i]))
			c->xfeatures_mask &= ~((uint64_t)1 << i);
	}

	c->xfeatures_mask &= XCNTXT_MASK;
	c->xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;

	/*
	 * xsaves is not enabled in userspace, so
	 * xsaves is mostly for debug purpose.
	 */
	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	c->xsave_size = ebx;
	c->xsave_size_max = ecx;

	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
	c->xsaves_size = ebx;

	pr_debug("fpu: xfeatures_mask 0x%llx xsave_size %u xsave_size_max %u xsaves_size %u\n",
		 (unsigned long long)c->xfeatures_mask,
		 c->xsave_size, c->xsave_size_max, c->xsaves_size);

	if (c->xsave_size_max > sizeof(struct xsave_struct))
		pr_warn_once("fpu: max xsave frame exceed xsave_struct (%u %u)\n",
			     c->xsave_size_max, (unsigned)sizeof(struct xsave_struct));

	/* 0xff fill leaves every slot at (uint32_t)-1 until it is probed. */
	memset(c->xstate_offsets, 0xff, sizeof(c->xstate_offsets));
	memset(c->xstate_sizes, 0xff, sizeof(c->xstate_sizes));
	memset(c->xstate_comp_offsets, 0xff, sizeof(c->xstate_comp_offsets));
	memset(c->xstate_comp_sizes, 0xff, sizeof(c->xstate_comp_sizes));

	/* start at the beginning of the "extended state" */
	last_good_offset = offsetof(struct xsave_struct, extended_state_area);

	/*
	 * The FP xstates and SSE xstates are legacy states. They are always
	 * in the fixed offsets in the xsave area in either compacted form
	 * or standard form.
	 */
	c->xstate_offsets[0] = 0;
	c->xstate_sizes[0] = offsetof(struct i387_fxsave_struct, xmm_space);
	c->xstate_offsets[1] = c->xstate_sizes[0];
	c->xstate_sizes[1] = FIELD_SIZEOF(struct i387_fxsave_struct, xmm_space);

	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
		if (!(c->xfeatures_mask & ((uint64_t)1 << i)))
			continue;

		/*
		 * If an xfeature is supervisor state, the offset
		 * in EBX is invalid. We leave it to -1.
		 *
		 * SDM says: If state component 'i' is a user state component,
		 * ECX[0] return 0; if state component i is a supervisor
		 * state component, ECX[0] returns 1.
		 */
		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
		if (!(ecx & 1))
			c->xstate_offsets[i] = ebx;

		c->xstate_sizes[i] = eax;

		/*
		 * In our xstate size checks, we assume that the
		 * highest-numbered xstate feature has the
		 * highest offset in the buffer. Ensure it does.
		 */
		if (last_good_offset > c->xstate_offsets[i])
			pr_warn_once("fpu: misordered xstate %u %u\n",
				     last_good_offset, c->xstate_offsets[i]);

		last_good_offset = c->xstate_offsets[i];
	}

	BUILD_BUG_ON(sizeof(c->xstate_offsets) != sizeof(c->xstate_sizes));
	BUILD_BUG_ON(sizeof(c->xstate_comp_offsets) != sizeof(c->xstate_comp_sizes));

	/* Legacy FP/SSE areas sit at the same place in the compacted format. */
	c->xstate_comp_offsets[0] = 0;
	c->xstate_comp_sizes[0] = offsetof(struct i387_fxsave_struct, xmm_space);
	c->xstate_comp_offsets[1] = c->xstate_comp_sizes[0];
	c->xstate_comp_sizes[1] = FIELD_SIZEOF(struct i387_fxsave_struct, xmm_space);

	if (!compel_test_cpu_cap(c, X86_FEATURE_XSAVES)) {
		/* Without XSAVES the compacted layout mirrors the standard one. */
		for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
			if ((c->xfeatures_mask & ((uint64_t)1 << i))) {
				c->xstate_comp_offsets[i] = c->xstate_offsets[i];
				c->xstate_comp_sizes[i] = c->xstate_sizes[i];
			}
		}
	} else {
		c->xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] =
			FXSAVE_SIZE + XSAVE_HDR_SIZE;

		for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
			/* Disabled components take no room in the compacted area. */
			if ((c->xfeatures_mask & ((uint64_t)1 << i)))
				c->xstate_comp_sizes[i] = c->xstate_sizes[i];
			else
				c->xstate_comp_sizes[i] = 0;

			if (i > FIRST_EXTENDED_XFEATURE) {
				c->xstate_comp_offsets[i] = c->xstate_comp_offsets[i-1]
					+ c->xstate_comp_sizes[i-1];

				/*
				 * The value returned by ECX[1] indicates the alignment
				 * of state component 'i' when the compacted format
				 * of the extended region of an XSAVE area is used:
				 */
				cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
				if (ecx & 2)
					c->xstate_comp_offsets[i] = ALIGN(c->xstate_comp_offsets[i], 64);
			}
		}
	}

	if (!pr_quelled(COMPEL_LOG_DEBUG)) {
		for (i = 0; i < ARRAY_SIZE(c->xstate_offsets); i++) {
			if (!(c->xfeatures_mask & ((uint64_t)1 << i)))
				continue;
			pr_debug("fpu: %-32s xstate_offsets %6u / %-6u xstate_sizes %6u / %-6u\n",
				 xfeature_names[i], c->xstate_offsets[i], c->xstate_comp_offsets[i],
				 c->xstate_sizes[i], c->xstate_comp_sizes[i]);
		}
	}

	return 0;
}
|
||||
|
||||
int compel_cpuid(compel_cpuinfo_t *c)
|
||||
{
|
||||
uint32_t eax, ebx, ecx, edx;
|
||||
@@ -222,7 +423,10 @@ int compel_cpuid(compel_cpuinfo_t *c)
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
pr_debug("x86_family %u x86_vendor_id %s x86_model_id %s\n",
|
||||
c->x86_family, c->x86_vendor_id, c->x86_model_id);
|
||||
|
||||
return compel_fpuid(c);
|
||||
}
|
||||
|
||||
bool compel_cpu_has_feature(unsigned int feature)
|
||||
|
@@ -3,6 +3,8 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <compel/asm/fpu.h>
|
||||
|
||||
/*
|
||||
* Adopted from linux kernel and enhanced from Intel/AMD manuals.
|
||||
* Note these bits are not ABI for linux kernel but they _are_
|
||||
@@ -277,6 +279,7 @@ enum cpuid_leafs {
|
||||
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
|
||||
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
|
||||
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
|
||||
#define X86_FEATURE_HDC (14*32+13) /* HDC base registers present */
|
||||
|
||||
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
|
||||
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
|
||||
@@ -318,6 +321,7 @@ enum {
|
||||
};
|
||||
|
||||
struct cpuinfo_x86 {
|
||||
/* cpu context */
|
||||
uint8_t x86_family;
|
||||
uint8_t x86_vendor;
|
||||
uint8_t x86_model;
|
||||
@@ -328,6 +332,17 @@ struct cpuinfo_x86 {
|
||||
int cpuid_level;
|
||||
char x86_vendor_id[16];
|
||||
char x86_model_id[64];
|
||||
|
||||
/* fpu context */
|
||||
uint64_t xfeatures_mask;
|
||||
uint32_t xsave_size_max;
|
||||
uint32_t xsave_size;
|
||||
uint32_t xstate_offsets[XFEATURE_MAX];
|
||||
uint32_t xstate_sizes[XFEATURE_MAX];
|
||||
|
||||
uint32_t xsaves_size;
|
||||
uint32_t xstate_comp_offsets[XFEATURE_MAX];
|
||||
uint32_t xstate_comp_sizes[XFEATURE_MAX];
|
||||
};
|
||||
|
||||
typedef struct cpuinfo_x86 compel_cpuinfo_t;
|
||||
|
@@ -19,7 +19,66 @@
|
||||
#define XSTATE_YMM 0x4
|
||||
|
||||
#define FXSAVE_SIZE 512
|
||||
#define XSAVE_SIZE 832
|
||||
#define XSAVE_SIZE 4096
|
||||
|
||||
#define XSAVE_HDR_SIZE 64
|
||||
#define XSAVE_HDR_OFFSET FXSAVE_SIZE
|
||||
|
||||
#define XSAVE_YMM_SIZE 256
|
||||
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
|
||||
|
||||
/*
|
||||
* List of XSAVE features Linux knows about:
|
||||
*/
|
||||
enum xfeature {
|
||||
XFEATURE_FP,
|
||||
XFEATURE_SSE,
|
||||
/*
|
||||
* Values above here are "legacy states".
|
||||
* Those below are "extended states".
|
||||
*/
|
||||
XFEATURE_YMM,
|
||||
XFEATURE_BNDREGS,
|
||||
XFEATURE_BNDCSR,
|
||||
XFEATURE_OPMASK,
|
||||
XFEATURE_ZMM_Hi256,
|
||||
XFEATURE_Hi16_ZMM,
|
||||
XFEATURE_PT,
|
||||
XFEATURE_PKRU,
|
||||
XFEATURE_HDC,
|
||||
|
||||
XFEATURE_MAX,
|
||||
};
|
||||
|
||||
#define XSTATE_CPUID 0x0000000d
|
||||
|
||||
#define XFEATURE_MASK_FP (1 << XFEATURE_FP)
|
||||
#define XFEATURE_MASK_SSE (1 << XFEATURE_SSE)
|
||||
#define XFEATURE_MASK_YMM (1 << XFEATURE_YMM)
|
||||
#define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS)
|
||||
#define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR)
|
||||
#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK)
|
||||
#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256)
|
||||
#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
|
||||
#define XFEATURE_MASK_PT (1 << XFEATURE_PT)
|
||||
#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
|
||||
#define XFEATURE_MASK_HDC (1 << XFEATURE_HDC)
|
||||
|
||||
#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
|
||||
#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM)
|
||||
|
||||
#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM
|
||||
|
||||
/* Supervisor features */
|
||||
/* Both operands must be bit masks; the bare XFEATURE_HDC enum index (10) would alias the SSE and BNDREGS bits. */
#define XFEATURE_MASK_SUPERVISOR	(XFEATURE_MASK_PT | XFEATURE_MASK_HDC)
|
||||
|
||||
/* All currently supported features */
|
||||
#define XCNTXT_MASK \
|
||||
(XFEATURE_MASK_FP | XFEATURE_MASK_SSE | \
|
||||
XFEATURE_MASK_YMM | XFEATURE_MASK_OPMASK | \
|
||||
XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM | \
|
||||
XFEATURE_MASK_PKRU | XFEATURE_MASK_BNDREGS | \
|
||||
XFEATURE_MASK_BNDCSR)
|
||||
|
||||
struct fpx_sw_bytes {
|
||||
uint32_t magic1;
|
||||
@@ -66,27 +125,141 @@ struct i387_fxsave_struct {
|
||||
|
||||
struct xsave_hdr_struct {
|
||||
uint64_t xstate_bv;
|
||||
uint64_t reserved1[2];
|
||||
uint64_t reserved2[5];
|
||||
uint64_t xcomp_bv;
|
||||
uint64_t reserved[6];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* xstate_header.xcomp_bv[63] indicates that the extended_state_area
|
||||
* is in compacted format.
|
||||
*/
|
||||
#define XCOMP_BV_COMPACTED_FORMAT ((uint64_t)1 << 63)
|
||||
|
||||
/*
|
||||
* State component 2:
|
||||
*
|
||||
* There are 16x 256-bit AVX registers named YMM0-YMM15.
|
||||
* The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
|
||||
* and are stored in 'struct fxregs_state::xmm_space[]' in the
|
||||
* "legacy" area.
|
||||
*
|
||||
* The high 128 bits are stored here.
|
||||
*/
|
||||
struct ymmh_struct {
|
||||
uint32_t ymmh_space[64];
|
||||
uint32_t ymmh_space[64];
|
||||
} __packed;
|
||||
|
||||
/* Intel MPX support: */
|
||||
|
||||
struct mpx_bndreg {
|
||||
uint64_t lower_bound;
|
||||
uint64_t upper_bound;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* State component 3 is used for the 4 128-bit bounds registers
|
||||
*/
|
||||
struct mpx_bndreg_state {
|
||||
struct mpx_bndreg bndreg[4];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* State component 4 is used for the 64-bit user-mode MPX
|
||||
* configuration register BNDCFGU and the 64-bit MPX status
|
||||
* register BNDSTATUS. We call the pair "BNDCSR".
|
||||
*/
|
||||
struct mpx_bndcsr {
|
||||
uint64_t bndcfgu;
|
||||
uint64_t bndstatus;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* The BNDCSR state is padded out to be 64-bytes in size.
|
||||
*/
|
||||
struct mpx_bndcsr_state {
|
||||
union {
|
||||
struct mpx_bndcsr bndcsr;
|
||||
uint8_t pad_to_64_bytes[64];
|
||||
};
|
||||
} __packed;
|
||||
|
||||
/* AVX-512 Components: */
|
||||
|
||||
/*
|
||||
* State component 5 is used for the 8 64-bit opmask registers
|
||||
* k0-k7 (opmask state).
|
||||
*/
|
||||
struct avx_512_opmask_state {
|
||||
uint64_t opmask_reg[8];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* State component 6 is used for the upper 256 bits of the
|
||||
* registers ZMM0-ZMM15. These 16 256-bit values are denoted
|
||||
* ZMM0_H-ZMM15_H (ZMM_Hi256 state).
|
||||
*/
|
||||
struct avx_512_zmm_uppers_state {
|
||||
uint64_t zmm_upper[16 * 4];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* State component 7 is used for the 16 512-bit registers
|
||||
* ZMM16-ZMM31 (Hi16_ZMM state).
|
||||
*/
|
||||
struct avx_512_hi16_state {
|
||||
uint64_t hi16_zmm[16 * 8];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* State component 9: 32-bit PKRU register. The state is
|
||||
* 8 bytes long but only 4 bytes is used currently.
|
||||
*/
|
||||
struct pkru_state {
|
||||
uint32_t pkru;
|
||||
uint32_t pad;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* This is our most modern FPU state format, as saved by the XSAVE
|
||||
* and restored by the XRSTOR instructions.
|
||||
*
|
||||
* It consists of a legacy fxregs portion, an xstate header and
|
||||
* subsequent areas as defined by the xstate header. Not all CPUs
|
||||
* support all the extensions, so the size of the extended area
|
||||
* can vary quite a bit between CPUs.
|
||||
*
|
||||
*
|
||||
* One page should be enough for the whole xsave state.
|
||||
*/
|
||||
#define EXTENDED_STATE_AREA_SIZE (4096 - sizeof(struct i387_fxsave_struct) - sizeof(struct xsave_hdr_struct))
|
||||
|
||||
/*
|
||||
* cpu requires it to be 64 byte aligned
|
||||
*/
|
||||
struct xsave_struct {
|
||||
struct i387_fxsave_struct i387;
|
||||
struct xsave_hdr_struct xsave_hdr;
|
||||
struct ymmh_struct ymmh;
|
||||
union {
|
||||
/*
|
||||
* This ymmh is unneeded, kept for
|
||||
* backward compatibility.
|
||||
*/
|
||||
struct ymmh_struct ymmh;
|
||||
uint8_t extended_state_area[EXTENDED_STATE_AREA_SIZE];
|
||||
};
|
||||
} __aligned(FP_MIN_ALIGN_BYTES) __packed;
|
||||
|
||||
struct xsave_struct_ia32 {
|
||||
struct i387_fxsave_struct i387;
|
||||
struct xsave_hdr_struct xsave_hdr;
|
||||
struct ymmh_struct ymmh;
|
||||
union {
|
||||
/*
|
||||
* This ymmh is unneeded, kept for
|
||||
* backward compatibility.
|
||||
*/
|
||||
struct ymmh_struct ymmh;
|
||||
uint8_t extended_state_area[EXTENDED_STATE_AREA_SIZE];
|
||||
};
|
||||
} __aligned(FXSAVE_ALIGN_BYTES) __packed;
|
||||
|
||||
typedef struct {
|
||||
|
Reference in New Issue
Block a user