diff --git a/cpu/cpu_x86.go b/cpu/cpu_x86.go
index a5b5b5d..4c16131 100644
--- a/cpu/cpu_x86.go
+++ b/cpu/cpu_x86.go
@@ -64,6 +64,55 @@
func archInit() {
+ const ( // CPUID feature-bit masks passed to isSet below, grouped by the result register they are tested against
+ // eax bits; cpuid_AVXVNNI is tested against eax71 (presumably CPUID EAX=0x7 ECX=0x1; the call is outside the visible hunks)
+ cpuid_AVXVNNI = 1 << 4
+
+ // ecx bits; NOTE: this group mixes masks tested against ecx1 (CPUID EAX=0x1) with masks tested against ecx7 (CPUID EAX=0x7 ECX=0x0)
+ cpuid_SSE3 = 1 << 0 // ecx1
+ cpuid_PCLMULQDQ = 1 << 1 // ecx1
+ cpuid_AVX512VBMI = 1 << 1 // same mask as cpuid_AVX512_VBMI; not referenced in the visible hunks
+ cpuid_AVX512VBMI2 = 1 << 6 // ecx7
+ cpuid_SSSE3 = 1 << 9 // ecx1
+ cpuid_AVX512GFNI = 1 << 8 // ecx7
+ cpuid_AVX512VAES = 1 << 9 // ecx7
+ cpuid_AVX512VNNI = 1 << 11 // ecx7
+ cpuid_AVX512BITALG = 1 << 12 // ecx7
+ cpuid_FMA = 1 << 12 // ecx1
+ cpuid_AVX512VPOPCNTDQ = 1 << 14 // ecx7
+ cpuid_SSE41 = 1 << 19 // ecx1
+ cpuid_SSE42 = 1 << 20 // ecx1
+ cpuid_POPCNT = 1 << 23 // ecx1
+ cpuid_AES = 1 << 25 // ecx1
+ cpuid_OSXSAVE = 1 << 27 // ecx1
+ cpuid_AVX = 1 << 28 // ecx1; HasAVX additionally requires osSupportsAVX
+
+ // "Extended Feature Flag" bits returned in EBX for CPUID EAX=0x7 ECX=0x0 (tested against ebx7)
+ cpuid_BMI1 = 1 << 3
+ cpuid_AVX2 = 1 << 5
+ cpuid_BMI2 = 1 << 8
+ cpuid_ERMS = 1 << 9
+ cpuid_AVX512F = 1 << 16
+ cpuid_AVX512DQ = 1 << 17
+ cpuid_ADX = 1 << 19
+ cpuid_AVX512CD = 1 << 28
+ cpuid_SHA = 1 << 29 // not referenced in the visible hunks
+ cpuid_AVX512BW = 1 << 30
+ cpuid_AVX512VL = 1 << 31
+
+ // "Extended Feature Flag" bits returned in ECX for CPUID EAX=0x7 ECX=0x0 (tested against ecx7)
+ cpuid_AVX512_VBMI = 1 << 1 // the spelling actually used for HasAVX512VBMI
+ cpuid_AVX512_VBMI2 = 1 << 6 // same mask as cpuid_AVX512VBMI2; not referenced in the visible hunks
+ cpuid_GFNI = 1 << 8 // same mask as cpuid_AVX512GFNI; not referenced in the visible hunks
+ cpuid_AVX512VPCLMULQDQ = 1 << 10
+ cpuid_AVX512_BITALG = 1 << 12 // same mask as cpuid_AVX512BITALG; not referenced in the visible hunks
+
+ // edx bits; presumably for CPUID EAX=0x7 ECX=0x0 (cpuid_FSRM is not referenced in the visible hunks, TODO confirm)
+ cpuid_FSRM = 1 << 4
+ // edx bits for CPUID 0x80000001
+ cpuid_RDTSCP = 1 << 27 // not referenced in the visible hunks
+ )
+
Initialized = true
maxID, _, _, _ := cpuid(0, 0)
@@ -75,16 +124,16 @@
_, _, ecx1, edx1 := cpuid(1, 0)
X86.HasSSE2 = isSet(edx1, 1<<26)
- X86.HasSSE3 = isSet(ecx1, 1<<0)
- X86.HasPCLMULQDQ = isSet(ecx1, 1<<1)
- X86.HasSSSE3 = isSet(ecx1, 1<<9)
- X86.HasFMA = isSet(ecx1, 1<<12)
+ X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
+ X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)
+ X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3)
+ X86.HasFMA = isSet(ecx1, cpuid_FMA)
X86.HasCX16 = isSet(ecx1, 1<<13)
- X86.HasSSE41 = isSet(ecx1, 1<<19)
- X86.HasSSE42 = isSet(ecx1, 1<<20)
- X86.HasPOPCNT = isSet(ecx1, 1<<23)
- X86.HasAES = isSet(ecx1, 1<<25)
- X86.HasOSXSAVE = isSet(ecx1, 1<<27)
+ X86.HasSSE41 = isSet(ecx1, cpuid_SSE41)
+ X86.HasSSE42 = isSet(ecx1, cpuid_SSE42)
+ X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT)
+ X86.HasAES = isSet(ecx1, cpuid_AES)
+ X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE)
X86.HasRDRAND = isSet(ecx1, 1<<30)
var osSupportsAVX, osSupportsAVX512 bool
@@ -103,40 +152,40 @@
}
}
- X86.HasAVX = isSet(ecx1, 1<<28) && osSupportsAVX
+ X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX
if maxID < 7 {
return
}
eax7, ebx7, ecx7, edx7 := cpuid(7, 0)
- X86.HasBMI1 = isSet(ebx7, 1<<3)
- X86.HasAVX2 = isSet(ebx7, 1<<5) && osSupportsAVX
- X86.HasBMI2 = isSet(ebx7, 1<<8)
- X86.HasERMS = isSet(ebx7, 1<<9)
+ X86.HasBMI1 = isSet(ebx7, cpuid_BMI1)
+ X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX
+ X86.HasBMI2 = isSet(ebx7, cpuid_BMI2)
+ X86.HasERMS = isSet(ebx7, cpuid_ERMS)
X86.HasRDSEED = isSet(ebx7, 1<<18)
- X86.HasADX = isSet(ebx7, 1<<19)
+ X86.HasADX = isSet(ebx7, cpuid_ADX)
- X86.HasAVX512 = isSet(ebx7, 1<<16) && osSupportsAVX512 // Because avx-512 foundation is the core required extension
+ X86.HasAVX512 = isSet(ebx7, cpuid_AVX512F) && osSupportsAVX512 // Because avx-512 foundation is the core required extension
if X86.HasAVX512 {
X86.HasAVX512F = true
- X86.HasAVX512CD = isSet(ebx7, 1<<28)
+ X86.HasAVX512CD = isSet(ebx7, cpuid_AVX512CD)
X86.HasAVX512ER = isSet(ebx7, 1<<27)
X86.HasAVX512PF = isSet(ebx7, 1<<26)
- X86.HasAVX512VL = isSet(ebx7, 1<<31)
- X86.HasAVX512BW = isSet(ebx7, 1<<30)
- X86.HasAVX512DQ = isSet(ebx7, 1<<17)
+ X86.HasAVX512VL = isSet(ebx7, cpuid_AVX512VL)
+ X86.HasAVX512BW = isSet(ebx7, cpuid_AVX512BW)
+ X86.HasAVX512DQ = isSet(ebx7, cpuid_AVX512DQ)
X86.HasAVX512IFMA = isSet(ebx7, 1<<21)
- X86.HasAVX512VBMI = isSet(ecx7, 1<<1)
+ X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512_VBMI)
X86.HasAVX5124VNNIW = isSet(edx7, 1<<2)
X86.HasAVX5124FMAPS = isSet(edx7, 1<<3)
- X86.HasAVX512VPOPCNTDQ = isSet(ecx7, 1<<14)
- X86.HasAVX512VPCLMULQDQ = isSet(ecx7, 1<<10)
- X86.HasAVX512VNNI = isSet(ecx7, 1<<11)
- X86.HasAVX512GFNI = isSet(ecx7, 1<<8)
- X86.HasAVX512VAES = isSet(ecx7, 1<<9)
- X86.HasAVX512VBMI2 = isSet(ecx7, 1<<6)
- X86.HasAVX512BITALG = isSet(ecx7, 1<<12)
+ X86.HasAVX512VPOPCNTDQ = isSet(ecx7, cpuid_AVX512VPOPCNTDQ)
+ X86.HasAVX512VPCLMULQDQ = isSet(ecx7, cpuid_AVX512VPCLMULQDQ)
+ X86.HasAVX512VNNI = isSet(ecx7, cpuid_AVX512VNNI)
+ X86.HasAVX512GFNI = isSet(ecx7, cpuid_AVX512GFNI)
+ X86.HasAVX512VAES = isSet(ecx7, cpuid_AVX512VAES)
+ X86.HasAVX512VBMI2 = isSet(ecx7, cpuid_AVX512VBMI2)
+ X86.HasAVX512BITALG = isSet(ecx7, cpuid_AVX512BITALG)
}
X86.HasAMXTile = isSet(edx7, 1<<24)
@@ -151,7 +200,7 @@
}
if X86.HasAVX {
X86.HasAVXIFMA = isSet(eax71, 1<<23)
- X86.HasAVXVNNI = isSet(eax71, 1<<4)
+ X86.HasAVXVNNI = isSet(eax71, cpuid_AVXVNNI)
X86.HasAVXVNNIInt8 = isSet(edx71, 1<<4)
}
}