cpuid.go 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034
  1. // Generated, DO NOT EDIT,
  2. // but copy it to your own project and rename the package.
  3. // See more at http://github.com/klauspost/cpuid
  4. package cpuid
  5. import "strings"
// vendor is a representation of a CPU vendor.
// Values are produced by vendorID from the CPUID vendor string.
type vendor int
// Known vendor values. other is the zero value and is what vendorID
// returns when the vendor string is not recognized.
const (
	other vendor = iota
	intel
	amd
	via
	transmeta
	nsc
	kvm // Kernel-based Virtual Machine
	msvm // Microsoft Hyper-V or Windows Virtual PC
	vmware
	xenhvm
)
// CPU feature flags. Each constant is a distinct single bit (1 << iota),
// so several of them can be OR-ed together into one flags value.
const (
	cmov = 1 << iota // i686 CMOV
	nx // NX (No-Execute) bit
	amd3dnow // AMD 3DNOW
	amd3dnowext // AMD 3DNowExt
	mmx // Standard MMX
	mmxext // SSE integer functions or AMD MMX ext
	sse // SSE functions
	sse2 // P4 SSE2 functions
	sse3 // Prescott SSE3 functions
	ssse3 // Conroe SSSE3 functions
	sse4 // Penryn SSE4.1 functions
	sse4a // AMD Barcelona microarchitecture SSE4a instructions
	sse42 // Nehalem SSE4.2 functions
	avx // AVX functions
	avx2 // AVX2 functions
	fma3 // Intel FMA 3
	fma4 // Bulldozer FMA4 functions
	xop // Bulldozer XOP functions
	f16c // Half-precision floating-point conversion
	bmi1 // Bit Manipulation Instruction Set 1
	bmi2 // Bit Manipulation Instruction Set 2
	tbm // AMD Trailing Bit Manipulation
	lzcnt // LZCNT instruction
	popcnt // POPCNT instruction
	aesni // Advanced Encryption Standard New Instructions
	clmul // Carry-less Multiplication
	htt // Hyperthreading (enabled)
	hle // Hardware Lock Elision
	rtm // Restricted Transactional Memory
	rdrand // RDRAND instruction is available
	rdseed // RDSEED instruction is available
	adx // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	sha // Intel SHA Extensions
	avx512f // AVX-512 Foundation
	avx512dq // AVX-512 Doubleword and Quadword Instructions
	avx512ifma // AVX-512 Integer Fused Multiply-Add Instructions
	avx512pf // AVX-512 Prefetch Instructions
	avx512er // AVX-512 Exponential and Reciprocal Instructions
	avx512cd // AVX-512 Conflict Detection Instructions
	avx512bw // AVX-512 Byte and Word Instructions
	avx512vl // AVX-512 Vector Length Extensions
	avx512vbmi // AVX-512 Vector Bit Manipulation Instructions
	mpx // Intel MPX (Memory Protection Extensions)
	erms // Enhanced REP MOVSB/STOSB
	rdtscp // RDTSCP Instruction
	cx16 // CMPXCHG16B Instruction
	sgx // Software Guard Extensions
	ibpb // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	stibp // Single Thread Indirect Branch Predictors
	// Performance indicators
	sse2slow // SSE2 is supported, but usually not faster
	sse3slow // SSE3 is supported, but usually not faster
	atom // Atom processor, some SSSE3 instructions are slower
)
// flagNames maps each single-bit feature flag to the name used when
// flags are rendered as strings.
var flagNames = map[flags]string{
	cmov: "CMOV", // i686 CMOV
	nx: "NX", // NX (No-Execute) bit
	amd3dnow: "AMD3DNOW", // AMD 3DNOW
	amd3dnowext: "AMD3DNOWEXT", // AMD 3DNowExt
	mmx: "MMX", // Standard MMX
	mmxext: "MMXEXT", // SSE integer functions or AMD MMX ext
	sse: "SSE", // SSE functions
	sse2: "SSE2", // P4 SSE2 functions
	sse3: "SSE3", // Prescott SSE3 functions
	ssse3: "SSSE3", // Conroe SSSE3 functions
	sse4: "SSE4.1", // Penryn SSE4.1 functions
	sse4a: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions
	sse42: "SSE4.2", // Nehalem SSE4.2 functions
	avx: "AVX", // AVX functions
	avx2: "AVX2", // AVX2 functions
	fma3: "FMA3", // Intel FMA 3
	fma4: "FMA4", // Bulldozer FMA4 functions
	xop: "XOP", // Bulldozer XOP functions
	f16c: "F16C", // Half-precision floating-point conversion
	bmi1: "BMI1", // Bit Manipulation Instruction Set 1
	bmi2: "BMI2", // Bit Manipulation Instruction Set 2
	tbm: "TBM", // AMD Trailing Bit Manipulation
	lzcnt: "LZCNT", // LZCNT instruction
	popcnt: "POPCNT", // POPCNT instruction
	aesni: "AESNI", // Advanced Encryption Standard New Instructions
	clmul: "CLMUL", // Carry-less Multiplication
	htt: "HTT", // Hyperthreading (enabled)
	hle: "HLE", // Hardware Lock Elision
	rtm: "RTM", // Restricted Transactional Memory
	rdrand: "RDRAND", // RDRAND instruction is available
	rdseed: "RDSEED", // RDSEED instruction is available
	adx: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	sha: "SHA", // Intel SHA Extensions
	avx512f: "AVX512F", // AVX-512 Foundation
	avx512dq: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions
	avx512ifma: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add Instructions
	avx512pf: "AVX512PF", // AVX-512 Prefetch Instructions
	avx512er: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions
	avx512cd: "AVX512CD", // AVX-512 Conflict Detection Instructions
	avx512bw: "AVX512BW", // AVX-512 Byte and Word Instructions
	avx512vl: "AVX512VL", // AVX-512 Vector Length Extensions
	avx512vbmi: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions
	mpx: "MPX", // Intel MPX (Memory Protection Extensions)
	erms: "ERMS", // Enhanced REP MOVSB/STOSB
	rdtscp: "RDTSCP", // RDTSCP Instruction
	cx16: "CX16", // CMPXCHG16B Instruction
	sgx: "SGX", // Software Guard Extensions
	ibpb: "IBPB", // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
	stibp: "STIBP", // Single Thread Indirect Branch Predictors
	// Performance indicators
	sse2slow: "SSE2SLOW", // SSE2 supported, but usually not faster
	sse3slow: "SSE3SLOW", // SSE3 supported, but usually not faster
	atom: "ATOM", // Atom processor, some SSSE3 instructions are slower
}
// cpuInfo contains information about the detected system CPU.
// All fields are filled in by detect.
type cpuInfo struct {
	brandname string // Brand name reported by the CPU
	vendorid vendor // Comparable CPU vendor ID
	features flags // Features of the CPU
	physicalcores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	threadspercore int // Number of threads per physical core. Will be 1 if undetectable.
	logicalcores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	family int // CPU family number
	model int // CPU model number
	cacheline int // Cache line size in bytes. Will be 0 if undetectable.
	cache struct {
		l1i int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		l1d int // L1 Data Cache (per core or shared). Will be -1 if undetected
		l2 int // L2 Cache (per core or shared). Will be -1 if undetected
		l3 int // L3 Cache (per core or shared). Will be -1 if undetected
	}
	sgx sgxsupport // SGX details as returned by hasSGX
	maxFunc uint32 // Value returned by maxFunctionID at detection time
	maxExFunc uint32 // Value returned by maxExtendedFunction at detection time
}
// Low-level query hooks. They are declared as function variables and are not
// assigned anywhere in this part of the file.
// NOTE(review): presumably initCPU installs the platform-specific
// implementations before detect runs — confirm against the platform files.

// cpuid queries the given leaf and returns the four result registers.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)

// cpuidex queries the given leaf (op) and sub-leaf (op2).
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)

// xgetbv reads the extended control register selected by index.
var xgetbv func(index uint32) (eax, edx uint32)

// rdtscpAsm backs rtcounter (which uses eax and edx) and ia32tscaux (which uses ecx).
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
// cpu contains information about the CPU as detected on startup,
// or when detect was last called.
//
// Use this as the primary entry point to your data.
var cpu cpuInfo
// init runs initCPU and then fills the package-level cpu variable via detect.
// NOTE(review): initCPU is defined outside this part of the file; it
// presumably has to run before detect issues its first query — confirm.
func init() {
	initCPU()
	detect()
}
// detect will re-detect current CPU info.
// This will replace the content of the package-level cpu variable.
//
// Unless you expect the CPU to change while you are running your program
// you should not need to call this function.
// If you call this, you must ensure that no other goroutine is accessing the
// cpu variable.
func detect() {
	cpu.maxFunc = maxFunctionID()
	cpu.maxExFunc = maxExtendedFunction()
	cpu.brandname = brandName()
	cpu.cacheline = cacheLine()
	cpu.family, cpu.model = familyModel()
	cpu.features = support()
	// hasSGX reads the features assigned on the previous line.
	cpu.sgx = hasSGX(cpu.features&sgx != 0)
	cpu.threadspercore = threadsPerCore()
	cpu.logicalcores = logicalCores()
	cpu.physicalcores = physicalCores()
	cpu.vendorid = vendorID()
	cpu.cacheSize()
}
  186. // Generated here: http://play.golang.org/p/BxFH2Gdc0G
  187. // Cmov indicates support of CMOV instructions
  188. func (c cpuInfo) cmov() bool {
  189. return c.features&cmov != 0
  190. }
  191. // Amd3dnow indicates support of AMD 3DNOW! instructions
  192. func (c cpuInfo) amd3dnow() bool {
  193. return c.features&amd3dnow != 0
  194. }
  195. // Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
  196. func (c cpuInfo) amd3dnowext() bool {
  197. return c.features&amd3dnowext != 0
  198. }
  199. // MMX indicates support of MMX instructions
  200. func (c cpuInfo) mmx() bool {
  201. return c.features&mmx != 0
  202. }
  203. // MMXExt indicates support of MMXEXT instructions
  204. // (SSE integer functions or AMD MMX ext)
  205. func (c cpuInfo) mmxext() bool {
  206. return c.features&mmxext != 0
  207. }
  208. // SSE indicates support of SSE instructions
  209. func (c cpuInfo) sse() bool {
  210. return c.features&sse != 0
  211. }
  212. // SSE2 indicates support of SSE 2 instructions
  213. func (c cpuInfo) sse2() bool {
  214. return c.features&sse2 != 0
  215. }
  216. // SSE3 indicates support of SSE 3 instructions
  217. func (c cpuInfo) sse3() bool {
  218. return c.features&sse3 != 0
  219. }
  220. // SSSE3 indicates support of SSSE 3 instructions
  221. func (c cpuInfo) ssse3() bool {
  222. return c.features&ssse3 != 0
  223. }
  224. // SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
  225. func (c cpuInfo) sse4() bool {
  226. return c.features&sse4 != 0
  227. }
  228. // SSE42 indicates support of SSE4.2 instructions
  229. func (c cpuInfo) sse42() bool {
  230. return c.features&sse42 != 0
  231. }
  232. // AVX indicates support of AVX instructions
  233. // and operating system support of AVX instructions
  234. func (c cpuInfo) avx() bool {
  235. return c.features&avx != 0
  236. }
  237. // AVX2 indicates support of AVX2 instructions
  238. func (c cpuInfo) avx2() bool {
  239. return c.features&avx2 != 0
  240. }
  241. // FMA3 indicates support of FMA3 instructions
  242. func (c cpuInfo) fma3() bool {
  243. return c.features&fma3 != 0
  244. }
  245. // FMA4 indicates support of FMA4 instructions
  246. func (c cpuInfo) fma4() bool {
  247. return c.features&fma4 != 0
  248. }
  249. // XOP indicates support of XOP instructions
  250. func (c cpuInfo) xop() bool {
  251. return c.features&xop != 0
  252. }
  253. // F16C indicates support of F16C instructions
  254. func (c cpuInfo) f16c() bool {
  255. return c.features&f16c != 0
  256. }
  257. // BMI1 indicates support of BMI1 instructions
  258. func (c cpuInfo) bmi1() bool {
  259. return c.features&bmi1 != 0
  260. }
  261. // BMI2 indicates support of BMI2 instructions
  262. func (c cpuInfo) bmi2() bool {
  263. return c.features&bmi2 != 0
  264. }
  265. // TBM indicates support of TBM instructions
  266. // (AMD Trailing Bit Manipulation)
  267. func (c cpuInfo) tbm() bool {
  268. return c.features&tbm != 0
  269. }
  270. // Lzcnt indicates support of LZCNT instruction
  271. func (c cpuInfo) lzcnt() bool {
  272. return c.features&lzcnt != 0
  273. }
  274. // Popcnt indicates support of POPCNT instruction
  275. func (c cpuInfo) popcnt() bool {
  276. return c.features&popcnt != 0
  277. }
  278. // HTT indicates the processor has Hyperthreading enabled
  279. func (c cpuInfo) htt() bool {
  280. return c.features&htt != 0
  281. }
  282. // SSE2Slow indicates that SSE2 may be slow on this processor
  283. func (c cpuInfo) sse2slow() bool {
  284. return c.features&sse2slow != 0
  285. }
  286. // SSE3Slow indicates that SSE3 may be slow on this processor
  287. func (c cpuInfo) sse3slow() bool {
  288. return c.features&sse3slow != 0
  289. }
  290. // AesNi indicates support of AES-NI instructions
  291. // (Advanced Encryption Standard New Instructions)
  292. func (c cpuInfo) aesni() bool {
  293. return c.features&aesni != 0
  294. }
  295. // Clmul indicates support of CLMUL instructions
  296. // (Carry-less Multiplication)
  297. func (c cpuInfo) clmul() bool {
  298. return c.features&clmul != 0
  299. }
  300. // NX indicates support of NX (No-Execute) bit
  301. func (c cpuInfo) nx() bool {
  302. return c.features&nx != 0
  303. }
  304. // SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
  305. func (c cpuInfo) sse4a() bool {
  306. return c.features&sse4a != 0
  307. }
  308. // HLE indicates support of Hardware Lock Elision
  309. func (c cpuInfo) hle() bool {
  310. return c.features&hle != 0
  311. }
  312. // RTM indicates support of Restricted Transactional Memory
  313. func (c cpuInfo) rtm() bool {
  314. return c.features&rtm != 0
  315. }
  316. // Rdrand indicates support of RDRAND instruction is available
  317. func (c cpuInfo) rdrand() bool {
  318. return c.features&rdrand != 0
  319. }
  320. // Rdseed indicates support of RDSEED instruction is available
  321. func (c cpuInfo) rdseed() bool {
  322. return c.features&rdseed != 0
  323. }
  324. // ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
  325. func (c cpuInfo) adx() bool {
  326. return c.features&adx != 0
  327. }
  328. // SHA indicates support of Intel SHA Extensions
  329. func (c cpuInfo) sha() bool {
  330. return c.features&sha != 0
  331. }
  332. // AVX512F indicates support of AVX-512 Foundation
  333. func (c cpuInfo) avx512f() bool {
  334. return c.features&avx512f != 0
  335. }
  336. // AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
  337. func (c cpuInfo) avx512dq() bool {
  338. return c.features&avx512dq != 0
  339. }
  340. // AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
  341. func (c cpuInfo) avx512ifma() bool {
  342. return c.features&avx512ifma != 0
  343. }
  344. // AVX512PF indicates support of AVX-512 Prefetch Instructions
  345. func (c cpuInfo) avx512pf() bool {
  346. return c.features&avx512pf != 0
  347. }
  348. // AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
  349. func (c cpuInfo) avx512er() bool {
  350. return c.features&avx512er != 0
  351. }
  352. // AVX512CD indicates support of AVX-512 Conflict Detection Instructions
  353. func (c cpuInfo) avx512cd() bool {
  354. return c.features&avx512cd != 0
  355. }
  356. // AVX512BW indicates support of AVX-512 Byte and Word Instructions
  357. func (c cpuInfo) avx512bw() bool {
  358. return c.features&avx512bw != 0
  359. }
  360. // AVX512VL indicates support of AVX-512 Vector Length Extensions
  361. func (c cpuInfo) avx512vl() bool {
  362. return c.features&avx512vl != 0
  363. }
  364. // AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
  365. func (c cpuInfo) avx512vbmi() bool {
  366. return c.features&avx512vbmi != 0
  367. }
  368. // MPX indicates support of Intel MPX (Memory Protection Extensions)
  369. func (c cpuInfo) mpx() bool {
  370. return c.features&mpx != 0
  371. }
  372. // ERMS indicates support of Enhanced REP MOVSB/STOSB
  373. func (c cpuInfo) erms() bool {
  374. return c.features&erms != 0
  375. }
  376. // RDTSCP Instruction is available.
  377. func (c cpuInfo) rdtscp() bool {
  378. return c.features&rdtscp != 0
  379. }
  380. // CX16 indicates if CMPXCHG16B instruction is available.
  381. func (c cpuInfo) cx16() bool {
  382. return c.features&cx16 != 0
  383. }
  384. // TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
  385. // So TSX simply checks that.
  386. func (c cpuInfo) tsx() bool {
  387. return c.features&(mpx|rtm) == mpx|rtm
  388. }
  389. // Atom indicates an Atom processor
  390. func (c cpuInfo) atom() bool {
  391. return c.features&atom != 0
  392. }
  393. // Intel returns true if vendor is recognized as Intel
  394. func (c cpuInfo) intel() bool {
  395. return c.vendorid == intel
  396. }
  397. // AMD returns true if vendor is recognized as AMD
  398. func (c cpuInfo) amd() bool {
  399. return c.vendorid == amd
  400. }
  401. // Transmeta returns true if vendor is recognized as Transmeta
  402. func (c cpuInfo) transmeta() bool {
  403. return c.vendorid == transmeta
  404. }
  405. // NSC returns true if vendor is recognized as National Semiconductor
  406. func (c cpuInfo) nsc() bool {
  407. return c.vendorid == nsc
  408. }
  409. // VIA returns true if vendor is recognized as VIA
  410. func (c cpuInfo) via() bool {
  411. return c.vendorid == via
  412. }
  413. // RTCounter returns the 64-bit time-stamp counter
  414. // Uses the RDTSCP instruction. The value 0 is returned
  415. // if the CPU does not support the instruction.
  416. func (c cpuInfo) rtcounter() uint64 {
  417. if !c.rdtscp() {
  418. return 0
  419. }
  420. a, _, _, d := rdtscpAsm()
  421. return uint64(a) | (uint64(d) << 32)
  422. }
  423. // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
  424. // This variable is OS dependent, but on Linux contains information
  425. // about the current cpu/core the code is running on.
  426. // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
  427. func (c cpuInfo) ia32tscaux() uint32 {
  428. if !c.rdtscp() {
  429. return 0
  430. }
  431. _, _, ecx, _ := rdtscpAsm()
  432. return ecx
  433. }
  434. // LogicalCPU will return the Logical CPU the code is currently executing on.
  435. // This is likely to change when the OS re-schedules the running thread
  436. // to another CPU.
  437. // If the current core cannot be detected, -1 will be returned.
  438. func (c cpuInfo) logicalcpu() int {
  439. if c.maxFunc < 1 {
  440. return -1
  441. }
  442. _, ebx, _, _ := cpuid(1)
  443. return int(ebx >> 24)
  444. }
  445. // VM Will return true if the cpu id indicates we are in
  446. // a virtual machine. This is only a hint, and will very likely
  447. // have many false negatives.
  448. func (c cpuInfo) vm() bool {
  449. switch c.vendorid {
  450. case msvm, kvm, vmware, xenhvm:
  451. return true
  452. }
  453. return false
  454. }
// flags contains detected cpu features and characteristics.
// It is a bit set built from the single-bit feature constants.
type flags uint64
  457. // String returns a string representation of the detected
  458. // CPU features.
  459. func (f flags) String() string {
  460. return strings.Join(f.strings(), ",")
  461. }
  462. // Strings returns and array of the detected features.
  463. func (f flags) strings() []string {
  464. s := support()
  465. r := make([]string, 0, 20)
  466. for i := uint(0); i < 64; i++ {
  467. key := flags(1 << i)
  468. val := flagNames[key]
  469. if s&key != 0 {
  470. r = append(r, val)
  471. }
  472. }
  473. return r
  474. }
  475. func maxExtendedFunction() uint32 {
  476. eax, _, _, _ := cpuid(0x80000000)
  477. return eax
  478. }
  479. func maxFunctionID() uint32 {
  480. a, _, _, _ := cpuid(0)
  481. return a
  482. }
  483. func brandName() string {
  484. if maxExtendedFunction() >= 0x80000004 {
  485. v := make([]uint32, 0, 48)
  486. for i := uint32(0); i < 3; i++ {
  487. a, b, c, d := cpuid(0x80000002 + i)
  488. v = append(v, a, b, c, d)
  489. }
  490. return strings.Trim(string(valAsString(v...)), " ")
  491. }
  492. return "unknown"
  493. }
  494. func threadsPerCore() int {
  495. mfi := maxFunctionID()
  496. if mfi < 0x4 || vendorID() != intel {
  497. return 1
  498. }
  499. if mfi < 0xb {
  500. _, b, _, d := cpuid(1)
  501. if (d & (1 << 28)) != 0 {
  502. // v will contain logical core count
  503. v := (b >> 16) & 255
  504. if v > 1 {
  505. a4, _, _, _ := cpuid(4)
  506. // physical cores
  507. v2 := (a4 >> 26) + 1
  508. if v2 > 0 {
  509. return int(v) / int(v2)
  510. }
  511. }
  512. }
  513. return 1
  514. }
  515. _, b, _, _ := cpuidex(0xb, 0)
  516. if b&0xffff == 0 {
  517. return 1
  518. }
  519. return int(b & 0xffff)
  520. }
  521. func logicalCores() int {
  522. mfi := maxFunctionID()
  523. switch vendorID() {
  524. case intel:
  525. // Use this on old Intel processors
  526. if mfi < 0xb {
  527. if mfi < 1 {
  528. return 0
  529. }
  530. // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
  531. // that can be assigned to logical processors in a physical package.
  532. // The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
  533. _, ebx, _, _ := cpuid(1)
  534. logical := (ebx >> 16) & 0xff
  535. return int(logical)
  536. }
  537. _, b, _, _ := cpuidex(0xb, 1)
  538. return int(b & 0xffff)
  539. case amd:
  540. _, b, _, _ := cpuid(1)
  541. return int((b >> 16) & 0xff)
  542. default:
  543. return 0
  544. }
  545. }
  546. func familyModel() (int, int) {
  547. if maxFunctionID() < 0x1 {
  548. return 0, 0
  549. }
  550. eax, _, _, _ := cpuid(1)
  551. family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
  552. model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
  553. return int(family), int(model)
  554. }
  555. func physicalCores() int {
  556. switch vendorID() {
  557. case intel:
  558. return logicalCores() / threadsPerCore()
  559. case amd:
  560. if maxExtendedFunction() >= 0x80000008 {
  561. _, _, c, _ := cpuid(0x80000008)
  562. return int(c&0xff) + 1
  563. }
  564. }
  565. return 0
  566. }
// vendorMapping maps the 12-byte CPUID vendor string to a vendor value.
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]vendor{
	"AMDisbetter!": amd,
	"AuthenticAMD": amd,
	"CentaurHauls": via,
	"GenuineIntel": intel,
	"TransmetaCPU": transmeta,
	"GenuineTMx86": transmeta,
	"Geode by NSC": nsc,
	"VIA VIA VIA ": via,
	"KVMKVMKVMKVM": kvm,
	"Microsoft Hv": msvm,
	"VMwareVMware": vmware,
	"XenVMMXenVMM": xenhvm,
}
  582. func vendorID() vendor {
  583. _, b, c, d := cpuid(0)
  584. v := valAsString(b, d, c)
  585. vend, ok := vendorMapping[string(v)]
  586. if !ok {
  587. return other
  588. }
  589. return vend
  590. }
  591. func cacheLine() int {
  592. if maxFunctionID() < 0x1 {
  593. return 0
  594. }
  595. _, ebx, _, _ := cpuid(1)
  596. cache := (ebx & 0xff00) >> 5 // cflush size
  597. if cache == 0 && maxExtendedFunction() >= 0x80000006 {
  598. _, _, ecx, _ := cpuid(0x80000006)
  599. cache = ecx & 0xff // cacheline size
  600. }
  601. // TODO: Read from Cache and TLB Information
  602. return int(cache)
  603. }
  604. func (c *cpuInfo) cacheSize() {
  605. c.cache.l1d = -1
  606. c.cache.l1i = -1
  607. c.cache.l2 = -1
  608. c.cache.l3 = -1
  609. vendor := vendorID()
  610. switch vendor {
  611. case intel:
  612. if maxFunctionID() < 4 {
  613. return
  614. }
  615. for i := uint32(0); ; i++ {
  616. eax, ebx, ecx, _ := cpuidex(4, i)
  617. cacheType := eax & 15
  618. if cacheType == 0 {
  619. break
  620. }
  621. cacheLevel := (eax >> 5) & 7
  622. coherency := int(ebx&0xfff) + 1
  623. partitions := int((ebx>>12)&0x3ff) + 1
  624. associativity := int((ebx>>22)&0x3ff) + 1
  625. sets := int(ecx) + 1
  626. size := associativity * partitions * coherency * sets
  627. switch cacheLevel {
  628. case 1:
  629. if cacheType == 1 {
  630. // 1 = Data Cache
  631. c.cache.l1d = size
  632. } else if cacheType == 2 {
  633. // 2 = Instruction Cache
  634. c.cache.l1i = size
  635. } else {
  636. if c.cache.l1d < 0 {
  637. c.cache.l1i = size
  638. }
  639. if c.cache.l1i < 0 {
  640. c.cache.l1i = size
  641. }
  642. }
  643. case 2:
  644. c.cache.l2 = size
  645. case 3:
  646. c.cache.l3 = size
  647. }
  648. }
  649. case amd:
  650. // Untested.
  651. if maxExtendedFunction() < 0x80000005 {
  652. return
  653. }
  654. _, _, ecx, edx := cpuid(0x80000005)
  655. c.cache.l1d = int(((ecx >> 24) & 0xFF) * 1024)
  656. c.cache.l1i = int(((edx >> 24) & 0xFF) * 1024)
  657. if maxExtendedFunction() < 0x80000006 {
  658. return
  659. }
  660. _, _, ecx, _ = cpuid(0x80000006)
  661. c.cache.l2 = int(((ecx >> 16) & 0xFFFF) * 1024)
  662. }
  663. return
  664. }
// sgxsupport describes SGX (Software Guard Extensions) support as filled in
// by hasSGX.
type sgxsupport struct {
	available bool // Copy of the availability flag passed to hasSGX
	sgx1supported bool // Bit 0 of EAX from CPUID leaf 0x12, sub-leaf 0
	sgx2supported bool // Bit 1 of EAX from CPUID leaf 0x12, sub-leaf 0
	maxenclavesizenot64 int64 // 1 << (low byte of EDX) from the same leaf
	maxenclavesize64 int64 // 1 << (second byte of EDX) from the same leaf
}
  672. func hasSGX(available bool) (rval sgxsupport) {
  673. rval.available = available
  674. if !available {
  675. return
  676. }
  677. a, _, _, d := cpuidex(0x12, 0)
  678. rval.sgx1supported = a&0x01 != 0
  679. rval.sgx2supported = a&0x02 != 0
  680. rval.maxenclavesizenot64 = 1 << (d & 0xFF) // pow 2
  681. rval.maxenclavesize64 = 1 << ((d >> 8) & 0xFF) // pow 2
  682. return
  683. }
// support queries the CPU and returns the set of detected feature flags.
//
// It reads CPUID leaf 1, leaf 7 (when available) and extended leaf
// 0x80000001 (when available), and uses xgetbv(0) to confirm operating
// system support before reporting AVX and AVX-512 features.
// It returns 0 when CPUID leaf 1 is not available.
func support() flags {
	mfi := maxFunctionID()
	vend := vendorID()
	if mfi < 0x1 {
		return 0
	}
	rval := uint64(0)
	// Leaf 1: basic feature bits in ECX (c) and EDX (d).
	_, _, c, d := cpuid(1)
	if (d & (1 << 15)) != 0 {
		rval |= cmov
	}
	if (d & (1 << 23)) != 0 {
		rval |= mmx
	}
	// Bit 25 sets both mmxext and sse.
	if (d & (1 << 25)) != 0 {
		rval |= mmxext
	}
	if (d & (1 << 25)) != 0 {
		rval |= sse
	}
	if (d & (1 << 26)) != 0 {
		rval |= sse2
	}
	if (c & 1) != 0 {
		rval |= sse3
	}
	if (c & 0x00000200) != 0 {
		rval |= ssse3
	}
	if (c & 0x00080000) != 0 {
		rval |= sse4
	}
	if (c & 0x00100000) != 0 {
		rval |= sse42
	}
	if (c & (1 << 25)) != 0 {
		rval |= aesni
	}
	if (c & (1 << 1)) != 0 {
		rval |= clmul
	}
	if c&(1<<23) != 0 {
		rval |= popcnt
	}
	if c&(1<<30) != 0 {
		rval |= rdrand
	}
	if c&(1<<29) != 0 {
		rval |= f16c
	}
	if c&(1<<13) != 0 {
		rval |= cx16
	}
	// htt is only reported on Intel, and only when more than one thread per
	// core is actually detected.
	if vend == intel && (d&(1<<28)) != 0 && mfi >= 4 {
		if threadsPerCore() > 1 {
			rval |= htt
		}
	}
	// Check XGETBV, OXSAVE and AVX bits
	if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
		// Check for OS support
		eax, _ := xgetbv(0)
		if (eax & 0x6) == 0x6 {
			rval |= avx
			if (c & 0x00001000) != 0 {
				rval |= fma3
			}
		}
	}
	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
	if mfi >= 7 {
		_, ebx, ecx, edx := cpuidex(7, 0)
		if (rval&avx) != 0 && (ebx&0x00000020) != 0 {
			rval |= avx2
		}
		// bmi2 is only reported together with bmi1.
		if (ebx & 0x00000008) != 0 {
			rval |= bmi1
			if (ebx & 0x00000100) != 0 {
				rval |= bmi2
			}
		}
		if ebx&(1<<2) != 0 {
			rval |= sgx
		}
		if ebx&(1<<4) != 0 {
			rval |= hle
		}
		if ebx&(1<<9) != 0 {
			rval |= erms
		}
		if ebx&(1<<11) != 0 {
			rval |= rtm
		}
		if ebx&(1<<14) != 0 {
			rval |= mpx
		}
		if ebx&(1<<18) != 0 {
			rval |= rdseed
		}
		if ebx&(1<<19) != 0 {
			rval |= adx
		}
		if ebx&(1<<29) != 0 {
			rval |= sha
		}
		if edx&(1<<26) != 0 {
			rval |= ibpb
		}
		if edx&(1<<27) != 0 {
			rval |= stibp
		}
		// Only detect AVX-512 features if XGETBV is supported
		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
			// Check for OS support
			eax, _ := xgetbv(0)
			// Verify that XCR0[7:5] = '111b' (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
			// ZMM16-ZMM31 state are enabled by OS)
			// and that XCR0[2:1] = '11b' (XMM state and YMM state are enabled by OS).
			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
				if ebx&(1<<16) != 0 {
					rval |= avx512f
				}
				if ebx&(1<<17) != 0 {
					rval |= avx512dq
				}
				if ebx&(1<<21) != 0 {
					rval |= avx512ifma
				}
				if ebx&(1<<26) != 0 {
					rval |= avx512pf
				}
				if ebx&(1<<27) != 0 {
					rval |= avx512er
				}
				if ebx&(1<<28) != 0 {
					rval |= avx512cd
				}
				if ebx&(1<<30) != 0 {
					rval |= avx512bw
				}
				if ebx&(1<<31) != 0 {
					rval |= avx512vl
				}
				// ecx
				if ecx&(1<<1) != 0 {
					rval |= avx512vbmi
				}
			}
		}
	}
	if maxExtendedFunction() >= 0x80000001 {
		// c and d are shadowed here: from now on they hold ECX/EDX of the
		// extended leaf 0x80000001, not of leaf 1.
		_, _, c, d := cpuid(0x80000001)
		// Bit 5 sets both lzcnt and popcnt.
		if (c & (1 << 5)) != 0 {
			rval |= lzcnt
			rval |= popcnt
		}
		if (d & (1 << 31)) != 0 {
			rval |= amd3dnow
		}
		if (d & (1 << 30)) != 0 {
			rval |= amd3dnowext
		}
		if (d & (1 << 23)) != 0 {
			rval |= mmx
		}
		if (d & (1 << 22)) != 0 {
			rval |= mmxext
		}
		if (c & (1 << 6)) != 0 {
			rval |= sse4a
		}
		if d&(1<<20) != 0 {
			rval |= nx
		}
		if d&(1<<27) != 0 {
			rval |= rdtscp
		}
		/* Allow for selectively disabling SSE2 functions on AMD processors
		   with SSE2 support but not SSE4a. This includes Athlon64, some
		   Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
		   than SSE2 often enough to utilize this special-case flag.
		   sse2 and sse2slow are both set in this case so that SSE2 is used
		   unless explicitly disabled by checking sse2slow. */
		if vendorID() != intel &&
			rval&sse2 != 0 && (c&0x00000040) == 0 {
			rval |= sse2slow
		}
		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
		 * used unless the OS has AVX support. */
		if (rval & avx) != 0 {
			if (c & 0x00000800) != 0 {
				rval |= xop
			}
			if (c & 0x00010000) != 0 {
				rval |= fma4
			}
		}
		if vendorID() == intel {
			family, model := familyModel()
			if family == 6 && (model == 9 || model == 13 || model == 14) {
				/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
				 * 6/14 (core1 "yonah") theoretically support sse2, but it's
				 * usually slower than mmx. */
				if (rval & sse2) != 0 {
					rval |= sse2slow
				}
				if (rval & sse3) != 0 {
					rval |= sse3slow
				}
			}
			/* The Atom processor has SSSE3 support, which is useful in many cases,
			 * but sometimes the SSSE3 version is slower than the SSE2 equivalent
			 * on the Atom, but is generally faster on other processors supporting
			 * SSSE3. This flag allows for selectively disabling certain SSSE3
			 * functions on the Atom. */
			if family == 6 && model == 28 {
				rval |= atom
			}
		}
	}
	return flags(rval)
}
  907. func valAsString(values ...uint32) []byte {
  908. r := make([]byte, 4*len(values))
  909. for i, v := range values {
  910. dst := r[i*4:]
  911. dst[0] = byte(v & 0xff)
  912. dst[1] = byte((v >> 8) & 0xff)
  913. dst[2] = byte((v >> 16) & 0xff)
  914. dst[3] = byte((v >> 24) & 0xff)
  915. switch {
  916. case dst[0] == 0:
  917. return r[:i*4]
  918. case dst[1] == 0:
  919. return r[:i*4+1]
  920. case dst[2] == 0:
  921. return r[:i*4+2]
  922. case dst[3] == 0:
  923. return r[:i*4+3]
  924. }
  925. }
  926. return r
  927. }