Files
Android11/external/XNNPACK/third_party/cpuinfo.patch
2023-10-13 14:01:41 +00:00

1476 lines
53 KiB
Diff
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

diff --git CMakeLists.txt CMakeLists.txt
index e594def..cab4d05 100644
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -119,7 +119,8 @@ ENDIF()
# ---[ cpuinfo library
SET(CPUINFO_SRCS
src/init.c
- src/api.c)
+ src/api.c
+ src/cache.c)
IF(CPUINFO_SUPPORTED_PLATFORM)
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")
diff --git LICENSE LICENSE
index 4910bfe..3f9a4f0 100644
--- LICENSE
+++ LICENSE
@@ -1,3 +1,4 @@
+Copyright (c) 2019 Google LLC
Copyright (c) 2017-2018 Facebook Inc.
Copyright (C) 2012-2017 Georgia Institute of Technology
Copyright (C) 2010-2012 Marat Dukhan
diff --git include/cpuinfo.h include/cpuinfo.h
index 7d5833f..9938d2b 100644
--- include/cpuinfo.h
+++ include/cpuinfo.h
@@ -38,10 +38,18 @@
#define CPUINFO_ARCH_PNACL 1
#endif
-#if defined(EMSCRIPTEN)
+#if defined(__asmjs__)
#define CPUINFO_ARCH_ASMJS 1
#endif
+#if defined(__wasm__)
+ #if defined(__wasm_simd128__)
+ #define CPUINFO_ARCH_WASMSIMD 1
+ #else
+ #define CPUINFO_ARCH_WASM 1
+ #endif
+#endif
+
#if CPUINFO_ARCH_X86 && defined(_MSC_VER)
#define CPUINFO_ABI __cdecl
#elif CPUINFO_ARCH_X86 && defined(__GNUC__)
@@ -80,6 +88,14 @@
#define CPUINFO_ARCH_ASMJS 0
#endif
+#ifndef CPUINFO_ARCH_WASM
+ #define CPUINFO_ARCH_WASM 0
+#endif
+
+#ifndef CPUINFO_ARCH_WASMSIMD
+ #define CPUINFO_ARCH_WASMSIMD 0
+#endif
+
#define CPUINFO_CACHE_UNIFIED 0x00000001
#define CPUINFO_CACHE_INCLUSIVE 0x00000002
#define CPUINFO_CACHE_COMPLEX_INDEXING 0x00000004
@@ -278,10 +294,14 @@ enum cpuinfo_uarch {
cpuinfo_uarch_haswell = 0x00100208,
/** Intel Broadwell microarchitecture. */
cpuinfo_uarch_broadwell = 0x00100209,
- /** Intel Sky Lake microarchitecture. */
+ /** Intel Sky Lake microarchitecture (14 nm, including Kaby/Coffee/Whiskey/Amber/Comet/Cascade/Cooper Lake). */
cpuinfo_uarch_sky_lake = 0x0010020A,
- /** Intel Kaby Lake microarchitecture. */
- cpuinfo_uarch_kaby_lake = 0x0010020B,
+ /** DEPRECATED (Intel Kaby Lake microarchitecture). */
+ cpuinfo_uarch_kaby_lake = 0x0010020A,
+ /** Intel Palm Cove microarchitecture (10 nm, Cannon Lake). */
+ cpuinfo_uarch_palm_cove = 0x0010020B,
+ /** Intel Sunny Cove microarchitecture (10 nm, Ice Lake). */
+ cpuinfo_uarch_sunny_cove = 0x0010020C,
/** Pentium 4 with Willamette, Northwood, or Foster cores. */
cpuinfo_uarch_willamette = 0x00100300,
@@ -289,13 +309,17 @@ enum cpuinfo_uarch {
cpuinfo_uarch_prescott = 0x00100301,
/** Intel Atom on 45 nm process. */
- cpuinfo_uarch_bonnell = 0x00100400,
+ cpuinfo_uarch_bonnell = 0x00100400,
/** Intel Atom on 32 nm process. */
- cpuinfo_uarch_saltwell = 0x00100401,
+ cpuinfo_uarch_saltwell = 0x00100401,
/** Intel Silvermont microarchitecture (22 nm out-of-order Atom). */
- cpuinfo_uarch_silvermont = 0x00100402,
+ cpuinfo_uarch_silvermont = 0x00100402,
/** Intel Airmont microarchitecture (14 nm out-of-order Atom). */
- cpuinfo_uarch_airmont = 0x00100403,
+ cpuinfo_uarch_airmont = 0x00100403,
+ /** Intel Goldmont microarchitecture (Denverton, Apollo Lake). */
+ cpuinfo_uarch_goldmont = 0x00100404,
+ /** Intel Goldmont Plus microarchitecture (Gemini Lake). */
+ cpuinfo_uarch_goldmont_plus = 0x00100405,
/** Intel Knights Ferry HPC boards. */
cpuinfo_uarch_knights_ferry = 0x00100500,
@@ -335,8 +359,10 @@ enum cpuinfo_uarch {
cpuinfo_uarch_steamroller = 0x00200107,
/** AMD Excavator microarchitecture (Carizzo APUs). */
cpuinfo_uarch_excavator = 0x00200108,
- /** AMD Zen microarchitecture (Ryzen CPUs). */
+ /** AMD Zen microarchitecture (12/14 nm Ryzen and EPYC CPUs). */
cpuinfo_uarch_zen = 0x00200109,
+ /** AMD Zen 2 microarchitecture (7 nm Ryzen and EPYC CPUs). */
+ cpuinfo_uarch_zen2 = 0x0020010A,
/** NSC Geode and AMD Geode GX and LX. */
cpuinfo_uarch_geode = 0x00200200,
@@ -370,23 +396,34 @@ enum cpuinfo_uarch {
cpuinfo_uarch_cortex_a17 = 0x00300217,
/** ARM Cortex-A32. */
- cpuinfo_uarch_cortex_a32 = 0x00300332,
+ cpuinfo_uarch_cortex_a32 = 0x00300332,
/** ARM Cortex-A35. */
- cpuinfo_uarch_cortex_a35 = 0x00300335,
+ cpuinfo_uarch_cortex_a35 = 0x00300335,
/** ARM Cortex-A53. */
- cpuinfo_uarch_cortex_a53 = 0x00300353,
+ cpuinfo_uarch_cortex_a53 = 0x00300353,
/** ARM Cortex-A55. */
- cpuinfo_uarch_cortex_a55 = 0x00300355,
+ cpuinfo_uarch_cortex_a55 = 0x00300355,
/** ARM Cortex-A57. */
- cpuinfo_uarch_cortex_a57 = 0x00300357,
+ cpuinfo_uarch_cortex_a57 = 0x00300357,
+ /** ARM Cortex-A65. */
+ cpuinfo_uarch_cortex_a65 = 0x00300365,
/** ARM Cortex-A72. */
- cpuinfo_uarch_cortex_a72 = 0x00300372,
+ cpuinfo_uarch_cortex_a72 = 0x00300372,
/** ARM Cortex-A73. */
- cpuinfo_uarch_cortex_a73 = 0x00300373,
+ cpuinfo_uarch_cortex_a73 = 0x00300373,
/** ARM Cortex-A75. */
- cpuinfo_uarch_cortex_a75 = 0x00300375,
+ cpuinfo_uarch_cortex_a75 = 0x00300375,
/** ARM Cortex-A76. */
- cpuinfo_uarch_cortex_a76 = 0x00300376,
+ cpuinfo_uarch_cortex_a76 = 0x00300376,
+ /** ARM Cortex-A76AE. */
+ cpuinfo_uarch_cortex_a76ae = 0x00300378,
+ /** ARM Cortex-A77. */
+ cpuinfo_uarch_cortex_a77 = 0x00300377,
+
+ /** ARM Neoverse N1. */
+ cpuinfo_uarch_neoverse_n1 = 0x00300400,
+ /** ARM Neoverse E1. */
+ cpuinfo_uarch_neoverse_e1 = 0x00300401,
/** Qualcomm Scorpion. */
cpuinfo_uarch_scorpion = 0x00400100,
@@ -406,12 +443,22 @@ enum cpuinfo_uarch {
/** Nvidia Carmel. */
cpuinfo_uarch_carmel = 0x00500102,
- /** Samsung Mongoose M1 (Exynos 8890 big cores). */
+ /** Samsung Exynos M1 (Exynos 8890 big cores). */
+ cpuinfo_uarch_exynos_m1 = 0x00600100,
+ /** Samsung Exynos M2 (Exynos 8895 big cores). */
+ cpuinfo_uarch_exynos_m2 = 0x00600101,
+ /** Samsung Exynos M3 (Exynos 9810 big cores). */
+ cpuinfo_uarch_exynos_m3 = 0x00600102,
+ /** Samsung Exynos M4 (Exynos 9820 big cores). */
+ cpuinfo_uarch_exynos_m4 = 0x00600103,
+ /** Samsung Exynos M5 (Exynos 9830 big cores). */
+ cpuinfo_uarch_exynos_m5 = 0x00600104,
+
+ /* Old names for Exynos. */
cpuinfo_uarch_mongoose_m1 = 0x00600100,
- /** Samsung Mongoose M2 (Exynos 8895 big cores). */
cpuinfo_uarch_mongoose_m2 = 0x00600101,
- /** Samsung Meerkat M3 (Exynos 9810 big cores). */
cpuinfo_uarch_meerkat_m3 = 0x00600102,
+ cpuinfo_uarch_meerkat_m4 = 0x00600103,
/** Apple A6 and A6X processors. */
cpuinfo_uarch_swift = 0x00700100,
@@ -640,6 +687,8 @@ void CPUINFO_ABI cpuinfo_deinitialize(void);
bool avx512bitalg;
bool avx512vpopcntdq;
bool avx512vnni;
+ bool avx512bf16;
+ bool avx512vp2intersect;
bool avx512_4vnniw;
bool avx512_4fmaps;
bool hle;
@@ -1110,6 +1159,22 @@ static inline bool cpuinfo_has_x86_avx512vnni(void) {
#endif
}
+static inline bool cpuinfo_has_x86_avx512bf16(void) {
+ #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ return cpuinfo_isa.avx512bf16;
+ #else
+ return false;
+ #endif
+}
+
+static inline bool cpuinfo_has_x86_avx512vp2intersect(void) {
+ #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ return cpuinfo_isa.avx512vp2intersect;
+ #else
+ return false;
+ #endif
+}
+
static inline bool cpuinfo_has_x86_avx512_4vnniw(void) {
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
return cpuinfo_isa.avx512_4vnniw;
@@ -1682,6 +1747,11 @@ uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void);
uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void);
uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void);
+/**
+ * Returns upper bound on cache size.
+ */
+uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void);
+
const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void);
const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void);
diff --git src/api.c src/api.c
index 98b5805..83744f5 100644
--- src/api.c
+++ src/api.c
@@ -18,6 +18,7 @@ uint32_t cpuinfo_cores_count = 0;
uint32_t cpuinfo_clusters_count = 0;
uint32_t cpuinfo_packages_count = 0;
uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = { 0 };
+uint32_t cpuinfo_max_cache_size = 0;
const struct cpuinfo_processor* cpuinfo_get_processors(void) {
diff --git src/arm/api.h src/arm/api.h
index 11e588b..69274bc 100644
--- src/arm/api.h
+++ src/arm/api.h
@@ -104,6 +104,9 @@ CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
struct cpuinfo_cache l1d[restrict static 1],
struct cpuinfo_cache l2[restrict static 1],
struct cpuinfo_cache l3[restrict static 1]);
+
+CPUINFO_INTERNAL uint32_t cpuinfo_arm_compute_max_cache_size(
+ const struct cpuinfo_processor processor[restrict static 1]);
#else /* defined(__cplusplus) */
CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
enum cpuinfo_uarch uarch,
diff --git src/arm/cache.c src/arm/cache.c
index 5ada7d9..ccadeb4 100644
--- src/arm/cache.c
+++ src/arm/cache.c
@@ -1,10 +1,12 @@
#include <stdint.h>
#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>
#include <arm/api.h>
#include <arm/midr.h>
+
void cpuinfo_arm_decode_cache(
enum cpuinfo_uarch uarch,
uint32_t cluster_cores,
@@ -109,7 +111,7 @@ void cpuinfo_arm_decode_cache(
* memory accesses and has been optimized for use with the Cortex-A5 processor.
* 8.1.7. Exclusive L2 cache
* The Cortex-A5 processor can be connected to an L2 cache that supports an exclusive cache mode.
- * This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller.
+ * This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller.
*
* +--------------------+-----------+-----------+----------+-----------+
* | Processor model | L1D cache | L1I cache | L2 cache | Reference |
@@ -698,7 +700,7 @@ void cpuinfo_arm_decode_cache(
* [3] https://en.wikichip.org/wiki/hisilicon/kirin/980
*/
if (midr_is_qualcomm_cortex_a55_silver(midr)) {
- /* Qualcomm-modified Cortex-A55 in Snapdragon 710 / 845 */
+ /* Qualcomm-modified Cortex-A55 in Snapdragon 670 / 710 / 845 */
uint32_t l3_size = 1024 * 1024;
switch (chipset->series) {
case cpuinfo_arm_chipset_series_qualcomm_snapdragon:
@@ -827,6 +829,62 @@ void cpuinfo_arm_decode_cache(
.flags = CPUINFO_CACHE_INCLUSIVE
};
break;
+ case cpuinfo_uarch_cortex_a65:
+ {
+ /*
+ * ARM CortexA65 Core Technical Reference Manual
+ * A6.1. About the L1 memory system
+ * The L1 memory system enhances the performance and power efficiency in the CortexA65 core.
+ * It consists of separate instruction and data caches. You can configure instruction and data caches
+ * independently during implementation to sizes of 32KB or 64KB.
+ *
+ * L1 instruction-side memory system
+ * The L1 instruction-side memory system provides an instruction stream to the DPU. Its key features are:
+ * - 64-byte instruction side cache line length.
+ * - 4-way set associative L1 instruction cache.
+ *
+ * L1 data-side memory system
+ * - 64-byte data side cache line length.
+ * - 4-way set associative L1 data cache.
+ *
+ * A7.1 About the L2 memory system
+ * The CortexA65 L2 memory system is required to interface the CortexA65 cores to the L3 memory system.
+ * The L2 memory subsystem consists of:
+ * - An optional 4-way, set-associative L2 cache with a configurable size of 64KB, 128KB, or 256KB.
+ * Cache lines have a fixed length of 64 bytes.
+ *
+ * The main features of the L2 memory system are:
+ * - Strictly exclusive with L1 data cache.
+ * - Pseudo-inclusive with L1 instruction cache.
+ * - Private per-core unified L2 cache.
+ */
+ const uint32_t l1_size = 32 * 1024;
+ const uint32_t l2_size = 128 * 1024;
+ const uint32_t l3_size = 512 * 1024;
+ *l1i = (struct cpuinfo_cache) {
+ .size = l1_size,
+ .associativity = 4,
+ .line_size = 64,
+ };
+ *l1d = (struct cpuinfo_cache) {
+ .size = l1_size,
+ .associativity = 4,
+ .line_size = 64,
+ };
+ *l2 = (struct cpuinfo_cache) {
+ .size = l2_size,
+ .associativity = 4,
+ .line_size = 64,
+ .flags = CPUINFO_CACHE_INCLUSIVE
+ };
+ *l3 = (struct cpuinfo_cache) {
+ .size = l3_size,
+ /* DynamIQ */
+ .associativity = 16,
+ .line_size = 64,
+ };
+ break;
+ }
case cpuinfo_uarch_cortex_a72:
{
/*
@@ -1047,6 +1105,7 @@ void cpuinfo_arm_decode_cache(
break;
}
case cpuinfo_uarch_cortex_a76:
+ case cpuinfo_uarch_cortex_a76ae:
{
/*
* ARM Cortex-A76 Core Technical Reference Manual
@@ -1119,6 +1178,57 @@ void cpuinfo_arm_decode_cache(
};
break;
}
+ case cpuinfo_uarch_cortex_a77:
+ {
+ /*
+ * ARM Cortex-A77 Core Technical Reference Manual
+ * A6.1. About the L1 memory system
+ * The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB.
+ *
+ * A6.1.1 L1 instruction-side memory system
+ * The L1 instruction memory system has the following key features:
+ * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
+ * Physically Tagged (PIPT) 4-way set-associative L1 data cache.
+ * - Fixed cache line length of 64 bytes.
+ *
+ * A6.1.2 L1 data-side memory system
+ * The L1 data memory system has the following features:
+ * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
+ * Physically Tagged (PIPT) 4-way set-associative L1 data cache.
+ * - Fixed cache line length of 64 bytes.
+ * - Pseudo-LRU cache replacement policy.
+ *
+ * A7.1 About the L2 memory system
+ * The L2 memory subsystem consist of:
+ * - An 8-way set associative L2 cache with a configurable size of 128KB, 256KB or 512KB. Cache lines
+ * have a fixed length of 64 bytes.
+ * - Strictly inclusive with L1 data cache. Weakly inclusive with L1 instruction cache.
+ */
+ const uint32_t l2_size = 256 * 1024;
+ const uint32_t l3_size = 1024 * 1024;
+ *l1i = (struct cpuinfo_cache) {
+ .size = 64 * 1024,
+ .associativity = 4,
+ .line_size = 64,
+ };
+ *l1d = (struct cpuinfo_cache) {
+ .size = 64 * 1024,
+ .associativity = 4,
+ .line_size = 64,
+ };
+ *l2 = (struct cpuinfo_cache) {
+ .size = l2_size,
+ .associativity = 8,
+ .line_size = 64,
+ .flags = CPUINFO_CACHE_INCLUSIVE,
+ };
+ *l3 = (struct cpuinfo_cache) {
+ .size = l3_size,
+ .associativity = 16,
+ .line_size = 64,
+ };
+ break;
+ }
#if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__)
case cpuinfo_uarch_scorpion:
/*
@@ -1248,8 +1358,8 @@ void cpuinfo_arm_decode_cache(
.line_size = 64
};
break;
- case cpuinfo_uarch_mongoose_m1:
- case cpuinfo_uarch_mongoose_m2:
+ case cpuinfo_uarch_exynos_m1:
+ case cpuinfo_uarch_exynos_m2:
/*
* - "Moving past branch prediction we can see some elements of how the cache is set up for the L1 I$,
* namely 64 KB split into four sets with 128-byte line sizes for 128 cache lines per set" [1]
@@ -1283,7 +1393,7 @@ void cpuinfo_arm_decode_cache(
.line_size = 64
};
break;
- case cpuinfo_uarch_meerkat_m3:
+ case cpuinfo_uarch_exynos_m3:
/*
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference |
@@ -1294,19 +1404,19 @@ void cpuinfo_arm_decode_cache(
* [1] https://www.anandtech.com/show/12478/exynos-9810-handson-awkward-first-results
*/
*l1i = (struct cpuinfo_cache) {
- .size = 64 * 1024 /* assume same as in Mongoose cores */,
- .associativity = 4 /* assume same as in Mongoose cores */,
- .line_size = 128 /* assume same as in Mongoose cores */
+ .size = 64 * 1024 /* assume same as in Exynos M1/M2 cores */,
+ .associativity = 4 /* assume same as in Exynos M1/M2 cores */,
+ .line_size = 128 /* assume same as in Exynos M1/M2 cores */
};
*l1d = (struct cpuinfo_cache) {
.size = 64 * 1024,
- .associativity = 8 /* assume same as in Mongoose cores */,
- .line_size = 64 /* assume same as in Mongoose cores */,
+ .associativity = 8 /* assume same as in Exynos M1/M2 cores */,
+ .line_size = 64 /* assume same as in Exynos M1/M2 cores */,
};
*l2 = (struct cpuinfo_cache) {
.size = 512 * 1024,
- .associativity = 16 /* assume same as in Mongoose cores */,
- .line_size = 64 /* assume same as in Mongoose cores */,
+ .associativity = 16 /* assume same as in Exynos M1/M2 cores */,
+ .line_size = 64 /* assume same as in Exynos M1/M2 cores */,
};
*l3 = (struct cpuinfo_cache) {
.size = 4 * 1024 * 1024,
@@ -1393,3 +1503,124 @@ void cpuinfo_arm_decode_cache(
}
}
}
+
+uint32_t cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor* processor) {
+ /*
+ * There is no precise way to detect cache size on ARM/ARM64, and cache size reported by cpuinfo
+ * may underestimate the actual cache size. Thus, we use microarchitecture-specific maximum.
+ */
+ switch (processor->core->uarch) {
+ case cpuinfo_uarch_xscale:
+ case cpuinfo_uarch_arm11:
+ case cpuinfo_uarch_scorpion:
+ case cpuinfo_uarch_krait:
+ case cpuinfo_uarch_kryo:
+ case cpuinfo_uarch_exynos_m1:
+ case cpuinfo_uarch_exynos_m2:
+ case cpuinfo_uarch_exynos_m3:
+ /* cpuinfo-detected cache size always correct */
+ return cpuinfo_compute_max_cache_size(processor);
+ case cpuinfo_uarch_cortex_a5:
+ /* Max observed (NXP Vybrid SoC) */
+ return 512 * 1024;
+ case cpuinfo_uarch_cortex_a7:
+ /*
+ * Cortex-A7 MPCore Technical Reference Manual:
+ * 7.1. About the L2 Memory system
+ * The L2 memory system consists of an:
+ * - Optional tightly-coupled L2 cache that includes:
+ * - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB.
+ */
+ return 1024 * 1024;
+ case cpuinfo_uarch_cortex_a8:
+ /*
+ * Cortex-A8 Technical Reference Manual:
+ * 8.1. About the L2 memory system
+ * The key features of the L2 memory system include:
+ * - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB
+ */
+ return 1024 * 1024;
+ case cpuinfo_uarch_cortex_a9:
+ /* Max observed (e.g. Exynos 4212) */
+ return 1024 * 1024;
+ case cpuinfo_uarch_cortex_a12:
+ case cpuinfo_uarch_cortex_a17:
+ /*
+ * ARM Cortex-A17 MPCore Processor Technical Reference Manual:
+ * 7.1. About the L2 Memory system
+ * The key features of the L2 memory system include:
+ * - An integrated L2 cache:
+ * - The cache size is implemented as either 256KB, 512KB, 1MB, 2MB, 4MB or 8MB.
+ */
+ return 8 * 1024 * 1024;
+ case cpuinfo_uarch_cortex_a15:
+ /*
+ * ARM Cortex-A15 MPCore Processor Technical Reference Manual:
+ * 7.1. About the L2 memory system
+ * The features of the L2 memory system include:
+ * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
+ */
+ return 4 * 1024 * 1024;
+ case cpuinfo_uarch_cortex_a35:
+ /*
+ * ARM CortexA35 Processor Technical Reference Manual:
+ * 7.1 About the L2 memory system
+ * L2 cache
+ * - Further features of the L2 cache are:
+ * - Configurable size of 128KB, 256KB, 512KB, and 1MB.
+ */
+ return 1024 * 1024;
+ case cpuinfo_uarch_cortex_a53:
+ /*
+ * ARM Cortex-A53 MPCore Processor Technical Reference Manual:
+ * 7.1. About the L2 memory system
+ * The L2 memory system consists of an:
+ * - Optional tightly-coupled L2 cache that includes:
+ * - Configurable L2 cache size of 128KB, 256KB, 512KB, 1MB and 2MB.
+ */
+ return 2 * 1024 * 1024;
+ case cpuinfo_uarch_cortex_a57:
+ /*
+ * ARM Cortex-A57 MPCore Processor Technical Reference Manual:
+ * 7.1 About the L2 memory system
+ * The features of the L2 memory system include:
+ * - Configurable L2 cache size of 512KB, 1MB, and 2MB.
+ */
+ return 2 * 1024 * 1024;
+ case cpuinfo_uarch_cortex_a72:
+ /*
+ * ARM Cortex-A72 MPCore Processor Technical Reference Manual:
+ * 7.1 About the L2 memory system
+ * The features of the L2 memory system include:
+ * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
+ */
+ return 4 * 1024 * 1024;
+ case cpuinfo_uarch_cortex_a73:
+ /*
+ * ARM CortexA73 MPCore Processor Technical Reference Manual
+ * 7.1 About the L2 memory system
+ * The L2 memory system consists of:
+ * - A tightly-integrated L2 cache with:
+ * - A configurable size of 256KB, 512KB, 1MB, 2MB, 4MB, or 8MB.
+ */
+ return 8 * 1024 * 1024;
+ case cpuinfo_uarch_cortex_a55:
+ case cpuinfo_uarch_cortex_a75:
+ case cpuinfo_uarch_cortex_a76:
+ case cpuinfo_uarch_exynos_m4:
+ default:
+ /*
+ * ARM DynamIQ Shared Unit Technical Reference Manual
+ * 1.3 Implementation options
+ * L3_CACHE_SIZE
+ * - 256KB
+ * - 512KB
+ * - 1024KB
+ * - 1536KB
+ * - 2048KB
+ * - 3072KB
+ * - 4096KB
+ */
+ return 4 * 1024 * 1024;
+ }
+}
diff --git src/arm/linux/init.c src/arm/linux/init.c
index a297f63..f0c432c 100644
--- src/arm/linux/init.c
+++ src/arm/linux/init.c
@@ -678,6 +678,8 @@ void cpuinfo_arm_linux_init(void) {
cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
+ cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]);
+
__sync_synchronize();
cpuinfo_is_initialized = true;
diff --git src/arm/linux/midr.c src/arm/linux/midr.c
index 668fc72..2c3116b 100644
--- src/arm/linux/midr.c
+++ src/arm/linux/midr.c
@@ -220,7 +220,7 @@ static const struct cluster_config cluster_configs[] = {
.model = UINT16_C(7420),
.clusters = 2,
.cluster_cores = {
- [0] = 4,
+ [0] = 4,
[1] = 4,
},
.cluster_midr = {
@@ -229,7 +229,7 @@ static const struct cluster_config cluster_configs[] = {
},
},
{
- /* Exynos 8890: 4x Mongoose + 4x Cortex-A53 */
+ /* Exynos 8890: 4x Exynos M1 + 4x Cortex-A53 */
.cores = 8,
.series = cpuinfo_arm_chipset_series_samsung_exynos,
.model = UINT16_C(8890),
@@ -695,7 +695,7 @@ static void cpuinfo_arm_linux_detect_cluster_midr_by_sequential_scan(
if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
if (processors[i].package_leader_id == i) {
if (bitmask_all(processors[i].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) {
- midr = processors[i].midr;
+ midr = processors[i].midr;
} else {
cpuinfo_log_info("assume processor %"PRIu32" to have MIDR %08"PRIx32, i, midr);
/* To be consistent, we copy the MIDR entirely, rather than by parts */
@@ -836,7 +836,7 @@ uint32_t cpuinfo_arm_linux_detect_cluster_midr(
* - Clusters preceeding the first reported MIDR value are assumed to have the last reported MIDR value.
* - Clusters following any reported MIDR value to have that MIDR value.
*/
-
+
if (cpuinfo_arm_linux_detect_cluster_midr_by_chipset(
chipset, clusters_count, cluster_leaders, usable_processors, processors, true))
{
diff --git src/arm/mach/init.c src/arm/mach/init.c
index 5b14b49..e64cc18 100644
--- src/arm/mach/init.c
+++ src/arm/mach/init.c
@@ -562,6 +562,8 @@ void cpuinfo_arm_mach_init(void) {
cpuinfo_clusters_count = num_clusters;
cpuinfo_packages_count = mach_topology.packages;
+ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
__sync_synchronize();
cpuinfo_is_initialized = true;
diff --git src/arm/midr.h src/arm/midr.h
index 6363ed7..d5a28e3 100644
--- src/arm/midr.h
+++ src/arm/midr.h
@@ -33,31 +33,31 @@
#define CPUINFO_ARM_MIDR_KRYO_SILVER_821 UINT32_C(0x510F2010)
#define CPUINFO_ARM_MIDR_KRYO_GOLD UINT32_C(0x510F2050)
#define CPUINFO_ARM_MIDR_KRYO_SILVER_820 UINT32_C(0x510F2110)
-#define CPUINFO_ARM_MIDR_MONGOOSE UINT32_C(0x530F0010)
+#define CPUINFO_ARM_MIDR_EXYNOS_M1_M2 UINT32_C(0x530F0010)
#define CPUINFO_ARM_MIDR_DENVER2 UINT32_C(0x4E0F0030)
inline static uint32_t midr_set_implementer(uint32_t midr, uint32_t implementer) {
- return (midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) |
+ return (midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) |
((implementer << CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET) & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK);
}
inline static uint32_t midr_set_variant(uint32_t midr, uint32_t variant) {
- return (midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK) |
+ return (midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK) |
((variant << CPUINFO_ARM_MIDR_VARIANT_OFFSET) & CPUINFO_ARM_MIDR_VARIANT_MASK);
}
inline static uint32_t midr_set_architecture(uint32_t midr, uint32_t architecture) {
- return (midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK) |
+ return (midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK) |
((architecture << CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET) & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK);
}
inline static uint32_t midr_set_part(uint32_t midr, uint32_t part) {
- return (midr & ~CPUINFO_ARM_MIDR_PART_MASK) |
+ return (midr & ~CPUINFO_ARM_MIDR_PART_MASK) |
((part << CPUINFO_ARM_MIDR_PART_OFFSET) & CPUINFO_ARM_MIDR_PART_MASK);
}
inline static uint32_t midr_set_revision(uint32_t midr, uint32_t revision) {
- return (midr & ~CPUINFO_ARM_MIDR_REVISION_MASK) |
+ return (midr & ~CPUINFO_ARM_MIDR_REVISION_MASK) |
((revision << CPUINFO_ARM_MIDR_REVISION_OFFSET) & CPUINFO_ARM_MIDR_REVISION_MASK);
}
@@ -171,13 +171,20 @@ inline static bool midr_is_kryo_gold(uint32_t midr) {
inline static uint32_t midr_score_core(uint32_t midr) {
const uint32_t core_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
switch (midr & core_mask) {
+ case UINT32_C(0x53000040): /* Exynos M5 */
+ case UINT32_C(0x53000030): /* Exynos M4 */
+ /* These cores are in big role w.r.t Cortex-A75 or Cortex-A76 */
+ return 6;
case UINT32_C(0x4E000030): /* Denver 2 */
- case UINT32_C(0x53000010): /* Mongoose */
- case UINT32_C(0x53000020): /* Meerkat */
+ case UINT32_C(0x53000010): /* Exynos M1 and Exynos M2 */
+ case UINT32_C(0x53000020): /* Exynos M3 */
+ case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */
case UINT32_C(0x51008020): /* Kryo 385 Gold */
case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */
case UINT32_C(0x51002050): /* Kryo Gold */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
+ case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
case UINT32_C(0x4100D0B0): /* Cortex-A76 */
case UINT32_C(0x4100D0A0): /* Cortex-A75 */
case UINT32_C(0x4100D090): /* Cortex-A73 */
@@ -191,12 +198,14 @@ inline static uint32_t midr_score_core(uint32_t midr) {
case UINT32_C(0x4100D070): /* Cortex-A57 */
/* Cortex-A57 can be in LITTLE role w.r.t. Denver 2, or in big role w.r.t. Cortex-A53 */
return 4;
+ case UINT32_C(0x4100D060): /* Cortex-A65 */
case UINT32_C(0x4100D050): /* Cortex-A55 */
case UINT32_C(0x4100D030): /* Cortex-A53 */
/* Cortex-A53 is usually in LITTLE role, but can be in big role w.r.t. Cortex-A35 */
return 2;
case UINT32_C(0x4100D040): /* Cortex-A35 */
case UINT32_C(0x4100C070): /* Cortex-A7 */
+ case UINT32_C(0x51008050): /* Kryo 485 Silver */
case UINT32_C(0x51008030): /* Kryo 385 Silver */
case UINT32_C(0x51008010): /* Kryo 260 / 280 Silver */
case UINT32_C(0x51002110): /* Kryo Silver (Snapdragon 820) */
@@ -215,7 +224,7 @@ inline static uint32_t midr_score_core(uint32_t midr) {
}
inline static uint32_t midr_little_core_for_big(uint32_t midr) {
- const uint32_t core_mask =
+ const uint32_t core_mask =
CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK;
switch (midr & core_mask) {
case CPUINFO_ARM_MIDR_CORTEX_A75:
@@ -223,7 +232,7 @@ inline static uint32_t midr_little_core_for_big(uint32_t midr) {
case CPUINFO_ARM_MIDR_CORTEX_A73:
case CPUINFO_ARM_MIDR_CORTEX_A72:
case CPUINFO_ARM_MIDR_CORTEX_A57:
- case CPUINFO_ARM_MIDR_MONGOOSE:
+ case CPUINFO_ARM_MIDR_EXYNOS_M1_M2:
return CPUINFO_ARM_MIDR_CORTEX_A53;
case CPUINFO_ARM_MIDR_CORTEX_A17:
case CPUINFO_ARM_MIDR_CORTEX_A15:
diff --git src/arm/uarch.c src/arm/uarch.c
index d7d2c63..a38250a 100644
--- src/arm/uarch.c
+++ src/arm/uarch.c
@@ -60,6 +60,9 @@ void cpuinfo_arm_decode_vendor_uarch(
case 0xD05:
*uarch = cpuinfo_uarch_cortex_a55;
break;
+ case 0xD06:
+ *uarch = cpuinfo_uarch_cortex_a65;
+ break;
case 0xD07:
*uarch = cpuinfo_uarch_cortex_a57;
break;
@@ -75,6 +78,22 @@ void cpuinfo_arm_decode_vendor_uarch(
case 0xD0B:
*uarch = cpuinfo_uarch_cortex_a76;
break;
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+ case 0xD0C:
+ *uarch = cpuinfo_uarch_neoverse_n1;
+ break;
+#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
+ case 0xD0D:
+ *uarch = cpuinfo_uarch_cortex_a77;
+ break;
+ case 0xD0E:
+ *uarch = cpuinfo_uarch_cortex_a76ae;
+ break;
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+ case 0xD4A:
+ *uarch = cpuinfo_uarch_neoverse_e1;
+ break;
+#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
default:
switch (midr_get_part(midr) >> 8) {
#if CPUINFO_ARCH_ARM
@@ -242,10 +261,14 @@ void cpuinfo_arm_decode_vendor_uarch(
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a55;
break;
- case 0x804:
+ case 0x804: /* High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76 */
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a76;
break;
+ case 0x805: /* Low-performance Kryo 485 "Silver" -> Cortex-A55 */
+ *vendor = cpuinfo_vendor_arm;
+ *uarch = cpuinfo_uarch_cortex_a55;
+ break;
#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
case 0xC00:
*uarch = cpuinfo_uarch_falkor;
@@ -263,27 +286,43 @@ void cpuinfo_arm_decode_vendor_uarch(
switch (midr & (CPUINFO_ARM_MIDR_VARIANT_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
case 0x00100010:
/*
- * Exynos 8890 MIDR = 0x531F0011, assume Mongoose M1 has:
+ * Exynos 8890 MIDR = 0x531F0011, assume Exynos M1 has:
* - CPU variant 0x1
* - CPU part 0x001
*/
- *uarch = cpuinfo_uarch_mongoose_m1;
+ *uarch = cpuinfo_uarch_exynos_m1;
break;
case 0x00400010:
/*
- * Exynos 8895 MIDR = 0x534F0010, assume Mongoose M2 has:
+ * Exynos 8895 MIDR = 0x534F0010, assume Exynos M2 has:
* - CPU variant 0x4
* - CPU part 0x001
*/
- *uarch = cpuinfo_uarch_mongoose_m2;
+ *uarch = cpuinfo_uarch_exynos_m2;
break;
case 0x00100020:
/*
- * Exynos 9810 MIDR = 0x531F0020, assume Meerkat M3 has:
+ * Exynos 9810 MIDR = 0x531F0020, assume Exynos M3 has:
* - CPU variant 0x1
* - CPU part 0x002
*/
- *uarch = cpuinfo_uarch_meerkat_m3;
+ *uarch = cpuinfo_uarch_exynos_m3;
+ break;
+ case 0x00100030:
+ /*
+ * Exynos 9820 MIDR = 0x531F0030, assume Exynos M4 has:
+ * - CPU variant 0x1
+ * - CPU part 0x003
+ */
+ *uarch = cpuinfo_uarch_exynos_m4;
+ break;
+ case 0x00100040:
+ /*
+ * Exynos 9820 MIDR = 0x531F0040, assume Exynos M5 has:
+ * - CPU variant 0x1
+ * - CPU part 0x004
+ */
+ *uarch = cpuinfo_uarch_exynos_m5;
break;
default:
cpuinfo_log_warning("unknown Samsung CPU variant 0x%01"PRIx32" part 0x%03"PRIx32" ignored",
diff --git src/cache.c src/cache.c
new file mode 100644
index 0000000..b976b87
--- /dev/null
+++ src/cache.c
@@ -0,0 +1,18 @@
+#include <stddef.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+
+
+uint32_t cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor) {
+ if (processor->cache.l4 != NULL) {
+ return processor->cache.l4->size;
+ } else if (processor->cache.l3 != NULL) {
+ return processor->cache.l3->size;
+ } else if (processor->cache.l2 != NULL) {
+ return processor->cache.l2->size;
+ } else if (processor->cache.l1d != NULL) {
+ return processor->cache.l1d->size;
+ }
+ return 0;
+}
diff --git src/cpuinfo/internal-api.h src/cpuinfo/internal-api.h
index 6045750..717b810 100644
--- src/cpuinfo/internal-api.h
+++ src/cpuinfo/internal-api.h
@@ -31,6 +31,7 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_cores_count;
extern CPUINFO_INTERNAL uint32_t cpuinfo_clusters_count;
extern CPUINFO_INTERNAL uint32_t cpuinfo_packages_count;
extern CPUINFO_INTERNAL uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max];
+extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size;
CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void);
CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
@@ -40,4 +41,6 @@ CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void);
CPUINFO_PRIVATE void cpuinfo_arm_linux_init(void);
+CPUINFO_PRIVATE uint32_t cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor);
+
typedef void (*cpuinfo_processor_callback)(uint32_t);
diff --git src/x86/isa.c src/x86/isa.c
index bca1ecd..d27dbca 100644
--- src/x86/isa.c
+++ src/x86/isa.c
@@ -42,8 +42,10 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
{
struct cpuinfo_x86_isa isa = { 0 };
- const struct cpuid_regs structured_feature_info =
+ const struct cpuid_regs structured_feature_info0 =
(max_base_index >= 7) ? cpuidex(7, 0) : (struct cpuid_regs) { 0, 0, 0, 0};
+ const struct cpuid_regs structured_feature_info1 =
+ (max_base_index >= 7) ? cpuidex(7, 1) : (struct cpuid_regs) { 0, 0, 0, 0};
const uint32_t processor_capacity_info_index = UINT32_C(0x80000008);
const struct cpuid_regs processor_capacity_info =
@@ -144,9 +146,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* CLFLUSHOPT instruction:
- * - Intel: ebx[bit 23] in structured feature info.
+ * - Intel: ebx[bit 23] in structured feature info (ecx = 0).
*/
- isa.clflushopt = !!(structured_feature_info.ebx & UINT32_C(0x00800000));
+ isa.clflushopt = !!(structured_feature_info0.ebx & UINT32_C(0x00800000));
/*
* MWAIT/MONITOR instructions:
@@ -273,9 +275,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* PREFETCHWT1 instruction:
- * - Intel: ecx[bit 0] of structured feature info. Reserved bit on AMD.
+ * - Intel: ecx[bit 0] of structured feature info (ecx = 0). Reserved bit on AMD.
*/
- isa.prefetchwt1 = !!(structured_feature_info.ecx & UINT32_C(0x00000001));
+ isa.prefetchwt1 = !!(structured_feature_info0.ecx & UINT32_C(0x00000001));
#if CPUINFO_ARCH_X86
/*
@@ -386,111 +388,123 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* AVX2 instructions:
- * - Intel: ebx[bit 5] in structured feature info.
+ * - Intel: ebx[bit 5] in structured feature info (ecx = 0).
*/
- isa.avx2 = avx_regs && !!(structured_feature_info.ebx & UINT32_C(0x00000020));
+ isa.avx2 = avx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00000020));
/*
* AVX512F instructions:
- * - Intel: ebx[bit 16] in structured feature info.
+ * - Intel: ebx[bit 16] in structured feature info (ecx = 0).
*/
- isa.avx512f = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00010000));
+ isa.avx512f = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00010000));
/*
* AVX512PF instructions:
- * - Intel: ebx[bit 26] in structured feature info.
+ * - Intel: ebx[bit 26] in structured feature info (ecx = 0).
*/
- isa.avx512pf = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x04000000));
+ isa.avx512pf = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x04000000));
/*
* AVX512ER instructions:
- * - Intel: ebx[bit 27] in structured feature info.
+ * - Intel: ebx[bit 27] in structured feature info (ecx = 0).
*/
- isa.avx512er = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x08000000));
+ isa.avx512er = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x08000000));
/*
* AVX512CD instructions:
- * - Intel: ebx[bit 28] in structured feature info.
+ * - Intel: ebx[bit 28] in structured feature info (ecx = 0).
*/
- isa.avx512cd = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x10000000));
+ isa.avx512cd = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x10000000));
/*
* AVX512DQ instructions:
- * - Intel: ebx[bit 17] in structured feature info.
+ * - Intel: ebx[bit 17] in structured feature info (ecx = 0).
*/
- isa.avx512dq = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00020000));
+ isa.avx512dq = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00020000));
/*
* AVX512BW instructions:
- * - Intel: ebx[bit 30] in structured feature info.
+ * - Intel: ebx[bit 30] in structured feature info (ecx = 0).
*/
- isa.avx512bw = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x40000000));
+ isa.avx512bw = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x40000000));
/*
* AVX512VL instructions:
- * - Intel: ebx[bit 31] in structured feature info.
+ * - Intel: ebx[bit 31] in structured feature info (ecx = 0).
*/
- isa.avx512vl = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x80000000));
+ isa.avx512vl = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x80000000));
/*
* AVX512IFMA instructions:
- * - Intel: ebx[bit 21] in structured feature info.
+ * - Intel: ebx[bit 21] in structured feature info (ecx = 0).
*/
- isa.avx512ifma = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00200000));
+ isa.avx512ifma = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00200000));
/*
* AVX512VBMI instructions:
- * - Intel: ecx[bit 1] in structured feature info.
+ * - Intel: ecx[bit 1] in structured feature info (ecx = 0).
*/
- isa.avx512vbmi = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000002));
+ isa.avx512vbmi = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000002));
/*
* AVX512VBMI2 instructions:
- * - Intel: ecx[bit 6] in structured feature info.
+ * - Intel: ecx[bit 6] in structured feature info (ecx = 0).
*/
- isa.avx512vbmi2 = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000040));
+ isa.avx512vbmi2 = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000040));
/*
* AVX512BITALG instructions:
- * - Intel: ecx[bit 12] in structured feature info.
+ * - Intel: ecx[bit 12] in structured feature info (ecx = 0).
*/
- isa.avx512bitalg = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00001000));
+ isa.avx512bitalg = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00001000));
/*
* AVX512VPOPCNTDQ instructions:
- * - Intel: ecx[bit 14] in structured feature info.
+ * - Intel: ecx[bit 14] in structured feature info (ecx = 0).
*/
- isa.avx512vpopcntdq = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00004000));
+ isa.avx512vpopcntdq = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00004000));
/*
* AVX512VNNI instructions:
- * - Intel: ecx[bit 11] in structured feature info.
+ * - Intel: ecx[bit 11] in structured feature info (ecx = 0).
*/
- isa.avx512vnni = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000800));
+ isa.avx512vnni = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000800));
/*
* AVX512_4VNNIW instructions:
- * - Intel: edx[bit 2] in structured feature info.
+ * - Intel: edx[bit 2] in structured feature info (ecx = 0).
*/
- isa.avx512_4vnniw = avx512_regs && !!(structured_feature_info.edx & UINT32_C(0x00000004));
+ isa.avx512_4vnniw = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000004));
/*
* AVX512_4FMAPS instructions:
- * - Intel: edx[bit 3] in structured feature info.
+ * - Intel: edx[bit 3] in structured feature info (ecx = 0).
*/
- isa.avx512_4fmaps = avx512_regs && !!(structured_feature_info.edx & UINT32_C(0x00000008));
+ isa.avx512_4fmaps = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000008));
+
+ /*
+ * AVX512_VP2INTERSECT instructions:
+ * - Intel: edx[bit 8] in structured feature info (ecx = 0).
+ */
+ isa.avx512vp2intersect = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000100));
+
+ /*
+ * AVX512_BF16 instructions:
+ * - Intel: eax[bit 5] in structured feature info (ecx = 1).
+ */
+ isa.avx512bf16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00000020));
/*
* HLE instructions:
- * - Intel: ebx[bit 4] in structured feature info.
+ * - Intel: ebx[bit 4] in structured feature info (ecx = 0).
*/
- isa.hle = !!(structured_feature_info.ebx & UINT32_C(0x00000010));
+ isa.hle = !!(structured_feature_info0.ebx & UINT32_C(0x00000010));
/*
* RTM instructions:
- * - Intel: ebx[bit 11] in structured feature info.
+ * - Intel: ebx[bit 11] in structured feature info (ecx = 0).
*/
- isa.rtm = !!(structured_feature_info.ebx & UINT32_C(0x00000800));
+ isa.rtm = !!(structured_feature_info0.ebx & UINT32_C(0x00000800));
/*
* XTEST instruction:
@@ -500,9 +514,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* MPX registers and instructions:
- * - Intel: ebx[bit 14] in structured feature info.
+ * - Intel: ebx[bit 14] in structured feature info (ecx = 0).
*/
- isa.mpx = mpx_regs && !!(structured_feature_info.ebx & UINT32_C(0x00004000));
+ isa.mpx = mpx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00004000));
#if CPUINFO_ARCH_X86
/*
@@ -528,9 +542,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* CLWB instruction:
- * - Intel: ebx[bit 24] in structured feature info.
+ * - Intel: ebx[bit 24] in structured feature info (ecx = 0).
*/
- isa.clwb = !!(structured_feature_info.ebx & UINT32_C(0x01000000));
+ isa.clwb = !!(structured_feature_info0.ebx & UINT32_C(0x01000000));
/*
* MOVBE instruction:
@@ -549,9 +563,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE instructions.
- * - Intel: ebx[bit 0] in structured feature info.
+ * - Intel: ebx[bit 0] in structured feature info (ecx = 0).
*/
- isa.fs_gs_base = !!(structured_feature_info.ebx & UINT32_C(0x00000001));
+ isa.fs_gs_base = !!(structured_feature_info0.ebx & UINT32_C(0x00000001));
/*
* LZCNT instruction:
@@ -573,21 +587,21 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* BMI instructions:
- * - Intel, AMD: ebx[bit 3] in structured feature info.
+ * - Intel, AMD: ebx[bit 3] in structured feature info (ecx = 0).
*/
- isa.bmi = !!(structured_feature_info.ebx & UINT32_C(0x00000008));
+ isa.bmi = !!(structured_feature_info0.ebx & UINT32_C(0x00000008));
/*
* BMI2 instructions:
- * - Intel: ebx[bit 8] in structured feature info.
+ * - Intel: ebx[bit 8] in structured feature info (ecx = 0).
*/
- isa.bmi2 = !!(structured_feature_info.ebx & UINT32_C(0x00000100));
+ isa.bmi2 = !!(structured_feature_info0.ebx & UINT32_C(0x00000100));
/*
* ADCX/ADOX instructions:
- * - Intel: ebx[bit 19] in structured feature info.
+ * - Intel: ebx[bit 19] in structured feature info (ecx = 0).
*/
- isa.adx = !!(structured_feature_info.ebx & UINT32_C(0x00080000));
+ isa.adx = !!(structured_feature_info0.ebx & UINT32_C(0x00080000));
/*
* AES instructions:
@@ -597,9 +611,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* VAES instructions:
- * - Intel: ecx[bit 9] in structured feature info.
+ * - Intel: ecx[bit 9] in structured feature info (ecx = 0).
*/
- isa.vaes = !!(structured_feature_info.ecx & UINT32_C(0x00000200));
+ isa.vaes = !!(structured_feature_info0.ecx & UINT32_C(0x00000200));
/*
* PCLMULQDQ instruction:
@@ -609,15 +623,15 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* VPCLMULQDQ instruction:
- * - Intel: ecx[bit 10] in structured feature info.
+ * - Intel: ecx[bit 10] in structured feature info (ecx = 0).
*/
- isa.vpclmulqdq = !!(structured_feature_info.ecx & UINT32_C(0x00000400));
+ isa.vpclmulqdq = !!(structured_feature_info0.ecx & UINT32_C(0x00000400));
/*
* GFNI instructions:
- * - Intel: ecx[bit 8] in structured feature info.
+ * - Intel: ecx[bit 8] in structured feature info (ecx = 0).
*/
- isa.gfni = !!(structured_feature_info.ecx & UINT32_C(0x00000100));
+ isa.gfni = !!(structured_feature_info0.ecx & UINT32_C(0x00000100));
/*
* RDRAND instruction:
@@ -627,15 +641,15 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* RDSEED instruction:
- * - Intel: ebx[bit 18] in structured feature info.
+ * - Intel: ebx[bit 18] in structured feature info (ecx = 0).
*/
- isa.rdseed = !!(structured_feature_info.ebx & UINT32_C(0x00040000));
+ isa.rdseed = !!(structured_feature_info0.ebx & UINT32_C(0x00040000));
/*
* SHA instructions:
- * - Intel: ebx[bit 29] in structured feature info.
+ * - Intel: ebx[bit 29] in structured feature info (ecx = 0).
*/
- isa.sha = !!(structured_feature_info.ebx & UINT32_C(0x20000000));
+ isa.sha = !!(structured_feature_info0.ebx & UINT32_C(0x20000000));
if (vendor == cpuinfo_vendor_via) {
const struct cpuid_regs padlock_meta_info = cpuid(UINT32_C(0xC0000000));
@@ -700,9 +714,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* RDPID instruction:
- * - Intel: ecx[bit 22] in structured feature info.
+ * - Intel: ecx[bit 22] in structured feature info (ecx = 0).
*/
- isa.rdpid = !!(structured_feature_info.ecx & UINT32_C(0x00400000));
+ isa.rdpid = !!(structured_feature_info0.ecx & UINT32_C(0x00400000));
return isa;
}
diff --git src/x86/linux/init.c src/x86/linux/init.c
index b5f74d0..c096336 100644
--- src/x86/linux/init.c
+++ src/x86/linux/init.c
@@ -592,6 +592,8 @@ void cpuinfo_x86_linux_init(void) {
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
+ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
__sync_synchronize();
cpuinfo_is_initialized = true;
diff --git src/x86/mach/init.c src/x86/mach/init.c
index 7b41ad0..ae2be33 100644
--- src/x86/mach/init.c
+++ src/x86/mach/init.c
@@ -327,6 +327,8 @@ void cpuinfo_x86_mach_init(void) {
cpuinfo_clusters_count = mach_topology.packages;
cpuinfo_packages_count = mach_topology.packages;
+ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
__sync_synchronize();
cpuinfo_is_initialized = true;
diff --git src/x86/uarch.c src/x86/uarch.c
index 71c899e..ba72d8a 100644
--- src/x86/uarch.c
+++ src/x86/uarch.c
@@ -74,13 +74,19 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
case 0x4F: // Broadwell-E
case 0x56: // Broadwell-DE
return cpuinfo_uarch_broadwell;
- case 0x4E: // Skylake-U/Y
- case 0x55: // Skylake Server (SKX)
- case 0x5E: // Skylake-H/S
+ case 0x4E: // Sky Lake Client Y/U
+ case 0x55: // Sky/Cascade/Cooper Lake Server
+ case 0x5E: // Sky Lake Client DT/H/S
+ case 0x8E: // Kaby/Whiskey/Amber/Comet Lake Y/U
+ case 0x9E: // Kaby/Coffee Lake DT/H/S
return cpuinfo_uarch_sky_lake;
- case 0x8E: // Kaby Lake U/Y
- case 0x9E: // Kaby Lake H/S
- return cpuinfo_uarch_kaby_lake;
+ case 0x66: // Cannon Lake (Core i3-8121U)
+ return cpuinfo_uarch_palm_cove;
+ case 0x6A: // Ice Lake-DE
+ case 0x6C: // Ice Lake-SP
+ case 0x7D: // Ice Lake-Y
+ case 0x7E: // Ice Lake-U
+ return cpuinfo_uarch_sunny_cove;
/* Low-power cores */
case 0x1C: // Diamondville, Silverthorne, Pineview
@@ -90,18 +96,20 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
case 0x35: // Cloverview
case 0x36: // Cedarview, Centerton
return cpuinfo_uarch_saltwell;
- case 0x37:
- case 0x4A:
- case 0x4D:
+ case 0x37: // Bay Trail
+ case 0x4A: // Merrifield
+ case 0x4D: // Avoton, Rangeley
case 0x5A: // Moorefield
case 0x5D: // SoFIA
return cpuinfo_uarch_silvermont;
- case 0x4C: // Braswell
- case 0x5F: // Denverton
+ case 0x4C: // Braswell, Cherry Trail
case 0x75: // Spreadtrum SC9853I-IA
- case 0x7A: // Goldmont+
return cpuinfo_uarch_airmont;
-
+ case 0x5C: // Apollo Lake
+ case 0x5F: // Denverton
+ return cpuinfo_uarch_goldmont;
+ case 0x7A: // Gemini Lake
+ return cpuinfo_uarch_goldmont_plus;
/* Knights-series cores */
case 0x57:
return cpuinfo_uarch_knights_landing;
@@ -190,7 +198,15 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
return cpuinfo_uarch_jaguar;
}
case 0x17:
- return cpuinfo_uarch_zen;
+ switch (model_info->model) {
+ case 0x01: // 14 nm Naples, Whitehaven, Summit Ridge, Snowy Owl
+ case 0x08: // 12 nm Pinnacle Ridge
+ case 0x11: // 14 nm Raven Ridge
+ case 0x18: // 12 nm Picasso
+ return cpuinfo_uarch_zen;
+ case 0x71: // Matisse
+ return cpuinfo_uarch_zen2;
+ }
}
break;
default:
diff --git src/x86/windows/init.c src/x86/windows/init.c
index eb3498a..7a2090e 100644
--- src/x86/windows/init.c
+++ src/x86/windows/init.c
@@ -571,6 +571,8 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
cpuinfo_clusters_count = packages_count;
cpuinfo_packages_count = packages_count;
+ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
MemoryBarrier();
cpuinfo_is_initialized = true;
diff --git tools/cache-info.c tools/cache-info.c
index ba0706f..05f69ee 100644
--- tools/cache-info.c
+++ tools/cache-info.c
@@ -60,6 +60,8 @@ int main(int argc, char** argv) {
fprintf(stderr, "failed to initialize CPU information\n");
exit(EXIT_FAILURE);
}
+ printf("Max cache size (upper bound): %"PRIu32" bytes\n", cpuinfo_get_max_cache_size());
+
if (cpuinfo_get_l1i_caches_count() != 0 && (cpuinfo_get_l1i_cache(0)->flags & CPUINFO_CACHE_UNIFIED) == 0) {
report_cache(cpuinfo_get_l1i_caches_count(), cpuinfo_get_l1i_cache(0), 1, "instruction");
}
diff --git tools/cpu-info.c tools/cpu-info.c
index caef424..7fa5187 100644
--- tools/cpu-info.c
+++ tools/cpu-info.c
@@ -73,8 +73,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Broadwell";
case cpuinfo_uarch_sky_lake:
return "Sky Lake";
- case cpuinfo_uarch_kaby_lake:
- return "Kaby Lake";
+ case cpuinfo_uarch_palm_cove:
+ return "Palm Cove";
+ case cpuinfo_uarch_sunny_cove:
+ return "Sunny Cove";
case cpuinfo_uarch_willamette:
return "Willamette";
case cpuinfo_uarch_prescott:
@@ -87,6 +89,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Silvermont";
case cpuinfo_uarch_airmont:
return "Airmont";
+ case cpuinfo_uarch_goldmont:
+ return "Goldmont";
+ case cpuinfo_uarch_goldmont_plus:
+ return "Goldmont Plus";
case cpuinfo_uarch_knights_ferry:
return "Knights Ferry";
case cpuinfo_uarch_knights_corner:
@@ -117,6 +123,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Excavator";
case cpuinfo_uarch_zen:
return "Zen";
+ case cpuinfo_uarch_zen2:
+ return "Zen 2";
case cpuinfo_uarch_geode:
return "Geode";
case cpuinfo_uarch_bobcat:
@@ -157,6 +165,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Cortex-A55";
case cpuinfo_uarch_cortex_a57:
return "Cortex-A57";
+ case cpuinfo_uarch_cortex_a65:
+ return "Cortex-A65";
case cpuinfo_uarch_cortex_a72:
return "Cortex-A72";
case cpuinfo_uarch_cortex_a73:
@@ -165,6 +175,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Cortex-A75";
case cpuinfo_uarch_cortex_a76:
return "Cortex-A76";
+ case cpuinfo_uarch_cortex_a76ae:
+ return "Cortex-A76AE";
+ case cpuinfo_uarch_cortex_a77:
+ return "Cortex-A77";
case cpuinfo_uarch_scorpion:
return "Scorpion";
case cpuinfo_uarch_krait:
@@ -181,12 +195,16 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Denver 2";
case cpuinfo_uarch_carmel:
return "Carmel";
- case cpuinfo_uarch_mongoose_m1:
- return "Mongoose M1";
- case cpuinfo_uarch_mongoose_m2:
- return "Mongoose M2";
- case cpuinfo_uarch_meerkat_m3:
- return "Meerkat M3";
+ case cpuinfo_uarch_exynos_m1:
+ return "Exynos M1";
+ case cpuinfo_uarch_exynos_m2:
+ return "Exynos M2";
+ case cpuinfo_uarch_exynos_m3:
+ return "Exynos M3";
+ case cpuinfo_uarch_exynos_m4:
+ return "Exynos M4";
+ case cpuinfo_uarch_exynos_m5:
+ return "Exynos M5";
case cpuinfo_uarch_swift:
return "Swift";
case cpuinfo_uarch_cyclone:
@@ -258,13 +276,23 @@ int main(int argc, char** argv) {
printf(", %s %s\n", vendor_string, uarch_string);
}
}
- printf("Logical processors:\n");
+ printf("Logical processors");
+ #if defined(__linux__)
+ printf(" (System ID)");
+ #endif
+ printf(":\n");
for (uint32_t i = 0; i < cpuinfo_get_processors_count(); i++) {
const struct cpuinfo_processor* processor = cpuinfo_get_processor(i);
+ printf("\t%"PRIu32"", i);
+
+ #if defined(__linux__)
+ printf(" (%"PRId32")", processor->linux_id);
+ #endif
+
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
- printf("\t%"PRIu32": APIC ID 0x%08"PRIx32"\n", i, processor->apic_id);
+ printf(": APIC ID 0x%08"PRIx32"\n", processor->apic_id);
#else
- printf("\t%"PRIu32"\n", i);
+ printf("\n");
#endif
}
}
diff --git tools/isa-info.c tools/isa-info.c
index 594c46a..98ef919 100644
--- tools/isa-info.c
+++ tools/isa-info.c
@@ -67,6 +67,8 @@ int main(int argc, char** argv) {
printf("\tAVX512BITALG: %s\n", cpuinfo_has_x86_avx512bitalg() ? "yes" : "no");
printf("\tAVX512VPOPCNTDQ: %s\n", cpuinfo_has_x86_avx512vpopcntdq() ? "yes" : "no");
printf("\tAVX512VNNI: %s\n", cpuinfo_has_x86_avx512vnni() ? "yes" : "no");
+ printf("\tAVX512BF16: %s\n", cpuinfo_has_x86_avx512bf16() ? "yes" : "no");
+ printf("\tAVX512VP2INTERSECT: %s\n", cpuinfo_has_x86_avx512vp2intersect() ? "yes" : "no");
printf("\tAVX512_4VNNIW: %s\n", cpuinfo_has_x86_avx512_4vnniw() ? "yes" : "no");
printf("\tAVX512_4FMAPS: %s\n", cpuinfo_has_x86_avx512_4fmaps() ? "yes" : "no");